Initial commit: Market Trends Scraper
This commit is contained in:
101
config/sample_config.yaml
Normal file
101
config/sample_config.yaml
Normal file
@@ -0,0 +1,101 @@
|
||||
# Sample Configuration for Market Trends Scraper
# Copy this file to config.yaml and customize for your needs

# Scraper settings: request pacing, retry policy, and browser options
scraper:
  # Delay between requests in seconds (helps avoid being blocked)
  delay_between_requests: 1.5

  # Request timeout in seconds
  timeout: 30

  # Maximum number of retry attempts for failed requests
  max_retries: 3

  # User agent string for HTTP requests
  user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

  # Run browser in headless mode (no visible UI)
  headless: true

  # Browser window size [width, height]
  window_size: [1920, 1080]

# List of data sources to scrape.
# Each entry is one source: identifying fields (name, url, type), two
# switches (enabled, use_selenium), a `selectors` mapping of CSS selectors,
# and a `pagination` mapping.
# NOTE(review): the nesting of `selectors` and `pagination` under each source
# is reconstructed from the key grouping - confirm it matches the loader.
sources:
  # Example: Amazon (fictional selectors for demonstration)
  - name: "amazon"
    url: "https://www.amazon.com/s?k=laptop"
    type: "ecommerce"
    enabled: false  # Set to true to enable this source
    use_selenium: true  # Amazon often requires JavaScript rendering
    selectors:
      product: "div[data-component-type='s-search-result']"
      name: "h2 span.a-text-normal"
      price: "span.a-price-whole"
      rating: "span.a-icon-alt"
      availability: "span.a-color-success"
    pagination:
      next_page: "a.s-pagination-next"
      max_pages: 5

  # Example: Generic e-commerce site
  - name: "example_ecommerce"
    url: "https://example-ecommerce.com/search?q=phone"
    type: "ecommerce"
    enabled: true
    use_selenium: false
    selectors:
      product: "div.product-card"
      name: "h3.product-title"
      price: "span.price"
      rating: "div.rating-stars"
      availability: "div.stock-status"
    pagination:
      next_page: "a.pagination-next"
      max_pages: 10

  # Example: Electronics retailer
  - name: "electronics_store"
    url: "https://example-electronics.com/category/smartphones"
    type: "ecommerce"
    enabled: true
    use_selenium: false
    selectors:
      product: "article.product-item"
      name: "h1.product-name"
      price: "div.current-price"
      rating: "div.product-rating"
      availability: "span.availability-label"
    pagination:
      next_page: "li.page-item.next a"
      max_pages: 3

# Output settings
output:
  # Output format: csv, json, or excel
  format: "csv"

  # Include timestamp in output filename
  include_timestamp: true

  # Base filename for output files
  filename: "market_trends_data"

# Database settings (for future enhancements)
database:
  # Database connection URL
  url: "sqlite:///data/market_trends.db"

  # Enable SQL query logging
  echo: false

# Analysis settings
analysis:
  # Number of days to consider for price history analysis
  price_history_days: 30

  # Minimum relative price change, expressed as a fraction, to consider as a
  # trend (0.05 = 5%)
  trend_threshold: 0.05

  # Generate trend charts (requires matplotlib and seaborn)
  generate_charts: true
Reference in New Issue
Block a user