Initial commit: Market Trends Scraper

This commit is contained in:
Dev
2025-09-11 17:46:14 +03:00
commit 4ddcde68d4
17 changed files with 3049 additions and 0 deletions

371
tests/test_scraper.py Normal file
View File

@@ -0,0 +1,371 @@
"""
Unit tests for the Scraper module.
"""
import pytest
import json
from unittest.mock import Mock, patch, MagicMock
from bs4 import BeautifulSoup
import pandas as pd
from src.scraper import MarketTrendsScraper
class TestMarketTrendsScraper:
    """Test cases for the MarketTrendsScraper class.

    Every test patches ``src.scraper.webdriver`` so the scraper is built
    against a mocked webdriver module instead of a real one.
    """

    @pytest.fixture
    def sample_config(self):
        """Return a scraper configuration with a single enabled source.

        The dict has three sections: ``scraper`` (request/browser settings),
        ``sources`` (one e-commerce source with CSS selectors and pagination)
        and ``output`` (file format settings).
        """
        return {
            "scraper": {
                "delay_between_requests": 1.0,
                "timeout": 30,
                "max_retries": 3,
                "user_agent": "Mozilla/5.0",
                "headless": True,
                "window_size": [1920, 1080]
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com/products",
                    "type": "ecommerce",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2.title",
                        "price": "span.price",
                        "rating": "div.rating",
                        "availability": "div.stock"
                    },
                    "pagination": {
                        "next_page": "a.next",
                        "max_pages": 2
                    }
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "market_trends_data"
            }
        }

    @pytest.fixture
    def sample_html(self):
        """Return an HTML page with two products and a "next page" link.

        The markup matches the selectors declared in ``sample_config``.
        """
        # The literal is kept flush-left on purpose: re-indenting it would
        # change the whitespace contained in the string.
        return """
<html>
<body>
<div class="product">
<h2 class="title">Test Product 1</h2>
<span class="price">$19.99</span>
<div class="rating">4.5 stars</div>
<div class="stock">In Stock</div>
<a href="/product/1">View</a>
</div>
<div class="product">
<h2 class="title">Test Product 2</h2>
<span class="price">€29.99</span>
<div class="rating">3.8 stars</div>
<div class="stock">Out of Stock</div>
<a href="/product/2">View</a>
</div>
<a class="next" href="/page/2">Next</a>
</body>
</html>
"""

    @patch('src.scraper.webdriver')
    def test_init(self, mock_webdriver, sample_config):
        """Test scraper initialization.

        Checks the attributes set by the constructor and that the Chrome
        driver was created once with the configured page-load timeout.
        """
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        scraper = MarketTrendsScraper(sample_config)

        assert scraper.config == sample_config
        # Identity, not equality: the scraper must hold the very driver
        # object returned by webdriver.Chrome().
        assert scraper.driver is mock_driver
        assert scraper.session is not None
        assert scraper.data == []

        # Verify browser setup
        mock_webdriver.Chrome.assert_called_once()
        mock_driver.set_page_load_timeout.assert_called_with(30)

    @patch('src.scraper.webdriver')
    def test_setup_browser(self, mock_webdriver, sample_config):
        """Test that the browser options reflect the configuration."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Only the constructor's side effects on the mock are inspected.
        MarketTrendsScraper(sample_config)

        # Verify options were set; Chrome() is expected to receive them
        # through the ``options`` keyword argument.
        call_args = mock_webdriver.Chrome.call_args
        options = call_args[1]['options']
        assert options.arguments is not None
        assert any("--headless" in arg for arg in options.arguments)
        assert any("--window-size=1920,1080" in arg for arg in options.arguments)

    @patch('src.scraper.webdriver')
    def test_setup_session(self, mock_webdriver, sample_config):
        """Test that the HTTP session carries the configured headers."""
        scraper = MarketTrendsScraper(sample_config)

        # Verify headers were set
        assert "User-Agent" in scraper.session.headers
        assert scraper.session.headers["User-Agent"] == sample_config["scraper"]["user_agent"]
        assert "Accept" in scraper.session.headers

    @patch('src.scraper.webdriver')
    def test_parse_price(self, mock_webdriver, sample_config):
        """Test price parsing for several currency and separator formats."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various price formats
        assert scraper._parse_price("$19.99") == 19.99
        assert scraper._parse_price("€29.99") == 29.99
        assert scraper._parse_price("£39.99") == 39.99
        assert scraper._parse_price("19,99") == 19.99
        assert scraper._parse_price("Price: $49.99 USD") == 49.99
        assert scraper._parse_price("Invalid price") is None
        assert scraper._parse_price("") is None

    @patch('src.scraper.webdriver')
    def test_parse_rating(self, mock_webdriver, sample_config):
        """Test rating parsing for several textual formats."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various rating formats
        assert scraper._parse_rating("4.5 stars") == 4.5
        assert scraper._parse_rating("Rating: 3.8/5") == 3.8
        assert scraper._parse_rating("5 stars") == 5.0
        assert scraper._parse_rating("Invalid rating") is None
        assert scraper._parse_rating("") is None

    @patch('src.scraper.webdriver')
    def test_extract_product_data(self, mock_webdriver, sample_config, sample_html):
        """Test product data extraction from the first product in the HTML."""
        scraper = MarketTrendsScraper(sample_config)
        soup = BeautifulSoup(sample_html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is not None
        assert data["name"] == "Test Product 1"
        assert data["price"] == 19.99
        assert data["rating"] == 4.5
        assert data["availability"] == "In Stock"
        assert data["url"] == "/product/1"

    @patch('src.scraper.webdriver')
    def test_extract_product_data_no_name(self, mock_webdriver, sample_config):
        """Test product data extraction when the name element is missing."""
        scraper = MarketTrendsScraper(sample_config)
        # Kept flush-left so the literal's content is unchanged.
        html = """
<div class="product">
<span class="price">$19.99</span>
</div>
"""
        soup = BeautifulSoup(html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is None  # Should return None when name is missing

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_success(self, mock_webdriver, sample_config):
        """Test that a successful HTTP request is returned after one call."""
        scraper = MarketTrendsScraper(sample_config)
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "Success"

        with patch.object(scraper.session, 'get', return_value=mock_response) as mock_get:
            response = scraper._make_request_with_retry("https://example.com")

        assert response == mock_response
        mock_get.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_failure(self, mock_webdriver, sample_config):
        """Test that a request failing on every attempt yields ``None``."""
        scraper = MarketTrendsScraper(sample_config)

        # time.sleep is neutralized so that retry back-off cannot slow the
        # test down. NOTE(review): this assumes the scraper waits through
        # ``time.sleep``; the patch is harmless if it does not.
        with patch('time.sleep'), patch.object(
            scraper.session, 'get', side_effect=Exception("Connection error")
        ):
            response = scraper._make_request_with_retry("https://example.com")

        assert response is None

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_source')
    def test_scrape_market_trends(self, mock_scrape_source, mock_webdriver, sample_config):
        """Test scraping market trends from multiple sources.

        ``_scrape_source`` is mocked to return one product per call; the
        combined result must contain both products in source order.
        """
        # Add second source *before* building the scraper so the test does
        # not depend on the scraper sharing (rather than copying) the dict.
        sample_config["sources"].append({
            "name": "test_source_2",
            "url": "https://example2.com/products",
            "enabled": True,
            "selectors": {
                "product": "div.product",
                "name": "h2.title",
                "price": "span.price"
            }
        })
        scraper = MarketTrendsScraper(sample_config)

        # Mock source data
        mock_scrape_source.side_effect = [
            [{"name": "Product 1", "price": 19.99}],
            [{"name": "Product 2", "price": 29.99}]
        ]

        data = scraper.scrape_market_trends()

        assert len(data) == 2
        assert data[0]["name"] == "Product 1"
        assert data[1]["name"] == "Product 2"
        assert mock_scrape_source.call_count == 2

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_requests')
    def test_scrape_source_with_requests(self, mock_scrape_requests, mock_webdriver, sample_config):
        """Test that a source without ``use_selenium`` goes through requests."""
        scraper = MarketTrendsScraper(sample_config)
        mock_scrape_requests.return_value = [{"name": "Test Product", "price": 19.99}]
        source = sample_config["sources"][0]

        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_requests.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_selenium')
    def test_scrape_source_with_selenium(self, mock_scrape_selenium, mock_webdriver, sample_config):
        """Test that a source flagged ``use_selenium`` goes through Selenium."""
        scraper = MarketTrendsScraper(sample_config)
        mock_scrape_selenium.return_value = [{"name": "Test Product", "price": 19.99}]

        # Configure source to use Selenium
        source = sample_config["sources"][0]
        source["use_selenium"] = True

        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_selenium.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched open() also works in a ``with``.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_data_csv(self, mock_open, mock_webdriver, sample_config):
        """Test saving data to a CSV file via ``DataFrame.to_csv``."""
        scraper = MarketTrendsScraper(sample_config)
        data = [
            {"name": "Product 1", "price": 19.99, "source": "Test"},
            {"name": "Product 2", "price": 29.99, "source": "Test"}
        ]

        with patch.object(pd.DataFrame, 'to_csv') as mock_to_csv:
            scraper.save_data(data, "test_output.csv")

        mock_to_csv.assert_called_once_with("test_output.csv", index=False)

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched open() also works in a ``with``.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_data_json(self, mock_open, mock_webdriver, sample_config):
        """Test saving data to a JSON file via ``DataFrame.to_json``."""
        scraper = MarketTrendsScraper(sample_config)
        data = [
            {"name": "Product 1", "price": 19.99, "source": "Test"},
            {"name": "Product 2", "price": 29.99, "source": "Test"}
        ]

        with patch.object(pd.DataFrame, 'to_json') as mock_to_json:
            scraper.save_data(data, "test_output.json")

        mock_to_json.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_analyze_trends(self, mock_webdriver, sample_config):
        """Test trend analysis over four products from two sources."""
        scraper = MarketTrendsScraper(sample_config)
        data = [
            {"name": "Product 1", "price": 19.99, "rating": 4.5, "source": "Source A"},
            {"name": "Product 2", "price": 29.99, "rating": 3.8, "source": "Source A"},
            {"name": "Product 3", "price": 39.99, "rating": 4.2, "source": "Source B"},
            {"name": "Product 4", "price": 49.99, "rating": 4.7, "source": "Source B"}
        ]

        analysis = scraper.analyze_trends(data)

        assert analysis["total_products"] == 4
        assert "price_analysis" in analysis
        assert "rating_analysis" in analysis
        assert "sources" in analysis
        assert analysis["sources"]["Source A"] == 2
        assert analysis["sources"]["Source B"] == 2
        # Averages are computed from floats, so compare with a tolerance
        # instead of exact equality.
        assert analysis["price_analysis"]["average_price"] == pytest.approx(34.99)
        assert analysis["price_analysis"]["min_price"] == 19.99
        assert analysis["price_analysis"]["max_price"] == 49.99
        assert analysis["rating_analysis"]["average_rating"] == pytest.approx(4.3)

    @patch('src.scraper.webdriver')
    def test_analyze_trends_empty_data(self, mock_webdriver, sample_config):
        """Test that trend analysis of an empty list reports an error."""
        scraper = MarketTrendsScraper(sample_config)

        analysis = scraper.analyze_trends([])

        assert "error" in analysis
        assert analysis["error"] == "No data available for analysis"

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched open() also works in a ``with``.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_analysis(self, mock_open, mock_webdriver, sample_config):
        """Test that saving analysis results serializes them with ``json.dump``."""
        scraper = MarketTrendsScraper(sample_config)
        analysis = {"total_products": 4, "average_price": 34.99}

        with patch('json.dump') as mock_json_dump:
            scraper.save_analysis(analysis, "test_analysis.json")

        mock_json_dump.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_close(self, mock_webdriver, sample_config):
        """Test that ``close()`` quits the browser driver exactly once."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver
        scraper = MarketTrendsScraper(sample_config)

        scraper.close()

        mock_driver.quit.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_context_manager(self, mock_webdriver, sample_config):
        """Test that leaving the ``with`` block quits the browser driver."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        with MarketTrendsScraper(sample_config) as scraper:
            assert scraper is not None

        # The driver must have been released on exit from the context.
        mock_driver.quit.assert_called_once()