"""
Unit tests for the Scraper module.
"""

import pytest
|
|
import json
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
from bs4 import BeautifulSoup
|
|
import pandas as pd
|
|
|
|
from src.scraper import MarketTrendsScraper
|
|
|
|
|
|
class TestMarketTrendsScraper:
    """Test cases for MarketTrendsScraper class.

    Every test patches ``src.scraper.webdriver`` so that constructing the
    scraper never starts a real browser.
    """

    @pytest.fixture
    def sample_config(self):
        """Return a scraper configuration containing one enabled source."""
        return {
            "scraper": {
                "delay_between_requests": 1.0,
                "timeout": 30,
                "max_retries": 3,
                "user_agent": "Mozilla/5.0",
                "headless": True,
                "window_size": [1920, 1080],
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com/products",
                    "type": "ecommerce",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2.title",
                        "price": "span.price",
                        "rating": "div.rating",
                        "availability": "div.stock",
                    },
                    "pagination": {
                        "next_page": "a.next",
                        "max_pages": 2,
                    },
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "market_trends_data",
            },
        }

    @pytest.fixture
    def sample_html(self):
        """Return an HTML page with two products and a "next page" link."""
        return """
        <html>
        <body>
        <div class="product">
        <h2 class="title">Test Product 1</h2>
        <span class="price">$19.99</span>
        <div class="rating">4.5 stars</div>
        <div class="stock">In Stock</div>
        <a href="/product/1">View</a>
        </div>
        <div class="product">
        <h2 class="title">Test Product 2</h2>
        <span class="price">€29.99</span>
        <div class="rating">3.8 stars</div>
        <div class="stock">Out of Stock</div>
        <a href="/product/2">View</a>
        </div>
        <a class="next" href="/page/2">Next</a>
        </body>
        </html>
        """

    @patch('src.scraper.webdriver')
    def test_init(self, mock_webdriver, sample_config):
        """Test scraper initialization.

        Checks the stored config, driver, session and empty data list, and
        that the driver's page-load timeout is set from the config.
        """
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        scraper = MarketTrendsScraper(sample_config)

        assert scraper.config == sample_config
        assert scraper.driver == mock_driver
        assert scraper.session is not None
        assert scraper.data == []

        # Verify browser setup
        mock_webdriver.Chrome.assert_called_once()
        mock_driver.set_page_load_timeout.assert_called_with(30)

    @patch('src.scraper.webdriver')
    def test_setup_browser(self, mock_webdriver, sample_config):
        """Test that Chrome is created with headless and window-size options."""
        mock_webdriver.Chrome.return_value = Mock()

        # Construction alone triggers the browser setup; the instance itself
        # is not needed for the assertions below.
        MarketTrendsScraper(sample_config)

        # Verify options were set
        _, chrome_kwargs = mock_webdriver.Chrome.call_args
        options = chrome_kwargs['options']

        assert options.arguments is not None
        assert any("--headless" in arg for arg in options.arguments)
        assert any("--window-size=1920,1080" in arg for arg in options.arguments)

    @patch('src.scraper.webdriver')
    def test_setup_session(self, mock_webdriver, sample_config):
        """Test that the HTTP session carries the configured headers."""
        scraper = MarketTrendsScraper(sample_config)

        # Verify headers were set
        headers = scraper.session.headers
        assert "User-Agent" in headers
        assert headers["User-Agent"] == sample_config["scraper"]["user_agent"]
        assert "Accept" in headers

    @patch('src.scraper.webdriver')
    def test_parse_price(self, mock_webdriver, sample_config):
        """Test price parsing across currency symbols and separators."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various price formats
        assert scraper._parse_price("$19.99") == 19.99
        assert scraper._parse_price("€29.99") == 29.99
        assert scraper._parse_price("£39.99") == 39.99
        assert scraper._parse_price("19,99") == 19.99
        assert scraper._parse_price("Price: $49.99 USD") == 49.99
        assert scraper._parse_price("Invalid price") is None
        assert scraper._parse_price("") is None

    @patch('src.scraper.webdriver')
    def test_parse_rating(self, mock_webdriver, sample_config):
        """Test rating parsing for several textual formats."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various rating formats
        assert scraper._parse_rating("4.5 stars") == 4.5
        assert scraper._parse_rating("Rating: 3.8/5") == 3.8
        assert scraper._parse_rating("5 stars") == 5.0
        assert scraper._parse_rating("Invalid rating") is None
        assert scraper._parse_rating("") is None

    @patch('src.scraper.webdriver')
    def test_extract_product_data(self, mock_webdriver, sample_config, sample_html):
        """Test product data extraction from the first product in the HTML."""
        scraper = MarketTrendsScraper(sample_config)

        soup = BeautifulSoup(sample_html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is not None
        assert data["name"] == "Test Product 1"
        assert data["price"] == 19.99
        assert data["rating"] == 4.5
        assert data["availability"] == "In Stock"
        assert data["url"] == "/product/1"

    @patch('src.scraper.webdriver')
    def test_extract_product_data_no_name(self, mock_webdriver, sample_config):
        """Test product data extraction when name is missing."""
        scraper = MarketTrendsScraper(sample_config)

        html = """
        <div class="product">
        <span class="price">$19.99</span>
        </div>
        """
        soup = BeautifulSoup(html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is None  # Should return None when name is missing

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_success(self, mock_webdriver, sample_config):
        """Test that a successful request is returned after a single GET."""
        scraper = MarketTrendsScraper(sample_config)

        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "Success"

        with patch.object(scraper.session, 'get', return_value=mock_response) as mock_get:
            response = scraper._make_request_with_retry("https://example.com")

        assert response == mock_response
        mock_get.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_failure(self, mock_webdriver, sample_config):
        """Test that a request which always raises yields ``None``."""
        scraper = MarketTrendsScraper(sample_config)

        with patch.object(scraper.session, 'get', side_effect=Exception("Connection error")):
            response = scraper._make_request_with_retry("https://example.com")

        assert response is None

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_source')
    def test_scrape_market_trends(self, mock_scrape_source, mock_webdriver, sample_config):
        """Test scraping market trends from multiple sources."""
        # Add the second source *before* building the scraper so the test does
        # not rely on the scraper keeping a live reference to the fixture dict.
        sample_config["sources"].append({
            "name": "test_source_2",
            "url": "https://example2.com/products",
            "enabled": True,
            "selectors": {
                "product": "div.product",
                "name": "h2.title",
                "price": "span.price",
            },
        })

        # Mock source data: one result list per configured source, in order.
        mock_scrape_source.side_effect = [
            [{"name": "Product 1", "price": 19.99}],
            [{"name": "Product 2", "price": 29.99}],
        ]

        scraper = MarketTrendsScraper(sample_config)
        data = scraper.scrape_market_trends()

        assert len(data) == 2
        assert data[0]["name"] == "Product 1"
        assert data[1]["name"] == "Product 2"
        assert mock_scrape_source.call_count == 2

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_requests')
    def test_scrape_source_with_requests(self, mock_scrape_requests, mock_webdriver, sample_config):
        """Test scraping a source using requests."""
        scraper = MarketTrendsScraper(sample_config)

        mock_scrape_requests.return_value = [{"name": "Test Product", "price": 19.99}]

        source = sample_config["sources"][0]
        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_requests.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_selenium')
    def test_scrape_source_with_selenium(self, mock_scrape_selenium, mock_webdriver, sample_config):
        """Test scraping a source using Selenium."""
        scraper = MarketTrendsScraper(sample_config)

        mock_scrape_selenium.return_value = [{"name": "Test Product", "price": 19.99}]

        # Configure source to use Selenium
        source = sample_config["sources"][0]
        source["use_selenium"] = True

        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_selenium.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched ``open`` also works in a ``with`` block.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_data_csv(self, mock_open, mock_webdriver, sample_config):
        """Test saving data to CSV file."""
        scraper = MarketTrendsScraper(sample_config)

        data = [
            {"name": "Product 1", "price": 19.99, "source": "Test"},
            {"name": "Product 2", "price": 29.99, "source": "Test"},
        ]

        with patch.object(pd.DataFrame, 'to_csv') as mock_to_csv:
            scraper.save_data(data, "test_output.csv")
            mock_to_csv.assert_called_once_with("test_output.csv", index=False)

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched ``open`` also works in a ``with`` block.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_data_json(self, mock_open, mock_webdriver, sample_config):
        """Test saving data to JSON file."""
        scraper = MarketTrendsScraper(sample_config)

        data = [
            {"name": "Product 1", "price": 19.99, "source": "Test"},
            {"name": "Product 2", "price": 29.99, "source": "Test"},
        ]

        with patch.object(pd.DataFrame, 'to_json') as mock_to_json:
            scraper.save_data(data, "test_output.json")
            mock_to_json.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_analyze_trends(self, mock_webdriver, sample_config):
        """Test trend analysis totals, per-source counts and price/rating stats."""
        scraper = MarketTrendsScraper(sample_config)

        data = [
            {"name": "Product 1", "price": 19.99, "rating": 4.5, "source": "Source A"},
            {"name": "Product 2", "price": 29.99, "rating": 3.8, "source": "Source A"},
            {"name": "Product 3", "price": 39.99, "rating": 4.2, "source": "Source B"},
            {"name": "Product 4", "price": 49.99, "rating": 4.7, "source": "Source B"},
        ]

        analysis = scraper.analyze_trends(data)

        assert analysis["total_products"] == 4
        assert "price_analysis" in analysis
        assert "rating_analysis" in analysis
        assert "sources" in analysis
        assert analysis["sources"]["Source A"] == 2
        assert analysis["sources"]["Source B"] == 2

        price_stats = analysis["price_analysis"]
        # Averages come from floating-point arithmetic, so compare with a
        # tolerance instead of exact equality.
        assert price_stats["average_price"] == pytest.approx(34.99)
        assert price_stats["min_price"] == 19.99
        assert price_stats["max_price"] == 49.99
        assert analysis["rating_analysis"]["average_rating"] == pytest.approx(4.3)

    @patch('src.scraper.webdriver')
    def test_analyze_trends_empty_data(self, mock_webdriver, sample_config):
        """Test trend analysis with empty data."""
        scraper = MarketTrendsScraper(sample_config)

        analysis = scraper.analyze_trends([])

        assert "error" in analysis
        assert analysis["error"] == "No data available for analysis"

    @patch('src.scraper.webdriver')
    # MagicMock (not Mock) so the patched ``open`` also works in a ``with`` block.
    @patch('builtins.open', new_callable=MagicMock)
    def test_save_analysis(self, mock_open, mock_webdriver, sample_config):
        """Test saving analysis results."""
        scraper = MarketTrendsScraper(sample_config)

        analysis = {"total_products": 4, "average_price": 34.99}

        with patch('json.dump') as mock_json_dump:
            scraper.save_analysis(analysis, "test_analysis.json")
            mock_json_dump.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_close(self, mock_webdriver, sample_config):
        """Test that ``close()`` quits the browser driver exactly once."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        scraper = MarketTrendsScraper(sample_config)
        scraper.close()

        mock_driver.quit.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_context_manager(self, mock_webdriver, sample_config):
        """Test using scraper as context manager."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        with MarketTrendsScraper(sample_config) as scraper:
            assert scraper is not None

        # Leaving the ``with`` block must have shut the driver down.
        mock_driver.quit.assert_called_once()