Initial commit: Market Trends Scraper
355  tests/test_integration.py  Normal file
@@ -0,0 +1,355 @@
"""
Integration tests for the Market Trends Scraper application.

These tests verify that all components work together correctly.
"""

import pytest
import tempfile
import os
import json
import yaml
from pathlib import Path
from unittest.mock import patch, Mock

from src.config_manager import ConfigManager
from src.scraper import MarketTrendsScraper
from src.logger import setup_logger
import main
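
# NOTE: these imports assume the layout this commit introduces -- a `src`
# package (config_manager, scraper, logger) plus a top-level `main.py` --
# and that the suite is run from the repository root, e.g. `pytest tests/`.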


class TestIntegration:
    """Integration test cases for the entire application."""

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for test files."""
        with tempfile.TemporaryDirectory() as temp_dir:
            yield temp_dir

    @pytest.fixture
    def sample_config_file(self, temp_dir):
        """Create a sample configuration file for testing."""
        config_path = Path(temp_dir) / "config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,  # Faster for testing
                "timeout": 10,
                "max_retries": 2,
                "user_agent": "Mozilla/5.0 (Test)",
                "headless": True,
                "window_size": [1024, 768]
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://httpbin.org/html",
                    "type": "ecommerce",
                    "enabled": True,
                    "selectors": {
                        "product": "p",
                        "name": "p",
                        "price": "p",
                        "rating": "p",
                        "availability": "p"
                    },
                    "pagination": {
                        "next_page": "a",
                        "max_pages": 1
                    }
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "test_output"
            },
            "database": {
                "url": f"sqlite:///{temp_dir}/test.db",
                "echo": False
            },
            "analysis": {
                "price_history_days": 30,
                "trend_threshold": 0.05,
                "generate_charts": True
            }
        }
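
        # The database URL above points into temp_dir, so each test run works
        # against a fresh, throwaway SQLite file instead of a shared database.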
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        return str(config_path)

    @patch('src.scraper.webdriver')
    def test_full_workflow(self, mock_webdriver, sample_config_file, temp_dir):
        """Test the complete workflow from config loading to analysis."""
        # Set up the mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Set up the logger
        setup_logger()

        # Load the configuration
        config_manager = ConfigManager(sample_config_file)
        config = config_manager.load_config()

        # Verify the config was loaded
        assert config["scraper"]["delay_between_requests"] == 0.1
        assert len(config["sources"]) == 1
        assert config["sources"][0]["name"] == "test_source"

        # Initialize the scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock the scraping process to return sample data
        sample_data = [
            {
                "name": "Test Product 1",
                "price": 19.99,
                "rating": 4.5,
                "availability": "In Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00"
            },
            {
                "name": "Test Product 2",
                "price": 29.99,
                "rating": 3.8,
                "availability": "Out of Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00"
            }
        ]
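
        # These two records mirror the shape `_scrape_source` is expected to
        # return, so `scrape_market_trends` can be exercised without a browser.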
        with patch.object(scraper, '_scrape_source', return_value=sample_data):
            # Scrape data
            data = scraper.scrape_market_trends()

            # Verify data was scraped
            assert len(data) == 2
            assert data[0]["name"] == "Test Product 1"
            assert data[1]["price"] == 29.99

            # Save data
            output_path = Path(temp_dir) / "test_output.csv"
            scraper.save_data(data, str(output_path))

            # Verify the file was created
            assert output_path.exists()

            # Analyze trends
            analysis = scraper.analyze_trends(data)

            # Verify the analysis
            assert analysis["total_products"] == 2
            assert "price_analysis" in analysis
            assert analysis["price_analysis"]["average_price"] == 24.99
            assert analysis["price_analysis"]["min_price"] == 19.99
            assert analysis["price_analysis"]["max_price"] == 29.99

            # Save the analysis
            analysis_path = Path(temp_dir) / "test_analysis.json"
            scraper.save_analysis(analysis, str(analysis_path))

            # Verify the analysis file was created
            assert analysis_path.exists()

            # Verify the analysis content
            with open(analysis_path, 'r') as f:
                saved_analysis = json.load(f)

            assert saved_analysis["total_products"] == 2
            assert saved_analysis["price_analysis"]["average_price"] == 24.99

    @patch('src.scraper.webdriver')
    def test_multiple_sources(self, mock_webdriver, temp_dir):
        """Test scraping from multiple sources."""
        # Set up the mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create a config with multiple sources
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "source_1",
                    "url": "https://example1.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                },
                {
                    "name": "source_2",
                    "url": "https://example2.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.item",
                        "name": "h3",
                        "price": "div.cost"
                    }
                },
                {
                    "name": "source_3",
                    "url": "https://example3.com",
                    "enabled": False,  # Disabled source
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        config_path = Path(temp_dir) / "multi_source_config.yaml"
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Initialize the scraper directly from the in-memory config
        scraper = MarketTrendsScraper(config_content, headless=True)

        # Mock different data for each source
        def mock_scrape_source(source):
            if source["name"] == "source_1":
                return [{"name": "Product 1", "price": 10.00, "source": "source_1"}]
            elif source["name"] == "source_2":
                return [{"name": "Product 2", "price": 20.00, "source": "source_2"}]
            else:
                return []
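
        # side_effect hands each call's `source` dict to the stub above; the
        # disabled source_3 should be skipped by the scraper before the stub
        # is ever consulted, which the assertions below rely on.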
        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Verify data from both enabled sources
            assert len(data) == 2
            sources = {item["source"] for item in data}
            assert "source_1" in sources
            assert "source_2" in sources
            assert "source_3" not in sources  # Disabled source should not appear

    @patch('src.scraper.webdriver')
    def test_error_handling(self, mock_webdriver, temp_dir):
        """Test error handling when scraping fails."""
        # Set up the mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        config = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "working_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                },
                {
                    "name": "failing_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        # Initialize the scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock one source to succeed and one to fail
        def mock_scrape_source(source):
            if source["name"] == "working_source":
                return [{"name": "Working Product", "price": 15.00, "source": "working_source"}]
            else:
                raise Exception("Scraping failed")
        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Should still get data from the working source
            assert len(data) == 1
            assert data[0]["source"] == "working_source"

    @patch('src.scraper.webdriver')
    def test_main_entry_point(self, mock_webdriver, temp_dir):
        """Test the main entry point of the application."""
        # Set up the mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create a test config
        config_path = Path(temp_dir) / "test_config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Mock the scraper to return sample data
        sample_data = [{"name": "Test Product", "price": 19.99, "source": "test_source"}]
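
        # Patching the names as seen by `main` isolates the entry point:
        # main() runs its real argument parsing and control flow, while the
        # heavy components are replaced by the mocks configured below.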
        with patch('main.ConfigManager') as mock_config_manager, \
                patch('main.MarketTrendsScraper') as mock_scraper_class:

            # Set up the mocks
            mock_config_instance = Mock()
            mock_config_manager.return_value = mock_config_instance
            mock_config_instance.load_config.return_value = config_content

            mock_scraper_instance = Mock()
            mock_scraper_class.return_value = mock_scraper_instance
            mock_scraper_instance.scrape_market_trends.return_value = sample_data
            mock_scraper_instance.analyze_trends.return_value = {"total_products": 1}

            # Run the main function with a patched command line; temp_dir is
            # a plain string, so build the output path with Path
            output_path = Path(temp_dir) / 'output.csv'
            with patch('sys.argv', ['main.py', '--config', str(config_path), '--output', str(output_path)]):
                result = main.main()

            # Verify main completed successfully
            assert result == 0

            # Verify the scraper was called
            mock_scraper_instance.scrape_market_trends.assert_called_once()
            mock_scraper_instance.save_data.assert_called_once()
            mock_scraper_instance.analyze_trends.assert_called_once()
            mock_scraper_instance.save_analysis.assert_called_once()