"""
Integration tests for the Market Trends Scraper application.

These tests verify that all components work together correctly:
configuration loading, scraping (with the Selenium webdriver mocked out),
data persistence, trend analysis, and the CLI entry point in ``main``.
"""
import pytest
import tempfile
import os
import json
import yaml
from pathlib import Path
from unittest.mock import patch, Mock

from src.config_manager import ConfigManager
from src.scraper import MarketTrendsScraper
from src.logger import setup_logger
import main


class TestIntegration:
    """Integration test cases for the entire application."""

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for test files.

        Yields the directory path as a ``str``; the directory is removed
        automatically when the test finishes.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            yield temp_dir

    @pytest.fixture
    def sample_config_file(self, temp_dir):
        """Write a complete sample YAML configuration and return its path."""
        config_path = Path(temp_dir) / "config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,  # Faster for testing
                "timeout": 10,
                "max_retries": 2,
                "user_agent": "Mozilla/5.0 (Test)",
                "headless": True,
                "window_size": [1024, 768],
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://httpbin.org/html",
                    "type": "ecommerce",
                    "enabled": True,
                    "selectors": {
                        "product": "p",
                        "name": "p",
                        "price": "p",
                        "rating": "p",
                        "availability": "p",
                    },
                    "pagination": {
                        "next_page": "a",
                        "max_pages": 1,
                    },
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "test_output",
            },
            "database": {
                "url": f"sqlite:///{temp_dir}/test.db",
                "echo": False,
            },
            "analysis": {
                "price_history_days": 30,
                "trend_threshold": 0.05,
                "generate_charts": True,
            },
        }
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)
        return str(config_path)

    @patch('src.scraper.webdriver')
    def test_full_workflow(self, mock_webdriver, sample_config_file, temp_dir):
        """Test the complete workflow from config loading to analysis."""
        # Setup mock driver so no real browser is launched
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Setup logger
        setup_logger()

        # Load configuration
        config_manager = ConfigManager(sample_config_file)
        config = config_manager.load_config()

        # Verify config was loaded
        assert config["scraper"]["delay_between_requests"] == 0.1
        assert len(config["sources"]) == 1
        assert config["sources"][0]["name"] == "test_source"

        # Initialize scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock the scraping process to return sample data
        sample_data = [
            {
                "name": "Test Product 1",
                "price": 19.99,
                "rating": 4.5,
                "availability": "In Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00",
            },
            {
                "name": "Test Product 2",
                "price": 29.99,
                "rating": 3.8,
                "availability": "Out of Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00",
            },
        ]

        with patch.object(scraper, '_scrape_source', return_value=sample_data):
            # Scrape data
            data = scraper.scrape_market_trends()

            # Verify data was scraped
            assert len(data) == 2
            assert data[0]["name"] == "Test Product 1"
            assert data[1]["price"] == 29.99

            # Save data
            output_path = Path(temp_dir) / "test_output.csv"
            scraper.save_data(data, str(output_path))

            # Verify file was created
            assert output_path.exists()

            # Analyze trends
            analysis = scraper.analyze_trends(data)

            # Verify analysis (average of 19.99 and 29.99 is 24.99)
            assert analysis["total_products"] == 2
            assert "price_analysis" in analysis
            assert analysis["price_analysis"]["average_price"] == 24.99
            assert analysis["price_analysis"]["min_price"] == 19.99
            assert analysis["price_analysis"]["max_price"] == 29.99

            # Save analysis
            analysis_path = Path(temp_dir) / "test_analysis.json"
            scraper.save_analysis(analysis, str(analysis_path))

            # Verify analysis file was created
            assert analysis_path.exists()

            # Verify analysis content round-trips through JSON
            with open(analysis_path, 'r') as f:
                saved_analysis = json.load(f)
            assert saved_analysis["total_products"] == 2
            assert saved_analysis["price_analysis"]["average_price"] == 24.99

    @patch('src.scraper.webdriver')
    def test_multiple_sources(self, mock_webdriver, temp_dir):
        """Test scraping from multiple sources, skipping disabled ones."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create config with multiple sources
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True,
            },
            "sources": [
                {
                    "name": "source_1",
                    "url": "https://example1.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price",
                    },
                },
                {
                    "name": "source_2",
                    "url": "https://example2.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.item",
                        "name": "h3",
                        "price": "div.cost",
                    },
                },
                {
                    "name": "source_3",
                    "url": "https://example3.com",
                    "enabled": False,  # Disabled source
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price",
                    },
                },
            ],
        }

        config_path = Path(temp_dir) / "multi_source_config.yaml"
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Initialize scraper
        scraper = MarketTrendsScraper(config_content, headless=True)

        # Mock different data for each source
        def mock_scrape_source(source):
            if source["name"] == "source_1":
                return [{"name": "Product 1", "price": 10.00, "source": "source_1"}]
            elif source["name"] == "source_2":
                return [{"name": "Product 2", "price": 20.00, "source": "source_2"}]
            else:
                return []

        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Verify data from both enabled sources
            assert len(data) == 2
            sources = {item["source"] for item in data}
            assert "source_1" in sources
            assert "source_2" in sources
            assert "source_3" not in sources  # Disabled source should not appear

    @patch('src.scraper.webdriver')
    def test_error_handling(self, mock_webdriver, temp_dir):
        """Test that a failing source does not abort the whole scrape."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        config = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True,
            },
            "sources": [
                {
                    "name": "working_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price",
                    },
                },
                {
                    "name": "failing_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price",
                    },
                },
            ],
        }

        # Initialize scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock one source to succeed and one to fail
        def mock_scrape_source(source):
            if source["name"] == "working_source":
                return [{"name": "Working Product", "price": 15.00,
                         "source": "working_source"}]
            else:
                raise Exception("Scraping failed")

        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Should still get data from working source
            assert len(data) == 1
            assert data[0]["source"] == "working_source"

    @patch('src.scraper.webdriver')
    def test_main_entry_point(self, mock_webdriver, temp_dir):
        """Test the main entry point of the application."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create test config
        config_path = Path(temp_dir) / "test_config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True,
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price",
                    },
                }
            ],
        }
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Mock the scraper to return sample data
        sample_data = [{"name": "Test Product", "price": 19.99,
                        "source": "test_source"}]

        with patch('main.ConfigManager') as mock_config_manager, \
                patch('main.MarketTrendsScraper') as mock_scraper_class:
            # Setup mocks
            mock_config_instance = Mock()
            mock_config_manager.return_value = mock_config_instance
            mock_config_instance.load_config.return_value = config_content

            mock_scraper_instance = Mock()
            mock_scraper_class.return_value = mock_scraper_instance
            mock_scraper_instance.scrape_market_trends.return_value = sample_data
            mock_scraper_instance.analyze_trends.return_value = {"total_products": 1}

            # Run main function. NOTE: temp_dir is a plain str, so it must be
            # wrapped in Path before using the "/" join operator (the previous
            # `temp_dir / 'output.csv'` raised TypeError).
            with patch('sys.argv', ['main.py',
                                    '--config', str(config_path),
                                    '--output', str(Path(temp_dir) / 'output.csv')]):
                result = main.main()

            # Verify main completed successfully
            assert result == 0

            # Verify scraper was called
            mock_scraper_instance.scrape_market_trends.assert_called_once()
            mock_scraper_instance.save_data.assert_called_once()
            mock_scraper_instance.analyze_trends.assert_called_once()
            mock_scraper_instance.save_analysis.assert_called_once()