""" Unit tests for the Scraper module. """ import pytest import json from unittest.mock import Mock, patch, MagicMock from bs4 import BeautifulSoup import pandas as pd from src.scraper import MarketTrendsScraper class TestMarketTrendsScraper: """Test cases for MarketTrendsScraper class.""" @pytest.fixture def sample_config(self): """Sample configuration for testing.""" return { "scraper": { "delay_between_requests": 1.0, "timeout": 30, "max_retries": 3, "user_agent": "Mozilla/5.0", "headless": True, "window_size": [1920, 1080] }, "sources": [ { "name": "test_source", "url": "https://example.com/products", "type": "ecommerce", "enabled": True, "selectors": { "product": "div.product", "name": "h2.title", "price": "span.price", "rating": "div.rating", "availability": "div.stock" }, "pagination": { "next_page": "a.next", "max_pages": 2 } } ], "output": { "format": "csv", "include_timestamp": True, "filename": "market_trends_data" } } @pytest.fixture def sample_html(self): """Sample HTML for testing.""" return """
    @pytest.fixture
    def sample_html(self):
        """Sample HTML for testing."""
        return """
        <div class="product">
            <h2 class="title">Test Product 1</h2>
            <span class="price">$19.99</span>
            <div class="rating">4.5 stars</div>
            <div class="stock">In Stock</div>
            <a href="/product/1">View</a>
        </div>
        <div class="product">
            <h2 class="title">Test Product 2</h2>
            <span class="price">€29.99</span>
            <div class="rating">3.8 stars</div>
            <div class="stock">Out of Stock</div>
            <a href="/product/2">View</a>
        </div>
        """
""" @patch('src.scraper.webdriver') def test_init(self, mock_webdriver, sample_config): """Test scraper initialization.""" mock_driver = Mock() mock_webdriver.Chrome.return_value = mock_driver scraper = MarketTrendsScraper(sample_config) assert scraper.config == sample_config assert scraper.driver == mock_driver assert scraper.session is not None assert scraper.data == [] # Verify browser setup mock_webdriver.Chrome.assert_called_once() mock_driver.set_page_load_timeout.assert_called_with(30) @patch('src.scraper.webdriver') def test_setup_browser(self, mock_webdriver, sample_config): """Test browser setup.""" mock_driver = Mock() mock_webdriver.Chrome.return_value = mock_driver scraper = MarketTrendsScraper(sample_config) # Verify options were set call_args = mock_webdriver.Chrome.call_args options = call_args[1]['options'] assert options.arguments is not None assert any("--headless" in arg for arg in options.arguments) assert any("--window-size=1920,1080" in arg for arg in options.arguments) @patch('src.scraper.webdriver') def test_setup_session(self, mock_webdriver, sample_config): """Test session setup.""" scraper = MarketTrendsScraper(sample_config) # Verify headers were set assert "User-Agent" in scraper.session.headers assert scraper.session.headers["User-Agent"] == sample_config["scraper"]["user_agent"] assert "Accept" in scraper.session.headers @patch('src.scraper.webdriver') def test_parse_price(self, mock_webdriver, sample_config): """Test price parsing.""" scraper = MarketTrendsScraper(sample_config) # Test various price formats assert scraper._parse_price("$19.99") == 19.99 assert scraper._parse_price("€29.99") == 29.99 assert scraper._parse_price("£39.99") == 39.99 assert scraper._parse_price("19,99") == 19.99 assert scraper._parse_price("Price: $49.99 USD") == 49.99 assert scraper._parse_price("Invalid price") is None assert scraper._parse_price("") is None @patch('src.scraper.webdriver') def test_parse_rating(self, mock_webdriver, sample_config): """Test rating parsing.""" scraper = MarketTrendsScraper(sample_config) # Test various rating formats assert scraper._parse_rating("4.5 stars") == 4.5 assert scraper._parse_rating("Rating: 3.8/5") == 3.8 assert scraper._parse_rating("5 stars") == 5.0 assert scraper._parse_rating("Invalid rating") is None assert scraper._parse_rating("") is None @patch('src.scraper.webdriver') def test_extract_product_data(self, mock_webdriver, sample_config, sample_html): """Test product data extraction from HTML.""" scraper = MarketTrendsScraper(sample_config) soup = BeautifulSoup(sample_html, 'html.parser') product = soup.find('div', class_='product') selectors = sample_config["sources"][0]["selectors"] data = scraper._extract_product_data(product, selectors) assert data is not None assert data["name"] == "Test Product 1" assert data["price"] == 19.99 assert data["rating"] == 4.5 assert data["availability"] == "In Stock" assert data["url"] == "/product/1" @patch('src.scraper.webdriver') def test_extract_product_data_no_name(self, mock_webdriver, sample_config): """Test product data extraction when name is missing.""" scraper = MarketTrendsScraper(sample_config) html = """
    @patch('src.scraper.webdriver')
    def test_extract_product_data(self, mock_webdriver, sample_config, sample_html):
        """Test product data extraction from HTML."""
        scraper = MarketTrendsScraper(sample_config)

        soup = BeautifulSoup(sample_html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is not None
        assert data["name"] == "Test Product 1"
        assert data["price"] == 19.99
        assert data["rating"] == 4.5
        assert data["availability"] == "In Stock"
        assert data["url"] == "/product/1"

    @patch('src.scraper.webdriver')
    def test_extract_product_data_no_name(self, mock_webdriver, sample_config):
        """Test product data extraction when name is missing."""
        scraper = MarketTrendsScraper(sample_config)

        html = """
        <div class="product">
            <span class="price">$19.99</span>
        </div>
        """
        soup = BeautifulSoup(html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is None  # Should return None when name is missing

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_success(self, mock_webdriver, sample_config):
        """Test successful HTTP request with retry logic."""
        scraper = MarketTrendsScraper(sample_config)

        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "Success"

        with patch.object(scraper.session, 'get', return_value=mock_response) as mock_get:
            response = scraper._make_request_with_retry("https://example.com")

            assert response == mock_response
            mock_get.assert_called_once()

    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_failure(self, mock_webdriver, sample_config):
        """Test HTTP request failure with retry logic."""
        scraper = MarketTrendsScraper(sample_config)

        with patch.object(scraper.session, 'get', side_effect=Exception("Connection error")):
            response = scraper._make_request_with_retry("https://example.com")

            assert response is None

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_source')
    def test_scrape_market_trends(self, mock_scrape_source, mock_webdriver, sample_config):
        """Test scraping market trends from multiple sources."""
        scraper = MarketTrendsScraper(sample_config)

        # Mock source data
        mock_scrape_source.side_effect = [
            [{"name": "Product 1", "price": 19.99}],
            [{"name": "Product 2", "price": 29.99}]
        ]

        # Add second source
        sample_config["sources"].append({
            "name": "test_source_2",
            "url": "https://example2.com/products",
            "enabled": True,
            "selectors": {
                "product": "div.product",
                "name": "h2.title",
                "price": "span.price"
            }
        })

        data = scraper.scrape_market_trends()

        assert len(data) == 2
        assert data[0]["name"] == "Product 1"
        assert data[1]["name"] == "Product 2"
        assert mock_scrape_source.call_count == 2

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_requests')
    def test_scrape_source_with_requests(self, mock_scrape_requests, mock_webdriver, sample_config):
        """Test scraping a source using requests."""
        scraper = MarketTrendsScraper(sample_config)

        mock_scrape_requests.return_value = [{"name": "Test Product", "price": 19.99}]

        source = sample_config["sources"][0]
        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_requests.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_selenium')
    def test_scrape_source_with_selenium(self, mock_scrape_selenium, mock_webdriver, sample_config):
        """Test scraping a source using Selenium."""
        scraper = MarketTrendsScraper(sample_config)

        mock_scrape_selenium.return_value = [{"name": "Test Product", "price": 19.99}]

        # Configure source to use Selenium
        source = sample_config["sources"][0]
        source["use_selenium"] = True

        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_selenium.assert_called_once_with(source)

    @patch('src.scraper.webdriver')
    @patch('builtins.open', new_callable=Mock)
    def test_save_data_csv(self, mock_open, mock_webdriver, sample_config):
        """Test saving data to CSV file."""
        scraper = MarketTrendsScraper(sample_config)

        data = [
            {"name": "Product 1", "price": 19.99, "source": "Test"},
            {"name": "Product 2", "price": 29.99, "source": "Test"}
        ]

        with patch.object(pd.DataFrame, 'to_csv') as mock_to_csv:
            scraper.save_data(data, "test_output.csv")
            mock_to_csv.assert_called_once_with("test_output.csv", index=False)
scraper.save_data(data, "test_output.csv") mock_to_csv.assert_called_once_with("test_output.csv", index=False) @patch('src.scraper.webdriver') @patch('builtins.open', new_callable=Mock) def test_save_data_json(self, mock_open, mock_webdriver, sample_config): """Test saving data to JSON file.""" scraper = MarketTrendsScraper(sample_config) data = [ {"name": "Product 1", "price": 19.99, "source": "Test"}, {"name": "Product 2", "price": 29.99, "source": "Test"} ] with patch.object(pd.DataFrame, 'to_json') as mock_to_json: scraper.save_data(data, "test_output.json") mock_to_json.assert_called_once() @patch('src.scraper.webdriver') def test_analyze_trends(self, mock_webdriver, sample_config): """Test trend analysis.""" scraper = MarketTrendsScraper(sample_config) data = [ {"name": "Product 1", "price": 19.99, "rating": 4.5, "source": "Source A"}, {"name": "Product 2", "price": 29.99, "rating": 3.8, "source": "Source A"}, {"name": "Product 3", "price": 39.99, "rating": 4.2, "source": "Source B"}, {"name": "Product 4", "price": 49.99, "rating": 4.7, "source": "Source B"} ] analysis = scraper.analyze_trends(data) assert analysis["total_products"] == 4 assert "price_analysis" in analysis assert "rating_analysis" in analysis assert "sources" in analysis assert analysis["sources"]["Source A"] == 2 assert analysis["sources"]["Source B"] == 2 assert analysis["price_analysis"]["average_price"] == 34.99 assert analysis["price_analysis"]["min_price"] == 19.99 assert analysis["price_analysis"]["max_price"] == 49.99 assert analysis["rating_analysis"]["average_rating"] == 4.3 @patch('src.scraper.webdriver') def test_analyze_trends_empty_data(self, mock_webdriver, sample_config): """Test trend analysis with empty data.""" scraper = MarketTrendsScraper(sample_config) analysis = scraper.analyze_trends([]) assert "error" in analysis assert analysis["error"] == "No data available for analysis" @patch('src.scraper.webdriver') @patch('builtins.open', new_callable=Mock) def test_save_analysis(self, mock_open, mock_webdriver, sample_config): """Test saving analysis results.""" scraper = MarketTrendsScraper(sample_config) analysis = {"total_products": 4, "average_price": 34.99} with patch('json.dump') as mock_json_dump: scraper.save_analysis(analysis, "test_analysis.json") mock_json_dump.assert_called_once() @patch('src.scraper.webdriver') def test_close(self, mock_webdriver, sample_config): """Test closing browser and session.""" mock_driver = Mock() mock_webdriver.Chrome.return_value = mock_driver scraper = MarketTrendsScraper(sample_config) scraper.close() mock_driver.quit.assert_called_once() @patch('src.scraper.webdriver') def test_context_manager(self, mock_webdriver, sample_config): """Test using scraper as context manager.""" mock_driver = Mock() mock_webdriver.Chrome.return_value = mock_driver with MarketTrendsScraper(sample_config) as scraper: assert scraper is not None mock_driver.quit.assert_called_once()