Initial commit: Market Trends Scraper

This commit is contained in:
Dev
2025-09-11 17:46:14 +03:00
commit 4ddcde68d4
17 changed files with 3049 additions and 0 deletions

0
tests/__init__.py Normal file
View File

165
tests/test_config_manager.py Normal file
View File

@@ -0,0 +1,165 @@
"""
Unit tests for the ConfigManager module.
"""
import pytest
import tempfile
import os
import yaml
from pathlib import Path
from unittest.mock import patch, mock_open
from src.config_manager import ConfigManager
class TestConfigManager:
"""Test cases for ConfigManager class."""
def test_init_with_default_path(self):
"""Test ConfigManager initialization with default path."""
config_manager = ConfigManager()
assert config_manager.config_path == Path("config/config.yaml")
assert isinstance(config_manager.config, dict)
assert isinstance(config_manager.default_config, dict)
def test_init_with_custom_path(self):
"""Test ConfigManager initialization with custom path."""
custom_path = "custom/config.yaml"
config_manager = ConfigManager(custom_path)
assert config_manager.config_path == Path(custom_path)
def test_get_default_config(self):
"""Test default configuration structure."""
config_manager = ConfigManager()
default_config = config_manager._get_default_config()
# Check required sections
assert "scraper" in default_config
assert "sources" in default_config
assert "output" in default_config
assert "database" in default_config
assert "analysis" in default_config
# Check some default values
assert default_config["scraper"]["delay_between_requests"] == 1.0
assert default_config["scraper"]["timeout"] == 30
assert default_config["scraper"]["headless"] is True
assert isinstance(default_config["sources"], list)
assert len(default_config["sources"]) > 0
@patch('builtins.open', new_callable=mock_open, read_data="scraper:\n timeout: 60")
@patch('pathlib.Path.exists')
def test_load_config_existing_file(self, mock_exists, mock_file):
"""Test loading configuration from existing file."""
mock_exists.return_value = True
config_manager = ConfigManager()
config = config_manager.load_config()
mock_file.assert_called_once()
assert config["scraper"]["timeout"] == 60
@patch('builtins.open', new_callable=mock_open)
@patch('pathlib.Path.exists')
def test_load_config_create_default(self, mock_exists, mock_file):
"""Test creating default configuration when file doesn't exist."""
mock_exists.return_value = False
config_manager = ConfigManager()
config = config_manager.load_config()
# Verify file was created
mock_file.assert_called_once()
# Verify config is default
assert config == config_manager.default_config
@patch('builtins.open', new_callable=mock_open)
def test_save_config(self, mock_file):
"""Test saving configuration to file."""
config_manager = ConfigManager()
config_manager.config = {"test": "value"}
config_manager.save_config()
mock_file.assert_called_once()
# Verify yaml.dump was called with correct arguments
with patch('yaml.dump') as mock_dump:
config_manager.save_config()
mock_dump.assert_called_once()
def test_validate_and_merge_config(self):
"""Test configuration validation and merging."""
config_manager = ConfigManager()
# Test with partial config
partial_config = {
"scraper": {
"timeout": 60
}
}
config_manager.config = partial_config
merged = config_manager._validate_and_merge_config()
# Should have all sections
assert "sources" in merged
assert "output" in merged
# Should have updated value
assert merged["scraper"]["timeout"] == 60
# Should have default values for missing keys
assert merged["scraper"]["delay_between_requests"] == 1.0
def test_validate_and_merge_config_missing_required(self):
"""Test validation fails when required sections are missing."""
config_manager = ConfigManager()
config_manager.config = {"invalid": "config"}
with pytest.raises(ValueError, match="Missing required configuration section"):
config_manager._validate_and_merge_config()
def test_validate_and_merge_config_no_sources(self):
"""Test validation fails when no sources are configured."""
config_manager = ConfigManager()
config_manager.config = {
"scraper": {},
"sources": [],
"output": {}
}
with pytest.raises(ValueError, match="At least one data source must be configured"):
config_manager._validate_and_merge_config()
def test_get_with_dot_notation(self):
"""Test getting configuration values with dot notation."""
config_manager = ConfigManager()
config_manager.config = {
"scraper": {
"timeout": 60,
"nested": {
"value": "test"
}
}
}
assert config_manager.get("scraper.timeout") == 60
assert config_manager.get("scraper.nested.value") == "test"
assert config_manager.get("nonexistent", "default") == "default"
def test_set_with_dot_notation(self):
"""Test setting configuration values with dot notation."""
config_manager = ConfigManager()
config_manager.config = {"scraper": {}}
config_manager.set("scraper.timeout", 60)
config_manager.set("new.nested.value", "test")
assert config_manager.config["scraper"]["timeout"] == 60
assert config_manager.config["new"]["nested"]["value"] == "test"
@patch.object(ConfigManager, 'load_config')
def test_reload(self, mock_load):
"""Test reloading configuration."""
config_manager = ConfigManager()
config_manager.reload()
mock_load.assert_called_once()
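
The src/config_manager.py module under test is not part of this excerpt. As a reading aid, here is a minimal sketch of the ConfigManager interface these tests exercise, inferred from the assertions above; method bodies, default values, and the exact merge/validation policy are assumptions, not the committed implementation.

# Sketch only: an interface compatible with the tests above, not the committed code.
from pathlib import Path

import yaml


class ConfigManager:
    REQUIRED_SECTIONS = ("scraper", "sources", "output")

    def __init__(self, config_path: str = "config/config.yaml"):
        self.config_path = Path(config_path)
        self.default_config = self._get_default_config()
        self.config: dict = {}

    def _get_default_config(self) -> dict:
        # Defaults matching the values asserted in test_get_default_config.
        return {
            "scraper": {"delay_between_requests": 1.0, "timeout": 30, "headless": True},
            "sources": [{"name": "example", "url": "https://example.com", "enabled": True}],
            "output": {"format": "csv"},
            "database": {"url": "sqlite:///data.db"},
            "analysis": {"price_history_days": 30},
        }

    def load_config(self) -> dict:
        if self.config_path.exists():
            with open(self.config_path) as f:
                loaded = yaml.safe_load(f) or {}
            self.config = self._merge_with_defaults(loaded)
        else:
            self.config = dict(self.default_config)
            self.save_config()
        return self.config

    def save_config(self) -> None:
        with open(self.config_path, "w") as f:
            yaml.dump(self.config, f)

    def _merge_with_defaults(self, overrides: dict) -> dict:
        # Overlay loaded values onto the defaults, section by section.
        merged = dict(self.default_config)
        for key, value in overrides.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = {**merged[key], **value}
            else:
                merged[key] = value
        return merged

    def _validate_and_merge_config(self) -> dict:
        # Assumption: a config with none of the known sections is rejected, while a
        # partial config is allowed and filled from defaults (this reconciles the
        # three validation tests above); the committed policy may differ.
        if not any(section in self.config for section in self.REQUIRED_SECTIONS):
            raise ValueError("Missing required configuration section")
        merged = self._merge_with_defaults(self.config)
        if not merged["sources"]:
            raise ValueError("At least one data source must be configured")
        return merged

    def get(self, dotted_key: str, default=None):
        # Dot-notation lookup, e.g. get("scraper.timeout").
        node = self.config
        for part in dotted_key.split("."):
            if not isinstance(node, dict) or part not in node:
                return default
            node = node[part]
        return node

    def set(self, dotted_key: str, value) -> None:
        # Dot-notation assignment, creating intermediate dicts as needed.
        *parents, leaf = dotted_key.split(".")
        node = self.config
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value

    def reload(self) -> dict:
        return self.load_config()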

355
tests/test_integration.py Normal file
View File

@@ -0,0 +1,355 @@
"""
Integration tests for the Market Trends Scraper application.
These tests verify that all components work together correctly.
"""
import pytest
import tempfile
import os
import json
from pathlib import Path
from unittest.mock import patch, Mock
from src.config_manager import ConfigManager
from src.scraper import MarketTrendsScraper
from src.logger import setup_logger
import main
class TestIntegration:
"""Integration test cases for the entire application."""
@pytest.fixture
def temp_dir(self):
"""Create a temporary directory for test files."""
with tempfile.TemporaryDirectory() as temp_dir:
yield temp_dir
@pytest.fixture
def sample_config_file(self, temp_dir):
"""Create a sample configuration file for testing."""
config_path = Path(temp_dir) / "config.yaml"
config_content = {
"scraper": {
"delay_between_requests": 0.1, # Faster for testing
"timeout": 10,
"max_retries": 2,
"user_agent": "Mozilla/5.0 (Test)",
"headless": True,
"window_size": [1024, 768]
},
"sources": [
{
"name": "test_source",
"url": "https://httpbin.org/html",
"type": "ecommerce",
"enabled": True,
"selectors": {
"product": "p",
"name": "p",
"price": "p",
"rating": "p",
"availability": "p"
},
"pagination": {
"next_page": "a",
"max_pages": 1
}
}
],
"output": {
"format": "csv",
"include_timestamp": True,
"filename": "test_output"
},
"database": {
"url": f"sqlite:///{temp_dir}/test.db",
"echo": False
},
"analysis": {
"price_history_days": 30,
"trend_threshold": 0.05,
"generate_charts": True
}
}
import yaml
with open(config_path, 'w') as f:
yaml.dump(config_content, f)
return str(config_path)
@patch('src.scraper.webdriver')
def test_full_workflow(self, mock_webdriver, sample_config_file, temp_dir):
"""Test the complete workflow from config loading to analysis."""
# Setup mock driver
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
# Setup logger
setup_logger()
# Load configuration
config_manager = ConfigManager(sample_config_file)
config = config_manager.load_config()
# Verify config was loaded
assert config["scraper"]["delay_between_requests"] == 0.1
assert len(config["sources"]) == 1
assert config["sources"][0]["name"] == "test_source"
# Initialize scraper
scraper = MarketTrendsScraper(config, headless=True)
# Mock the scraping process to return sample data
sample_data = [
{
"name": "Test Product 1",
"price": 19.99,
"rating": 4.5,
"availability": "In Stock",
"source": "test_source",
"scraped_at": "2023-01-01T00:00:00"
},
{
"name": "Test Product 2",
"price": 29.99,
"rating": 3.8,
"availability": "Out of Stock",
"source": "test_source",
"scraped_at": "2023-01-01T00:00:00"
}
]
with patch.object(scraper, '_scrape_source', return_value=sample_data):
# Scrape data
data = scraper.scrape_market_trends()
# Verify data was scraped
assert len(data) == 2
assert data[0]["name"] == "Test Product 1"
assert data[1]["price"] == 29.99
# Save data
output_path = Path(temp_dir) / "test_output.csv"
scraper.save_data(data, str(output_path))
# Verify file was created
assert output_path.exists()
# Analyze trends
analysis = scraper.analyze_trends(data)
# Verify analysis
assert analysis["total_products"] == 2
assert "price_analysis" in analysis
assert analysis["price_analysis"]["average_price"] == 24.99
assert analysis["price_analysis"]["min_price"] == 19.99
assert analysis["price_analysis"]["max_price"] == 29.99
# Save analysis
analysis_path = Path(temp_dir) / "test_analysis.json"
scraper.save_analysis(analysis, str(analysis_path))
# Verify analysis file was created
assert analysis_path.exists()
# Verify analysis content
with open(analysis_path, 'r') as f:
saved_analysis = json.load(f)
assert saved_analysis["total_products"] == 2
assert saved_analysis["price_analysis"]["average_price"] == 24.99
@patch('src.scraper.webdriver')
def test_multiple_sources(self, mock_webdriver, temp_dir):
"""Test scraping from multiple sources."""
# Setup mock driver
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
# Create config with multiple sources
config_content = {
"scraper": {
"delay_between_requests": 0.1,
"timeout": 10,
"headless": True
},
"sources": [
{
"name": "source_1",
"url": "https://example1.com",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2",
"price": "span.price"
}
},
{
"name": "source_2",
"url": "https://example2.com",
"enabled": True,
"selectors": {
"product": "div.item",
"name": "h3",
"price": "div.cost"
}
},
{
"name": "source_3",
"url": "https://example3.com",
"enabled": False, # Disabled source
"selectors": {
"product": "div.product",
"name": "h2",
"price": "span.price"
}
}
]
}
import yaml
config_path = Path(temp_dir) / "multi_source_config.yaml"
with open(config_path, 'w') as f:
yaml.dump(config_content, f)
# Initialize scraper
scraper = MarketTrendsScraper(config_content, headless=True)
# Mock different data for each source
def mock_scrape_source(source):
if source["name"] == "source_1":
return [{"name": "Product 1", "price": 10.00, "source": "source_1"}]
elif source["name"] == "source_2":
return [{"name": "Product 2", "price": 20.00, "source": "source_2"}]
else:
return []
with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
data = scraper.scrape_market_trends()
# Verify data from both enabled sources
assert len(data) == 2
sources = {item["source"] for item in data}
assert "source_1" in sources
assert "source_2" in sources
assert "source_3" not in sources # Disabled source should not appear
@patch('src.scraper.webdriver')
def test_error_handling(self, mock_webdriver, temp_dir):
"""Test error handling when scraping fails."""
# Setup mock driver
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
config = {
"scraper": {
"delay_between_requests": 0.1,
"timeout": 10,
"headless": True
},
"sources": [
{
"name": "working_source",
"url": "https://example.com",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2",
"price": "span.price"
}
},
{
"name": "failing_source",
"url": "https://example.com",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2",
"price": "span.price"
}
}
]
}
# Initialize scraper
scraper = MarketTrendsScraper(config, headless=True)
# Mock one source to succeed and one to fail
def mock_scrape_source(source):
if source["name"] == "working_source":
return [{"name": "Working Product", "price": 15.00, "source": "working_source"}]
else:
raise Exception("Scraping failed")
with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
data = scraper.scrape_market_trends()
# Should still get data from working source
assert len(data) == 1
assert data[0]["source"] == "working_source"
@patch('sys.argv', ['main.py', '--config', 'test_config.yaml', '--output', 'test_output.csv'])
@patch('src.scraper.webdriver')
def test_main_entry_point(self, mock_webdriver, temp_dir):
"""Test the main entry point of the application."""
# Setup mock driver
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
# Create test config
config_path = Path(temp_dir) / "test_config.yaml"
config_content = {
"scraper": {
"delay_between_requests": 0.1,
"timeout": 10,
"headless": True
},
"sources": [
{
"name": "test_source",
"url": "https://example.com",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2",
"price": "span.price"
}
}
]
}
import yaml
with open(config_path, 'w') as f:
yaml.dump(config_content, f)
# Mock the scraper to return sample data
sample_data = [{"name": "Test Product", "price": 19.99, "source": "test_source"}]
with patch('main.ConfigManager') as mock_config_manager, \
patch('main.MarketTrendsScraper') as mock_scraper_class:
# Setup mocks
mock_config_instance = Mock()
mock_config_manager.return_value = mock_config_instance
mock_config_instance.load_config.return_value = config_content
mock_scraper_instance = Mock()
mock_scraper_class.return_value = mock_scraper_instance
mock_scraper_instance.scrape_market_trends.return_value = sample_data
mock_scraper_instance.analyze_trends.return_value = {"total_products": 1}
# Run main function
with patch('sys.argv', ['main.py', '--config', str(config_path), '--output', str(Path(temp_dir) / 'output.csv')]):
result = main.main()
# Verify main completed successfully
assert result == 0
# Verify scraper was called
mock_scraper_instance.scrape_market_trends.assert_called_once()
mock_scraper_instance.save_data.assert_called_once()
mock_scraper_instance.analyze_trends.assert_called_once()
mock_scraper_instance.save_analysis.assert_called_once()
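
The main.py entry point driven by test_main_entry_point is likewise not shown in this excerpt. A rough sketch of the flow the test assumes follows; the --config/--output flags and the zero return code come from the test, while everything else (defaults, the analysis filename) is assumed.

# Sketch only: flag names and the zero return code come from the test; the rest is assumed.
import argparse
import sys

from src.config_manager import ConfigManager
from src.logger import setup_logger
from src.scraper import MarketTrendsScraper


def main() -> int:
    parser = argparse.ArgumentParser(description="Market Trends Scraper")
    parser.add_argument("--config", default="config/config.yaml")
    parser.add_argument("--output", default="market_trends_data.csv")
    args = parser.parse_args()

    setup_logger()
    config = ConfigManager(args.config).load_config()

    scraper = MarketTrendsScraper(config, headless=config.get("scraper", {}).get("headless", True))
    try:
        data = scraper.scrape_market_trends()
        scraper.save_data(data, args.output)
        analysis = scraper.analyze_trends(data)
        # Analysis filename is an assumption; the test only requires save_analysis to be called.
        scraper.save_analysis(analysis, args.output.rsplit(".", 1)[0] + "_analysis.json")
    finally:
        scraper.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())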

165
tests/test_logger.py Normal file
View File

@@ -0,0 +1,165 @@
"""
Unit tests for the Logger module.
"""
import pytest
import sys
import tempfile
import os
from pathlib import Path
from unittest.mock import patch, MagicMock
from loguru import logger
from src.logger import setup_logger, get_logger, LoggerMixin
class TestLoggerSetup:
"""Test cases for logger setup functionality."""
def test_setup_logger_default(self):
"""Test logger setup with default parameters."""
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add:
setup_logger()
# Verify default logger was removed
mock_remove.assert_called_once()
# Verify console logger was added
assert mock_add.call_count == 1
console_call = mock_add.call_args_list[0]
assert console_call[0][0] == sys.stderr
assert "level" in console_call[1]
assert console_call[1]["colorize"] is True
def test_setup_logger_with_file(self):
"""Test logger setup with file output."""
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
log_file = tmp_file.name
try:
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add:
setup_logger(log_file=log_file)
# Verify both console and file loggers were added
assert mock_add.call_count == 2
# Check file logger call
file_call = mock_add.call_args_list[1]
assert file_call[0][0] == log_file
assert "rotation" in file_call[1]
assert "retention" in file_call[1]
assert "compression" in file_call[1]
finally:
# Clean up
if os.path.exists(log_file):
os.unlink(log_file)
def test_setup_logger_verbose(self):
"""Test logger setup with verbose mode."""
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add:
setup_logger(verbose=True)
# Verify DEBUG level was set
console_call = mock_add.call_args_list[0]
assert console_call[1]["level"] == "DEBUG"
def test_setup_logger_custom_level(self):
"""Test logger setup with custom log level."""
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add:
setup_logger(log_level="WARNING")
# Verify WARNING level was set
console_call = mock_add.call_args_list[0]
assert console_call[1]["level"] == "WARNING"
def test_setup_logger_custom_rotation(self):
"""Test logger setup with custom rotation settings."""
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
log_file = tmp_file.name
try:
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add:
setup_logger(log_file=log_file, rotation="100 MB")
# Verify custom rotation was set
file_call = mock_add.call_args_list[1]
assert file_call[1]["rotation"] == "100 MB"
finally:
# Clean up
if os.path.exists(log_file):
os.unlink(log_file)
def test_setup_logger_exception_handler(self):
"""Test that exception handler is set up."""
with patch('loguru.logger.remove') as mock_remove, \
patch('loguru.logger.add') as mock_add, \
patch('sys.excepthook') as mock_excepthook:
setup_logger()
# Verify setup_logger replaced the (patched) default hook with its own handler
assert sys.excepthook is not mock_excepthook
assert callable(sys.excepthook)
class TestGetLogger:
"""Test cases for get_logger function."""
def test_get_logger_no_name(self):
"""Test getting logger without name."""
with patch('loguru.logger.bind') as mock_bind:
get_logger()
mock_bind.assert_called_once()
def test_get_logger_with_name(self):
"""Test getting logger with name."""
with patch('loguru.logger.bind') as mock_bind:
get_logger("test_module")
mock_bind.assert_called_once_with(name="test_module")
class TestLoggerMixin:
"""Test cases for LoggerMixin class."""
def test_logger_property(self):
"""Test logger property in mixin."""
class TestClass(LoggerMixin):
pass
with patch('loguru.logger.bind') as mock_bind:
test_obj = TestClass()
_ = test_obj.logger
# Verify logger was bound with class name and object id
mock_bind.assert_called_once()
args, kwargs = mock_bind.call_args
assert kwargs["name"] == "TestClass"
assert "id" in kwargs
def test_logger_mixin_inheritance(self):
"""Test that logger mixin works with inheritance."""
class ParentClass(LoggerMixin):
pass
class ChildClass(ParentClass):
pass
with patch('loguru.logger.bind') as mock_bind:
child_obj = ChildClass()
_ = child_obj.logger
# Verify logger was bound with child class name
args, kwargs = mock_bind.call_args
assert kwargs["name"] == "ChildClass"
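
For reference, a minimal sketch of the src/logger.py surface these tests assume (setup_logger, get_logger, LoggerMixin). The call pattern — remove the default handler, add a colorized stderr sink, optionally add a rotating file sink, install an exception hook — is taken from the tests; format strings, retention, and compression values are assumptions.

# Sketch only: loguru wiring compatible with the tests above, not the committed module.
import sys

from loguru import logger


def setup_logger(log_file=None, log_level="INFO", verbose=False, rotation="10 MB"):
    """Configure loguru: console sink always, optional rotating file sink."""
    logger.remove()  # drop the default handler
    level = "DEBUG" if verbose else log_level
    logger.add(sys.stderr, level=level, colorize=True)
    if log_file:
        logger.add(
            log_file,
            level=level,
            rotation=rotation,
            retention="30 days",
            compression="zip",
        )

    def _handle_exception(exc_type, exc_value, exc_traceback):
        # Route uncaught exceptions through loguru instead of the default hook.
        logger.opt(exception=(exc_type, exc_value, exc_traceback)).critical("Uncaught exception")

    sys.excepthook = _handle_exception


def get_logger(name=None):
    """Return a logger bound to an optional module name."""
    return logger.bind(name=name)


class LoggerMixin:
    """Mixin exposing a per-instance logger bound to the class name."""

    @property
    def logger(self):
        return logger.bind(name=type(self).__name__, id=id(self))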

371
tests/test_scraper.py Normal file
View File

@@ -0,0 +1,371 @@
"""
Unit tests for the Scraper module.
"""
import pytest
import json
from unittest.mock import Mock, patch, MagicMock
from bs4 import BeautifulSoup
import pandas as pd
from src.scraper import MarketTrendsScraper
class TestMarketTrendsScraper:
"""Test cases for MarketTrendsScraper class."""
@pytest.fixture
def sample_config(self):
"""Sample configuration for testing."""
return {
"scraper": {
"delay_between_requests": 1.0,
"timeout": 30,
"max_retries": 3,
"user_agent": "Mozilla/5.0",
"headless": True,
"window_size": [1920, 1080]
},
"sources": [
{
"name": "test_source",
"url": "https://example.com/products",
"type": "ecommerce",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2.title",
"price": "span.price",
"rating": "div.rating",
"availability": "div.stock"
},
"pagination": {
"next_page": "a.next",
"max_pages": 2
}
}
],
"output": {
"format": "csv",
"include_timestamp": True,
"filename": "market_trends_data"
}
}
@pytest.fixture
def sample_html(self):
"""Sample HTML for testing."""
return """
<html>
<body>
<div class="product">
<h2 class="title">Test Product 1</h2>
<span class="price">$19.99</span>
<div class="rating">4.5 stars</div>
<div class="stock">In Stock</div>
<a href="/product/1">View</a>
</div>
<div class="product">
<h2 class="title">Test Product 2</h2>
<span class="price">€29.99</span>
<div class="rating">3.8 stars</div>
<div class="stock">Out of Stock</div>
<a href="/product/2">View</a>
</div>
<a class="next" href="/page/2">Next</a>
</body>
</html>
"""
@patch('src.scraper.webdriver')
def test_init(self, mock_webdriver, sample_config):
"""Test scraper initialization."""
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
scraper = MarketTrendsScraper(sample_config)
assert scraper.config == sample_config
assert scraper.driver == mock_driver
assert scraper.session is not None
assert scraper.data == []
# Verify browser setup
mock_webdriver.Chrome.assert_called_once()
mock_driver.set_page_load_timeout.assert_called_with(30)
@patch('src.scraper.webdriver')
def test_setup_browser(self, mock_webdriver, sample_config):
"""Test browser setup."""
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
scraper = MarketTrendsScraper(sample_config)
# Verify options were set
call_args = mock_webdriver.Chrome.call_args
options = call_args[1]['options']
assert options.arguments is not None
assert any("--headless" in arg for arg in options.arguments)
assert any("--window-size=1920,1080" in arg for arg in options.arguments)
@patch('src.scraper.webdriver')
def test_setup_session(self, mock_webdriver, sample_config):
"""Test session setup."""
scraper = MarketTrendsScraper(sample_config)
# Verify headers were set
assert "User-Agent" in scraper.session.headers
assert scraper.session.headers["User-Agent"] == sample_config["scraper"]["user_agent"]
assert "Accept" in scraper.session.headers
@patch('src.scraper.webdriver')
def test_parse_price(self, mock_webdriver, sample_config):
"""Test price parsing."""
scraper = MarketTrendsScraper(sample_config)
# Test various price formats
assert scraper._parse_price("$19.99") == 19.99
assert scraper._parse_price("€29.99") == 29.99
assert scraper._parse_price("£39.99") == 39.99
assert scraper._parse_price("19,99") == 19.99
assert scraper._parse_price("Price: $49.99 USD") == 49.99
assert scraper._parse_price("Invalid price") is None
assert scraper._parse_price("") is None
@patch('src.scraper.webdriver')
def test_parse_rating(self, mock_webdriver, sample_config):
"""Test rating parsing."""
scraper = MarketTrendsScraper(sample_config)
# Test various rating formats
assert scraper._parse_rating("4.5 stars") == 4.5
assert scraper._parse_rating("Rating: 3.8/5") == 3.8
assert scraper._parse_rating("5 stars") == 5.0
assert scraper._parse_rating("Invalid rating") is None
assert scraper._parse_rating("") is None
@patch('src.scraper.webdriver')
def test_extract_product_data(self, mock_webdriver, sample_config, sample_html):
"""Test product data extraction from HTML."""
scraper = MarketTrendsScraper(sample_config)
soup = BeautifulSoup(sample_html, 'html.parser')
product = soup.find('div', class_='product')
selectors = sample_config["sources"][0]["selectors"]
data = scraper._extract_product_data(product, selectors)
assert data is not None
assert data["name"] == "Test Product 1"
assert data["price"] == 19.99
assert data["rating"] == 4.5
assert data["availability"] == "In Stock"
assert data["url"] == "/product/1"
@patch('src.scraper.webdriver')
def test_extract_product_data_no_name(self, mock_webdriver, sample_config):
"""Test product data extraction when name is missing."""
scraper = MarketTrendsScraper(sample_config)
html = """
<div class="product">
<span class="price">$19.99</span>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')
product = soup.find('div', class_='product')
selectors = sample_config["sources"][0]["selectors"]
data = scraper._extract_product_data(product, selectors)
assert data is None # Should return None when name is missing
@patch('src.scraper.webdriver')
def test_make_request_with_retry_success(self, mock_webdriver, sample_config):
"""Test successful HTTP request with retry logic."""
scraper = MarketTrendsScraper(sample_config)
mock_response = Mock()
mock_response.status_code = 200
mock_response.text = "Success"
with patch.object(scraper.session, 'get', return_value=mock_response) as mock_get:
response = scraper._make_request_with_retry("https://example.com")
assert response == mock_response
mock_get.assert_called_once()
@patch('src.scraper.webdriver')
def test_make_request_with_retry_failure(self, mock_webdriver, sample_config):
"""Test HTTP request failure with retry logic."""
scraper = MarketTrendsScraper(sample_config)
with patch.object(scraper.session, 'get', side_effect=Exception("Connection error")):
response = scraper._make_request_with_retry("https://example.com")
assert response is None
@patch('src.scraper.webdriver')
@patch('src.scraper.MarketTrendsScraper._scrape_source')
def test_scrape_market_trends(self, mock_scrape_source, mock_webdriver, sample_config):
"""Test scraping market trends from multiple sources."""
scraper = MarketTrendsScraper(sample_config)
# Mock source data
mock_scrape_source.side_effect = [
[{"name": "Product 1", "price": 19.99}],
[{"name": "Product 2", "price": 29.99}]
]
# Add second source
sample_config["sources"].append({
"name": "test_source_2",
"url": "https://example2.com/products",
"enabled": True,
"selectors": {
"product": "div.product",
"name": "h2.title",
"price": "span.price"
}
})
data = scraper.scrape_market_trends()
assert len(data) == 2
assert data[0]["name"] == "Product 1"
assert data[1]["name"] == "Product 2"
assert mock_scrape_source.call_count == 2
@patch('src.scraper.webdriver')
@patch('src.scraper.MarketTrendsScraper._scrape_with_requests')
def test_scrape_source_with_requests(self, mock_scrape_requests, mock_webdriver, sample_config):
"""Test scraping a source using requests."""
scraper = MarketTrendsScraper(sample_config)
mock_scrape_requests.return_value = [{"name": "Test Product", "price": 19.99}]
source = sample_config["sources"][0]
data = scraper._scrape_source(source)
assert len(data) == 1
assert data[0]["name"] == "Test Product"
mock_scrape_requests.assert_called_once_with(source)
@patch('src.scraper.webdriver')
@patch('src.scraper.MarketTrendsScraper._scrape_with_selenium')
def test_scrape_source_with_selenium(self, mock_scrape_selenium, mock_webdriver, sample_config):
"""Test scraping a source using Selenium."""
scraper = MarketTrendsScraper(sample_config)
mock_scrape_selenium.return_value = [{"name": "Test Product", "price": 19.99}]
# Configure source to use Selenium
source = sample_config["sources"][0]
source["use_selenium"] = True
data = scraper._scrape_source(source)
assert len(data) == 1
assert data[0]["name"] == "Test Product"
mock_scrape_selenium.assert_called_once_with(source)
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_data_csv(self, mock_open, mock_webdriver, sample_config):
"""Test saving data to CSV file."""
scraper = MarketTrendsScraper(sample_config)
data = [
{"name": "Product 1", "price": 19.99, "source": "Test"},
{"name": "Product 2", "price": 29.99, "source": "Test"}
]
with patch.object(pd.DataFrame, 'to_csv') as mock_to_csv:
scraper.save_data(data, "test_output.csv")
mock_to_csv.assert_called_once_with("test_output.csv", index=False)
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_data_json(self, mock_open, mock_webdriver, sample_config):
"""Test saving data to JSON file."""
scraper = MarketTrendsScraper(sample_config)
data = [
{"name": "Product 1", "price": 19.99, "source": "Test"},
{"name": "Product 2", "price": 29.99, "source": "Test"}
]
with patch.object(pd.DataFrame, 'to_json') as mock_to_json:
scraper.save_data(data, "test_output.json")
mock_to_json.assert_called_once()
@patch('src.scraper.webdriver')
def test_analyze_trends(self, mock_webdriver, sample_config):
"""Test trend analysis."""
scraper = MarketTrendsScraper(sample_config)
data = [
{"name": "Product 1", "price": 19.99, "rating": 4.5, "source": "Source A"},
{"name": "Product 2", "price": 29.99, "rating": 3.8, "source": "Source A"},
{"name": "Product 3", "price": 39.99, "rating": 4.2, "source": "Source B"},
{"name": "Product 4", "price": 49.99, "rating": 4.7, "source": "Source B"}
]
analysis = scraper.analyze_trends(data)
assert analysis["total_products"] == 4
assert "price_analysis" in analysis
assert "rating_analysis" in analysis
assert "sources" in analysis
assert analysis["sources"]["Source A"] == 2
assert analysis["sources"]["Source B"] == 2
assert analysis["price_analysis"]["average_price"] == 34.99
assert analysis["price_analysis"]["min_price"] == 19.99
assert analysis["price_analysis"]["max_price"] == 49.99
assert analysis["rating_analysis"]["average_rating"] == 4.3
@patch('src.scraper.webdriver')
def test_analyze_trends_empty_data(self, mock_webdriver, sample_config):
"""Test trend analysis with empty data."""
scraper = MarketTrendsScraper(sample_config)
analysis = scraper.analyze_trends([])
assert "error" in analysis
assert analysis["error"] == "No data available for analysis"
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_analysis(self, mock_open, mock_webdriver, sample_config):
"""Test saving analysis results."""
scraper = MarketTrendsScraper(sample_config)
analysis = {"total_products": 4, "average_price": 34.99}
with patch('json.dump') as mock_json_dump:
scraper.save_analysis(analysis, "test_analysis.json")
mock_json_dump.assert_called_once()
@patch('src.scraper.webdriver')
def test_close(self, mock_webdriver, sample_config):
"""Test closing browser and session."""
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
scraper = MarketTrendsScraper(sample_config)
scraper.close()
mock_driver.quit.assert_called_once()
@patch('src.scraper.webdriver')
def test_context_manager(self, mock_webdriver, sample_config):
"""Test using scraper as context manager."""
mock_driver = Mock()
mock_webdriver.Chrome.return_value = mock_driver
with MarketTrendsScraper(sample_config) as scraper:
assert scraper is not None
mock_driver.quit.assert_called_once()
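
The scraper tests pin down concrete behaviour for price parsing, rating parsing, and trend analysis. Below is a standalone sketch of helpers that would satisfy those assertions; the committed code implements them as MarketTrendsScraper methods (_parse_price, _parse_rating, analyze_trends) and may differ in detail, including whether averages are rounded.

# Sketch only: standalone versions of the behaviour the assertions above pin down.
import re
from typing import Optional


def parse_price(text: str) -> Optional[float]:
    """Pull the first numeric value out of strings like '$19.99', '19,99', 'Price: $49.99 USD'."""
    if not text:
        return None
    match = re.search(r"\d+(?:[.,]\d+)?", text)
    if not match:
        return None
    return float(match.group(0).replace(",", "."))


def parse_rating(text: str) -> Optional[float]:
    """Pull a rating value out of strings like '4.5 stars' or 'Rating: 3.8/5'."""
    if not text:
        return None
    match = re.search(r"\d+(?:\.\d+)?", text)
    return float(match.group(0)) if match else None


def analyze_trends(data):
    """Aggregate the statistics test_analyze_trends asserts on (rounding is an assumption)."""
    if not data:
        return {"error": "No data available for analysis"}
    prices = [item["price"] for item in data if item.get("price") is not None]
    ratings = [item["rating"] for item in data if item.get("rating") is not None]
    sources = {}
    for item in data:
        sources[item["source"]] = sources.get(item["source"], 0) + 1
    return {
        "total_products": len(data),
        "price_analysis": {
            "average_price": round(sum(prices) / len(prices), 2),
            "min_price": min(prices),
            "max_price": max(prices),
        },
        "rating_analysis": {"average_rating": round(sum(ratings) / len(ratings), 2)},
        "sources": sources,
    }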