Initial commit: Market Trends Scraper
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
165
tests/test_config_manager.py
Normal file
165
tests/test_config_manager.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Unit tests for the ConfigManager module.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import tempfile
|
||||
import os
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, mock_open
|
||||
|
||||
from src.config_manager import ConfigManager
|
||||
|
||||
|
||||
class TestConfigManager:
    """Test cases for ConfigManager class.

    All tests exercise the public surface of src.config_manager.ConfigManager;
    file access is mocked so no real config file is read or written.
    """

    def test_init_with_default_path(self):
        """Test ConfigManager initialization with default path."""
        config_manager = ConfigManager()
        assert config_manager.config_path == Path("config/config.yaml")
        assert isinstance(config_manager.config, dict)
        assert isinstance(config_manager.default_config, dict)

    def test_init_with_custom_path(self):
        """Test ConfigManager initialization with custom path."""
        custom_path = "custom/config.yaml"
        config_manager = ConfigManager(custom_path)
        # The manager is expected to normalize the string path to a Path.
        assert config_manager.config_path == Path(custom_path)

    def test_get_default_config(self):
        """Test default configuration structure."""
        config_manager = ConfigManager()
        default_config = config_manager._get_default_config()

        # Check required sections
        assert "scraper" in default_config
        assert "sources" in default_config
        assert "output" in default_config
        assert "database" in default_config
        assert "analysis" in default_config

        # Check some default values
        assert default_config["scraper"]["delay_between_requests"] == 1.0
        assert default_config["scraper"]["timeout"] == 30
        assert default_config["scraper"]["headless"] is True
        assert isinstance(default_config["sources"], list)
        assert len(default_config["sources"]) > 0

    # NOTE: decorators apply bottom-up, so mock_exists (pathlib.Path.exists)
    # is the first injected argument and mock_file (builtins.open) the second.
    @patch('builtins.open', new_callable=mock_open, read_data="scraper:\n timeout: 60")
    @patch('pathlib.Path.exists')
    def test_load_config_existing_file(self, mock_exists, mock_file):
        """Test loading configuration from existing file."""
        mock_exists.return_value = True

        config_manager = ConfigManager()
        config = config_manager.load_config()

        mock_file.assert_called_once()
        assert config["scraper"]["timeout"] == 60

    @patch('builtins.open', new_callable=mock_open)
    @patch('pathlib.Path.exists')
    def test_load_config_create_default(self, mock_exists, mock_file):
        """Test creating default configuration when file doesn't exist."""
        mock_exists.return_value = False

        config_manager = ConfigManager()
        config = config_manager.load_config()

        # Verify file was created
        mock_file.assert_called_once()
        # Verify config is default
        assert config == config_manager.default_config

    @patch('builtins.open', new_callable=mock_open)
    def test_save_config(self, mock_file):
        """Test saving configuration to file."""
        config_manager = ConfigManager()
        config_manager.config = {"test": "value"}

        config_manager.save_config()

        mock_file.assert_called_once()
        # Verify yaml.dump was called with correct arguments
        # (save_config is invoked a second time here, with yaml.dump patched).
        with patch('yaml.dump') as mock_dump:
            config_manager.save_config()
            mock_dump.assert_called_once()

    def test_validate_and_merge_config(self):
        """Test configuration validation and merging."""
        config_manager = ConfigManager()

        # Test with partial config
        partial_config = {
            "scraper": {
                "timeout": 60
            }
        }
        config_manager.config = partial_config

        merged = config_manager._validate_and_merge_config()

        # Should have all sections
        assert "sources" in merged
        assert "output" in merged
        # Should have updated value
        assert merged["scraper"]["timeout"] == 60
        # Should have default values for missing keys
        assert merged["scraper"]["delay_between_requests"] == 1.0

    def test_validate_and_merge_config_missing_required(self):
        """Test validation fails when required sections are missing."""
        config_manager = ConfigManager()
        config_manager.config = {"invalid": "config"}

        with pytest.raises(ValueError, match="Missing required configuration section"):
            config_manager._validate_and_merge_config()

    def test_validate_and_merge_config_no_sources(self):
        """Test validation fails when no sources are configured."""
        config_manager = ConfigManager()
        config_manager.config = {
            "scraper": {},
            "sources": [],
            "output": {}
        }

        with pytest.raises(ValueError, match="At least one data source must be configured"):
            config_manager._validate_and_merge_config()

    def test_get_with_dot_notation(self):
        """Test getting configuration values with dot notation."""
        config_manager = ConfigManager()
        config_manager.config = {
            "scraper": {
                "timeout": 60,
                "nested": {
                    "value": "test"
                }
            }
        }

        assert config_manager.get("scraper.timeout") == 60
        assert config_manager.get("scraper.nested.value") == "test"
        # Missing keys fall back to the supplied default.
        assert config_manager.get("nonexistent", "default") == "default"

    def test_set_with_dot_notation(self):
        """Test setting configuration values with dot notation."""
        config_manager = ConfigManager()
        config_manager.config = {"scraper": {}}

        config_manager.set("scraper.timeout", 60)
        # set() is expected to create intermediate dicts for new paths.
        config_manager.set("new.nested.value", "test")

        assert config_manager.config["scraper"]["timeout"] == 60
        assert config_manager.config["new"]["nested"]["value"] == "test"

    @patch.object(ConfigManager, 'load_config')
    def test_reload(self, mock_load):
        """Test reloading configuration."""
        # load_config is patched on the class, so the call made by reload()
        # is the only one recorded here (assuming __init__ does not call it).
        config_manager = ConfigManager()
        config_manager.reload()

        mock_load.assert_called_once()
|
355
tests/test_integration.py
Normal file
355
tests/test_integration.py
Normal file
@@ -0,0 +1,355 @@
|
||||
"""
|
||||
Integration tests for the Market Trends Scraper application.
|
||||
|
||||
These tests verify that all components work together correctly.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import tempfile
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, Mock
|
||||
|
||||
from src.config_manager import ConfigManager
|
||||
from src.scraper import MarketTrendsScraper
|
||||
from src.logger import setup_logger
|
||||
import main
|
||||
|
||||
|
||||
class TestIntegration:
    """Integration test cases for the entire application.

    These tests wire ConfigManager, MarketTrendsScraper, the logger, and the
    main entry point together; Selenium's webdriver is always mocked so no
    browser is launched.
    """

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for test files.

        Yields the directory path as a plain str (what TemporaryDirectory
        returns); callers must wrap it in Path() for path arithmetic.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            yield temp_dir

    @pytest.fixture
    def sample_config_file(self, temp_dir):
        """Create a sample configuration file for testing.

        Returns the path (str) of a YAML config file written into temp_dir.
        """
        config_path = Path(temp_dir) / "config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,  # Faster for testing
                "timeout": 10,
                "max_retries": 2,
                "user_agent": "Mozilla/5.0 (Test)",
                "headless": True,
                "window_size": [1024, 768]
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://httpbin.org/html",
                    "type": "ecommerce",
                    "enabled": True,
                    "selectors": {
                        "product": "p",
                        "name": "p",
                        "price": "p",
                        "rating": "p",
                        "availability": "p"
                    },
                    "pagination": {
                        "next_page": "a",
                        "max_pages": 1
                    }
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "test_output"
            },
            "database": {
                "url": f"sqlite:///{temp_dir}/test.db",
                "echo": False
            },
            "analysis": {
                "price_history_days": 30,
                "trend_threshold": 0.05,
                "generate_charts": True
            }
        }

        import yaml
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        return str(config_path)

    @patch('src.scraper.webdriver')
    def test_full_workflow(self, mock_webdriver, sample_config_file, temp_dir):
        """Test the complete workflow from config loading to analysis."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Setup logger
        setup_logger()

        # Load configuration
        config_manager = ConfigManager(sample_config_file)
        config = config_manager.load_config()

        # Verify config was loaded
        assert config["scraper"]["delay_between_requests"] == 0.1
        assert len(config["sources"]) == 1
        assert config["sources"][0]["name"] == "test_source"

        # Initialize scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock the scraping process to return sample data
        sample_data = [
            {
                "name": "Test Product 1",
                "price": 19.99,
                "rating": 4.5,
                "availability": "In Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00"
            },
            {
                "name": "Test Product 2",
                "price": 29.99,
                "rating": 3.8,
                "availability": "Out of Stock",
                "source": "test_source",
                "scraped_at": "2023-01-01T00:00:00"
            }
        ]

        with patch.object(scraper, '_scrape_source', return_value=sample_data):
            # Scrape data
            data = scraper.scrape_market_trends()

            # Verify data was scraped
            assert len(data) == 2
            assert data[0]["name"] == "Test Product 1"
            assert data[1]["price"] == 29.99

            # Save data
            output_path = Path(temp_dir) / "test_output.csv"
            scraper.save_data(data, str(output_path))

            # Verify file was created
            assert output_path.exists()

            # Analyze trends
            analysis = scraper.analyze_trends(data)

            # Verify analysis ((19.99 + 29.99) / 2 == 24.99)
            assert analysis["total_products"] == 2
            assert "price_analysis" in analysis
            assert analysis["price_analysis"]["average_price"] == 24.99
            assert analysis["price_analysis"]["min_price"] == 19.99
            assert analysis["price_analysis"]["max_price"] == 29.99

            # Save analysis
            analysis_path = Path(temp_dir) / "test_analysis.json"
            scraper.save_analysis(analysis, str(analysis_path))

            # Verify analysis file was created
            assert analysis_path.exists()

            # Verify analysis content round-trips through JSON
            with open(analysis_path, 'r') as f:
                saved_analysis = json.load(f)

            assert saved_analysis["total_products"] == 2
            assert saved_analysis["price_analysis"]["average_price"] == 24.99

    @patch('src.scraper.webdriver')
    def test_multiple_sources(self, mock_webdriver, temp_dir):
        """Test scraping from multiple sources."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create config with multiple sources
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "source_1",
                    "url": "https://example1.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                },
                {
                    "name": "source_2",
                    "url": "https://example2.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.item",
                        "name": "h3",
                        "price": "div.cost"
                    }
                },
                {
                    "name": "source_3",
                    "url": "https://example3.com",
                    "enabled": False,  # Disabled source
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        import yaml
        config_path = Path(temp_dir) / "multi_source_config.yaml"
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Initialize scraper
        scraper = MarketTrendsScraper(config_content, headless=True)

        # Mock different data for each source
        def mock_scrape_source(source):
            if source["name"] == "source_1":
                return [{"name": "Product 1", "price": 10.00, "source": "source_1"}]
            elif source["name"] == "source_2":
                return [{"name": "Product 2", "price": 20.00, "source": "source_2"}]
            else:
                return []

        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Verify data from both enabled sources
            assert len(data) == 2
            sources = {item["source"] for item in data}
            assert "source_1" in sources
            assert "source_2" in sources
            assert "source_3" not in sources  # Disabled source should not appear

    @patch('src.scraper.webdriver')
    def test_error_handling(self, mock_webdriver, temp_dir):
        """Test error handling when scraping fails."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        config = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "working_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                },
                {
                    "name": "failing_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        # Initialize scraper
        scraper = MarketTrendsScraper(config, headless=True)

        # Mock one source to succeed and one to fail
        def mock_scrape_source(source):
            if source["name"] == "working_source":
                return [{"name": "Working Product", "price": 15.00, "source": "working_source"}]
            else:
                raise Exception("Scraping failed")

        with patch.object(scraper, '_scrape_source', side_effect=mock_scrape_source):
            data = scraper.scrape_market_trends()

            # Should still get data from working source
            assert len(data) == 1
            assert data[0]["source"] == "working_source"

    # NOTE: this decorator-level sys.argv patch is redundant — the test
    # re-patches sys.argv with the real paths before calling main() below.
    @patch('sys.argv', ['main.py', '--config', 'test_config.yaml', '--output', 'test_output.csv'])
    @patch('src.scraper.webdriver')
    def test_main_entry_point(self, mock_webdriver, temp_dir):
        """Test the main entry point of the application."""
        # Setup mock driver
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        # Create test config
        config_path = Path(temp_dir) / "test_config.yaml"
        config_content = {
            "scraper": {
                "delay_between_requests": 0.1,
                "timeout": 10,
                "headless": True
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com",
                    "enabled": True,
                    "selectors": {
                        "product": "div.product",
                        "name": "h2",
                        "price": "span.price"
                    }
                }
            ]
        }

        import yaml
        with open(config_path, 'w') as f:
            yaml.dump(config_content, f)

        # Mock the scraper to return sample data
        sample_data = [{"name": "Test Product", "price": 19.99, "source": "test_source"}]

        with patch('main.ConfigManager') as mock_config_manager, \
             patch('main.MarketTrendsScraper') as mock_scraper_class:

            # Setup mocks
            mock_config_instance = Mock()
            mock_config_manager.return_value = mock_config_instance
            mock_config_instance.load_config.return_value = config_content

            mock_scraper_instance = Mock()
            mock_scraper_class.return_value = mock_scraper_instance
            mock_scraper_instance.scrape_market_trends.return_value = sample_data
            mock_scraper_instance.analyze_trends.return_value = {"total_products": 1}

            # Run main function.
            # FIX: temp_dir is a str, so the original `temp_dir / 'output.csv'`
            # raised TypeError; wrap it in Path() before using the / operator.
            with patch('sys.argv', ['main.py', '--config', str(config_path),
                                    '--output', str(Path(temp_dir) / 'output.csv')]):
                result = main.main()

            # Verify main completed successfully
            assert result == 0

            # Verify scraper was called
            mock_scraper_instance.scrape_market_trends.assert_called_once()
            mock_scraper_instance.save_data.assert_called_once()
            mock_scraper_instance.analyze_trends.assert_called_once()
            mock_scraper_instance.save_analysis.assert_called_once()
|
165
tests/test_logger.py
Normal file
165
tests/test_logger.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Unit tests for the Logger module.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
import tempfile
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
from loguru import logger
|
||||
|
||||
from src.logger import setup_logger, get_logger, LoggerMixin
|
||||
|
||||
|
||||
class TestLoggerSetup:
    """Test cases for logger setup functionality.

    loguru's logger.remove/logger.add are patched throughout, so no real
    sinks are created; tests inspect the recorded call arguments instead.
    """

    def test_setup_logger_default(self):
        """Test logger setup with default parameters."""
        with patch('loguru.logger.remove') as mock_remove, \
             patch('loguru.logger.add') as mock_add:

            setup_logger()

            # Verify default logger was removed
            mock_remove.assert_called_once()

            # Verify console logger was added (positional arg 0 is the sink)
            assert mock_add.call_count == 1
            console_call = mock_add.call_args_list[0]
            assert console_call[0][0] == sys.stderr
            assert "level" in console_call[1]
            assert console_call[1]["colorize"] is True

    def test_setup_logger_with_file(self):
        """Test logger setup with file output."""
        # delete=False so the path survives the with-block; removed in finally.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            log_file = tmp_file.name

        try:
            with patch('loguru.logger.remove') as mock_remove, \
                 patch('loguru.logger.add') as mock_add:

                setup_logger(log_file=log_file)

                # Verify both console and file loggers were added
                assert mock_add.call_count == 2

                # Check file logger call (second add() call)
                file_call = mock_add.call_args_list[1]
                assert file_call[0][0] == log_file
                assert "rotation" in file_call[1]
                assert "retention" in file_call[1]
                assert "compression" in file_call[1]
        finally:
            # Clean up
            if os.path.exists(log_file):
                os.unlink(log_file)

    def test_setup_logger_verbose(self):
        """Test logger setup with verbose mode."""
        with patch('loguru.logger.remove') as mock_remove, \
             patch('loguru.logger.add') as mock_add:

            setup_logger(verbose=True)

            # Verify DEBUG level was set
            console_call = mock_add.call_args_list[0]
            assert console_call[1]["level"] == "DEBUG"

    def test_setup_logger_custom_level(self):
        """Test logger setup with custom log level."""
        with patch('loguru.logger.remove') as mock_remove, \
             patch('loguru.logger.add') as mock_add:

            setup_logger(log_level="WARNING")

            # Verify WARNING level was set
            console_call = mock_add.call_args_list[0]
            assert console_call[1]["level"] == "WARNING"

    def test_setup_logger_custom_rotation(self):
        """Test logger setup with custom rotation settings."""
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            log_file = tmp_file.name

        try:
            with patch('loguru.logger.remove') as mock_remove, \
                 patch('loguru.logger.add') as mock_add:

                setup_logger(log_file=log_file, rotation="100 MB")

                # Verify custom rotation was set on the file sink
                file_call = mock_add.call_args_list[1]
                assert file_call[1]["rotation"] == "100 MB"
        finally:
            # Clean up
            if os.path.exists(log_file):
                os.unlink(log_file)

    def test_setup_logger_exception_handler(self):
        """Test that exception handler is set up."""
        with patch('loguru.logger.remove') as mock_remove, \
             patch('loguru.logger.add') as mock_add, \
             patch('sys.excepthook') as mock_excepthook:

            setup_logger()

            # Verify exception handler was set
            # NOTE(review): a Mock is always non-None and callable, so these
            # assertions pass regardless of what setup_logger does -- this
            # test should assert sys.excepthook was actually replaced.
            assert mock_excepthook is not None
            assert callable(mock_excepthook)
||||
|
||||
|
||||
class TestGetLogger:
    """Test cases for get_logger function."""

    def test_get_logger_no_name(self):
        """get_logger() with no name still binds the loguru logger once."""
        with patch('loguru.logger.bind') as bind_mock:
            get_logger()
        bind_mock.assert_called_once()

    def test_get_logger_with_name(self):
        """get_logger(name) binds the loguru logger with that name."""
        with patch('loguru.logger.bind') as bind_mock:
            get_logger("test_module")
        bind_mock.assert_called_once_with(name="test_module")
||||
|
||||
|
||||
class TestLoggerMixin:
    """Test cases for LoggerMixin class."""

    def test_logger_property(self):
        """Test logger property in mixin."""

        # Minimal concrete subclass just to exercise the mixin property.
        class TestClass(LoggerMixin):
            pass

        with patch('loguru.logger.bind') as mock_bind:
            test_obj = TestClass()
            _ = test_obj.logger

            # Verify logger was bound with class name and object id
            mock_bind.assert_called_once()
            args, kwargs = mock_bind.call_args
            assert kwargs["name"] == "TestClass"
            assert "id" in kwargs

    def test_logger_mixin_inheritance(self):
        """Test that logger mixin works with inheritance."""

        class ParentClass(LoggerMixin):
            pass

        class ChildClass(ParentClass):
            pass

        with patch('loguru.logger.bind') as mock_bind:
            child_obj = ChildClass()
            _ = child_obj.logger

            # Verify logger was bound with the *dynamic* (child) class name,
            # not the class that defined the property.
            args, kwargs = mock_bind.call_args
            assert kwargs["name"] == "ChildClass"
|
371
tests/test_scraper.py
Normal file
371
tests/test_scraper.py
Normal file
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
Unit tests for the Scraper module.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
|
||||
from src.scraper import MarketTrendsScraper
|
||||
|
||||
|
||||
class TestMarketTrendsScraper:
|
||||
"""Test cases for MarketTrendsScraper class."""
|
||||
|
||||
    @pytest.fixture
    def sample_config(self):
        """Sample configuration for testing.

        Covers the scraper/sources/output sections consumed by
        MarketTrendsScraper in the tests below.
        """
        return {
            "scraper": {
                "delay_between_requests": 1.0,
                "timeout": 30,
                "max_retries": 3,
                "user_agent": "Mozilla/5.0",
                "headless": True,
                "window_size": [1920, 1080]
            },
            "sources": [
                {
                    "name": "test_source",
                    "url": "https://example.com/products",
                    "type": "ecommerce",
                    "enabled": True,
                    # CSS selectors matching the sample_html fixture.
                    "selectors": {
                        "product": "div.product",
                        "name": "h2.title",
                        "price": "span.price",
                        "rating": "div.rating",
                        "availability": "div.stock"
                    },
                    "pagination": {
                        "next_page": "a.next",
                        "max_pages": 2
                    }
                }
            ],
            "output": {
                "format": "csv",
                "include_timestamp": True,
                "filename": "market_trends_data"
            }
        }
|
||||
|
||||
    @pytest.fixture
    def sample_html(self):
        """Sample HTML for testing.

        Two product cards (one in stock with a $ price, one out of stock with
        a € price) plus a pagination link, matching sample_config's selectors.
        """
        return """
        <html>
            <body>
                <div class="product">
                    <h2 class="title">Test Product 1</h2>
                    <span class="price">$19.99</span>
                    <div class="rating">4.5 stars</div>
                    <div class="stock">In Stock</div>
                    <a href="/product/1">View</a>
                </div>
                <div class="product">
                    <h2 class="title">Test Product 2</h2>
                    <span class="price">€29.99</span>
                    <div class="rating">3.8 stars</div>
                    <div class="stock">Out of Stock</div>
                    <a href="/product/2">View</a>
                </div>
                <a class="next" href="/page/2">Next</a>
            </body>
        </html>
        """
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_init(self, mock_webdriver, sample_config):
        """Test scraper initialization."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        scraper = MarketTrendsScraper(sample_config)

        assert scraper.config == sample_config
        assert scraper.driver == mock_driver
        assert scraper.session is not None
        assert scraper.data == []

        # Verify browser setup: Chrome launched once, page-load timeout taken
        # from sample_config["scraper"]["timeout"].
        mock_webdriver.Chrome.assert_called_once()
        mock_driver.set_page_load_timeout.assert_called_with(30)
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_setup_browser(self, mock_webdriver, sample_config):
        """Test browser setup."""
        mock_driver = Mock()
        mock_webdriver.Chrome.return_value = mock_driver

        scraper = MarketTrendsScraper(sample_config)

        # Verify options were set on the Chrome() call; the options object is
        # passed as the 'options' keyword argument.
        call_args = mock_webdriver.Chrome.call_args
        options = call_args[1]['options']

        assert options.arguments is not None
        assert any("--headless" in arg for arg in options.arguments)
        # Window size comes from sample_config["scraper"]["window_size"].
        assert any("--window-size=1920,1080" in arg for arg in options.arguments)
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_setup_session(self, mock_webdriver, sample_config):
        """Test session setup."""
        scraper = MarketTrendsScraper(sample_config)

        # Verify headers were set from the configured user agent.
        assert "User-Agent" in scraper.session.headers
        assert scraper.session.headers["User-Agent"] == sample_config["scraper"]["user_agent"]
        assert "Accept" in scraper.session.headers
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_parse_price(self, mock_webdriver, sample_config):
        """Test price parsing."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various price formats: currency symbols, embedded text,
        # comma decimal separator, and unparseable/empty input -> None.
        assert scraper._parse_price("$19.99") == 19.99
        assert scraper._parse_price("€29.99") == 29.99
        assert scraper._parse_price("£39.99") == 39.99
        assert scraper._parse_price("19,99") == 19.99
        assert scraper._parse_price("Price: $49.99 USD") == 49.99
        assert scraper._parse_price("Invalid price") is None
        assert scraper._parse_price("") is None
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_parse_rating(self, mock_webdriver, sample_config):
        """Test rating parsing."""
        scraper = MarketTrendsScraper(sample_config)

        # Test various rating formats; unparseable/empty input yields None.
        assert scraper._parse_rating("4.5 stars") == 4.5
        assert scraper._parse_rating("Rating: 3.8/5") == 3.8
        assert scraper._parse_rating("5 stars") == 5.0
        assert scraper._parse_rating("Invalid rating") is None
        assert scraper._parse_rating("") is None
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_extract_product_data(self, mock_webdriver, sample_config, sample_html):
        """Test product data extraction from HTML."""
        scraper = MarketTrendsScraper(sample_config)

        soup = BeautifulSoup(sample_html, 'html.parser')
        # find() returns only the first product card.
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is not None
        assert data["name"] == "Test Product 1"
        assert data["price"] == 19.99
        assert data["rating"] == 4.5
        assert data["availability"] == "In Stock"
        # URL is taken from the card's <a href>, left relative.
        assert data["url"] == "/product/1"
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_extract_product_data_no_name(self, mock_webdriver, sample_config):
        """Test product data extraction when name is missing."""
        scraper = MarketTrendsScraper(sample_config)

        # Product card with a price but no element matching the name selector.
        html = """
        <div class="product">
            <span class="price">$19.99</span>
        </div>
        """
        soup = BeautifulSoup(html, 'html.parser')
        product = soup.find('div', class_='product')
        selectors = sample_config["sources"][0]["selectors"]

        data = scraper._extract_product_data(product, selectors)

        assert data is None  # Should return None when name is missing
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_success(self, mock_webdriver, sample_config):
        """Test successful HTTP request with retry logic."""
        scraper = MarketTrendsScraper(sample_config)

        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "Success"

        with patch.object(scraper.session, 'get', return_value=mock_response) as mock_get:
            response = scraper._make_request_with_retry("https://example.com")

        # A successful first attempt should not trigger any retries.
        assert response == mock_response
        mock_get.assert_called_once()
|
||||
|
||||
    @patch('src.scraper.webdriver')
    def test_make_request_with_retry_failure(self, mock_webdriver, sample_config):
        """Test HTTP request failure with retry logic."""
        scraper = MarketTrendsScraper(sample_config)

        # Every attempt raises, so after exhausting retries the helper is
        # expected to give up and return None rather than propagate.
        with patch.object(scraper.session, 'get', side_effect=Exception("Connection error")):
            response = scraper._make_request_with_retry("https://example.com")

        assert response is None
|
||||
|
||||
    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_source')
    def test_scrape_market_trends(self, mock_scrape_source, mock_webdriver, sample_config):
        """Test scraping market trends from multiple sources."""
        scraper = MarketTrendsScraper(sample_config)

        # Mock source data: one product returned per source, in order.
        mock_scrape_source.side_effect = [
            [{"name": "Product 1", "price": 19.99}],
            [{"name": "Product 2", "price": 29.99}]
        ]

        # Add second source (mutates the fixture dict the scraper holds).
        sample_config["sources"].append({
            "name": "test_source_2",
            "url": "https://example2.com/products",
            "enabled": True,
            "selectors": {
                "product": "div.product",
                "name": "h2.title",
                "price": "span.price"
            }
        })

        data = scraper.scrape_market_trends()

        # Results from both sources are concatenated in source order.
        assert len(data) == 2
        assert data[0]["name"] == "Product 1"
        assert data[1]["name"] == "Product 2"
        assert mock_scrape_source.call_count == 2
|
||||
|
||||
    @patch('src.scraper.webdriver')
    @patch('src.scraper.MarketTrendsScraper._scrape_with_requests')
    def test_scrape_source_with_requests(self, mock_scrape_requests, mock_webdriver, sample_config):
        """Test scraping a source using requests."""
        scraper = MarketTrendsScraper(sample_config)

        mock_scrape_requests.return_value = [{"name": "Test Product", "price": 19.99}]

        # Default source config (no use_selenium flag) should take the
        # requests-based path.
        source = sample_config["sources"][0]
        data = scraper._scrape_source(source)

        assert len(data) == 1
        assert data[0]["name"] == "Test Product"
        mock_scrape_requests.assert_called_once_with(source)
|
||||
|
||||
@patch('src.scraper.webdriver')
@patch('src.scraper.MarketTrendsScraper._scrape_with_selenium')
def test_scrape_source_with_selenium(self, mock_scrape_selenium, mock_webdriver, sample_config):
    """_scrape_source should take the Selenium path when the source opts in."""
    scraper = MarketTrendsScraper(sample_config)
    mock_scrape_selenium.return_value = [{"name": "Test Product", "price": 19.99}]

    # Opt the first source into browser-driven scraping.
    selenium_source = sample_config["sources"][0]
    selenium_source["use_selenium"] = True

    result = scraper._scrape_source(selenium_source)

    mock_scrape_selenium.assert_called_once_with(selenium_source)
    assert len(result) == 1
    assert result[0]["name"] == "Test Product"
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_data_csv(self, mock_open, mock_webdriver, sample_config):
    """save_data should route a .csv target through DataFrame.to_csv."""
    # NOTE(review): patching builtins.open with a plain Mock is unusual;
    # unittest.mock.mock_open is the conventional tool — confirm intent.
    scraper = MarketTrendsScraper(sample_config)

    rows = [
        {"name": "Product 1", "price": 19.99, "source": "Test"},
        {"name": "Product 2", "price": 29.99, "source": "Test"},
    ]

    with patch.object(pd.DataFrame, 'to_csv') as mock_to_csv:
        scraper.save_data(rows, "test_output.csv")

    mock_to_csv.assert_called_once_with("test_output.csv", index=False)
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_data_json(self, mock_open, mock_webdriver, sample_config):
    """save_data should route a .json target through DataFrame.to_json."""
    scraper = MarketTrendsScraper(sample_config)

    rows = [
        {"name": "Product 1", "price": 19.99, "source": "Test"},
        {"name": "Product 2", "price": 29.99, "source": "Test"},
    ]

    with patch.object(pd.DataFrame, 'to_json') as mock_to_json:
        scraper.save_data(rows, "test_output.json")

    mock_to_json.assert_called_once()
@patch('src.scraper.webdriver')
def test_analyze_trends(self, mock_webdriver, sample_config):
    """Trend analysis summarises product count, prices, ratings and sources.

    Computed float averages are compared with math.isclose rather than exact
    equality: the binary sum of the .99/.x literals need not equal the decimal
    literals 34.99 / 4.3 (e.g. 4.5 + 3.8 + 4.2 + 4.7 == 17.200000000000003),
    which made the original exact assertions fragile.
    """
    import math  # local import keeps this test self-contained

    scraper = MarketTrendsScraper(sample_config)

    data = [
        {"name": "Product 1", "price": 19.99, "rating": 4.5, "source": "Source A"},
        {"name": "Product 2", "price": 29.99, "rating": 3.8, "source": "Source A"},
        {"name": "Product 3", "price": 39.99, "rating": 4.2, "source": "Source B"},
        {"name": "Product 4", "price": 49.99, "rating": 4.7, "source": "Source B"},
    ]

    analysis = scraper.analyze_trends(data)

    assert analysis["total_products"] == 4
    assert "price_analysis" in analysis
    assert "rating_analysis" in analysis
    assert "sources" in analysis
    assert analysis["sources"]["Source A"] == 2
    assert analysis["sources"]["Source B"] == 2
    # Averages are derived values — tolerate float rounding error.
    assert math.isclose(analysis["price_analysis"]["average_price"], 34.99, rel_tol=1e-9)
    # min/max are taken straight from the input literals, so exact compare is safe.
    assert analysis["price_analysis"]["min_price"] == 19.99
    assert analysis["price_analysis"]["max_price"] == 49.99
    assert math.isclose(analysis["rating_analysis"]["average_rating"], 4.3, rel_tol=1e-9)
@patch('src.scraper.webdriver')
def test_analyze_trends_empty_data(self, mock_webdriver, sample_config):
    """An empty dataset should produce an explanatory error entry."""
    scraper = MarketTrendsScraper(sample_config)

    result = scraper.analyze_trends([])

    assert "error" in result
    assert result["error"] == "No data available for analysis"
@patch('src.scraper.webdriver')
@patch('builtins.open', new_callable=Mock)
def test_save_analysis(self, mock_open, mock_webdriver, sample_config):
    """save_analysis should serialise the analysis dict via json.dump."""
    scraper = MarketTrendsScraper(sample_config)
    summary = {"total_products": 4, "average_price": 34.99}

    with patch('json.dump') as mock_json_dump:
        scraper.save_analysis(summary, "test_analysis.json")

    mock_json_dump.assert_called_once()
@patch('src.scraper.webdriver')
def test_close(self, mock_webdriver, sample_config):
    """close() must quit the underlying Chrome driver."""
    driver = Mock()
    mock_webdriver.Chrome.return_value = driver

    scraper = MarketTrendsScraper(sample_config)
    scraper.close()

    driver.quit.assert_called_once()
@patch('src.scraper.webdriver')
def test_context_manager(self, mock_webdriver, sample_config):
    """Leaving the with-block should shut the driver down automatically."""
    driver = Mock()
    mock_webdriver.Chrome.return_value = driver

    with MarketTrendsScraper(sample_config) as scraper:
        assert scraper is not None

    # __exit__ is expected to have released the browser.
    driver.quit.assert_called_once()
|
Reference in New Issue
Block a user