# Repository viewer metadata (not part of the configuration):
# File: cicd-chaos/config/production.yml
# Timestamp: 2025-09-13 11:52:42 +03:00
# Size: 387 lines, 9.0 KiB, YAML

---
# Production Environment Configuration
# Seriously over-engineered production configuration
#
# Layout: a single root key, `production`, with one child mapping per concern
# (pipeline, variables, build, testing, security, monitoring, deployment, ...).
#
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped, which left every key at the top level and produced many
# duplicate keys (`enabled`, `monitoring`, `backup`, `compliance`, ...).
# Placements that could not be determined unambiguously are marked with
# NOTE(review) -- confirm them against whatever consumes this file.
production:
  # Pipeline Configuration
  pipeline:
    name: "Production Pipeline"
    description: "Enterprise-grade production pipeline with maximum over-engineering"
    chaos_level: 3  # Reduced chaos for production (but still some!)
    roast_intensity: 5  # Less roasting in production
    celebration_mode: "standard"  # Professional celebrations only
    developer_challenge: false  # No challenges in production

  # Environment Variables
  # Values are quoted on purpose so they stay strings rather than being
  # parsed as YAML booleans.
  variables:
    NODE_ENV: "production"
    DEBUG: "false"
    LOG_LEVEL: "warn"
    # NOTE(review): pipeline.chaos_level is 3 while CHAOS_ENABLED is "false";
    # confirm which of the two the pipeline actually honours.
    CHAOS_ENABLED: "false"
    DEVELOPER_MODE: "false"
    TESTING_MODE: "false"

  # Ultra-Optimized Build Configuration
  build:
    optimization_level: "aggressive"
    cache_strategy: "strategic"
    parallel_jobs: 32
    debug_symbols: false
    profiling: false
    memory_limit: "16GB"
    # No unit is given here, unlike the other durations in this file
    # ("30s", "5m", "2h"); presumably seconds -- TODO confirm.
    timeout: "7200"
    minification: true
    tree_shaking: true
    dead_code_elimination: true

  # Comprehensive Testing Configuration
  testing:
    unit_tests:
      enabled: true
      framework: "jest"
      coverage_threshold: 98
      parallel_execution: true
      mutation_testing: true
      property_based_testing: true
      performance_regression: true
    integration_tests:
      enabled: true
      framework: "cypress"
      environment: "production-like"
      database: "production-clone"
      mock_services: false
    e2e_tests:
      enabled: true
      framework: "playwright"
      browsers: ["chrome", "firefox", "safari", "edge"]
      mobile: true
      tablet: true
    performance_tests:
      enabled: true
      tool: "k6"
      virtual_users: 10000
      duration: "2h"
      # Pass/fail criteria expressed as threshold strings, one list per metric.
      thresholds:
        http_req_duration: ["p(99)<500"]
        http_req_failed: ["rate<0.001"]
    security_tests:
      enabled: true
      tools: ["owasp-zap", "burp-suite", "clair"]
      comprehensive_scan: true

  # Enterprise Security Configuration
  security:
    sast:
      enabled: true
      tools: ["sonarqube", "semgrep", "bandit", "checkmarx"]
      fail_build: true
      quality_gate: 95
    dast:
      enabled: true
      tool: "owasp-zap"
      # NOTE(review): full, active scanning is pointed at "prod-app"; confirm
      # that actively scanning the production target is intended.
      target: "prod-app"
      full_scan: true
      active_scanning: true
    dependency_scanning:
      enabled: true
      tools: ["snyk", "dependabot", "npm audit", "whitesource"]
      license_checking: true
      compliance_checking: true
    compliance:
      enabled: true
      frameworks: ["SOC2", "ISO27001", "GDPR", "HIPAA", "PCI-DSS"]
      automated_auditing: true

  # Enterprise Monitoring and Observability
  monitoring:
    metrics:
      enabled: true
      exporter: "prometheus"
      granularity: "1s"
      retention: "30d"
      alerting: true
      dashboard: "grafana"
    logging:
      level: "warn"
      format: "json"
      output: ["elasticsearch", "s3"]
      file_rotation: true
      compression: true
      indexing: true
      search: true
    tracing:
      enabled: true
      service: "jaeger"
      sampling_rate: 0.1
      baggage: true
      adaptive_sampling: true
    alerting:
      enabled: true
      provider: "prometheus-alertmanager"
      channels: ["slack", "pagerduty", "email"]
      escalation_policy: true
      incident_management: true

  # Production Deployment Configuration
  deployment:
    # NOTE(review): strategy is "blue-green" but a canary rollout is also
    # enabled below; confirm how the two are meant to combine.
    strategy: "blue-green"
    auto_rollback: true
    canary:
      enabled: true
      initial_weight: 5
      increment: 5
      interval: "5m"
      metrics: ["error_rate", "response_time", "cpu_usage"]
    health_checks:
      enabled: true
      endpoints: ["/health", "/ready", "/live"]
      # NOTE(review): the timeout (30s) is longer than the interval (10s);
      # verify the consumer accepts that combination.
      timeout: "30s"
      interval: "10s"
      success_threshold: 3
      failure_threshold: 2

  # High Availability Configuration
  high_availability:
    enabled: true
    min_replicas: 3
    max_replicas: 10
    auto_scaling:
      enabled: true
      cpu_threshold: 70
      memory_threshold: 80
      scale_up_cooldown: "5m"
      scale_down_cooldown: "15m"
    # NOTE(review): the three flags below are placed on high_availability
    # itself rather than inside auto_scaling -- confirm.
    multi_az: true
    multi_region: true
    disaster_recovery: true

  # Enterprise Database Configuration
  database:
    primary:
      type: "postgresql"
      version: "14"  # quoted so the version stays a string
      instance_type: "db.r6g.4xlarge"
      storage: "1000GB"
      iops: 20000
      multi_az: true
      backup:
        enabled: true
        retention: "30d"
        point_in_time_recovery: true
      monitoring:
        enhanced: true
        performance_insights: true
    # NOTE(review): read_replicas is placed as a sibling of `primary`
    # (not nested inside it) -- confirm.
    read_replicas:
      count: 3
      instance_type: "db.r6g.2xlarge"
    redis:
      cluster_mode: true
      node_type: "cache.r6g.2xlarge"
      shards: 3
      replicas_per_shard: 2
      multi_az: true

  # Enterprise Service Configuration
  # One entry per service; every entry names its image and replica count.
  services:
    api_gateway:
      image: "chaos/api-gateway:production"
      replicas: 6
      resources:
        limits:
          memory: "2Gi"
          cpu: "2000m"
        requests:
          memory: "1Gi"
          cpu: "1000m"
      # NOTE(review): max_replicas (12) is above high_availability.max_replicas
      # (10); confirm which limit takes precedence.
      autoscaling:
        min_replicas: 4
        max_replicas: 12
        target_cpu_utilization: 70
    user_service:
      image: "chaos/user-service:production"
      replicas: 4
      database: "users_prod"
      cache: "redis_users"
    auth_service:
      image: "chaos/auth-service:production"
      replicas: 4
      secrets: ["jwt_secret_prod", "oauth_credentials_prod"]
      rate_limiting:
        enabled: true
        requests_per_minute: 1000
    order_service:
      image: "chaos/order-service:production"
      replicas: 6
      database: "orders_prod"
      queue: "orders_queue"
    payment_service:
      image: "chaos/payment-service:production"
      replicas: 4
      pci_compliance: true
      audit_logging: true
    notification_service:
      image: "chaos/notification-service:production"
      replicas: 3
      providers: ["email", "sms", "push", "webhook"]
      queue: "notifications_queue"
    analytics_service:
      image: "chaos/analytics-service:production"
      replicas: 4
      database: "analytics_prod"
      data_lake: true

  # Enterprise Load Balancing
  load_balancer:
    type: "application"
    scheme: "internet-facing"
    # NOTE(review): this is a 2017-dated TLS 1.2 policy name; check whether a
    # newer predefined ELB security policy should be used instead.
    ssl_policy: "ELBSecurityPolicy-TLS-1-2-2017-01"
    waf:
      enabled: true
      rules: "OWASP"
    access_logs:
      enabled: true
      s3_bucket: "prod-lb-logs"
      retention: "365d"

  # Enterprise SSL/TLS Configuration
  ssl:
    enabled: true
    provider: "aws-certificate-manager"
    domains:
      - "api.example.com"
      - "www.example.com"
      - "admin.example.com"
    hsts: true
    certificate_transparency: true

  # Enterprise Caching Configuration
  cache:
    cdn:
      enabled: true
      provider: "cloudflare"
      zone: "example.com"
      caching_level: "aggressive"
      # NOTE(review): `arl` is not self-explanatory and may be a misspelling
      # of another CDN option; confirm the key name the consumer expects.
      arl: true
      image_optimization: true
    application:
      provider: "redis"
      cluster_mode: true
      node_count: 6
      shard_count: 3
      automatic_failover: true
      persistence: true

  # Enterprise Feature Flags
  feature_flags:
    provider: "launchdarkly"
    environment: "production"
    # Placeholder in "$NAME" form rather than a literal key; presumably
    # substituted from the environment by the consumer -- TODO confirm.
    sdk_key: "$LD_SDK_KEY_PROD"
    flags:
      new_ui: false
      advanced_analytics: true
      beta_features: false
      performance_optimizations: true

  # Enterprise Backup Configuration
  backup:
    enabled: true
    schedule: "0 2 * * *"  # five-field cron expression
    retention: "90d"
    compression: true
    encryption: true
    cross_region: true
    destinations:
      - type: "s3"
        bucket: "chaos-backup-prod-us-east-1"
        region: "us-east-1"
      - type: "s3"
        bucket: "chaos-backup-prod-us-west-2"
        region: "us-west-2"
      # This destination sets its own retention ("7y") alongside the
      # backup-level "90d" above.
      - type: "glacier"
        vault: "chaos-longterm-backup"
        retention: "7y"

  # Enterprise Disaster Recovery
  disaster_recovery:
    enabled: true
    rpo: "15m"
    rto: "1h"
    geo_replication: true
    failover:
      enabled: true
      automatic: true
      health_check_interval: "30s"
      dns_failover: true
    # NOTE(review): backup_region and data_replication are placed on
    # disaster_recovery itself rather than inside failover -- confirm.
    backup_region: "us-west-2"
    data_replication: "continuous"

  # Enterprise Notification Configuration
  # Each channel lists the event names it reacts to in `notify_on`.
  notifications:
    slack:
      enabled: true
      webhook: "$SLACK_WEBHOOK_PROD"
      channel: "#production-alerts"  # quoted: a bare leading '#' would start a comment
      notify_on: ["failure", "rollback"]
      escalation: true
    pagerduty:
      enabled: true
      service_key: "$PAGERDUTY_KEY_PROD"
      urgency: "high"
      notify_on: ["critical", "error"]
    email:
      enabled: true
      recipients: ["prod-team@example.com", "sre@example.com"]
      notify_on: ["failure", "success", "rollback"]

  # Enterprise Security Compliance
  # NOTE(review): security.compliance.frameworks above lists an overlapping
  # set with different spellings ("SOC2" vs "SOC2 Type II", "ISO27001" vs
  # "ISO 27001") and without "FedRAMP"; confirm which list is authoritative.
  compliance:
    automated_auditing: true
    real_time_monitoring: true
    compliance_frameworks:
      - "SOC2 Type II"
      - "ISO 27001"
      - "GDPR"
      - "HIPAA"
      - "PCI-DSS Level 1"
      - "FedRAMP"

  # Enterprise Cost Optimization
  cost_optimization:
    enabled: true
    rightsizing: true
    scheduled_scaling: true
    spot_instances: true
    resource_cleanup: true
    budget_alerts: true
    monthly_budget: "$10000"

  # Enterprise Support Configuration
  support:
    level: "enterprise"
    response_time: "15m"
    24_7_support: true
    dedicated_account_manager: true
    technical_account_manager: true
    sla: "99.99%"  # quoted: a plain scalar may not begin with or safely end in '%' on all parsers