// benchrust/src/benchmarks.rs

use crate::cli::{Args, WorkloadType};
use crate::stats::{BenchmarkResult, BenchmarkResults};
use anyhow::Result;
use colored::*;
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use std::time::{Duration, Instant};

/// Runs the benchmark workloads selected by [`Args`] and collects their results.
pub struct BenchmarkSuite {
    /// Options (from `crate::cli`) that select the workload and control the
    /// warm-up count, iteration count, core count and verbosity.
    args: Args,
}

impl BenchmarkSuite {
    /// Matrix dimension used by each worker in the multi-core matrix workload.
    const MATRIX_SIZE_PER_THREAD: usize = 64;

    /// Stride (in elements) between consecutive accesses in the memory workload.
    const MEMORY_STRIDE: u64 = 7919;

    /// Creates a suite that will run with the given arguments.
    pub fn new(args: Args) -> Self {
        Self { args }
    }

    /// Validates the arguments, runs every selected workload and collects the results.
    ///
    /// `WorkloadType::All` is expanded into the six concrete workloads; any other
    /// value runs only that workload.
    ///
    /// # Errors
    ///
    /// Returns an error if argument validation fails or if any workload fails.
    pub fn run(&mut self) -> Result<BenchmarkResults> {
        // Validate arguments first
        self.args
            .validate()
            .map_err(|e| anyhow::anyhow!("Invalid arguments: {}", e))?;

        let mut results = BenchmarkResults::new();
        results.system_info = self.get_system_info();

        if self.args.verbose {
            let banner = format!(
                "🔧 System: {} cores, {} threads",
                num_cpus::get(),
                self.args.effective_threads()
            );
            println!("{}", banner.bright_blue());
        }

        // Determine which workloads to run
        let workloads = match &self.args.workload {
            WorkloadType::All => vec![
                WorkloadType::MathInt,
                WorkloadType::MathFloat,
                WorkloadType::Memory,
                WorkloadType::Compute,
                WorkloadType::Primes,
                WorkloadType::Matrix,
            ],
            single => vec![single.clone()],
        };

        for workload in workloads {
            if self.args.verbose {
                let name = format!("{:?}", workload).to_lowercase();
                let header = format!("\n🎯 Running {} benchmark...", name);
                println!("{}", header.bright_yellow().bold());
            }

            let result = self.run_workload(&workload)?;
            results.add_result(workload, result);
        }

        Ok(results)
    }

    /// Runs the warm-up passes followed by the measured iterations of one workload.
    ///
    /// Warm-up measurements are thrown away; the duration and score of every
    /// measured iteration are handed to `BenchmarkResult::new` together with the
    /// effective core count. In verbose mode a progress bar tracks both phases.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by `execute_benchmark`.
    fn run_workload(&self, workload: &WorkloadType) -> Result<BenchmarkResult> {
        // Print the banner before the bar exists so the two never interleave.
        if self.args.verbose {
            println!("🔥 Warming up CPU (because cold silicon is slow silicon)...");
        }

        let progress = self.args.verbose.then(|| {
            let pb = ProgressBar::new((self.args.warmup + self.args.iterations) as u64);
            pb.set_style(
                ProgressStyle::default_bar()
                    .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({msg})")
                    .expect("the progress bar template is a valid constant")
                    .progress_chars("#>-"),
            );
            pb
        });

        // Warm-up phase
        for i in 0..self.args.warmup {
            if let Some(pb) = &progress {
                pb.set_message(format!("Warmup {}/{}", i + 1, self.args.warmup));
            }
            self.execute_benchmark(workload)?;
            // Advance only after the pass has actually finished.
            if let Some(pb) = &progress {
                pb.inc(1);
            }
        }

        // Actual benchmarking
        let mut times = Vec::with_capacity(self.args.iterations as usize);
        let mut scores = Vec::with_capacity(self.args.iterations as usize);

        for i in 0..self.args.iterations {
            if let Some(pb) = &progress {
                pb.set_message(format!("Iteration {}/{}", i + 1, self.args.iterations));
            }

            let (duration, score) = self.execute_benchmark(workload)?;
            times.push(duration);
            scores.push(score);

            if let Some(pb) = &progress {
                pb.inc(1);
            }
        }

        if let Some(pb) = &progress {
            pb.finish_with_message("✅ Complete");
        }

        Ok(BenchmarkResult::new(times, scores, self.args.effective_cores()))
    }

    /// Executes one pass of `workload` and returns its wall-clock duration and score.
    ///
    /// The single-core path is taken when the effective core count is exactly 1;
    /// every other value goes through the multi-core path.
    fn execute_benchmark(&self, workload: &WorkloadType) -> Result<(Duration, f64)> {
        let start = Instant::now();

        let score = if self.args.effective_cores() == 1 {
            self.run_single_core_benchmark(workload)?
        } else {
            self.run_multi_core_benchmark(workload)?
        };
        // Force the score to be materialised before the clock is read, so the
        // optimizer cannot drop the work when the caller ignores the result
        // (as the warm-up loop does).
        let score = std::hint::black_box(score);

        let duration = start.elapsed();
        Ok((duration, score))
    }

    /// Problem size used when `workload` runs on a single core.
    ///
    /// # Panics
    ///
    /// Panics for `WorkloadType::All`, which `run` expands before dispatching.
    fn single_core_size(workload: &WorkloadType) -> usize {
        match workload {
            WorkloadType::MathInt | WorkloadType::MathFloat => 1_000_000,
            WorkloadType::Memory => 10_000_000,
            WorkloadType::Compute => 50_000,
            WorkloadType::Primes => 100_000,
            WorkloadType::Matrix => 256,
            WorkloadType::All => unreachable!("All is handled at a higher level"),
        }
    }

    /// Runs the kernel that implements `workload` with the given problem size.
    ///
    /// # Panics
    ///
    /// Panics for `WorkloadType::All`, which `run` expands before dispatching.
    fn run_kernel(&self, workload: &WorkloadType, size: usize) -> f64 {
        // Hide the (constant) size from the optimizer so kernels are not
        // specialised or folded for one particular value.
        let size = std::hint::black_box(size);
        match workload {
            WorkloadType::MathInt => self.math_int_benchmark(size),
            WorkloadType::MathFloat => self.math_float_benchmark(size),
            WorkloadType::Memory => self.memory_benchmark(size),
            WorkloadType::Compute => self.compute_benchmark(size),
            WorkloadType::Primes => self.primes_benchmark(size) as f64,
            WorkloadType::Matrix => self.matrix_benchmark(size),
            WorkloadType::All => unreachable!("All is handled at a higher level"),
        }
    }

    /// Runs `workload` once on the calling thread at its full single-core size.
    fn run_single_core_benchmark(&self, workload: &WorkloadType) -> Result<f64> {
        Ok(self.run_kernel(workload, Self::single_core_size(workload)))
    }

    /// Runs `workload` as one rayon task per effective core and sums the scores.
    ///
    /// Each task gets the single-core problem size divided by the core count
    /// (integer division, so any remainder is dropped). The matrix workload is
    /// the exception: every task multiplies a fixed 64x64 matrix.
    ///
    /// # Errors
    ///
    /// Returns an error if the effective core count is 0.
    fn run_multi_core_benchmark(&self, workload: &WorkloadType) -> Result<f64> {
        let cores = self.args.effective_cores();
        if cores == 0 {
            // Without this guard the division below would panic.
            return Err(anyhow::anyhow!(
                "Cannot run a multi-core benchmark on 0 cores"
            ));
        }

        let chunk_size = match workload {
            WorkloadType::Matrix => Self::MATRIX_SIZE_PER_THREAD,
            other => Self::single_core_size(other) / cores,
        };

        // Collect first and add sequentially: the order of a parallel float sum
        // is unspecified, which would make the score non-deterministic.
        let partial_scores: Vec<f64> = (0..cores)
            .into_par_iter()
            .map(|_| self.run_kernel(workload, chunk_size))
            .collect();

        Ok(partial_scores.iter().sum())
    }

    // Benchmark implementations

    /// Integer workload: a chain of `operations` dependent wrapping multiply-adds.
    ///
    /// Returns the final accumulator converted to `f64`.
    fn math_int_benchmark(&self, operations: usize) -> f64 {
        let mut result = 1i64;
        for i in 1..=operations {
            // Wrapping arithmetic: the accumulator is expected to overflow.
            result = result.wrapping_mul(i as i64).wrapping_add(i as i64);
        }
        result as f64
    }

    /// Floating-point workload: `operations` dependent `sin`/`cos`/`sqrt` steps.
    ///
    /// Returns the final accumulator.
    fn math_float_benchmark(&self, operations: usize) -> f64 {
        let mut result = 1.0f64;
        for i in 1..=operations {
            result = result.sin().cos() + (i as f64).sqrt();
        }
        result
    }

    /// Memory workload: fills a `size`-element buffer, then updates it with a
    /// large fixed stride instead of sequentially.
    ///
    /// Returns the wrapping sum of the buffer converted to `f64`.
    fn memory_benchmark(&self, size: usize) -> f64 {
        let mut data: Vec<u64> = (0..size as u64).collect();

        // The index is computed in u64 so `i * stride` cannot overflow a
        // 32-bit `usize` at the 10M-element problem size.
        let len = size as u64;
        for i in 0..len {
            let idx = ((i * Self::MEMORY_STRIDE) % len) as usize;
            data[idx] = data[idx].wrapping_mul(2).wrapping_add(1);
        }

        // Wrapping sum: when `size` is a multiple of the stride the same slots
        // are doubled repeatedly and a checked sum could overflow.
        data.iter().fold(0u64, |acc, &v| acc.wrapping_add(v)) as f64
    }

    /// Compute workload: applies `x = (x * x + 1) / (x + 1)` `iterations` times,
    /// starting from 2.0, and returns the final `x`.
    fn compute_benchmark(&self, iterations: usize) -> f64 {
        let mut x = 2.0f64;
        for _ in 0..iterations {
            x = (x * x + 1.0) / (x + 1.0); // Iterative function
        }
        x
    }

    /// Prime workload: counts the primes in `0..=limit` with a Sieve of Eratosthenes.
    fn primes_benchmark(&self, limit: usize) -> usize {
        let mut is_prime = vec![true; limit + 1];
        is_prime[0] = false;
        // Index 1 only exists when `limit` is at least 1.
        if limit > 0 {
            is_prime[1] = false;
        }

        let mut p = 2;
        while p * p <= limit {
            if is_prime[p] {
                // Smaller multiples of `p` were already crossed out by smaller primes.
                let mut i = p * p;
                while i <= limit {
                    is_prime[i] = false;
                    i += p;
                }
            }
            p += 1;
        }

        is_prime.iter().filter(|&&x| x).count()
    }

    /// Matrix workload: multiplies two generated `size` x `size` matrices with the
    /// naive triple loop and returns the sum of all elements of the product.
    fn matrix_benchmark(&self, size: usize) -> f64 {
        // a[i][j] = i + j
        let matrix_a: Vec<Vec<f64>> = (0..size)
            .map(|i| (0..size).map(|j| (i + j) as f64).collect())
            .collect();

        // b[i][j] = i * j + 1
        let matrix_b: Vec<Vec<f64>> = (0..size)
            .map(|i| (0..size).map(|j| (i * j + 1) as f64).collect())
            .collect();

        let mut result = vec![vec![0.0; size]; size];

        for i in 0..size {
            for j in 0..size {
                for k in 0..size {
                    result[i][j] += matrix_a[i][k] * matrix_b[k][j];
                }
            }
        }

        result.iter().flatten().sum()
    }

    /// Builds the one-line system description stored in the results.
    ///
    /// NOTE(review): the Rust version is a hard-coded string, not the version of
    /// the compiler that built the binary, so it can go stale. Deriving it would
    /// need a build script or an extra dependency.
    fn get_system_info(&self) -> String {
        format!(
            "CPU Cores: {}, Threads: {}, Rust: {}",
            num_cpus::get(),
            self.args.effective_threads(),
            "1.89.0" // Fixed version since env!() doesn't work here
        )
    }
}