diff --git a/src/main.rs b/src/main.rs index 63b4f06..dd6438c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,8 +19,8 @@ mod engine; mod game; mod openings; mod pgn_writer; -mod simulation; mod sprt; +mod stats; #[cfg(test)] mod tests; mod tournament; diff --git a/src/simulation/mod.rs b/src/simulation/mod.rs deleted file mode 100644 index b57e771..0000000 --- a/src/simulation/mod.rs +++ /dev/null @@ -1,120 +0,0 @@ -use std::{cmp::Ordering, fmt::Display}; - -use bpci::{Interval, NSuccessesSample, WilsonScore}; -use rand::{Rng, SeedableRng}; -use rand_distr::{Distribution, Normal}; - -#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)] -pub struct MatchScore { - pub wins: u64, - pub draws: u64, - pub losses: u64, -} - -impl MatchScore { - pub fn score(self) -> f32 { - let num_games = self.num_games() as f32; - (self.wins as f32 + (self.draws as f32 / 2.0)) / num_games - } - - pub fn num_games(self) -> u64 { - self.wins + self.draws + self.losses - } -} - -impl Display for MatchScore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "+{}-{}={}", self.wins, self.losses, self.draws) - } -} - -pub struct WilsonDistribution { - normal_dist: Normal, - wilson_sampler: NSuccessesSample, -} - -impl WilsonDistribution { - pub fn new(n_games: u32, n_draws: u32) -> Self { - let normal_dist = Normal::new(0.0, 1.0).unwrap(); - let wilson_sampler: NSuccessesSample = - NSuccessesSample::new(n_games, n_draws).unwrap(); - - WilsonDistribution { - normal_dist, - wilson_sampler, - } - } -} - -impl Distribution for WilsonDistribution { - fn sample(&self, rng: &mut R) -> f64 { - let z: f64 = self.normal_dist.sample(rng); - let interval = self.wilson_sampler.wilson_score_with_cc(z.abs()); - if z.is_sign_positive() { - interval.upper() - } else { - interval.lower() - } - } -} - -pub struct FullWinstonSimulation { - results: Vec, -} - -const NUM_SIMULATIONS: u64 = 100000; - -impl FullWinstonSimulation { - pub fn run_simulation(score: MatchScore) -> Self { - // assert_ne!(score.num_games(), 0); - let mut results = vec![]; - - let mut rng = rand::rngs::SmallRng::seed_from_u64(0); - - let wilson_sampler_draw: WilsonDistribution = - WilsonDistribution::new(score.num_games() as u32, score.draws as u32); - let wilson_sampler_win: WilsonDistribution = - WilsonDistribution::new((score.wins + score.losses) as u32, score.wins as u32); - - for _ in 0..=NUM_SIMULATIONS { - let draw_p = wilson_sampler_draw.sample(&mut rng) as f32; - let win_p = if (score.wins + score.losses) != 0 { - wilson_sampler_win.sample(&mut rng) as f32 - } else { - rng.gen() - }; - - let num_draws = draw_p * score.num_games() as f32; - let num_wins = win_p * (score.num_games() as f32 - num_draws); - - results.push((0.5 * num_draws + num_wins) / score.num_games() as f32); - } - results.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); - - FullWinstonSimulation { results } - } - - pub fn result_for_p(&self, p: f32) -> f32 { - self.results[(p * self.results.len() as f32) as usize] - } -} - -pub fn to_elo(p: f32) -> Option { - let elo = -400.0 * ((1.0 - p) / p).log10(); - if elo.is_finite() { - Some(elo as i32) - } else { - None - } -} -pub fn to_elo_string(p: f32) -> String { - if p <= 0.0 { - "-INF".to_string() - } else if p >= 1.0 { - "+INF".to_string() - } else if p.is_nan() { - "N/A".to_string() - } else { - format!("{:+}", to_elo(p).unwrap()) - } -} diff --git a/src/sprt.rs b/src/sprt.rs index d3218c3..ed8693c 100644 --- a/src/sprt.rs +++ b/src/sprt.rs @@ -1,16 +1,10 @@ -use std::convert::TryInto; +use crate::stats::{PentanomialResult, ResultExt}; +use std::{convert::TryInto, num::FpCategory}; // This is an implementation of GSPRT under a pentanomial model. - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct PentanomialResult { - pub ww: usize, - pub wd: usize, - pub wl: usize, - pub dd: usize, - pub dl: usize, - pub ll: usize, -} +// +// References: +// [1] Michel Van den Bergh, Comments on Normalized Elo, https://www.cantate.be/Fishtest/normalized_elo_practical.pdf #[derive(Clone, Copy, Debug, PartialEq)] pub struct SprtParameters { @@ -47,51 +41,157 @@ impl SprtParameters { (self.elo0, self.elo1) } - // Approximate formula for the log-likelihood ratio for the given pentanomial result. - // See section 4.2 of https://archive.org/details/fishtest_mathematics/normalized_elo_practical/ - // Many thanks to Michel Van den Bergh. pub fn llr(self: SprtParameters, penta: PentanomialResult) -> f64 { - let (n, mean, variance) = penta.to_mean_and_variance(); - let sigma = (2.0 * variance).sqrt(); - let t = (mean - 0.5) / sigma; - let a = 1.0 + (t - self.t0).powf(2.0); - let b = 1.0 + (t - self.t1).powf(2.0); - n * f64::ln(a / b) + let count = penta.count() as f64; + let pdf: [f64; 5] = penta.probability_distribution().try_into().unwrap(); + let score: [f64; 5] = PentanomialResult::scores_map().try_into().unwrap(); + llr( + count, + pdf, + score, + self.t0 * f64::sqrt(2.0), + self.t1 * f64::sqrt(2.0), + ) } } -impl PentanomialResult { - pub fn to_pdf(self: PentanomialResult) -> (f64, [f64; 5]) { - let penta = [ - self.ll as f64, - self.dl as f64, - self.dd as f64 + self.wl as f64, - self.wd as f64, - self.ww as f64, - ]; - let zeros = penta.iter().filter(|&x| *x == 0.0).count(); - let regularisation = if zeros > 0 { 2.0 / zeros as f64 } else { 0.0 }; - let n: f64 = penta.iter().sum(); - ( - n, - penta - .iter() - .map(|x| (x + regularisation) / n) - .collect::>() - .try_into() - .unwrap(), - ) - } +/// Compute log-likelihood ratio for t = t0 versus t = t1. +fn llr(count: f64, pdf: [f64; N], score: [f64; N], t0: f64, t1: f64) -> f64 { + let p0 = mle(pdf, score, 0.5, t0); + let p1 = mle(pdf, score, 0.5, t1); + count * mean(std::array::from_fn(|i| p1[i].ln() - p0[i].ln()), pdf) +} + +/// Compute the maximum likelihood estimate for a discrete +/// probability distribution that has t = (mu - mu_ref) / sigma, +/// given `self` is an empirical distribution. +/// +/// See section 4.1 of [1] for details. +fn mle(pdf: [f64; N], score: [f64; N], mu_ref: f64, t_star: f64) -> [f64; N] { + const THETA_EPSILON: f64 = 1e-7; + const MLE_EPSILON: f64 = 1e-4; + + // This is an iterative method, so we need to start with + // an initial value. As suggested in [1], we start with a + // uniform distribution. + let mut p = [1.0 / N as f64; N]; - pub fn to_mean_and_variance(self: PentanomialResult) -> (f64, f64, f64) { - let scores = [0.0, 0.25, 0.5, 0.75, 1.0]; - let (n, pdf) = self.to_pdf(); - let mean: f64 = pdf.iter().zip(scores).map(|(p, s)| p * s).sum(); - let variance: f64 = pdf + // Have an upper limit for iteration. + for _ in 0..25 { + // Store our current estimate away to detect convergence. + let prev_p = p; + + // Calculate phi. + let (mu, variance) = mean_and_variance(score, p); + let phi: [f64; N] = std::array::from_fn(|i| { + let a_i = score[i]; + let sigma = variance.sqrt(); + a_i - mu_ref - 0.5 * t_star * sigma * (1.0 + ((a_i - mu) / sigma).powi(2)) + }); + + // We need to find a subset of the possible solutions for theta, + // so we need to calculate our constraints for theta. + let u = phi .iter() - .zip(scores) - .map(|(p, s)| p * (s - mean).powf(2.0)) - .sum(); - (n, mean, variance) + .min_by(|a, b| a.partial_cmp(b).expect("unexpected NaN")) + .unwrap(); + let v = phi + .iter() + .max_by(|a, b| a.partial_cmp(b).expect("unexpected NaN")) + .unwrap(); + let min_theta = -1.0 / v; + let max_theta = -1.0 / u; + + // Solve equation 4.9 in [1] for theta. + let theta = itp( + |x: f64| (0..N).map(|i| pdf[i] * phi[i] / (1.0 + x * phi[i])).sum(), + (min_theta, max_theta), + (f64::INFINITY, -f64::INFINITY), + 0.1, + 2.0, + 0.99, + THETA_EPSILON, + ); + + // Calculate new estimate + p = std::array::from_fn(|i| pdf[i] / (1.0 + theta * phi[i])); + + // Good enough? + if (0..N).all(|i| (prev_p[i] - p[i]).abs() < MLE_EPSILON) { + break; + } + } + + p +} + +fn mean(x: [f64; N], p: [f64; N]) -> f64 { + (0..N).map(|i| p[i] * x[i]).sum() +} + +fn mean_and_variance(x: [f64; N], p: [f64; N]) -> (f64, f64) { + let mu = mean(x, p); + (mu, (0..N).map(|i| p[i] * (x[i] - mu).powi(2)).sum()) +} + +// I. F. D. Oliveira and R. H. C. Takahashi. 2020. An Enhancement of the Bisection Method Average Performance +// Preserving Minmax Optimality. ACM Trans. Math. Softw. 47, 1, Article 5 (March 2021). +// https://doi.org/10.1145/3423597 +fn itp( + f: F, + (mut a, mut b): (f64, f64), + (mut f_a, mut f_b): (f64, f64), + k_1: f64, + k_2: f64, + n_0: f64, + epsilon: f64, +) -> f64 +where + F: Fn(f64) -> f64, +{ + if f_a > 0.0 { + (a, b) = (b, a); + (f_a, f_b) = (f_b, f_a); + } + assert!(f_a < 0.0 && 0.0 < f_b); + + let n_half = ((b - a).abs() / (2.0 * epsilon)).log2().ceil(); + let n_max = n_half + n_0; + let mut i = 0; + while (b - a).abs() > 2.0 * epsilon { + let x_half = (a + b) / 2.0; + let r = epsilon * f64::powf(2.0, n_max - i as f64) - (b - a) / 2.0; + let delta = k_1 * f64::powf(b - a, k_2); + + let x_f = (f_b * a - f_a * b) / (f_b - f_a); + + let sigma = (x_half - x_f) / (x_half - x_f).abs(); + let x_t = if delta <= (x_half - x_f).abs() { + x_f + sigma * delta + } else { + x_half + }; + + let x_itp = if (x_t - x_half).abs() <= r { + x_t + } else { + x_half - sigma * r + }; + + let f_itp = f(x_itp); + if f_itp.classify() == FpCategory::Zero { + a = x_itp; + b = x_itp; + } else if f_itp.is_sign_negative() { + a = x_itp; + f_a = f_itp; + } else { + b = x_itp; + f_b = f_itp; + } + + i += 1; } + + (a + b) / 2.0 } diff --git a/src/stats.rs b/src/stats.rs new file mode 100644 index 0000000..7ed988b --- /dev/null +++ b/src/stats.rs @@ -0,0 +1,155 @@ +// 97.5th percentile point of the normal distribution. +// This is used in computing 95% confidence intervals. +const NORM_PPF_0_975: f64 = 1.959963984540054; + +#[derive(Copy, Clone, Debug, Default)] +pub struct TrinomialResult { + pub w: u64, + pub d: u64, + pub l: u64, +} + +#[derive(Copy, Clone, Debug, Default)] +pub struct PentanomialResult { + pub ll: u64, + pub dl: u64, + pub dd: u64, + pub wl: u64, + pub wd: u64, + pub ww: u64, +} + +pub trait ResultExt { + fn scores_map() -> Vec; + + fn to_vec(&self) -> Vec; + + fn count(&self) -> u64 { + self.to_vec().iter().sum() + } + + fn probability_distribution(&self) -> Vec { + let v = self.to_vec(); + let n = self.count() as f64; + let zeros = v.iter().filter(|&x| *x == 0).count(); + let regularisation = if zeros > 0 { 0.001 / zeros as f64 } else { 0.0 }; + v.iter() + .map(|&x| (x as f64 + regularisation) / (n + regularisation * v.len() as f64)) + .collect() + } + + fn score(&self) -> f64 { + let pdf = self.probability_distribution(); + pdf.iter().zip(Self::scores_map()).map(|(p, s)| p * s).sum() + } + + fn variance(&self) -> f64 { + let pdf = self.probability_distribution(); + let mean = self.score(); + let variance: f64 = pdf + .iter() + .zip(Self::scores_map()) + .map(|(p, s)| p * (s - mean).powf(2.0)) + .sum(); + variance + } + + // 95% confidence interval for score + fn score_confidence_interval(&self) -> (f64, f64, f64) { + let count = self.count() as f64; + let score = self.score(); + let variance = self.variance(); + let per_count_variance = variance / count; + let score_lower = score - NORM_PPF_0_975 * per_count_variance.sqrt(); + let score_upper = score + NORM_PPF_0_975 * per_count_variance.sqrt(); + + (score_lower, score, score_upper) + } + + // 95% confidence interval for Elo + fn logistic_elo(&self) -> (f64, f64, f64) { + let (score_lower, score, score_upper) = self.score_confidence_interval(); + + let elo_lower = logistic_elo(score_lower); + let elo = logistic_elo(score); + let elo_upper = logistic_elo(score_upper); + + (elo_lower, elo, elo_upper) + } + + // 95% confidence interval for nElo + fn normalized_elo(&self) -> (f64, f64, f64) { + let variance = self.variance(); + let (score_lower, score, score_upper) = self.score_confidence_interval(); + + let elo_lower = normalized_elo(score_lower, variance); + let elo = normalized_elo(score, variance); + let elo_upper = normalized_elo(score_upper, variance); + + (elo_lower, elo, elo_upper) + } +} + +impl ResultExt for TrinomialResult { + fn scores_map() -> Vec { + vec![0.0, 0.5, 1.0] + } + + fn to_vec(&self) -> Vec { + vec![self.l, self.d, self.w] + } +} + +impl ResultExt for PentanomialResult { + fn scores_map() -> Vec { + vec![0.0, 0.25, 0.5, 0.75, 1.0] + } + + fn to_vec(&self) -> Vec { + vec![self.ll, self.dl, self.dd + self.wl, self.wd, self.ww] + } +} + +impl std::fmt::Display for TrinomialResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "+{}-{}={}", self.w, self.l, self.d) + } +} + +impl std::fmt::Display for PentanomialResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{}, {}, {}, {}, {}]", + self.ll, + self.dl, + self.dd + self.wl, + self.wd, + self.ww + ) + } +} + +pub fn elo_to_string(p: f64) -> String { + if p.is_infinite() && p.is_sign_negative() { + "-INF".to_string() + } else if p.is_infinite() && p.is_sign_positive() { + "+INF".to_string() + } else if p.is_nan() { + "N/A".to_string() + } else { + format!("{:+.2}", p) + } +} + +fn logistic_elo(score: f64) -> f64 { + let score = score.clamp(1e-6, 1.0 - 1e-6); + -400.0 * (1.0 / score - 1.0).log10() +} + +// References: +// - Michel Van den Bergh. Normalized Elo, https://cantate.be/Fishtest/normalized_elo.pdf +// - Michel Van den Bergh. Comments On Normalized Elo, https://cantate.be/Fishtest/normalized_elo_practical.pdf +fn normalized_elo(score: f64, variance: f64) -> f64 { + (score - 0.5) / (2.0 * variance).sqrt() * (800.0 / f64::ln(10.0)) +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 01e0599..a30d16a 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,6 +1,5 @@ mod cli_tests; -mod simulation_tests; mod sprt_tests; -mod uci_tests; - +mod stats_tests; mod tournament_tests; +mod uci_tests; diff --git a/src/tests/simulation_tests.rs b/src/tests/simulation_tests.rs deleted file mode 100644 index 479cd40..0000000 --- a/src/tests/simulation_tests.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::simulation::{FullWinstonSimulation, MatchScore}; - -#[test] -fn single_win_test() { - let score = MatchScore { - wins: 1, - draws: 0, - losses: 0, - }; - let simulation_result = FullWinstonSimulation::run_simulation(score); - let lower_bound = simulation_result.result_for_p(0.025); - // If you run 40 1-game tournaments between engines with 10% win chance for the weaker engine, - // 4 tournaments will result in a win for the weaker engines - assert!( - lower_bound > 0.01 && lower_bound < 0.1, - "Lower bound of true winning probability was {:.1}% after 1 winning game", - 100.0 * lower_bound - ); -} - -#[test] -fn ten_wins_test() { - let score = MatchScore { - wins: 10, - draws: 0, - losses: 0, - }; - let simulation_result = FullWinstonSimulation::run_simulation(score); - let lower_bound = simulation_result.result_for_p(0.025); - // The binomial probability for 10 trials with 10 successes with a 69% probability, is roughly 2.5% - assert!( - lower_bound > 0.62 && lower_bound < 0.76, - "Lower bound of true winning probability was {:.1}% after 10 winning games", - 100.0 * lower_bound - ); -} - -#[test] -fn expected_score_near_the_center_of_the_distribution_test() { - for wins in 0..10 { - for draws in 0..10 { - for losses in 0..10 { - if draws == 0 && (wins == 0 || losses == 0) { - continue; - } - let score = MatchScore { - wins, - draws, - losses, - }; - let simulation_result = FullWinstonSimulation::run_simulation(score); - let expected_score = score.score(); - - let low_bound = simulation_result.result_for_p(0.2); - let high_bound = simulation_result.result_for_p(0.8); - - assert!( - low_bound <= expected_score, - "Score {:?} had expected score {:.1}%, 20th percentile {:.1}%, 80th percentile {:.1}%", - score, - expected_score * 100.0, - low_bound * 100.0, - high_bound * 100.0 - ); - assert!(high_bound >= expected_score); - } - } - } -} diff --git a/src/tests/sprt_tests.rs b/src/tests/sprt_tests.rs index 2eb5f20..d87df9f 100644 --- a/src/tests/sprt_tests.rs +++ b/src/tests/sprt_tests.rs @@ -1,4 +1,4 @@ -use crate::sprt::{PentanomialResult, SprtParameters}; +use crate::{sprt::SprtParameters, stats::PentanomialResult}; #[test] fn sprt_threshold_test() { diff --git a/src/tests/stats_tests.rs b/src/tests/stats_tests.rs new file mode 100644 index 0000000..db8c9bb --- /dev/null +++ b/src/tests/stats_tests.rs @@ -0,0 +1,57 @@ +use crate::stats::{PentanomialResult, ResultExt, TrinomialResult}; + +#[test] +fn penta_logistic_elo() { + let examples = [ + (485, 1923, 2942, 1937, 594, (1.21, 5.11, 9.02)), + (261, 739, 2683, 737, 253, (-5.00, -0.67, 3.66)), + (63, 252, 385, 250, 74, (-7.38, 3.39, 14.17)), + (527, 1007, 1932, 933, 511, (-9.16, -3.75, 1.66)), + (175, 305, 694, 291, 157, (-14.57, -5.36, 3.85)), + ]; + for (ll, dl, wl, wd, ww, (expected_lower, expected_mean, expected_upper)) in examples { + let penta = PentanomialResult { + ll, + dl, + wl, + wd, + ww, + dd: 0, + }; + let (lower, mean, upper) = penta.logistic_elo(); + assert!(lower.is_finite()); + assert!(mean.is_finite()); + assert!(upper.is_finite()); + assert!(f64::abs(lower - expected_lower) <= 0.01); + assert!(f64::abs(mean - expected_mean) <= 0.01); + assert!(f64::abs(upper - expected_upper) <= 0.01); + } +} + +#[test] +fn penta_normalized_elo() { + let examples = [ + (485, 1923, 2942, 1937, 594, (1.68, 7.10, 12.53)), + (261, 739, 2683, 737, 253, (-8.13, -1.09, 5.96)), + (63, 252, 385, 250, 74, (-10.31, 4.74, 19.79)), + (527, 1007, 1932, 933, 511, (-11.63, -4.76, 2.11)), + (175, 305, 694, 291, 157, (-18.91, -6.96, 5.00)), + ]; + for (ll, dl, wl, wd, ww, (expected_lower, expected_mean, expected_upper)) in examples { + let penta = PentanomialResult { + ll, + dl, + wl, + wd, + ww, + dd: 0, + }; + let (lower, mean, upper) = penta.normalized_elo(); + assert!(lower.is_finite()); + assert!(mean.is_finite()); + assert!(upper.is_finite()); + assert!(f64::abs(lower - expected_lower) <= 0.01); + assert!(f64::abs(mean - expected_mean) <= 0.01); + assert!(f64::abs(upper - expected_upper) <= 0.01); + } +} diff --git a/src/tournament.rs b/src/tournament.rs index f4139a3..3e141ad 100644 --- a/src/tournament.rs +++ b/src/tournament.rs @@ -1,10 +1,10 @@ use crate::engine::{Engine, EngineBuilder}; +use crate::exit_with_error; use crate::game::ScheduledGame; use crate::openings::Opening; use crate::pgn_writer::PgnWriter; -use crate::simulation::MatchScore; -use crate::sprt::{PentanomialResult, SprtParameters}; -use crate::{exit_with_error, simulation}; +use crate::sprt::SprtParameters; +use crate::stats::{elo_to_string, PentanomialResult, ResultExt, TrinomialResult}; use board_game_traits::GameResult::*; use pgn_traits::PgnPosition; use std::num::NonZeroUsize; @@ -423,40 +423,45 @@ where } TournamentType::BookTest(_) => (), TournamentType::Sprt => { + let wdl = TrinomialResult { + w: engine_wins[1][0], + d: engine_draws[1][0], + l: engine_losses[1][0], + }; + let penta = Self::sprt_penta_stats(&finished_games); + println!("Base engine : {}", engine_names[0]); println!("Under test : {}", engine_names[1]); - let score = MatchScore { - wins: engine_wins[1][0], - draws: engine_draws[1][0], - losses: engine_losses[1][0], - }; - let full_simulation = simulation::FullWinstonSimulation::run_simulation(score); - let lower = full_simulation.result_for_p(0.025); - let expected = score.score(); - let upper = full_simulation.result_for_p(0.975); - let lower_elo = simulation::to_elo_string(lower); - let expected_elo = simulation::to_elo_string(expected); - let upper_elo = simulation::to_elo_string(upper); + let (tri_lower_elo, tri_expected_elo, tri_upper_elo) = wdl.logistic_elo(); println!( - "Elo : {} [{}, {}] (95%)", - expected_elo, lower_elo, upper_elo + "Elo (WDL) : {} [{}, {}] (95%)", + elo_to_string(tri_expected_elo), + elo_to_string(tri_lower_elo), + elo_to_string(tri_upper_elo) ); + + let (penta_lower_elo, penta_expected_elo, penta_upper_elo) = penta.logistic_elo(); println!( - "WDL : W: {}, D: {}, L: {}", - score.wins, score.draws, score.losses + "Elo (Penta) : {} [{}, {}] (95%)", + elo_to_string(penta_expected_elo), + elo_to_string(penta_lower_elo), + elo_to_string(penta_upper_elo) ); - let penta = Self::sprt_penta_stats(&finished_games); + let (penta_lower_nelo, penta_expected_nelo, penta_upper_nelo) = + penta.normalized_elo(); println!( - "Penta(0-2) : {}, {}, {}, {}, {}", - penta.ll, - penta.dl, - penta.dd + penta.wl, - penta.wd, - penta.ww + "nElo (Penta): {} [{}, {}] (95%)", + elo_to_string(penta_expected_nelo), + elo_to_string(penta_lower_nelo), + elo_to_string(penta_upper_nelo) ); + println!("WDL : W: {}, D: {}, L: {}", wdl.w, wdl.d, wdl.l); + + println!("Penta(0-2) : {}", penta); + if let Some(sprt) = self.sprt { let (elo0, elo1) = sprt.elo_bounds(); let (lower_bound, upper_bound) = sprt.llr_bounds(); @@ -541,30 +546,22 @@ fn print_head_to_head_score( engine1_id: usize, engine2_id: usize, ) { - let score = MatchScore { - wins: engine_wins[engine1_id][engine2_id], - draws: engine_draws[engine1_id][engine2_id], - losses: engine_wins[engine2_id][engine1_id], + let wdl = TrinomialResult { + w: engine_wins[engine1_id][engine2_id], + d: engine_draws[engine1_id][engine2_id], + l: engine_wins[engine2_id][engine1_id], }; - let full_simulation = simulation::FullWinstonSimulation::run_simulation(score); - - let lower = full_simulation.result_for_p(0.025); - let expected = score.score(); - let upper = full_simulation.result_for_p(0.975); - - let lower_elo = simulation::to_elo_string(lower); - let expected_elo = simulation::to_elo_string(expected); - let upper_elo = simulation::to_elo_string(upper); + let (lower_elo, expected_elo, upper_elo) = wdl.logistic_elo(); println!( "{} vs {}: {}, {} elo [{}, {}] (95% confidence).", engine_names[engine1_id], engine_names[engine2_id], - score, - expected_elo, - lower_elo, - upper_elo, + wdl, + elo_to_string(expected_elo), + elo_to_string(lower_elo), + elo_to_string(upper_elo), ); }