Skip to content

Commit

Permalink
QRDE-HD
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 4, 2023
1 parent 9686c2c commit 23ea5b9
Showing 1 changed file with 48 additions and 2 deletions.
50 changes: 48 additions & 2 deletions benchmarks/scripts/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

import re
import cub
import math
import argparse
import itertools
import functools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
from scipy.stats.mstats import hdquantiles


def get_bench_columns():
Expand Down Expand Up @@ -220,6 +223,49 @@ def coverage(args):
iterate_case_dfs(args, case_coverage)


def qrde_hd(samples, precision=0.01):
    """
    Computes quantile-respectful density estimation based on the Harrell-Davis
    quantile estimator. The implementation is based on the following post:
    https://aakinshin.net/posts/qrde-hd by Andrey Akinshin

    The probability range is split into ``ceil(1 / precision)`` bins of equal
    probability mass. Bin boundaries are the sample minimum, the
    Harrell-Davis estimates of the interior quantiles, and the sample
    maximum.

    Args:
        samples: sequence of numeric observations.
        precision: target probability mass of a single bin.

    Returns:
        A ``(width, height)`` pair of equally long lists. ``width[i]`` is the
        distance between consecutive bin boundaries and ``height[i]`` is the
        density over that bin, so ``sum(w * h)`` is 1 when all widths are
        non-zero. A zero-width bin gets an infinite height.
    """
    min_sample, max_sample = min(samples), max(samples)
    num_quantiles = math.ceil(1.0 / precision)

    # Interior boundaries sit at i / num_quantiles so that every bin carries
    # exactly the same probability mass, even when 1 / precision is not an
    # integer (e.g. precision=0.03).
    quantiles = np.linspace(0.0, 1.0, num_quantiles + 1)[1:-1]
    hd_quantiles = [min_sample] + list(hdquantiles(samples, quantiles)) + [max_sample]
    width = [upper - lower for lower, upper in zip(hd_quantiles, hd_quantiles[1:])]

    # Each bin has area p, hence density = p / width.
    p = 1.0 / num_quantiles
    height = [p / w if w else math.inf for w in width]
    return width, height


def hd_displot(samples, label, ax, precision=0.01):
    """
    Draws the density estimate returned by ``qrde_hd`` for ``samples`` on
    ``ax`` as a labelled line with a translucent fill underneath.

    The curve starts at ``(min(samples), 0)``, passes through the midpoint of
    every bin at that bin's height, and ends at ``(max(samples), 0)``.
    """
    bin_widths, bin_heights = qrde_hd(samples, precision)
    lowest, highest = min(samples), max(samples)

    # Anchor the curve to zero at the smallest sample.
    xs, ys = [lowest], [0]

    left_edge = lowest
    for bin_width, bin_height in zip(bin_widths, bin_heights):
        xs.append(left_edge + bin_width / 2)
        ys.append(bin_height)
        left_edge += bin_width

    # Close the curve at zero on the largest sample.
    xs.append(highest)
    ys.append(0)

    ax.plot(xs, ys, label=label)
    ax.legend()
    ax.fill_between(xs, ys, 0, alpha=0.4)


def displot(data, ax, precision=0.01):
    """
    Plots one ``hd_displot`` curve on ``ax`` for every key of ``data``,
    using the key as the curve label and ``data[key]`` as its samples.
    """
    for name in data:
        series = data[name]
        hd_displot(series, name, ax, precision)


def case_variants(pattern, algname, ct_point_name, case_df):
title = "{}[{}]:".format(algname, ct_point_name)
df = case_df[case_df['variant'].str.contains(pattern, regex=True)].reset_index(drop=True)
Expand Down Expand Up @@ -286,10 +332,10 @@ def extract_horizontal_space(df):
variant_name = variant['variant']
if 'base' not in data:
data['base'] = horizontal_df[horizontal_df['variant'] == variant_name].iloc[0]['base_samples']

data[variant_name] = horizontal_df[horizontal_df['variant'] == variant_name].iloc[0]['samples']

sns.histplot(data=data, ax=ax, kde=True)
# sns.histplot(data=data, ax=ax, kde=True)
displot(data, ax, precision=0.03)

if len(horizontal_axes) > 0:
ax=axes[vertical_id, horizontal_id]
Expand Down

0 comments on commit 23ea5b9

Please sign in to comment.