diff --git a/benchmarks/scripts/analyze.py b/benchmarks/scripts/analyze.py
index d303907be..97835e28b 100755
--- a/benchmarks/scripts/analyze.py
+++ b/benchmarks/scripts/analyze.py
@@ -216,7 +216,6 @@ def coverage(args):
 def case_variants(pattern, algname, ct_point_name, case_df):
     title = "{}[{}]:".format(algname, ct_point_name)
     df = case_df[case_df['variant'].str.contains(pattern, regex=True)].reset_index(drop=True)
-    num_records = len(df)
     rt_axes = get_rt_axes(df)
     rt_axes_values = extract_rt_axes_values(df)
 
@@ -231,13 +230,13 @@ def case_variants(pattern, algname, ct_point_name, case_df):
     for idx, val in enumerate(vertical_axis_values):
         vertical_axis_ids[val] = idx
 
-    if len(horizontal_axes) > 0:
-        def extract_horizontal_space(df):
-            values = []
-            for rt_axis in horizontal_axes:
-                values.append(["{}={}".format(rt_axis, v) for v in df[rt_axis].unique()])
-            return list(itertools.product(*values))
+    def extract_horizontal_space(df):
+        values = []
+        for rt_axis in horizontal_axes:
+            values.append(["{}={}".format(rt_axis, v) for v in df[rt_axis].unique()])
+        return list(itertools.product(*values))
 
+    if len(horizontal_axes) > 0:
         idx = 0
         horizontal_axis_ids = {}
         for point in extract_horizontal_space(df):
@@ -245,40 +244,54 @@ def extract_horizontal_space(df):
             idx = idx + 1
 
     num_rows = len(vertical_axis_ids)
-    num_cols = num_records // num_rows
+    num_cols = max(1, len(extract_horizontal_space(df)))
 
     fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, gridspec_kw = {'wspace': 0, 'hspace': 0})
 
-    for _, row in df.iterrows():
-        description = row['variant']
-        data = {description: row['samples'],
-                'base': row['base_samples']}
-        vertical_val = row[vertical_axis_name]
+    for _, vertical_row_description in df[[vertical_axis_name]].drop_duplicates().iterrows():
+        vertical_val = vertical_row_description[vertical_axis_name]
         vertical_id = vertical_axis_ids[vertical_val]
         vertical_name = "{}={}".format(vertical_axis_name, vertical_val)
 
-        horizontal_id = 0
-
-        if len(horizontal_axes) > 0:
-            horizontal_point = []
-            for rt_axis in horizontal_axes:
-                horizontal_point.append("{}={}".format(rt_axis, row[rt_axis]))
-            horizontal_name = " / ".join(horizontal_point)
-            horizontal_id = horizontal_axis_ids[horizontal_name]
-            ax=axes[vertical_id, horizontal_id]
-        else:
-            ax=axes[vertical_id]
-            ax.set_ylabel(vertical_name)
-        sns.histplot(data, ax=ax, kde=True)
+        vertical_df = df[df[vertical_axis_name] == vertical_val]
 
-        if len(horizontal_axes) > 0:
-            ax=axes[vertical_id, horizontal_id]
-            if vertical_id == (num_rows - 1):
-                ax.set_xlabel(horizontal_name)
-            if horizontal_id == 0:
-                ax.set_ylabel(vertical_name)
+        for _, horizontal_row_description in vertical_df[horizontal_axes].drop_duplicates().iterrows():
+            horizontal_df = vertical_df
+
+            for axis in horizontal_axes:
+                horizontal_df = horizontal_df[horizontal_df[axis] == horizontal_row_description[axis]]
+
+            horizontal_id = 0
+
+            if len(horizontal_axes) > 0:
+                horizontal_point = []
+                for rt_axis in horizontal_axes:
+                    horizontal_point.append("{}={}".format(rt_axis, horizontal_row_description[rt_axis]))
+                horizontal_name = " / ".join(horizontal_point)
+                horizontal_id = horizontal_axis_ids[horizontal_name]
+                ax=axes[vertical_id, horizontal_id]
             else:
-                ax.set_ylabel('')
+                ax=axes[vertical_id]
+                ax.set_ylabel(vertical_name)
+
+            data = {}
+            for _, variant in horizontal_df[['variant']].drop_duplicates().iterrows():
+                variant_name = variant['variant']
+                if 'base' not in data:
+                    data['base'] = horizontal_df[horizontal_df['variant'] == variant_name].iloc[0]['base_samples']
+
+                data[variant_name] = horizontal_df[horizontal_df['variant'] == variant_name].iloc[0]['samples']
+
+            sns.histplot(data=data, ax=ax, kde=True)
+
+            if len(horizontal_axes) > 0:
+                ax=axes[vertical_id, horizontal_id]
+                if vertical_id == (num_rows - 1):
+                    ax.set_xlabel(horizontal_name)
+                if horizontal_id == 0:
+                    ax.set_ylabel(vertical_name)
+                else:
+                    ax.set_ylabel('')
 
     for ax in axes.flat:
         ax.set_xticklabels([])