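"""Convert a CSV export of W&B run results into a LaTeX results table.

Example invocation (a sketch; ``runs.csv`` and the chosen flag values are
hypothetical and depend on how your W&B export is named):

    python wandb_csv_to_table.py runs.csv -m acc -m ar -c pytorch_classifier -g misc.log_method -a mean -r 2
"""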
from enum import Enum
import math
from pathlib import Path
from typing import Final

import numpy as np
import pandas as pd
import typer

__all__ = ["Metrics", "METRICS_COL_NAMES", "METRICS_RENAMES"]
class MeanStd:
    """Aggregate a group of values as ``mean $\\pm$ std`` for LaTeX output."""

    def __init__(self, round_to: int):
        self.round_to = round_to

    def __call__(self, data) -> str:
        mean = np.mean(data)
        if math.isnan(mean):
            return "N/A"
        std = np.std(data)
        # If the std would vanish at the requested precision, keep one extra decimal place.
        round_level = self.round_to if std > 2 * pow(10, -self.round_to) else self.round_to + 1
        return f"{round(mean, round_level)} $\\pm$ {round(std, round_level)}"
class MedianIQR:
    """Aggregate a group of values as ``median $\\pm$ IQR`` for LaTeX output."""

    def __init__(self, round_to: int):
        self.round_to = round_to

    def __call__(self, data: np.ndarray) -> str:
        q1, median, q3 = np.quantile(data, [0.25, 0.5, 0.75])
        iqr = q3 - q1
        if math.isnan(median):
            return "N/A"
        # Note: the IQR takes the std's place after the ``\pm``.
        return f"{round(median, self.round_to)} $\\pm$ {round(iqr, self.round_to)}"
class Aggregation(Enum):
    mean = "mean"
    median = "median"


# Maps each aggregation mode to the callable that formats a group of values.
AGGREGATION_LOOKUP: Final = {Aggregation.mean: MeanStd, Aggregation.median: MedianIQR}
def generate_table(
    df: pd.DataFrame,
    base_cols: list[str],
    metrics: list[str],
    aggregation: Aggregation,
    round_to: int,
    metrics_renames: dict[str, str] | None = None,
) -> pd.DataFrame:
    """Group ``df`` by ``base_cols`` and aggregate the metric columns into formatted strings."""
    AggClass = AGGREGATION_LOOKUP[aggregation]
    col_renames = {"data": "type", "method": "classifier"}
    if metrics_renames is not None:
        col_renames.update(metrics_renames)
    df = df[base_cols + metrics]
    df = df.rename(columns=col_renames, inplace=False)
    return (
        df.groupby(base_cols, sort=False)
        .agg(AggClass(round_to=round_to))
        .reset_index(level=base_cols, inplace=False)
    )
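# A minimal usage sketch (the column and method names below are hypothetical and only
# mirror the naming scheme of the W&B export):
#
#     df_demo = pd.DataFrame({
#         "misc.log_method": ["ours", "ours", "baseline", "baseline"],
#         "Accuracy (pytorch_classifier)": [0.8, 0.9, 0.6, 0.7],
#     })
#     generate_table(
#         df=df_demo,
#         base_cols=["misc.log_method"],
#         metrics=["Accuracy (pytorch_classifier)"],
#         aggregation=Aggregation.mean,
#         round_to=2,
#         metrics_renames={"Accuracy (pytorch_classifier)": "Acc. $\\uparrow$"},
#     )
#
# This yields one row per method with the accuracy column rendered as "mean $\pm$ std".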
class Metrics(Enum):
    acc = "acc"
    # ratios
    ar = "ar"
    tpr = "tpr"
    tnr = "tnr"
    # cluster metrics
    clust_acc = "clust_acc"
    clust_ari = "clust_ari"
    clust_nmi = "clust_nmi"
# Maps each metric to a function that builds the corresponding column name in the W&B CSV,
# given the sensitive attribute ``s`` and the classifier name ``cl`` (the cluster metrics
# ignore both arguments but keep the same signature).
METRICS_COL_NAMES: Final = {
    Metrics.acc: lambda s, cl: f"Accuracy ({cl})",
    Metrics.ar: lambda s, cl: f"prob_pos_{s}_0.0÷{s}_1.0 ({cl})",
    Metrics.tpr: lambda s, cl: f"TPR_{s}_0.0÷{s}_1.0 ({cl})",
    Metrics.tnr: lambda s, cl: f"TNR_{s}_0.0÷{s}_1.0 ({cl})",
    Metrics.clust_acc: lambda s, cl: "Clust/Context Accuracy",
    Metrics.clust_ari: lambda s, cl: "Clust/Context ARI",
    Metrics.clust_nmi: lambda s, cl: "Clust/Context NMI",
}
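# For example, with the default CLI options the accuracy-ratio metric resolves to
#
#     >>> METRICS_COL_NAMES[Metrics.ar]("colour", "pytorch_classifier")
#     'prob_pos_colour_0.0÷colour_1.0 (pytorch_classifier)'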
# Pretty LaTeX headers for the output table. Note that clust_ari and clust_nmi have no
# entry here, so selecting them via --metrics raises a ``KeyError`` in ``main``.
METRICS_RENAMES: Final = {
    Metrics.clust_acc: "Cluster. Acc. $\\uparrow$",
    Metrics.acc: "Acc. $\\uparrow$",
    Metrics.ar: "AR ratio $\\rightarrow 1.0 \\leftarrow$",
    Metrics.tpr: "TPR ratio $\\rightarrow 1.0 \\leftarrow$",
    Metrics.tnr: "TNR ratio $\\rightarrow 1.0 \\leftarrow$",
}
DEFAULT_METRICS: Final = [
    # Metrics.clust_acc.value,
    Metrics.acc.value,
    Metrics.ar.value,
    Metrics.tpr.value,
    Metrics.tnr.value,
]
def main(
    csv_file: Path,
    metrics: list[Metrics] = typer.Option(DEFAULT_METRICS, "--metrics", "-m"),
    sens_attr: str = typer.Option("colour", "--sens-attr", "-s"),
    classifiers: list[str] = typer.Option(["pytorch_classifier"], "--classifiers", "-c"),
    groupby: str = typer.Option("misc.log_method", "--groupby", "-g"),
    aggregation: Aggregation = typer.Option(Aggregation.mean.value, "--aggregation", "-a"),
    round_to: int = typer.Option(2, "--round-to", "-r"),
):
    """Read a W&B results CSV and print one LaTeX table, with one block of rows per classifier."""
    print("---------------------------------------")
    print("Settings:")
    print(f"    aggregation (-a): {aggregation.value}")
    print(f"    classifiers (-c): {list(classifiers)}")
    print(f'    groupby (-g)    : "{groupby}"')
    print(f"    metrics (-m)    : [{', '.join(metric.value for metric in metrics)}]")
    print(f"    round_to (-r)   : {round_to}")
    print(f'    sens_attr (-s)  : "{sens_attr}"')
    print("---------------------------------------\n")

    df = pd.read_csv(csv_file)
    rows = []
    for classifier in classifiers:
        # Resolve each selected metric to its actual column name in the CSV for this classifier.
        metrics_str = [METRICS_COL_NAMES[metric](sens_attr, classifier) for metric in metrics]
        metrics_renames = {
            metric_str: METRICS_RENAMES[metric] for metric_str, metric in zip(metrics_str, metrics)
        }
        row = generate_table(
            df=df,
            base_cols=[groupby],  # first columns in the table
            metrics=metrics_str,
            aggregation=aggregation,
            round_to=round_to,
            metrics_renames=metrics_renames,
        )
        rows.append(row)
    tab = pd.concat(rows, axis="index", sort=False, ignore_index=True)
    tab = tab.reset_index(drop=True, inplace=False)
    print(tab.to_latex(escape=False, index=False))


if __name__ == "__main__":
    typer.run(main)