Update plotting scripts #51

Merged 4 commits on Oct 16, 2024.

Changes from all commits
87 changes: 58 additions & 29 deletions analysis/avg_agreement_final.py
@@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

FONT_SIZES = {"small": 12, "medium": 16, "large": 18}
COLORS = {"green": "#355145", "purple": "#d8a6e5", "orange": "#fe7759"}
@@ -21,57 +22,85 @@
plt.rcParams.update(PLOT_PARAMS)


# data = {
# "LlaMa 3.1 8B": [0.3533086666014079, 0.052422082615756406],
# "Aya 23 35B": [0.43767196047824003, 0.026040919354464294],
# # "Aya 23 8B": [0.013483014909052663, 0.03363706833599835],
# "Command R": [0.374457668650282, 0.02926089754079793],
# "Command R+": [0.3830841816733316, 0.020185255968455686],
# "Gemma 1.1 7B": [0.5190375637539242, 0.027757722654111305],
# "Gemma 2 9B": [0.5181663123111222, 0.031090119385244894],
# "LlaMa 3 70B": [0.5685224105896568, 0.04853344616275034],
# "LlaMa 3 8B": [0.37936948540837095, 0.032172769265151994],
# "LlaMa 3.1 70B": [0.603536768244583, 0.027191895488989915],
# "Mistal 7B v0.2": [0.4071166722276529, 0.04577594028555328],
# "Mistral 7B v0.3": [0.41195018984687265, 0.056184679972755454],
# "GPT-4 Turbo": [0.6106943361444249, 0.02932446842558468],
# "GPT-4o": [0.5833874065757011, 0.023695391445384514],
# }

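# Each value below is [mean Cohen's kappa, standard deviation, reward-model type].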
data = {
"LlaMa 3.1 8B": [0.3533086666014079, 0.052422082615756406],
"Aya 23 35B": [0.43767196047824003, 0.026040919354464294],
# "Aya 23 8B": [0.013483014909052663, 0.03363706833599835],
"Command R": [0.374457668650282, 0.02926089754079793],
"Command R+": [0.3830841816733316, 0.020185255968455686],
"Gemma 1.1 7B": [0.5190375637539242, 0.027757722654111305],
"Gemma 2 9B": [0.5181663123111222, 0.031090119385244894],
"LlaMa 3 70B": [0.5685224105896568, 0.04853344616275034],
"LlaMa 3 8B": [0.37936948540837095, 0.032172769265151994],
"LlaMa 3.1 70B": [0.603536768244583, 0.027191895488989915],
"Mistal 7B v0.2": [0.4071166722276529, 0.04577594028555328],
"Mistral 7B v0.3": [0.41195018984687265, 0.056184679972755454],
"GPT-4 Turbo": [0.6106943361444249, 0.02932446842558468],
"GPT-4o": [0.5833874065757011, 0.023695391445384514],
"Mistral 7B v0.2": [0.41964902527302483, 0.041728704319417186, "Generative RM"],
"Aya 23 35B": [0.4366594509037704, 0.02590083631166214, "Generative RM"],
"Aya 23 8B": [0.38482902327857127, 0.02093522000984476, "Generative RM"],
"Command R": [0.370172816882575, 0.02977439059146716, "Generative RM"],
"Command R+": [0.38117473236836474, 0.020413901190603385, "Generative RM"],
"Gemma 1.1 7B": [0.5121848983276365, 0.02775593676763153, "Generative RM"],
"Gemma 2 9B": [0.5239388151608217, 0.029070955636084302, "Generative RM"],
"Llama 3 70B": [0.5738032949863474, 0.04813697578838559, "Generative RM"],
"Llama 3 8B": [0.3426278270154337, 0.028673093628218196, "Generative RM"],
"Llama 3.1 70B": [0.6074197074501972, 0.028414614724563008, "Generative RM"],
"Llama 3.1 8B": [0.34965468089191665, 0.056407978898463204, "Generative RM"],
"Mistral 7B v0.3": [0.4166882337797498, 0.05085550655767351, "Generative RM"],
"GPT-4 Turbo": [0.6096953791655624, 0.028784709595173846, "Generative RM"],
"GPT-4o": [0.5833907047087866, 0.023692522150173454, "Generative RM"],
"Tulu 2 DPO 13B": [0.3416546214690787, 0.1304713944811808, "Implicit RM"],
"BTRM Qwen 2 7B": [0.4893276344968342, 0.07031889836622843, "Classifier RM"],
"Eurus RM 7B": [0.3586485854871021, 0.09638527344174744, "Classifier RM"],
"Zephyr 7B Beta": [0.35011426942621166, 0.176041224588175, "Implicit RM"],
"Mistral 7B DPO": [0.1902062108486662, 0.08462799373351747, "Implicit RM"],
"Qwen1.5 4B": [0.38751934608609767, 0.055096683780610285, "Implicit RM"],
"StableLM Zephyr 3B": [0.1708047069636795, 0.06315971482897487, "Implicit RM"],
"Tulu 2.5 13B RM": [0.3038059897554214, 0.1147333149007323, "Classifier RM"],
"URM LLaMa 3.1 8B": [0.3969881479982245, 0.07787037973169045, "Classifier RM"],
}


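# Sort models by mean agreement so the bars ascend from left to right.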
sorted_data = dict(sorted(data.items(), key=lambda item: item[1][0]))
labels_sorted = list(sorted_data.keys())
means_sorted = [v[0] for v in sorted_data.values()]
std_devs_sorted = [v[1] for v in sorted_data.values()]
model_type = [v[2] for v in sorted_data.values()]

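# Long-form frame so seaborn can color the bars by reward-model type via `hue`.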
df = pd.DataFrame({"means": means_sorted, "std": std_devs_sorted, "model_type": model_type})

# sns.set(style="whitegrid")
# palette = sns.color_palette("coolwarm", len(labels_sorted))

plt.figure(figsize=(12, 7))
x_pos_sorted = np.arange(len(labels_sorted))

ax1 = sns.barplot(
x=df.index,
y="means",
data=df,
errorbar=None,
color=COLORS.get("orange"),
edgecolor=COLORS.get("green"),
hue="model_type",
hue_order=["Classifier RM", "Generative RM", "Implicit RM"],
palette=[COLORS.get("green"), COLORS.get("purple"), COLORS.get("orange")],
# color=COLORS.get("orange"),
# edgecolor=COLORS.get("green"),
)
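# errorbar=None above disables seaborn's own error bars; the precomputed
# standard deviations are overlaid manually instead.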
plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)

# ax1.spines["top"].set_color("black")
# ax1.spines["right"].set_color("black")
# ax1.spines["left"].set_color("black")
# ax1.spines["bottom"].set_color("black")
# for spine in ax1.spines.values():
# spine.set_linewidth(2) # Make the border thicker
plt.grid(color="gray", axis="y", alpha=0.2)

plt.ylim(0, 0.8)
plt.gca().set_axisbelow(True)
plt.legend(frameon=False)
plt.xlabel("")

plt.xticks(x_pos_sorted, labels_sorted, rotation=45, ha="right")
plt.ylabel("Cohen's Kappa")
plt.title("Average Inner-Model Agreement Across Languages")
plt.title("Average Inner-Model Agreement Across Languages", fontsize=18)

plt.tight_layout()
plt.savefig("plots/innermodel_agreement_green_oracle.pdf", bbox_inches="tight")
plt.savefig("plots/innermodel_agreement_green_oracle_all.pdf", bbox_inches="tight")