From 4d0d78c2e8567157c810dd3abba4efaac9b779c7 Mon Sep 17 00:00:00 2001 From: Pablo Androetto Date: Thu, 16 May 2024 17:53:47 -0300 Subject: [PATCH 1/5] Added support for gpt-4o --- evals/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/registry.py b/evals/registry.py index 2d1c0fee1d..50580efec0 100644 --- a/evals/registry.py +++ b/evals/registry.py @@ -84,7 +84,7 @@ def is_chat_model(model_name: str) -> bool: if model_name in {"gpt-4-base"} or model_name.startswith("gpt-3.5-turbo-instruct"): return False - CHAT_MODEL_NAMES = {"gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"} + CHAT_MODEL_NAMES = {"gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", "gpt-4o"} if model_name in CHAT_MODEL_NAMES: return True From 8f854d355978c0d20260c7cfe9b0de4993e30164 Mon Sep 17 00:00:00 2001 From: Pablo Androetto Date: Fri, 17 May 2024 09:21:05 -0300 Subject: [PATCH 2/5] added model to make_plots and n_ctx fn --- evals/elsuite/track_the_stat/scripts/make_plots.py | 4 ++++ evals/registry.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/evals/elsuite/track_the_stat/scripts/make_plots.py b/evals/elsuite/track_the_stat/scripts/make_plots.py index b40e4a3586..f6463eaffa 100644 --- a/evals/elsuite/track_the_stat/scripts/make_plots.py +++ b/evals/elsuite/track_the_stat/scripts/make_plots.py @@ -20,6 +20,7 @@ def zero_if_none(input_num): MODELS = [ "gpt-4-0125-preview", "gpt-4-base", + "gpt-4o", "gpt-3.5-turbo-0125", "gemini-pro-1.0", "mixtral-8x7b-instruct", @@ -32,6 +33,7 @@ def zero_if_none(input_num): "gpt-4-0125-preview", "gpt-3.5-turbo-0125", "gpt-4-base", + "gpt-4o", ] STAT_TO_LABEL = { @@ -54,6 +56,8 @@ def get_model(spec): return "gpt-3.5-turbo-0125" elif "gpt-4-base" in spec["completion_fns"][0]: return "gpt-4-base" + elif "gpt-4o" in spec["completion_fns"][0]: + return "gpt-4o" elif "gemini-pro" in spec["completion_fns"][0]: return "gemini-pro-1.0" elif "mixtral-8x7b-instruct" in spec["completion_fns"][0]: diff --git a/evals/registry.py b/evals/registry.py index 50580efec0..ce0bc57b84 100644 --- a/evals/registry.py +++ b/evals/registry.py @@ -42,6 +42,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]: ("gpt-3.5-turbo-", 4096), ("gpt-4-32k-", 32768), ("gpt-4-", 8192), + ("gpt-4o", 128_000), ] MODEL_NAME_TO_N_CTX: dict[str, int] = { "ada": 2048, @@ -65,6 +66,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]: "gpt-4-1106-preview": 128_000, "gpt-4-turbo-preview": 128_000, "gpt-4-0125-preview": 128_000, + "gpt-4o": 128_000 } # first, look for an exact match From bc79f9673188ca6247f8f8e3de19785084a069e4 Mon Sep 17 00:00:00 2001 From: Pablo Androetto Date: Fri, 17 May 2024 12:50:34 +0000 Subject: [PATCH 3/5] added tests --- evals/registry_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evals/registry_test.py b/evals/registry_test.py index ef05316220..9033ff9c5f 100644 --- a/evals/registry_test.py +++ b/evals/registry_test.py @@ -6,6 +6,7 @@ def test_n_ctx_from_model_name(): assert n_ctx_from_model_name("gpt-3.5-turbo-0613") == 4096 assert n_ctx_from_model_name("gpt-3.5-turbo-16k") == 16384 assert n_ctx_from_model_name("gpt-3.5-turbo-16k-0613") == 16384 + assert n_ctx_from_model_name("gpt-4o") == 128_000 assert n_ctx_from_model_name("gpt-4") == 8192 assert n_ctx_from_model_name("gpt-4-0613") == 8192 assert n_ctx_from_model_name("gpt-4-32k") == 32768 @@ -23,6 +24,7 @@ def test_is_chat_model(): assert is_chat_model("gpt-3.5-turbo-0613") assert is_chat_model("gpt-3.5-turbo-16k") assert is_chat_model("gpt-3.5-turbo-16k-0613") + assert is_chat_model("gpt-4o") assert is_chat_model("gpt-4") assert is_chat_model("gpt-4-0613") assert is_chat_model("gpt-4-32k") From 875e6cb622fde072db2a73b7fe546e5dbf3f73c3 Mon Sep 17 00:00:00 2001 From: Pablo Androetto Date: Fri, 17 May 2024 17:35:26 -0300 Subject: [PATCH 4/5] add make_plots changes for already_said_that --- evals/elsuite/already_said_that/scripts/make_plots.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/evals/elsuite/already_said_that/scripts/make_plots.py b/evals/elsuite/already_said_that/scripts/make_plots.py index ede36291ec..99bf8568c1 100644 --- a/evals/elsuite/already_said_that/scripts/make_plots.py +++ b/evals/elsuite/already_said_that/scripts/make_plots.py @@ -23,6 +23,7 @@ def zero_if_none(input_num): "cot/gpt-3.5-turbo", "gpt-3.5-turbo", "gpt-4-base", + "gpt-4o", "gemini-pro", "mixtral-8x7b-instruct", "llama-2-70b-chat", @@ -35,6 +36,7 @@ def zero_if_none(input_num): "cot/gpt-3.5-turbo", "gpt-3.5-turbo", "gpt-4-base", + "gpt-4o", ] @@ -154,6 +156,8 @@ def get_model(spec): return "gpt-3.5-turbo" elif "gpt-4-base" in spec["completion_fns"][0]: return "gpt-4-base" + elif "gpt-4o" in spec["completion_fns"][0]: + return "gpt-4o" elif "gemini-pro" in spec["completion_fns"][0]: return "gemini-pro" elif "mixtral-8x7b-instruct" in spec["completion_fns"][0]: From 69682049da823c0c778d575410530430d2262b47 Mon Sep 17 00:00:00 2001 From: Pablo Androetto Date: Thu, 30 May 2024 17:20:26 -0300 Subject: [PATCH 5/5] fix prefix and n context --- evals/registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evals/registry.py b/evals/registry.py index ce0bc57b84..2eaaec9cce 100644 --- a/evals/registry.py +++ b/evals/registry.py @@ -42,7 +42,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]: ("gpt-3.5-turbo-", 4096), ("gpt-4-32k-", 32768), ("gpt-4-", 8192), - ("gpt-4o", 128_000), + ("gpt-4o-", 128_000), ] MODEL_NAME_TO_N_CTX: dict[str, int] = { "ada": 2048, @@ -91,7 +91,7 @@ def is_chat_model(model_name: str) -> bool: if model_name in CHAT_MODEL_NAMES: return True - for model_prefix in {"gpt-3.5-turbo-", "gpt-4-"}: + for model_prefix in {"gpt-3.5-turbo-", "gpt-4-", "gpt-4o-"}: if model_name.startswith(model_prefix): return True