From d2871fc0639faf1cc04d8a752aa43c363e3b432c Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:24:18 -0700 Subject: [PATCH 01/42] Update model_metadata.yaml --- src/helm/config/model_metadata.yaml | 50 +++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index 9f3fee61c65..c831eb25c05 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -5229,3 +5229,53 @@ models: access: limited release_date: 2025-01-31 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 + model_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + From 4d05a2743ff87a44e5bc9fe29730c1f4b33f9fa2 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:25:33 -0700 Subject: [PATCH 02/42] Update model_metadata.yaml --- src/helm/config/model_metadata.yaml | 124 +++++++++++++++++++--------- 1 file changed, 83 insertions(+), 41 deletions(-) diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index c831eb25c05..8cbff69e32d 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -5230,52 +5230,94 @@ models: release_date: 2025-01-31 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: 
proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20
-    model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20
-    tokenizer_name: proxy_tuning/qwen3-30b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
+# proxy tuning
+  - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20
+    description: Proxy-tuned Qwen3-30b with mellama-13b-chat expert and llama-13b-base antiexpert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20
-    model_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20
-    tokenizer_name: proxy_tuning/llama-7b-chat
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
+    display_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20
+    description: Proxy-tuned Llama2-70b-chat with mellama-13b-base expert and llama-13b-base antiexpert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

-  - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20
-    model_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20
-    tokenizer_name: proxy_tuning/qwen3-30b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
+# unite
+  - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_0.7_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_0.7_logprobs_20
+    description: UniTE ensemble of Qwen3-30b with mellama-13b-chat expert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/llama-70b-chat_mellama-13b-base_none_1.0_logits_20
+    display_name: proxy_tuning/llama-70b-chat_mellama-13b-base_none_1.0_logits_20
+    description: UniTE ensemble of llama-70b-chat with mellama-13b-base expert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+# base models
+
+  - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20
+    description: Qwen3-30b run on its own, without proxy tuning (baseline).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
-    model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
-    tokenizer_name: proxy_tuning/llama-7b-chat
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
-
-  - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20
-    model_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20
-    tokenizer_name: proxy_tuning/llama-7b-chat
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
+    display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
+    description: llama-70b-chat run on its own, without proxy tuning (baseline).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+# mellama models
+
+  - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20
+    display_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20
+    description: mellama-70b-chat run on its own, without proxy tuning.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20
-    model_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20
-    tokenizer_name: proxy_tuning/llama-7b-chat
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
-
+    display_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20
+    description: mellama-13b-chat run on its own, without proxy tuning.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 13000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20
-    model_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20
-    tokenizer_name: proxy_tuning/llama-7b-chat
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient"
-
+    display_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20
+    description: mellama-13b-base run on its own, without proxy tuning.
+ creator_organization_name: Sasha Ronaghi + access: open + num_parameters: 13000000000 + release_date: 2025-10-15 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + From c1246b58d2247d0c4310033f289422526341d27e Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:26:22 -0700 Subject: [PATCH 03/42] Update model_deployments.yaml --- src/helm/config/model_deployments.yaml | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index cdbc0037a95..2f1c7927594 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -5107,3 +5107,53 @@ model_deployments: dspy_module: ChainOfThought dspy_api_model: openai/o3-mini-2025-01-31 dspy_api_base: null + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 + model_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 + model_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + From e30447a09a25b3d84ed9ae7184357ad24892b6c5 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:26:55 -0700 Subject: [PATCH 04/42] Update tokenizer_configs.yaml --- src/helm/config/tokenizer_configs.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index 7232fc6b7d1..4e365633ea1 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -1286,3 +1286,20 @@ 
tokenizer_configs:
     class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: ""
     prefix_token: ""
+
+  - name: proxy_tuning/llama-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: [PATH TO Llama-2-7b-chat]
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: proxy_tuning/qwen3-30b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: [PATH TO Qwen3-30B-A3B-Instruct-2507]
+    end_of_text_token: ""
+    prefix_token: ""

From 2be5a72ea8fa235c8269831e21fedcbc00f5f9c4 Mon Sep 17 00:00:00 2001
From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com>
Date: Wed, 15 Oct 2025 15:27:45 -0700
Subject: [PATCH 05/42] Create proxy_tuning_client.py

---
 src/helm/clients/proxy_tuning_client.py | 961 ++++++++++++++++++++++++
 1 file changed, 961 insertions(+)
 create mode 100644 src/helm/clients/proxy_tuning_client.py

diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py
new file mode 100644
index 00000000000..e512bf580a7
--- /dev/null
+++ b/src/helm/clients/proxy_tuning_client.py
@@ -0,0 +1,961 @@
+# File: helm/clients/proxy_tuning_client.py
+from helm.clients.client import Client
+from helm.common.cache import CacheConfig
+from helm.tokenizers.tokenizer import Tokenizer
+from helm.common.cache import Cache
+from helm.common.request import Request, RequestResult, GeneratedOutput
+
+from typing import Optional, Dict, Any, List
+import torch, os, json
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch.nn.functional as F
+from transformers.generation.utils import (
+    ModelOutput,
+)
+import tqdm
+from transformers import (
+    BitsAndBytesConfig,
+)
+
+from datetime import datetime
+
+MODEL_PATHS = {
+    "llama-70b-chat": "[MODEL PATH]",
+    "llama-13b-base": "[MODEL PATH]",
+    "llama-7b-chat": "[MODEL PATH]",
+    "mellama-13b-chat": "[MODEL PATH]",
+    "mellama-13b-base": "[MODEL PATH]",
+    "mellama-70b-chat": "[MODEL PATH]",
+    "qwen3-30b": "[MODEL PATH]",
+}
+
+LOCAL_RESULTS_DIR = "[results dir]"
+
+# Helpers adapted from UniTE (cross-tokenizer vocabulary alignment).
+
+def update_vocab(v1, vu, tokenizer, logits, model_name):
+    # For each position, add any union-vocabulary tokens missing from v1,
+    # looking up their logit (or a fallback space-token logit) in this model's vocabulary.
+    for vu_token, v1_token, logit_ele in zip(vu, v1, logits):
+        v1_token_ids = []
+        for item in v1_token.values():
+            v1_token_ids.append(item[1])
+        for token in vu_token:
+            if token not in v1_token.keys():
+                if 'llama' in model_name.lower():
+                    token = token.replace('Ġ', '▁')
+                if token != '':
+                    subtoken_id = tokenizer.convert_tokens_to_ids(token)
+                    if subtoken_id != 0 and subtoken_id is not None:  # Mistral and Llama2 OOV id is 0
+                        logit = logit_ele[subtoken_id]
+                    else:
+                        subtokens = tokenizer.tokenize(token)
+                        for token_id in tokenizer.convert_tokens_to_ids(subtokens):
+                            if 'llama' in model_name.lower():
+                                if token_id != 29871:
+                                    subtoken_id = token_id
+                                    break
+                            else:
+                                subtoken_id = token_id
+                                break
+                        logit = logit_ele[subtoken_id]
+                else:
+                    if 'qwen' in model_name.lower():
+                        logit = logit_ele[220]
+                        subtoken_id = 220
+                    if 'llama' in model_name.lower():
+                        logit = logit_ele[29871]
+                        subtoken_id = 29871
+
+                if 'llama' in model_name.lower():
+                    v1_token[token.replace('▁', 'Ġ')] = [logit, subtoken_id]
+                else:
+                    if subtoken_id not in v1_token_ids:
+                        v1_token[token] = [logit, subtoken_id]
+                        v1_token_ids.append(subtoken_id)
+                    else:
+                        v1_token[token] = [0, subtoken_id]
+
+    v1_new = v1
+    return v1_new
+
+def vocab_softmax(v1):
+    # Renormalize each position's candidate scores with a softmax, keeping token ids.
+    v1_new = []
+    for element in v1:
+        ele = {}
+        ele_values = list(element.values())
+        ele_values0, ele_values1 = [], []
+        for item in ele_values:
+            ele_values0.append(item[0])
+            ele_values1.append(item[1])
+        ele_values0 = torch.softmax(torch.tensor(ele_values0), dim=0)
+        for token, prob, ids in zip(element.keys(), ele_values0, ele_values1):
+            ele[token] = [prob, ids]
+        v1_new.append(ele)
+
+    return v1_new
+
+
+def get_union_vocab(v1, v2):
+    # Extract the union of candidate tokens from both dictionaries
+    unique_tokens = []
+    for v1_tokens, v2_tokens in zip(v1, v2):
+        unique_tokens.append(list(set(v1_tokens.keys()) | set(v2_tokens.keys())))
+
+    return unique_tokens
+
+def average_and_sample(v1, v2, lamda, tokenizer):
+    next_token, v_avg, next_token_id1, next_token_id2 = [], [], [], []
+    for element_v1, element_v2 in zip(v1, v2):
+        assert len(element_v1) == len(element_v2)
+        v_new = {}
+        for token1 in element_v1:
+            v_new[token1] = [lamda * element_v1[token1][0] + (1 - lamda) * element_v2[token1][0],
+                             element_v1[token1][1]]
+        v_avg.append(v_new)
+        probs = []
+        for item in v_new.values():
+            probs.append(item[0])
+        sample_index = probs.index(max(probs))
+        i = 0
+        for item1 in v_new.keys():
+            if i == sample_index:
+                next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1]))
+                next_token_id1.append(element_v1[item1][1])
+                next_token_id2.append(element_v2[item1][1])
+            i += 1
+    return next_token, v_avg, next_token_id1, next_token_id2
+
+
+def get_top_k_tokens(logits, tokenizer, k=10):
+    probs = logits
+
+    top_k_indices = torch.topk(probs, k).indices
+    probs = probs.tolist()
+    top_k_probs = []
+    for idx, prob in zip(top_k_indices, probs):
+        prob_item = []
+        for i in idx:
+            prob_item.append(prob[i])
+        top_k_probs.append(prob_item)
+
+    top_k_tokens = []
+    for indices in top_k_indices:
+        token_item = []
+        for idx in indices:
+            token_item.append(tokenizer.convert_ids_to_tokens(idx.item(), skip_special_tokens=True))
+        top_k_tokens.append(token_item)
+
+    v1 = []
+    for token, prob, id in zip(top_k_tokens, top_k_probs, top_k_indices):
+        v1.append(
+            {token.replace('▁', 'Ġ').replace('<0x0A>', '/n').replace('Ċ', '/n'): [prob, int(id)] for token, prob, id in zip(token, prob, id)})
+
+    return v1
+
+# proxy tuning approach
+def logits_add(v1, v2, v3, tokenizer, alpha, device=None):
+    next_token, next_token_id1, next_token_id2, next_token_id3 = [], [], [], []
+    comb_ids_per_batch, comb_scores_per_batch = [], []
+
+    for element_v1, element_v2, element_v3 in zip(v1, v2, v3):
+
+        v_new = {}
+
+        for token1 in element_v1:
+            v_new[token1] = [
+                element_v1[token1][0]
+                + (alpha * (element_v2[token1][0] - element_v3[token1][0])),
+                element_v1[token1][1]
+            ]
+
+        probs = [item[0] for item in v_new.values()]
+
+        sample_index = probs.index(max(probs))
+
+        i = 0
+        for item1 in v_new.keys():
+            if i == sample_index:
+                next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1]))
+                next_token_id1.append(element_v1[item1][1])
+                next_token_id2.append(element_v2[item1][1])
+                next_token_id3.append(element_v3[item1][1])
+            i += 1
+        ids = torch.tensor([v_new[t][1] for t in v_new], dtype=torch.long, device=device)
+        scores = torch.tensor([v_new[t][0] for t in v_new], dtype=torch.float32, device=device)
+        comb_ids_per_batch.append(ids)
+        comb_scores_per_batch.append(scores)
+    return next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch
+
+
+class DExpertsLlama:
+    def __init__(
+        self,
+        base_name: str,
+        expert_name: str,
+        antiexpert_name: str,
+        tokenizer_base, tokenizer_expert, tokenizer_anti,
+        system_prompt: str =
None, + alpha: float = 1.0, + unite: bool = False, + model_kwargs: Dict[str, Any] = None + ): + + self.antiexpert = None # ensure it exists + self.tok_anti = None + + self.base = AutoModelForCausalLM.from_pretrained( + base_name, **model_kwargs + ) + self.expert = AutoModelForCausalLM.from_pretrained( + expert_name, **model_kwargs + ) + self.base.eval() + self.expert.eval() + + self.tok_base = tokenizer_base + self.tok_exp = tokenizer_expert + + if not unite: + self.antiexpert = AutoModelForCausalLM.from_pretrained( + antiexpert_name, **model_kwargs + ) + self.antiexpert.eval() + self.tok_anti = tokenizer_anti + + self.alpha = alpha + self.device = self.base.device + self.system_prompt = system_prompt + + + def forward( + self, + base_inputs, + expert_inputs, + antiexpert_inputs=None, + return_dict=None + ): + base_outputs = self.base(**base_inputs, return_dict=return_dict) + expert_outputs = self.expert(**expert_inputs, return_dict=return_dict) + if antiexpert_inputs is not None: + antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=return_dict) + return base_outputs, expert_outputs, antiexpert_outputs + + return base_outputs, expert_outputs + + + def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): + """ + Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. + Returns: input_ids (tensor on self.device) + """ + def _msgs(p): + if self.system_prompt: + return [{"role": "system", "content": self.system_prompt}, + {"role": "user", "content": p}] + return [{"role": "user", "content": p}] + + rendered = [ + tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) + for p in prompts + ] + chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) + return chat_inputs.input_ids.to(self.device) + + def _encode_plain_inputs(self, tokenizer, prompts): + """ + Plain (non-chat) encoding with the given tokenizer. 
+ Returns: input_ids (tensor on self.device) + """ + enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) + return enc.input_ids.to(self.device) + + def _update_model_kwargs_for_generation( + self, + outputs: ModelOutput, + kwargs: Dict[str, Any], + ) -> Dict[str, Any]: + # update past_key_values + kwargs["past_key_values"] = outputs.past_key_values + + # update attention mask + if "attention_mask" in kwargs: + attention_mask = kwargs["attention_mask"] + kwargs["attention_mask"] = torch.cat( + [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 + ) + if getattr(outputs, "cache_position", None) is not None: + # some models already return it + kwargs["cache_position"] = outputs.cache_position + else: + if "cache_position" in kwargs: + kwargs["cache_position"] = kwargs["cache_position"] + 1 + else: + # first step: position is sequence-length-1 + seq_len = kwargs["attention_mask"].shape[1] + kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) + + return kwargs + + def generate( + self, + input_ids: Optional[torch.Tensor] = None, + max_new_tokens: Optional[int] = 100, + do_sample: bool = False, + alpha: float = 1.0, + return_logits_for_analysis: bool = False, + score_type=None, + k=20, + unite: bool = False, + **kwargs + ): + base_kwargs = kwargs.copy() + + # Decode to strings once using base tokenizer + prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) + + if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): + base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) + else: + base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) + + base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) + + + expert_kwargs = kwargs.copy() + expert_input_ids = input_ids + + if hasattr(self.tok_exp, "apply_chat_template") and getattr(self.tok_exp, "chat_template", None): + expert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_exp, prompts) + else: + expert_input_ids = self._encode_plain_inputs(self.tok_exp, prompts) + + expert_kwargs['attention_mask'] = torch.ones_like(expert_input_ids, dtype=torch.long, device=expert_input_ids.device) + + + if not unite: + antiexpert_kwargs = kwargs.copy() + antiexpert_input_ids = input_ids + + if hasattr(self.tok_anti, "apply_chat_template") and getattr(self.tok_anti, "chat_template", None): + antiexpert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_anti, prompts) + else: + antiexpert_input_ids = self._encode_plain_inputs(self.tok_anti, prompts) + antiexpert_kwargs['attention_mask'] = torch.ones_like(antiexpert_input_ids, dtype=torch.long, device=antiexpert_input_ids.device) + + + # keep track of which sequences are already finished + unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) + eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device) + + T = max_new_tokens + if (not unite) and return_logits_for_analysis: + device = input_ids.device + # 1 x T buffers on GPU + p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) + p_base = torch.empty(T, device=device, dtype=torch.bfloat16) + p_expert = torch.empty(T, device=device, dtype=torch.bfloat16) + p_anti = torch.empty(T, device=device, dtype=torch.bfloat16) + + preds_dexperts = torch.empty(T, device=device, 
dtype=torch.int32)
+            preds_base = torch.empty(T, device=device, dtype=torch.int32)
+            preds_expert = torch.empty(T, device=device, dtype=torch.int32)
+            preds_anti = torch.empty(T, device=device, dtype=torch.int32)
+
+            token_ids_out = torch.empty(T, device=device, dtype=torch.int32)
+            t_write = 0
+
+        for step in range(max_new_tokens):
+
+            base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs)
+            expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs)
+
+            if unite:
+                base_outputs, expert_outputs = self.forward(
+                    base_inputs, expert_inputs, return_dict=True
+                )
+
+                base_next_token_logits = base_outputs.logits[..., -1, :]
+                expert_next_token_logits = expert_outputs.logits[..., -1, :]
+                v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k)
+                v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k)
+
+                vu = get_union_vocab(v_base, v_exp)
+
+                v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits, 'qwen')
+                v_base = vocab_softmax(v_base)
+                v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits, 'llama')
+                v_exp = vocab_softmax(v_exp)
+
+                next_token, v_avg, next_token_id1, next_token_id2 = average_and_sample(v_base, v_exp, 0.5, self.tok_base)
+
+            else:
+                antiexpert_inputs = self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs)
+                base_outputs, expert_outputs, antiexpert_outputs = self.forward(
+                    base_inputs, expert_inputs, antiexpert_inputs, return_dict=True
+                )
+
+                base_next_token_logits = base_outputs.logits[..., -1, :]
+                expert_next_token_logits = expert_outputs.logits[..., -1, :]
+                antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :]
+
+                if score_type == "logprobs":
+                    base_next_token_logits = F.log_softmax(base_outputs.logits[..., -1, :], dim=-1)
+                    expert_next_token_logits = F.log_softmax(expert_outputs.logits[..., -1, :], dim=-1)
+                    antiexpert_next_token_logits = F.log_softmax(antiexpert_outputs.logits[..., -1, :], dim=-1)
+
+                v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k)
+                v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=0)
+                v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_next_token_logits, 'llama')
+                v_anti = get_top_k_tokens(antiexpert_next_token_logits, self.tok_anti, k=0)
+                v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_next_token_logits, 'llama')
+
+                next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids, comb_scores = logits_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=input_ids.device)
+
+            next_tokens = torch.as_tensor(next_token_id1, device=input_ids.device, dtype=torch.long)
+
+            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+            base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1)
+
+            exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long)
+            expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1)
+
+            base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs)
+            expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs)
+
+            if not unite:
+                anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long)
+                antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1)
+                antiexpert_kwargs = self._update_model_kwargs_for_generation(antiexpert_outputs, antiexpert_kwargs)
+
+            # if eos_token
was found in one sentence, set sentence to finished + unfinished_sequences = unfinished_sequences.mul( + next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) + ) + + # stop when each sentence is finished + if unfinished_sequences.max() == 0: + break + + if (not unite) and return_logits_for_analysis: + sl = slice(0, t_write) + results = [{ + 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) + 'p_dexperts': p_dexperts[sl], # [T’] fp16 (GPU) + 'preds_dexperts': preds_dexperts[sl], # [T’] int32 (GPU) + 'p_base': p_base[sl], + 'preds_base': preds_base[sl], + 'p_expert': p_expert[sl], + 'preds_expert': preds_expert[sl], + 'p_antiexpert': p_anti[sl], + 'preds_antiexpert': preds_anti[sl], + # (optional) decode later if you want strings + }] + return input_ids, results + return input_ids + + +def ensure_dir(d): + if not os.path.exists(d): + os.makedirs(d, exist_ok=True) + + +@torch.inference_mode() +def generate_completions( + model, + tokenizer, + prompts, + batch_size=1, + add_special_tokens=True, + disable_tqdm=False, + return_logits_for_analysis=False, + score_type=None, + alpha=1.0, + k=20, + unite=False, + **generation_kwargs, + +): + generations = [] + outputs = [] + if not disable_tqdm: + progress = tqdm.tqdm(total=len(prompts), desc="Generating Completions") + + num_return_sequences = generation_kwargs.get("num_return_sequences", 1) + + all_results = [] + for i in range(0, len(prompts), batch_size): + batch_prompts = prompts[i:i+batch_size] + tokenized_prompts = tokenizer( + batch_prompts, padding="longest", return_tensors="pt", add_special_tokens=add_special_tokens + ) + + # print ("tokenized_prompt: ", tokenized_prompts) + if hasattr(model, "device"): # DExpertsLlama + device = model.device + # print ("device = model.device") + else: # vanilla HF model + device = next(model.parameters()).device + # print ("next(model.parameters()).devicedevice = next(model.parameters()).device") + batch_input_ids = tokenized_prompts['input_ids'].to(device) + attention_mask = tokenized_prompts['attention_mask'].to(device) + + batch_outputs = model.generate( + input_ids=batch_input_ids, + attention_mask=attention_mask, + alpha=alpha, + score_type=score_type, + k=k, + unite=unite, + **generation_kwargs + ) + results = [] + + # to support the logits processing below when using DExperts with mixed tokenizers + if isinstance(batch_input_ids, dict): + batch_input_ids = batch_input_ids['llama'] + + batch_outputs = tokenizer.batch_decode(batch_outputs, skip_special_tokens=True) + # print("batch_outputs: ", batch_outputs) + batch_prompts = tokenizer.batch_decode(batch_input_ids, skip_special_tokens=True) + + # duplicate the prompts to match the number of return sequences + batch_prompts = [prompt for prompt in batch_prompts for _ in range(num_return_sequences)] + batch_generations = [ + output[len(prompt):] for prompt, output in zip(batch_prompts, batch_outputs) + ] + + generations += batch_generations + + if not disable_tqdm: + progress.update(len(batch_prompts)//num_return_sequences) + # return generations, logits_for_analysis + return generations, all_results + + +def add_pad_token(tokenizer, padding_side="left"): + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + tokenizer.pad_token_id = tokenizer.eos_token_id + tokenizer.padding_side = padding_side + return tokenizer + +def load_dexperts_model_and_tokenizer( + base_name: str, + expert_name: str, + antiexpert_name: str, + device_map: str = "auto", + alpha: float = 1.0, + load_in_8bit: 
bool = False, + load_in_4bit: bool = False, + system_prompt: Optional[str] = None, + use_fast_tokenizer: bool = True, + padding_side: str = "left", + proxy_tune: bool = False, + unite: bool = False, +): + + bnb_cfg = None + + if load_in_8bit: + bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) + + if load_in_4bit: + bnb_cfg = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", # {nf4, fp4}; nf4 is standard + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + model_kwargs = { + 'device_map': device_map, + 'torch_dtype': torch.bfloat16, + 'quantization_config': bnb_cfg, + 'low_cpu_mem_usage': True, + 'trust_remote_code': True, + } + + + if "llama" in base_name and "chat" in base_name: + tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) + else: + tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer) + + if "llama" in expert_name and "chat" in expert_name: + tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) + elif "llama" in expert_name and "chat" not in expert_name: + tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + else: + tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS[expert_name], use_fast=use_fast_tokenizer) + + tok_base = add_pad_token(tok_base, padding_side) + tok_exp = add_pad_token(tok_exp, padding_side) + + if proxy_tune: + if "llama" in antiexpert_name and "chat" in antiexpert_name: + tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) + else: + tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS[antiexpert_name], use_fast=use_fast_tokenizer) + + tok_anti = add_pad_token(tok_anti, padding_side) + + + model = DExpertsLlama( + base_name=MODEL_PATHS[base_name], + expert_name=MODEL_PATHS[expert_name], + antiexpert_name=MODEL_PATHS[antiexpert_name], + tokenizer_base=tok_base, + tokenizer_expert=tok_exp, + tokenizer_anti=tok_anti, + system_prompt=system_prompt, + alpha=alpha, + model_kwargs=model_kwargs, + ) + print(f"[Loader] Base : {MODEL_PATHS[base_name]}") + print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") + print(f"[Loader] Anti : {MODEL_PATHS[antiexpert_name]}") + + elif unite: + model = DExpertsLlama( + base_name=MODEL_PATHS[base_name], + expert_name=MODEL_PATHS[expert_name], + antiexpert_name="none", + tokenizer_base=tok_base, + tokenizer_expert=tok_exp, + tokenizer_anti="none", + system_prompt=system_prompt, + alpha=alpha, + unite=True, + model_kwargs=model_kwargs, + ) + print(f"[Loader] Base : {MODEL_PATHS[base_name]}") + print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") + + return model, tok_base + + +def _safe_tag(model_name: str) -> str: + # e.g. 
"proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat" + return model_name.replace("/", "_").replace(" ", "").replace(".", "").replace("-", "") + +def setup_run_dirs(model_name: str, root=LOCAL_RESULTS_DIR): + """ + Creates: + /_/ + ├─ _.csv + └─ logits_analysis/ + Returns: (run_dir, csv_path, logits_dir) + """ + ensure_dir(root) + tag = _safe_tag(model_name) + stamp = datetime.now().strftime("%Y%m%d_%H%M%S") + run_dir = os.path.join(root, f"{tag}_{stamp}") + ensure_dir(run_dir) + + csv_name = f"{tag}_{stamp}.csv" + csv_path = os.path.join(run_dir, csv_name) + with open(csv_path, "w") as f: + f.write("timestamp,request_id,model_name,prompt,output,logits_path\n") + + logits_dir = os.path.join(run_dir, "logits_analysis") + ensure_dir(logits_dir) + + print(f"[TokenLog] created run dir: {run_dir}") + print(f"[TokenLog] csv: {csv_path}") + print(f"[TokenLog] logits dir: {logits_dir}") + return run_dir, csv_path, logits_dir + + +def append_request_row(csv_path: str, request_id: str, model_name: str, prompt: str, output: str, logits_path: str | None): + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + def esc(s: str) -> str: + if s is None: return "" + return s.replace("\n", "\\n").replace(",", ",") + with open(csv_path, "a") as f: + f.write(f"{ts},{request_id},{esc(model_name)},{esc(prompt)},{esc(output)},{esc(logits_path or '')}\n") + + +def load_base_model_and_tokenizer( + base_name: str, + device_map: str = "auto", + load_in_8bit: bool = False, + load_in_4bit: bool = False, + system_prompt: Optional[str] = None, + use_fast_tokenizer: bool = True, + padding_side: str = "left", +): + bnb_cfg = None + + if load_in_8bit: + bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) + + if load_in_4bit: + bnb_cfg = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", # {nf4, fp4}; nf4 is standard + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + model_kwargs = { + 'device_map': device_map, + 'torch_dtype': torch.bfloat16, + 'quantization_config': bnb_cfg, + 'low_cpu_mem_usage': True, + 'trust_remote_code': True, + } + + tok = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer, trust_remote_code=True,) + tok = add_pad_token(tok, padding_side) + + model = AutoModelForCausalLM.from_pretrained( + MODEL_PATHS[base_name], + device_map=device_map, + torch_dtype=torch.bfloat16, + quantization_config=bnb_cfg, + trust_remote_code=True, + ) + model.eval() + return model, tok + +def get_chat_template_tokenized_chat_inputs(tokenizer, prompts): + """ + Use tokenizer.apply_chat_template for models like Qwen/Yi/Mistral/Gemma-*. + Returns a BatchEncoding dict with 'input_ids' and 'attention_mask'. 
+ """ + def _msgs(p): + return [{"role": "user", "content": p}] + + # Render to string first, then tokenize → BatchEncoding (dict-like) + rendered = [ + tokenizer.apply_chat_template( + _msgs(p), + tokenize=False, # <-- important + add_generation_prompt=True + ) + for p in prompts + ] + enc = tokenizer( + rendered, + padding=True, + return_tensors="pt", + add_special_tokens=True + ) + return enc + +def base_generate_completions( + model, + tokenizer, + prompts, + max_new_tokens=600, + do_sample=False, +): + import torch + model.eval() + + # if chat template + if hasattr(tokenizer, "apply_chat_template") and getattr(tokenizer, "chat_template", None): + enc = get_chat_template_tokenized_chat_inputs(tokenizer, prompts) + else: + enc = tokenizer( + prompts, + return_tensors="pt", + padding=True, + truncation=True, + ) + # Ensure pad token is set + if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None: + tokenizer.pad_token_id = tokenizer.eos_token_id + + # Move to model device + enc = {k: v.to(model.device) for k, v in enc.items()} + + with torch.no_grad(): + gen_ids = model.generate( + **enc, + max_new_tokens=max_new_tokens, + do_sample=do_sample, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.pad_token_id, + ) + + # Slice off the prompt portion + prompt_len = enc["input_ids"].shape[1] + new_tokens = gen_ids[:, prompt_len:] + + decoded = tokenizer.batch_decode(new_tokens, skip_special_tokens=True) + + predicted_labels = decoded + all_results = None + + return predicted_labels, all_results + + + + +class ProxyTuningClient(Client): + """ + A HELM client that uses ProxyTuning for inference instead of directly calling the model. + """ + + def __init__( + self, + tokenizer: Tokenizer, + tokenizer_name: str, + cache_config: CacheConfig, + model_name: str = None, + api_base: str = None, + api_key: str = None, + ): + self.cache = Cache(cache_config) + """ + Initializes the ProxyTuningClient. + + Args: + tokenizer (Tokenizer): Tokenizer instance (unused but required by HELM interface). + tokenizer_name (str): Name of the tokenizer (unused but required by HELM interface). + cache_config (CacheConfig): Configuration for caching. 
+ + """ + self.run_dir, self.token_log_path, self.logits_dir = setup_run_dirs(model_name) + self.model_name = model_name + self.run_id = datetime.now().strftime("%Y%m%d_%H%M%S") + self.req_seq = 0 + tag = model_name.split("/")[-1] + # strip optional "proxy_tuning_" prefix + if tag.startswith("proxy_tuning_"): + tag = tag[len("proxy_tuning_"):] + + parts = tag.split("_") + base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = ( + parts[0], + parts[1], + parts[2], + float(parts[3]), + parts[4], + parts[5] + ) + self.k = int(k_str) + + self.is_unite = False + self.is_proxy = False + if expert_name != "none": + if antiexpert_name == "none": + self.is_unite = True + else: + self.is_proxy = True + + print("mn:", model_name) + print("tag:", tag) + print("b: ", base_name) + print("Ex:", expert_name) + print("ax", antiexpert_name) + print(self.alpha) + print(self.score_type) + print(self.k) + print("proxy: ", self.is_proxy) + print("unite: ", self.is_unite) + + if self.is_proxy: + self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer( + base_name=base_name, + expert_name=expert_name, + antiexpert_name=antiexpert_name, + load_in_8bit=False, + load_in_4bit=True, + use_fast_tokenizer=True, + system_prompt=None, + device_map='auto', + proxy_tune=self.is_proxy + ) + elif self.is_unite: + self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer( + base_name=base_name, + expert_name=expert_name, + antiexpert_name=antiexpert_name, + load_in_8bit=False, + load_in_4bit=True, + use_fast_tokenizer=True, + system_prompt=None, + device_map='auto', + proxy_tune=self.is_proxy, + unite=self.is_unite + ) + + else: + self.model, self.hf_tokenizer = load_base_model_and_tokenizer( + base_name=base_name, + load_in_4bit=False, + device_map="auto", + use_fast_tokenizer=True, + ) + + def make_request(self, request: Request) -> RequestResult: + """ + Handles a request by sending the prompt + + Args: + request (Request): The request object containing the prompt. + + Returns: + RequestResult: A HELM-compatible response object. 
+ """ + prompt_text = request.prompt + + if request.messages: + prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") + + + print("prompt_text: ", prompt_text) + prompts = [prompt_text] + # turn prompt into a [] + if self.is_proxy or self.is_unite: + predicted_labels, all_results = generate_completions( + model=self.model, + tokenizer=self.hf_tokenizer, + prompts=prompts, + max_new_tokens=600, + do_sample=False, + num_return_sequences=1, + alpha=self.alpha, + k=self.k, + score_type=self.score_type, + unite=self.is_unite, + return_logits_for_analysis=False, + ) + else: + predicted_labels, all_results = base_generate_completions( + model=self.model, + tokenizer=self.hf_tokenizer, + prompts=prompts, + max_new_tokens=600, + do_sample=False, + ) + + + output_text = predicted_labels[0] + print("output_text: ", output_text) + + self.req_seq += 1 + request_id = f"{self.run_id}_r{self.req_seq:04d}" + + logits_path = None + if self.is_proxy and all_results: + logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") + torch.save(all_results, logits_path) + print(f"[Logits] wrote {logits_path}") + + append_request_row( + csv_path=self.token_log_path, + request_id=request_id, + model_name=self.model_name, + prompt=prompt_text, + output=output_text, + logits_path=logits_path, + ) + + # Return a HELM-compatible RequestResult + output = GeneratedOutput(text=output_text, logprob=0.0, tokens=[]) + return RequestResult(success=True, cached=False, completions=[output], embedding=[]) From c33bf29e50753569354e57061e50e628dc3ba50b Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:48:53 -0700 Subject: [PATCH 06/42] Add files via upload --- src/helm/clients/proxy_tuning_client (2).py | 1112 +++++++++++++++++++ 1 file changed, 1112 insertions(+) create mode 100644 src/helm/clients/proxy_tuning_client (2).py diff --git a/src/helm/clients/proxy_tuning_client (2).py b/src/helm/clients/proxy_tuning_client (2).py new file mode 100644 index 00000000000..b920903cc10 --- /dev/null +++ b/src/helm/clients/proxy_tuning_client (2).py @@ -0,0 +1,1112 @@ +# File: helm/clients/proxy_tuning_client.py +from helm.clients.client import Client +from helm.common.cache import CacheConfig +from helm.tokenizers.tokenizer import Tokenizer +from helm.common.cache import Cache +from helm.common.request import Request, RequestResult, GeneratedOutput + +from typing import Optional, Dict, Any, List +import torch, os, json +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch.nn.functional as F +from transformers.generation.utils import ( + ModelOutput, +) +import tqdm +from transformers import BitsAndBytesConfig + +# from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn, build_token_enforcer_tokenizer_data +# import math +# from pydantic import BaseModel + +from typing import Literal + +from datetime import datetime + +# MODEL_PATHS = { +# "llama-70b-chat": "[MODEL PATH]", +# "llama-13b-base": "[MODEL PATH]", +# "llama-7b-chat": "[MODEL PATH]", +# "mellama-13b-chat": "[MODEL PATH]", +# "mellama-13b-base": "[MODEL PATH]", +# "mellama-70b-chat": "[MODEL PATH]", +# "qwen3-30b": "[MODEL PATH]", +# } + +# LOCAL_RESULTS_DIR = "[results dir]" + +MODEL_PATHS = { + "llama-70b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-70b-chat-hf", + "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", + "llama-7b-chat": 
"/share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf", + "mellama-13b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B-chat", + "mellama-13b-base": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B", + "mellama-70b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-70B-chat", + "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", +} + +LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm" +# helpers adapted from unite + +def update_vocab(v1, vu, tokenizer, logits, model_name): + for vu_token, v1_token, logit_ele in zip(vu,v1,logits): + v1_token_ids = [] + for item in v1_token.values(): + v1_token_ids.append(item[1]) + for token in vu_token: + if token not in v1_token.keys(): + if 'llama' in model_name.lower(): + token = token.replace('Ġ','▁') + if token != '': + subtoken_id = tokenizer.convert_tokens_to_ids(token) + if subtoken_id != 0 and subtoken_id != None: #Mistral and Llama2 oov id 0 + logit = logit_ele[subtoken_id] + else: + subtokens = tokenizer.tokenize(token) + for token_id in tokenizer.convert_tokens_to_ids(subtokens): + #if 'llama2' in model_name: + if 'llama' in model_name.lower(): + if token_id != 29871: + subtoken_id = token_id + break + else: + subtoken_id = token_id + break + logit = logit_ele[subtoken_id] + else: + if 'qwen' in model_name.lower(): + logit = logit_ele[220] + subtoken_id = 220 + if 'llama' in model_name.lower(): + logit = logit_ele[29871] + subtoken_id = 29871 + + if 'llama' in model_name.lower(): + v1_token[token.replace('▁', 'Ġ')] = [logit, subtoken_id] + else: + if subtoken_id not in v1_token_ids: + v1_token[token] = [logit, subtoken_id] + v1_token_ids.append(subtoken_id) + else: + v1_token[token] = [0, subtoken_id] + v1_new = v1 + return v1_new + +def vocab_softmax(v1): + v1_new = [] + for element in v1: + ele = {} + ele_values = list(element.values()) + ele_values0, ele_values1 = [], [] + for item in ele_values: + ele_values0.append(item[0]) + ele_values1.append(item[1]) + ele_values0 = torch.softmax(torch.tensor(ele_values0), dim=0) + for token, prob, ids in zip(element.keys(),ele_values0,ele_values1): + ele[token] = [prob, ids] + v1_new.append(ele) + + return v1_new + + +def get_union_vocab(v1, v2): + # Extract unique tokens from both dictionaries + unique_tokens = [] + for v1_tokens, v2_tokens in zip(v1,v2): + unique_tokens.append(list(set(v1_tokens.keys()) | set(v2_tokens.keys()))) + + return unique_tokens + +def average_and_sample(v1, v2, lamda, tokenizer): + next_token, v_avg, next_token_id1, next_token_id2 = [], [], [], [] + for element_v1, element_v2 in zip(v1, v2): + assert len(element_v1) == len(element_v2) + v_new = {} + for token1 in element_v1: + v_new[token1] = [lamda * element_v1[token1][0] + (1 - lamda) * element_v2[token1][0], + element_v1[token1][1]] + v_avg.append(v_new) + probs = [] + for item in v_new.values(): + probs.append(item[0]) + sample_index = probs.index(max(probs)) + i = 0 + for item1 in v_new.keys(): + if i == sample_index: + next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) + next_token_id1.append(element_v1[item1][1]) + next_token_id2.append(element_v2[item1][1]) + i+=1 + return next_token, v_avg, next_token_id1, next_token_id2 + + +def get_top_k_tokens(logits, tokenizer, k=10): + probs = logits + + top_k_indices = torch.topk(probs, k).indices + probs = probs.tolist() + top_k_probs = [] + for idx, prob in zip(top_k_indices,probs): + prob_item = [] + for i in idx: + prob_item.append(prob[i]) + top_k_probs.append(prob_item) + + top_k_tokens = [] + 
for indices in top_k_indices: + token_item = [] + for idx in indices: + token_item.append(tokenizer.convert_ids_to_tokens(idx.item(), skip_special_tokens=True)) + top_k_tokens.append(token_item) + + v1 = [] + for token, prob, id in zip(top_k_tokens, top_k_probs, top_k_indices): + v1.append( + {token.replace('▁','Ġ').replace('<0x0A>','/n').replace('Ċ','/n'): [prob, int(id)] for token, prob, id in zip(token, prob, id)}) + + return v1 + +#proxy tuning approach +def logits_add(v1, v2, v3, tokenizer, alpha, device=None): + next_token, next_token_id1, next_token_id2, next_token_id3 = [], [], [], [] + comb_ids_per_batch, comb_scores_per_batch = [], [] + + for element_v1, element_v2, element_v3 in zip(v1, v2, v3): + + v_new = {} + + for token1 in element_v1: + v_new[token1] = [ + element_v1[token1][0] + + (alpha * (element_v2[token1][0] - element_v3[token1][0])), + element_v1[token1][1] + ] + + probs = [item[0] for item in v_new.values()] + + + sample_index = probs.index(max(probs)) + + i = 0 + for item1 in v_new.keys(): + if i == sample_index: + next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) + next_token_id1.append(element_v1[item1][1]) + next_token_id2.append(element_v2[item1][1]) + next_token_id3.append(element_v3[item1][1]) + i += 1 + ids = torch.tensor([v_new[t][1] for t in v_new], dtype=torch.long, device=device) + scores = torch.tensor([v_new[t][0] for t in v_new], dtype=torch.float32, device=device) + comb_ids_per_batch.append(ids) + comb_scores_per_batch.append(scores) + return next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch + + + +class DExpertsLlama: + def __init__( + self, + base_name: str, + expert_name: str, + antiexpert_name: str, + tokenizer_base, tokenizer_expert, tokenizer_anti, + system_prompt: str = None, + alpha: float = 1.0, + unite: bool = False, + model_kwargs: Dict[str, Any] = None + ): + + self.antiexpert = None # ensure it exists + self.tok_anti = None + + self.base = AutoModelForCausalLM.from_pretrained( + base_name, **model_kwargs + ) + self.expert = AutoModelForCausalLM.from_pretrained( + expert_name, **model_kwargs + ) + self.base.eval() + self.expert.eval() + + self.tok_base = tokenizer_base + self.tok_exp = tokenizer_expert + + if not unite: + self.antiexpert = AutoModelForCausalLM.from_pretrained( + antiexpert_name, **model_kwargs + ) + self.antiexpert.eval() + self.tok_anti = tokenizer_anti + + self.alpha = alpha + self.device = self.base.device + self.system_prompt = system_prompt + + + def forward( + self, + base_inputs, + expert_inputs, + antiexpert_inputs=None, + return_dict=None + ): + base_outputs = self.base(**base_inputs, return_dict=return_dict) + expert_outputs = self.expert(**expert_inputs, return_dict=return_dict) + if antiexpert_inputs is not None: + antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=return_dict) + return base_outputs, expert_outputs, antiexpert_outputs + + return base_outputs, expert_outputs + + + def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): + """ + Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. 
+ Returns: input_ids (tensor on self.device) + """ + def _msgs(p): + if self.system_prompt: + return [{"role": "system", "content": self.system_prompt}, + {"role": "user", "content": p}] + return [{"role": "user", "content": p}] + + rendered = [ + tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) + for p in prompts + ] + chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) + return chat_inputs.input_ids.to(self.device) + + def _encode_plain_inputs(self, tokenizer, prompts): + """ + Plain (non-chat) encoding with the given tokenizer. + Returns: input_ids (tensor on self.device) + """ + enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) + return enc.input_ids.to(self.device) + + def _update_model_kwargs_for_generation( + self, + outputs: ModelOutput, + kwargs: Dict[str, Any], + ) -> Dict[str, Any]: + # update past_key_values + kwargs["past_key_values"] = outputs.past_key_values + + # update attention mask + if "attention_mask" in kwargs: + attention_mask = kwargs["attention_mask"] + kwargs["attention_mask"] = torch.cat( + [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 + ) + if getattr(outputs, "cache_position", None) is not None: + # some models already return it + kwargs["cache_position"] = outputs.cache_position + else: + if "cache_position" in kwargs: + kwargs["cache_position"] = kwargs["cache_position"] + 1 + else: + # first step: position is sequence-length-1 + seq_len = kwargs["attention_mask"].shape[1] + kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) + + return kwargs + + def generate( + self, + input_ids: Optional[torch.Tensor] = None, + max_new_tokens: Optional[int] = 100, + do_sample: bool = False, + alpha: float = 1.0, + return_logits_for_analysis: bool = False, + score_type=None, + k=20, + unite: bool = False, + prefix_allowed_tokens_fn=None, + prefix_allowed_tokens_fn_exp=None, + **kwargs + ): + base_kwargs = kwargs.copy() + + # Decode to strings once using base tokenizer + prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) + + if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): + base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) + else: + base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) + + base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) + + + expert_kwargs = kwargs.copy() + expert_input_ids = input_ids + + if hasattr(self.tok_exp, "apply_chat_template") and getattr(self.tok_exp, "chat_template", None): + expert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_exp, prompts) + else: + expert_input_ids = self._encode_plain_inputs(self.tok_exp, prompts) + + expert_kwargs['attention_mask'] = torch.ones_like(expert_input_ids, dtype=torch.long, device=expert_input_ids.device) + + + if not unite: + antiexpert_kwargs = kwargs.copy() + antiexpert_input_ids = input_ids + + if hasattr(self.tok_anti, "apply_chat_template") and getattr(self.tok_anti, "chat_template", None): + antiexpert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_anti, prompts) + else: + antiexpert_input_ids = self._encode_plain_inputs(self.tok_anti, prompts) + antiexpert_kwargs['attention_mask'] = torch.ones_like(antiexpert_input_ids, dtype=torch.long, device=antiexpert_input_ids.device) + + + 
# keep track of which sequences are already finished
+        unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device)
+        eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device)
+
+        T = max_new_tokens
+        if (not unite) and return_logits_for_analysis:
+            device = input_ids.device
+            # 1 x T buffers on GPU
+            p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16)
+            p_base = torch.empty(T, device=device, dtype=torch.bfloat16)
+            p_expert = torch.empty(T, device=device, dtype=torch.bfloat16)
+            p_anti = torch.empty(T, device=device, dtype=torch.bfloat16)
+
+            preds_dexperts = torch.empty(T, device=device, dtype=torch.int32)
+            preds_base = torch.empty(T, device=device, dtype=torch.int32)
+            preds_expert = torch.empty(T, device=device, dtype=torch.int32)
+            preds_anti = torch.empty(T, device=device, dtype=torch.int32)
+
+            token_ids_out = torch.empty(T, device=device, dtype=torch.int32)
+            t_write = 0
+
+        for step in range(max_new_tokens):
+
+            base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs)
+            expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs)
+
+            if unite:
+                base_outputs, expert_outputs = self.forward(
+                    base_inputs, expert_inputs, return_dict=True
+                )
+
+                base_next_token_logits = base_outputs.logits[..., -1, :]
+                expert_next_token_logits = expert_outputs.logits[..., -1, :]
+
+                if prefix_allowed_tokens_fn:
+                    # The two models have different vocabularies, so each needs its
+                    # own mask; reusing a single tensor would apply the expert mask
+                    # to the base logits.
+                    mask_base = torch.full_like(base_next_token_logits, -math.inf)
+                    sent = base_input_ids[0]
+                    prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent)
+                    if len(prefix_allowed_tokens) == 0:
+                        raise ValueError(
+                            "`prefix_allowed_tokens_fn` returned an empty list. This means the "
+                            "constraint is unsatisfiable. Please check your implementation "
+                            "of `prefix_allowed_tokens_fn`."
+                        )
+                    mask_base[0, prefix_allowed_tokens] = 0
+
+                    mask_exp = torch.full_like(expert_next_token_logits, -math.inf)
+                    sent = expert_input_ids[0]
+                    prefix_allowed_tokens_exp = prefix_allowed_tokens_fn_exp(0, sent)
+                    if len(prefix_allowed_tokens_exp) == 0:
+                        raise ValueError(
+                            "`prefix_allowed_tokens_fn_exp` returned an empty list. This means the "
+                            "constraint is unsatisfiable. Please check your implementation "
+                            "of `prefix_allowed_tokens_fn_exp`."
+                        )
+                    mask_exp[0, prefix_allowed_tokens_exp] = 0
+                    expert_next_token_logits = expert_next_token_logits + mask_exp
+                    base_next_token_logits = base_next_token_logits + mask_base
+
+                v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k)
+                v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k)
+
+                vu = get_union_vocab(v_base, v_exp)
+
+                v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits, 'qwen')
+                v_base = vocab_softmax(v_base)
+                v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits, 'llama')
+                v_exp = vocab_softmax(v_exp)
+
+                next_token, v_avg, next_token_id1, next_token_id2 = average_and_sample(v_base, v_exp, 0.5, self.tok_base)
+
+            else:
+                antiexpert_inputs = self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs)
+                base_outputs, expert_outputs, antiexpert_outputs = self.forward(
+                    base_inputs, expert_inputs, antiexpert_inputs, return_dict=True
+                )
+
+                base_next_token_logits = base_outputs.logits[..., -1, :]
+                expert_next_token_logits = expert_outputs.logits[..., -1, :]
+                antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :]
+
+                if prefix_allowed_tokens_fn:
+                    mask = torch.full_like(base_next_token_logits, -math.inf)
+                    sent = base_input_ids[0]
+                    prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent)
+                    if len(prefix_allowed_tokens) == 0:
+                        raise ValueError(
+                            "`prefix_allowed_tokens_fn` returned an empty list. This means the "
+                            "constraint is unsatisfiable. Please check your implementation "
+                            "of `prefix_allowed_tokens_fn`."
+                        )
+                    mask[0, prefix_allowed_tokens] = 0
+                    base_next_token_logits = base_next_token_logits + mask
+
+                if score_type == "logprobs":
+                    # Normalize the (possibly prefix-masked) scores; taking log_softmax
+                    # of the raw logits here would silently discard the constraint mask
+                    # applied above.
+                    base_next_token_logits = F.log_softmax(base_next_token_logits, dim=-1)
+                    expert_next_token_logits = F.log_softmax(expert_next_token_logits, dim=-1)
+                    antiexpert_next_token_logits = F.log_softmax(antiexpert_next_token_logits, dim=-1)
+
+                    v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k)
+                    v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=0)
+                    v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_next_token_logits, 'llama')
+                    v_anti = get_top_k_tokens(antiexpert_next_token_logits, self.tok_anti, k=0)
+                    v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_next_token_logits, 'llama')
+
+                    next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids, comb_scores = logits_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=input_ids.device)
+                elif score_type == "logits":  # regular proxy tuning
+                    expert_next_token_logits = expert_next_token_logits[:, :base_next_token_logits.shape[-1]]
+
+                    # use the alpha passed to generate(), matching the logprobs branch
+                    next_token_logits = (
+                        base_next_token_logits
+                        + alpha * (expert_next_token_logits - antiexpert_next_token_logits)
+                    )
+
+                    next_tokens = torch.argmax(next_token_logits, dim=-1)  # indices of top tokens
+                    next_token_id1 = next_tokens.tolist()
+                    next_token_id2 = list(next_token_id1)
+                    next_token_id3 = list(next_token_id1)
+                    next_token = [
+                        self.tok_base.convert_ids_to_tokens(tid, skip_special_tokens=False)
+                        for tid in next_token_id1
+                    ]
+
+            next_tokens = torch.as_tensor(next_token_id1, device=input_ids.device, dtype=torch.long)
+
+            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+            base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1)
+
+            exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long)
+            expert_input_ids = 
torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) + + base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) + expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) + + if not unite: + anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) + antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) + antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) + + # if eos_token was found in one sentence, set sentence to finished + unfinished_sequences = unfinished_sequences.mul( + next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) + ) + + # stop when each sentence is finished + if unfinished_sequences.max() == 0: + break + + if (not unite) and return_logits_for_analysis: + sl = slice(0, t_write) + results = [{ + 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) + 'p_dexperts': p_dexperts[sl], # [T’] fp16 (GPU) + 'preds_dexperts': preds_dexperts[sl], # [T’] int32 (GPU) + 'p_base': p_base[sl], + 'preds_base': preds_base[sl], + 'p_expert': p_expert[sl], + 'preds_expert': preds_expert[sl], + 'p_antiexpert': p_anti[sl], + 'preds_antiexpert': preds_anti[sl], + # (optional) decode later if you want strings + }] + return input_ids, results + return input_ids + +class RegularModel: + def __init__( + self, + base_name: str, + tokenizer: AutoTokenizer, + system_prompt: str = None, + alpha: float = 1.0, + chat_response_prefix: str = None, + model_kwargs: Dict[str, Any] = None + ): + self.base = AutoModelForCausalLM.from_pretrained( + base_name, **model_kwargs + ) + + self.base.eval() + + #self.tokenizer = tokenizer + self.tok_base = tokenizer + self.alpha = alpha + self.device = self.base.device + self.system_prompt = system_prompt + + + def forward( + self, + base_inputs, + return_dict=None + ): + base_outputs = self.base(**base_inputs, return_dict=return_dict) + + return base_outputs + + def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): + """ + Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. + Returns: input_ids (tensor on self.device) + """ + def _msgs(p): + if self.system_prompt: + return [{"role": "system", "content": self.system_prompt}, + {"role": "user", "content": p}] + return [{"role": "user", "content": p}] + + rendered = [ + tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) + for p in prompts + ] + chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) + return chat_inputs.input_ids.to(self.device) + + def _encode_plain_inputs(self, tokenizer, prompts): + """ + Plain (non-chat) encoding with the given tokenizer. 
+ Returns: input_ids (tensor on self.device) + """ + enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) + return enc.input_ids.to(self.device) + + def _update_model_kwargs_for_generation( + self, + outputs: ModelOutput, + kwargs: Dict[str, Any], + ) -> Dict[str, Any]: + # update past_key_values + kwargs["past_key_values"] = outputs.past_key_values + + # update attention mask + if "attention_mask" in kwargs: + attention_mask = kwargs["attention_mask"] + kwargs["attention_mask"] = torch.cat( + [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 + ) + if getattr(outputs, "cache_position", None) is not None: + # some models already return it + kwargs["cache_position"] = outputs.cache_position + else: + if "cache_position" in kwargs: + kwargs["cache_position"] = kwargs["cache_position"] + 1 + else: + # first step: position is sequence-length-1 + seq_len = kwargs["attention_mask"].shape[1] + kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) + + return kwargs + + + def generate( + self, + input_ids: Optional[torch.Tensor] = None, + max_new_tokens: Optional[int] = 100, + do_sample: bool = False, + return_logits_for_analysis: bool = False, + prefix_allowed_tokens_fn=None, + **kwargs + ): + base_kwargs = kwargs.copy() + prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) + + if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): + base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) + else: + base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) + + + base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) + + # keep track of which sequences are already finished + unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) + + eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device) + + T = max_new_tokens + if return_logits_for_analysis: + device = input_ids.device + # 1 x T buffers on GPU + p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) + p_base = torch.empty(T, device=device, dtype=torch.bfloat16) + + preds_base = torch.empty(T, device=device, dtype=torch.int32) + + token_ids_out = torch.empty(T, device=device, dtype=torch.int32) + t_write = 0 + + + for step in range(max_new_tokens): + # prepare model inputs with past_key_values and attention_mask + base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) + base_outputs = self.forward( + base_inputs, return_dict=True + ) + base_next_token_logits = base_outputs.logits[..., -1, :] + next_token_logits = base_next_token_logits + if step < 2: + next_token_logits[:, self.tok_base.eos_token_id] = -float("inf") + + if prefix_allowed_tokens_fn: + mask = torch.full_like(next_token_logits, -math.inf) + sent = base_input_ids[0] + prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) + if len(prefix_allowed_tokens) == 0: + raise ValueError( + f"`prefix_allowed_tokens_fn` returned an empty list." + f"This means that the constraint is unsatisfiable. 
Please check your implementation" + f"of `prefix_allowed_tokens_fn` " + ) + mask[0, prefix_allowed_tokens] = 0 + next_token_logits = next_token_logits + mask + + next_tokens = torch.argmax(next_token_logits, dim=-1) + + next_tokens = ( + next_tokens * unfinished_sequences + + self.tok_base.pad_token_id * (1 - unfinished_sequences) + ) + + input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) + base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1) + + # update kwargs + base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) + # if eos_token was found in one sentence, set sentence to finished + unfinished_sequences = unfinished_sequences.mul( + next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) + ) + + # stop when each sentence is finished + if unfinished_sequences.max() == 0: + break + + if return_logits_for_analysis: + sl = slice(0, t_write) + results = [{ + 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) + 'p_base': p_base[sl], + 'preds_base': preds_base[sl], + + }] + return input_ids, results + return input_ids + + +def ensure_dir(d): + if not os.path.exists(d): + os.makedirs(d, exist_ok=True) + + +@torch.inference_mode() +def generate_completions( + model, + tokenizer, + prompts, + batch_size=1, + add_special_tokens=True, + disable_tqdm=False, + return_logits_for_analysis=False, + score_type=None, + alpha=1.0, + k=20, + unite=False, + prefix_allowed_tokens_fn=None, + prefix_allowed_tokens_fn_exp=None, + **generation_kwargs, + +): + generations = [] + outputs = [] + if not disable_tqdm: + progress = tqdm.tqdm(total=len(prompts), desc="Generating Completions") + + num_return_sequences = generation_kwargs.get("num_return_sequences", 1) + + all_results = [] + for i in range(0, len(prompts), batch_size): + batch_prompts = prompts[i:i+batch_size] + tokenized_prompts = tokenizer( + batch_prompts, padding="longest", return_tensors="pt", add_special_tokens=add_special_tokens + ) + + # print ("tokenized_prompt: ", tokenized_prompts) + if hasattr(model, "device"): # DExpertsLlama + device = model.device + # print ("device = model.device") + else: # vanilla HF model + device = next(model.parameters()).device + # print ("next(model.parameters()).devicedevice = next(model.parameters()).device") + batch_input_ids = tokenized_prompts['input_ids'].to(device) + attention_mask = tokenized_prompts['attention_mask'].to(device) + + batch_outputs = model.generate( + input_ids=batch_input_ids, + attention_mask=attention_mask, + alpha=alpha, + score_type=score_type, + prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, + prefix_allowed_tokens_fn_exp=prefix_allowed_tokens_fn_exp, + k=k, + unite=unite, + **generation_kwargs + ) + results = [] + + # to support the logits processing below when using DExperts with mixed tokenizers + if isinstance(batch_input_ids, dict): + batch_input_ids = batch_input_ids['llama'] + + batch_outputs = tokenizer.batch_decode(batch_outputs, skip_special_tokens=True) + print("batch_outputs: ", batch_outputs) + batch_prompts = tokenizer.batch_decode(batch_input_ids, skip_special_tokens=True) + + # duplicate the prompts to match the number of return sequences + batch_prompts = [prompt for prompt in batch_prompts for _ in range(num_return_sequences)] + batch_generations = [ + output[len(prompt):] for prompt, output in zip(batch_prompts, batch_outputs) + ] + + generations += batch_generations + + if not disable_tqdm: + progress.update(len(batch_prompts)//num_return_sequences) + # 
note: all_results stays empty in this version (logits capture is not wired in)
+    return generations, all_results
+
+
+def add_pad_token(tokenizer, padding_side="left"):
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.pad_token_id = tokenizer.eos_token_id
+    tokenizer.padding_side = padding_side
+    return tokenizer
+
+
+def load_dexperts_model_and_tokenizer(
+    base_name: str,
+    expert_name: str,
+    antiexpert_name: str,
+    device_map: str = "auto",
+    alpha: float = 1.0,
+    load_in_8bit: bool = False,
+    load_in_4bit: bool = False,
+    system_prompt: Optional[str] = None,
+    use_fast_tokenizer: bool = True,
+    padding_side: str = "left",
+    proxy_tune: bool = False,
+    unite: bool = False,
+):
+    bnb_cfg = None
+
+    if load_in_8bit:
+        bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+
+    if load_in_4bit:
+        bnb_cfg = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",  # {nf4, fp4}; nf4 is standard
+            bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+
+    model_kwargs = {
+        'device_map': device_map,
+        'torch_dtype': torch.bfloat16,
+        'quantization_config': bnb_cfg,
+        'low_cpu_mem_usage': True,
+        'trust_remote_code': True,
+    }
+
+    # All Llama-family names (llama*/mellama*, chat or base) share the Llama-2
+    # tokenizer, so a single condition covers them; other families (e.g. Qwen)
+    # load their own tokenizer.
+    if "llama" in base_name:
+        tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer)
+    else:
+        tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=False, trust_remote_code=True)
+
+    tok_base = add_pad_token(tok_base, padding_side)
+
+    if proxy_tune or unite:
+        if "llama" in expert_name:
+            tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer)
+        else:
+            tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS[expert_name], use_fast=False, trust_remote_code=True)
+
+        tok_exp = add_pad_token(tok_exp, padding_side)
+
+    if proxy_tune:
+        # Must assign tok_anti in every branch; a copy-paste branch that
+        # re-assigned tok_exp here would leave tok_anti undefined below.
+        if "llama" in antiexpert_name:
+            tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer)
+        else:
+            tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS[antiexpert_name], use_fast=False, trust_remote_code=True)
+
+        tok_anti = add_pad_token(tok_anti, padding_side)
+
+        model = DExpertsLlama(
+            base_name=MODEL_PATHS[base_name],
+            expert_name=MODEL_PATHS[expert_name],
+            antiexpert_name=MODEL_PATHS[antiexpert_name],
+            tokenizer_base=tok_base,
+            tokenizer_expert=tok_exp,
+            tokenizer_anti=tok_anti,
+            system_prompt=system_prompt,
+            alpha=alpha,
+            model_kwargs=model_kwargs,
+        )
+        print(f"[Loader] Base   : {MODEL_PATHS[base_name]}")
+        print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}")
+        print(f"[Loader] Anti   : {MODEL_PATHS[antiexpert_name]}")
+
+    elif unite:
+        model = DExpertsLlama(
+            base_name=MODEL_PATHS[base_name],
+            expert_name=MODEL_PATHS[expert_name],
+            antiexpert_name="none",
+            tokenizer_base=tok_base,
+            tokenizer_expert=tok_exp,
+            tokenizer_anti="none",
+            system_prompt=system_prompt,
+            alpha=alpha,
+            unite=True,
+            model_kwargs=model_kwargs,
+        )
+        print(f"[Loader] Base   : {MODEL_PATHS[base_name]}")
+        print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}")
+
+    else:
+        model = RegularModel(base_name=MODEL_PATHS[base_name], tokenizer=tok_base, system_prompt=system_prompt, alpha=alpha, model_kwargs=model_kwargs)
+
+    return model, tok_base
+
+
+def _safe_tag(model_name: str) -> str:
+    # e.g. "proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat"
+    return model_name.replace("/", "_").replace(" ", "").replace(".", "").replace("-", "")
+
+
+def setup_run_dirs(model_name: str, root=LOCAL_RESULTS_DIR):
+    """
+    Creates:
+        {root}/{tag}_{stamp}/
+        ├─ {tag}_{stamp}.csv
+        └─ logits_analysis/
+    Returns: (run_dir, csv_path, logits_dir)
+    """
+    ensure_dir(root)
+    tag = _safe_tag(model_name)
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    run_dir = os.path.join(root, f"{tag}_{stamp}")
+    ensure_dir(run_dir)
+
+    csv_name = f"{tag}_{stamp}.csv"
+    csv_path = os.path.join(run_dir, csv_name)
+    with open(csv_path, "w") as f:
+        f.write("timestamp,request_id,model_name,prompt,output,logits_path\n")
+
+    logits_dir = os.path.join(run_dir, "logits_analysis")
+    ensure_dir(logits_dir)
+
+    print(f"[TokenLog] created run dir: {run_dir}")
+    print(f"[TokenLog] csv: {csv_path}")
+    print(f"[TokenLog] logits dir: {logits_dir}")
+    return run_dir, csv_path, logits_dir
+
+
+def append_request_row(csv_path: str, request_id: str, model_name: str, prompt: str, output: str, logits_path: str | None):
+    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    def esc(s: str) -> str:
+        if s is None:
+            return ""
+        # escape newlines and commas so each record stays a single row with aligned columns
+        return s.replace("\n", "\\n").replace(",", "\\,")
+
+    with open(csv_path, "a") as f:
+        f.write(f"{ts},{request_id},{esc(model_name)},{esc(prompt)},{esc(output)},{esc(logits_path or '')}\n")
+
+
+class ProxyTuningClient(Client):
+    """
+    A HELM client that uses ProxyTuning for inference instead of directly calling the model.
+    """
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        model_name: str = None,
+        api_base: str = None,
+        api_key: str = None,
+    ):
+        """
+        Initializes the ProxyTuningClient.
+
+        Args:
+            tokenizer (Tokenizer): Tokenizer instance (unused but required by HELM interface).
+            tokenizer_name (str): Name of the tokenizer (unused but required by HELM interface).
+            cache_config (CacheConfig): Configuration for caching.
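+            model_name (str): Encoded run spec "<base>_<expert>_<antiexpert>_<alpha>_<score_type>_<k>";
+                "none" for the expert/antiexpert selects UNITE or plain base-model mode.
+            api_base (str): Unused; accepted for HELM client interface compatibility.
+            api_key (str): Unused; accepted for HELM client interface compatibility.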
+
+        """
+        self.cache = Cache(cache_config)
+        self.run_dir, self.token_log_path, self.logits_dir = setup_run_dirs(model_name)
+        self.model_name = model_name
+        self.run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+        self.req_seq = 0
+
+        tag = model_name.split("/")[-1]
+        # strip optional "proxy_tuning_" prefix
+        if tag.startswith("proxy_tuning_"):
+            tag = tag[len("proxy_tuning_"):]
+
+        # The tag encodes the run configuration:
+        # <base>_<expert>_<antiexpert>_<alpha>_<score_type>_<k>
+        parts = tag.split("_")
+        base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = (
+            parts[0],
+            parts[1],
+            parts[2],
+            float(parts[3]),
+            parts[4],
+            parts[5],
+        )
+        self.k = int(k_str)
+
+        self.is_unite = False
+        self.is_proxy = False
+        if expert_name != "none":
+            if antiexpert_name == "none":
+                self.is_unite = True
+            else:
+                self.is_proxy = True
+
+        print("model_name:", model_name)
+        print("tag:", tag)
+        print("base:", base_name)
+        print("expert:", expert_name)
+        print("antiexpert:", antiexpert_name)
+        print("alpha:", self.alpha)
+        print("score_type:", self.score_type)
+        print("k:", self.k)
+        print("proxy:", self.is_proxy)
+        print("unite:", self.is_unite)
+
+        self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer(
+            base_name=base_name,
+            expert_name=expert_name,
+            antiexpert_name=antiexpert_name,
+            load_in_8bit=False,
+            load_in_4bit=True,
+            use_fast_tokenizer=True,
+            system_prompt=None,
+            device_map='auto',
+            proxy_tune=self.is_proxy,
+            unite=self.is_unite,
+        )
+
+    def make_request(self, request: Request) -> RequestResult:
+        """
+        Handles a request by running the prompt through the configured
+        proxy-tuned, UNITE, or plain base model.
+
+        Args:
+            request (Request): The request object containing the prompt.
+
+        Returns:
+            RequestResult: A HELM-compatible response object.
+        """
+        prompt_text = request.prompt
+
+        if request.messages:
+            prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system")
+
+        print("prompt_text: ", prompt_text)
+        prompts = [prompt_text]
+
+        # Multiple-choice prompts only need a single-letter answer.
+        max_new_tokens = 600
+        if prompt_text.strip().startswith("Answer 'A' for "):
+            max_new_tokens = 2
+
+        predicted_labels, all_results = generate_completions(
+            model=self.model,
+            tokenizer=self.hf_tokenizer,
+            prompts=prompts,
+            max_new_tokens=max_new_tokens,
+            do_sample=False,
+            num_return_sequences=1,
+            alpha=self.alpha,
+            k=self.k,
+            score_type=self.score_type,
+            unite=self.is_unite,
+            return_logits_for_analysis=False,
+        )
+
+        output_text = predicted_labels[0]
+        print("output_text: ", output_text)
+
+        self.req_seq += 1
+        request_id = f"{self.run_id}_r{self.req_seq:04d}"
+
+        logits_path = None
+        if self.is_proxy and all_results:
+            logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt")
+            torch.save(all_results, logits_path)
+            print(f"[Logits] wrote {logits_path}")
+
+        append_request_row(
+            csv_path=self.token_log_path,
+            request_id=request_id,
+            model_name=self.model_name,
+            prompt=prompt_text,
+            output=output_text,
+            logits_path=logits_path,
+        )
+
+        # Return a HELM-compatible RequestResult
+        output = GeneratedOutput(text=output_text, logprob=0.0, tokens=[])
+        return 
RequestResult(success=True, cached=False, completions=[output], embedding=[]) From 93f2a460879cb6e251edc0cb396333ab6a1affec Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:49:20 -0700 Subject: [PATCH 07/42] Delete src/helm/clients/proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 961 ------------------------ 1 file changed, 961 deletions(-) delete mode 100644 src/helm/clients/proxy_tuning_client.py diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py deleted file mode 100644 index e512bf580a7..00000000000 --- a/src/helm/clients/proxy_tuning_client.py +++ /dev/null @@ -1,961 +0,0 @@ -# File: helm/clients/proxy_tuning_client.py -from helm.clients.client import Client -from helm.common.cache import CacheConfig -from helm.tokenizers.tokenizer import Tokenizer -from helm.common.cache import Cache -from helm.common.request import Request, RequestResult, GeneratedOutput - -from typing import Optional, Dict, Any, List -import torch, os, json -from transformers import AutoModelForCausalLM, AutoTokenizer -import torch.nn.functional as F -from transformers.generation.utils import ( - ModelOutput, -) -import tqdm -from transformers impor ( - BitsAndBytesConfig, -) - -from datetime import datetime - -MODEL_PATHS = { - "llama-70b-chat": "[MODEL PATH]", - "llama-13b-base": "[MODEL PATH]", - "llama-7b-chat": "[MODEL PATH]", - "mellama-13b-chat": "[MODEL PATH]", - "mellama-13b-base": "[MODEL PATH]", - "mellama-70b-chat": "[MODEL PATH]", - "qwen3-30b": "[MODEL PATH]", -} - -RESULTS_DIR = "[results dir]" -# helpers adapted from unite - -def update_vocab(v1, vu, tokenizer, logits, model_name): - for vu_token, v1_token, logit_ele in zip(vu,v1,logits): - v1_token_ids = [] - for item in v1_token.values(): - v1_token_ids.append(item[1]) - for token in vu_token: - if token not in v1_token.keys(): - if 'llama' in model_name.lower(): - token = token.replace('Ġ','▁') - if token != '': - subtoken_id = tokenizer.convert_tokens_to_ids(token) - if subtoken_id != 0 and subtoken_id != None: #Mistral and Llama2 oov id 0 - logit = logit_ele[subtoken_id] - else: - subtokens = tokenizer.tokenize(token) - for token_id in tokenizer.convert_tokens_to_ids(subtokens): - #if 'llama2' in model_name: - if 'llama' in model_name.lower(): - if token_id != 29871: - subtoken_id = token_id - break - else: - subtoken_id = token_id - break - logit = logit_ele[subtoken_id] - else: - if 'qwen' in model_name.lower(): - logit = logit_ele[220] - subtoken_id = 220 - if 'llama' in model_name.lower(): - logit = logit_ele[29871] - subtoken_id = 29871 - - if 'llama' in model_name.lower(): - v1_token[token.replace('▁', 'Ġ')] = [logit, subtoken_id] - else: - if subtoken_id not in v1_token_ids: - v1_token[token] = [logit, subtoken_id] - v1_token_ids.append(subtoken_id) - else: - v1_token[token] = [0, subtoken_id] - - v1_new = v1 - return v1_new - -def vocab_softmax(v1): - v1_new = [] - for element in v1: - ele = {} - ele_values = list(element.values()) - ele_values0, ele_values1 = [], [] - for item in ele_values: - ele_values0.append(item[0]) - ele_values1.append(item[1]) - ele_values0 = torch.softmax(torch.tensor(ele_values0), dim=0) - for token, prob, ids in zip(element.keys(),ele_values0,ele_values1): - ele[token] = [prob, ids] - v1_new.append(ele) - - return v1_new - - -def get_union_vocab(v1, v2): - # Extract unique tokens from both dictionaries - unique_tokens = [] - for v1_tokens, v2_tokens in zip(v1,v2): - 
unique_tokens.append(list(set(v1_tokens.keys()) | set(v2_tokens.keys()))) - - return unique_tokens - -def average_and_sample(v1, v2, lamda, tokenizer): - next_token, v_avg, next_token_id1, next_token_id2 = [], [], [], [] - for element_v1, element_v2 in zip(v1, v2): - assert len(element_v1) == len(element_v2) - v_new = {} - for token1 in element_v1: - v_new[token1] = [lamda * element_v1[token1][0] + (1 - lamda) * element_v2[token1][0], - element_v1[token1][1]] - v_avg.append(v_new) - probs = [] - for item in v_new.values(): - probs.append(item[0]) - sample_index = probs.index(max(probs)) - i = 0 - for item1 in v_new.keys(): - if i == sample_index: - next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) - next_token_id1.append(element_v1[item1][1]) - next_token_id2.append(element_v2[item1][1]) - i+=1 - return next_token, v_avg, next_token_id1, next_token_id2 - - -def get_top_k_tokens(self, logits, tokenizer, k=10): - probs = logits - - top_k_indices = torch.topk(probs, k).indices - probs = probs.tolist() - top_k_probs = [] - for idx, prob in zip(top_k_indices,probs): - prob_item = [] - for i in idx: - prob_item.append(prob[i]) - top_k_probs.append(prob_item) - - top_k_tokens = [] - for indices in top_k_indices: - token_item = [] - for idx in indices: - token_item.append(tokenizer.convert_ids_to_tokens(idx.item(), skip_special_tokens=True)) - top_k_tokens.append(token_item) - - v1 = [] - for token, prob, id in zip(top_k_tokens, top_k_probs, top_k_indices): - v1.append( - {token.replace('▁','Ġ').replace('<0x0A>','/n').replace('Ċ','/n'): [prob, int(id)] for token, prob, id in zip(token, prob, id)}) - - return v1 - -#proxy tuning approach -def logits_add(v1, v2, v3, tokenizer, alpha, device=None): - next_token, next_token_id1, next_token_id2, next_token_id3 = [], [], [], [] - comb_ids_per_batch, comb_scores_per_batch = [], [] - - for element_v1, element_v2, element_v3 in zip(v1, v2, v3): - - v_new = {} - - for token1 in element_v1: - v_new[token1] = [ - element_v1[token1][0] + - (alpha * (element_v2[token1][0] - element_v3[token1][0])), - element_v1[token1][1] - ] - - probs = [item[0] for item in v_new.values()] - - - sample_index = probs.index(max(probs)) - - i = 0 - for item1 in v_new.keys(): - if i == sample_index: - next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) - next_token_id1.append(element_v1[item1][1]) - next_token_id2.append(element_v2[item1][1]) - next_token_id3.append(element_v3[item1][1]) - i += 1 - ids = torch.tensor([v_new[t][1] for t in v_new], dtype=torch.long, device=device) - scores = torch.tensor([v_new[t][0] for t in v_new], dtype=torch.float32, device=device) - comb_ids_per_batch.append(ids) - comb_scores_per_batch.append(scores) - return next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch - - - -class DExpertsLlama: - def __init__( - self, - base_name: str, - expert_name: str, - antiexpert_name: str, - tokenizer_base, tokenizer_expert, tokenizer_anti, - system_prompt: str = None, - alpha: float = 1.0, - unite: bool = False, - model_kwargs: Dict[str, Any] = None - ): - - self.antiexpert = None # ensure it exists - self.tok_anti = None - - self.base = AutoModelForCausalLM.from_pretrained( - base_name, **model_kwargs - ) - self.expert = AutoModelForCausalLM.from_pretrained( - expert_name, **model_kwargs - ) - self.base.eval() - self.expert.eval() - - self.tok_base = tokenizer_base - self.tok_exp = tokenizer_expert - - if not unite: - self.antiexpert = 
AutoModelForCausalLM.from_pretrained( - antiexpert_name, **model_kwargs - ) - self.antiexpert.eval() - self.tok_anti = tokenizer_anti - - self.alpha = alpha - self.device = self.base.device - self.system_prompt = system_prompt - - - def forward( - self, - base_inputs, - expert_inputs, - antiexpert_inputs=None, - return_dict=None - ): - base_outputs = self.base(**base_inputs, return_dict=return_dict) - expert_outputs = self.expert(**expert_inputs, return_dict=return_dict) - if antiexpert_inputs is not None: - antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=return_dict) - return base_outputs, expert_outputs, antiexpert_outputs - - return base_outputs, expert_outputs - - - def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): - """ - Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. - Returns: input_ids (tensor on self.device) - """ - def _msgs(p): - if self.system_prompt: - return [{"role": "system", "content": self.system_prompt}, - {"role": "user", "content": p}] - return [{"role": "user", "content": p}] - - rendered = [ - tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) - for p in prompts - ] - chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) - return chat_inputs.input_ids.to(self.device) - - def _encode_plain_inputs(self, tokenizer, prompts): - """ - Plain (non-chat) encoding with the given tokenizer. - Returns: input_ids (tensor on self.device) - """ - enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) - return enc.input_ids.to(self.device) - - def _update_model_kwargs_for_generation( - self, - outputs: ModelOutput, - kwargs: Dict[str, Any], - ) -> Dict[str, Any]: - # update past_key_values - kwargs["past_key_values"] = outputs.past_key_values - - # update attention mask - if "attention_mask" in kwargs: - attention_mask = kwargs["attention_mask"] - kwargs["attention_mask"] = torch.cat( - [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 - ) - if getattr(outputs, "cache_position", None) is not None: - # some models already return it - kwargs["cache_position"] = outputs.cache_position - else: - if "cache_position" in kwargs: - kwargs["cache_position"] = kwargs["cache_position"] + 1 - else: - # first step: position is sequence-length-1 - seq_len = kwargs["attention_mask"].shape[1] - kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) - - return kwargs - - def generate( - self, - input_ids: Optional[torch.Tensor] = None, - max_new_tokens: Optional[int] = 100, - do_sample: bool = False, - alpha: float = 1.0, - return_logits_for_analysis: bool = False, - score_type=None, - k=20, - unite: bool = False, - **kwargs - ): - base_kwargs = kwargs.copy() - - # Decode to strings once using base tokenizer - prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) - - if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): - base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) - else: - base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) - - base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) - - - expert_kwargs = kwargs.copy() - expert_input_ids = input_ids - - if hasattr(self.tok_exp, "apply_chat_template") and getattr(self.tok_exp, "chat_template", None): - 
expert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_exp, prompts) - else: - expert_input_ids = self._encode_plain_inputs(self.tok_exp, prompts) - - expert_kwargs['attention_mask'] = torch.ones_like(expert_input_ids, dtype=torch.long, device=expert_input_ids.device) - - - if not unite: - antiexpert_kwargs = kwargs.copy() - antiexpert_input_ids = input_ids - - if hasattr(self.tok_anti, "apply_chat_template") and getattr(self.tok_anti, "chat_template", None): - antiexpert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_anti, prompts) - else: - antiexpert_input_ids = self._encode_plain_inputs(self.tok_anti, prompts) - antiexpert_kwargs['attention_mask'] = torch.ones_like(antiexpert_input_ids, dtype=torch.long, device=antiexpert_input_ids.device) - - - # keep track of which sequences are already finished - unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) - eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device) - - T = max_new_tokens - if (not unite) and return_logits_for_analysis: - device = input_ids.device - # 1 x T buffers on GPU - p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) - p_base = torch.empty(T, device=device, dtype=torch.bfloat16) - p_expert = torch.empty(T, device=device, dtype=torch.bfloat16) - p_anti = torch.empty(T, device=device, dtype=torch.bfloat16) - - preds_dexperts = torch.empty(T, device=device, dtype=torch.int32) - preds_base = torch.empty(T, device=device, dtype=torch.int32) - preds_expert = torch.empty(T, device=device, dtype=torch.int32) - preds_anti = torch.empty(T, device=device, dtype=torch.int32) - - token_ids_out = torch.empty(T, device=device, dtype=torch.int32) - t_write = 0 - - for step in range(max_new_tokens): - - base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) - expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs) - - - if unite: - base_outputs, expert_outputs = self.forward( - base_inputs, expert_inputs, return_dict=True - ) - - base_next_token_logits = base_outputs.logits[..., -1, :] - expert_next_token_logits = expert_outputs.logits[..., -1, :] - v_base = self.get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) - v_exp = self.get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k) - - vu = get_union_vocab(v_base, v_exp) - - v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits,'qwen') - v_base = vocab_softmax(v_base) - v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits,'llama') - v_exp = vocab_softmax(v_exp) - - next_token, v_avg, next_token_id1, next_token_id2 = average_and_sample(v_base,v_exp,0.5, self.tok_base) - - else: - antiexpert_inputs = self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs) - base_outputs, expert_outputs, antiexpert_outputs = self.forward( - base_inputs, expert_inputs, antiexpert_inputs, return_dict=True - ) - - base_next_token_logits = base_outputs.logits[..., -1, :] - expert_next_token_logits = expert_outputs.logits[..., -1, :] - antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :] - - if score_type == "logprobs": - base_next_token_logits = F.log_softmax(base_outputs.logits[..., -1, :], dim=-1) - expert_next_token_logits = F.log_softmax(expert_outputs.logits[..., -1, :], dim=-1) - antiexpert_next_token_logits = F.log_softmax(antiexpert_outputs.logits[..., -1, :], dim=-1) - - v_base = 
self.get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) - v_exp = self.get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=0) - v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_next_token_logits,'llama') - v_anti = self.get_top_k_tokens(antiexpert_next_token_logits, self.tok_anti, k=0) - v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_next_token_logits, 'llama') - - next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids, comb_scores = logit_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=input_ids.device) - - next_tokens = torch.as_tensor(next_token_id1, device=input_ids.device, dtype=torch.long) - - input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) - base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1) - - exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) - expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) - - base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) - expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) - - if not unite: - anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) - antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) - antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) - - # if eos_token was found in one sentence, set sentence to finished - unfinished_sequences = unfinished_sequences.mul( - next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) - ) - - # stop when each sentence is finished - if unfinished_sequences.max() == 0: - break - - if (not unite) and return_logits_for_analysis: - sl = slice(0, t_write) - results = [{ - 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) - 'p_dexperts': p_dexperts[sl], # [T’] fp16 (GPU) - 'preds_dexperts': preds_dexperts[sl], # [T’] int32 (GPU) - 'p_base': p_base[sl], - 'preds_base': preds_base[sl], - 'p_expert': p_expert[sl], - 'preds_expert': preds_expert[sl], - 'p_antiexpert': p_anti[sl], - 'preds_antiexpert': preds_anti[sl], - # (optional) decode later if you want strings - }] - return input_ids, results - return input_ids - - -def ensure_dir(d): - if not os.path.exists(d): - os.makedirs(d, exist_ok=True) - - -@torch.inference_mode() -def generate_completions( - model, - tokenizer, - prompts, - batch_size=1, - add_special_tokens=True, - disable_tqdm=False, - return_logits_for_analysis=False, - score_type=None, - alpha=1.0, - k=20, - unite=False, - **generation_kwargs, - -): - generations = [] - outputs = [] - if not disable_tqdm: - progress = tqdm.tqdm(total=len(prompts), desc="Generating Completions") - - num_return_sequences = generation_kwargs.get("num_return_sequences", 1) - - all_results = [] - for i in range(0, len(prompts), batch_size): - batch_prompts = prompts[i:i+batch_size] - tokenized_prompts = tokenizer( - batch_prompts, padding="longest", return_tensors="pt", add_special_tokens=add_special_tokens - ) - - # print ("tokenized_prompt: ", tokenized_prompts) - if hasattr(model, "device"): # DExpertsLlama - device = model.device - # print ("device = model.device") - else: # vanilla HF model - device = next(model.parameters()).device - # print ("next(model.parameters()).devicedevice = next(model.parameters()).device") - batch_input_ids = tokenized_prompts['input_ids'].to(device) - attention_mask = 
tokenized_prompts['attention_mask'].to(device) - - batch_outputs = model.generate( - input_ids=batch_input_ids, - attention_mask=attention_mask, - alpha=alpha, - score_type=score_type, - k=k, - unite=unite, - **generation_kwargs - ) - results = [] - - # to support the logits processing below when using DExperts with mixed tokenizers - if isinstance(batch_input_ids, dict): - batch_input_ids = batch_input_ids['llama'] - - batch_outputs = tokenizer.batch_decode(batch_outputs, skip_special_tokens=True) - # print("batch_outputs: ", batch_outputs) - batch_prompts = tokenizer.batch_decode(batch_input_ids, skip_special_tokens=True) - - # duplicate the prompts to match the number of return sequences - batch_prompts = [prompt for prompt in batch_prompts for _ in range(num_return_sequences)] - batch_generations = [ - output[len(prompt):] for prompt, output in zip(batch_prompts, batch_outputs) - ] - - generations += batch_generations - - if not disable_tqdm: - progress.update(len(batch_prompts)//num_return_sequences) - # return generations, logits_for_analysis - return generations, all_results - - -def add_pad_token(tokenizer, padding_side="left"): - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - tokenizer.pad_token_id = tokenizer.eos_token_id - tokenizer.padding_side = padding_side - return tokenizer - -def load_dexperts_model_and_tokenizer( - base_name: str, - expert_name: str, - antiexpert_name: str, - device_map: str = "auto", - alpha: float = 1.0, - load_in_8bit: bool = False, - load_in_4bit: bool = False, - system_prompt: Optional[str] = None, - use_fast_tokenizer: bool = True, - padding_side: str = "left", - proxy_tune: bool = False, - unite: bool = False, -): - - bnb_cfg = None - - if load_in_8bit: - bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) - - if load_in_4bit: - bnb_cfg = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", # {nf4, fp4}; nf4 is standard - bnb_4bit_compute_dtype=torch.bfloat16, - ) - - model_kwargs = { - 'device_map': device_map, - 'torch_dtype': torch.bfloat16, - 'quantization_config': bnb_cfg, - 'low_cpu_mem_usage': True, - 'trust_remote_code': True, - } - - - if "llama" in base_name and "chat" in base_name: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) - else: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer) - - if "llama" in expert_name and "chat" in expert_name: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) - elif "llama" in expert_name and "chat" not in expert_name: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - else: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS[expert_name], use_fast=use_fast_tokenizer) - - tok_base = add_pad_token(tok_base, padding_side) - tok_exp = add_pad_token(tok_exp, padding_side) - - if proxy_tune: - if "llama" in antiexpert_name and "chat" in antiexpert_name: - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) - else: - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS[antiexpert_name], use_fast=use_fast_tokenizer) - - tok_anti = add_pad_token(tok_anti, padding_side) - - - model = DExpertsLlama( - base_name=MODEL_PATHS[base_name], - expert_name=MODEL_PATHS[expert_name], - antiexpert_name=MODEL_PATHS[antiexpert_name], - tokenizer_base=tok_base, - 
tokenizer_expert=tok_exp, - tokenizer_anti=tok_anti, - system_prompt=system_prompt, - alpha=alpha, - model_kwargs=model_kwargs, - ) - print(f"[Loader] Base : {MODEL_PATHS[base_name]}") - print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") - print(f"[Loader] Anti : {MODEL_PATHS[antiexpert_name]}") - - elif unite: - model = DExpertsLlama( - base_name=MODEL_PATHS[base_name], - expert_name=MODEL_PATHS[expert_name], - antiexpert_name="none", - tokenizer_base=tok_base, - tokenizer_expert=tok_exp, - tokenizer_anti="none", - system_prompt=system_prompt, - alpha=alpha, - unite=True, - model_kwargs=model_kwargs, - ) - print(f"[Loader] Base : {MODEL_PATHS[base_name]}") - print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") - - return model, tok_base - - -def _safe_tag(model_name: str) -> str: - # e.g. "proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat" - return model_name.replace("/", "_").replace(" ", "").replace(".", "").replace("-", "") - -def setup_run_dirs(model_name: str, root=LOCAL_RESULTS_DIR): - """ - Creates: - /_/ - ├─ _.csv - └─ logits_analysis/ - Returns: (run_dir, csv_path, logits_dir) - """ - ensure_dir(root) - tag = _safe_tag(model_name) - stamp = datetime.now().strftime("%Y%m%d_%H%M%S") - run_dir = os.path.join(root, f"{tag}_{stamp}") - ensure_dir(run_dir) - - csv_name = f"{tag}_{stamp}.csv" - csv_path = os.path.join(run_dir, csv_name) - with open(csv_path, "w") as f: - f.write("timestamp,request_id,model_name,prompt,output,logits_path\n") - - logits_dir = os.path.join(run_dir, "logits_analysis") - ensure_dir(logits_dir) - - print(f"[TokenLog] created run dir: {run_dir}") - print(f"[TokenLog] csv: {csv_path}") - print(f"[TokenLog] logits dir: {logits_dir}") - return run_dir, csv_path, logits_dir - - -def append_request_row(csv_path: str, request_id: str, model_name: str, prompt: str, output: str, logits_path: str | None): - ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - def esc(s: str) -> str: - if s is None: return "" - return s.replace("\n", "\\n").replace(",", ",") - with open(csv_path, "a") as f: - f.write(f"{ts},{request_id},{esc(model_name)},{esc(prompt)},{esc(output)},{esc(logits_path or '')}\n") - - -def load_base_model_and_tokenizer( - base_name: str, - device_map: str = "auto", - load_in_8bit: bool = False, - load_in_4bit: bool = False, - system_prompt: Optional[str] = None, - use_fast_tokenizer: bool = True, - padding_side: str = "left", -): - bnb_cfg = None - - if load_in_8bit: - bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) - - if load_in_4bit: - bnb_cfg = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", # {nf4, fp4}; nf4 is standard - bnb_4bit_compute_dtype=torch.bfloat16, - ) - - model_kwargs = { - 'device_map': device_map, - 'torch_dtype': torch.bfloat16, - 'quantization_config': bnb_cfg, - 'low_cpu_mem_usage': True, - 'trust_remote_code': True, - } - - tok = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer, trust_remote_code=True,) - tok = add_pad_token(tok, padding_side) - - model = AutoModelForCausalLM.from_pretrained( - MODEL_PATHS[base_name], - device_map=device_map, - torch_dtype=torch.bfloat16, - quantization_config=bnb_cfg, - trust_remote_code=True, - ) - model.eval() - return model, tok - -def get_chat_template_tokenized_chat_inputs(tokenizer, prompts): - """ - Use tokenizer.apply_chat_template for models like Qwen/Yi/Mistral/Gemma-*. 
- Returns a BatchEncoding dict with 'input_ids' and 'attention_mask'. - """ - def _msgs(p): - return [{"role": "user", "content": p}] - - # Render to string first, then tokenize → BatchEncoding (dict-like) - rendered = [ - tokenizer.apply_chat_template( - _msgs(p), - tokenize=False, # <-- important - add_generation_prompt=True - ) - for p in prompts - ] - enc = tokenizer( - rendered, - padding=True, - return_tensors="pt", - add_special_tokens=True - ) - return enc - -def base_generate_completions( - model, - tokenizer, - prompts, - max_new_tokens=600, - do_sample=False, -): - import torch - model.eval() - - # if chat template - if hasattr(tokenizer, "apply_chat_template") and getattr(tokenizer, "chat_template", None): - enc = get_chat_template_tokenized_chat_inputs(tokenizer, prompts) - else: - enc = tokenizer( - prompts, - return_tensors="pt", - padding=True, - truncation=True, - ) - # Ensure pad token is set - if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None: - tokenizer.pad_token_id = tokenizer.eos_token_id - - # Move to model device - enc = {k: v.to(model.device) for k, v in enc.items()} - - with torch.no_grad(): - gen_ids = model.generate( - **enc, - max_new_tokens=max_new_tokens, - do_sample=do_sample, - eos_token_id=tokenizer.eos_token_id, - pad_token_id=tokenizer.pad_token_id, - ) - - # Slice off the prompt portion - prompt_len = enc["input_ids"].shape[1] - new_tokens = gen_ids[:, prompt_len:] - - decoded = tokenizer.batch_decode(new_tokens, skip_special_tokens=True) - - predicted_labels = decoded - all_results = None - - return predicted_labels, all_results - - - - -class ProxyTuningClient(Client): - """ - A HELM client that uses ProxyTuning for inference instead of directly calling the model. - """ - - def __init__( - self, - tokenizer: Tokenizer, - tokenizer_name: str, - cache_config: CacheConfig, - model_name: str = None, - api_base: str = None, - api_key: str = None, - ): - self.cache = Cache(cache_config) - """ - Initializes the ProxyTuningClient. - - Args: - tokenizer (Tokenizer): Tokenizer instance (unused but required by HELM interface). - tokenizer_name (str): Name of the tokenizer (unused but required by HELM interface). - cache_config (CacheConfig): Configuration for caching. 
- - """ - self.run_dir, self.token_log_path, self.logits_dir = setup_run_dirs(model_name) - self.model_name = model_name - self.run_id = datetime.now().strftime("%Y%m%d_%H%M%S") - self.req_seq = 0 - tag = model_name.split("/")[-1] - # strip optional "proxy_tuning_" prefix - if tag.startswith("proxy_tuning_"): - tag = tag[len("proxy_tuning_"):] - - parts = tag.split("_") - base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = ( - parts[0], - parts[1], - parts[2], - float(parts[3]), - parts[4], - parts[5] - ) - self.k = int(k_str) - - self.is_unite = False - self.is_proxy = False - if expert_name != "none": - if antiexpert_name == "none": - self.is_unite = True - else: - self.is_proxy = True - - print("mn:", model_name) - print("tag:", tag) - print("b: ", base_name) - print("Ex:", expert_name) - print("ax", antiexpert_name) - print(self.alpha) - print(self.score_type) - print(self.k) - print("proxy: ", self.is_proxy) - print("unite: ", self.is_unite) - - if self.is_proxy: - self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer( - base_name=base_name, - expert_name=expert_name, - antiexpert_name=antiexpert_name, - load_in_8bit=False, - load_in_4bit=True, - use_fast_tokenizer=True, - system_prompt=None, - device_map='auto', - proxy_tune=self.is_proxy - ) - elif self.is_unite: - self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer( - base_name=base_name, - expert_name=expert_name, - antiexpert_name=antiexpert_name, - load_in_8bit=False, - load_in_4bit=True, - use_fast_tokenizer=True, - system_prompt=None, - device_map='auto', - proxy_tune=self.is_proxy, - unite=self.is_unite - ) - - else: - self.model, self.hf_tokenizer = load_base_model_and_tokenizer( - base_name=base_name, - load_in_4bit=False, - device_map="auto", - use_fast_tokenizer=True, - ) - - def make_request(self, request: Request) -> RequestResult: - """ - Handles a request by sending the prompt - - Args: - request (Request): The request object containing the prompt. - - Returns: - RequestResult: A HELM-compatible response object. 
- """ - prompt_text = request.prompt - - if request.messages: - prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") - - - print("prompt_text: ", prompt_text) - prompts = [prompt_text] - # turn prompt into a [] - if self.is_proxy or self.is_unite: - predicted_labels, all_results = generate_completions( - model=self.model, - tokenizer=self.hf_tokenizer, - prompts=prompts, - max_new_tokens=600, - do_sample=False, - num_return_sequences=1, - alpha=self.alpha, - k=self.k, - score_type=self.score_type, - unite=self.is_unite, - return_logits_for_analysis=False, - ) - else: - predicted_labels, all_results = base_generate_completions( - model=self.model, - tokenizer=self.hf_tokenizer, - prompts=prompts, - max_new_tokens=600, - do_sample=False, - ) - - - output_text = predicted_labels[0] - print("output_text: ", output_text) - - self.req_seq += 1 - request_id = f"{self.run_id}_r{self.req_seq:04d}" - - logits_path = None - if self.is_proxy and all_results: - logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") - torch.save(all_results, logits_path) - print(f"[Logits] wrote {logits_path}") - - append_request_row( - csv_path=self.token_log_path, - request_id=request_id, - model_name=self.model_name, - prompt=prompt_text, - output=output_text, - logits_path=logits_path, - ) - - # Return a HELM-compatible RequestResult - output = GeneratedOutput(text=output_text, logprob=0.0, tokens=[]) - return RequestResult(success=True, cached=False, completions=[output], embedding=[]) From bea4a9d61e477ec76e7762eef93d4d90e0799bef Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:49:43 -0700 Subject: [PATCH 08/42] Rename proxy_tuning_client (2).py to proxy_tuning_client.py --- .../{proxy_tuning_client (2).py => proxy_tuning_client.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/helm/clients/{proxy_tuning_client (2).py => proxy_tuning_client.py} (100%) diff --git a/src/helm/clients/proxy_tuning_client (2).py b/src/helm/clients/proxy_tuning_client.py similarity index 100% rename from src/helm/clients/proxy_tuning_client (2).py rename to src/helm/clients/proxy_tuning_client.py From fc4d6b1d9f54a8473d026001339f52c77aecf27d Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:51:54 -0700 Subject: [PATCH 09/42] Add files via upload --- ..._entries_medhelm_private_proxy_tuning.conf | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf diff --git a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf new file mode 100644 index 00000000000..99c7e36968c --- /dev/null +++ b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf @@ -0,0 +1,192 @@ +# MedHELM RunSpecs for the private benchmarks from Stanford. 
+ +entries: [ + + ########## Clinical Decision Support ########## + + ### Supporting Diagnostic Decisions ### + + #Alcohol Dependence + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Attention Deficit Hyperactivity Disorder + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Bipolar Disorder + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Chronic Pain + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Homelessness + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + 
{description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Liver Disease + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Major Depression + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + 
{description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Personality Disorder + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Post-Traumatic Stress Disorder + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Substance Use Disorder + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Suicidal Behavior + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Tobacco Dependence + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Unemployment + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + ### Planning Treatments ### + + ### Predicting Patient Risks and Outcomes ### + + + ########## Clinical Note Generation ########## + + ### Documenting Patient Visits ### + + ### Recording Procedures ### + + ### Documenting Diagnostic Reports ### + + ### Documenting Care Plans ### + {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + + ########## Patient Communication and Education ########## + + ### Providing Patient Education Resources ### + + ### Delivering Personalized Care Instructions ### + {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: 
"starr_patient_instructions:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + + ### Patient-Provider Messaging ### + + + ### Enhancing Patient Understanding and Accessibility in Health Communication ### + + ### Facilitating Patient Engagement and Support ### + + ########## Medical Research Assistance ########## + + ### Conducting Literature Research ### + + ### Analyzing Clinical Research Data ### + + ### Recording Research Processes ### + + + ### Ensuring Clinical Research Quality ### + + ### Managing Research Enrollment ### + + ########## Administration and Workflow ########## + + ### Scheduling Resources and Staff ### + + + ### Overseeing Financial Activities ### + + ### Care Coordination and Planning ### + + ### Organizing Workflow Processes ### + +] From 430fb519e5ae916ba20f7d66554c86a76ffd5d81 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:54:09 -0700 Subject: [PATCH 10/42] Add files via upload --- prod_env/model_deployments.yaml | 4732 +++++++++++++++++++++++++++++ prod_env/model_metadata.yaml | 4934 +++++++++++++++++++++++++++++++ prod_env/tokenizer_configs.yaml | 1287 ++++++++ 3 files changed, 10953 insertions(+) create mode 100644 prod_env/model_deployments.yaml create mode 100644 prod_env/model_metadata.yaml create mode 100644 prod_env/tokenizer_configs.yaml diff --git a/prod_env/model_deployments.yaml b/prod_env/model_deployments.yaml new file mode 100644 index 00000000000..73e704ac21e --- /dev/null +++ b/prod_env/model_deployments.yaml @@ -0,0 +1,4732 @@ +# This file defines all the model deployments that are supported by the Helm API. +# Some models have several deployments, each with different parameters. + +# If you want to add a new deployment, you can technically do it here but we recommend +# you to do it in prod_env/model_deployments.yaml instead. + +# Follow the template of this file to add a new deployment. You can copy paste this to get started: +# # This file defines all the model deployments that you do not want to be public. 
+# model_deployments: [] # Leave empty to disable private model deployments + +model_deployments: + - name: simple/model1 + model_name: simple/model1 + tokenizer_name: simple/tokenizer1 + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.simple_client.SimpleClient" + + # Stanford Health Care + # For internal use only for MedHELM + # Placed earlier in the file to make them non-default + - name: stanfordhealthcare/claude-3-5-sonnet-20241022 + model_name: anthropic/claude-3-5-sonnet-20241022 + tokenizer_name: anthropic/claude + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_claude_client.StanfordHealthCareClaudeClient" + args: + model: anthropic.claude-3-5-sonnet-20241022-v2:0 + deployment: Claude35Sonnetv2/awssig4fa + + - name: stanfordhealthcare/claude-3-7-sonnet-20250219 + model_name: anthropic/claude-3-7-sonnet-20250219 + tokenizer_name: anthropic/claude + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_claude_client.StanfordHealthCareClaudeClient" + args: + model: arn:aws:bedrock:us-west-2:679683451337:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0 + deployment: awssig4claude37/aswsig4claude37 + + - name: stanfordhealthcare/gemini-1.5-pro-001 + model_name: google/gemini-1.5-pro-001 + tokenizer_name: google/gemma-2b + max_sequence_length: 1000000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_google_client.StanfordHealthCareGoogleClient" + args: + deployment: gcpgemini/apim-gcp-oauth-fa + + - name: stanfordhealthcare/gemini-2.0-flash-001 + model_name: google/gemini-2.0-flash-001 + tokenizer_name: google/gemma-2b + max_sequence_length: 1000000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_google_client.StanfordHealthCareGoogleClient" + args: + deployment: gcp-gem20flash-fa/apim-gcp-gem20flash-fa + + - name: stanfordhealthcare/gpt-4o-mini-2024-07-18 + model_name: openai/gpt-4o-mini-2024-07-18 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: gpt-4o-mini + api_version: 2023-05-15 + + - name: stanfordhealthcare/gpt-4o-2024-05-13 + model_name: openai/gpt-4o-2024-05-13 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: gpt-4o + api_version: 2023-05-15 + base_url: "https://apim.stanfordhealthcare.org/openai3/deployments/" + + + - name: stanfordhealthcare/gpt-4-0613 + model_name: openai/gpt-4-0613 + tokenizer_name: openai/o200k_base + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: gpt-4 + api_version: 2023-05-15 + + - name: stanfordhealthcare/gpt-4-turbo-2024-04-09 + model_name: openai/gpt-4-turbo-2024-04-09 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: gpt-4-turbo + api_version: 2023-05-15 + + - name: stanfordhealthcare/gpt-4.1-2025-04-14 + model_name: openai/gpt-4.1-2025-04-14 + tokenizer_name: openai/o200k_base + max_sequence_length: 1047576 + client_spec: + class_name: 
"helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: gpt-4.1 + api_version: 2025-01-01-preview + base_url: "{endpoint}/openai-eastus2" + + - name: stanfordhealthcare/o3-mini-2025-01-31 + model_name: openai/o3-mini-2025-01-31 + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: o3-mini + api_version: 2024-12-01-preview + base_url: "{endpoint}/openai-eastus2" + + - name: stanfordhealthcare/o1-2024-12-17 + model_name: openai/o1-2024-12-17 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: o1 + api_version: 2024-12-01-preview + base_url: "{endpoint}/openai-eastus2" + + - name: stanfordhealthcare/deepseek-r1 + model_name: deepseek-ai/deepseek-r1 + tokenizer_name: deepseek-ai/deepseek-r1 + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + openai_model_name: deepseek-chat + output_processor: helm.benchmark.metrics.output_processors.remove_deepseek_r1_thinking + base_url: "{endpoint}/deepseekr1/v1" + + - name: stanfordhealthcare/llama-3.3-70b-instruct + model_name: meta/llama-3.3-70b-instruct + tokenizer_name: meta/llama-3.3-70b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama3370b/v1" + + - name: stanfordhealthcare/llama-4-scout-17b-16e-instruct + model_name: meta/llama-4-scout-17b-16e-instruct + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 327680 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama4-scout/v1" + + - name: stanfordhealthcare/llama-4-maverick-17b-128e-instruct-fp8 + model_name: meta/llama-4-maverick-17b-128e-instruct-fp8 + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 524288 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama4-maverick/v1" + + - name: stanfordhealthcare/phi-3.5-mini-instruct + model_name: microsoft/phi-3.5-mini-instruct + tokenizer_name: microsoft/phi-3.5-mini-instruct + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/phi35mi/v1" + + - name: stanfordhealthcare_shc/gpt-4o-2024-05-13 + model_name: openai/gpt-4o-2024-05-13 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient" + deployment: gpt-4o + + - name: stanfordhealthcare_shc/gpt-4o-mini-2024-07-18 + model_name: openai/gpt-4o-mini-2024-07-18 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient" + deployment: gpt-4o-mini + + - name: stanfordhealthcare_shc/gpt-4-turbo-2024-04-09 + model_name: openai/gpt-4-turbo-2024-04-09 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + 
client_spec:
+      class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient"
+    deployment: gpt-4-turbo-2024-04-09
"helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: o3-mini + api_version: 2024-12-01-preview + base_url: "{endpoint}/openai-eastus2" + + - name: stanfordhealthcare/o1-2024-12-17 + model_name: openai/o1-2024-12-17 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" + args: + openai_model_name: o1 + api_version: 2024-12-01-preview + base_url: "{endpoint}/openai-eastus2" + + - name: stanfordhealthcare/deepseek-r1 + model_name: deepseek-ai/deepseek-r1 + tokenizer_name: deepseek-ai/deepseek-r1 + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + openai_model_name: deepseek-chat + output_processor: helm.benchmark.metrics.output_processors.remove_deepseek_r1_thinking + base_url: "{endpoint}/deepseekr1/v1" + + - name: stanfordhealthcare/llama-3.3-70b-instruct + model_name: meta/llama-3.3-70b-instruct + tokenizer_name: meta/llama-3.3-70b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama3370b/v1" + + - name: stanfordhealthcare/llama-4-scout-17b-16e-instruct + model_name: meta/llama-4-scout-17b-16e-instruct + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 327680 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama4-scout/v1" + + - name: stanfordhealthcare/llama-4-maverick-17b-128e-instruct-fp8 + model_name: meta/llama-4-maverick-17b-128e-instruct-fp8 + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 524288 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/llama4-maverick/v1" + + - name: stanfordhealthcare/phi-3.5-mini-instruct + model_name: microsoft/phi-3.5-mini-instruct + tokenizer_name: microsoft/phi-3.5-mini-instruct + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" + args: + base_url: "{endpoint}/phi35mi/v1" + + - name: stanfordhealthcare_shc/gpt-4o-2024-05-13 + model_name: openai/gpt-4o-2024-05-13 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient" + deployment: gpt-4o + + - name: stanfordhealthcare_shc/gpt-4o-mini-2024-07-18 + model_name: openai/gpt-4o-mini-2024-07-18 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient" + deployment: gpt-4o-mini + + - name: stanfordhealthcare_shc/gpt-4-turbo-2024-04-09 + model_name: openai/gpt-4-turbo-2024-04-09 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient" + deployment: gpt-4-turbo-2024-04-09 + + # Adobe + - name: adobe/giga-gan + model_name: adobe/giga-gan + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.adobe_vision_client.AdobeVisionClient" + 
window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + # AI21 Labs + + - name: ai21/j2-large + model_name: ai21/j2-large + tokenizer_name: ai21/j2-tokenizer + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.ai21_client.AI21Client" + + - name: ai21/j2-grande + model_name: ai21/j2-grande + tokenizer_name: ai21/j2-tokenizer + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.ai21_client.AI21Client" + + - name: ai21/j2-jumbo + model_name: ai21/j2-jumbo + tokenizer_name: ai21/j2-tokenizer + max_sequence_length: 6000 + client_spec: + class_name: "helm.clients.ai21_client.AI21Client" + + - name: ai21/jamba-instruct + model_name: ai21/jamba-instruct + tokenizer_name: ai21/jamba-instruct-tokenizer + max_sequence_length: 256000 + client_spec: + class_name: "helm.clients.ai21_client.AI21ChatClient" + + - name: ai21/jamba-1.5-mini + model_name: ai21/jamba-1.5-mini + tokenizer_name: ai21/jamba-1.5-mini-tokenizer + max_sequence_length: 256000 + client_spec: + class_name: "helm.clients.ai21_client.AI21ChatClient" + + - name: ai21/jamba-1.5-large + model_name: ai21/jamba-1.5-large + tokenizer_name: ai21/jamba-1.5-large-tokenizer + max_sequence_length: 256000 + client_spec: + class_name: "helm.clients.ai21_client.AI21ChatClient" + + # Aleph Alpha + - name: AlephAlpha/luminous-base + model_name: AlephAlpha/luminous-base + tokenizer_name: AlephAlpha/luminous-base + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient" + + - name: AlephAlpha/luminous-extended + model_name: AlephAlpha/luminous-extended + tokenizer_name: AlephAlpha/luminous-extended + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient" + + - name: AlephAlpha/luminous-supreme + model_name: AlephAlpha/luminous-supreme + tokenizer_name: AlephAlpha/luminous-supreme + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient" + + # TODO: Add luminous-world once it is released + + - name: AlephAlpha/m-vader + model_name: AlephAlpha/m-vader + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.aleph_alpha_image_generation_client.AlephAlphaImageGenerationClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + + # Amazon nova models + - name: amazon/nova-premier-v1:0 + model_name: amazon/nova-premier-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 1000000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockNovaClient" + args: + bedrock_model_id: us.amazon.nova-premier-v1:0 + + - name: amazon/nova-pro-v1:0 + model_name: amazon/nova-pro-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 300000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockNovaClient" + + - name: amazon/nova-lite-v1:0 + model_name: amazon/nova-lite-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 300000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockNovaClient" + + - name: amazon/nova-micro-v1:0 + model_name: amazon/nova-micro-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockNovaClient" + + # Titan on Amazon Bedrock + + - name: amazon/titan-text-lite-v1 + model_name: amazon/titan-text-lite-v1 + 
tokenizer_name: huggingface/gpt2 + max_sequence_length: 4000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockTitanClient" + + - name: amazon/titan-text-express-v1 + model_name: amazon/titan-text-express-v1 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockTitanClient" + + # Mistral on Amazon Bedrock + + - name: amazon/mistral-7b-instruct-v0:2 + model_name: mistralai/amazon-mistral-7b-instruct-v0:2 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockMistralClient" + + - name: amazon/mixtral-8x7b-instruct-v0:1 + model_name: mistralai/amazon-mixtral-8x7b-instruct-v0:1 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 4000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockMistralClient" + + - name: amazon/mistral-large-2402-v1:0 + model_name: mistralai/amazon-mistral-large-2402-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockMistralClient" + + - name: amazon/mistral-small-2402-v1:0 + model_name: mistralai/amazon-mistral-small-2402-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockMistralClient" + + - name: amazon/mistral-large-2407-v1:0 + model_name: mistralai/amazon-mistral-large-2407-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockMistralClient" + + # Llama 3 on Amazon Bedrock + + - name: amazon/llama3-8b-instruct-v1:0 + model_name: meta/amazon-llama3-8b-instruct-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockLlamaClient" + + - name: amazon/llama3-70b-instruct-v1:0 + model_name: meta/amazon-llama3-70b-instruct-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockLlamaClient" + + - name: amazon/llama3-1-405b-instruct-v1:0 + model_name: meta/amazon-llama3-1-405b-instruct-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockLlamaClient" + + - name: amazon/llama3-1-70b-instruct-v1:0 + model_name: meta/amazon-llama3-1-70b-instruct-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockLlamaClient" + + + - name: amazon/llama3-1-8b-instruct-v1:0 + model_name: meta/amazon-llama3-1-8b-instruct-v1:0 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2000 + client_spec: + class_name: "helm.clients.bedrock_client.BedrockLlamaClient" + + # Anthropic + - name: anthropic/claude-v1.3 + model_name: anthropic/claude-v1.3 + tokenizer_name: anthropic/claude + max_sequence_length: 8000 + max_sequence_and_generated_tokens_length: 9016 + client_spec: + class_name: "helm.clients.anthropic_client.AnthropicClient" + + - name: anthropic/claude-instant-v1 + model_name: anthropic/claude-instant-v1 + tokenizer_name: anthropic/claude + max_sequence_length: 8000 + max_sequence_and_generated_tokens_length: 9016 + client_spec: + class_name: "helm.clients.anthropic_client.AnthropicClient" + + - name: anthropic/claude-instant-1.2 + model_name: anthropic/claude-instant-1.2 + tokenizer_name: anthropic/claude + max_sequence_length: 8000 + 
+    max_sequence_and_generated_tokens_length: 9016
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+  - name: anthropic/claude-2.0
+    model_name: anthropic/claude-2.0
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 8000
+    max_sequence_and_generated_tokens_length: 9016
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+  - name: anthropic/claude-2.1
+    model_name: anthropic/claude-2.1
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 8000
+    max_sequence_and_generated_tokens_length: 9016
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+  - name: anthropic/claude-3-sonnet-20240229
+    model_name: anthropic/claude-3-sonnet-20240229
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-haiku-20240307
+    model_name: anthropic/claude-3-haiku-20240307
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-opus-20240229
+    model_name: anthropic/claude-3-opus-20240229
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-5-haiku-20241022
+    model_name: anthropic/claude-3-5-haiku-20241022
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-5-sonnet-20240620
+    model_name: anthropic/claude-3-5-sonnet-20240620
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-5-sonnet-20241022
+    model_name: anthropic/claude-3-5-sonnet-20241022
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-7-sonnet-20250219
+    model_name: anthropic/claude-3-7-sonnet-20250219
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
+    model_name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+      args:
+        anthropic_model_name: claude-3-7-sonnet-20250219
+        thinking_budget_tokens: 10000
+        stream: true
+
+  - name: anthropic/claude-sonnet-4-20250514
+    model_name: anthropic/claude-sonnet-4-20250514
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-sonnet-4-20250514-thinking-10k
+    model_name: anthropic/claude-sonnet-4-20250514-thinking-10k
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+      args:
+        anthropic_model_name: claude-sonnet-4-20250514
+        thinking_budget_tokens: 10000
+        stream: true
+
+  - name: anthropic/claude-opus-4-20250514
+    model_name: anthropic/claude-opus-4-20250514
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+  - name: anthropic/claude-opus-4-20250514-thinking-10k
+    model_name: anthropic/claude-opus-4-20250514-thinking-10k
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+      args:
+        anthropic_model_name: claude-opus-4-20250514
+        thinking_budget_tokens: 10000
+        stream: true
+
+  - name: anthropic/stanford-online-all-v4-s3
+    deprecated: true # Closed model, not accessible via API
+    model_name: anthropic/stanford-online-all-v4-s3
+    tokenizer_name: huggingface/gpt2
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
+
+  # Cohere
+  - name: cohere/command
+    model_name: cohere/command
+    tokenizer_name: cohere/command
+    max_sequence_length: 2019 # TODO: verify this
+    max_request_length: 2020 # TODO: verify this
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereClient"
+
+  - name: cohere/command-light
+    model_name: cohere/command-light
+    tokenizer_name: cohere/command-light
+    max_sequence_length: 2019 # TODO: verify this
+    max_request_length: 2020 # TODO: verify this
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereClient"
+
+  - name: cohere/command-r
+    model_name: cohere/command-r
+    tokenizer_name: cohere/command-r
+    max_sequence_length: 128000
+    max_request_length: 128000
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereChatClient"
+
+  - name: cohere/command-r-plus
+    model_name: cohere/command-r-plus
+    tokenizer_name: cohere/command-r-plus
+    # "We have a known issue where prompts between 112K - 128K in length
+    # result in bad generations."
+    # Source: https://docs.cohere.com/docs/command-r-plus
+    max_sequence_length: 110000
+    max_request_length: 110000
+    client_spec:
+      class_name: "helm.clients.cohere_client.CohereChatClient"
+
+  # Craiyon
+
+  - name: craiyon/dalle-mini
+    model_name: craiyon/dalle-mini
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: craiyon/dalle-mega
+    model_name: craiyon/dalle-mega
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  # Databricks
+
+  - name: together/dbrx-instruct
+    model_name: databricks/dbrx-instruct
+    tokenizer_name: databricks/dbrx-instruct
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  # DeepFloyd
+
+  - name: DeepFloyd/IF-I-M-v1.0
+    model_name: DeepFloyd/IF-I-M-v1.0
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: DeepFloyd/IF-I-L-v1.0
+    model_name: DeepFloyd/IF-I-L-v1.0
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: DeepFloyd/IF-I-XL-v1.0
+    model_name: DeepFloyd/IF-I-XL-v1.0
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  # Deepseek
+
+  - name: together/deepseek-llm-67b-chat
+    model_name: deepseek-ai/deepseek-llm-67b-chat
+    tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  - name: together/deepseek-v3
+    model_name: deepseek-ai/deepseek-v3
+    tokenizer_name: deepseek-ai/deepseek-v3
+    max_sequence_length: 16384
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        disable_logprobs: True
+
+  - name: together/deepseek-r1-0528
+    model_name: deepseek-ai/deepseek-r1-0528
+    tokenizer_name: deepseek-ai/deepseek-r1
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: deepseek-ai/deepseek-r1
+        parse_thinking: true
+        disable_logprobs: True
+
+  # Gooseai
+
+  # TODO: Migrate these models to use OpenAIClient
+
+  ## EleutherAI
+  # - name: gooseai/gpt-neo-20b
+  #   model_name: eleutherai/gpt-neox-20b
+  #   tokenizer_name: EleutherAI/gpt-neox-20b
+  #   max_sequence_length: 2048
+  #   max_request_length: 2049
+  #   client_spec:
+  #     class_name: "helm.clients.goose_ai_client.GooseAIClient"
+
+  # - name: gooseai/gpt-j-6b
+  #   model_name: eleutherai/gpt-j-6b
+  #   tokenizer_name: EleutherAI/gpt-j-6B
+  #   max_sequence_length: 2048
+  #   max_request_length: 2049
+  #   client_spec:
+  #     class_name: "helm.clients.goose_ai_client.GooseAIClient"
+
+  # Google
+  # See: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning
+
+  ## Gemini
+  # See: https://ai.google.dev/models/gemini#model_variations
+  - name: google/gemini-pro
+    model_name: google/gemini-pro
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 30720
+    max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.0-pro-001
+    model_name: google/gemini-1.0-pro-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 30720
+    max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.0-pro-002
+    model_name: google/gemini-1.0-pro-002
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 30720
+    max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-pro-vision
+    model_name: google/gemini-pro-vision
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 12288
+    max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.0-pro-vision-001
+    model_name: google/gemini-1.0-pro-vision-001
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 12288
+    max_sequence_and_generated_tokens_length: 16384
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-flash-001
+    model_name: google/gemini-1.5-flash-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-pro-001
+    model_name: google/gemini-1.5-pro-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-pro-preview-0409
+    model_name: google/gemini-1.5-pro-preview-0409
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-pro-preview-0514
+    model_name: google/gemini-1.5-pro-preview-0514
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-flash-preview-0514
+    model_name: google/gemini-1.5-flash-preview-0514
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
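+  # The Gemini 1.5 entries above track the documented 8192-token output cap only as a
+  # TODO. If it were encoded the way the gemini-pro entry encodes its cap, each entry
+  # would gain one line (a sketch of that convention, not something upstream has done
+  # for these models; 1000000 + 8192 = 1008192):
+  #   max_sequence_and_generated_tokens_length: 1008192 # max_sequence_length + 8192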
+  ## Gemini with different safety settings
+  - name: google/gemini-1.5-pro-001-safety-default
+    model_name: google/gemini-1.5-pro-001-safety-default
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: gemini-1.5-pro-001
+        safety_settings_preset: default
+
+  - name: google/gemini-1.5-pro-001-safety-block-none
+    model_name: google/gemini-1.5-pro-001-safety-block-none
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: gemini-1.5-pro-001
+        safety_settings_preset: block_none
+
+  - name: google/gemini-1.5-flash-001-safety-default
+    model_name: google/gemini-1.5-flash-001-safety-default
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: gemini-1.5-flash-001
+        safety_settings_preset: default
+
+  - name: google/gemini-1.5-flash-001-safety-block-none
+    model_name: google/gemini-1.5-flash-001-safety-block-none
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: gemini-1.5-flash-001
+        safety_settings_preset: block_none
+
+  - name: google/gemini-1.5-pro-002
+    model_name: google/gemini-1.5-pro-002
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-1.5-flash-002
+    model_name: google/gemini-1.5-flash-002
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.0-pro-exp-02-05
+    model_name: google/gemini-2.0-pro-exp-02-05
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
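+  # The safety variants above show the aliasing pattern used in this file: `name` is
+  # the HELM-visible deployment, while args.vertexai_model selects the shared
+  # underlying model. A hypothetical sketch for aliasing another snapshot the same way
+  # (not a real deployment registered here):
+  # - name: google/gemini-1.5-pro-002-safety-block-none
+  #   model_name: google/gemini-1.5-pro-002-safety-block-none
+  #   tokenizer_name: google/gemma-2b
+  #   max_sequence_length: 1000000
+  #   client_spec:
+  #     class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+  #     args:
+  #       vertexai_model: gemini-1.5-pro-002
+  #       safety_settings_preset: block_none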
+  - name: google/gemini-2.0-flash-exp
+    model_name: google/gemini-2.0-flash-exp
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.0-flash-001
+    model_name: google/gemini-2.0-flash-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.0-flash-lite-preview-02-05
+    model_name: google/gemini-2.0-flash-lite-preview-02-05
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.0-flash-lite-001
+    model_name: google/gemini-2.0-flash-lite-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.0-flash-thinking-exp-01-21
+    model_name: google/gemini-2.0-flash-thinking-exp-01-21
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-flash-lite-preview-06-17
+    model_name: google/gemini-2.5-flash-lite-preview-06-17
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        # Only the global location is supported. See:
+        # - https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-lite
+        # - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#global-endpoint
+        location: global
+
+  - name: google/gemini-2.5-flash-lite
+    model_name: google/gemini-2.5-flash-lite
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
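+  # Per the comments on gemini-2.5-flash-lite-preview-06-17 above, some models are
+  # served only from the global endpoint, while the GA gemini-2.5-flash-lite entry
+  # omits the override and presumably falls back to the client's default location.
+  # If another model needed the same treatment, the override is a single client arg
+  # (a sketch following the entry above):
+  #   client_spec:
+  #     class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+  #     args:
+  #       location: global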
+  - name: google/gemini-2.5-flash-preview-04-17
+    model_name: google/gemini-2.5-flash-preview-04-17
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-flash-preview-05-20
+    model_name: google/gemini-2.5-flash-preview-05-20
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-flash
+    model_name: google/gemini-2.5-flash
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-pro-exp-03-25
+    model_name: google/gemini-2.5-pro-exp-03-25
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-pro-preview-03-25
+    model_name: google/gemini-2.5-pro-preview-03-25
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-pro-preview-05-06
+    model_name: google/gemini-2.5-pro-preview-05-06
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/gemini-2.5-pro
+    model_name: google/gemini-2.5-pro
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
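+  # Note on the two context-window figures used above: the Gemini 1.5 entries carry
+  # the rounded 1000000 from the older models page, while the 2.5 entries carry the
+  # exact documented window of 1048576 tokens (2^20 = 1024 * 1024).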
+  - name: google/gemini-1.5-flash-8b-001
+    model_name: google/gemini-1.5-flash-8b-001
+    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+    # TODO: Max output tokens: 8192
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+  - name: google/llama-3.1-8b-instruct
+    model_name: meta/llama-3.1-8b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-8b-instruct-maas
+
+  - name: google/llama-3.1-70b-instruct
+    model_name: meta/llama-3.1-70b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-70b-instruct-maas
+
+  - name: google/llama-3.1-405b-instruct
+    model_name: meta/llama-3.1-405b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+      args:
+        vertexai_model: publishers/meta/models/llama-3.1-405b-instruct-maas
+
+  ## Gemma
+  - name: together/gemma-2b
+    model_name: google/gemma-2b
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/gemma-2b-it
+    model_name: google/gemma-2b-it
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/gemma-7b
+    model_name: google/gemma-7b
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/gemma-7b-it
+    model_name: google/gemma-7b-it
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/gemma-2-9b-it
+    model_name: google/gemma-2-9b-it
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/gemma-2-27b-it
+    model_name: google/gemma-2-27b-it
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  ## MedLM
+  - name: google/medlm-medium
+    model_name: google/medlm-medium
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/medlm-large
+    model_name: google/medlm-large
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  ## PaliGemma
+  - name: google/paligemma-3b-mix-224
+    model_name: google/paligemma-3b-mix-224
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
+
+  - name: google/paligemma-3b-mix-448
+    model_name: google/paligemma-3b-mix-448
+    tokenizer_name: google/gemma-2b
+    max_sequence_length: 7167
+    client_spec:
+      class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
+
+  ## PaLM 2
+  - name: google/text-bison@001
+    model_name: google/text-bison@001
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/text-bison@002
+    model_name: google/text-bison@002
+    tokenizer_name: google/text-bison@002
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/text-bison-32k
+    model_name: google/text-bison-32k
+    tokenizer_name: google/text-bison@001
+    max_sequence_length: 32000
+    max_sequence_and_generated_tokens_length: 32000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/text-unicorn@001
+    model_name: google/text-unicorn@001
+    tokenizer_name: google/text-unicorn@001
+    max_sequence_length: 6000 # Officially 8192
+    max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/code-bison@001
+    model_name: google/code-bison@001
+    tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+    max_sequence_length: 6000 # Officially 6144
+    max_sequence_and_generated_tokens_length: 7000 # Officially 7168
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/code-bison@002
+    model_name: google/code-bison@002
+    tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+    max_sequence_length: 6000 # Officially 6144
+    max_sequence_and_generated_tokens_length: 7168
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  - name: google/code-bison-32k
+    model_name: google/code-bison-32k
+    tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+    max_sequence_length: 32000
+    max_sequence_and_generated_tokens_length: 32000
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAITextClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+  # HuggingFace
+
+  ## AI Singapore
+  - name: huggingface/sea-lion-7b
+    model_name: aisingapore/sea-lion-7b
+    tokenizer_name: aisingapore/sea-lion-7b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+
+  - name: huggingface/sea-lion-7b-instruct
+    model_name: aisingapore/sea-lion-7b-instruct
+    tokenizer_name: aisingapore/sea-lion-7b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+
+  - name: huggingface/llama3-8b-cpt-sea-lionv2-base
+    model_name: aisingapore/llama3-8b-cpt-sea-lionv2-base
+    tokenizer_name: meta/llama-3-8b-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3-8b-cpt-sea-lionv2.1-instruct
+    model_name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
+    tokenizer_name: meta/llama-3-8b-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/gemma2-9b-cpt-sea-lionv3-base
+    model_name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/gemma2-9b-cpt-sea-lionv3-instruct
+    model_name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3.1-8b-cpt-sea-lionv3-base
+    model_name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
+    tokenizer_name: meta/llama-3.1-8b
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3.1-8b-cpt-sea-lionv3-instruct
+    model_name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
+    tokenizer_name: meta/llama-3.1-8b
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3.1-70b-cpt-sea-lionv3-base
+    model_name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
+    tokenizer_name: meta/llama-3.1-8b
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3.1-70b-cpt-sea-lionv3-instruct
+    model_name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
+    tokenizer_name: meta/llama-3.1-8b
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  ## Bigcode
+  - name: huggingface/santacoder
+    model_name: bigcode/santacoder
+    tokenizer_name: bigcode/santacoder
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/starcoder
+    model_name: bigcode/starcoder
+    tokenizer_name: bigcode/starcoder
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## Biomistral
+
+  - name: huggingface/biomistral-7b
+    model_name: biomistral/biomistral-7b
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## Databricks
+  - name: huggingface/dolly-v2-3b
+    model_name: databricks/dolly-v2-3b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/dolly-v2-7b
+    model_name: databricks/dolly-v2-7b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/dolly-v2-12b
+    model_name: databricks/dolly-v2-12b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## EleutherAI
+  - name: huggingface/pythia-1b-v0
+    model_name: eleutherai/pythia-1b-v0
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/pythia-2.8b-v0
+    model_name: eleutherai/pythia-2.8b-v0
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/pythia-6.9b
+    model_name: eleutherai/pythia-6.9b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/pythia-12b-v0
+    model_name: eleutherai/pythia-12b-v0
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/gpt-j-6b
+    model_name: eleutherai/gpt-j-6b
+    tokenizer_name: EleutherAI/gpt-j-6B
+    max_sequence_length: 2048
+    max_request_length: 2049
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/gpt-neox-20b
+    model_name: eleutherai/gpt-neox-20b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    max_request_length: 2049
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## Google
+  - name: huggingface/gemma-2-9b
+    model_name: google/gemma-2-9b
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/gemma-2-9b-it
+    model_name: google/gemma-2-9b-it
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/gemma-2-27b
+    model_name: google/gemma-2-27b
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/gemma-2-27b-it
+    model_name: google/gemma-2-27b-it
+    tokenizer_name: google/gemma-2-9b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  ## LMSYS
+  - name: huggingface/vicuna-7b-v1.3
+    model_name: lmsys/vicuna-7b-v1.3
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/vicuna-13b-v1.3
+    model_name: lmsys/vicuna-13b-v1.3
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## Meditron
+
+  - name: huggingface/meditron-7b
+    model_name: epfl-llm/meditron-7b
+    tokenizer_name: meta-llama/Llama-2-7b-hf
+    max_sequence_length: 4094
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  ## Meta
+  - name: huggingface/llama-3.1-8b-instruct
+    model_name: meta/llama-3.1-8b-instruct
+    tokenizer_name: meta/llama-3.1-8b-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
+
+  - name: huggingface/llama-3.2-1b-instruct
+    model_name: meta/llama-3.2-1b-instruct
+    tokenizer_name: meta/llama-3.2-1b-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+
+  - name: huggingface/opt-175b
+    model_name: meta/opt-175b
+    tokenizer_name: facebook/opt-66b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: facebook/opt-175b
+
+  - name: huggingface/opt-66b
+    model_name: meta/opt-66b
+    tokenizer_name: facebook/opt-66b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: facebook/opt-66b
+
+  - name: huggingface/opt-6.7b
+    model_name: meta/opt-6.7b
+    tokenizer_name: facebook/opt-66b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: facebook/opt-6.7b
+
+  - name: huggingface/opt-1.3b
+    model_name: meta/opt-1.3b
+    tokenizer_name: facebook/opt-66b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: facebook/opt-1.3b
+
+  ## Microsoft
+  - name: huggingface/llava-1.5-7b-hf
+    model_name: microsoft/llava-1.5-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-1.5-13b-hf
+    model_name: microsoft/llava-1.5-13b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-vicuna-7b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-vicuna-13b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-13b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-mistral-7b-hf
+    model_name: uw-madison/llava-v1.6-mistral-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-34b-hf
+    model_name: uw-madison/llava-v1.6-34b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  ## NECTEC
+  - name: huggingface/Pathumma-llm-text-1.0.0
+    model_name: nectec/Pathumma-llm-text-1.0.0
+    tokenizer_name: nectec/Pathumma-llm-text-1.0.0
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/OpenThaiLLM-Prebuilt-7B
+    model_name: nectec/OpenThaiLLM-Prebuilt-7B
+    tokenizer_name: nectec/OpenThaiLLM-Prebuilt-7B
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        apply_chat_template: false
+
+  ## KAIST AI
+  - name: huggingface/prometheus-vision-13b-v1.0-hf
+    model_name: kaistai/prometheus-vision-13b-v1.0-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  ## OpenFlamingo
+  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+    model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+    tokenizer_name: anas-awadalla/mpt-7b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.open_flamingo_client.OpenFlamingoClient"
+      args:
+        checkpoint_path: "openflamingo/OpenFlamingo-9B-vitl-mpt7b"
+        tokenizer_name: "anas-awadalla-2/mpt-7b"
+        cross_attn_every_n_layers: 4
+
+  ## Marin Community
+  - name: huggingface/marin-8b-instruct
+    model_name: marin-community/marin-8b-instruct
+    tokenizer_name: marin-community/marin-8b-instruct
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  - name: together/marin-8b-instruct
+    model_name: marin-community/marin-8b-instruct
+    tokenizer_name: marin-community/marin-8b-instruct
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  ## Microsoft
+  - name: together/phi-2
+    model_name: microsoft/phi-2
+    tokenizer_name: microsoft/phi-2
+    max_sequence_length: 2047
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: huggingface/phi-3-small-8k-instruct
+    model_name: microsoft/phi-3-small-8k-instruct
+    tokenizer_name: microsoft/phi-3-small-8k-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        torch_dtype: auto
+        trust_remote_code: true
+
+  - name: huggingface/phi-3-medium-4k-instruct
+    model_name: microsoft/phi-3-medium-4k-instruct
+    tokenizer_name: microsoft/phi-3-medium-4k-instruct
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: auto
+
+  - name: huggingface/phi-3.5-mini-instruct-4bit
+    model_name: microsoft/phi-3.5-mini-instruct
+    tokenizer_name: microsoft/phi-3.5-mini-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: "float16"
+        quantization_config:
+          load_in_4bit: true
+        attn_implementation: "flash_attention_2"
+
+  - name: huggingface/phi-3.5-mini-instruct
+    model_name: microsoft/phi-3.5-mini-instruct
+    tokenizer_name: microsoft/phi-3.5-mini-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/phi-3.5-moe-instruct
+    model_name: microsoft/phi-3.5-moe-instruct
+    tokenizer_name: microsoft/phi-3.5-mini-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: auto
+
+  ## Mistral AI
+  - name: huggingface/bakLlava-v1-hf
+    model_name: mistralai/bakLlava-v1-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  ## Moonshot AI
+  - name: together/kimi-k2-instruct
+    model_name: moonshotai/kimi-k2-instruct
+    tokenizer_name: moonshotai/kimi-k2-instruct
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
+  ## MosaicML
+  - name: huggingface/mpt-7b
+    model_name: mosaicml/mpt-7b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: mosaicml/mpt-7b
+
+  - name: huggingface/mpt-instruct-7b
+    model_name: mosaicml/mpt-instruct-7b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
+
+  - name: huggingface/mpt-30b
+    model_name: mosaicml/mpt-30b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/mpt-instruct-30b
+    model_name: mosaicml/mpt-instruct-30b
+    tokenizer_name: EleutherAI/gpt-neox-20b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: mosaicml/mpt-30b-instruct
+
+  ## OpenAI
+  - name: huggingface/gpt2
+    model_name: openai/gpt2
+    tokenizer_name: huggingface/gpt2
+    max_sequence_length: 1024
+    max_request_length: 1025
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: openai-community/gpt2
+
+  ## OpenThaiGPT
+  - name: huggingface/openthaigpt-1.0.0-7b-chat
+    model_name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/openthaigpt-1.0.0-13b-chat
+    model_name: openthaigpt/openthaigpt-1.0.0-13b-chat
+    tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  - name: huggingface/openthaigpt-1.0.0-70b-chat
+    model_name: openthaigpt/openthaigpt-1.0.0-70b-chat
+    tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  ## SAIL (SEA AI Lab)
+  - name: huggingface/sailor-7b
+    model_name: sail/sailor-7b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        apply_chat_template: false
+
+  - name: huggingface/sailor-7b-chat
+    model_name: sail/sailor-7b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/sailor-14b
+    model_name: sail/sailor-14b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        apply_chat_template: false
+
+  - name: huggingface/sailor-14b-chat
+    model_name: sail/sailor-14b-chat
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  # SambaNova
+  - name: huggingface/sambalingo-thai-base
+    model_name: sambanova/sambalingo-thai-base
+    tokenizer_name: sambanova/sambalingo-thai-base
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
+
+  - name: huggingface/sambalingo-thai-chat
+    model_name: sambanova/sambalingo-thai-chat
+    tokenizer_name: sambanova/sambalingo-thai-base
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Chat
+
+  - name: huggingface/sambalingo-thai-base-70b
+    model_name: sambanova/sambalingo-thai-base-70b
+    tokenizer_name: sambanova/sambalingo-thai-base
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
+        device_map: auto
+
+  - name: huggingface/sambalingo-thai-chat-70b
+    model_name: sambanova/sambalingo-thai-chat-70b
+    tokenizer_name: sambanova/sambalingo-thai-base
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Chat-70B
+        device_map: auto
+
+  ## SCB10X
+  - name: huggingface/typhoon-7b
+    model_name: scb10x/typhoon-7b
+    tokenizer_name: scb10x/typhoon-7b
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/typhoon-v1.5-8b
+    model_name: scb10x/typhoon-v1.5-8b
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/typhoon-v1.5-8b-instruct
+    model_name: scb10x/typhoon-v1.5-8b-instruct
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/typhoon-v1.5-72b
+    model_name: scb10x/typhoon-v1.5-72b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  - name: huggingface/typhoon-v1.5-72b-instruct
+    model_name: scb10x/typhoon-v1.5-72b-instruct
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+
+  - name: huggingface/llama-3-typhoon-v1.5x-8b-instruct
+    model_name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+  - name: huggingface/llama-3-typhoon-v1.5x-70b-instruct
+    model_name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
"helm.clients.huggingface_client.HuggingFaceClient" + args: + device_map: auto + + # Alibaba DAMO Academy + - name: huggingface/seallm-7b-v2 + model_name: damo/seallm-7b-v2 + tokenizer_name: damo/seallm-7b-v2 + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2 + + - name: huggingface/seallm-7b-v2.5 + model_name: damo/seallm-7b-v2.5 + tokenizer_name: damo/seallm-7b-v2.5 + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5 + + ## StabilityAI + - name: huggingface/stablelm-base-alpha-3b + model_name: stabilityai/stablelm-base-alpha-3b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + + - name: huggingface/stablelm-base-alpha-7b + model_name: stabilityai/stablelm-base-alpha-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + + # Upstage + - name: huggingface/solar-pro-preview-instruct + model_name: upstage/solar-pro-preview-instruct + tokenizer_name: upstage/solar-pro-preview-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + torch_dtype: auto + trust_remote_code: true + + ## Text-to-Image Diffusion Models + + - name: huggingface/dreamlike-diffusion-v1-0 + model_name: huggingface/dreamlike-diffusion-v1-0 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/dreamlike-photoreal-v2-0 + model_name: huggingface/dreamlike-photoreal-v2-0 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/openjourney-v1-0 + model_name: huggingface/openjourney-v1-0 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/openjourney-v2-0 + model_name: huggingface/openjourney-v2-0 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/redshift-diffusion + model_name: huggingface/redshift-diffusion + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: 
"helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/promptist-stable-diffusion-v1-4 + model_name: huggingface/promptist-stable-diffusion-v1-4 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-v1-4 + model_name: huggingface/stable-diffusion-v1-4 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-v1-5 + model_name: huggingface/stable-diffusion-v1-5 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-v2-base + model_name: huggingface/stable-diffusion-v2-base + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-v2-1-base + model_name: huggingface/stable-diffusion-v2-1-base + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-safe-weak + model_name: huggingface/stable-diffusion-safe-weak + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-safe-medium + model_name: huggingface/stable-diffusion-safe-medium + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-safe-strong + model_name: huggingface/stable-diffusion-safe-strong + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + - name: huggingface/stable-diffusion-safe-max + model_name: huggingface/stable-diffusion-safe-max + 
+  - name: huggingface/stable-diffusion-safe-max
+    model_name: huggingface/stable-diffusion-safe-max
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: huggingface/vintedois-diffusion-v0-1
+    model_name: huggingface/vintedois-diffusion-v0-1
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: segmind/Segmind-Vega
+    model_name: segmind/Segmind-Vega
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: segmind/SSD-1B
+    model_name: segmind/SSD-1B
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  - name: stabilityai/stable-diffusion-xl-base-1.0
+    model_name: stabilityai/stable-diffusion-xl-base-1.0
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  # HuggingFaceM4
+  - name: HuggingFaceM4/idefics2-8b
+    model_name: HuggingFaceM4/idefics2-8b
+    # From https://huggingface.co/docs/transformers/main/en/model_doc/idefics2,
+    # "constructs a IDEFICS2 processor which wraps a LLama tokenizer."
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vision2seq_client.HuggingFaceVision2SeqClient"
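+  # As with the LLaVA entries earlier in this file, hf-internal-testing/llama-tokenizer
+  # stands in above for the Llama tokenizer that the IDEFICS2 processor wraps, so the
+  # window size is an approximation rather than an exact processor token count.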
+
+  - name: HuggingFaceM4/idefics-9b
+    model_name: HuggingFaceM4/idefics-9b
+    tokenizer_name: HuggingFaceM4/idefics-9b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+  - name: HuggingFaceM4/idefics-9b-instruct
+    model_name: HuggingFaceM4/idefics-9b-instruct
+    tokenizer_name: HuggingFaceM4/idefics-9b-instruct
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+  - name: HuggingFaceM4/idefics-80b
+    model_name: HuggingFaceM4/idefics-80b
+    tokenizer_name: HuggingFaceM4/idefics-80b
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+  - name: HuggingFaceM4/idefics-80b-instruct
+    model_name: HuggingFaceM4/idefics-80b-instruct
+    tokenizer_name: HuggingFaceM4/idefics-80b-instruct
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+  # Lexica
+  - name: lexica/search-stable-diffusion-1.5
+    model_name: lexica/search-stable-diffusion-1.5
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 200
+    client_spec:
+      class_name: "helm.clients.image_generation.lexica_client.LexicaClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.lexica_search_window_service.LexicaSearchWindowService"
+
+  # Kakao
+  - name: kakaobrain/mindall-e
+    model_name: kakaobrain/mindall-e
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.mindalle_client.MinDALLEClient"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+  # Lightning AI
+  - name: lightningai/lit-gpt
+    model_name: lightningai/lit-gpt
+    tokenizer_name: lightningai/lit-gpt
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.lit_gpt_client.LitGPTClient"
+      args:
+        checkpoint_dir: "" # Path to the checkpoint directory
+        precision: bf16-true
+
+  # Mistral AI
+
+  - name: mistralai/ministral-3b-2410
+    model_name: mistralai/ministral-3b-2410
+    tokenizer_name: mistralai/Ministral-8B-Instruct-2410
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/ministral-8b-2410
+    model_name: mistralai/ministral-8b-2410
+    tokenizer_name: mistralai/Ministral-8B-Instruct-2410
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-small-2402
+    model_name: mistralai/mistral-small-2402
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-small-2409
+    model_name: mistralai/mistral-small-2409
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-small-2501
+    model_name: mistralai/mistral-small-2501
+    tokenizer_name: mistralai/Mistral-Small-24B-Instruct-2501
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-small-2503
+    model_name: mistralai/mistral-small-2503
+    tokenizer_name: mistralai/Mistral-Small-24B-Instruct-2501
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-medium-2312
+    model_name: mistralai/mistral-medium-2312
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-medium-2505
+    model_name: mistralai/mistral-medium-2505
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-large-2402
+    model_name: mistralai/mistral-large-2402
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-large-2407
+    model_name: mistralai/mistral-large-2407
+    tokenizer_name: mistralai/Mistral-Large-Instruct-2407
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-large-2411
+    model_name: mistralai/mistral-large-2411
+    tokenizer_name: mistralai/Mistral-Large-Instruct-2411
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/open-mistral-nemo-2407
+    model_name: mistralai/open-mistral-nemo-2407
+    tokenizer_name: mistralai/Mistral-Nemo-Base-2407
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/pixtral-12b-2409
+    model_name: mistralai/pixtral-12b-2409
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/pixtral-large-2411
+    model_name: mistralai/pixtral-large-2411
+    tokenizer_name: mistralai/Mistral-Large-Instruct-2407
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.mistral_client.MistralAIClient"
+
+  # NeurIPS
+  - name: neurips/local
+    model_name: neurips/local
+    tokenizer_name: neurips/local
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.http_model_client.HTTPModelClient"
+
+  # Nvidia
+  - name: nvidia/megatron-gpt2
+    model_name: nvidia/megatron-gpt2
+    tokenizer_name: huggingface/gpt2
+    max_sequence_length: 1024
+    client_spec:
+      class_name: "helm.clients.megatron_client.MegatronClient"
+
+  - name: nvidia/nemotron-4-340b-instruct
+    model_name: nvidia/nemotron-4-340b-instruct
+    tokenizer_name: nvidia/nemotron-4-340b-instruct
+    max_sequence_length: 4085
+    client_spec:
+      class_name: "helm.clients.nvidia_nim_client.NvidiaNimClient"
+
+  # OpenAI
+
+  ## GPT 3 Models
+
+  - name: openai/davinci-002
+    model_name: openai/davinci-002
+    tokenizer_name: openai/cl100k_base
+    # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
+    # to provide a margin of error.
+    max_sequence_length: 16000
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient"
+
+  - name: openai/babbage-002
+    model_name: openai/babbage-002
+    tokenizer_name: openai/cl100k_base
+    # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
+    # to provide a margin of error.
+    max_sequence_length: 16000
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient"
+ max_sequence_length: 16000 + client_spec: + class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient" + + ## GPT 3.5 Turbo Models + # ChatGPT: https://openai.com/blog/chatgpt + + - name: openai/gpt-3.5-turbo-instruct + model_name: openai/gpt-3.5-turbo-instruct + tokenizer_name: openai/cl100k_base + max_sequence_length: 4096 + max_request_length: 4097 + client_spec: + class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient" + + # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable + # sequence length is smaller at 4087 with one user input message and one assistant + # output message because ChatGPT uses special tokens for message roles and boundaries. + # We use a rounded-down sequence length of 4000 to account for these special tokens. + - name: openai/gpt-3.5-turbo-0301 + model_name: openai/gpt-3.5-turbo-0301 + tokenizer_name: openai/cl100k_base + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable + # sequence length is smaller at 4087 with one user input message and one assistant + # output message because ChatGPT uses special tokens for message roles and boundaries. + # We use a rounded-down sequence length of 4000 to account for these special tokens. + - name: openai/gpt-3.5-turbo-0613 + model_name: openai/gpt-3.5-turbo-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained + # in the openai/gpt-3.5-turbo-0613 comment + - name: openai/gpt-3.5-turbo-16k-0613 + model_name: openai/gpt-3.5-turbo-16k-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 16000 + max_request_length: 16001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained + # in the openai/gpt-3.5-turbo-0613 comment + - name: openai/gpt-3.5-turbo-1106 + model_name: openai/gpt-3.5-turbo-1106 + tokenizer_name: openai/cl100k_base + max_sequence_length: 16000 + max_request_length: 16001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained + # in the openai/gpt-3.5-turbo-0613 comment + - name: openai/gpt-3.5-turbo-0125 + model_name: openai/gpt-3.5-turbo-0125 + tokenizer_name: openai/cl100k_base + max_sequence_length: 16000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + ## GPT 4 Models + + - name: openai/gpt-4-1106-preview + model_name: openai/gpt-4-1106-preview + tokenizer_name: openai/cl100k_base + # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo, + # the maximum number of output tokens for this model is 4096 + # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098 + max_sequence_length: 128000 + max_request_length: 128001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-0314 + model_name: openai/gpt-4-0314 + tokenizer_name: openai/cl100k_base + max_sequence_length: 8192 + max_request_length: 8193 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-32k-0314 + model_name: openai/gpt-4-32k-0314 + tokenizer_name: 
openai/cl100k_base + max_sequence_length: 32768 + max_request_length: 32769 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-0613 + model_name: openai/gpt-4-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 8192 + max_request_length: 8193 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-32k-0613 + model_name: openai/gpt-4-32k-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 32768 + max_request_length: 32769 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-0125-preview + model_name: openai/gpt-4-0125-preview + tokenizer_name: openai/cl100k_base + # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo, + # the maximum number of output tokens for this model is 4096 + # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098 + max_sequence_length: 128000 + max_request_length: 128001 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-turbo-2024-04-09 + model_name: openai/gpt-4-turbo-2024-04-09 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-2024-05-13 + model_name: openai/gpt-4o-2024-05-13 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-2024-08-06 + model_name: openai/gpt-4o-2024-08-06 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-2024-11-20 + model_name: openai/gpt-4o-2024-11-20 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-mini-2024-07-18 + model_name: openai/gpt-4o-mini-2024-07-18 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4.1-2025-04-14 + model_name: openai/gpt-4.1-2025-04-14 + tokenizer_name: openai/o200k_base + max_sequence_length: 1047576 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4.1-mini-2025-04-14 + model_name: openai/gpt-4.1-mini-2025-04-14 + tokenizer_name: openai/o200k_base + max_sequence_length: 1047576 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4.1-nano-2025-04-14 + model_name: openai/gpt-4.1-nano-2025-04-14 + tokenizer_name: openai/o200k_base + max_sequence_length: 1047576 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-5-2025-08-07 + model_name: openai/gpt-5-2025-08-07 + tokenizer_name: openai/o200k_base + max_sequence_length: 400000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + + - name: openai/gpt-5-mini-2025-08-07 + model_name: openai/gpt-5-mini-2025-08-07 + tokenizer_name: openai/o200k_base + max_sequence_length: 400000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + + - name: openai/gpt-5-nano-2025-08-07 + model_name: openai/gpt-5-nano-2025-08-07 + tokenizer_name: openai/o200k_base + max_sequence_length: 400000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + + 
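The rounded-down max_sequence_length values in the OpenAI entries above (for example, 16,384 declared as 16,000, or 4,096 declared as 4,000) leave headroom for the hidden role and boundary tokens that the chat format adds around each message. A minimal sketch of checking a prompt against such a budget, assuming the tiktoken package and the same cl100k_base encoding these entries declare (illustrative only, not HELM code):

    import tiktoken

    # Count prompt tokens under cl100k_base; the gap between the declared
    # budget (16,000) and the true limit (16,384) absorbs the chat format's
    # hidden special tokens.
    encoding = tiktoken.get_encoding("cl100k_base")
    prompt = "Example prompt text"
    num_tokens = len(encoding.encode(prompt))
    assert num_tokens <= 16000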
- name: openai/whisper-1_gpt-4o-2024-11-20 + model_name: openai/whisper-1_gpt-4o-2024-11-20 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient" + + - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20 + model_name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient" + + - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20 + model_name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient" + + - name: openai/gpt-4o-audio-preview-2024-10-01 + model_name: openai/gpt-4o-audio-preview-2024-10-01 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-audio-preview-2024-12-17 + model_name: openai/gpt-4o-audio-preview-2024-12-17 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4o-mini-audio-preview-2024-12-17 + model_name: openai/gpt-4o-mini-audio-preview-2024-12-17 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-vision-preview + model_name: openai/gpt-4-vision-preview + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo + max_request_length: 128001 + max_sequence_and_generated_tokens_length: 132096 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/gpt-4-1106-vision-preview + model_name: openai/gpt-4-1106-vision-preview + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo + max_request_length: 128001 + max_sequence_and_generated_tokens_length: 132096 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + ## GPT-4.5 + - name: openai/gpt-4.5-preview-2025-02-27 + model_name: openai/gpt-4.5-preview-2025-02-27 + tokenizer_name: openai/o200k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + ## o1 Models + - name: openai/o1-pro-2025-03-19 + model_name: openai/o1-pro-2025-03-19 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + + - name: openai/o1-pro-2025-03-19-low-reasoning-effort + model_name: openai/o1-pro-2025-03-19-low-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + args: + openai_model_name: o1-pro-2025-03-19 + reasoning_effort: low + + - name: openai/o1-pro-2025-03-19-high-reasoning-effort + model_name: openai/o1-pro-2025-03-19-high-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + args: + openai_model_name: o1-pro-2025-03-19 + reasoning_effort: high + + - name: openai/o1-2024-12-17 + model_name: 
openai/o1-2024-12-17 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o1-2024-12-17-low-reasoning-effort + model_name: openai/o1-2024-12-17-low-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o1-2024-12-17 + reasoning_effort: low + + - name: openai/o1-2024-12-17-high-reasoning-effort + model_name: openai/o1-2024-12-17-high-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o1-2024-12-17 + reasoning_effort: high + + - name: openai/o1-preview-2024-09-12 + model_name: openai/o1-preview-2024-09-12 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o1-mini-2024-09-12 + model_name: openai/o1-mini-2024-09-12 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o3-mini-2025-01-31 + model_name: openai/o3-mini-2025-01-31 + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o3-mini-2025-01-31-low-reasoning-effort + model_name: openai/o3-mini-2025-01-31-low-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o3-mini-2025-01-31 + reasoning_effort: low + + - name: openai/o3-mini-2025-01-31-high-reasoning-effort + model_name: openai/o3-mini-2025-01-31-high-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o3-mini-2025-01-31 + reasoning_effort: high + + - name: openai/o3-2025-04-16 + model_name: openai/o3-2025-04-16 + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o3 + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o3-2025-04-16-low-reasoning-effort + model_name: openai/o3-2025-04-16-low-reasoning-effort + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o3 + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o3-2025-04-16 + reasoning_effort: low + + - name: openai/o3-2025-04-16-high-reasoning-effort + model_name: openai/o3-2025-04-16-high-reasoning-effort + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o3 + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o3-2025-04-16 + reasoning_effort: high + + - name: openai/o4-mini-2025-04-16 + model_name: openai/o4-mini-2025-04-16 + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o4-mini + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: 
openai/o4-mini-2025-04-16-low-reasoning-effort + model_name: openai/o4-mini-2025-04-16-low-reasoning-effort + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o4-mini + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o4-mini-2025-04-16 + reasoning_effort: low + + + - name: openai/o4-mini-2025-04-16-high-reasoning-effort + model_name: openai/o4-mini-2025-04-16-high-reasoning-effort + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o4-mini + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o4-mini-2025-04-16 + reasoning_effort: high + + + - name: openai/o3-pro-2025-06-10-high-reasoning-effort + model_name: openai/o3-pro-2025-06-10-high-reasoning-effort + tokenizer_name: openai/cl100k_base + # Source: https://platform.openai.com/docs/models/o3-pro + max_sequence_length: 200000 + # TODO: max_output_tokens: 100000 + client_spec: + class_name: "helm.clients.openai_responses_client.OpenAIResponseClient" + args: + openai_model_name: o3-pro-2025-06-10 + reasoning_effort: high + + ## GPT-OSS + - name: together/gpt-oss-20b + model_name: openai/gpt-oss-20b + tokenizer_name: openai/o200k_harmony + # Source: https://platform.openai.com/docs/models/gpt-oss-20b + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/gpt-oss-120b + model_name: openai/gpt-oss-120b + tokenizer_name: openai/o200k_harmony + # Source: https://platform.openai.com/docs/models/gpt-oss-120b + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + ## Text Similarity Models + # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings + # The number of parameters is guessed based on the number of parameters of the + # corresponding GPT-3 model. + + # As of 2023-11-07, text-embedding-ada-002 is not deprecated: + # "We recommend using text-embedding-ada-002 for nearly all use cases." 
+ # Source: https://platform.openai.com/docs/guides/embeddings/what-are-embeddings + - name: openai/text-embedding-ada-002 + model_name: openai/text-embedding-ada-002 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + # Text-to-image models + - name: openai/dall-e-2 + model_name: openai/dall-e-2 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 1000 + client_spec: + class_name: "helm.clients.image_generation.dalle2_client.DALLE2Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService" + + - name: openai/dall-e-3 + model_name: openai/dall-e-3 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 1000 + client_spec: + class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService" + + - name: openai/dall-e-3-natural + model_name: openai/dall-e-3-natural + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 1000 + client_spec: + class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService" + + - name: openai/dall-e-3-hd + model_name: openai/dall-e-3-hd + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 1000 + client_spec: + class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService" + + - name: openai/dall-e-3-hd-natural + model_name: openai/dall-e-3-hd-natural + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 1000 + client_spec: + class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService" + + # Together + # The list of models served by Together changes often; to check the latest list, visit: + # https://docs.together.ai/docs/inference-models + # You can also use the playground to check that the live models are working: + # https://api.together.xyz/playground + + ## BigScience + - name: together/bloom + deprecated: true # Removed from Together + model_name: bigscience/bloom + tokenizer_name: bigscience/bloom + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + + - name: together/t0pp + deprecated: true # Removed from Together + model_name: bigscience/t0pp + tokenizer_name: bigscience/T0pp + max_sequence_length: 1024 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + window_service_spec: + class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService" + + ## Google + - name: together/t5-11b + deprecated: true # Removed from Together + model_name: google/t5-11b + tokenizer_name: google/t5-11b + max_sequence_length: 511 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + window_service_spec: + class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService" + + - name: together/flan-t5-xxl + deprecated:
true # Removed from Together + model_name: google/flan-t5-xxl + tokenizer_name: google/flan-t5-xxl + max_sequence_length: 511 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + window_service_spec: + class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService" + + - name: together/ul2 + deprecated: true # Removed from Together + model_name: google/ul2 + tokenizer_name: google/ul2 + max_sequence_length: 511 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + window_service_spec: + class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService" + + ## Meta + - name: together/llama-7b + model_name: meta/llama-7b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: huggyllama/llama-7b + + - name: together/llama-13b + model_name: meta/llama-13b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: huggyllama/llama-13b + + - name: together/llama-30b + model_name: meta/llama-30b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: huggyllama/llama-30b + + - name: together/llama-65b + model_name: meta/llama-65b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: huggyllama/llama-65b + + - name: together/llama-2-7b + model_name: meta/llama-2-7b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/llama-2-7b + + - name: together/llama-2-13b + model_name: meta/llama-2-13b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/llama-2-13b + + - name: together/llama-2-70b + model_name: meta/llama-2-70b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Llama-3-8b-hf is wrong here; see correct entry below + - name: together/llama-3-8b + model_name: meta/llama-3-8b + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Llama-3-8b-hf +
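Several Together entries above and below declare a context window one or two tokens below the model's true limit. A minimal sketch of that arithmetic, with hypothetical names (illustrative only, not HELM code):

    # Shrink the declared context so a provider-side token-counting bug
    # (off-by-one per #2080, off-by-two per #2080 and #2094) can never
    # reject a request that fits within the model's real limit.
    def declared_max_sequence_length(true_context: int, slack: int) -> int:
        return true_context - slack

    assert declared_max_sequence_length(2048, 1) == 2047  # llama-7b above
    assert declared_max_sequence_length(4096, 2) == 4094  # llama-2-7b above

+ - name: together/llama-3-8b-instruct-turbo +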
model_name: meta/llama-3-8b-instruct-turbo + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-3-8B-Instruct-Turbo + + - name: together/llama-3-8b-instruct-lite + model_name: meta/llama-3-8b-instruct-lite + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-3-8B-Instruct-Lite + + - name: together/llama-3-70b + model_name: meta/llama-3-70b + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-3-70B + + - name: together/llama-3-70b-instruct-turbo + model_name: meta/llama-3-70b-instruct-turbo + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-3-70B-Instruct-Turbo + + - name: together/llama-3-70b-instruct-lite + model_name: meta/llama-3-70b-instruct-lite + tokenizer_name: meta/llama-3-8b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-3-70B-Instruct-Lite + + - name: together/llama-3.1-8b-instruct-turbo + model_name: meta/llama-3.1-8b-instruct-turbo + tokenizer_name: meta/llama-3.1-8b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + + - name: together/llama-3.1-70b-instruct-turbo + model_name: meta/llama-3.1-70b-instruct-turbo + tokenizer_name: meta/llama-3.1-8b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + + - name: together/llama-3.1-405b-instruct-turbo + model_name: meta/llama-3.1-405b-instruct-turbo + tokenizer_name: meta/llama-3.1-8b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + + - name: together/llama-4-scout-17b-16e-instruct + model_name: meta/llama-4-scout-17b-16e-instruct + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 327680 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-4-Scout-17B-16E-Instruct + + - name: together/llama-4-maverick-17b-128e-instruct-fp8 + model_name: meta/llama-4-maverick-17b-128e-instruct-fp8 + tokenizer_name: meta/llama-4-scout-17b-16e-instruct + max_sequence_length: 524288 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + + - name: together/llama-3-8b-chat + model_name: meta/llama-3-8b-chat + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 8182 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-3-8b-chat-hf + + - name: together/llama-3-70b-chat + model_name: meta/llama-3-70b-chat + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 8182 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: 
meta-llama/Llama-3-70b-chat-hf + + - name: together/llama-3.2-3b-instruct-turbo + model_name: meta/llama-3.2-3b-instruct-turbo + tokenizer_name: meta/llama-3.2-3b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-3.2-3B-Instruct-Turbo + + - name: together/llama-3.2-11b-vision-instruct-turbo + model_name: meta/llama-3.2-11b-vision-instruct-turbo + tokenizer_name: meta/llama-3.2-11b-vision-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + + - name: together/llama-3.2-90b-vision-instruct-turbo + model_name: meta/llama-3.2-90b-vision-instruct-turbo + tokenizer_name: meta/llama-3.2-11b-vision-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + + - name: together/llama-3.3-70b-instruct-turbo + model_name: meta/llama-3.3-70b-instruct-turbo + tokenizer_name: meta/llama-3.3-70b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: meta-llama/Llama-3.3-70B-Instruct-Turbo + + - name: together/llama-guard-7b + model_name: meta/llama-guard-7b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/llama-guard-7b + + - name: together/llama-guard-2-8b + model_name: meta/llama-guard-2-8b + tokenizer_name: meta/llama-3-8b + max_sequence_length: 4094 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/llamaguard-2-8b + + - name: together/llama-guard-3-8b + model_name: meta/llama-guard-3-8b + tokenizer_name: meta/llama-3.1-8b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: meta-llama/Meta-Llama-Guard-3-8B + + # NVIDIA + - name: together/llama-3.1-nemotron-70b-instruct + model_name: nvidia/llama-3.1-nemotron-70b-instruct + tokenizer_name: nvidia/llama-3.1-nemotron-70b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF + + # 01.AI + - name: together/yi-6b + model_name: 01-ai/yi-6b + tokenizer_name: 01-ai/Yi-6B + max_sequence_length: 4095 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: zero-one-ai/Yi-6B + + - name: together/yi-34b + model_name: 01-ai/yi-34b + tokenizer_name: 01-ai/Yi-6B + max_sequence_length: 4095 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: zero-one-ai/Yi-34B + + - name: together/yi-6b-chat + model_name: 01-ai/yi-6b-chat + tokenizer_name: 01-ai/Yi-6B + max_sequence_length: 4095 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: zero-one-ai/Yi-6B-Chat + + - name: together/yi-34b-chat + model_name: 01-ai/yi-34b-chat + tokenizer_name: 01-ai/Yi-6B + max_sequence_length: 4095 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: zero-one-ai/Yi-34B-Chat + + - name: 01-ai/yi-large + model_name: 01-ai/yi-large + 
tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute + max_sequence_length: 16000 + client_spec: + class_name: "helm.clients.yi_client.YiChatClient" + + - name: 01-ai/yi-large-preview + model_name: 01-ai/yi-large-preview + tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute + max_sequence_length: 16000 + client_spec: + class_name: "helm.clients.yi_client.YiChatClient" + + + # Allen Institute for AI + - name: together/olmo-7b + model_name: allenai/olmo-7b + tokenizer_name: allenai/olmo-7b + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + + - name: together/olmo-7b-twin-2t + model_name: allenai/olmo-7b-twin-2t + tokenizer_name: allenai/olmo-7b + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + + - name: together/olmo-7b-instruct + model_name: allenai/olmo-7b-instruct + tokenizer_name: allenai/olmo-7b + max_sequence_length: 2047 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: huggingface/olmo-1.7-7b + model_name: allenai/olmo-1.7-7b + tokenizer_name: allenai/OLMo-1.7-7B-hf + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: allenai/OLMo-1.7-7B-hf + + - name: huggingface/olmo-2-1124-7b-instruct + model_name: allenai/olmo-2-1124-7b-instruct + tokenizer_name: allenai/olmo-2-1124-7b-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + device_map: auto + + - name: huggingface/olmo-2-1124-13b-instruct + model_name: allenai/olmo-2-1124-13b-instruct + tokenizer_name: allenai/olmo-2-1124-7b-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + device_map: auto + + - name: huggingface/olmo-2-0325-32b-instruct + model_name: allenai/olmo-2-0325-32b-instruct + tokenizer_name: allenai/olmo-2-0325-32b-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + device_map: auto + + - name: huggingface/olmoe-1b-7b-0125-instruct + model_name: allenai/olmoe-1b-7b-0125-instruct + tokenizer_name: allenai/olmoe-1b-7b-0125-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + device_map: auto + + ## MistralAI + - name: together/mistral-7b-v0.1 + model_name: mistralai/mistral-7b-v0.1 + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: mistralai/Mistral-7B-v0.1 + + - name: together/mistral-7b-instruct-v0.1 + model_name: mistralai/mistral-7b-instruct-v0.1 + tokenizer_name: mistralai/Mistral-7B-Instruct-v0.1 + max_sequence_length: 4000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/mistral-7b-instruct-v0.2 + model_name: mistralai/mistral-7b-instruct-v0.2 + tokenizer_name: mistralai/Mistral-7B-Instruct-v0.2 + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: huggingface/mistral-7b-instruct-v0.3 + model_name: mistralai/mistral-7b-instruct-v0.3-hf + tokenizer_name:
mistralai/Mistral-7B-Instruct-v0.3 + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: mistralai/Mistral-7B-Instruct-v0.3 + + - name: together/mistral-7b-instruct-v0.3 + model_name: mistralai/mistral-7b-instruct-v0.3 + tokenizer_name: mistralai/Mistral-7B-Instruct-v0.3 + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + + - name: together/mixtral-8x7b-32kseqlen + model_name: mistralai/mixtral-8x7b-32kseqlen + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: mistralai/mixtral-8x7b-32kseqlen + + - name: together/mixtral-8x7b-instruct-v0.1 + model_name: mistralai/mixtral-8x7b-instruct-v0.1 + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/mixtral-8x22b + model_name: mistralai/mixtral-8x22b + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 65535 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + + - name: together/mixtral-8x22b-instruct-v0.1 + model_name: mistralai/mixtral-8x22b-instruct-v0.1 + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 65535 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + + ## Snowflake + - name: together/snowflake-arctic-instruct + model_name: snowflake/snowflake-arctic-instruct + tokenizer_name: snowflake/snowflake-arctic-instruct + max_sequence_length: 4000 # Lower than 4096 because of chat tokens + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + ## Stanford + - name: together/alpaca-7b + model_name: stanford/alpaca-7b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/alpaca-7b + + ## Tiiuae + - name: together/falcon-7b + model_name: tiiuae/falcon-7b + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/falcon-7b + + - name: together/falcon-7b-instruct + model_name: tiiuae/falcon-7b-instruct + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/falcon-7b-instruct + + - name: together/falcon-40b + model_name: tiiuae/falcon-40b + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/falcon-40b + + - name: together/falcon-40b-instruct + model_name: tiiuae/falcon-40b-instruct + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation
token counting (#2080) + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/falcon-40b-instruct + + ## Together + # These are models fine-tuned by Together (and not simply hosted by Together). + - name: together/gpt-jt-6b-v1 + model_name: together/gpt-jt-6b-v1 + tokenizer_name: EleutherAI/gpt-j-6B + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/GPT-JT-6B-v1 + + - name: together/gpt-neoxt-chat-base-20b + model_name: together/gpt-neoxt-chat-base-20b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/GPT-NeoXT-Chat-Base-20B + + - name: together/redpajama-incite-base-3b-v1 + model_name: together/redpajama-incite-base-3b-v1 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/RedPajama-INCITE-Base-3B-v1 + + - name: together/redpajama-incite-instruct-3b-v1 + model_name: together/redpajama-incite-instruct-3b-v1 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/RedPajama-INCITE-Instruct-3B-v1 + + - name: together/redpajama-incite-base-7b + model_name: together/redpajama-incite-base-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/RedPajama-INCITE-7B-Base + + - name: together/redpajama-incite-instruct-7b + model_name: together/redpajama-incite-instruct-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/RedPajama-INCITE-7B-Instruct + + ## Z.ai + - name: together/glm-4.5-air-fp8 + model_name: zai-org/glm-4.5-air-fp8 + tokenizer_name: zai-org/glm-4.5-air-fp8 + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + parse_thinking: true + + - name: thudm/cogview2 + model_name: thudm/cogview2 + tokenizer_name: openai/clip-vit-large-patch14 + max_sequence_length: 75 + client_spec: + class_name: "helm.clients.image_generation.cogview2_client.CogView2Client" + window_service_spec: + class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" + + ## Yandex + - name: together/yalm + deprecated: true # Removed from Together + model_name: yandex/yalm + tokenizer_name: Yandex/yalm + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + window_service_spec: + class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService" + + # Writer + - name: writer/palmyra-base + model_name: writer/palmyra-base + tokenizer_name: writer/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-large + model_name: 
writer/palmyra-large + tokenizer_name: writer/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/silk-road + model_name: writer/silk-road + tokenizer_name: writer/gpt2 + max_sequence_length: 8192 + max_sequence_and_generated_tokens_length: 8192 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-x + model_name: writer/palmyra-x + tokenizer_name: writer/gpt2 + max_sequence_length: 8192 + max_sequence_and_generated_tokens_length: 8192 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-x-v2 + model_name: writer/palmyra-x-v2 + tokenizer_name: writer/gpt2 + max_sequence_length: 6000 + max_sequence_and_generated_tokens_length: 7024 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-x-v3 + model_name: writer/palmyra-x-v3 + tokenizer_name: writer/gpt2 + max_sequence_length: 6000 + max_sequence_and_generated_tokens_length: 7024 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-x-32k + model_name: writer/palmyra-x-32k + tokenizer_name: writer/gpt2 + max_sequence_length: 28000 + max_sequence_and_generated_tokens_length: 30048 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraClient" + + - name: writer/palmyra-vision-003 + model_name: writer/palmyra-vision-003 + tokenizer_name: writer/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.clients.vision_language.palmyra_vision_client.PalmyraVisionClient" + + - name: writer/palmyra-x-004 + model_name: writer/palmyra-x-004 + # Actual tokenizer is Llama 2, but it cannot be used in HELM due to this issue: + # https://github.com/stanford-crfm/helm/issues/2467 + # Work around by using Llama 3 tokenizer for now. 
tokenizer_name: meta/llama-3-8b + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraChatClient" + + - name: writer/palmyra-x5 + model_name: writer/palmyra-x5 + # See tokenizer comment for writer/palmyra-x-004 + tokenizer_name: meta/llama-3-8b + max_sequence_length: 1000000 + client_spec: + class_name: "helm.clients.writer_client.WriterClient" + + - name: writer/palmyra-med-32k + model_name: writer/palmyra-med-32k + # Palmyra-Med uses "<|end_of_text|>" as the end-of-text token, which is used by meta/llama-3-8b, + # rather than "<|eot_id|>", which is used by meta/llama-3-8b-instruct + tokenizer_name: meta/llama-3-8b + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraChatClient" + + - name: writer/palmyra-med + model_name: writer/palmyra-med + tokenizer_name: meta/llama-3-8b + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.writer_client.WriterClient" + + - name: writer/palmyra-fin-32k + model_name: writer/palmyra-fin-32k + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraChatClient" + + - name: writer/palmyra-fin + model_name: writer/palmyra-fin + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.palmyra_client.PalmyraChatClient" + + + # xAI + + - name: xai/grok-3-beta + model_name: xai/grok-3-beta + tokenizer_name: xai/grok-3-beta + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.grok_client.GrokChatClient" + window_service_spec: + class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" + + - name: xai/grok-3-mini-beta + model_name: xai/grok-3-mini-beta + tokenizer_name: xai/grok-3-mini-beta + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.grok_client.GrokChatClient" + window_service_spec: + class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" + + - name: xai/grok-4-0709 + model_name: xai/grok-4-0709 + tokenizer_name: xai/grok-4-0709 + max_sequence_length: 256000 + client_spec: + class_name: "helm.clients.grok_client.GrokChatClient" + window_service_spec: + class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" + + # Qwen + + - name: together/qwen-7b + model_name: qwen/qwen-7b + tokenizer_name: qwen/qwen-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: togethercomputer/Qwen-7B + + - name: together/qwen1.5-7b + model_name: qwen/qwen1.5-7b + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: Qwen/Qwen1.5-7B + + - name: together/qwen1.5-14b + model_name: qwen/qwen1.5-14b + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: Qwen/Qwen1.5-14B + + - name: together/qwen1.5-32b + model_name: qwen/qwen1.5-32b + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherClient" + args: + together_model: Qwen/Qwen1.5-32B + + - name: together/qwen1.5-72b + model_name: qwen/qwen1.5-72b + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name:
"helm.clients.together_client.TogetherClient" + args: + together_model: Qwen/Qwen1.5-72B + + - name: together/qwen1.5-7b-chat + model_name: qwen/qwen1.5-7b-chat + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen1.5-14b-chat + model_name: qwen/qwen1.5-14b-chat + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen1.5-32b-chat + model_name: qwen/qwen1.5-32b-chat + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen1.5-72b-chat + model_name: qwen/qwen1.5-72b-chat + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen1.5-110b-chat + model_name: qwen/qwen1.5-110b-chat + tokenizer_name: qwen/qwen1.5-7b + max_sequence_length: 32767 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen2-72b-instruct + model_name: qwen/qwen2-72b-instruct + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen2.5-7b-instruct-turbo + model_name: qwen/qwen2.5-7b-instruct-turbo + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen2.5-72b-instruct-turbo + model_name: qwen/qwen2.5-72b-instruct-turbo + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen3-235b-a22b-fp8-tput + model_name: qwen/qwen3-235b-a22b-fp8-tput + tokenizer_name: qwen/qwen3-235b-a22b + max_sequence_length: 40960 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + parse_thinking: true + + - name: together/qwen3-235b-a22b-instruct-2507-fp8 + model_name: qwen/qwen3-235b-a22b-instruct-2507-fp8 + tokenizer_name: qwen/qwen3-235b-a22b-instruct-2507-fp8 + max_sequence_length: 262144 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + args: + together_model: Qwen/Qwen3-235B-A22B-Instruct-2507-tput + + - name: huggingface/qwen2.5-7b-instruct-4bit + model_name: qwen/qwen2.5-7b-instruct + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct + torch_dtype: "float16" + quantization_config: + load_in_4bit: true + attn_implementation: "flash_attention_2" + + - name: huggingface/qwen2.5-7b-instruct + model_name: qwen/qwen2.5-7b-instruct + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct + + - name: huggingface/smollm2-135m + model_name: huggingface/smollm2-135m + tokenizer_name: huggingface/smollm2-135m + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M + + - name: 
huggingface/smollm2-360m + model_name: huggingface/smollm2-360m + tokenizer_name: huggingface/smollm2-135m + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-360M + + - name: huggingface/smollm2-1.7b + model_name: huggingface/smollm2-1.7b + tokenizer_name: huggingface/smollm2-135m + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-1.7B + + - name: huggingface/smollm2-135m-instruct + model_name: huggingface/smollm2-135m-instruct + tokenizer_name: huggingface/smollm2-135m-instruct + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M-Instruct + + - name: huggingface/smollm2-360m-instruct + model_name: huggingface/smollm2-360m-instruct + tokenizer_name: huggingface/smollm2-135m-instruct + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-360M-Instruct + + - name: huggingface/smollm2-1.7b-instruct + model_name: huggingface/smollm2-1.7b-instruct + tokenizer_name: huggingface/smollm2-135m-instruct + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-1.7B-Instruct + + - name: together/qwq-32b-preview + model_name: qwen/qwq-32b-preview + tokenizer_name: qwen/qwq-32b-preview + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: huggingface/qwen-vl + model_name: qwen/qwen-vl + tokenizer_name: qwen/qwen-vl + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient" + + - name: huggingface/qwen-vl-chat + model_name: qwen/qwen-vl-chat + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient" + + - name: huggingface/qwen2-vl-7b-instruct + model_name: qwen/qwen2-vl-7b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" + + - name: huggingface/qwen2-vl-72b-instruct + model_name: qwen/qwen2-vl-72b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" + + - name: huggingface/qwen2.5-vl-3b-instruct + model_name: qwen/qwen2.5-vl-3b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" + + - name: huggingface/qwen2.5-vl-7b-instruct + model_name: qwen/qwen2.5-vl-7b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" + + - name: huggingface/qwen2.5-vl-32b-instruct + model_name: qwen/qwen2.5-vl-32b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" +
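Every entry in this file binds a model to its client implementation through client_spec.class_name, a dotted Python path. A rough sketch of how such a path can be resolved at runtime, using stdlib importlib (illustrative only, not HELM's actual loader):

    import importlib

    # Split "pkg.module.ClassName" into a module path and a class name,
    # import the module, and fetch the class from it.
    def load_class(dotted_path: str) -> type:
        module_path, class_name = dotted_path.rsplit(".", 1)
        module = importlib.import_module(module_path)
        return getattr(module, class_name)

    # Stdlib example; the class_name values above resolve the same way.
    decoder_cls = load_class("json.JSONDecoder")

+ - name: 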
huggingface/qwen2.5-vl-72b-instruct + model_name: qwen/qwen2.5-vl-72b-instruct + tokenizer_name: qwen/qwen-vl-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" + + - name: huggingface/qwen-audio-chat + model_name: qwen/qwen-audio-chat + tokenizer_name: qwen/qwen-audio-chat + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.audio_language.qwen_audiolm_client.QwenAudioLMClient" + + - name: huggingface/qwen2-audio-7b-instruct + model_name: qwen/qwen2-audio-7b-instruct + tokenizer_name: qwen/qwen2-audio-instruct + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.audio_language.qwen2_audiolm_client.Qwen2AudioLMClient" + + - name: huggingface/qwen2.5-omni-7b + model_name: qwen/qwen2.5-omni-7b + tokenizer_name: qwen/qwen2.5-omni-7b + max_sequence_length: 8191 + client_spec: + class_name: "helm.clients.audio_language.qwen2_5_omni_client.Qwen2_5OmniAudioLMClient" + +# Reka + - name: reka/reka-core + model_name: reka/reka-core + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-core-20240415 + model_name: reka/reka-core-20240415 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-core-20240501 + model_name: reka/reka-core-20240501 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-flash + model_name: reka/reka-flash + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-flash-20240226 + model_name: reka/reka-flash-20240226 + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-edge + model_name: reka/reka-edge + tokenizer_name: openai/cl100k_base + max_sequence_length: 64000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + - name: reka/reka-edge-20240208 + model_name: reka/reka-edge-20240208 + tokenizer_name: openai/cl100k_base + max_sequence_length: 64000 + client_spec: + class_name: "helm.clients.reka_client.RekaClient" + + # Upstage + - name: upstage/solar-pro-241126 + model_name: upstage/solar-pro-241126 + tokenizer_name: upstage/solar-pro-preview-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.upstage_client.UpstageChatClient" + +# Diva Llama + - name: huggingface/diva-llama + model_name: stanford/diva-llama + # TODO: Set the right tokenizer + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.audio_language.diva_llama_client.DivaLlamaClient" + +# LLaMA-Omni + - name: ictnlp/llama-3.1-8b-omni + model_name: ictnlp/llama-3.1-8b-omni + tokenizer_name: ictnlp/llama-3.1-8b-omni + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.audio_language.llama_omni_client.LlamaOmniAudioLMClient" + +# IBM - Granite 3.0 + - name: huggingface/granite-3.0-2b-base + model_name: ibm-granite/granite-3.0-2b-base + tokenizer_name: ibm-granite/granite-3.0-2b-base + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-base + + - name: 
huggingface/granite-3.0-2b-instruct + model_name: ibm-granite/granite-3.0-2b-instruct + tokenizer_name: ibm-granite/granite-3.0-2b-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-instruct + + - name: huggingface/granite-3.0-8b-instruct + model_name: ibm-granite/granite-3.0-8b-instruct + tokenizer_name: ibm-granite/granite-3.0-8b-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-instruct + + - name: huggingface/granite-3.0-8b-base + model_name: ibm-granite/granite-3.0-8b-base + tokenizer_name: ibm-granite/granite-3.0-8b-base + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-base + + - name: huggingface/granite-3.0-3b-a800m-instruct + model_name: ibm-granite/granite-3.0-3b-a800m-instruct + tokenizer_name: ibm-granite/granite-3.0-3b-a800m-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-instruct + + - name: huggingface/granite-3.0-3b-a800m-base + model_name: ibm-granite/granite-3.0-3b-a800m-base + tokenizer_name: ibm-granite/granite-3.0-3b-a800m-base + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-base + + - name: huggingface/granite-3.0-1b-a400m-instruct + model_name: ibm-granite/granite-3.0-1b-a400m-instruct + tokenizer_name: ibm-granite/granite-3.0-1b-a400m-instruct + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-instruct + + - name: huggingface/granite-3.0-1b-a400m-base + model_name: ibm-granite/granite-3.0-1b-a400m-base + tokenizer_name: ibm-granite/granite-3.0-1b-a400m-base + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-base + +# Maritaca AI + - name: huggingface/sabia-7b + model_name: maritaca-ai/sabia-7b + tokenizer_name: maritaca-ai/sabia-7b + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: maritaca-ai/sabia-7b + + - name: maritaca-ai/sabiazinho-3 + model_name: maritaca-ai/sabiazinho-3 + tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium + max_sequence_length: 32000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: maritaca-ai/sabia-3 + model_name: maritaca-ai/sabia-3 + tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: maritaca-ai/sabia-3.1-2025-05-08 + model_name: maritaca-ai/sabia-3.1-2025-05-08 + tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + +# Granite-3.1-8b-base + - name: huggingface/granite-3.1-8b-base + model_name: ibm-granite/granite-3.1-8b-base + tokenizer_name: 
ibm-granite/granite-3.1-8b-base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base + +# Granite-3.1-8b-instruct + - name: huggingface/granite-3.1-8b-instruct + model_name: ibm-granite/granite-3.1-8b-instruct + tokenizer_name: ibm-granite/granite-3.1-8b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct + +# Granite-3.1-2b-instruct + - name: huggingface/granite-3.1-2b-instruct + model_name: ibm-granite/granite-3.1-2b-instruct + tokenizer_name: ibm-granite/granite-3.1-2b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct + +# Granite-3.1-2b-base + - name: huggingface/granite-3.1-2b-base + model_name: ibm-granite/granite-3.1-2b-base + tokenizer_name: ibm-granite/granite-3.1-2b-base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base + +# Granite-3.1-3b-a800m-instruct + - name: huggingface/granite-3.1-3b-a800m-instruct + model_name: ibm-granite/granite-3.1-3b-a800m-instruct + tokenizer_name: ibm-granite/granite-3.1-3b-a800m-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct + +# Granite-3.1-3b-a800m-base + - name: huggingface/granite-3.1-3b-a800m-base + model_name: ibm-granite/granite-3.1-3b-a800m-base + tokenizer_name: ibm-granite/granite-3.1-3b-a800m-base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base + +# Granite-3.1-1b-a400m-instruct + - name: huggingface/granite-3.1-1b-a400m-instruct + model_name: ibm-granite/granite-3.1-1b-a400m-instruct + tokenizer_name: ibm-granite/granite-3.1-1b-a400m-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct + +# Granite-3.1-1b-a400m-base + - name: huggingface/granite-3.1-1b-a400m-base + model_name: ibm-granite/granite-3.1-1b-a400m-base + tokenizer_name: ibm-granite/granite-3.1-1b-a400m-base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base + +# DeepSeek-R1-Distill-Llama-3.1-8b + - name: huggingface/DeepSeek-R1-Distill-Llama-8B + model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + tokenizer_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + +# deepseek-ai/deepseek-coder-6.7b-instruct + - name: huggingface/deepseek-coder-6.7b-instruct + model_name: deepseek-ai/deepseek-coder-6.7b-instruct + tokenizer_name: deepseek-ai/deepseek-coder-6.7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: 
"helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: deepseek-ai/deepseek-coder-6.7b-instruct + +# IBM WatsonX + - name: ibm/llama-3.3-70b-instruct + model_name: meta/llama-3.3-70b-instruct + tokenizer_name: meta/llama-3.3-70b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmChatClient" + args: + watsonx_model_name: meta-llama/llama-3-3-70b-instruct + region: Dallas + + - name: ibm/granite-3-2b-instruct + model_name: ibm/granite-3.1-2b-instruct + tokenizer_name: ibm-granite/granite-3.1-2b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmChatClient" + args: + watsonx_model_name: ibm/granite-3-2b-instruct + region: Dallas + + - name: ibm/granite-3-8b-instruct + model_name: ibm/granite-3.1-8b-instruct + tokenizer_name: ibm-granite/granite-3.1-8b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmChatClient" + args: + watsonx_model_name: ibm/granite-3-8b-instruct + region: Dallas + + - name: ibm/granite-13b-instruct-v2 + model_name: ibm/granite-13b-instruct-v2 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmTextClient" + args: + watsonx_model_name: ibm/granite-13b-instruct-v2 + region: Dallas + + - name: ibm/granite-20b-code-instruct-8k + model_name: ibm/granite-20b-code-instruct-8k + tokenizer_name: ibm-granite/granite-20b-code-instruct-8k + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmChatClient" + args: + watsonx_model_name: ibm/granite-20b-code-instruct + region: Dallas + + - name: ibm/granite-34b-code-instruct + model_name: ibm/granite-34b-code-instruct + tokenizer_name: ibm-granite/granite-34b-code-instruct-8k + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmChatClient" + args: + watsonx_model_name: ibm/granite-34b-code-instruct + region: Dallas + + - name: ibm/granite-3b-code-instruct + model_name: ibm/granite-3b-code-instruct + tokenizer_name: ibm-granite/granite-3b-code-instruct-128k + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmTextClient" + args: + watsonx_model_name: ibm/granite-3b-code-instruct + region: Dallas + + - name: ibm/granite-8b-code-instruct + model_name: ibm/granite-8b-code-instruct + tokenizer_name: ibm-granite/granite-8b-code-instruct-128k + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.ibm_client.IbmTextClient" + args: + watsonx_model_name: ibm/granite-8b-code-instruct + region: Dallas + + - name: ibm/granite-3.3-8b-instruct + model_name: ibm/granite-3.3-8b-instruct + tokenizer_name: ibm/granite-3.3-8b-instruct + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.ibm_client.IbmTextClient" + args: + watsonx_model_name: ibm/granite-3-3-8b-instruct + region: Dallas + + # Vietnamese + - name: ura-hcmut/ura-llama-2.1-8b + model_name: ura-hcmut/ura-llama-2.1-8b + tokenizer_name: meta/llama-3.1-8b-instruct + max_sequence_length: 131072 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/ura-llama-2.1-8b + + - name: ura-hcmut/ura-llama-2-8b + model_name: ura-hcmut/ura-llama-2-8b + tokenizer_name: meta/llama-3-8b-instruct + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: 
ura-hcmut/ura-llama-2-8b + + - name: ura-hcmut/ura-llama-7b + model_name: ura-hcmut/ura-llama-7b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/ura-llama-7b + + - name: ura-hcmut/ura-llama-13b + model_name: ura-hcmut/ura-llama-13b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/ura-llama-13b + + - name: ura-hcmut/ura-llama-70b + model_name: ura-hcmut/ura-llama-70b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/ura-llama-70b + + - name: ura-hcmut/GemSUra-7B + model_name: ura-hcmut/GemSUra-7B + tokenizer_name: google/gemma-2b + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/GemSUra-7B + + - name: ura-hcmut/GemSUra-2B + model_name: ura-hcmut/GemSUra-2B + tokenizer_name: google/gemma-2b + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/GemSUra-2B + + - name: ura-hcmut/MixSUra + model_name: ura-hcmut/MixSUra + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: ura-hcmut/MixSUra + + - name: vilm/vinallama-7b-chat + model_name: vilm/vinallama-7b-chat + tokenizer_name: vilm/vinallama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/vinallama-7b-chat + + - name: vilm/vinallama-2.7b-chat + model_name: vilm/vinallama-2.7b-chat + tokenizer_name: vilm/vinallama-2.7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/vinallama-2.7b-chat + + - name: vilm/vietcuna-7b-v3 + model_name: vilm/vietcuna-7b-v3 + tokenizer_name: vilm/vietcuna-7b-v3 + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/vietcuna-7b-v3 + + - name: vilm/vietcuna-3b-v2 + model_name: vilm/vietcuna-3b-v2 + tokenizer_name: vilm/vietcuna-7b-v3 + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/vietcuna-3b-v2 + + - name: vilm/Quyen-v0.1 + model_name: vilm/Quyen-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-v0.1 + + - name: vilm/Quyen-Plus-v0.1 + model_name: vilm/Quyen-Plus-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-Plus-v0.1 + + - name: vilm/Quyen-Pro-v0.1 + model_name: vilm/Quyen-Pro-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + 
class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-Pro-v0.1 + + - name: vilm/Quyen-Pro-Max-v0.1 + model_name: vilm/Quyen-Pro-Max-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-Pro-Max-v0.1 + + - name: vilm/Quyen-Mini-v0.1 + model_name: vilm/Quyen-Mini-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-Mini-v0.1 + + - name: vilm/Quyen-SE-v0.1 + model_name: vilm/Quyen-SE-v0.1 + tokenizer_name: qwen/qwen2-72b-instruct + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vilm/Quyen-SE-v0.1 + + - name: Viet-Mistral/Vistral-7B-Chat + model_name: Viet-Mistral/Vistral-7B-Chat + tokenizer_name: Viet-Mistral/Vistral-7B-Chat + max_sequence_length: 32768 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: Viet-Mistral/Vistral-7B-Chat + + - name: vinai/PhoGPT-7B5-Instruct + model_name: vinai/PhoGPT-7B5-Instruct + tokenizer_name: vinai/PhoGPT-7B5-Instruct + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vinai/PhoGPT-7B5-Instruct + + - name: vinai/PhoGPT-4B-Chat + model_name: vinai/PhoGPT-4B-Chat + tokenizer_name: vinai/PhoGPT-4B-Chat + max_sequence_length: 8192 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: vinai/PhoGPT-4B-Chat + + - name: huggingface/Gemma-3-Gaia-PT-BR-4b-it + model_name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it + tokenizer_name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it + + - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft + model_name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft + tokenizer_name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft + max_sequence_length: 4094 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: recogna-nlp/bode-13b-alpaca-pt-br-no-peft + + - name: 22h/cabrita_7b_pt_850000 + model_name: 22h/cabrita_7b_pt_850000 + tokenizer_name: 22h/cabrita_7b_pt_850000 + max_sequence_length: 4094 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: 22h/cabrita_7b_pt_850000 + + - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder + model_name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder + tokenizer_name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: PORTULAN/gervasio-7b-portuguese-ptbr-decoder + + - name: TucanoBR/Tucano-2b4 + model_name: TucanoBR/Tucano-2b4 + tokenizer_name: TucanoBR/Tucano-2b4 + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: TucanoBR/Tucano-2b4 + + - name: nicholasKluge/TeenyTinyLlama-460m + 
model_name: nicholasKluge/TeenyTinyLlama-460m + tokenizer_name: nicholasKluge/TeenyTinyLlama-460m + max_sequence_length: 2048 + client_spec: + class_name: "helm.clients.huggingface_client.HuggingFaceClient" + args: + pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m + + - name: openrouter/mistral-medium-3.1 + model_name: mistralai/mistral-medium-3.1 + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openrouter_client.OpenRouterClient" + args: + model_name: mistralai/mistral-medium-3.1 + + + - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 + model_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + model_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" \ No newline at end of file diff --git a/prod_env/model_metadata.yaml b/prod_env/model_metadata.yaml new file mode 100644 index 00000000000..8ac35fe5ba6 --- /dev/null +++ b/prod_env/model_metadata.yaml @@ -0,0 +1,4934 @@ +# This file defines all 
the models officially supported by the Helm API.
+# The model names here should match the model names in model_deployments.yaml.
+
+# If you want to add a new private model, this file (prod_env/model_metadata.yaml)
+# is the recommended place to do it.
+
+# Follow the template of this file to add a new model. You can copy-paste this to get started:
+# # This file contains the metadata for private models
+# models: [] # Leave empty to disable private models
+
+
+models:
+
+ - name: simple/model1
+ display_name: Simple Model 1
+ description: This is a test model.
+ creator_organization_name: Helm
+ access: open
+ release_date: 2023-01-01
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ # Adobe
+ - name: adobe/giga-gan
+ display_name: GigaGAN (1B)
+ description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)).
+ creator_organization_name: Adobe
+ access: limited
+ num_parameters: 1000000000
+ release_date: 2023-06-22
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+
+ # AI21 Labs
+ - name: ai21/j1-jumbo
+ display_name: J1-Jumbo v1 (178B)
+ description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
+ creator_organization_name: AI21 Labs
+ access: limited
+ num_parameters: 178000000000
+ release_date: 2021-08-11
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: ai21/j1-large
+ display_name: J1-Large v1 (7.5B)
+ description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
+ creator_organization_name: AI21 Labs
+ access: limited
+ num_parameters: 7500000000
+ release_date: 2021-08-11
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: ai21/j1-grande
+ display_name: J1-Grande v1 (17B)
+ description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). 
+ creator_organization_name: AI21 Labs + access: limited + num_parameters: 17000000000 + release_date: 2022-05-03 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: ai21/j1-grande-v2-beta + display_name: J1-Grande v2 beta (17B) + description: Jurassic-1 Grande v2 beta (17B parameters) + creator_organization_name: AI21 Labs + access: limited + num_parameters: 17000000000 + release_date: 2022-10-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: ai21/j2-large + display_name: Jurassic-2 Large (7.5B) + description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) + creator_organization_name: AI21 Labs + access: limited + num_parameters: 7500000000 + release_date: 2023-03-09 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ai21/j2-grande + display_name: Jurassic-2 Grande (17B) + description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) + creator_organization_name: AI21 Labs + access: limited + num_parameters: 17000000000 + release_date: 2023-03-09 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ai21/j2-jumbo + display_name: Jurassic-2 Jumbo (178B) + description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) + creator_organization_name: AI21 Labs + access: limited + num_parameters: 178000000000 + release_date: 2023-03-09 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # TODO(1524): Change AI21 model names + # - j2-jumbo -> j2-ultra + # - j2-grande -> j2-mid + # - j2-large -> j2-light + + - name: ai21/jamba-instruct + display_name: Jamba Instruct + description: Jamba Instruct is an instruction tuned version of Jamba, which uses a hybrid Transformer-Mamba mixture-of-experts (MoE) architecture that interleaves blocks of Transformer and Mamba layers. ([blog](https://www.ai21.com/blog/announcing-jamba-instruct)) + creator_organization_name: AI21 Labs + access: limited + num_parameters: 52000000000 + release_date: 2024-05-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ai21/jamba-1.5-mini + display_name: Jamba 1.5 Mini + description: Jamba 1.5 Mini is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family)) + creator_organization_name: AI21 Labs + access: open + num_parameters: 51600000000 + release_date: 2024-08-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ai21/jamba-1.5-large + display_name: Jamba 1.5 Large + description: Jamba 1.5 Large is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. 
([blog](https://www.ai21.com/blog/announcing-jamba-model-family)) + creator_organization_name: AI21 Labs + access: open + num_parameters: 399000000000 + release_date: 2024-08-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # AI Singapore + - name: aisingapore/sea-lion-7b + display_name: SEA-LION 7B + description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization. + creator_organization_name: AI Singapore + access: open + num_parameters: 7000000000 + release_date: 2023-02-24 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: aisingapore/sea-lion-7b-instruct + display_name: SEA-LION 7B Instruct + description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization. + creator_organization_name: AI Singapore + access: open + num_parameters: 7000000000 + release_date: 2023-02-24 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: aisingapore/llama3-8b-cpt-sea-lionv2-base + display_name: Llama3 8B CPT SEA-LIONv2 + description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages. + creator_organization_name: AI Singapore + access: open + num_parameters: 8030000000 + release_date: 2024-07-31 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct + display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct + description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese. + creator_organization_name: AI Singapore + access: open + num_parameters: 8030000000 + release_date: 2024-08-21 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: aisingapore/gemma2-9b-cpt-sea-lionv3-base + display_name: Gemma2 9B CPT SEA-LIONv3 + description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese. + creator_organization_name: AI Singapore + access: open + num_parameters: 9240000000 + release_date: 2024-10-30 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct + display_name: Gemma2 9B CPT SEA-LIONv3 Instruct + description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese. 
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 9240000000
+ release_date: 2024-10-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
+ display_name: Llama3.1 8B CPT SEA-LIONv3
+ description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 8030000000
+ release_date: 2024-12-11
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
+ display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
+ description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 8030000000
+ release_date: 2024-12-11
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
+ display_name: Llama3.1 70B CPT SEA-LIONv3
+ description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 70600000000
+ release_date: 2024-12-11
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
+ display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
+ description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese. 
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 70600000000
+ release_date: 2024-12-11
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # Aleph Alpha
+ # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
+ # TODO: add Luminous World when it's released
+ - name: AlephAlpha/luminous-base
+ display_name: Luminous Base (13B)
+ description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
+ creator_organization_name: Aleph Alpha
+ access: limited
+ num_parameters: 13000000000
+ # TODO: get exact release date
+ release_date: 2022-01-01
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ - name: AlephAlpha/luminous-extended
+ display_name: Luminous Extended (30B)
+ description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
+ creator_organization_name: Aleph Alpha
+ access: limited
+ num_parameters: 30000000000
+ release_date: 2022-01-01
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ - name: AlephAlpha/luminous-supreme
+ display_name: Luminous Supreme (70B)
+ description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
+ creator_organization_name: Aleph Alpha
+ access: limited
+ num_parameters: 70000000000
+ release_date: 2022-01-01
+ # Does not support echo.
+ # Currently, only Luminous-extended and Luminous-base support multimodal inputs
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ # TODO: Uncomment when luminous-world is released.
+ # - name: AlephAlpha/luminous-world # Not released yet.
+ # display_name: Luminous World (178B)
+ # description: Luminous World (178B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
+ # creator_organization_name: Aleph Alpha
+ # access: limited
+ # num_parameters: TBD
+ # release_date: TBD
+ # # Does not support echo.
+ # tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: AlephAlpha/m-vader
+ display_name: MultiFusion (13B)
+ description: MultiFusion is a multimodal, multilingual diffusion model that extends the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
+ creator_organization_name: Aleph Alpha
+ access: limited
+ num_parameters: 13000000000
+ release_date: 2023-05-24
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+
+ # Amazon Nova models
+ # References for Amazon Nova models:
+ # https://aws.amazon.com/ai/generative-ai/nova/
+ - name: amazon/nova-premier-v1:0
+ display_name: Amazon Nova Premier
+ description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. 
([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2025-04-30
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: amazon/nova-pro-v1:0
+ display_name: Amazon Nova Pro
+ description: Amazon Nova Pro Model
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2024-12-03
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: amazon/nova-lite-v1:0
+ display_name: Amazon Nova Lite
+ description: Amazon Nova Lite Model
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2024-12-03
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: amazon/nova-micro-v1:0
+ display_name: Amazon Nova Micro
+ description: Amazon Nova Micro Model
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2024-12-03
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ # Titan Models
+ # References for Amazon Titan models:
+ # - https://aws.amazon.com/bedrock/titan/
+ # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
+ # - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
+ - name: amazon/titan-text-lite-v1
+ display_name: Amazon Titan Text Lite
+ description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2023-11-29
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: amazon/titan-text-express-v1
+ display_name: Amazon Titan Text Express
+ description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
+ creator_organization_name: Amazon
+ access: limited
+ release_date: 2023-11-29
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+# Mistral Models on Bedrock
+# References for Mistral on Amazon Bedrock
+# https://aws.amazon.com/bedrock/mistral/
+
+ - name: mistralai/amazon-mistral-7b-instruct-v0:2
+ display_name: Mistral 7B Instruct on Amazon Bedrock
+ description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
+ creator_organization_name: Mistral
+ access: limited
+ release_date: 2024-03-23
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
+ display_name: Mixtral 8x7B Instruct on Amazon Bedrock
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. 
Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
+ creator_organization_name: Mistral
+ access: limited
+ release_date: 2023-12-11
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: mistralai/amazon-mistral-large-2402-v1:0
+ display_name: Mistral Large (2402) on Amazon Bedrock
+ description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
+ creator_organization_name: Mistral
+ access: limited
+ release_date: 2024-02-26
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: mistralai/amazon-mistral-small-2402-v1:0
+ display_name: Mistral Small on Amazon Bedrock
+ description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
+ creator_organization_name: Mistral
+ access: limited
+ release_date: 2024-02-26
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: mistralai/amazon-mistral-large-2407-v1:0
+ display_name: Mistral Large (2407) on Amazon Bedrock
+ description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling, JSON outputting, and reasoning capabilities.
+ creator_organization_name: Mistral
+ access: limited
+ release_date: 2024-07-24
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Llama3 on Amazon Bedrock
+# References for Llama3 on Amazon Bedrock
+# https://aws.amazon.com/bedrock/llama/
+
+ - name: meta/amazon-llama3-8b-instruct-v1:0
+ display_name: Llama 3 8B Instruct on Amazon Bedrock
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
+ creator_organization_name: Meta
+ access: limited
+ release_date: 2024-04-23
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/amazon-llama3-70b-instruct-v1:0
+ display_name: Llama 3 70B Instruct on Amazon Bedrock
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications. 
+ creator_organization_name: Meta
+ access: limited
+ release_date: 2024-04-23
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/amazon-llama3-1-405b-instruct-v1:0
+ display_name: Llama 3.1 405B Instruct on Amazon Bedrock
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+ creator_organization_name: Meta
+ access: limited
+ release_date: 2024-07-26
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/amazon-llama3-1-70b-instruct-v1:0
+ display_name: Llama 3.1 70B Instruct on Amazon Bedrock
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+ creator_organization_name: Meta
+ access: limited
+ release_date: 2024-07-26
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/amazon-llama3-1-8b-instruct-v1:0
+ display_name: Llama 3.1 8B Instruct on Amazon Bedrock
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+ creator_organization_name: Meta
+ access: limited
+ release_date: 2024-07-26
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # Anthropic
+ - name: anthropic/claude-v1.3
+ display_name: Claude v1.3
+ description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
+ creator_organization_name: Anthropic
+ access: limited
+ num_parameters: 52000000000
+ release_date: 2023-03-17
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-instant-v1
+ display_name: Claude Instant V1
+ description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2023-03-17
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-instant-1.2
+ display_name: Claude Instant 1.2
+ description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). 
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2023-08-09
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-2.0
+ display_name: Claude 2.0
+ description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2023-07-11
+ tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-2.1
+ display_name: Claude 2.1
+ description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2023-11-21
+ tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-3-haiku-20240307
+ display_name: Claude 3 Haiku (20240307)
+ description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-3-sonnet-20240229
+ display_name: Claude 3 Sonnet (20240229)
+ description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-3-opus-20240229
+ display_name: Claude 3 Opus (20240229)
+ description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)). 
+ access: limited + creator_organization_name: Anthropic + release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-3-5-haiku-20241022 + display_name: Claude 3.5 Haiku (20241022) + description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)). + creator_organization_name: Anthropic + access: limited + release_date: 2024-11-04 # Released after the blog post + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-3-5-sonnet-20240620 + display_name: Claude 3.5 Sonnet (20240620) + description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)) + creator_organization_name: Anthropic + access: limited + release_date: 2024-06-20 + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-3-5-sonnet-20241022 + display_name: Claude 3.5 Sonnet (20241022) + description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)). + creator_organization_name: Anthropic + access: limited + release_date: 2024-10-22 + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-3-7-sonnet-20250219 + display_name: Claude 3.7 Sonnet (20250219) + description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). + creator_organization_name: Anthropic + access: limited + release_date: 2025-02-24 + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k + display_name: Claude 3.7 Sonnet (20250219, extended thinking) + description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens. + creator_organization_name: Anthropic + access: limited + release_date: 2025-02-24 + tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: anthropic/claude-sonnet-4-20250514 + display_name: Claude 4 Sonnet (20250514) + description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). 
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2025-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-sonnet-4-20250514-thinking-10k
+ display_name: Claude 4 Sonnet (20250514, extended thinking)
+ description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2025-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-opus-4-20250514
+ display_name: Claude 4 Opus (20250514)
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2025-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/claude-opus-4-20250514-thinking-10k
+ display_name: Claude 4 Opus (20250514, extended thinking)
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2025-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: anthropic/stanford-online-all-v4-s3
+ display_name: Anthropic-LM v4-s3 (52B)
+ description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
+ creator_organization_name: Anthropic
+ access: closed
+ num_parameters: 52000000000
+ release_date: 2021-12-01
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+
+
+ # Berkeley
+ - name: berkeley/koala-13b # NOT SUPPORTED
+ display_name: Koala (13B)
+ description: Koala (13B) is a chatbot fine-tuned from Llama (13B) on dialogue data gathered from the web. ([blog post](https://bair.berkeley.edu/blog/2023/04/03/koala/))
+ creator_organization_name: UC Berkeley
+ access: open
+ num_parameters: 13000000000
+ release_date: 2023-04-03
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
+
+
+
+ # BigScience
+ - name: bigscience/bloom
+ display_name: BLOOM (176B)
+ description: BLOOM (176B parameters) is an autoregressive model trained on 46 natural languages and 13 programming languages ([paper](https://arxiv.org/pdf/2211.05100.pdf)).
+ creator_organization_name: BigScience
+ access: open
+ num_parameters: 176000000000
+ release_date: 2022-06-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+ - name: bigscience/bloomz # NOT SUPPORTED
+ display_name: BLOOMZ (176B)
+ description: BLOOMZ (176B parameters) is BLOOM that has been fine-tuned on natural language instructions ([details](https://huggingface.co/bigscience/bloomz)). 
+ creator_organization_name: BigScience + access: open + num_parameters: 176000000000 + release_date: 2022-11-03 + tags: [DEPRECATED_MODEL_TAG] # TODO: add tags + + - name: bigscience/t0pp + display_name: T0pp (11B) + description: T0pp (11B parameters) is an encoder-decoder model trained on a large set of different tasks specified in natural language prompts ([paper](https://arxiv.org/pdf/2110.08207.pdf)). + creator_organization_name: BigScience + access: open + num_parameters: 11000000000 + release_date: 2021-10-15 + # Does not support echo. + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG] + + + + # BigCode + - name: bigcode/santacoder + display_name: SantaCoder (1.1B) + description: SantaCoder (1.1B parameters) is a model trained on the Python, Java, and JavaScript subset of The Stack (v1.1) ([model card](https://huggingface.co/bigcode/santacoder)). + creator_organization_name: BigCode + access: open + num_parameters: 1100000000 + release_date: 2023-01-09 # ArXiv submission date + tags: [CODE_MODEL_TAG] + + - name: bigcode/starcoder + display_name: StarCoder (15.5B) + description: StarCoder (15.5B parameters) is a model trained on 80+ programming languages from The Stack (v1.2) ([model card](https://huggingface.co/bigcode/starcoder)). + creator_organization_name: BigCode + access: open + num_parameters: 15500000000 + release_date: 2023-05-09 # ArXiv submission date + tags: [CODE_MODEL_TAG] + + # BioMistral + + - name: biomistral/biomistral-7b + display_name: BioMistral (7B) + description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central. + creator_organization_name: BioMistral + access: open + num_parameters: 7300000000 + release_date: 2024-02-15 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + + + + # Cerebras Systems + - name: cerebras/cerebras-gpt-6.7b # NOT SUPPORTED + display_name: Cerebras GPT (6.7B) + description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf)) + creator_organization_name: Cerebras + access: limited + num_parameters: 6700000000 + release_date: 2023-04-06 + tags: [DEPRECATED_MODEL_TAG] # TODO: add tags + + - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED + display_name: Cerebras GPT (13B) + description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf)) + creator_organization_name: Cerebras + access: limited + num_parameters: 13000000000 + release_date: 2023-04-06 + tags: [DEPRECATED_MODEL_TAG] # TODO: add tags + + + + # Cohere + # Model versioning and the possible versions are not documented here: + # https://docs.cohere.ai/generate-reference#model-optional. + # So, instead, we got the names of the models from the Cohere Playground. + # + # Note that their tokenizer and model were trained on English text and + # they do not have a dedicated decode API endpoint, so the adaptation + # step for language modeling fails for certain Scenarios: + # the_pile:subset=ArXiv + # the_pile:subset=Github + # the_pile:subset=PubMed Central + + # TODO: Consider renaming to new model names.
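+ #
+ # Because entries in this file carry only metadata, the serving details
+ # (tokenizer, context window, client class) live in the companion
+ # model_deployments.yaml file. As an illustrative sketch only (the
+ # tokenizer name, sequence length, and client class below are assumptions,
+ # not part of this change), a metadata entry such as cohere/command would
+ # pair with a deployment entry shaped roughly like:
+ #
+ # - name: cohere/command
+ #   model_name: cohere/command
+ #   tokenizer_name: cohere/command
+ #   max_sequence_length: 4096
+ #   client_spec:
+ #     class_name: "helm.clients.cohere_client.CohereClient"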
+ - name: cohere/xlarge-20220609 + display_name: Cohere xlarge v20220609 (52.4B) + description: Cohere xlarge v20220609 (52.4B parameters) + creator_organization_name: Cohere + access: limited + num_parameters: 52400000000 + release_date: 2022-06-09 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/large-20220720 + display_name: Cohere large v20220720 (13.1B) + description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022. + creator_organization_name: Cohere + access: limited + num_parameters: 13100000000 + release_date: 2022-07-20 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/medium-20220720 + display_name: Cohere medium v20220720 (6.1B) + description: Cohere medium v20220720 (6.1B parameters) + creator_organization_name: Cohere + access: limited + num_parameters: 6100000000 + release_date: 2022-07-20 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/small-20220720 + display_name: Cohere small v20220720 (410M) + description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022. + creator_organization_name: Cohere + access: limited + num_parameters: 410000000 + release_date: 2022-07-20 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/xlarge-20221108 + display_name: Cohere xlarge v20221108 (52.4B) + description: Cohere xlarge v20221108 (52.4B parameters) + creator_organization_name: Cohere + access: limited + num_parameters: 52400000000 + release_date: 2022-11-08 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/medium-20221108 + display_name: Cohere medium v20221108 (6.1B) + description: Cohere medium v20221108 (6.1B parameters) + creator_organization_name: Cohere + access: limited + num_parameters: 6100000000 + release_date: 2022-11-08 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: cohere/command-medium-beta + display_name: Command beta (6.1B) + description: Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)). + creator_organization_name: Cohere + access: limited + num_parameters: 6100000000 + release_date: 2022-11-08 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: cohere/command-xlarge-beta + display_name: Command beta (52.4B) + description: Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)). + creator_organization_name: Cohere + access: limited + num_parameters: 52400000000 + release_date: 2022-11-08 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: cohere/command + display_name: Command + description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. 
[docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) + creator_organization_name: Cohere + access: limited + release_date: 2023-09-29 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: cohere/command-light + display_name: Command Light + description: Command Light is a smaller, faster version of Command, Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) + creator_organization_name: Cohere + access: limited + release_date: 2023-09-29 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: cohere/command-r + display_name: Command R + description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities. + creator_organization_name: Cohere + access: open + num_parameters: 35000000000 + release_date: 2024-03-11 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: cohere/command-r-plus + display_name: Command R Plus + description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities. + creator_organization_name: Cohere + access: open + num_parameters: 104000000000 + release_date: 2024-04-04 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Craiyon + - name: craiyon/dalle-mini + display_name: DALL-E mini (0.4B) + description: DALL-E mini is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)). + creator_organization_name: Craiyon + access: open + num_parameters: 400000000 + release_date: 2022-04-21 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: craiyon/dalle-mega + display_name: DALL-E mega (2.6B) + description: DALL-E mega is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)). + creator_organization_name: Craiyon + access: open + num_parameters: 2600000000 + release_date: 2022-04-21 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + # DeepFloyd + - name: DeepFloyd/IF-I-M-v1.0 + display_name: DeepFloyd IF Medium (0.4B) + description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon). + creator_organization_name: DeepFloyd + access: open + num_parameters: 400000000 + release_date: 2023-04-28 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: DeepFloyd/IF-I-L-v1.0 + display_name: DeepFloyd IF Large (0.9B) + description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon). + creator_organization_name: DeepFloyd + access: open + num_parameters: 900000000 + release_date: 2023-04-28 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: DeepFloyd/IF-I-XL-v1.0 + display_name: DeepFloyd IF X-Large (4.3B) + description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+ creator_organization_name: DeepFloyd + access: open + num_parameters: 4300000000 + release_date: 2023-04-28 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + + # Databricks + - name: databricks/dolly-v2-3b + display_name: Dolly V2 (3B) + description: Dolly V2 (3B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-2.8b. + creator_organization_name: Databricks + access: open + num_parameters: 2517652480 + release_date: 2023-04-12 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: databricks/dolly-v2-7b + display_name: Dolly V2 (7B) + description: Dolly V2 (7B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-6.9b. + creator_organization_name: Databricks + access: open + num_parameters: 6444163072 + release_date: 2023-04-12 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: databricks/dolly-v2-12b + display_name: Dolly V2 (12B) + description: Dolly V2 (12B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b. + creator_organization_name: Databricks + access: open + num_parameters: 11327027200 + release_date: 2023-04-12 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: databricks/dbrx-instruct + display_name: DBRX Instruct + description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm)) + creator_organization_name: Databricks + access: open + num_parameters: 132000000000 + release_date: 2024-03-27 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + + # DeepMind + - name: deepmind/gopher # NOT SUPPORTED + display_name: Gopher (280B) + description: Gopher (280B parameters) ([paper](https://arxiv.org/pdf/2112.11446.pdf)). + creator_organization_name: DeepMind + access: closed + num_parameters: 280000000000 + release_date: 2021-12-08 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: deepmind/chinchilla # NOT SUPPORTED + display_name: Chinchilla (70B) + description: Chinchilla (70B parameters) ([paper](https://arxiv.org/pdf/2203.15556.pdf)). + creator_organization_name: DeepMind + access: closed + num_parameters: 70000000000 + release_date: 2022-03-31 + tags: [UNSUPPORTED_MODEL_TAG] + + + # Deepseek + - name: deepseek-ai/deepseek-llm-67b-chat + display_name: DeepSeek LLM Chat (67B) + description: DeepSeek LLM Chat is an open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned with supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954)) + creator_organization_name: DeepSeek + access: open + num_parameters: 67000000000 + release_date: 2024-01-05 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/deepseek-v3 + display_name: DeepSeek v3 + description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures.
([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf)) + creator_organization_name: DeepSeek + access: open + # NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights. + num_parameters: 685000000000 + release_date: 2024-12-24 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/deepseek-r1 + display_name: DeepSeek R1 + description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) + creator_organization_name: DeepSeek + access: open + # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B + num_parameters: 685000000000 + release_date: 2025-01-20 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/deepseek-r1-hide-reasoning + display_name: DeepSeek R1 (hide reasoning) + description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model. + creator_organization_name: DeepSeek + access: open + # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B + num_parameters: 685000000000 + release_date: 2025-01-20 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/deepseek-r1-0528 + display_name: DeepSeek-R1-0528 + description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948)) + creator_organization_name: DeepSeek + access: open + num_parameters: 685000000000 + release_date: 2025-05-28 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + display_name: DeepSeek-R1-Distill-Llama-8b + description: DeepSeek-R1-Distill-Llama-8b is a dense model based on Llama-3.1-8B that was distilled from DeepSeek-R1 using reasoning data generated by DeepSeek-R1. + creator_organization_name: DeepSeek + access: open + num_parameters: 8000000000 + release_date: 2025-01-20 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: deepseek-ai/deepseek-coder-6.7b-instruct + display_name: DeepSeek-Coder-6.7b-Instruct + description: DeepSeek-Coder-6.7b-Instruct is a code language model initialized from deepseek-coder-6.7b-base and fine-tuned on 2B tokens of instruction data. + creator_organization_name: DeepSeek + access: open + num_parameters: 6740000000 + release_date: 2025-01-20 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # EleutherAI + - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together. + display_name: GPT-J (6B) + description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)).
+ creator_organization_name: EleutherAI + access: open + num_parameters: 6000000000 + release_date: 2021-06-04 + # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together). + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG] + + - name: eleutherai/gpt-neox-20b # Served by GooseAi and Together. + display_name: GPT-NeoX (20B) + description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)). + creator_organization_name: EleutherAI + access: open + num_parameters: 20000000000 + release_date: 2022-02-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG] + + - name: eleutherai/pythia-1b-v0 + display_name: Pythia (1B) + description: Pythia (1B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. + creator_organization_name: EleutherAI + access: open + num_parameters: 805736448 + release_date: 2023-02-13 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: eleutherai/pythia-2.8b-v0 + display_name: Pythia (2.8B) + description: Pythia (2.8B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. + creator_organization_name: EleutherAI + access: open + num_parameters: 2517652480 + release_date: 2023-02-13 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: eleutherai/pythia-6.9b + display_name: Pythia (6.9B) + description: Pythia (6.9B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. + creator_organization_name: EleutherAI + access: open + num_parameters: 6444163072 + release_date: 2023-02-13 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: eleutherai/pythia-12b-v0 + display_name: Pythia (12B) + description: Pythia (12B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. + creator_organization_name: EleutherAI + access: open + num_parameters: 11327027200 + release_date: 2023-02-13 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # EPFL LLM + + - name: epfl-llm/meditron-7b + display_name: Meditron (7B) + description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus. + creator_organization_name: EPFL LLM + access: open + num_parameters: 7000000000 + release_date: 2023-11-27 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # Google + - name: google/t5-11b + display_name: T5 (11B) + description: T5 (11B parameters) is an encoder-decoder model trained on a multi-task mixture, where each task is converted into a text-to-text format ([paper](https://arxiv.org/pdf/1910.10683.pdf)). + creator_organization_name: Google + access: open + num_parameters: 11000000000 + release_date: 2019-10-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG] + + - name: google/ul2 + display_name: UL2 (20B) + description: UL2 (20B parameters) is an encoder-decoder model trained on the C4 corpus. 
It's similar to T5 but trained with a different objective and slightly different scaling knobs ([paper](https://arxiv.org/pdf/2205.05131.pdf)). + creator_organization_name: Google + access: open + num_parameters: 20000000000 + release_date: 2022-05-10 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, NLG_PREFIX_TAG] + + - name: google/flan-t5-xxl + display_name: Flan-T5 (11B) + description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)). + creator_organization_name: Google + access: open + num_parameters: 11000000000 + release_date: 2022-12-06 # Paper date + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/palm # NOT SUPPORTED + display_name: PaLM (540B) + description: Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips ([paper](https://arxiv.org/pdf/2204.02311.pdf)). + creator_organization_name: Google + access: closed + num_parameters: 540000000000 + release_date: 2023-03-01 # was first announced on 2022-04 but remained private. + tags: [UNSUPPORTED_MODEL_TAG] + + # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead. + - name: google/gemini-pro + display_name: Gemini Pro + description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.0-pro-001 + display_name: Gemini 1.0 Pro (001) + description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.0-pro-002 + display_name: Gemini 1.0 Pro (002) + description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805)) + creator_organization_name: Google + access: limited + release_date: 2024-04-09 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead. + - name: google/gemini-pro-vision + display_name: Gemini Pro Vision + description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG] + + - name: google/gemini-1.0-pro-vision-001 + display_name: Gemini 1.0 Pro Vision + description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. 
([paper](https://arxiv.org/abs/2312.11805)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/gemini-1.5-pro-001 + display_name: Gemini 1.5 Pro (001) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-001 + display_name: Gemini 1.5 Flash (001) + description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-pro-preview-0409 + display_name: Gemini 1.5 Pro (0409 preview) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-04-10 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-pro-preview-0514 + display_name: Gemini 1.5 Pro (0514 preview) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-14 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-preview-0514 + display_name: Gemini 1.5 Flash (0514 preview) + description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. 
([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/)) + creator_organization_name: Google + access: limited + release_date: 2024-05-14 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-pro-001-safety-default + display_name: Gemini 1.5 Pro (001, default safety) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-pro-001-safety-block-none + display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-001-safety-default + display_name: Gemini 1.5 Flash (001, default safety) + description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-001-safety-block-none + display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety) + description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-05-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-pro-002 + display_name: Gemini 1.5 Pro (002) + description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. 
([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-09-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-002 + display_name: Gemini 1.5 Flash (002) + description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530)) + creator_organization_name: Google + access: limited + release_date: 2024-09-24 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-flash-exp + display_name: Gemini 2.0 Flash (Experimental) + description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash)) + creator_organization_name: Google + access: limited + release_date: 2024-12-11 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-1.5-flash-8b-001 + display_name: Gemini 1.5 Flash 8B + description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. 
([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2024-10-01 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-flash-001 + display_name: Gemini 2.0 Flash + description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-02-01 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-flash-lite-preview-02-05 + display_name: Gemini 2.0 Flash Lite (02-05 preview) + description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-02-05 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-flash-lite-001 + display_name: Gemini 2.0 Flash Lite + description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-03-25 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-flash-thinking-exp-01-21 + display_name: Gemini 2.0 Flash Thinking (01-21 preview) + description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking)) + creator_organization_name: Google + access: limited + release_date: 2025-01-21 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.0-pro-exp-02-05 + display_name: Gemini 2.0 Pro (02-05 preview) + description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-02-05 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-flash-lite-preview-06-17 + display_name: Gemini 2.5 Flash-Lite (06-17 preview) + description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/)) + creator_organization_name: Google + access: limited + release_date: 2025-06-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-flash-lite + display_name: Gemini 2.5 Flash-Lite + description: Gemini 2.5 Flash-Lite ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/)) + creator_organization_name: Google + access: limited + release_date: 2025-07-22 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, 
LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-flash-preview-04-17 + display_name: Gemini 2.5 Flash (04-17 preview) + description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-04-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-flash-preview-05-20 + display_name: Gemini 2.5 Flash (05-20 preview) + description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-05-20 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-flash + display_name: Gemini 2.5 Flash + description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-06-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-pro-exp-03-25 + display_name: Gemini 2.5 Pro (03-25 experimental) + description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-03-25 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-pro-preview-03-25 + display_name: Gemini 2.5 Pro (03-25 preview) + description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-pro-preview-05-06 + display_name: Gemini 2.5 Pro (05-06 preview) + description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemini-2.5-pro + display_name: Gemini 2.5 Pro + description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini)) + creator_organization_name: Google + access: limited + release_date: 2025-06-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemma-2b + display_name: Gemma (2B) + description: Gemma is a family of lightweight, open
models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/)) + creator_organization_name: Google + access: open + release_date: 2024-02-21 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/gemma-2b-it + display_name: Gemma Instruct (2B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/)) + creator_organization_name: Google + access: open + release_date: 2024-02-21 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemma-7b + display_name: Gemma (7B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/)) + creator_organization_name: Google + access: open + release_date: 2024-02-21 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/gemma-7b-it + display_name: Gemma Instruct (7B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/)) + creator_organization_name: Google + access: open + release_date: 2024-02-21 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemma-2-9b + display_name: Gemma 2 (9B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/)) + creator_organization_name: Google + access: open + release_date: 2024-06-27 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/gemma-2-9b-it + display_name: Gemma 2 Instruct (9B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/)) + creator_organization_name: Google + access: open + release_date: 2024-06-27 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/gemma-2-27b + display_name: Gemma 2 (27B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. 
([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/)) + creator_organization_name: Google + access: open + release_date: 2024-06-27 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/gemma-2-27b-it + display_name: Gemma 2 Instruct (27B) + description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/)) + creator_organization_name: Google + access: open + release_date: 2024-06-27 + tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: google/paligemma-3b-mix-224 + display_name: PaliGemma (3B) Mix 224 + description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/)) + creator_organization_name: Google + access: open + release_date: 2024-05-12 + tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/paligemma-3b-mix-448 + display_name: PaliGemma (3B) Mix 448 + description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/)) + creator_organization_name: Google + access: open + release_date: 2024-05-12 + tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/text-bison@001 + display_name: PaLM-2 (Bison) + description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions + tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/text-bison@002 + display_name: PaLM-2 (Bison) + description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions + tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/text-bison-32k + display_name: PaLM-2 (Bison) + description: The best value PaLM model with a 32K context. 
PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions + tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/text-unicorn@001 + display_name: PaLM-2 (Unicorn) + description: The largest model in the PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions + tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/code-bison@001 + display_name: Codey PaLM-2 (Bison) + description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions + tags: [CODE_MODEL_TAG] + + - name: google/code-bison@002 + display_name: Codey PaLM-2 (Bison) + description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions + tags: [CODE_MODEL_TAG] + + - name: google/code-bison-32k + display_name: Codey PaLM-2 (Bison) + description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) + creator_organization_name: Google + access: limited + release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions + tags: [CODE_MODEL_TAG] + + - name: google/medlm-medium + display_name: MedLM (Medium) + description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2.
([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: google/medlm-large + display_name: MedLM (Large) + description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview)) + creator_organization_name: Google + access: limited + release_date: 2023-12-13 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # HuggingFace + - name: HuggingFaceM4/idefics2-8b + display_name: IDEFICS 2 (8B) + description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)). + creator_organization_name: HuggingFace + access: open + num_parameters: 8000000000 + release_date: 2024-04-15 + tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] + + - name: HuggingFaceM4/idefics-9b + display_name: IDEFICS (9B) + description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)). + creator_organization_name: HuggingFace + access: open + num_parameters: 9000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] + + - name: HuggingFaceM4/idefics-9b-instruct + display_name: IDEFICS-instruct (9B) + description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)). + creator_organization_name: HuggingFace + access: open + num_parameters: 9000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] + + - name: HuggingFaceM4/idefics-80b + display_name: IDEFICS (80B) + description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)). + creator_organization_name: HuggingFace + access: open + num_parameters: 80000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] + + - name: HuggingFaceM4/idefics-80b-instruct + display_name: IDEFICS-instruct (80B) + description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)). + creator_organization_name: HuggingFace + access: open + num_parameters: 80000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] + + - name: huggingface/smollm2-135m + display_name: SmolLM2 (135M) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. 
([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 135000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: huggingface/smollm2-360m + display_name: SmolLM2 (360M) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 362000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: huggingface/smollm2-1.7b + display_name: SmolLM2 (1.7B) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 1710000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: huggingface/smollm2-135m-instruct + display_name: SmolLM2 Instruct (135M) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 135000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: huggingface/smollm2-360m-instruct + display_name: SmolLM2 Instruct (360M) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 362000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: huggingface/smollm2-1.7b-instruct + display_name: SmolLM2 Instruct (1.7B) + description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. 
([paper](https://arxiv.org/abs/2502.02737v1)) + creator_organization_name: HuggingFace + access: open + num_parameters: 1710000000 + release_date: 2024-10-31 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + ## Text-to-Image Diffusion Models + - name: huggingface/dreamlike-diffusion-v1-0 + display_name: Dreamlike Diffusion v1.0 (1B) + description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine-tuned on high-quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0)) + creator_organization_name: dreamlike.art + access: open + num_parameters: 1000000000 + release_date: 2023-03-08 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/dreamlike-photoreal-v2-0 + display_name: Dreamlike Photoreal v2.0 (1B) + description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0)) + creator_organization_name: dreamlike.art + access: open + num_parameters: 1000000000 + release_date: 2022-11-23 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/openjourney-v1-0 + display_name: Openjourney (1B) + description: Openjourney is an open-source Stable Diffusion model fine-tuned on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney)) + creator_organization_name: PromptHero + access: open + num_parameters: 1000000000 + release_date: 2022-11-01 # TODO: get the exact date + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/openjourney-v2-0 + display_name: Openjourney v2 (1B) + description: Openjourney v2 is an open-source Stable Diffusion model fine-tuned on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 on Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)). + creator_organization_name: PromptHero + access: open + num_parameters: 1000000000 + release_date: 2023-01-01 # TODO: get the exact date + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/promptist-stable-diffusion-v1-4 + display_name: Promptist + Stable Diffusion v1.4 (1B) + description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611)) + creator_organization_name: Microsoft + access: open + num_parameters: 1000000000 + release_date: 2022-12-19 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/redshift-diffusion + display_name: Redshift Diffusion (1B) + description: Redshift Diffusion is an open-source Stable Diffusion model fine-tuned on high-resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion)) + creator_organization_name: nitrosocke + access: open + num_parameters: 1000000000 + release_date: 2022-11-29 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-safe-weak + display_name: Safe Stable Diffusion weak (1B) + description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
+ creator_organization_name: TU Darmstadt + access: open + num_parameters: 1000000000 + release_date: 2022-11-09 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-safe-medium + display_name: Safe Stable Diffusion medium (1B) + description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)) + creator_organization_name: TU Darmstadt + access: open + num_parameters: 1000000000 + release_date: 2022-11-09 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-safe-strong + display_name: Safe Stable Diffusion strong (1B) + description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)) + creator_organization_name: TU Darmstadt + access: open + num_parameters: 1000000000 + release_date: 2022-11-09 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-safe-max + display_name: Safe Stable Diffusion max (1B) + description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)) + creator_organization_name: TU Darmstadt + access: open + num_parameters: 1000000000 + release_date: 2022-11-09 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-v1-4 + display_name: Stable Diffusion v1.4 (1B) + description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752)) + creator_organization_name: Ludwig Maximilian University of Munich CompVis + access: open + num_parameters: 1000000000 + release_date: 2022-08-01 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-v1-5 + display_name: Stable Diffusion v1.5 (1B) + description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned for 595k steps at resolution 512x512 on laion-aesthetics v2 5+ with 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752)) + creator_organization_name: Runway + access: open + num_parameters: 1000000000 + release_date: 2022-10-20 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-v2-base + display_name: Stable Diffusion v2 base (1B) + description: The model is trained from scratch for 550k steps at resolution 256x256 on a subset of LAION-5B filtered to remove explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5.
Then it is further trained for 850k steps at resolution 512x512 on the same dataset, using images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752)) + creator_organization_name: Stability AI + access: open + num_parameters: 1000000000 + release_date: 2022-11-23 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/stable-diffusion-v2-1-base + display_name: Stable Diffusion v2.1 base (1B) + description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base for 220k extra steps with punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752)) + creator_organization_name: Stability AI + access: open + num_parameters: 1000000000 + release_date: 2022-11-23 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: huggingface/vintedois-diffusion-v0-1 + display_name: Vintedois (22h) Diffusion model v0.1 (1B) + description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 fine-tuned on a large number of high-quality images with simple prompts, enabling it to generate beautiful images without extensive prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1)) + creator_organization_name: 22 Hours + access: open + num_parameters: 1000000000 + release_date: 2022-12-27 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: segmind/Segmind-Vega + display_name: Segmind Stable Diffusion (0.74B) + description: The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega)) + creator_organization_name: Segmind + access: open + num_parameters: 740000000 + release_date: 2023-12-01 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: segmind/SSD-1B + display_name: Segmind Stable Diffusion (1B) + description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled, 50% smaller version of the Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B)) + creator_organization_name: Segmind + access: open + num_parameters: 1000000000 + release_date: 2023-10-20 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + - name: stabilityai/stable-diffusion-xl-base-1.0 + display_name: Stable Diffusion XL + description: Stable Diffusion XL (SDXL) consists of an ensemble-of-experts pipeline for latent diffusion.
([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) + creator_organization_name: Stability AI + access: open + num_parameters: 6600000000 + release_date: 2023-07-26 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + # Kakao + - name: kakaobrain/mindall-e + display_name: minDALL-E (1.3B) + description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E)) + creator_organization_name: Kakao + access: open + num_parameters: 1300000000 + release_date: 2021-12-13 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + # Lexica + - name: lexica/search-stable-diffusion-1.5 + display_name: Lexica Search with Stable Diffusion v1.5 (1B) + description: Retrieves Stable Diffusion v1.5 images that Lexica users generated ([docs](https://lexica.art/docs)). + creator_organization_name: Lexica + access: open + release_date: 2023-01-01 + tags: [TEXT_TO_IMAGE_MODEL_TAG] + + + # Lightning AI + - name: lightningai/lit-gpt + display_name: Lit-GPT + description: Lit-GPT is an optimized collection of open-source LLMs for finetuning and inference. It supports Falcon, Llama 2, Vicuna, LongChat, and other top-performing open-source large language models. + creator_organization_name: Lightning AI + access: open + release_date: 2023-04-04 + tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + + + # LMSYS + - name: lmsys/vicuna-7b-v1.3 + display_name: Vicuna v1.3 (7B) + description: Vicuna v1.3 (7B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. + creator_organization_name: LMSYS + access: open + num_parameters: 7000000000 + release_date: 2023-06-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: lmsys/vicuna-13b-v1.3 + display_name: Vicuna v1.3 (13B) + description: Vicuna v1.3 (13B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. + creator_organization_name: LMSYS + access: open + num_parameters: 13000000000 + release_date: 2023-06-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Marin Community + - name: marin-community/marin-8b-instruct + display_name: Marin 8B Instruct + description: Marin 8B Instruct is an open-source 8B parameter instruction-following model from the Marin community. + creator_organization_name: Marin Community + access: open + num_parameters: 8030000000 + release_date: 2025-05-15 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Meta + - name: meta/opt-iml-175b # NOT SUPPORTED + display_name: OPT-IML (175B) + description: OPT-IML (175B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 175000000000 + release_date: 2022-12-22 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: meta/opt-iml-30b # NOT SUPPORTED + display_name: OPT-IML (30B) + description: OPT-IML (30B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
+ creator_organization_name: Meta + access: open + num_parameters: 30000000000 + release_date: 2022-12-22 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: meta/opt-175b + display_name: OPT (175B) + description: Open Pre-trained Transformers (175B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 175000000000 + release_date: 2022-05-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG] + + - name: meta/opt-66b + display_name: OPT (66B) + description: Open Pre-trained Transformers (66B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 66000000000 + release_date: 2022-05-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG] + + - name: meta/opt-6.7b + display_name: OPT (6.7B) + description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 6700000000 + release_date: 2022-05-02 + # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together). + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG] + + - name: meta/opt-1.3b + display_name: OPT (1.3B) + description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 1300000000 + release_date: 2022-05-02 + # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together). + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG] + + - name: meta/galactica-120b # NOT SUPPORTED + display_name: Galactica (120B) + description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 120000000000 + release_date: 2022-11-15 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: meta/galactica-30b # NOT SUPPORTED + display_name: Galactica (30B) + description: Galactica (30B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)). + creator_organization_name: Meta + access: open + num_parameters: 30000000000 + release_date: 2022-11-15 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: meta/llama-7b + display_name: LLaMA (7B) + description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. + creator_organization_name: Meta + access: open + num_parameters: 7000000000 + release_date: 2023-02-24 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-13b + display_name: LLaMA (13B) + description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
+ creator_organization_name: Meta + access: open + num_parameters: 13000000000 + release_date: 2023-02-24 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-30b + display_name: LLaMA (30B) + description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. + creator_organization_name: Meta + access: open + num_parameters: 30000000000 + release_date: 2023-02-24 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-65b + display_name: LLaMA (65B) + description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. + creator_organization_name: Meta + access: open + num_parameters: 65000000000 + release_date: 2023-02-24 + # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-2-7b + display_name: Llama 2 (7B) + description: Llama 2 pretrained models are trained on 2 trillion tokens, and have twice the context length of Llama 1. + creator_organization_name: Meta + access: open + num_parameters: 7000000000 + release_date: 2023-07-18 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-2-13b + display_name: Llama 2 (13B) + description: Llama 2 pretrained models are trained on 2 trillion tokens, and have twice the context length of Llama 1. + creator_organization_name: Meta + access: open + num_parameters: 13000000000 + release_date: 2023-07-18 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-2-70b + display_name: Llama 2 (70B) + description: Llama 2 pretrained models are trained on 2 trillion tokens, and have twice the context length of Llama 1. + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2023-07-18 + # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-3-8b + display_name: Llama 3 (8B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-04-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-3-8b-instruct-turbo + display_name: Llama 3 Instruct Turbo (8B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-07-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3-8b-instruct-lite + display_name: Llama 3 Instruct Lite (8B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available anywhere while maintaining excellent quality relative to full-precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-07-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3-70b + display_name: Llama 3 (70B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-04-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: meta/llama-3-70b-instruct-turbo + display_name: Llama 3 Instruct Turbo (70B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-07-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3-70b-instruct-lite + display_name: Llama 3 Instruct Lite (70B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available anywhere while maintaining excellent quality relative to full-precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-07-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-8b-instruct + display_name: Llama 3.1 Instruct (8B) + description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-70b-instruct + display_name: Llama 3.1 Instruct (70B) + description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage.
([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-405b-instruct + display_name: Llama 3.1 Instruct (405B) + description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 405000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-8b-instruct-turbo + display_name: Llama 3.1 Instruct Turbo (8B) + description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-70b-instruct-turbo + display_name: Llama 3.1 Instruct Turbo (70B) + description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.1-405b-instruct-turbo + display_name: Llama 3.1 Instruct Turbo (405B) + description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization.
([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 405000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.2-1b-instruct + display_name: Llama 3.2 Instruct (1.23B) + description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) comprises pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) + creator_organization_name: Meta + access: open + num_parameters: 1230000000 + release_date: 2024-09-25 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.2-3b-instruct-turbo + display_name: Llama 3.2 Instruct Turbo (3B) + description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) comprises pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 3210000000 + release_date: 2024-09-25 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.2-11b-vision-instruct-turbo + display_name: Llama 3.2 Vision Instruct Turbo (11B) + description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) comprises pretrained and instruction-tuned image-reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 10700000000 + release_date: 2024-09-25 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.2-90b-vision-instruct-turbo + display_name: Llama 3.2 Vision Instruct Turbo (90B) + description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) comprises pretrained and instruction-tuned image-reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization.
([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 88600000000 + release_date: 2024-09-25 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.3-70b-instruct-turbo + display_name: Llama 3.3 Instruct Turbo (70B) + description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-12-06 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3.3-70b-instruct + display_name: Llama 3.3 Instruct (70B) + description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-12-06 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-4-scout-17b-16e-instruct + display_name: Llama 4 Scout (17Bx16E) Instruct + description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection, a family of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)) + creator_organization_name: Meta + access: open + num_parameters: 109000000000 + release_date: 2025-04-05 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-4-maverick-17b-128e-instruct-fp8 + display_name: Llama 4 Maverick (17Bx128E) Instruct FP8 + description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection, a family of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)) + creator_organization_name: Meta + access: open + num_parameters: 402000000000 + release_date: 2025-04-05 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3-8b-chat + display_name: Llama 3 Instruct (8B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-04-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-3-70b-chat + display_name: Llama 3 Instruct (70B) + description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) + creator_organization_name: Meta + access: open + num_parameters: 70000000000 + release_date: 2024-04-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-guard-7b + display_name: Llama Guard (7B) + description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe based on a policy, it also lists the violating subcategories. + creator_organization_name: Meta + access: open + num_parameters: 7000000000 + release_date: 2023-12-07 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-guard-2-8b + display_name: Llama Guard 2 (8B) + description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-04-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: meta/llama-guard-3-8b + display_name: Llama Guard 3 (8B) + description: Llama Guard 3 is an 8B parameter Llama 3.1-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. + creator_organization_name: Meta + access: open + num_parameters: 8000000000 + release_date: 2024-07-23 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + + # Microsoft/NVIDIA + - name: microsoft/TNLGv2_530B + display_name: TNLG v2 (530B) + description: TNLG v2 (530B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
+ creator_organization_name: Microsoft/NVIDIA + access: closed + num_parameters: 530000000000 + release_date: 2022-01-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: microsoft/TNLGv2_7B + display_name: TNLG v2 (6.7B) + description: TNLG v2 (6.7B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)). + creator_organization_name: Microsoft/NVIDIA + access: closed + num_parameters: 6700000000 + release_date: 2022-01-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: microsoft/llava-1.5-7b-hf + display_name: LLaVA 1.5 (7B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 7000000000 + release_date: 2023-10-05 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: microsoft/llava-1.5-13b-hf + display_name: LLaVA 1.5 (13B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 13000000000 + release_date: 2023-10-05 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: uw-madison/llava-v1.6-vicuna-7b-hf + display_name: LLaVA 1.6 (7B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 7000000000 + release_date: 2024-01-01 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: uw-madison/llava-v1.6-vicuna-13b-hf + display_name: LLaVA 1.6 (13B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 13000000000 + release_date: 2024-01-01 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: uw-madison/llava-v1.6-mistral-7b-hf + display_name: LLaVA 1.6 + Mistral (7B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 7000000000 + release_date: 2024-01-01 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: uw-madison/llava-v1.6-34b-hf + display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data.
([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: Microsoft + access: open + num_parameters: 34000000000 + release_date: 2024-01-01 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b + display_name: OpenFlamingo (9B) + description: OpenFlamingo is an open-source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)). + creator_organization_name: OpenFlamingo + access: open + num_parameters: 9000000000 + release_date: 2023-08-02 + tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: microsoft/phi-2 + display_name: Phi-2 + description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value). + creator_organization_name: Microsoft + access: open + num_parameters: 2700000000 + release_date: 2023-12-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: microsoft/phi-3-small-8k-instruct + display_name: Phi-3 (7B) + description: Phi-3-Small-8K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning-dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/)) + creator_organization_name: Microsoft + access: open + num_parameters: 7000000000 + release_date: 2024-05-21 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: microsoft/phi-3-medium-4k-instruct + display_name: Phi-3 (14B) + description: Phi-3-Medium-4K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning-dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/)) + creator_organization_name: Microsoft + access: open + num_parameters: 14000000000 + release_date: 2024-05-21 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: microsoft/phi-3.5-mini-instruct + display_name: Phi-3.5-mini-instruct (3.8B) + description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280)) + creator_organization_name: Microsoft + access: open + num_parameters: 3800000000 + release_date: 2024-08-22 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: microsoft/phi-3.5-moe-instruct + display_name: Phi-3.5 MoE + description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning-dense data.
([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280)) + creator_organization_name: Microsoft + access: open + num_parameters: 41900000000 + release_date: 2024-08-22 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # KAIST AI + - name: kaistai/prometheus-vision-13b-v1.0-hf + display_name: LLaVA + Vicuna-v1.5 (13B) + description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) + creator_organization_name: KAIST AI + access: open + num_parameters: 13000000000 + release_date: 2024-01-01 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + # 01.AI + - name: 01-ai/yi-6b + display_name: Yi (6B) + description: The Yi models are large language models trained from scratch by developers at 01.AI. + creator_organization_name: 01.AI + access: open + num_parameters: 6000000000 + release_date: 2023-11-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: 01-ai/yi-34b + display_name: Yi (34B) + description: The Yi models are large language models trained from scratch by developers at 01.AI. + creator_organization_name: 01.AI + access: open + num_parameters: 34000000000 + release_date: 2023-11-02 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: 01-ai/yi-6b-chat + display_name: Yi Chat (6B) + description: The Yi models are large language models trained from scratch by developers at 01.AI. + creator_organization_name: 01.AI + access: open + num_parameters: 6000000000 + release_date: 2023-11-23 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: 01-ai/yi-34b-chat + display_name: Yi Chat (34B) + description: The Yi models are large language models trained from scratch by developers at 01.AI. + creator_organization_name: 01.AI + access: open + num_parameters: 34000000000 + release_date: 2023-11-23 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: 01-ai/yi-large + display_name: Yi Large + description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667)) + creator_organization_name: 01.AI + access: limited + release_date: 2024-05-12 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: 01-ai/yi-large-preview + display_name: Yi Large (Preview) + description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667)) + creator_organization_name: 01.AI + access: limited + release_date: 2024-05-12 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # Allen Institute for AI + # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580 + - name: allenai/olmo-7b + display_name: OLMo (7B) + description: OLMo is a series of Open Language Models trained on the Dolma dataset. + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 7000000000 + release_date: 2024-02-01 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: allenai/olmo-7b-twin-2t + display_name: OLMo (7B Twin 2T) + description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+ creator_organization_name: Allen Institute for AI + access: open + num_parameters: 7000000000 + release_date: 2024-02-01 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: allenai/olmo-7b-instruct + display_name: OLMo (7B Instruct) + description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset. + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 7000000000 + release_date: 2024-02-01 + # TODO: Add instruct tag. + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: allenai/olmo-1.7-7b + display_name: OLMo 1.7 (7B) + description: OLMo is a series of Open Language Models trained on the Dolma dataset. + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 7000000000 + release_date: 2024-04-17 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: allenai/olmo-2-1124-7b-instruct + display_name: OLMo 2 7B Instruct November 2024 + description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2)) + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 7300000000 + release_date: 2024-11-26 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: allenai/olmo-2-1124-13b-instruct + display_name: OLMo 2 13B Instruct November 2024 + description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2)) + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 13700000000 + release_date: 2024-11-26 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: allenai/olmo-2-0325-32b-instruct + display_name: OLMo 2 32B Instruct March 2025 + description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B)) + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 32200000000 + release_date: 2025-03-13 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: allenai/olmoe-1b-7b-0125-instruct + display_name: OLMoE 1B-7B Instruct January 2025 + description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060)) + creator_organization_name: Allen Institute for AI + access: open + num_parameters: 6900000000 + release_date: 2025-01-31 # TODO: get the exact date + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Mistral AI + - name: mistralai/mistral-7b-v0.1 + display_name: Mistral v0.1 (7B) + description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
([blog post](https://mistral.ai/news/announcing-mistral-7b/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7300000000 + release_date: 2023-09-27 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: mistralai/mistral-7b-instruct-v0.1 + display_name: Mistral Instruct v0.1 (7B) + description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fine-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7300000000 + release_date: 2023-09-27 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-7b-instruct-v0.2 + display_name: Mistral Instruct v0.2 (7B) + description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7300000000 + release_date: 2024-03-23 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-7b-instruct-v0.3 + display_name: Mistral Instruct v0.3 (7B) + description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.2, v0.3 has an extended vocabulary and supports function calling. ([blog post](https://mistral.ai/news/la-plateforme/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7300000000 + release_date: 2024-05-22 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-7b-instruct-v0.3-hf + display_name: Mistral Instruct v0.3 (7B) + description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.2, v0.3 has an extended vocabulary and supports function calling. ([blog post](https://mistral.ai/news/la-plateforme/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7300000000 + release_date: 2024-05-22 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mixtral-8x7b-32kseqlen + display_name: Mixtral (8x7B 32K seqlen) + description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)). + creator_organization_name: Mistral AI + access: open + num_parameters: 46700000000 + release_date: 2023-12-08 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: mistralai/mixtral-8x7b-instruct-v0.1 + display_name: Mixtral Instruct (8x7B) + description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
+ creator_organization_name: Mistral AI + access: open + num_parameters: 46700000000 + # Blog post: https://mistral.ai/news/mixtral-of-experts/ + release_date: 2023-12-11 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mixtral-8x22b + display_name: Mixtral (8x22B) + description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)). + creator_organization_name: Mistral AI + access: open + num_parameters: 176000000000 + release_date: 2024-04-10 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: mistralai/mixtral-8x22b-instruct-v0.1 + display_name: Mixtral Instruct (8x22B) + description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)). + creator_organization_name: Mistral AI + access: open + num_parameters: 176000000000 + release_date: 2024-04-10 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/bakLlava-v1-hf + display_name: BakLLaVA v1 (7B) + description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf)) + creator_organization_name: Mistral AI + access: open + num_parameters: 7000000000 + release_date: 2023-10-16 + tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] + + - name: mistralai/ministral-3b-2410 + display_name: Ministral 3B (2410) + description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)). + creator_organization_name: Mistral AI + access: limited + release_date: 2024-10-16 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/ministral-8b-2410 + display_name: Ministral 8B (2410) + description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)). + creator_organization_name: Mistral AI + access: open + release_date: 2024-10-16 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-small-2402 + display_name: Mistral Small (2402) + description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/)) + creator_organization_name: Mistral AI + access: limited + release_date: 2024-02-26 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-small-2409 + display_name: Mistral Small (2409) + description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities.
([blog](https://mistral.ai/news/mistral-large/)) + creator_organization_name: Mistral AI + access: limited + release_date: 2024-09-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-small-2501 + display_name: Mistral Small 3 (2501) + description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks: those that require robust language and instruction-following performance with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 23600000000 + release_date: 2025-01-30 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-small-2503 + display_name: Mistral Small 3.1 (2503) + description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1)) + creator_organization_name: Mistral AI + access: open + num_parameters: 23600000000 + release_date: 2025-03-17 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-medium-2312 + display_name: Mistral Medium (2312) + description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). + creator_organization_name: Mistral AI + access: limited + release_date: 2023-12-11 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-medium-2505 + display_name: Mistral Medium 3 (2505) + description: Mistral Medium 3 (2505) is a language model that is intended to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3)) + creator_organization_name: Mistral AI + access: limited + release_date: 2025-05-07 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-large-2402 + display_name: Mistral Large (2402) + description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/)) + creator_organization_name: Mistral AI + access: limited + release_date: 2024-02-26 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-large-2407 + display_name: Mistral Large 2 (2407) + description: Mistral Large 2 is a 123 billion parameter model that has a 128k context window and supports dozens of languages and 80+ coding languages. ([blog](https://mistral.ai/news/mistral-large-2407/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 123000000000 + release_date: 2024-07-24 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/mistral-large-2411 + display_name: Mistral Large (2411) + description: Mistral Large (2411) is a 123B parameter model that has a 128k context window.
([blog](https://mistral.ai/news/pixtral-large/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 123000000000 + release_date: 2024-11-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/open-mistral-nemo-2407 + display_name: Mistral NeMo (2407) + description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/)) + creator_organization_name: Mistral AI + access: open + release_date: 2024-07-18 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/pixtral-12b-2409 + display_name: Mistral Pixtral (2409) + description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/)) + creator_organization_name: Mistral AI + access: open + release_date: 2024-09-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: mistralai/pixtral-large-2411 + display_name: Mistral Pixtral Large (2411) + description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/)) + creator_organization_name: Mistral AI + access: open + num_parameters: 124000000000 + release_date: 2024-11-18 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Moonshot AI + - name: moonshotai/kimi-k2-instruct + display_name: Kimi K2 Instruct + description: Kimi K2 Instruct is a mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters trained with the Muon optimizer on 15.5T tokens. ([blog](https://moonshotai.github.io/Kimi-K2/)) + creator_organization_name: Moonshot AI + access: open + num_parameters: 1029173256720 + release_date: 2025-07-14 # Blog post has no date, so use the date from this news article https://www.cnbc.com/2025/07/14/alibaba-backed-moonshot-releases-kimi-k2-ai-rivaling-chatgpt-claude.html + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # MosaicML + - name: mosaicml/mpt-7b + display_name: MPT (7B) + description: MPT (7B) is a Transformer trained from scratch on 1T tokens of text and code. + creator_organization_name: MosaicML + access: open + num_parameters: 6700000000 + release_date: 2023-05-05 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: mosaicml/mpt-7b-chat # NOT SUPPORTED + display_name: MPT-Chat (7B) + description: MPT-Chat (7B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code. + creator_organization_name: MosaicML + access: open + num_parameters: 6700000000 + release_date: 2023-05-05 + tags: [UNSUPPORTED_MODEL_TAG] + + - name: mosaicml/mpt-instruct-7b + display_name: MPT-Instruct (7B) + description: MPT-Instruct (7B) is a model for short-form instruction following. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
+    creator_organization_name: MosaicML
+    access: open
+    num_parameters: 6700000000
+    release_date: 2023-05-05
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: mosaicml/mpt-30b
+    display_name: MPT (30B)
+    description: MPT (30B) is a Transformer trained from scratch on 1T tokens of text and code.
+    creator_organization_name: MosaicML
+    access: open
+    num_parameters: 30000000000
+    release_date: 2023-06-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: mosaicml/mpt-30b-chat # NOT SUPPORTED
+    display_name: MPT-Chat (30B)
+    description: MPT-Chat (30B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
+    creator_organization_name: MosaicML
+    access: open
+    num_parameters: 30000000000
+    release_date: 2023-06-22
+    tags: [UNSUPPORTED_MODEL_TAG]
+
+  - name: mosaicml/mpt-instruct-30b
+    display_name: MPT-Instruct (30B)
+    description: MPT-Instruct (30B) is a model for short-form instruction following. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
+    creator_organization_name: MosaicML
+    access: open
+    num_parameters: 30000000000
+    release_date: 2023-06-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+  # NECTEC
+  - name: nectec/Pathumma-llm-text-1.0.0
+    display_name: Pathumma-llm-text-1.0.0 (7B)
+    description: Pathumma-llm-text-1.0.0 (7B) is an instruction model based on OpenThaiLLM-Prebuilt-7B. ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
+    creator_organization_name: nectec
+    access: open
+    num_parameters: 7620000000
+    release_date: 2024-10-28
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: nectec/OpenThaiLLM-Prebuilt-7B
+    display_name: OpenThaiLLM-Prebuilt-7B (7B)
+    description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
+    creator_organization_name: nectec
+    access: open
+    num_parameters: 7620000000
+    release_date: 2024-10-28
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+  # Neurips
+  - name: neurips/local
+    display_name: Neurips Local
+    description: Neurips Local
+    creator_organization_name: Neurips
+    access: open
+    release_date: 2023-06-01
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+  # NVIDIA
+  - name: nvidia/megatron-gpt2
+    display_name: Megatron GPT2
+    description: GPT-2 implemented in Megatron-LM ([paper](https://arxiv.org/abs/1909.08053)).
+    creator_organization_name: NVIDIA
+    access: open
+    release_date: 2019-09-17 # paper date
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
+
+  - name: nvidia/nemotron-4-340b-instruct
+    display_name: Nemotron-4 Instruct (340B)
+    description: Nemotron-4 Instruct (340B) is an open weights model sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. 98% of the data used for model alignment was synthetically generated ([paper](https://arxiv.org/abs/2406.11704)).
+ creator_organization_name: NVIDIA + access: open + release_date: 2024-06-17 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: nvidia/llama-3.1-nemotron-70b-instruct + display_name: Llama 3.1 Nemotron Instruct (70B) + description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257)) + creator_organization_name: NVIDIA + access: open + num_parameters: 70000000000 + release_date: 2024-10-02 + tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + + # OpenAI + + ## GPT 2 Models + # Not served by OpenAI, instead served by HuggingFace. + + - name: openai/gpt2 + display_name: GPT-2 (1.5B) + description: GPT-2 (1.5B parameters) is a transformer model trained on a large corpus of English text in a self-supervised fashion ([paper](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)). + creator_organization_name: OpenAI + access: open + num_parameters: 1500000000 + release_date: 2019-02-14 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + + ## GPT 3 Models + # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3 + + - name: openai/davinci-002 + display_name: davinci-002 + description: Replacement for the GPT-3 curie and davinci base models. + creator_organization_name: OpenAI + access: limited + release_date: 2023-08-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/babbage-002 + display_name: babbage-002 + description: Replacement for the GPT-3 ada and babbage base models. + creator_organization_name: OpenAI + access: limited + release_date: 2023-08-22 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024. + + - name: openai/davinci + display_name: davinci (175B) + description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 175000000000 + release_date: 2020-05-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/curie + display_name: curie (6.7B) + description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 6700000000 + release_date: 2020-05-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/babbage + display_name: babbage (1.3B) + description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). 
+ creator_organization_name: OpenAI + access: limited + num_parameters: 1300000000 + release_date: 2020-05-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/ada + display_name: ada (350M) + description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 350000000 + release_date: 2020-05-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/text-davinci-003 + display_name: GPT-3.5 (text-davinci-003) + description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 175000000000 + release_date: 2022-11-28 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/text-davinci-002 + display_name: GPT-3.5 (text-davinci-002) + description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 175000000000 + release_date: 2022-01-27 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/text-davinci-001 + display_name: GPT-3.5 (text-davinci-001) + description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 175000000000 + release_date: 2022-01-27 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/text-curie-001 + display_name: text-curie-001 + description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 6700000000 + release_date: 2022-01-27 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/text-babbage-001 + display_name: text-babbage-001 + description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). + creator_organization_name: OpenAI + access: limited + num_parameters: 1300000000 + release_date: 2022-01-27 + tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + - name: openai/text-ada-001 + display_name: text-ada-001 + description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). 
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 350000000
+    release_date: 2022-01-27
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+  ## GPT 3.5 Turbo Models
+  # ChatGPT: https://openai.com/blog/chatgpt
+
+  - name: openai/gpt-3.5-turbo-instruct
+    display_name: GPT-3.5 Turbo Instruct
+    description: Similar capabilities as GPT-3 era models. Compatible with the legacy Completions endpoint but not Chat Completions.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-09-18
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-0301
+    display_name: GPT-3.5 Turbo (0301)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-03-01
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-0613
+    display_name: GPT-3.5 Turbo (0613)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-06-13
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-1106
+    display_name: GPT-3.5 Turbo (1106)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-0125
+    display_name: GPT-3.5 Turbo (0125)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
+    creator_organization_name: OpenAI
+    access: limited
+    # Release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    # The actual release date is unclear - it was described as "next week".
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-16k-0613
+    display_name: gpt-3.5-turbo-16k-0613
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-06-13
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+  ## GPT-4 and GPT-4 Turbo
+
+  - name: openai/gpt-4-1106-preview
+    display_name: GPT-4 Turbo (1106 preview)
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model.
Preview snapshot from 2023-11-06.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-0314
+    display_name: GPT-4 (0314)
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-03-14
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-32k-0314
+    display_name: gpt-4-32k-0314
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-03-14.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-03-14
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-0613
+    display_name: GPT-4 (0613)
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-06-13
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-32k-0613
+    display_name: gpt-4-32k-0613
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-06-13.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-06-13
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-0125-preview
+    display_name: GPT-4 Turbo (0125 preview)
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-turbo-2024-04-09
+    display_name: GPT-4 Turbo (2024-04-09)
+    description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  ## GPT-4o
+
+  - name: openai/gpt-4o-2024-05-13
+    display_name: GPT-4o (2024-05-13)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-05-13
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-08-06
+    display_name: GPT-4o (2024-08-06)
+    description: GPT-4o (2024-08-06) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-08-06
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-11-20
+    display_name: GPT-4o (2024-11-20)
+    description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-11-20
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-mini-2024-07-18
+    display_name: GPT-4o mini (2024-07-18)
+    description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4.1-2025-04-14
+    display_name: GPT-4.1 (2025-04-14)
+    description: GPT-4.1 (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-04-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4.1-mini-2025-04-14
+    display_name: GPT-4.1 mini (2025-04-14)
+    description: GPT-4.1 mini (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension.
([blog](https://openai.com/index/gpt-4-1/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-04-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4.1-nano-2025-04-14
+    display_name: GPT-4.1 nano (2025-04-14)
+    description: GPT-4.1 nano (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-04-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-5-2025-08-07
+    display_name: GPT-5 (2025-08-07)
+    description: GPT-5 (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-5-mini-2025-08-07
+    display_name: GPT-5 mini (2025-08-07)
+    description: GPT-5 mini (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-5-nano-2025-08-07
+    display_name: GPT-5 nano (2025-08-07)
+    description: GPT-5 nano (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/whisper-1_gpt-4o-2024-11-20
+    display_name: Whisper-1 + GPT-4o (2024-11-20)
+    description: Transcribes the audio with Whisper-1 and then uses GPT-4o to generate a response.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-11-20
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+  - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
+    display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
+    description: Transcribes the audio with GPT-4o Transcribe and then uses GPT-4o to generate a response.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-03-20
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+  - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
+    display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
+    description: Transcribes the audio with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-03-20
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+  - name: openai/gpt-4o-audio-preview-2024-10-01
+    display_name: GPT-4o Audio (Preview 2024-10-01)
+    description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-10-01
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-audio-preview-2024-12-17
+    display_name: GPT-4o Audio (Preview 2024-12-17)
+    description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-12-17
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-mini-audio-preview-2024-12-17
+    display_name: GPT-4o mini Audio (Preview 2024-12-17)
+    description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-12-17
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # GPT-4V
+
+  - name: openai/gpt-4-vision-preview
+    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model points to gpt-4-1106-vision-preview.
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: openai/gpt-4-1106-vision-preview
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  ## GPT-4.5
+  - name: openai/gpt-4.5-preview-2025-02-27
+    display_name: GPT-4.5 (2025-02-27 preview)
+    description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF).
([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-02-27
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  ## o1 Models
+  - name: openai/o1-pro-2025-03-19
+    display_name: o1 pro (2025-03-19)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-03-19
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-pro-2025-03-19-low-reasoning-effort
+    display_name: o1 pro (2025-03-19, low reasoning effort)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-03-19
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-pro-2025-03-19-high-reasoning-effort
+    display_name: o1 pro (2025-03-19, high reasoning effort)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-03-19
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-2024-12-17
+    display_name: o1 (2024-12-17)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-12-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-2024-12-17-low-reasoning-effort
+    display_name: o1 (2024-12-17, low reasoning effort)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-12-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-2024-12-17-high-reasoning-effort
+    display_name: o1 (2024-12-17, high reasoning effort)
+    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning.
([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-12-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-preview-2024-09-12
+    display_name: o1-preview (2024-09-12)
+    description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-mini-2024-09-12
+    display_name: o1-mini (2024-09-12)
+    description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31
+    display_name: o3-mini (2025-01-31)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-low-reasoning-effort
+    display_name: o3-mini (2025-01-31, low reasoning effort)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to low.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-high-reasoning-effort
+    display_name: o3-mini (2025-01-31, high reasoning effort)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to high.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-2025-04-16
+    display_name: o3 (2025-04-16)
+    description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks.
([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o3-2025-04-16-low-reasoning-effort + display_name: o3 (2025-04-16, low reasoning effort) + description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o3-2025-04-16-high-reasoning-effort + display_name: o3 (2025-04-16, high reasoning effort) + description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o4-mini-2025-04-16 + display_name: o4-mini (2025-04-16) + description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o4-mini-2025-04-16-low-reasoning-effort + display_name: o4-mini (2025-04-16, low reasoning effort) + description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o4-mini-2025-04-16-high-reasoning-effort + display_name: o4-mini (2025-04-16, high reasoning effort) + description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-04-16 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o3-pro-2025-06-10-high-reasoning-effort + display_name: o3-pro (2025-06-10, high reasoning effort) + description: o3-pro is an o-series model designed to think longer and provide the most reliable responses. 
([blog post](https://help.openai.com/en/articles/9624314-model-release-notes)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-06-10 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + ## GPT-OSS + - name: openai/gpt-oss-20b + display_name: gpt-oss-20b + description: gpt-oss-20b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 3.6B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/)) + creator_organization_name: OpenAI + access: open + release_date: 2025-08-05 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/gpt-oss-120b + display_name: gpt-oss-120b + description: gpt-oss-120b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 5.1B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/)) + creator_organization_name: OpenAI + access: open + release_date: 2025-08-05 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + ## Codex Models + # DEPRECATED: Codex models have been shut down on March 23 2023. + + - name: openai/code-davinci-002 + display_name: code-davinci-002 + description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)). + creator_organization_name: OpenAI + access: limited + release_date: 2021-07-01 # TODO: Find correct date (this is for v1) + tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] + + - name: openai/code-davinci-001 + display_name: code-davinci-001 + description: code-davinci-001 model + creator_organization_name: OpenAI + access: limited + release_date: 2021-07-01 # Paper date + tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] + + - name: openai/code-cushman-001 + display_name: code-cushman-001 (12B) + description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf). + creator_organization_name: OpenAI + access: limited + num_parameters: 12000000000 + release_date: 2021-07-01 # Paper date + tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] + + + ## Text Similarity Models + # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings + # The number of parameters is guessed based on the number of parameters of the + # corresponding GPT-3 model. + # DEPRECATED: Announced on July 06 2023 that first generation embeddings models + # will be shut down on January 04 2024. + + - name: openai/text-similarity-davinci-001 + display_name: text-similarity-davinci-001 + description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). + creator_organization_name: OpenAI + access: limited + num_parameters: 175000000000 + release_date: 2022-01-25 # Blog post date + tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG] + + - name: openai/text-similarity-curie-001 + display_name: text-similarity-curie-001 + description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). 
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 6700000000
+    release_date: 2022-01-25 # Blog post date
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
+
+  - name: openai/text-similarity-babbage-001
+    display_name: text-similarity-babbage-001
+    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 1300000000
+    release_date: 2022-01-25 # Blog post date
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
+
+  - name: openai/text-similarity-ada-001
+    display_name: text-similarity-ada-001
+    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 350000000
+    release_date: 2022-01-25 # Blog post date
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
+
+  - name: openai/text-embedding-ada-002
+    display_name: text-embedding-ada-002
+    description: An improved embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/new-and-improved-embedding-model)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2022-12-15 # Blog post date
+    tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+  # Text-to-image models
+  - name: openai/dall-e-2
+    display_name: DALL-E 2 (3.5B)
+    description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 3500000000
+    release_date: 2022-04-13
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3
+    display_name: DALL-E 3
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used for automatic prompt engineering. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-natural
+    display_name: DALL-E 3 (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used for automatic prompt engineering. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd
+    display_name: DALL-E 3 HD
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used for automatic prompt engineering. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd-natural
+    display_name: DALL-E 3 HD (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used for automatic prompt engineering. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # OpenThaiGPT
+  - name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    display_name: OpenThaiGPT v1.0.0 (7B)
+    description: OpenThaiGPT v1.0.0 (7B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openthaigpt/openthaigpt-1.0.0-13b-chat
+    display_name: OpenThaiGPT v1.0.0 (13B)
+    description: OpenThaiGPT v1.0.0 (13B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openthaigpt/openthaigpt-1.0.0-70b-chat
+    display_name: OpenThaiGPT v1.0.0 (70B)
+    description: OpenThaiGPT v1.0.0 (70B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Qwen
+
+  - name: qwen/qwen-7b
+    display_name: Qwen
+    description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-7b
+    display_name: Qwen1.5 (7B)
+    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention.
([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-14b
+    display_name: Qwen1.5 (14B)
+    description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-32b
+    display_name: Qwen1.5 (32B)
+    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-72b
+    display_name: Qwen1.5 (72B)
+    description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-7b-chat
+    display_name: Qwen1.5 Chat (7B)
+    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-14b-chat
+    display_name: Qwen1.5 Chat (14B)
+    description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-32b-chat
+    display_name: Qwen1.5 Chat (32B)
+    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-72b-chat
+    display_name: Qwen1.5 Chat (72B)
+    description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud.
Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-110b-chat
+    display_name: Qwen1.5 Chat (110B)
+    description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-04-25
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen2-72b-instruct
+    display_name: Qwen2 Instruct (72B)
+    description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-06-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen2.5-7b-instruct-turbo
+    display_name: Qwen2.5 Instruct Turbo (7B)
+    description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-09-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen2.5-7b-instruct
+    display_name: Qwen2.5 Instruct (7B)
+    description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-09-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen2.5-72b-instruct-turbo
+    display_name: Qwen2.5 Instruct Turbo (72B)
+    description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models.
([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-09-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen3-235b-a22b-fp8-tput
+    display_name: Qwen3 235B A22B FP8 Throughput
+    description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2025-04-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen3-235b-a22b-instruct-2507-fp8
+    display_name: Qwen3 235B A22B Instruct 2507 FP8
+    description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwq-32b-preview
+    display_name: QwQ (32B Preview)
+    description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/))
+    creator_organization_name: Alibaba Cloud
+    access: open
+    num_parameters: 32800000000
+    release_date: 2024-11-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen-vl
+    display_name: Qwen-VL
+    description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2023-08-24
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen-vl-chat
+    display_name: Qwen-VL Chat
+    description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2023-08-24
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2-vl-7b-instruct
+    display_name: Qwen2-VL Instruct (7B)
+    description: Qwen2-VL is the second generation of the Qwen-VL model series ([paper](https://arxiv.org/abs/2409.12191)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2024-08-29
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2-vl-72b-instruct
+    display_name: Qwen2-VL Instruct (72B)
+    description: Qwen2-VL is the second generation of the Qwen-VL model series ([paper](https://arxiv.org/abs/2409.12191)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2024-08-29
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2.5-vl-3b-instruct
+    display_name: Qwen2.5-VL Instruct (3B)
+    description: Qwen2.5-VL is the flagship generation of the Qwen vision-language model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2025-01-26
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2.5-vl-7b-instruct
+    display_name: Qwen2.5-VL Instruct (7B)
+    description: Qwen2.5-VL is the flagship generation of the Qwen vision-language model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2025-01-26
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2.5-vl-32b-instruct
+    display_name: Qwen2.5-VL Instruct (32B)
+    description: Qwen2.5-VL, the latest generation of the Qwen vision-language models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2025-01-26
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen2.5-vl-72b-instruct
+    display_name: Qwen2.5-VL Instruct (72B)
+    description: Qwen2.5-VL, the latest generation of the Qwen vision-language models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+    creator_organization_name: Alibaba Group
+    access: open
+    release_date: 2025-01-26
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen-audio-chat
+    display_name: Qwen-Audio Chat
+    description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2023-11-14
+    tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+  - name: qwen/qwen2-audio-7b-instruct
+    display_name: Qwen2-Audio Instruct (7B)
+    description: The second generation of the auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2024-07-15
+    tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+  - name: qwen/qwen2.5-omni-7b
+    display_name: Qwen2.5-Omni (7B)
+    description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2025-03-27
+    tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  # SAIL (Sea AI Lab)
+  - name: sail/sailor-7b
+    display_name: Sailor (7B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sail/sailor-7b-chat
+    display_name: Sailor Chat (7B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sail/sailor-14b
+    display_name: Sailor (14B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5.
([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sail/sailor-14b-chat
+    display_name: Sailor Chat (14B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Salesforce
+  - name: salesforce/codegen # NOT SUPPORTED
+    display_name: CodeGen (16B)
+    description: CodeGen (16B parameters) is an open dense code model trained for multi-turn program synthesis ([paper](https://arxiv.org/pdf/2203.13474.pdf)).
+    creator_organization_name: Salesforce
+    access: open
+    num_parameters: 16000000000
+    release_date: 2022-03-25
+    tags: [UNSUPPORTED_MODEL_TAG]
+
+  # SambaNova
+  - name: sambanova/sambalingo-thai-base
+    display_name: SambaLingo-Thai-Base
+    description: SambaLingo-Thai-Base is a pretrained bilingual Thai and English model that adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-chat
+    display_name: SambaLingo-Thai-Chat
+    description: SambaLingo-Thai-Chat is a chat model trained using direct preference optimization on SambaLingo-Thai-Base. SambaLingo-Thai-Base adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-base-70b
+    display_name: SambaLingo-Thai-Base-70B
+    description: SambaLingo-Thai-Base-70B is a pretrained bilingual Thai and English model that adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-chat-70b
+    display_name: SambaLingo-Thai-Chat-70B
+    description: SambaLingo-Thai-Chat-70B is a chat model trained using direct preference optimization on SambaLingo-Thai-Base-70B. SambaLingo-Thai-Base-70B adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # SCB10X
+  - name: scb10x/typhoon-7b
+    display_name: Typhoon (7B)
+    description: Typhoon (7B) is a pretrained Thai large language model with 7 billion parameters based on Mistral 7B.
([paper](https://arxiv.org/abs/2312.13951))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-12-21
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-8b
+    display_name: Typhoon v1.5 (8B)
+    description: Typhoon v1.5 (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-8b-instruct
+    display_name: Typhoon v1.5 Instruct (8B)
+    description: Typhoon v1.5 Instruct (8B) is an instruct Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-72b
+    display_name: Typhoon v1.5 (72B)
+    description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-72b-instruct
+    display_name: Typhoon v1.5 Instruct (72B)
+    description: Typhoon v1.5 Instruct (72B) is an instruct Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
+    display_name: Typhoon 1.5X instruct (8B)
+    description: Llama-3-Typhoon-1.5X-8B-instruct is an 8 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-29
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
+    display_name: Typhoon 1.5X instruct (70B)
+    description: Llama-3-Typhoon-1.5X-70B-instruct is a 70 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-05-29
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Alibaba DAMO Academy
+  - name: damo/seallm-7b-v2
+    display_name: SeaLLM v2 (7B)
+    description: SeaLLM v2 is a multilingual LLM for Southeast Asian (SEA) languages trained from Mistral (7B).
([website](https://damo-nlp-sg.github.io/SeaLLMs/))
+    creator_organization_name: Alibaba DAMO Academy
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-02
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: damo/seallm-7b-v2.5
+    display_name: SeaLLM v2.5 (7B)
+    description: SeaLLM is a multilingual LLM for Southeast Asian (SEA) languages trained from Gemma (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
+    creator_organization_name: Alibaba DAMO Academy
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-12
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    display_name: Arctic Instruct
+    description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP, resulting in 480B total and 17B active parameters chosen using top-2 gating.
+    creator_organization_name: Snowflake
+    access: open
+    num_parameters: 482000000000
+    release_date: 2024-04-24
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+  # Stability AI
+  - name: stabilityai/stablelm-base-alpha-3b
+    display_name: StableLM-Base-Alpha (3B)
+    description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-04-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: stabilityai/stablelm-base-alpha-7b
+    display_name: StableLM-Base-Alpha (7B)
+    description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-04-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  # Stanford
+  - name: stanford/alpaca-7b
+    display_name: Alpaca (7B)
+    description: Alpaca 7B is a model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations.
+    creator_organization_name: Stanford
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-03-13
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
+  # TII UAE
+  - name: tiiuae/falcon-7b
+    display_name: Falcon (7B)
+    description: Falcon-7B is a 7B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-03-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: tiiuae/falcon-7b-instruct
+    display_name: Falcon-Instruct (7B)
+    description: Falcon-7B-Instruct is a 7B-parameter causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-03-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: tiiuae/falcon-40b
+    display_name: Falcon (40B)
+    description: Falcon-40B is a 40B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 40000000000
+    release_date: 2023-05-25
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: tiiuae/falcon-40b-instruct
+    display_name: Falcon-Instruct (40B)
+    description: Falcon-40B-Instruct is a 40B-parameter causal decoder-only model built by TII based on Falcon-40B and finetuned on a mixture of chat/instruct datasets.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 40000000000
+    release_date: 2023-05-25
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+  # Together
+  - name: together/gpt-jt-6b-v1
+    display_name: GPT-JT (6B)
+    description: GPT-JT (6B parameters) is a fork of GPT-J ([blog post](https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai)).
+    creator_organization_name: Together
+    access: open
+    num_parameters: 6700000000
+    release_date: 2022-11-29
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: together/gpt-neoxt-chat-base-20b
+    display_name: GPT-NeoXT-Chat-Base (20B)
+    description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 20000000000
+    release_date: 2023-03-08
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG]
+
+  - name: together/redpajama-incite-base-3b-v1
+    display_name: RedPajama-INCITE-Base-v1 (3B)
+    description: RedPajama-INCITE-Base-v1 (3B parameters) is a 3-billion-parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-05-05
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: together/redpajama-incite-instruct-3b-v1
+    display_name: RedPajama-INCITE-Instruct-v1 (3B)
+    description: RedPajama-INCITE-Instruct-v1 (3B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base-v1 (3B), a 3-billion-parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-05-05
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: together/redpajama-incite-chat-3b-v1 # NOT SUPPORTED
+    display_name: RedPajama-INCITE-Chat-v1 (3B)
+    description: RedPajama-INCITE-Chat-v1 (3B parameters) is a model fine-tuned on OASST1 and Dolly2 to enhance chatting ability. It is built from RedPajama-INCITE-Base-v1 (3B), a 3-billion-parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-05-05
+    tags: [UNSUPPORTED_MODEL_TAG]
+
+  - name: together/redpajama-incite-base-7b
+    display_name: RedPajama-INCITE-Base (7B)
+    description: RedPajama-INCITE-Base (7B parameters) is a 7-billion-parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-05-05
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: together/redpajama-incite-instruct-7b
+    display_name: RedPajama-INCITE-Instruct (7B)
+    description: RedPajama-INCITE-Instruct (7B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base (7B), a 7-billion-parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+    creator_organization_name: Together
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-05-05
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+  # Tsinghua
+
+  - name: thudm/cogview2
+    display_name: CogView2 (6B)
+    description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290)).
+    creator_organization_name: Tsinghua
+    access: open
+    num_parameters: 6000000000
+    release_date: 2022-06-15
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: tsinghua/glm
+    display_name: GLM (130B)
+    description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using the General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)).
+    creator_organization_name: Tsinghua
+    access: open
+    num_parameters: 130000000000
+    release_date: 2022-08-04
+    # Inference with echo=True is not feasible -- in the prompt encoding phase, they use
+    # bidirectional attention and do not perform predictions on them.
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
+
+  - name: tsinghua/codegeex # NOT SUPPORTED
+    display_name: CodeGeeX (13B)
+    description: CodeGeeX (13B parameters) is an open dense code model trained on more than 20 programming languages on a corpus of more than 850B tokens ([blog](http://keg.cs.tsinghua.edu.cn/codegeex/)).
+    creator_organization_name: Tsinghua
+    access: open
+    num_parameters: 13000000000
+    release_date: 2022-09-19
+    tags: [UNSUPPORTED_MODEL_TAG]
+
+  # Upstage
+  - name: upstage/solar-pro-preview-instruct
+    display_name: Solar Pro Preview (22B)
+    description: Solar Pro Preview (22B) is an open-weights model for single-GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
+    creator_organization_name: Upstage
+    access: open
+    num_parameters: 22000000000
+    release_date: 2024-09-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: upstage/solar-pro-241126
+    display_name: Solar Pro
+    description: Solar Pro is an LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
+    creator_organization_name: Upstage
+    access: limited
+    num_parameters: 22000000000
+    release_date: 2024-11-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  # Writer
+  - name: writer/palmyra-base
+    display_name: Palmyra Base (5B)
+    description: Palmyra Base (5B)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 5000000000
+    release_date: 2022-10-13
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-large
+    display_name: Palmyra Large (20B)
+    description: Palmyra Large (20B)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 20000000000
+    release_date: 2022-12-23
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-instruct-30
+    display_name: InstructPalmyra (30B)
+    description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 30000000000
+    release_date: 2023-02-16
+    # Does not support echo
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-e
+    display_name: Palmyra E (30B)
+    description: Palmyra E (30B)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 30000000000
+    release_date: 2023-03-03
+    # Does not support echo
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/silk-road
+    display_name: Silk Road (35B)
+    description: Silk Road (35B)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 35000000000
+    release_date: 2023-04-13
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-x
+    display_name: Palmyra X (43B)
+    description: Palmyra-X (43B parameters) is trained to adhere to instructions using human feedback and utilizes a technique called multiquery attention. Furthermore, a new feature called 'self-instruct' has been introduced, which includes the implementation of an early stopping criterion specifically designed for minimal instruction tuning ([paper](https://dev.writer.com/docs/becoming-self-instruct-introducing-early-stopping-criteria-for-minimal-instruct-tuning)).
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 43000000000
+    release_date: 2023-06-11
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-x-v2
+    display_name: Palmyra X V2 (33B)
+    description: Palmyra-X V2 (33B parameters) is a Transformer-based model trained on extremely large-scale pre-training data. The pre-training data, comprising more than 2 trillion tokens, is diverse and covers a wide range of areas; training used FlashAttention-2.
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 33000000000
+    release_date: 2023-12-01
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-x-v3
+    display_name: Palmyra X V3 (72B)
+    description: Palmyra-X V3 (72B parameters) is a Transformer-based model trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and uses multiquery attention.
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 72000000000
+    release_date: 2023-12-01
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-x-32k
+    display_name: Palmyra X-32K (33B)
+    description: Palmyra-X-32K (33B parameters) is a Transformer-based model trained on large-scale pre-training data. The pre-training data types are diverse and cover a wide range of areas. These data types are used in conjunction with an alignment mechanism to extend the context window.
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 33000000000
+    release_date: 2023-12-01
+    # Does not support echo
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: writer/palmyra-vision-003
+    display_name: Palmyra Vision 003
+    description: Palmyra Vision 003 (internal only)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 5000000000
+    release_date: 2024-05-24
+    # Does not support echo
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: writer/palmyra-x-004
+    display_name: Palmyra-X-004
+    description: Palmyra-X-004 is a language model with a large context window of up to 128,000 tokens that excels in processing and understanding complex tasks.
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-x5
+    display_name: Palmyra X5
+    description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2025-04-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med-32k
+    display_name: Palmyra-Med 32K (70B)
+    description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med
+    display_name: Palmyra Med
+    description: Palmyra Med is a model intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-fin-32k
+    display_name: Palmyra-Fin 32K (70B)
+    description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003 intended for financial applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-fin
+    display_name: Palmyra Fin
+    description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
+ creator_organization_name: Writer + access: limited + release_date: 2024-07-31 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # xAI + + - name: xai/grok-3-beta + display_name: Grok 3 Beta + description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3)) + creator_organization_name: xAI + access: limited + release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: xai/grok-3-mini-beta + display_name: Grok 3 mini Beta + description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3)) + creator_organization_name: xAI + access: limited + release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: xai/grok-4-0709 + display_name: Grok 4 (0709) + description: Grok 4 (0709) is a model that includes native tool use and real-time search integration. ([blog](https://x.ai/news/grok-4)) + creator_organization_name: xAI + access: limited + release_date: 2025-07-09 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + # Yandex + - name: yandex/yalm + display_name: YaLM (100B) + description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)). 
+    creator_organization_name: Yandex
+    access: open
+    num_parameters: 100000000000
+    release_date: 2022-06-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+  # Reka
+  - name: reka/reka-core
+    display_name: Reka-Core
+    description: Reka-Core
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-core-20240415
+    display_name: Reka-Core-20240415
+    description: Reka-Core-20240415
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-core-20240501
+    display_name: Reka-Core-20240501
+    description: Reka-Core-20240501
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-05-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-flash
+    display_name: Reka-Flash (21B)
+    description: Reka-Flash (21B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 21000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-flash-20240226
+    display_name: Reka-Flash-20240226 (21B)
+    description: Reka-Flash-20240226 (21B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 21000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-edge
+    display_name: Reka-Edge (7B)
+    description: Reka-Edge (7B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 7000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-edge-20240208
+    display_name: Reka-Edge-20240208 (7B)
+    description: Reka-Edge-20240208 (7B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 7000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+# Diva Llama
+  - name: stanford/diva-llama
+    display_name: Diva Llama 3 (8B)
+    description: Diva Llama 3 is an end-to-end voice assistant model that can handle speech and text as inputs. It was trained using a distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
+    creator_organization_name: Stanford
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-10-03
+    tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+
+# LLaMA-Omni
+  - name: ictnlp/llama-3.1-8b-omni
+    display_name: LLaMA-Omni (8B)
+    description: A speech-language (audio) multimodal version of the LLaMA 3.1 model ([paper](https://arxiv.org/abs/2409.06666)).
+    creator_organization_name: ICTNLP
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-09-10
+    tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+
+# Maritaca AI
+  - name: maritaca-ai/sabia-7b
+    display_name: Sabiá 7B
+    description: Sabiá 7B
+    creator_organization_name: Maritaca AI
+    access: open
+    num_parameters: 6740000000
+    release_date: 2023-11-08
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabiazinho-3
+    display_name: Sabiazinho 3
+    description: Sabiazinho-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-02-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3
+    display_name: Sabiá 3
+    description: Sabiá-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3.1-2025-05-08
+    display_name: Sabiá 3.1
+    description: Sabiá-3.1 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to August 2024.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-05-08
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Z.ai
+
+  - name: zai-org/glm-4.5-air-fp8
+    display_name: GLM-4.5-Air-FP8
+    description: GLM-4.5-Air-FP8 is a hybrid reasoning model designed to unify reasoning, coding, and agentic capabilities into a single model. It has 106 billion total parameters and 12 billion active parameters. The thinking mode is enabled by default. ([blog](https://z.ai/blog/glm-4.5))
+    creator_organization_name: Z.ai
+    access: open
+    num_parameters: 110000000000
+    release_date: 2025-07-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+# Granite - IBM
+# https://www.ibm.com/granite
+# https://github.com/ibm-granite/granite-3.0-language-models
+
+  - name: ibm-granite/granite-3.0-2b-base
+    display_name: Granite 3.0 base (2B)
+    description: Granite-3.0-2B-Base is a decoder-only language model that supports a variety of text-to-text generation tasks.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-2b-instruct
+    display_name: Granite 3.0 Instruct (2B)
+    description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2630000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-8b-instruct
+    display_name: Granite 3.0 instruct (8B)
+    description: Granite-3.0-8B-Instruct is an 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-8b-base
+    display_name: Granite 3.0 base (8B)
+    description: Granite-3.0-8B-Base is a decoder-only language model that supports a variety of text-to-text generation tasks.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-3b-a800m-instruct
+    display_name: Granite 3.0 A800M instruct (3B)
+    description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3370000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-3b-a800m-base
+    display_name: Granite 3.0 A800M base (3B)
+    description: Granite-3.0-3B-A800M-Base is a decoder-only language model that supports a variety of text-to-text generation tasks.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3370000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-1b-a400m-instruct
+    display_name: Granite 3.0 A400M instruct (1B)
+    description: Granite-3.0-1B-A400M-Instruct is a 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1330000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.0-1b-a400m-base
+    display_name: Granite 3.0 A400M base (1B)
+    description: Granite-3.0-1B-A400M-Base is a decoder-only language model that supports a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1380000000
+    release_date: 2024-10-21
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-8b-base
+    display_name: Granite 3.1 - 8B - Base
+    description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy, increasing the supported context length in increments while adjusting RoPE theta until the model successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-8b-instruct
+    display_name: Granite 3.1 - 8B - Instruct
+    description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-2b-instruct
+    display_name: Granite 3.1 - 2B - Instruct
+    description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-2b-base
+    display_name: Granite 3.1 - 2B - Base
+    description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy, increasing the supported context length in increments while adjusting RoPE theta until the model successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-3b-a800m-instruct
+    display_name: Granite 3.1 - 3B - A800M - Instruct
+    description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3300000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-3b-a800m-base
+    display_name: Granite 3.1 - 3B - A800M - Base
+    description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy, increasing the supported context length in increments while adjusting RoPE theta until the model successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3300000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-1b-a400m-instruct
+    display_name: Granite 3.1 - 1B - A400M - Instruct
+    description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1330000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm-granite/granite-3.1-1b-a400m-base
+    display_name: Granite 3.1 - 1B - A400M - Base
+    description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy, increasing the supported context length in increments while adjusting RoPE theta until the model successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1330000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+  - name: ibm/granite-13b-instruct-v2
+    display_name: Granite 13b instruct v2
+    description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model.
+    creator_organization_name: IBM
+    access: limited
+    num_parameters: 13000000000
+    release_date: 2023-11-30
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-20b-code-instruct-8k
+    display_name: Granite 20b code instruct (8K)
+    description: Granite-20B-Code-Instruct-8K is fine-tuned from Granite-20B-Code-Base-8K, a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). The base model is trained from scratch with a two-phase training strategy. In phase 1, the model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, the model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the model's ability to reason and follow instructions.
+    creator_organization_name: IBM
+    access: limited
+    num_parameters: 20000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-34b-code-instruct
+    display_name: Granite 34b code instruct
+    description: Granite Base (34B) Code Instruct is a 34B parameter model fine-tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 34000000000
+    release_date: 2024-06-05
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+  - name: ibm/granite-3b-code-instruct
+    display_name: Granite 3b code instruct
+    description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine-tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3000000000
+    release_date: 2024-06-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-8b-code-instruct
+    display_name: Granite 8b code instruct
+    description: Granite-8B-Code-Instruct-128K is an 8B parameter long-context instruct model fine-tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-06-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-3.1-8b-instruct
+    display_name: Granite 3.1 - 8B - Instruct
+    description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-3.1-2b-instruct
+    display_name: Granite 3.1 - 2B - Instruct
+    description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-3.3-8b-instruct
+    display_name: IBM Granite 3.3 8B Instruct
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-3.3-8b-instruct-with-guardian
+    display_name: IBM Granite 3.3 8B Instruct (with guardian)
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. All prompts were first evaluated for risk by [IBM Granite Guardian 3.2 5B](https://www.ibm.com/granite/docs/models/guardian/), and prompts that were deemed risky (with a risk threshold of 0.8) received the response "I'm very sorry, but I can't assist with that.". ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
+    # Unfortunately this setup is not easily reproducible, so we mark it with DEPRECATED_MODEL_TAG
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2.1-8b
+    display_name: URA-Llama 2.1 (8B)
+    description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-08-04
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2-8b
+    display_name: URA-Llama 2 (8B)
+    description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+ creator_organization_name: URA + access: open + num_parameters: 8000000000 + release_date: 2024-08-04 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/ura-llama-7b + display_name: URA-Llama 7B (7B) + description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: URA + access: open + num_parameters: 7000000000 + release_date: 2023-10-10 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/ura-llama-13b + display_name: URA-Llama 13B (13B) + description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: URA + access: open + num_parameters: 13000000000 + release_date: 2023-10-10 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/ura-llama-70b + display_name: URA-Llama 70B (70B) + description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: URA + access: open + num_parameters: 70000000000 + release_date: 2023-10-10 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/GemSUra-7B + display_name: GemSUra 7B + description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: URA + access: open + num_parameters: 7000000000 + release_date: 2024-03-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/GemSUra-2B + display_name: GemSUra 2B + description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: URA + access: open + num_parameters: 2000000000 + release_date: 2024-03-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: ura-hcmut/MixSUra + display_name: MixSUra + description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture of experts model with 8 active experts. + creator_organization_name: URA + access: open + num_parameters: 46700000000 + release_date: 2024-03-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/vinallama-7b-chat + display_name: VinaLLaMa + description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. 
+ creator_organization_name: ViLM + access: open + num_parameters: 7000000000 + release_date: 2024-03-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/vinallama-2.7b-chat + display_name: VinaLLaMa 2.7B + description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 2700000000 + release_date: 2024-03-12 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/vietcuna-7b-v3 + display_name: VietCuna 7B (v3) + description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 7000000000 + release_date: 2023-08-07 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/vietcuna-3b-v2 + display_name: VietCuna 3B (v2) + description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 3000000000 + release_date: 2023-07-15 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/Quyen-v0.1 + display_name: Quyen (v0.1) + description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 4000000000 + release_date: 2024-02-26 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/Quyen-Plus-v0.1 + display_name: Quyen Plus (v0.1) + description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 7000000000 + release_date: 2024-02-26 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/Quyen-Pro-v0.1 + display_name: Quyen Pro (v0.1) + description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. + creator_organization_name: ViLM + access: open + num_parameters: 14000000000 + release_date: 2024-02-26 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: vilm/Quyen-Pro-Max-v0.1 + display_name: Quyen Pro Max (v0.1) + description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. 
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Mini-v0.1
+    display_name: Quyen Mini (v0.1)
+    description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 1800000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-SE-v0.1
+    display_name: Quyen SE (v0.1)
+    description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 500000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: Viet-Mistral/Vistral-7B-Chat
+    display_name: Vistral 7B Chat
+    description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: Viet-Mistral
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-7B5-Instruct
+    display_name: PhoGPT 7B5 Instruct
+    description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 7500000000
+    release_date: 2024-02-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-4B-Chat
+    display_name: PhoGPT 4B Chat
+    description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 4000000000
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
+    display_name: Gemma-3 Gaia PT-BR 4b Instruct
+    description: Gemma-3 Gaia PT-BR 4b Instruct is a model trained by CEIA-UFG for understanding and generating Brazilian Portuguese text.
+    creator_organization_name: CEIA-UFG
+    access: open
+    num_parameters: 4000000000
+    release_date: 2025-06-01
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
+    display_name: Bode 13B Alpaca PT-BR
+    description: Bode is a language model (LLM) for Portuguese, based on LLaMA 2 and fine-tuned with the Alpaca dataset translated into Portuguese. Suitable for instruction following, text generation, translation, and other tasks in Portuguese.
+    creator_organization_name: Recogna NLP
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: 22h/cabrita_7b_pt_850000
+    display_name: Cabrita PT-BR 7B
+    description: Cabrita is an OpenLLaMA-based model, continually pretrained on Portuguese (the mC4-pt subset) for 850,000 steps, with an efficient tokenizer adapted to the language.
+    creator_organization_name: 22h
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
+    display_name: Gervásio PT-BR/PT-PT 7B Decoder
+    description: Gervásio PT* is a 7B-parameter decoder model, adapted from LLaMA-2 7B and trained for both Brazilian and European Portuguese. Fine-tuned with translated data from benchmarks such as GLUE and SuperGLUE.
+    creator_organization_name: PORTULAN (University of Lisbon NLX)
+    access: open
+    num_parameters: 6740000000
+    release_date: 2024-02-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: TucanoBR/Tucano-2b4
+    display_name: Tucano PT-BR 2b4
+    description: Tucano is a series of decoder models based on LLaMA 2, natively pre-trained in Portuguese using the GigaVerbo dataset (200B tokens), with the 2B model trained for 1.96M steps over 845h (515B tokens, 4 epochs).
+    creator_organization_name: TucanoBR (University of Bonn)
+    access: open
+    num_parameters: 2444618240
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
+    display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
+    description: Llama-2-70b-chat baseline (no expert or antiexpert).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20
+    display_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20
+    description: MeLLaMA-70B-chat baseline (no expert or antiexpert).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20
+    display_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20
+    description: MeLLaMA-13B-chat baseline (no expert or antiexpert).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 13000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20
+    description: Qwen3-30b baseline (no expert or antiexpert).
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20
+    description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and llama-13b-base antiexpert.
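+    # These proxy_tuning entry names appear to encode the run configuration as
+    # proxy_tuning/<base>_<expert>_<antiexpert>_<alpha>_<score_type>_<top_k>,
+    # where "none" marks an absent expert or antiexpert.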
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20
+    description: Proxy tuned Qwen3-30b with mellama-13b-base expert and llama-13b-base antiexpert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20
+    description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and mellama-13b-base antiexpert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20
+    display_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20
+    description: Proxy tuned Llama2-70b-chat with mellama-13b-chat expert and llama-13b-base antiexpert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 70000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20
+    display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20
+    description: Unite of Qwen3-30b with mellama-13b-chat expert.
+    creator_organization_name: Sasha Ronaghi
+    access: open
+    num_parameters: 30000000000
+    release_date: 2025-10-15
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
diff --git a/prod_env/tokenizer_configs.yaml b/prod_env/tokenizer_configs.yaml
new file mode 100644
index 00000000000..e3431118217
--- /dev/null
+++ b/prod_env/tokenizer_configs.yaml
@@ -0,0 +1,1287 @@
+# This file defines all the tokenizers that are supported by the Helm API.
+
+# Private tokenizers for this deployment are defined here; tokenizers intended
+# for general use belong in src/helm/config/tokenizer_configs.yaml instead.
+
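+# An entry typically looks like the following (illustrative placeholder only;
+# "your-org/your-tokenizer" and the special tokens below are assumptions, not a
+# registered tokenizer):
+#
+#   - name: your-org/your-tokenizer
+#     tokenizer_spec:
+#       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+#       args:
+#         pretrained_model_name_or_path: your-org/your-tokenizer
+#     end_of_text_token: "</s>"
+#     prefix_token: "<s>"
+#
+# Follow the template of this file to add a new tokenizer.
+# 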
You can copy paste this to get started: +# # This file contains the tokenizer configs for the private tokenizers +# tokenizer_configs: [] # Leave empty to disable private tokenizers + + +tokenizer_configs: + + - name: simple/tokenizer1 + tokenizer_spec: + class_name: "helm.tokenizers.simple_tokenizer.SimpleTokenizer" + end_of_text_token: "" + prefix_token: "" + + # AI21 + - name: ai21/j2-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + - name: ai21/jamba-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + - name: ai21/jamba-instruct-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + - name: ai21/jamba-1.5-mini-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + - name: ai21/jamba-1.5-large-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + + # AlephAlpha + - name: AlephAlpha/luminous-base + tokenizer_spec: + class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: AlephAlpha/luminous-extended + tokenizer_spec: + class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: AlephAlpha/luminous-supreme + tokenizer_spec: + class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: AlephAlpha/luminous-world + tokenizer_spec: + class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Alibaba DAMO Academy + + - name: damo/seallm-7b-v2 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2 + end_of_text_token: "" + prefix_token: "" + + - name: damo/seallm-7b-v2.5 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5 + end_of_text_token: "" + prefix_token: "" + + # Anthropic + - name: anthropic/claude + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Xenova/claude-tokenizer + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # Bigcode + - name: bigcode/santacoder + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + - name: bigcode/starcoder + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # Bigscience + - name: bigscience/bloom + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: bigscience/T0pp + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Cohere + - name: 
cohere/command + tokenizer_spec: + class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: cohere/command-light + tokenizer_spec: + class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: cohere/command-r + tokenizer_spec: + class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: cohere/command-r-plus + tokenizer_spec: + class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: cohere/c4ai-command-r-v01 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: CohereForAI/c4ai-command-r-v01 + end_of_text_token: "" + prefix_token: "" + + - name: cohere/c4ai-command-r-plus + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: CohereForAI/c4ai-command-r-plus + end_of_text_token: "" + prefix_token: "" + + # Databricks + - name: databricks/dbrx-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # DeepSeek + - name: deepseek-ai/deepseek-llm-67b-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + + - name: deepseek-ai/deepseek-v3 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + + - name: deepseek-ai/deepseek-r1 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + + # EleutherAI + - name: EleutherAI/gpt-j-6B + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: EleutherAI/gpt-neox-20b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # Facebook + - name: facebook/opt-66b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Google + - name: google/t5-11b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: google-t5/t5-11b + end_of_text_token: "" + prefix_token: "" + - name: google/flan-t5-xxl + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/ul2 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/mt5-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/text-bison@001 + tokenizer_spec: + class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/text-bison@002 + tokenizer_spec: + class_name: 
"helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/text-unicorn@001 + tokenizer_spec: + class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/gemma-2b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: google/gemma-2-9b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Grok + - name: xai/grok-3-beta + tokenizer_spec: + class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: xai/grok-3-mini-beta + tokenizer_spec: + class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: xai/grok-4-0709 + tokenizer_spec: + class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" + end_of_text_token: "" + prefix_token: "" + + # Hf-internal-testing + + # Tokenizer name hf-internal-testing/llama-tokenizer is taken from: + # https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example + - name: hf-internal-testing/llama-tokenizer + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # HuggingFaceM4 + - name: HuggingFaceM4/idefics-9b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: HuggingFaceM4/idefics-9b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: HuggingFaceM4/idefics-80b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + - name: HuggingFaceM4/idefics-80b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: anas-awadalla/mpt-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + # Huggingface + - name: huggingface/gpt2 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: openai-community/gpt2 + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: huggingface/smollm2-135m + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: huggingface/smollm2-135m-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M-Instruct + end_of_text_token: "<|endoftext|>" + prefix_token: "<|im_end|>" + + # Lighting AI + - name: lightningai/lit-gpt + tokenizer_spec: + class_name: "helm.tokenizers.lit_gpt_tokenizer.LitGPTTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # Meta-llama + + # To use the Llama-2 tokenizer: + # + # 1. Accept the license agreement: https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + # 2. 
+  # 3. Run `huggingface-cli login`
+  #
+  # If you encounter the following error, complete the above steps and try again:
+  #
+  # meta-llama/Llama-2-70b-hf is not a local folder and is not a valid model identifier listed on
+  # 'https://huggingface.co/models'
+  - name: meta-llama/Llama-2-7b-hf
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: meta/llama-3-8b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3-8B
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
+  - name: meta/llama-3-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.1-8b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
+  - name: meta/llama-3.1-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.2-3b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.2-1b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.2-11b-vision-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.2-11B-Vision-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-3.3-70b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.3-70B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
+  - name: meta/llama-4-scout-17b-16e-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
+  # 01-ai
+  - name: 01-ai/Yi-6B
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  # AI Singapore
+  - name: aisingapore/sea-lion-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+        use_fast: false
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  # Allen Institute for AI
+  # The allenai/olmo-7b tokenizer requires Python 3.9 or newer.
+  # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
+  - name: allenai/olmo-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  - name: allenai/OLMo-1.7-7B-hf
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  - name: allenai/olmo-2-1124-7b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: allenai/olmo-2-0325-32b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: allenai/olmoe-1b-7b-0125-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "|||IP_ADDRESS|||"
+    prefix_token: "|||IP_ADDRESS|||"
+
+  # Marin Community
+  - name: marin-community/marin-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|eot_id|>"
+    prefix_token: "<|begin_of_text|>"
+
+  # Microsoft
+  - name: microsoft/phi-2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: microsoft/phi-3-small-8k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  - name: microsoft/phi-3.5-mini-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  # Mistralai
+  - name: mistralai/Mistral-7B-v0.1
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: mistralai/Mistral-7B-Instruct-v0.1
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: mistralai/Mistral-7B-Instruct-v0.2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: mistralai/Mistral-7B-Instruct-v0.3
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: mistralai/Mistral-Nemo-Base-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: ""
+    
prefix_token: "" + + - name: mistralai/Mistral-Large-Instruct-2407 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: mistralai/Mistral-Large-Instruct-2411 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: mistralai/Ministral-8B-Instruct-2410 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + - name: mistralai/Mistral-Small-24B-Instruct-2501 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Moonshot AI + - name: moonshotai/kimi-k2-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: moonshotai/Kimi-K2-Instruct + trust_remote_code: true + revision: 4f239503ad9d1a042f0a4bacac457931ab972cfc + end_of_text_token: "[EOS]" + prefix_token: "[BOS]" + + # Nectec + - name: nectec/OpenThaiLLM-Prebuilt-7B + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|im_end|>" + prefix_token: "" + + - name: nectec/Pathumma-llm-text-1.0.0 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + + # Neurips + - name: neurips/local + tokenizer_spec: + class_name: "helm.tokenizers.http_model_tokenizer.HTTPModelTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + # NVIDIA + - name: nvidia/nemotron-4-340b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Xenova/Nemotron-4-340B-Instruct-Tokenizer + revision: b7aa0de92cda9f9e722d58d6ca90f46ae17d4701 + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: nvidia/llama-3.1-nemotron-70b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF + end_of_text_token: "<|eot_id|>" + prefix_token: "<|begin_of_text|>" + + # OpenAI + - name: openai/cl100k_base + tokenizer_spec: + class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: openai/o200k_base + tokenizer_spec: + class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|endoftext|>" + + - name: openai/o200k_harmony + tokenizer_spec: + class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "<|startoftext|>" + + - name: openai/clip-vit-large-patch14 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # OpenThaiGPT + - name: openthaigpt/openthaigpt-1.0.0-7b-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Qwen + - name: qwen/qwen-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen-7B + trust_remote_code: true + end_of_text_token: 
"<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen1.5-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen1.5-7B + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen2-72b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen2-72B-Instruct + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + + - name: qwen/qwen2.5-7b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + + - name: qwen/qwen3-235b-a22b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen3-235B-A22B + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + + - name: qwen/qwen3-235b-a22b-instruct-2507-fp8 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|im_end|>" + prefix_token: "" + + - name: qwen/qwq-32b-preview + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|im_end|>" + prefix_token: "" + + - name: qwen/qwen-vl + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen-VL + trust_remote_code: true + # Source: https://github.com/QwenLM/Qwen-VL + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen-vl-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen-VL-Chat + trust_remote_code: true + # Source: https://github.com/QwenLM/Qwen-VL + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen-audio-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen-Audio-Chat + trust_remote_code: true + # Source: https://github.com/QwenLM/Qwen-Audio + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen2-audio-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen2-Audio-7B-Instruct + trust_remote_code: false + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + - name: qwen/qwen2.5-omni-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen2.5-Omni-7B + trust_remote_code: false + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + # SambaLingo + - name: sambanova/sambalingo-thai-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base + end_of_text_token: "" + prefix_token: "" + + # Snowflake + - name: snowflake/snowflake-arctic-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Snowflake/snowflake-arctic-instruct + trust_remote_code: true + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + + # Tiiuae + - name: tiiuae/falcon-7b + 
tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + # TsinghuaKEG + - name: TsinghuaKEG/ice + tokenizer_spec: + class_name: "helm.tokenizers.ice_tokenizer.ICETokenizer" + end_of_text_token: "" + prefix_token: "" + + # Typhoon + - name: scb10x/typhoon-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Upstage + - name: upstage/solar-pro-preview-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + trust_remote_code: true + end_of_text_token: "<|im_end|>" + prefix_token: "<|startoftext|>" + + # Writer + - name: writer/gpt2 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: openai-community/gpt2 + end_of_text_token: "" + prefix_token: "" + + # Yandex + - name: Yandex/yalm + tokenizer_spec: + class_name: "helm.tokenizers.yalm_tokenizer.YaLMTokenizer" + end_of_text_token: "" + prefix_token: "" + + # Diva Llama + - name: stanford/diva-llama + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: WillHeld/DiVA-llama-3-v0-8b + trust_remote_code: true + prefix_token: "<|begin_of_text|>" + end_of_text_token: "<|eot_id|>" + + # LLaMA-Omni + - name: ictnlp/llama-3.1-8b-omni + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ICTNLP/Llama-3.1-8B-Omni + trust_remote_code: false + end_of_text_token: "<|eot_id|>" + prefix_token: "<|begin_of_text|>" + + # IBM - Granite 3.0 + - name: ibm-granite/granite-3.0-2b-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-base + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-2b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-instruct + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-8b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-instruct + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-8b-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-base + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-3b-a800m-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-instruct + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-3b-a800m-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-base + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-1b-a400m-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: 
ibm-granite/granite-3.0-1b-a400m-instruct + end_of_text_token: "" + prefix_token: "" + + - name: ibm-granite/granite-3.0-1b-a400m-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-base + end_of_text_token: "" + prefix_token: "" + +# Maritaca AI + - name: maritaca-ai/sabia-7b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: maritaca-ai/sabia-7b + end_of_text_token: "" + prefix_token: "" + + - name: maritaca-ai/sabia-2-tokenizer-medium + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: maritaca-ai/sabia-2-tokenizer-medium + end_of_text_token: "" + prefix_token: "" + +# Granite-3.1-8b-base + - name: ibm-granite/granite-3.1-8b-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base + prefix_token: "" + end_of_text_token: "<|endoftext|>" + +# Granite-3.1-8b-instruct + - name: ibm-granite/granite-3.1-8b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct + prefix_token: "" + end_of_text_token: "<|endoftext|>" + +# Granite-3.1-2b-instruct + - name: ibm-granite/granite-3.1-2b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct + prefix_token: "" + end_of_text_token: "" + +# Granite-3.1-2b-base + - name: ibm-granite/granite-3.1-2b-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base + prefix_token: "" + end_of_text_token: "" + +# Granite-3.1-3b-a800m-instruct + - name: ibm-granite/granite-3.1-3b-a800m-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct + prefix_token: "" + end_of_text_token: "" + +# Granite-3.1-3b-a800m-base + - name: ibm-granite/granite-3.1-3b-a800m-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base + prefix_token: "" + end_of_text_token: "" + +# Granite-3.1-1b-a400m-instruct + - name: ibm-granite/granite-3.1-1b-a400m-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct + prefix_token: "" + end_of_text_token: "" + +# Granite-3.1-1b-a400m-base + - name: ibm-granite/granite-3.1-1b-a400m-base + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base + prefix_token: "" + end_of_text_token: "" + + - name: ibm-granite/granite-20b-code-instruct-8k + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-20b-code-instruct-8k + prefix_token: "" + end_of_text_token: "" + + - name: ibm-granite/granite-3b-code-instruct-128k + 
tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3b-code-instruct-128k + prefix_token: "" + end_of_text_token: "" + + - name: ibm-granite/granite-34b-code-instruct-8k + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-34b-code-instruct-8k + prefix_token: "" + end_of_text_token: "" + + - name: ibm-granite/granite-8b-code-instruct-128k + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-8b-code-instruct-128k + prefix_token: "" + end_of_text_token: "" + + + - name: ibm-granite/granite-guardian-3.1-2b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-guardian-3.1-2b + prefix_token: "" + end_of_text_token: "" + + - name: ibm-granite/granite-guardian-3.1-8b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-guardian-3.1-8b + prefix_token: "" + end_of_text_token: "" + + # IBM Granite 3.3 + - name: ibm/granite-3.3-8b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: ibm-granite/granite-3.3-8b-instruct + end_of_text_token: "<|end_of_text|>" + prefix_token: "<|end_of_text|>" + + # Z.ai GLM-4.5-AIR-FP8 + - name: zai-org/glm-4.5-air-fp8 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + end_of_text_token: "<|endoftext|>" + prefix_token: "" + + + + # DeepSeek-R1-Distill-Llama-3.1-8b + - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + +# deepseek-ai/deepseek-coder-6.7b-instruct + - name: deepseek-ai/deepseek-coder-6.7b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: deepseek-ai/deepseek-coder-6.7b-instruct + end_of_text_token: "<|end▁of▁sentence|>" + prefix_token: "<|begin▁of▁sentence|>" + + +# vilm/vinallama-2.7b-chat + - name: vilm/vinallama-2.7b-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: vilm/vinallama-2.7b-chat + end_of_text_token: "" + prefix_token: "" + +# vilm/vinallama-7b-chat + - name: vilm/vinallama-7b-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: vilm/vinallama-7b-chat + end_of_text_token: "" + prefix_token: "" + +# vilm/vietcuna-7b-v3 + - name: vilm/vietcuna-7b-v3 + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: vilm/vietcuna-7b-v3 + end_of_text_token: "" + prefix_token: "" + +# Viet-Mistral/Vistral-7B-Chat + - name: Viet-Mistral/Vistral-7B-Chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Viet-Mistral/Vistral-7B-Chat + end_of_text_token: "" + 
prefix_token: ""
+
+# vinai/PhoGPT-7B5-Instruct
+  - name: vinai/PhoGPT-7B5-Instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: vinai/PhoGPT-7B5-Instruct
+    end_of_text_token: ""
+    prefix_token: ""
+
+# vinai/PhoGPT-4B-Chat
+  - name: vinai/PhoGPT-4B-Chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: vinai/PhoGPT-4B-Chat
+    end_of_text_token: ""
+    prefix_token: ""
+
+# Gemma-3-Gaia-PT-BR-4b-it
+  - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
+    end_of_text_token: ""
+    prefix_token: ""
+
+# Bode 13B Alpaca PT-BR
+  - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
+    end_of_text_token: ""
+    prefix_token: ""
+
+# Cabrita 7B PT-BR tokenizer
+  - name: 22h/cabrita_7b_pt_850000
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: 22h/cabrita_7b_pt_850000
+    end_of_text_token: ""
+    prefix_token: ""
+
+# Gervásio 7B PT-BR/PT-PT tokenizer
+  - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
+    end_of_text_token: ""
+    prefix_token: ""
+
+# Tucano 2b4 PT-BR tokenizer
+  - name: TucanoBR/Tucano-2b4
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: TucanoBR/Tucano-2b4
+    end_of_text_token: ""
+    prefix_token: ""
+
+# TeenyTinyLlama 460M PT-BR tokenizer
+  - name: nicholasKluge/TeenyTinyLlama-460m
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: proxy_tuning/llama7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-hf
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: proxy_tuning/qwen3-80b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-Next-80B-A3B-Instruct
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: proxy_tuning/gemma-3-27b-it
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: /share/pi/ema2016/models/google/gemma-3-27b-it
+    end_of_text_token: ""
+    prefix_token: ""
+
+  - name: proxy_tuning/medgemma-27b-it
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-27b-it
+    
end_of_text_token: "" + prefix_token: "" + + - name: proxy_tuning/medgemma-4b-it + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-4b-it + end_of_text_token: "" + prefix_token: "" + + - name: proxy_tuning/medgemma-4b-pt + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-4b-pt + end_of_text_token: "" + prefix_token: "" + + - name: proxy_tuning/gemma-3-4b-pt + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: /share/pi/ema2016/models/google/gemma-3-4b-pt + end_of_text_token: "" + prefix_token: "" + + + + - name: proxy_tuning/llama-7b-chat + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf + end_of_text_token: "" + prefix_token: "" + + - name: proxy_tuning/qwen3-30b + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507 + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" \ No newline at end of file From 622027e8340ca8b76a7822fe72175cd2907da215 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:57:19 -0700 Subject: [PATCH 11/42] Add files via upload --- src/helm/clients/proxy_tuning_client.py | 982 +++++++----------------- 1 file changed, 280 insertions(+), 702 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index b920903cc10..1bb3eaf0694 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -14,31 +14,19 @@ ) import tqdm from transformers import BitsAndBytesConfig +import math # from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn, build_token_enforcer_tokenizer_data -# import math # from pydantic import BaseModel from typing import Literal from datetime import datetime -# MODEL_PATHS = { -# "llama-70b-chat": "[MODEL PATH]", -# "llama-13b-base": "[MODEL PATH]", -# "llama-7b-chat": "[MODEL PATH]", -# "mellama-13b-chat": "[MODEL PATH]", -# "mellama-13b-base": "[MODEL PATH]", -# "mellama-70b-chat": "[MODEL PATH]", -# "qwen3-30b": "[MODEL PATH]", -# } - -# LOCAL_RESULTS_DIR = "[results dir]" - MODEL_PATHS = { "llama-70b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-70b-chat-hf", - "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", "llama-7b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf", + "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", "mellama-13b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B-chat", "mellama-13b-base": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B", "mellama-70b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-70B-chat", @@ -117,28 +105,6 @@ def get_union_vocab(v1, v2): return unique_tokens -def average_and_sample(v1, v2, lamda, tokenizer): - next_token, v_avg, next_token_id1, next_token_id2 = [], [], [], [] - for element_v1, element_v2 in zip(v1, v2): - assert len(element_v1) == len(element_v2) - v_new = {} - for token1 in element_v1: - v_new[token1] = [lamda * element_v1[token1][0] 
+ (1 - lamda) * element_v2[token1][0], - element_v1[token1][1]] - v_avg.append(v_new) - probs = [] - for item in v_new.values(): - probs.append(item[0]) - sample_index = probs.index(max(probs)) - i = 0 - for item1 in v_new.keys(): - if i == sample_index: - next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) - next_token_id1.append(element_v1[item1][1]) - next_token_id2.append(element_v2[item1][1]) - i+=1 - return next_token, v_avg, next_token_id1, next_token_id2 - def get_top_k_tokens(logits, tokenizer, k=10): probs = logits @@ -166,9 +132,33 @@ def get_top_k_tokens(logits, tokenizer, k=10): return v1 -#proxy tuning approach -def logits_add(v1, v2, v3, tokenizer, alpha, device=None): - next_token, next_token_id1, next_token_id2, next_token_id3 = [], [], [], [] +# unite logit probability arithmetic + +def unite_add(v1, v2, lamda, tokenizer): + next_token_id1, next_token_id2 = [], [] + for element_v1, element_v2 in zip(v1, v2): + assert len(element_v1) == len(element_v2) + v_new = {} + for token1 in element_v1: + v_new[token1] = [lamda * element_v1[token1][0] + (1 - lamda) * element_v2[token1][0], + element_v1[token1][1]] + probs = [] + for item in v_new.values(): + probs.append(item[0]) + sample_index = probs.index(max(probs)) + i = 0 + for item1 in v_new.keys(): + if i == sample_index: + next_token_id1.append(element_v1[item1][1]) + next_token_id2.append(element_v2[item1][1]) + i+=1 + return next_token_id1, next_token_id2 + + + +# capt logit probability arithmetic +def capt_add(v1, v2, v3, tokenizer, alpha, device=None): + next_token_id1, next_token_id2, next_token_id3 = [], [], [] comb_ids_per_batch, comb_scores_per_batch = [], [] for element_v1, element_v2, element_v3 in zip(v1, v2, v3): @@ -183,14 +173,10 @@ def logits_add(v1, v2, v3, tokenizer, alpha, device=None): ] probs = [item[0] for item in v_new.values()] - - sample_index = probs.index(max(probs)) - i = 0 for item1 in v_new.keys(): if i == sample_index: - next_token.append(tokenizer.convert_ids_to_tokens(element_v1[item1][1])) next_token_id1.append(element_v1[item1][1]) next_token_id2.append(element_v2[item1][1]) next_token_id3.append(element_v3[item1][1]) @@ -199,91 +185,67 @@ def logits_add(v1, v2, v3, tokenizer, alpha, device=None): scores = torch.tensor([v_new[t][0] for t in v_new], dtype=torch.float32, device=device) comb_ids_per_batch.append(ids) comb_scores_per_batch.append(scores) - return next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch + return next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch +def add_pad_token(tok, padding_side="left"): + # Ensure pad token exists and set padding side + if tok.pad_token_id is None: + # Prefer to reuse eos as pad when no pad is defined + tok.pad_token = tok.eos_token + tok.padding_side = padding_side + return tok -class DExpertsLlama: +class AnyModel: def __init__( self, - base_name: str, - expert_name: str, - antiexpert_name: str, - tokenizer_base, tokenizer_expert, tokenizer_anti, - system_prompt: str = None, + base_model, + expert_model, + antiexpert_model, + base_tokenizer, + expert_tokenizer, + anti_tokenizer, alpha: float = 1.0, unite: bool = False, + proxy: bool = False, model_kwargs: Dict[str, Any] = None ): - self.antiexpert = None # ensure it exists - self.tok_anti = None - - self.base = AutoModelForCausalLM.from_pretrained( - base_name, **model_kwargs - ) - self.expert = AutoModelForCausalLM.from_pretrained( - expert_name, **model_kwargs - ) - self.base.eval() - 
self.expert.eval() - - self.tok_base = tokenizer_base - self.tok_exp = tokenizer_expert - - if not unite: - self.antiexpert = AutoModelForCausalLM.from_pretrained( - antiexpert_name, **model_kwargs - ) + self.base = base_model + self.expert = expert_model + self.antiexpert = antiexpert_model + self.tok_base = base_tokenizer + self.tok_exp = expert_tokenizer + self.tok_anti = anti_tokenizer + + if self.base is not None: + self.base.eval() + if self.expert is not None: + self.expert.eval() + if self.antiexpert is not None: self.antiexpert.eval() - self.tok_anti = tokenizer_anti - + self.alpha = alpha - self.device = self.base.device - self.system_prompt = system_prompt - - - def forward( - self, - base_inputs, - expert_inputs, - antiexpert_inputs=None, - return_dict=None - ): - base_outputs = self.base(**base_inputs, return_dict=return_dict) - expert_outputs = self.expert(**expert_inputs, return_dict=return_dict) - if antiexpert_inputs is not None: - antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=return_dict) - return base_outputs, expert_outputs, antiexpert_outputs + self.device = getattr(self.base, "device", None) - return base_outputs, expert_outputs + def _encode_for_gen(self, tok, prompt: str, device=None): + text = prompt + if getattr(tok, "chat_template", None): + messages = [{"role": "user", "content": prompt}] + text = tok.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + enc = tok(text, return_tensors="pt", add_special_tokens=True) + input_ids = enc["input_ids"] + attention_mask = enc.get("attention_mask", (input_ids != tok.pad_token_id).long()) + if device is not None: + input_ids = input_ids.to(device) + attention_mask = attention_mask.to(device) + return input_ids, attention_mask, text - def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): - """ - Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. - Returns: input_ids (tensor on self.device) - """ - def _msgs(p): - if self.system_prompt: - return [{"role": "system", "content": self.system_prompt}, - {"role": "user", "content": p}] - return [{"role": "user", "content": p}] - - rendered = [ - tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) - for p in prompts - ] - chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) - return chat_inputs.input_ids.to(self.device) - - def _encode_plain_inputs(self, tokenizer, prompts): - """ - Plain (non-chat) encoding with the given tokenizer. 
- Returns: input_ids (tensor on self.device) - """ - enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) - return enc.input_ids.to(self.device) def _update_model_kwargs_for_generation( self, @@ -312,62 +274,63 @@ def _update_model_kwargs_for_generation( return kwargs + @torch.inference_mode() def generate( self, - input_ids: Optional[torch.Tensor] = None, - max_new_tokens: Optional[int] = 100, - do_sample: bool = False, + prompt, + max_new_tokens: Optional[int] = 700, alpha: float = 1.0, return_logits_for_analysis: bool = False, score_type=None, k=20, unite: bool = False, + proxy: bool = False, prefix_allowed_tokens_fn=None, prefix_allowed_tokens_fn_exp=None, **kwargs ): + # print("prompt: ", prompt) + base_input_ids, base_attn, text = self._encode_for_gen(self.tok_base, prompt, device=self.base.device) + print("prompt with (potential) instruction tag: ", text) base_kwargs = kwargs.copy() + base_kwargs["attention_mask"] = base_attn + base_kwargs["use_cache"] = True + original_prompt_len = base_input_ids.shape[1] - # Decode to strings once using base tokenizer - prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) - - if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): - base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) - else: - base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) - - base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) - - - expert_kwargs = kwargs.copy() - expert_input_ids = input_ids - - if hasattr(self.tok_exp, "apply_chat_template") and getattr(self.tok_exp, "chat_template", None): - expert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_exp, prompts) - else: - expert_input_ids = self._encode_plain_inputs(self.tok_exp, prompts) - expert_kwargs['attention_mask'] = torch.ones_like(expert_input_ids, dtype=torch.long, device=expert_input_ids.device) +# if not proxy and not unite: +# gen = self.base.generate( +# input_ids=base_input_ids, +# attention_mask=base_attn, +# max_new_tokens=max_new_tokens, +# do_sample=False, +# eos_token_id=self.tok_base.eos_token_id, +# pad_token_id=self.tok_base.pad_token_id, +# ) +# gen_ids = gen[0, original_prompt_len:] +# generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) +# return generation + + if proxy or unite: + expert_input_ids, expert_attn, expert_text = self._encode_for_gen(self.tok_exp, prompt, device=self.expert.device) + expert_kwargs = kwargs.copy() + expert_kwargs["attention_mask"] = expert_attn + expert_kwargs["use_cache"] = True + if proxy: + antiexpert_input_ids, anti_attn, anto = self._encode_for_gen(self.tok_anti, prompt, device=self.antiexpert.device) + antiexpert_kwargs = kwargs.copy() + antiexpert_kwargs["attention_mask"] = anti_attn + antiexpert_kwargs["use_cache"] = True - if not unite: - antiexpert_kwargs = kwargs.copy() - antiexpert_input_ids = input_ids - - if hasattr(self.tok_anti, "apply_chat_template") and getattr(self.tok_anti, "chat_template", None): - antiexpert_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_anti, prompts) - else: - antiexpert_input_ids = self._encode_plain_inputs(self.tok_anti, prompts) - antiexpert_kwargs['attention_mask'] = torch.ones_like(antiexpert_input_ids, dtype=torch.long, device=antiexpert_input_ids.device) - - # keep track of which sequences are already finished - unfinished_sequences = 
torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) - eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device) + unfinished_sequences = torch.ones(1, dtype=torch.long, device=base_input_ids.device) + eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id], device=base_input_ids.device) + - T = max_new_tokens - if (not unite) and return_logits_for_analysis: - device = input_ids.device + if return_logits_for_analysis: + T = max_new_tokens + device = base_input_ids.device # 1 x T buffers on GPU p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) p_base = torch.empty(T, device=device, dtype=torch.bfloat16) @@ -381,135 +344,106 @@ def generate( token_ids_out = torch.empty(T, device=device, dtype=torch.int32) t_write = 0 - - for step in range(max_new_tokens): - + + + for step in range(max_new_tokens): base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) - expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs) + base_outputs = self.base(**base_inputs, return_dict=True) + base_next_token_logits = base_outputs.logits[..., -1, :] + if prefix_allowed_tokens_fn: + mask = torch.full_like(base_next_token_logits, -math.inf) + sent = base_input_ids[0] + prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) + if len(prefix_allowed_tokens) == 0: + raise ValueError("prefix_allowed_tokens_fn returned an empty list.") + mask[0, prefix_allowed_tokens] = 0 + base_next_token_logits = base_next_token_logits + mask + + next_token_id1 = next_token_id2 = next_token_id3 = None - if unite: - base_outputs, expert_outputs = self.forward( - base_inputs, expert_inputs, return_dict=True - ) - - base_next_token_logits = base_outputs.logits[..., -1, :] + if not unite and not proxy: + next_tokens = torch.argmax(base_next_token_logits, dim=-1) # indices of top tokens + next_token_id1 = next_tokens.tolist() + + if proxy or unite: + expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs) + expert_outputs = self.expert(**expert_inputs, return_dict=True) expert_next_token_logits = expert_outputs.logits[..., -1, :] - if prefix_allowed_tokens_fn: - mask = torch.full_like(base_next_token_logits, -math.inf) - sent = base_input_ids[0] - prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) - if len(prefix_allowed_tokens) == 0: - raise ValueError( - f"`prefix_allowed_tokens_fn` returned an empty list." - f"This means that the constraint is unsatisfiable. Please check your implementation" - f"of `prefix_allowed_tokens_fn` " - ) - mask[0, prefix_allowed_tokens] = 0 + if unite and prefix_allowed_tokens_fn_exp: mask = torch.full_like(expert_next_token_logits, -math.inf) sent = expert_input_ids[0] - prefix_allowed_tokens_exp = prefix_allowed_tokens_fn_exp(0, sent) - if len(prefix_allowed_tokens_exp) == 0: - raise ValueError( - f"`prefix_allowed_tokens_fn` returned an empty list." - f"This means that the constraint is unsatisfiable. 
Please check your implementation" - f"of `prefix_allowed_tokens_fn` " - ) - mask[0, prefix_allowed_tokens_exp] = 0 - expert_next_token_logits = expert_next_token_logits + mask - base_next_token_logits = base_next_token_logits + mask - v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) - v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k) - - vu = get_union_vocab(v_base, v_exp) - - v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits,'qwen') - v_base = vocab_softmax(v_base) - v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits,'llama') - v_exp = vocab_softmax(v_exp) - - next_token, v_avg, next_token_id1, next_token_id2 = average_and_sample(v_base,v_exp,0.5, self.tok_base) - - else: - antiexpert_inputs = self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs) - base_outputs, expert_outputs, antiexpert_outputs = self.forward( - base_inputs, expert_inputs, antiexpert_inputs, return_dict=True - ) - - base_next_token_logits = base_outputs.logits[..., -1, :] - expert_next_token_logits = expert_outputs.logits[..., -1, :] - antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :] - - if prefix_allowed_tokens_fn: - mask = torch.full_like(base_next_token_logits, -math.inf) - sent = base_input_ids[0] - prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) - if len(prefix_allowed_tokens) == 0: - raise ValueError( - f"`prefix_allowed_tokens_fn` returned an empty list." - f"This means that the constraint is unsatisfiable. Please check your implementation" - f"of `prefix_allowed_tokens_fn` " - ) - mask[0, prefix_allowed_tokens] = 0 - base_next_token_logits = base_next_token_logits + mask - - if score_type == "logprobs": - base_next_token_logits = F.log_softmax(base_outputs.logits[..., -1, :], dim=-1) - expert_next_token_logits = F.log_softmax(expert_outputs.logits[..., -1, :], dim=-1) - antiexpert_next_token_logits = F.log_softmax(antiexpert_outputs.logits[..., -1, :], dim=-1) - - v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) - v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=0) - v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_next_token_logits,'llama') - v_anti = get_top_k_tokens(antiexpert_next_token_logits, self.tok_anti, k=0) - v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_next_token_logits, 'llama') - - next_token, next_token_id1, next_token_id2, next_token_id3, comb_ids, comb_scores = logits_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=input_ids.device) - elif score_type == "logits": # regular proxy tuning - expert_next_token_logits = expert_next_token_logits[:, :base_next_token_logits.shape[-1]] - - next_token_logits = ( - base_next_token_logits + - self.alpha * (expert_next_token_logits - antiexpert_next_token_logits) - ) - - next_tokens = torch.argmax(next_token_logits, dim=-1) # indices of top tokens - next_token_id1 = next_tokens.tolist() - next_token_id2 = list(next_token_id1) - next_token_id3 = list(next_token_id1) - next_token = [ - self.tok_base.convert_ids_to_tokens(tid, skip_special_tokens=False) - for tid in next_token_id1 - ] + allowed = prefix_allowed_tokens_fn_exp(0, sent) + if len(allowed) == 0: + raise ValueError("prefix_allowed_tokens_fn returned an empty list.") + mask[0, allowed] = 0 + expert_next_token_logits = expert_next_token_logits + mask - - next_tokens = torch.as_tensor(next_token_id1, device=input_ids.device, dtype=torch.long) - - input_ids = torch.cat([input_ids, 
next_tokens[:, None]], dim=-1) - base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1) - - exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) - expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) + if unite: + v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) + v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k) + vu = get_union_vocab(v_base, v_exp) + v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits,'qwen') + v_base = vocab_softmax(v_base) + v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits,'llama') + v_exp = vocab_softmax(v_exp) + + next_token_id1, next_token_id2 = unite_add(v_base,v_exp, 0.5, self.tok_base) + + elif proxy: + antiexpert_inputs = self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs) + antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=True) + antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :] + + if score_type == "logprobs": #capt + base_lp = F.log_softmax(base_next_token_logits, dim=-1) + expert_lp = F.log_softmax(expert_next_token_logits, dim=-1) + antiexpert_lp = F.log_softmax(antiexpert_next_token_logits, dim=-1) + + v_base = get_top_k_tokens(base_lp, self.tok_base, k=k) + v_exp = get_top_k_tokens(expert_lp, self.tok_exp, k=0) + v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_lp,'llama') + v_anti = get_top_k_tokens(antiexpert_lp, self.tok_anti, k=0) + v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_lp, 'llama') + + next_token_id1, next_token_id2, next_token_id3, _, _ = capt_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=base_input_ids.device) + elif score_type == "logits": # regular proxy tuning + expert_next_token_logits = expert_next_token_logits[:, :base_next_token_logits.shape[-1]] + next_token_logits = ( + base_next_token_logits + + self.alpha * (expert_next_token_logits - antiexpert_next_token_logits) + ) + next_tokens = torch.argmax(next_token_logits, dim=-1) # indices of top tokens + next_token_id1 = next_tokens.tolist() + next_token_id2 = list(next_token_id1) + next_token_id3 = list(next_token_id1) + + step_ids = torch.as_tensor(next_token_id1, device=base_input_ids.device, dtype=torch.long) + base_input_ids = torch.cat([base_input_ids, step_ids[:, None]], dim=-1) base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) - expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) - - if not unite: - anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) - antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) - antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) - - # if eos_token was found in one sentence, set sentence to finished - unfinished_sequences = unfinished_sequences.mul( - next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) - ) - # stop when each sentence is finished + if proxy or unite: + exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) + expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) + expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) + if proxy: + anti_step_ids = torch.as_tensor(next_token_id3, 
device=antiexpert_input_ids.device, dtype=torch.long) + antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) + antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) + + + at_eos = (step_ids == eos_token_id_tensor[0]).long() + unfinished_sequences = unfinished_sequences * (1 - at_eos) if unfinished_sequences.max() == 0: break - - if (not unite) and return_logits_for_analysis: + + + gen_ids = base_input_ids[0, original_prompt_len:] + generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) + + if proxy and return_logits_for_analysis: sl = slice(0, t_write) results = [{ 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) @@ -523,384 +457,87 @@ def generate( 'preds_antiexpert': preds_anti[sl], # (optional) decode later if you want strings }] - return input_ids, results - return input_ids - -class RegularModel: - def __init__( - self, - base_name: str, - tokenizer: AutoTokenizer, - system_prompt: str = None, - alpha: float = 1.0, - chat_response_prefix: str = None, - model_kwargs: Dict[str, Any] = None - ): - self.base = AutoModelForCausalLM.from_pretrained( - base_name, **model_kwargs - ) - - self.base.eval() - - #self.tokenizer = tokenizer - self.tok_base = tokenizer - self.alpha = alpha - self.device = self.base.device - self.system_prompt = system_prompt - - - def forward( - self, - base_inputs, - return_dict=None - ): - base_outputs = self.base(**base_inputs, return_dict=return_dict) - - return base_outputs - - def _get_chat_template_tokenized_chat_inputs(self, tokenizer, prompts): - """ - Use tokenizer.apply_chat_template for models like Qwen-Instruct/Yi/Mistral-Instruct. - Returns: input_ids (tensor on self.device) - """ - def _msgs(p): - if self.system_prompt: - return [{"role": "system", "content": self.system_prompt}, - {"role": "user", "content": p}] - return [{"role": "user", "content": p}] - - rendered = [ - tokenizer.apply_chat_template(_msgs(p), tokenize=False, add_generation_prompt=True) - for p in prompts - ] - chat_inputs = tokenizer(rendered, padding="longest", return_tensors="pt", add_special_tokens=True) - return chat_inputs.input_ids.to(self.device) - - def _encode_plain_inputs(self, tokenizer, prompts): - """ - Plain (non-chat) encoding with the given tokenizer. 
- Returns: input_ids (tensor on self.device) - """ - enc = tokenizer(prompts, padding="longest", return_tensors="pt", add_special_tokens=True) - return enc.input_ids.to(self.device) - - def _update_model_kwargs_for_generation( - self, - outputs: ModelOutput, - kwargs: Dict[str, Any], - ) -> Dict[str, Any]: - # update past_key_values - kwargs["past_key_values"] = outputs.past_key_values - - # update attention mask - if "attention_mask" in kwargs: - attention_mask = kwargs["attention_mask"] - kwargs["attention_mask"] = torch.cat( - [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 - ) - if getattr(outputs, "cache_position", None) is not None: - # some models already return it - kwargs["cache_position"] = outputs.cache_position - else: - if "cache_position" in kwargs: - kwargs["cache_position"] = kwargs["cache_position"] + 1 - else: - # first step: position is sequence-length-1 - seq_len = kwargs["attention_mask"].shape[1] - kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) - - return kwargs - - - def generate( - self, - input_ids: Optional[torch.Tensor] = None, - max_new_tokens: Optional[int] = 100, - do_sample: bool = False, - return_logits_for_analysis: bool = False, - prefix_allowed_tokens_fn=None, - **kwargs - ): - base_kwargs = kwargs.copy() - prompts = self.tok_base.batch_decode(input_ids, skip_special_tokens=True) + return generation, results - if hasattr(self.tok_base, "apply_chat_template") and getattr(self.tok_base, "chat_template", None): - base_input_ids = self._get_chat_template_tokenized_chat_inputs(self.tok_base, prompts) - else: - base_input_ids = self._encode_plain_inputs(self.tok_base, prompts) - - - base_kwargs["attention_mask"] = torch.ones_like(base_input_ids, dtype=torch.long, device=base_input_ids.device) - - # keep track of which sequences are already finished - unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) - eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id]).to(input_ids.device) - - T = max_new_tokens - if return_logits_for_analysis: - device = input_ids.device - # 1 x T buffers on GPU - p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) - p_base = torch.empty(T, device=device, dtype=torch.bfloat16) - - preds_base = torch.empty(T, device=device, dtype=torch.int32) - - token_ids_out = torch.empty(T, device=device, dtype=torch.int32) - t_write = 0 - - - for step in range(max_new_tokens): - # prepare model inputs with past_key_values and attention_mask - base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) - base_outputs = self.forward( - base_inputs, return_dict=True - ) - base_next_token_logits = base_outputs.logits[..., -1, :] - next_token_logits = base_next_token_logits - if step < 2: - next_token_logits[:, self.tok_base.eos_token_id] = -float("inf") - - if prefix_allowed_tokens_fn: - mask = torch.full_like(next_token_logits, -math.inf) - sent = base_input_ids[0] - prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) - if len(prefix_allowed_tokens) == 0: - raise ValueError( - f"`prefix_allowed_tokens_fn` returned an empty list." - f"This means that the constraint is unsatisfiable. 
Please check your implementation" - f"of `prefix_allowed_tokens_fn` " - ) - mask[0, prefix_allowed_tokens] = 0 - next_token_logits = next_token_logits + mask - - next_tokens = torch.argmax(next_token_logits, dim=-1) - - next_tokens = ( - next_tokens * unfinished_sequences + - self.tok_base.pad_token_id * (1 - unfinished_sequences) - ) - - input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) - base_input_ids = torch.cat([base_input_ids, next_tokens[:, None]], dim=-1) - - # update kwargs - base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) - # if eos_token was found in one sentence, set sentence to finished - unfinished_sequences = unfinished_sequences.mul( - next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0) - ) - - # stop when each sentence is finished - if unfinished_sequences.max() == 0: - break - - if return_logits_for_analysis: - sl = slice(0, t_write) - results = [{ - 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) - 'p_base': p_base[sl], - 'preds_base': preds_base[sl], - - }] - return input_ids, results - return input_ids - + return generation def ensure_dir(d): if not os.path.exists(d): os.makedirs(d, exist_ok=True) - -@torch.inference_mode() -def generate_completions( - model, - tokenizer, - prompts, - batch_size=1, - add_special_tokens=True, - disable_tqdm=False, - return_logits_for_analysis=False, - score_type=None, - alpha=1.0, - k=20, - unite=False, - prefix_allowed_tokens_fn=None, - prefix_allowed_tokens_fn_exp=None, - **generation_kwargs, - -): - generations = [] - outputs = [] - if not disable_tqdm: - progress = tqdm.tqdm(total=len(prompts), desc="Generating Completions") - - num_return_sequences = generation_kwargs.get("num_return_sequences", 1) - - all_results = [] - for i in range(0, len(prompts), batch_size): - batch_prompts = prompts[i:i+batch_size] - tokenized_prompts = tokenizer( - batch_prompts, padding="longest", return_tensors="pt", add_special_tokens=add_special_tokens - ) - - # print ("tokenized_prompt: ", tokenized_prompts) - if hasattr(model, "device"): # DExpertsLlama - device = model.device - # print ("device = model.device") - else: # vanilla HF model - device = next(model.parameters()).device - # print ("next(model.parameters()).devicedevice = next(model.parameters()).device") - batch_input_ids = tokenized_prompts['input_ids'].to(device) - attention_mask = tokenized_prompts['attention_mask'].to(device) - - batch_outputs = model.generate( - input_ids=batch_input_ids, - attention_mask=attention_mask, - alpha=alpha, - score_type=score_type, - prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, - prefix_allowed_tokens_fn_exp=prefix_allowed_tokens_fn_exp, - k=k, - unite=unite, - **generation_kwargs - ) - results = [] - - # to support the logits processing below when using DExperts with mixed tokenizers - if isinstance(batch_input_ids, dict): - batch_input_ids = batch_input_ids['llama'] - - batch_outputs = tokenizer.batch_decode(batch_outputs, skip_special_tokens=True) - print("batch_outputs: ", batch_outputs) - batch_prompts = tokenizer.batch_decode(batch_input_ids, skip_special_tokens=True) - - # duplicate the prompts to match the number of return sequences - batch_prompts = [prompt for prompt in batch_prompts for _ in range(num_return_sequences)] - batch_generations = [ - output[len(prompt):] for prompt, output in zip(batch_prompts, batch_outputs) - ] - - generations += batch_generations - - if not disable_tqdm: - 
progress.update(len(batch_prompts)//num_return_sequences) - # return generations, logits_for_analysis - return generations, all_results - - -def add_pad_token(tokenizer, padding_side="left"): - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - tokenizer.pad_token_id = tokenizer.eos_token_id - tokenizer.padding_side = padding_side - return tokenizer - -def load_dexperts_model_and_tokenizer( +def load_model_and_tokenizer( base_name: str, expert_name: str, antiexpert_name: str, device_map: str = "auto", alpha: float = 1.0, - load_in_8bit: bool = False, - load_in_4bit: bool = False, system_prompt: Optional[str] = None, use_fast_tokenizer: bool = True, padding_side: str = "left", - proxy_tune: bool = False, + proxy: bool = False, unite: bool = False, -): - - bnb_cfg = None - - if load_in_8bit: - bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) - - if load_in_4bit: - bnb_cfg = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", # {nf4, fp4}; nf4 is standard - bnb_4bit_compute_dtype=torch.bfloat16, - ) +): + bnb_cfg = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ) model_kwargs = { 'device_map': device_map, - 'torch_dtype': torch.bfloat16, + 'dtype': torch.bfloat16, 'quantization_config': bnb_cfg, 'low_cpu_mem_usage': True, 'trust_remote_code': True, } + + base_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) - - if "llama" in base_name and "chat" in base_name: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - elif "llama" in base_name and "chat" not in base_name: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + if base_name in ["mellama-13b-chat", "mellama-13b-base", "mellama-70b-chat"]: + tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + #tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) else: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=False, trust_remote_code=True) - + tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer) + tok_base = add_pad_token(tok_base, padding_side) - if proxy_tune or unite: - - if "llama" in expert_name and "chat" in expert_name: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - elif "llama" in expert_name and "chat" not in expert_name: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - else: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS[expert_name], use_fast=False, trust_remote_code=True) + expert_model = antiexpert_model = tok_exp = tok_anti = None - - tok_exp = add_pad_token(tok_exp, padding_side) - if proxy_tune: - if "llama" in antiexpert_name and "chat" in antiexpert_name: - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - elif "llama" in expert_name and "chat" not in expert_name: - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - else: - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS[antiexpert_name], use_fast=False, trust_remote_code=True) - + # expert and anti expert will always be mellama or llama --> 
mellama models use llama-13b-base as tokenizer. + if expert_name != "none": + tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + tok_exp = add_pad_token(tok_exp, padding_side) + expert_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) + + if antiexpert_name != "none": + tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) tok_anti = add_pad_token(tok_anti, padding_side) + antiexpert_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) + + model = AnyModel( + base_model=base_model, + expert_model=expert_model, + antiexpert_model=antiexpert_model, + base_tokenizer=tok_base, + expert_tokenizer=tok_exp, + anti_tokenizer=tok_anti, + alpha=alpha, + proxy=proxy, + unite=unite, + model_kwargs=model_kwargs, + ) + - - model = DExpertsLlama( - base_name=MODEL_PATHS[base_name], - expert_name=MODEL_PATHS[expert_name], - antiexpert_name=MODEL_PATHS[antiexpert_name], - tokenizer_base=tok_base, - tokenizer_expert=tok_exp, - tokenizer_anti=tok_anti, - system_prompt=system_prompt, - alpha=alpha, - model_kwargs=model_kwargs, - ) - print(f"[Loader] Base : {MODEL_PATHS[base_name]}") - print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") - print(f"[Loader] Anti : {MODEL_PATHS[antiexpert_name]}") + print(f"[Loader] Base : {base_name}") + print(f"[Loader] Expert : {expert_name}") + print(f"[Loader] Anti : {antiexpert_name}") - elif unite: - model = DExpertsLlama( - base_name=MODEL_PATHS[base_name], - expert_name=MODEL_PATHS[expert_name], - antiexpert_name="none", - tokenizer_base=tok_base, - tokenizer_expert=tok_exp, - tokenizer_anti="none", - system_prompt=system_prompt, - alpha=alpha, - unite=True, - model_kwargs=model_kwargs, - ) - print(f"[Loader] Base : {MODEL_PATHS[base_name]}") - print(f"[Loader] Expert : {MODEL_PATHS[expert_name]}") - - else: - model = RegularModel(base_name=MODEL_PATHS[base_name], tokenizer=tok_base, system_prompt=system_prompt, alpha=alpha, model_kwargs=model_kwargs) - return model, tok_base +# proxy tuning helpers def _safe_tag(model_name: str) -> str: # e.g. 
"proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat" @@ -974,6 +611,7 @@ def __init__( tag = model_name.split("/")[-1] # strip optional "proxy_tuning_" prefix if tag.startswith("proxy_tuning_"): + print("doing tag change") tag = tag[len("proxy_tuning_"):] parts = tag.split("_") @@ -986,7 +624,6 @@ def __init__( parts[5] ) self.k = int(k_str) - self.is_unite = False self.is_proxy = False if expert_name != "none": @@ -994,119 +631,60 @@ def __init__( self.is_unite = True else: self.is_proxy = True - - print("mn:", model_name) - print("tag:", tag) - print("b: ", base_name) - print("Ex:", expert_name) - print("ax", antiexpert_name) - print(self.alpha) - print(self.score_type) - print(self.k) - print("proxy: ", self.is_proxy) - print("unite: ", self.is_unite) - - self.model, self.hf_tokenizer = load_dexperts_model_and_tokenizer( - base_name=base_name, - expert_name=expert_name, - antiexpert_name=antiexpert_name, - load_in_8bit=False, - load_in_4bit=True, - use_fast_tokenizer=True, - system_prompt=None, - device_map='auto', - proxy_tune=self.is_proxy, - unite=self.is_unite + print ("loading model") + self.any_model, self.hf_tokenizer = load_model_and_tokenizer( + base_name=base_name, + expert_name=expert_name, + antiexpert_name=antiexpert_name, + device_map='auto', + proxy=self.is_proxy, + unite=self.is_unite ) - - + print ("loaded model") def make_request(self, request: Request) -> RequestResult: - """ - Handles a request by sending the prompt - - Args: - request (Request): The request object containing the prompt. - - Returns: - RequestResult: A HELM-compatible response object. - """ + prompt_text = request.prompt if request.messages: prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") - - - print("prompt_text: ", prompt_text) - prompts = [prompt_text] - - max_new_tokens = 600 - if prompt_text.strip().startswith("Answer 'A' for "): - max_new_tokens = 2 - - - predicted_labels, all_results = generate_completions( - model=self.model, - tokenizer=self.hf_tokenizer, - prompts=prompts, - max_new_tokens=max_new_tokens, - do_sample=False, - num_return_sequences=1, - alpha=self.alpha, - k=self.k, - score_type=self.score_type, - unite=self.is_unite, - return_logits_for_analysis=False, - ) - - # if self.is_proxy or self.is_unite: - # predicted_labels, all_results = generate_completions( - # model=self.model, - # tokenizer=self.hf_tokenizer, - # prompts=prompts, - # max_new_tokens=max_new_tokens, - # do_sample=False, - # num_return_sequences=1, - # alpha=self.alpha, - # k=self.k, - # score_type=self.score_type, - # prefix_allowed_tokens_fn=prefix_func, - # unite=self.is_unite, - # return_logits_for_analysis=False, - # ) - # else: - # predicted_labels, all_results = base_generate_completions( - # model=self.model, - # tokenizer=self.hf_tokenizer, - # prompts=prompts, - # max_new_tokens=max_new_tokens, - # do_sample=False, - # ) - - output_text = predicted_labels[0] - print("output_text: ", output_text) + # progress = tqdm.tqdm(total=1, desc="Generating Completions") + generation = self.any_model.generate( + prompt = prompt_text, + max_new_tokens = 700, + alpha = self.alpha, + return_logits_for_analysis = False, + score_type = self.score_type, + k = self.k, + unite = self.is_unite, + proxy = self.is_proxy, + prefix_allowed_tokens_fn=None, + prefix_allowed_tokens_fn_exp=None, + ) + + print("generation: ", generation) self.req_seq += 1 request_id = f"{self.run_id}_r{self.req_seq:04d}" logits_path = None - if self.is_proxy and 
all_results: - logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") - torch.save(all_results, logits_path) - print(f"[Logits] wrote {logits_path}") +# if self.is_proxy and all_results: +# logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") +# torch.save(all_results, logits_path) +# print(f"[Logits] wrote {logits_path}") append_request_row( csv_path=self.token_log_path, request_id=request_id, model_name=self.model_name, prompt=prompt_text, - output=output_text, + output=generation, logits_path=logits_path, ) # Return a HELM-compatible RequestResult - output = GeneratedOutput(text=output_text, logprob=0.0, tokens=[]) + output = GeneratedOutput(text=generation, logprob=0.0, tokens=[]) return RequestResult(success=True, cached=False, completions=[output], embedding=[]) From a700380d30aa6329de7aa1bcf8df83cf825a0020 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:22:14 -0700 Subject: [PATCH 12/42] Update model_deployments.yaml --- src/helm/config/model_deployments.yaml | 70 +++++++++++++++----------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index 2f1c7927594..318b8912c66 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -5108,52 +5108,66 @@ model_deployments: dspy_api_model: openai/o3-mini-2025-01-31 dspy_api_base: null - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 - model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 128000 + + - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 - model_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 + - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 + model_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 128000 + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 - model_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 + - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + model_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/llama-7b-chat + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 128000 + max_sequence_length: 4096 + client_spec: + class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - name: 
proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 128000 + - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 - model_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 128000 + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 - model_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 + - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 + model_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 128000 + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 - model_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 128000 + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + tokenizer_name: proxy_tuning/qwen3-30b + max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - From 30b2040ecb6950667f8320680b54668ae3dfa205 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:22:56 -0700 Subject: [PATCH 13/42] Update model_metadata.yaml --- src/helm/config/model_metadata.yaml | 84 ++++++++++++++--------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index 8cbff69e32d..d6c172cf142 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -5230,93 +5230,87 @@ models: release_date: 2025-01-31 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] -# proxy tuning - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 - display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 - description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and llama-13b-base antiexpert. + - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 + description: llama-70b-chat. 
creator_organization_name: Sasha Ronaghi access: open - num_parameters: 30000000000 + num_parameters: 70000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 - display_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 - description: Proxy tuned Llama2-70b-chat with mellama-13b-chat expert and llama-13b-base antiexpert. + - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 + display_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 + description: mellama-70b-chat. creator_organization_name: Sasha Ronaghi access: open num_parameters: 70000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - -# unite - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_0.7_logprobs_20 - display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20 - description: Unite of Qwen3-30b with mellama-13b-chat expert. + + - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + display_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 + description: mellama-13b-chat. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 30000000000 + num_parameters: 13000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/llama-70b-chat_mellama-13b-base_none_1.0_logits_20 - display_name: proxy_tuning/llama-70b-chat_mellama-13b-base_llama-13b-base_1.0_logits_20 - description: Unite of llama-70b-chat with mellama-13b-base. + + + - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 + display_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 + description: Qwen3-30b. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 70000000000 + num_parameters: 30000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] -# base models - - name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 - display_name: proxy_tuning/qwen3-30b_none_none_0.7_logprobs_20 - description: Qwen3-30b. + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 + description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and llama-13b-base antiexpert. creator_organization_name: Sasha Ronaghi access: open num_parameters: 30000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - description: llama-70b-chat. + + - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + display_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 + description: Proxy tuned Qwen3-30b with mellama-13b-base expert and llama-13b-base antiexpert. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 70000000000 + num_parameters: 30000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - -# mellama models - - - name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 - display_name: proxy_tuning/mellama-70b-chat_none_none_0.7_logprobs_20 - description: mellama-70b-chat. 
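+# NOTE: these deployment names double as configuration strings with the layout
+# {base}_{expert}_{antiexpert}_{alpha}_{score_type}_{k}; ProxyTuningClient
+# recovers the six fields with tag.split("_") and casts alpha and k to numbers.
+# A worked example of the convention ("none" disables a component):
+#
+#   "qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20"
+#     -> base=qwen3-30b, expert=mellama-13b-chat, antiexpert=llama-13b-base,
+#        alpha=1.0, score_type=logprobs, top-k=20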
+ - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 + description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and mellama-13b-base antiexpert. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 70000000000 + num_parameters: 30000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 - display_name: proxy_tuning/mellama-13b-chat_none_none_0.7_logprobs_20 - description: mellama-13b-chat. + - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 + display_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 + description: Proxy tuned Llama2-70b-chat with mellama-13b-chat expert and llama-13b-base antiexpert. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 13000000000 + num_parameters: 70000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 - display_name: proxy_tuning/mellama-13b-base_none_none_0.7_logprobs_20 - description: mellama-13b-base. + + - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 + description: Unite of Qwen3-30b with mellama-13b-chat expert. creator_organization_name: Sasha Ronaghi access: open - num_parameters: 13000000000 + num_parameters: 30000000000 release_date: 2025-10-15 tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] From 4bfdfdc9b6b69da0ddbbe86ea163f877491cb21a Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:23:30 -0700 Subject: [PATCH 14/42] Update tokenizer_configs.yaml --- src/helm/config/tokenizer_configs.yaml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index 4e365633ea1..be908a7e93d 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -1286,20 +1286,19 @@ tokenizer_configs: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" end_of_text_token: "" prefix_token: "" - + - name: proxy_tuning/llama-7b-chat tokenizer_spec: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" args: - pretrained_model_name_or_path: [PATH TO Llama-2-7b-chat] - end_of_text_token: "" - prefix_token: "" + pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf + end_of_text_token: "" + prefix_token: "" - name: proxy_tuning/qwen3-30b tokenizer_spec: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" args: - pretrained_model_name_or_path: [PATH TO Qwen3-30B-A3B-Instruct-2507] - end_of_text_token: "" - prefix_token: "" - + pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507 + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" From 53ec6a51eb4f678e88eea1537400133ddfea22f7 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:43:37 -0700 Subject: [PATCH 15/42] Update medhelm_run_specs.py --- src/helm/benchmark/run_specs/medhelm_run_specs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/helm/benchmark/run_specs/medhelm_run_specs.py b/src/helm/benchmark/run_specs/medhelm_run_specs.py index b7920a3a3c9..8ec30bcd698 100644 --- a/src/helm/benchmark/run_specs/medhelm_run_specs.py +++ b/src/helm/benchmark/run_specs/medhelm_run_specs.py @@ -150,7 +150,7 @@ def get_clear_spec(condition: str, data_path: str) -> RunSpec: name=f"clear:condition={condition}", scenario_spec=scenario_spec, adapter_spec=adapter_spec, - metric_specs=get_exact_match_metric_specs(), + metric_specs=get_f1_metric_specs(), groups=["clear"], ) From f1138adbc8ffcb3cbf0a7fd2487ce7ce63bfa918 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:09:04 -0700 Subject: [PATCH 16/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 123 +++++++++++++----------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index 1bb3eaf0694..fb5cd721b63 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -24,13 +24,14 @@ from datetime import datetime MODEL_PATHS = { + # download from huggingface "llama-70b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-70b-chat-hf", - "llama-7b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf", "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", + "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", + # download from physionet -- https://physionet.org/content/me-llama/1.0.0/ "mellama-13b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B-chat", - "mellama-13b-base": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B", "mellama-70b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-70B-chat", - "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", + } LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm" @@ -199,9 +200,9 @@ def add_pad_token(tok, padding_side="left"): class AnyModel: def __init__( self, - base_model, - expert_model, - antiexpert_model, + base_name, + expert_name, + antiexpert_name, base_tokenizer, expert_tokenizer, anti_tokenizer, @@ -211,22 +212,35 @@ def __init__( model_kwargs: Dict[str, Any] = None ): - self.base = base_model - self.expert = expert_model - self.antiexpert = antiexpert_model - self.tok_base = base_tokenizer - self.tok_exp = expert_tokenizer - self.tok_anti = anti_tokenizer - - if self.base is not None: - self.base.eval() - if self.expert is not None: + self.expert = None + self.tok_exp = None + self.antiexpert = None + self.tok_anti = None + + print("loading base") + + self.base = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) + self.base.eval() + self.tok_base = base_tokenizer + print("done loading base") + + if proxy or unite: + print("loading exp") + self.expert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) self.expert.eval() - if self.antiexpert is not None: - self.antiexpert.eval() + self.tok_exp = expert_tokenizer + print("done loading exp") + + if proxy: + print("loading anti") + self.antiexpert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) + self.antiexpert.eval() + self.tok_anti = anti_tokenizer + print("done loading anti") + self.alpha = alpha - self.device = getattr(self.base, "device", None) + self.device = self.base.device def _encode_for_gen(self, tok, prompt: str, device=None): @@ -296,7 +310,7 @@ def generate( 
base_kwargs["attention_mask"] = base_attn base_kwargs["use_cache"] = True original_prompt_len = base_input_ids.shape[1] - + print("1") # if not proxy and not unite: # gen = self.base.generate( @@ -326,7 +340,7 @@ def generate( # keep track of which sequences are already finished unfinished_sequences = torch.ones(1, dtype=torch.long, device=base_input_ids.device) eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id], device=base_input_ids.device) - + print("2") if return_logits_for_analysis: T = max_new_tokens @@ -344,7 +358,7 @@ def generate( token_ids_out = torch.empty(T, device=device, dtype=torch.int32) t_write = 0 - + print("3") for step in range(max_new_tokens): base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) @@ -439,7 +453,7 @@ def generate( if unfinished_sequences.max() == 0: break - + print("4") gen_ids = base_input_ids[0, original_prompt_len:] generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) @@ -459,7 +473,7 @@ def generate( }] return generation, results - + print("5") return generation def ensure_dir(d): @@ -492,35 +506,38 @@ def load_model_and_tokenizer( 'low_cpu_mem_usage': True, 'trust_remote_code': True, } - - base_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) + + print("loading base tok", flush=True) + if base_name in ["mellama-13b-chat", "mellama-13b-base", "mellama-70b-chat"]: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - #tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=use_fast_tokenizer) + tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) else: tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer) tok_base = add_pad_token(tok_base, padding_side) - expert_model = antiexpert_model = tok_exp = tok_anti = None - + print("done loading base tok", flush=True) + + tok_exp = tok_anti = None - # expert and anti expert will always be mellama or llama --> mellama models use llama-13b-base as tokenizer. 
- if expert_name != "none": - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + if proxy or unite: + print("loading exp tok", flush=True) + tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) tok_exp = add_pad_token(tok_exp, padding_side) - expert_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) - - if antiexpert_name != "none": - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - tok_anti = add_pad_token(tok_anti, padding_side) - antiexpert_model = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) - + print("done loading exp tok") + if proxy: + print("loading anti tok", flush=True) + tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) + tok_anti = add_pad_token(tok_anti, padding_side) + print("done loading anti tok", flush=True) + + + print ("creating any model", flush=True) model = AnyModel( - base_model=base_model, - expert_model=expert_model, - antiexpert_model=antiexpert_model, + base_name=base_name, + expert_name=expert_name, + antiexpert_name=antiexpert_name, base_tokenizer=tok_base, expert_tokenizer=tok_exp, anti_tokenizer=tok_anti, @@ -529,11 +546,11 @@ def load_model_and_tokenizer( unite=unite, model_kwargs=model_kwargs, ) - + print ("created any model", flush=True) - print(f"[Loader] Base : {base_name}") - print(f"[Loader] Expert : {expert_name}") - print(f"[Loader] Anti : {antiexpert_name}") + print(f"[Loader] Base : {base_name}", flush=True) + print(f"[Loader] Expert : {expert_name}", flush=True) + print(f"[Loader] Anti : {antiexpert_name}", flush=True) return model, tok_base @@ -609,10 +626,7 @@ def __init__( self.run_id = datetime.now().strftime("%Y%m%d_%H%M%S") self.req_seq = 0 tag = model_name.split("/")[-1] - # strip optional "proxy_tuning_" prefix - if tag.startswith("proxy_tuning_"): - print("doing tag change") - tag = tag[len("proxy_tuning_"):] + parts = tag.split("_") base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = ( @@ -632,7 +646,7 @@ def __init__( else: self.is_proxy = True - print ("loading model") + print ("loading model", flush=True) self.any_model, self.hf_tokenizer = load_model_and_tokenizer( base_name=base_name, expert_name=expert_name, @@ -642,7 +656,7 @@ def __init__( unite=self.is_unite ) - print ("loaded model") + print ("loaded model", flush=True) def make_request(self, request: Request) -> RequestResult: @@ -652,6 +666,7 @@ def make_request(self, request: Request) -> RequestResult: prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") # progress = tqdm.tqdm(total=1, desc="Generating Completions") + print("doing a generation", flush=True) generation = self.any_model.generate( prompt = prompt_text, max_new_tokens = 700, @@ -665,7 +680,7 @@ def make_request(self, request: Request) -> RequestResult: prefix_allowed_tokens_fn_exp=None, ) - print("generation: ", generation) + print("generation: ", generation, flush=True) self.req_seq += 1 request_id = f"{self.run_id}_r{self.req_seq:04d}" From 5909035b36dc5770abc47bc4ed7a59d36b39a416 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:10:47 -0700 Subject: [PATCH 17/42] Delete prod_env/model_deployments.yaml --- prod_env/model_deployments.yaml | 4732 ------------------------------- 1 file changed, 4732 
deletions(-) delete mode 100644 prod_env/model_deployments.yaml diff --git a/prod_env/model_deployments.yaml b/prod_env/model_deployments.yaml deleted file mode 100644 index 73e704ac21e..00000000000 --- a/prod_env/model_deployments.yaml +++ /dev/null @@ -1,4732 +0,0 @@ -# This file defines all the model deployments that are supported by the Helm API. -# Some models have several deployments, each with different parameters. - -# If you want to add a new deployment, you can technically do it here but we recommend -# you to do it in prod_env/model_deployments.yaml instead. - -# Follow the template of this file to add a new deployment. You can copy paste this to get started: -# # This file defines all the model deployments that you do not want to be public. -# model_deployments: [] # Leave empty to disable private model deployments - -model_deployments: - - name: simple/model1 - model_name: simple/model1 - tokenizer_name: simple/tokenizer1 - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.simple_client.SimpleClient" - - # Stanford Health Care - # For internal use only for MedHELM - # Placed earlier in the file to make them non-default - - name: stanfordhealthcare/claude-3-5-sonnet-20241022 - model_name: anthropic/claude-3-5-sonnet-20241022 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_claude_client.StanfordHealthCareClaudeClient" - args: - model: anthropic.claude-3-5-sonnet-20241022-v2:0 - deployment: Claude35Sonnetv2/awssig4fa - - - name: stanfordhealthcare/claude-3-7-sonnet-20250219 - model_name: anthropic/claude-3-7-sonnet-20250219 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_claude_client.StanfordHealthCareClaudeClient" - args: - model: arn:aws:bedrock:us-west-2:679683451337:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0 - deployment: awssig4claude37/aswsig4claude37 - - - name: stanfordhealthcare/gemini-1.5-pro-001 - model_name: google/gemini-1.5-pro-001 - tokenizer_name: google/gemma-2b - max_sequence_length: 1000000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_google_client.StanfordHealthCareGoogleClient" - args: - deployment: gcpgemini/apim-gcp-oauth-fa - - - name: stanfordhealthcare/gemini-2.0-flash-001 - model_name: google/gemini-2.0-flash-001 - tokenizer_name: google/gemma-2b - max_sequence_length: 1000000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_google_client.StanfordHealthCareGoogleClient" - args: - deployment: gcp-gem20flash-fa/apim-gcp-gem20flash-fa - - - name: stanfordhealthcare/gpt-4o-mini-2024-07-18 - model_name: openai/gpt-4o-mini-2024-07-18 - tokenizer_name: openai/o200k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: gpt-4o-mini - api_version: 2023-05-15 - - - name: stanfordhealthcare/gpt-4o-2024-05-13 - model_name: openai/gpt-4o-2024-05-13 - tokenizer_name: openai/o200k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: gpt-4o - api_version: 2023-05-15 - base_url: "https://apim.stanfordhealthcare.org/openai3/deployments/" - - - - name: stanfordhealthcare/gpt-4-0613 - model_name: openai/gpt-4-0613 - tokenizer_name: openai/o200k_base - max_sequence_length: 8192 - client_spec: - 
class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: gpt-4 - api_version: 2023-05-15 - - - name: stanfordhealthcare/gpt-4-turbo-2024-04-09 - model_name: openai/gpt-4-turbo-2024-04-09 - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: gpt-4-turbo - api_version: 2023-05-15 - - - name: stanfordhealthcare/gpt-4.1-2025-04-14 - model_name: openai/gpt-4.1-2025-04-14 - tokenizer_name: openai/o200k_base - max_sequence_length: 1047576 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: gpt-4.1 - api_version: 2025-01-01-preview - base_url: "{endpoint}/openai-eastus2" - - - name: stanfordhealthcare/o3-mini-2025-01-31 - model_name: openai/o3-mini-2025-01-31 - tokenizer_name: openai/cl100k_base - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: o3-mini - api_version: 2024-12-01-preview - base_url: "{endpoint}/openai-eastus2" - - - name: stanfordhealthcare/o1-2024-12-17 - model_name: openai/o1-2024-12-17 - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_azure_openai_client.StanfordHealthCareAzureOpenAIClient" - args: - openai_model_name: o1 - api_version: 2024-12-01-preview - base_url: "{endpoint}/openai-eastus2" - - - name: stanfordhealthcare/deepseek-r1 - model_name: deepseek-ai/deepseek-r1 - tokenizer_name: deepseek-ai/deepseek-r1 - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" - args: - openai_model_name: deepseek-chat - output_processor: helm.benchmark.metrics.output_processors.remove_deepseek_r1_thinking - base_url: "{endpoint}/deepseekr1/v1" - - - name: stanfordhealthcare/llama-3.3-70b-instruct - model_name: meta/llama-3.3-70b-instruct - tokenizer_name: meta/llama-3.3-70b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" - args: - base_url: "{endpoint}/llama3370b/v1" - - - name: stanfordhealthcare/llama-4-scout-17b-16e-instruct - model_name: meta/llama-4-scout-17b-16e-instruct - tokenizer_name: meta/llama-4-scout-17b-16e-instruct - max_sequence_length: 327680 - client_spec: - class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" - args: - base_url: "{endpoint}/llama4-scout/v1" - - - name: stanfordhealthcare/llama-4-maverick-17b-128e-instruct-fp8 - model_name: meta/llama-4-maverick-17b-128e-instruct-fp8 - tokenizer_name: meta/llama-4-scout-17b-16e-instruct - max_sequence_length: 524288 - client_spec: - class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" - args: - base_url: "{endpoint}/llama4-maverick/v1" - - - name: stanfordhealthcare/phi-3.5-mini-instruct - model_name: microsoft/phi-3.5-mini-instruct - tokenizer_name: microsoft/phi-3.5-mini-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient" - args: - base_url: "{endpoint}/phi35mi/v1" - - - name: stanfordhealthcare_shc/gpt-4o-2024-05-13 - model_name: 
openai/gpt-4o-2024-05-13
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient"
-      deployment: gpt-4o
-
-  - name: stanfordhealthcare_shc/gpt-4o-mini-2024-07-18
-    model_name: openai/gpt-4o-mini-2024-07-18
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient"
-      deployment: gpt-4o-mini
-
-  - name: stanfordhealthcare_shc/gpt-4-turbo-2024-04-09
-    model_name: openai/gpt-4-turbo-2024-04-09
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.stanfordhealthcare_shc_openai_client.StanfordHealthCareSHCOpenAIClient"
-      deployment: gpt-4-turbo-2024-04-09
-
-  # Adobe
-  - name: adobe/giga-gan
-    model_name: adobe/giga-gan
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.adobe_vision_client.AdobeVisionClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  # AI21 Labs
-
-  - name: ai21/j2-large
-    model_name: ai21/j2-large
-    tokenizer_name: ai21/j2-tokenizer
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21Client"
-
-  - name: ai21/j2-grande
-    model_name: ai21/j2-grande
-    tokenizer_name: ai21/j2-tokenizer
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21Client"
-
-  - name: ai21/j2-jumbo
-    model_name: ai21/j2-jumbo
-    tokenizer_name: ai21/j2-tokenizer
-    max_sequence_length: 6000
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21Client"
-
-  - name: ai21/jamba-instruct
-    model_name: ai21/jamba-instruct
-    tokenizer_name: ai21/jamba-instruct-tokenizer
-    max_sequence_length: 256000
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21ChatClient"
-
-  - name: ai21/jamba-1.5-mini
-    model_name: ai21/jamba-1.5-mini
-    tokenizer_name: ai21/jamba-1.5-mini-tokenizer
-    max_sequence_length: 256000
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21ChatClient"
-
-  - name: ai21/jamba-1.5-large
-    model_name: ai21/jamba-1.5-large
-    tokenizer_name: ai21/jamba-1.5-large-tokenizer
-    max_sequence_length: 256000
-    client_spec:
-      class_name: "helm.clients.ai21_client.AI21ChatClient"
-
-  # Aleph Alpha
-  - name: AlephAlpha/luminous-base
-    model_name: AlephAlpha/luminous-base
-    tokenizer_name: AlephAlpha/luminous-base
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
-
-  - name: AlephAlpha/luminous-extended
-    model_name: AlephAlpha/luminous-extended
-    tokenizer_name: AlephAlpha/luminous-extended
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
-
-  - name: AlephAlpha/luminous-supreme
-    model_name: AlephAlpha/luminous-supreme
-    tokenizer_name: AlephAlpha/luminous-supreme
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
-
-  # TODO: Add luminous-world once it is released
-
-  - name: AlephAlpha/m-vader
-    model_name: AlephAlpha/m-vader
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.aleph_alpha_image_generation_client.AlephAlphaImageGenerationClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-
-  # Amazon nova models
-  - name: amazon/nova-premier-v1:0
-    model_name: amazon/nova-premier-v1:0
-    tokenizer_name: huggingface/gpt2
-    max_sequence_length: 1000000
-    client_spec:
-      class_name: "helm.clients.bedrock_client.BedrockNovaClient"
-      args:
-        bedrock_model_id: us.amazon.nova-premier-v1:0
-
-  - name:
amazon/nova-pro-v1:0 - model_name: amazon/nova-pro-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 300000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockNovaClient" - - - name: amazon/nova-lite-v1:0 - model_name: amazon/nova-lite-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 300000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockNovaClient" - - - name: amazon/nova-micro-v1:0 - model_name: amazon/nova-micro-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockNovaClient" - - # Titan on Amazon Bedrock - - - name: amazon/titan-text-lite-v1 - model_name: amazon/titan-text-lite-v1 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 4000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockTitanClient" - - - name: amazon/titan-text-express-v1 - model_name: amazon/titan-text-express-v1 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockTitanClient" - - # Mistral on Amazon Bedrock - - - name: amazon/mistral-7b-instruct-v0:2 - model_name: mistralai/amazon-mistral-7b-instruct-v0:2 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockMistralClient" - - - name: amazon/mixtral-8x7b-instruct-v0:1 - model_name: mistralai/amazon-mixtral-8x7b-instruct-v0:1 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 4000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockMistralClient" - - - name: amazon/mistral-large-2402-v1:0 - model_name: mistralai/amazon-mistral-large-2402-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockMistralClient" - - - name: amazon/mistral-small-2402-v1:0 - model_name: mistralai/amazon-mistral-small-2402-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockMistralClient" - - - name: amazon/mistral-large-2407-v1:0 - model_name: mistralai/amazon-mistral-large-2407-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockMistralClient" - - # Llama 3 on Amazon Bedrock - - - name: amazon/llama3-8b-instruct-v1:0 - model_name: meta/amazon-llama3-8b-instruct-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 2000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockLlamaClient" - - - name: amazon/llama3-70b-instruct-v1:0 - model_name: meta/amazon-llama3-70b-instruct-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 2000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockLlamaClient" - - - name: amazon/llama3-1-405b-instruct-v1:0 - model_name: meta/amazon-llama3-1-405b-instruct-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 2000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockLlamaClient" - - - name: amazon/llama3-1-70b-instruct-v1:0 - model_name: meta/amazon-llama3-1-70b-instruct-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 2000 - client_spec: - class_name: "helm.clients.bedrock_client.BedrockLlamaClient" - - - - name: amazon/llama3-1-8b-instruct-v1:0 - model_name: meta/amazon-llama3-1-8b-instruct-v1:0 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 2000 - client_spec: - class_name: 
"helm.clients.bedrock_client.BedrockLlamaClient" - - # Anthropic - - name: anthropic/claude-v1.3 - model_name: anthropic/claude-v1.3 - tokenizer_name: anthropic/claude - max_sequence_length: 8000 - max_sequence_and_generated_tokens_length: 9016 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicClient" - - - name: anthropic/claude-instant-v1 - model_name: anthropic/claude-instant-v1 - tokenizer_name: anthropic/claude - max_sequence_length: 8000 - max_sequence_and_generated_tokens_length: 9016 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicClient" - - - name: anthropic/claude-instant-1.2 - model_name: anthropic/claude-instant-1.2 - tokenizer_name: anthropic/claude - max_sequence_length: 8000 - max_sequence_and_generated_tokens_length: 9016 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicClient" - - - name: anthropic/claude-2.0 - model_name: anthropic/claude-2.0 - tokenizer_name: anthropic/claude - max_sequence_length: 8000 - max_sequence_and_generated_tokens_length: 9016 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicClient" - - - name: anthropic/claude-2.1 - model_name: anthropic/claude-2.1 - tokenizer_name: anthropic/claude - max_sequence_length: 8000 - max_sequence_and_generated_tokens_length: 9016 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicClient" - - - name: anthropic/claude-3-sonnet-20240229 - model_name: anthropic/claude-3-sonnet-20240229 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-haiku-20240307 - model_name: anthropic/claude-3-haiku-20240307 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-opus-20240229 - model_name: anthropic/claude-3-opus-20240229 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-5-haiku-20241022 - model_name: anthropic/claude-3-5-haiku-20241022 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-5-sonnet-20240620 - model_name: anthropic/claude-3-5-sonnet-20240620 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-5-sonnet-20241022 - model_name: anthropic/claude-3-5-sonnet-20241022 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-7-sonnet-20250219 - model_name: anthropic/claude-3-7-sonnet-20250219 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k - model_name: anthropic/claude-3-7-sonnet-20250219-thinking-10k - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - args: - anthropic_model_name: claude-3-7-sonnet-20250219 - thinking_budget_tokens: 10000 - stream: true - - - name: anthropic/claude-sonnet-4-20250514 - model_name: 
anthropic/claude-sonnet-4-20250514 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-sonnet-4-20250514-thinking-10k - model_name: anthropic/claude-sonnet-4-20250514-thinking-10k - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - args: - anthropic_model_name: claude-sonnet-4-20250514 - thinking_budget_tokens: 10000 - stream: true - - - name: anthropic/claude-opus-4-20250514 - model_name: anthropic/claude-opus-4-20250514 - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - - - name: anthropic/claude-opus-4-20250514-thinking-10k - model_name: anthropic/claude-opus-4-20250514-thinking-10k - tokenizer_name: anthropic/claude - max_sequence_length: 200000 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicMessagesClient" - args: - anthropic_model_name: claude-opus-4-20250514 - thinking_budget_tokens: 10000 - stream: true - - - name: anthropic/stanford-online-all-v4-s3 - deprecated: true # Closed model, not accessible via API - model_name: anthropic/stanford-online-all-v4-s3 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.anthropic_client.AnthropicLegacyClient" - - # Cohere - - name: cohere/command - model_name: cohere/command - tokenizer_name: cohere/command - max_sequence_length: 2019 # TODO: verify this - max_request_length: 2020 # TODO: verify this - client_spec: - class_name: "helm.clients.cohere_client.CohereClient" - - - name: cohere/command-light - model_name: cohere/command-light - tokenizer_name: cohere/command-light - max_sequence_length: 2019 # TODO: verify this - max_request_length: 2020 # TODO: verify this - client_spec: - class_name: "helm.clients.cohere_client.CohereClient" - - - name: cohere/command-r - model_name: cohere/command-r - tokenizer_name: cohere/command-r - max_sequence_length: 128000 - max_request_length: 128000 - client_spec: - class_name: "helm.clients.cohere_client.CohereChatClient" - - - name: cohere/command-r-plus - model_name: cohere/command-r-plus - tokenizer_name: cohere/command-r-plus - # "We have a known issue where prompts between 112K - 128K in length - # result in bad generations." 
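For the legacy Claude deployments, max_sequence_length (8000) caps the prompt while max_sequence_and_generated_tokens_length (9016) caps prompt plus completion, so the decoding budget is the difference between the two. A small helper showing that arithmetic; max_new_tokens is invented for this sketch:

    # Sketch: generation headroom under a combined prompt+completion cap.
    def max_new_tokens(entry: dict, prompt_tokens: int) -> int:
        cap = entry.get("max_sequence_and_generated_tokens_length",
                        entry["max_sequence_length"])
        return max(0, cap - prompt_tokens)

    # anthropic/claude-v1.3: a maximal 8000-token prompt leaves 9016 - 8000 = 1016 tokens.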
- # Source: https://docs.cohere.com/docs/command-r-plus - max_sequence_length: 110000 - max_request_length: 110000 - client_spec: - class_name: "helm.clients.cohere_client.CohereChatClient" - - # Craiyon - - - name: craiyon/dalle-mini - model_name: craiyon/dalle-mini - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: craiyon/dalle-mega - model_name: craiyon/dalle-mega - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - # Databricks - - - name: together/dbrx-instruct - model_name: databricks/dbrx-instruct - tokenizer_name: databricks/dbrx-instruct - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - # DeepFloyd - - - name: DeepFloyd/IF-I-M-v1.0 - model_name: DeepFloyd/IF-I-M-v1.0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: DeepFloyd/IF-I-L-v1.0 - model_name: DeepFloyd/IF-I-L-v1.0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: DeepFloyd/IF-I-XL-v1.0 - model_name: DeepFloyd/IF-I-XL-v1.0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - # Deepseek - - - name: together/deepseek-llm-67b-chat - model_name: deepseek-ai/deepseek-llm-67b-chat - tokenizer_name: deepseek-ai/deepseek-llm-67b-chat - max_sequence_length: 4095 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/deepseek-v3 - model_name: deepseek-ai/deepseek-v3 - tokenizer_name: deepseek-ai/deepseek-v3 - max_sequence_length: 16384 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - args: - disable_logprobs: True - - - name: together/deepseek-r1-0528 - model_name: deepseek-ai/deepseek-r1-0528 - tokenizer_name: deepseek-ai/deepseek-r1 - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - args: - together_model: deepseek-ai/deepseek-r1 - parse_thinking: true - disable_logprobs: True - - # Gooseai - - # TODO: Migrate these models to use OpenAIClient - - ## EleutherAI - # - name: gooseai/gpt-neo-20b - # model_name: eleutherai/gpt-neox-20b - # tokenizer_name: EleutherAI/gpt-neox-20b - # max_sequence_length: 2048 - # max_request_length: 2049 - # client_spec: - # class_name: "helm.clients.goose_ai_client.GooseAIClient" - - # - name: gooseai/gpt-j-6b - # model_name: eleutherai/gpt-j-6b - # tokenizer_name: 
EleutherAI/gpt-j-6B - # max_sequence_length: 2048 - # max_request_length: 2049 - # client_spec: - # class_name: "helm.clients.goose_ai_client.GooseAIClient" - - # Google - # See: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning - - ## Gemini - # See: https://ai.google.dev/models/gemini#model_variations - - name: google/gemini-pro - model_name: google/gemini-pro - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 30720 - max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.0-pro-001 - model_name: google/gemini-1.0-pro-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 30720 - max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.0-pro-002 - model_name: google/gemini-1.0-pro-002 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 30720 - max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-pro-vision - model_name: google/gemini-pro-vision - tokenizer_name: openai/cl100k_base - max_sequence_length: 12288 - max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.0-pro-vision-001 - model_name: google/gemini-1.0-pro-vision-001 - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 12288 - max_sequence_and_generated_tokens_length: 16384 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-flash-001 - model_name: google/gemini-1.5-flash-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-pro-001 - model_name: google/gemini-1.5-pro-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-pro-preview-0409 - model_name: google/gemini-1.5-pro-preview-0409 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. 
- max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-pro-preview-0514 - model_name: google/gemini-1.5-pro-preview-0514 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-flash-preview-0514 - model_name: google/gemini-1.5-flash-preview-0514 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - ## Gemini with different safety settings - - name: google/gemini-1.5-pro-001-safety-default - model_name: google/gemini-1.5-pro-001-safety-default - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: gemini-1.5-pro-001 - safety_settings_preset: default - - - name: google/gemini-1.5-pro-001-safety-block-none - model_name: google/gemini-1.5-pro-001-safety-block-none - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: gemini-1.5-pro-001 - safety_settings_preset: block_none - - - name: google/gemini-1.5-flash-001-safety-default - model_name: google/gemini-1.5-flash-001-safety-default - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: gemini-1.5-flash-001 - safety_settings_preset: default - - - name: google/gemini-1.5-flash-001-safety-block-none - model_name: google/gemini-1.5-flash-001-safety-block-none - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: gemini-1.5-flash-001 - safety_settings_preset: block_none - - - name: google/gemini-1.5-pro-002 - model_name: google/gemini-1.5-pro-002 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. 
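All the Gemini entries point tokenizer_name at google/gemma-2b because, as the inline comments say, Gemini exposes no tokenizer endpoint. Estimating prompt length with the stand-in tokenizer could look like the following, assuming transformers is installed and the google/gemma-2b checkpoint on the Hugging Face Hub is accessible to you:

    # Sketch: approximate a Gemini prompt length with Gemma's tokenizer (a stand-in).
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
    approx_tokens = len(tokenizer.encode("Summarize the discharge note below."))
    assert approx_tokens <= 30720  # google/gemini-pro's configured max_sequence_length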
- max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-flash-002 - model_name: google/gemini-1.5-flash-002 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-pro-exp-02-05 - model_name: google/gemini-2.0-pro-exp-02-05 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-flash-exp - model_name: google/gemini-2.0-flash-exp - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-flash-001 - model_name: google/gemini-2.0-flash-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-flash-lite-preview-02-05 - model_name: google/gemini-2.0-flash-lite-preview-02-05 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-flash-lite-001 - model_name: google/gemini-2.0-flash-lite-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.0-flash-thinking-exp-01-21 - model_name: google/gemini-2.0-flash-thinking-exp-01-21 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-flash-lite-preview-06-17 - model_name: google/gemini-2.5-flash-lite-preview-06-17 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. 
- max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - # Only the global location is supported. See: - # - https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-lite - # - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#global-endpoint - location: global - - - name: google/gemini-2.5-flash-lite - model_name: google/gemini-2.5-flash-lite - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-flash-preview-04-17 - model_name: google/gemini-2.5-flash-preview-04-17 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-flash-preview-05-20 - model_name: google/gemini-2.5-flash-preview-05-20 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-flash - model_name: google/gemini-2.5-flash - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-pro-exp-03-25 - model_name: google/gemini-2.5-pro-exp-03-25 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-pro-preview-03-25 - model_name: google/gemini-2.5-pro-preview-03-25 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-pro-preview-05-06 - model_name: google/gemini-2.5-pro-preview-05-06 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. 
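As with the safety-preset variants and the global-location argument above, per-deployment client_spec.args let one client class serve many configurations (vertexai_model, safety_settings_preset, location). A generic sketch of turning class_name plus args into an object with importlib; this shows the pattern only and is not HELM's actual factory:

    # Sketch: instantiate a client from class_name + args (illustrative loader).
    import importlib

    def build_client(client_spec: dict, **extra):
        module_path, cls_name = client_spec["class_name"].rsplit(".", 1)
        cls = getattr(importlib.import_module(module_path), cls_name)
        return cls(**{**(client_spec.get("args") or {}), **extra})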
- max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-2.5-pro - model_name: google/gemini-2.5-pro - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1048576 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro - # TODO: Max output tokens: 65536 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/gemini-1.5-flash-8b-001 - model_name: google/gemini-1.5-flash-8b-001 - tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer. - max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models - # TODO: Max output tokens: 8192 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - - - name: google/llama-3.1-8b-instruct - model_name: meta/llama-3.1-8b-instruct - tokenizer_name: meta/llama-3.1-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: publishers/meta/models/llama-3.1-8b-instruct-maas - - - name: google/llama-3.1-70b-instruct - model_name: meta/llama-3.1-70b-instruct - tokenizer_name: meta/llama-3.1-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: publishers/meta/models/llama-3.1-70b-instruct-maas - - - name: google/llama-3.1-405b-instruct - model_name: meta/llama-3.1-405b-instruct - tokenizer_name: meta/llama-3.1-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAIChatClient" - args: - vertexai_model: publishers/meta/models/llama-3.1-405b-instruct-maas - - ## Gemma - - name: together/gemma-2b - model_name: google/gemma-2b - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/gemma-2b-it - model_name: google/gemma-2b-it - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/gemma-7b - model_name: google/gemma-7b - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/gemma-7b-it - model_name: google/gemma-7b-it - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/gemma-2-9b-it - model_name: google/gemma-2-9b-it - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/gemma-2-27b-it - model_name: google/gemma-2-27b-it - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - ## MedLM - - name: google/medlm-medium - model_name: google/medlm-medium - tokenizer_name: google/text-bison@001 - max_sequence_length: 6000 # Officially 8192 - max_sequence_and_generated_tokens_length: 7000 # Officially 9216 - client_spec: - class_name: 
"helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/medlm-large - model_name: google/medlm-large - tokenizer_name: google/text-bison@001 - max_sequence_length: 6000 # Officially 8192 - max_sequence_and_generated_tokens_length: 7000 # Officially 9216 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - ## PaliGemma - - name: google/paligemma-3b-mix-224 - model_name: google/paligemma-3b-mix-224 - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient" - - - name: google/paligemma-3b-mix-448 - model_name: google/paligemma-3b-mix-448 - tokenizer_name: google/gemma-2b - max_sequence_length: 7167 - client_spec: - class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient" - - ## PaLM 2 - - name: google/text-bison@001 - model_name: google/text-bison@001 - tokenizer_name: google/text-bison@001 - max_sequence_length: 6000 # Officially 8192 - max_sequence_and_generated_tokens_length: 7000 # Officially 9216 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/text-bison@002 - model_name: google/text-bison@002 - tokenizer_name: google/text-bison@002 - max_sequence_length: 6000 # Officially 8192 - max_sequence_and_generated_tokens_length: 9216 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/text-bison-32k - model_name: google/text-bison-32k - tokenizer_name: google/text-bison@001 - max_sequence_length: 32000 - max_sequence_and_generated_tokens_length: 32000 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - - name: google/text-unicorn@001 - model_name: google/text-unicorn@001 - tokenizer_name: google/text-unicorn@001 - max_sequence_length: 6000 # Officially 8192 - max_sequence_and_generated_tokens_length: 7000 # Officially 9216 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/code-bison@001 - model_name: google/code-bison@001 - tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer - max_sequence_length: 6000 # Officially 6144 - max_sequence_and_generated_tokens_length: 7000 # Officially 7168 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/code-bison@002 - model_name: google/code-bison@002 - tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer - max_sequence_length: 6000 # Officially 6144 - max_sequence_and_generated_tokens_length: 7168 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: 
"helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: google/code-bison-32k - model_name: google/code-bison-32k - tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer - max_sequence_length: 32000 - max_sequence_and_generated_tokens_length: 32000 - client_spec: - class_name: "helm.clients.vertexai_client.VertexAITextClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - # HuggingFace - - ## AI Singapore - - name: huggingface/sea-lion-7b - model_name: aisingapore/sea-lion-7b - tokenizer_name: aisingapore/sea-lion-7b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - trust_remote_code: true - - - name: huggingface/sea-lion-7b-instruct - model_name: aisingapore/sea-lion-7b-instruct - tokenizer_name: aisingapore/sea-lion-7b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - trust_remote_code: true - - - name: huggingface/llama3-8b-cpt-sea-lionv2-base - model_name: aisingapore/llama3-8b-cpt-sea-lionv2-base - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/llama3-8b-cpt-sea-lionv2.1-instruct - model_name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/gemma2-9b-cpt-sea-lionv3-base - model_name: aisingapore/gemma2-9b-cpt-sea-lionv3-base - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/gemma2-9b-cpt-sea-lionv3-instruct - model_name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/llama3.1-8b-cpt-sea-lionv3-base - model_name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base - tokenizer_name: meta/llama-3.1-8b - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/llama3.1-8b-cpt-sea-lionv3-instruct - model_name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct - tokenizer_name: meta/llama-3.1-8b - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/llama3.1-70b-cpt-sea-lionv3-base - model_name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base - tokenizer_name: meta/llama-3.1-8b - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/llama3.1-70b-cpt-sea-lionv3-instruct - model_name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct - tokenizer_name: meta/llama-3.1-8b - max_sequence_length: 128000 - client_spec: - class_name: 
"helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - ## Bigcode - - name: huggingface/santacoder - model_name: bigcode/santacoder - tokenizer_name: bigcode/santacoder - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/starcoder - model_name: bigcode/starcoder - tokenizer_name: bigcode/starcoder - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## Biomistral - - - name: huggingface/biomistral-7b - model_name: biomistral/biomistral-7b - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## Databricks - - name: huggingface/dolly-v2-3b - model_name: databricks/dolly-v2-3b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/dolly-v2-7b - model_name: databricks/dolly-v2-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/dolly-v2-12b - model_name: databricks/dolly-v2-12b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## EleutherAI - - name: huggingface/pythia-1b-v0 - model_name: eleutherai/pythia-1b-v0 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/pythia-2.8b-v0 - model_name: eleutherai/pythia-2.8b-v0 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/pythia-6.9b - model_name: eleutherai/pythia-6.9b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/pythia-12b-v0 - model_name: eleutherai/pythia-12b-v0 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/gpt-j-6b - model_name: eleutherai/gpt-j-6b - tokenizer_name: EleutherAI/gpt-j-6B - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/gpt-neox-20b - model_name: eleutherai/gpt-neox-20b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## Google - - name: huggingface/gemma-2-9b - model_name: google/gemma-2-9b - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/gemma-2-9b-it - model_name: google/gemma-2-9b-it - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/gemma-2-27b - model_name: google/gemma-2-27b - tokenizer_name: 
google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - - name: huggingface/gemma-2-27b-it - model_name: google/gemma-2-27b-it - tokenizer_name: google/gemma-2-9b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: torch.bfloat16 - - ## LMSYS - - name: huggingface/vicuna-7b-v1.3 - model_name: lmsys/vicuna-7b-v1.3 - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/vicuna-13b-v1.3 - model_name: lmsys/vicuna-13b-v1.3 - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## Meditron - - - name: huggingface/meditron-7b - model_name: epfl-llm/meditron-7b - tokenizer_name: meta-llama/Llama-2-7b-hf - max_sequence_length: 4094 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - ## Meta - - name: huggingface/llama-3.1-8b-instruct - model_name: meta/llama-3.1-8b-instruct - tokenizer_name: meta/llama-3.1-8b-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct - - - name: huggingface/llama-3.2-1b-instruct - model_name: meta/llama-3.2-1b-instruct - tokenizer_name: meta/llama-3.2-1b-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct - - - name: huggingface/opt-175b - model_name: meta/opt-175b - tokenizer_name: facebook/opt-66b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: facebook/opt-175b - - - name: huggingface/opt-66b - model_name: meta/opt-66b - tokenizer_name: facebook/opt-66b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: facebook/opt-66b - - - name: huggingface/opt-6.7b - model_name: meta/opt-6.7b - tokenizer_name: facebook/opt-66b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: facebook/opt-6.7b - - - name: huggingface/opt-1.3b - model_name: meta/opt-1.3b - tokenizer_name: facebook/opt-66b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: facebook/opt-1.3b - - ## Microsoft - - name: huggingface/llava-1.5-7b-hf - model_name: microsoft/llava-1.5-7b-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - - name: huggingface/llava-1.5-13b-hf - model_name: microsoft/llava-1.5-13b-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - - name: huggingface/llava-v1.6-vicuna-7b-hf - model_name: uw-madison/llava-v1.6-vicuna-7b-hf - tokenizer_name: 
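Several Hugging Face entries decouple the HELM model name from the checkpoint via args.pretrained_model_name_or_path (meta/opt-66b loads facebook/opt-66b, meta/llama-3.1-8b-instruct loads meta-llama/Llama-3.1-8B-Instruct). A resolution sketch; resolve_checkpoint is a name invented here:

    # Sketch: fall back to model_name when no explicit checkpoint override is given.
    def resolve_checkpoint(entry: dict) -> str:
        args = entry.get("client_spec", {}).get("args") or {}
        return args.get("pretrained_model_name_or_path", entry["model_name"])

    # meta/opt-66b -> facebook/opt-66b; google/gemma-2-9b -> google/gemma-2-9b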
hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - - name: huggingface/llava-v1.6-vicuna-13b-hf - model_name: uw-madison/llava-v1.6-vicuna-13b-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - - name: huggingface/llava-v1.6-mistral-7b-hf - model_name: uw-madison/llava-v1.6-mistral-7b-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - - name: huggingface/llava-v1.6-34b-hf - model_name: uw-madison/llava-v1.6-34b-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - ## NECTEC - - name: huggingface/Pathumma-llm-text-1.0.0 - model_name: nectec/Pathumma-llm-text-1.0.0 - tokenizer_name: nectec/Pathumma-llm-text-1.0.0 - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/OpenThaiLLM-Prebuilt-7B - model_name: nectec/OpenThaiLLM-Prebuilt-7B - tokenizer_name: nectec/OpenThaiLLM-Prebuilt-7B - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - apply_chat_template: false - - ## KAIST AI - - name: huggingface/prometheus-vision-13b-v1.0-hf - model_name: kaistai/prometheus-vision-13b-v1.0-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - ## OpenFlamingo - - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b - model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b - tokenizer_name: anas-awadalla/mpt-7b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.open_flamingo_client.OpenFlamingoClient" - args: - checkpoint_path: "openflamingo/OpenFlamingo-9B-vitl-mpt7b" - tokenizer_name: "anas-awadalla-2/mpt-7b" - cross_attn_every_n_layers: 4 - - ## Marin Community - - name: huggingface/marin-8b-instruct - model_name: marin-community/marin-8b-instruct - tokenizer_name: marin-community/marin-8b-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - - - name: together/marin-8b-instruct - model_name: marin-community/marin-8b-instruct - tokenizer_name: marin-community/marin-8b-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - ## Microsoft - - name: together/phi-2 - model_name: microsoft/phi-2 - tokenizer_name: microsoft/phi-2 - max_sequence_length: 2047 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: huggingface/phi-3-small-8k-instruct - model_name: microsoft/phi-3-small-8k-instruct - tokenizer_name: microsoft/phi-3-small-8k-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - torch_dtype: auto - trust_remote_code: true - - - name: huggingface/phi-3-medium-4k-instruct - model_name: microsoft/phi-3-medium-4k-instruct - tokenizer_name: microsoft/phi-3-medium-4k-instruct - max_sequence_length: 4096 - client_spec: 
- class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: auto - - - name: huggingface/phi-3.5-mini-instruct-4bit - model_name: microsoft/phi-3.5-mini-instruct - tokenizer_name: microsoft/phi-3.5-mini-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: "float16" - quantization_config: - load_in_4bit: true - attn_implementation: "flash_attention_2" - - - name: huggingface/phi-3.5-mini-instruct - model_name: microsoft/phi-3.5-mini-instruct - tokenizer_name: microsoft/phi-3.5-mini-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/phi-3.5-moe-instruct - model_name: microsoft/phi-3.5-moe-instruct - tokenizer_name: microsoft/phi-3.5-mini-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - torch_dtype: auto - - ## Mistral AI - - name: huggingface/bakLlava-v1-hf - model_name: mistralai/bakLlava-v1-hf - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient" - - ## Moonshot AI - - name: together/kimi-k2-instruct - model_name: moonshotai/kimi-k2-instruct - tokenizer_name: moonshotai/kimi-k2-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - ## MosaicML - - name: huggingface/mpt-7b - model_name: mosaicml/mpt-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: mosaicml/mpt-7b - - - name: huggingface/mpt-instruct-7b - model_name: mosaicml/mpt-instruct-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: mosaicml/mpt-7b-instruct - - - name: huggingface/mpt-30b - model_name: mosaicml/mpt-30b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/mpt-instruct-30b - model_name: mosaicml/mpt-instruct-30b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: mosaicml/mpt-30b-instruct - - ## OpenAI - - name: huggingface/gpt2 - model_name: openai/gpt2 - tokenizer_name: huggingface/gpt2 - max_sequence_length: 1024 - max_request_length: 1025 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: openai-community/gpt2 - - ## OpenThaiGPT - - name: huggingface/openthaigpt-1.0.0-7b-chat - model_name: openthaigpt/openthaigpt-1.0.0-7b-chat - tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/openthaigpt-1.0.0-13b-chat - model_name: openthaigpt/openthaigpt-1.0.0-13b-chat - tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - 
-  - name: huggingface/openthaigpt-1.0.0-13b-chat
-    model_name: openthaigpt/openthaigpt-1.0.0-13b-chat
-    tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  - name: huggingface/openthaigpt-1.0.0-70b-chat
-    model_name: openthaigpt/openthaigpt-1.0.0-70b-chat
-    tokenizer_name: huggingface/openthaigpt-1.0.0-7b-chat
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  ## SAIL (SEA AI Lab)
-  - name: huggingface/sailor-7b
-    model_name: sail/sailor-7b
-    tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 32768
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        apply_chat_template: false
-
-  - name: huggingface/sailor-7b-chat
-    model_name: sail/sailor-7b-chat
-    tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 32768
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-
-  - name: huggingface/sailor-14b
-    model_name: sail/sailor-14b
-    tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 32768
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-        apply_chat_template: false
-
-  - name: huggingface/sailor-14b-chat
-    model_name: sail/sailor-14b-chat
-    tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 32768
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  # SambaNova
-  - name: huggingface/sambalingo-thai-base
-    model_name: sambanova/sambalingo-thai-base
-    tokenizer_name: sambanova/sambalingo-thai-base
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
-
-  - name: huggingface/sambalingo-thai-chat
-    model_name: sambanova/sambalingo-thai-chat
-    tokenizer_name: sambanova/sambalingo-thai-base
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
-
-  - name: huggingface/sambalingo-thai-base-70b
-    model_name: sambanova/sambalingo-thai-base-70b
-    tokenizer_name: sambanova/sambalingo-thai-base
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
-        device_map: auto
-
-  - name: huggingface/sambalingo-thai-chat-70b
-    model_name: sambanova/sambalingo-thai-chat-70b
-    tokenizer_name: sambanova/sambalingo-thai-base
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
-        device_map: auto
-
-  ## SCB10X
-  - name: huggingface/typhoon-7b
-    model_name: scb10x/typhoon-7b
-    tokenizer_name: scb10x/typhoon-7b
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-
-  - name: huggingface/typhoon-v1.5-8b
-    model_name: scb10x/typhoon-v1.5-8b
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8192
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-
-  - name: huggingface/typhoon-v1.5-8b-instruct
-    model_name: scb10x/typhoon-v1.5-8b-instruct
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8192
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-
-  - name: huggingface/typhoon-v1.5-72b
-    model_name: scb10x/typhoon-v1.5-72b
-    tokenizer_name: qwen/qwen1.5-7b
-    max_sequence_length: 32768
-    client_spec:
"helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - - - name: huggingface/typhoon-v1.5-72b-instruct - model_name: scb10x/typhoon-v1.5-72b-instruct - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - - - name: huggingface/llama-3-typhoon-v1.5x-8b-instruct - model_name: scb10x/llama-3-typhoon-v1.5x-8b-instruct - tokenizer_name: meta/llama-3-8b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/llama-3-typhoon-v1.5x-70b-instruct - model_name: scb10x/llama-3-typhoon-v1.5x-70b-instruct - tokenizer_name: meta/llama-3-8b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - device_map: auto - - # Alibaba DAMO Academy - - name: huggingface/seallm-7b-v2 - model_name: damo/seallm-7b-v2 - tokenizer_name: damo/seallm-7b-v2 - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2 - - - name: huggingface/seallm-7b-v2.5 - model_name: damo/seallm-7b-v2.5 - tokenizer_name: damo/seallm-7b-v2.5 - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5 - - ## StabilityAI - - name: huggingface/stablelm-base-alpha-3b - model_name: stabilityai/stablelm-base-alpha-3b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - - name: huggingface/stablelm-base-alpha-7b - model_name: stabilityai/stablelm-base-alpha-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - - # Upstage - - name: huggingface/solar-pro-preview-instruct - model_name: upstage/solar-pro-preview-instruct - tokenizer_name: upstage/solar-pro-preview-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - torch_dtype: auto - trust_remote_code: true - - ## Text-to-Image Diffusion Models - - - name: huggingface/dreamlike-diffusion-v1-0 - model_name: huggingface/dreamlike-diffusion-v1-0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/dreamlike-photoreal-v2-0 - model_name: huggingface/dreamlike-photoreal-v2-0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/openjourney-v1-0 - model_name: huggingface/openjourney-v1-0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: 
"helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/openjourney-v2-0 - model_name: huggingface/openjourney-v2-0 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/redshift-diffusion - model_name: huggingface/redshift-diffusion - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/promptist-stable-diffusion-v1-4 - model_name: huggingface/promptist-stable-diffusion-v1-4 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-v1-4 - model_name: huggingface/stable-diffusion-v1-4 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-v1-5 - model_name: huggingface/stable-diffusion-v1-5 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-v2-base - model_name: huggingface/stable-diffusion-v2-base - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-v2-1-base - model_name: huggingface/stable-diffusion-v2-1-base - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-safe-weak - model_name: huggingface/stable-diffusion-safe-weak - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - - name: huggingface/stable-diffusion-safe-medium - model_name: huggingface/stable-diffusion-safe-medium - tokenizer_name: openai/clip-vit-large-patch14 
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: huggingface/stable-diffusion-safe-strong
-    model_name: huggingface/stable-diffusion-safe-strong
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: huggingface/stable-diffusion-safe-max
-    model_name: huggingface/stable-diffusion-safe-max
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: huggingface/vintedois-diffusion-v0-1
-    model_name: huggingface/vintedois-diffusion-v0-1
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: segmind/Segmind-Vega
-    model_name: segmind/Segmind-Vega
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: segmind/SSD-1B
-    model_name: segmind/SSD-1B
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  - name: stabilityai/stable-diffusion-xl-base-1.0
-    model_name: stabilityai/stable-diffusion-xl-base-1.0
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
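Every diffusion entry above pairs `openai/clip-vit-large-patch14` with `max_sequence_length: 75`. That number falls out of CLIP's text encoder, which has 77 positions in total; subtracting the begin- and end-of-text special tokens leaves 75 usable prompt tokens. A quick check against the tokenizer these entries name:

    from transformers import CLIPTokenizer

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
    print(tokenizer.model_max_length)  # 77 positions in total
    # 77 - 2 special tokens (<|startoftext|>, <|endoftext|>) = 75 prompt tokens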
-  # HuggingFaceM4
-  - name: HuggingFaceM4/idefics2-8b
-    model_name: HuggingFaceM4/idefics2-8b
-    # From https://huggingface.co/docs/transformers/main/en/model_doc/idefics2,
-    # "constructs a IDEFICS2 processor which wraps a LLama tokenizer."
-    tokenizer_name: hf-internal-testing/llama-tokenizer
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.vision_language.huggingface_vision2seq_client.HuggingFaceVision2SeqClient"
-
-  - name: HuggingFaceM4/idefics-9b
-    model_name: HuggingFaceM4/idefics-9b
-    tokenizer_name: HuggingFaceM4/idefics-9b
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
-
-  - name: HuggingFaceM4/idefics-9b-instruct
-    model_name: HuggingFaceM4/idefics-9b-instruct
-    tokenizer_name: HuggingFaceM4/idefics-9b-instruct
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
-
-  - name: HuggingFaceM4/idefics-80b
-    model_name: HuggingFaceM4/idefics-80b
-    tokenizer_name: HuggingFaceM4/idefics-80b
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
-
-  - name: HuggingFaceM4/idefics-80b-instruct
-    model_name: HuggingFaceM4/idefics-80b-instruct
-    tokenizer_name: HuggingFaceM4/idefics-80b-instruct
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
-
-  # Lexica
-  - name: lexica/search-stable-diffusion-1.5
-    model_name: lexica/search-stable-diffusion-1.5
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 200
-    client_spec:
-      class_name: "helm.clients.image_generation.lexica_client.LexicaClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.lexica_search_window_service.LexicaSearchWindowService"
-
-  # Kakao
-  - name: kakaobrain/mindall-e
-    model_name: kakaobrain/mindall-e
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 75
-    client_spec:
-      class_name: "helm.clients.image_generation.mindalle_client.MinDALLEClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
-
-  # Lighting AI
-  - name: lightningai/lit-gpt
-    model_name: lightningai/lit-gpt
-    tokenizer_name: lightningai/lit-gpt
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.lit_gpt_client.LitGPTClient"
-      args:
-        checkpoint_dir: "" # Path to the checkpoint directory
-        precision: bf16-true
-
-  # Mistral AI
-
-  - name: mistralai/ministral-3b-2410
-    model_name: mistralai/ministral-3b-2410
-    tokenizer_name: mistralai/Ministral-8B-Instruct-2410
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/ministral-8b-2410
-    model_name: mistralai/ministral-8b-2410
-    tokenizer_name: mistralai/Ministral-8B-Instruct-2410
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-small-2402
-    model_name: mistralai/mistral-small-2402
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-small-2409
-    model_name: mistralai/mistral-small-2409
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-small-2501
-    model_name: mistralai/mistral-small-2501
-    tokenizer_name: mistralai/Mistral-Small-24B-Instruct-2501
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-small-2503
-    model_name: mistralai/mistral-small-2503
-    tokenizer_name: mistralai/Mistral-Small-24B-Instruct-2501
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-medium-2312
-    model_name: mistralai/mistral-medium-2312
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-medium-2505
-    model_name: mistralai/mistral-medium-2505
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-large-2402
-    model_name: mistralai/mistral-large-2402
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-large-2407
-    model_name: mistralai/mistral-large-2407
-    tokenizer_name: mistralai/Mistral-Large-Instruct-2407
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/mistral-large-2411
-    model_name: mistralai/mistral-large-2411
-    tokenizer_name: mistralai/Mistral-Large-Instruct-2411
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/open-mistral-nemo-2407
-    model_name: mistralai/open-mistral-nemo-2407
-    tokenizer_name: mistralai/Mistral-Nemo-Base-2407
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/pixtral-12b-2409
-    model_name: mistralai/pixtral-12b-2409
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-  - name: mistralai/pixtral-large-2411
-    model_name: mistralai/pixtral-large-2411
-    tokenizer_name: mistralai/Mistral-Large-Instruct-2407
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.mistral_client.MistralAIClient"
-
-
-  # Neurips
-  - name: neurips/local
-    model_name: neurips/local
-    tokenizer_name: neurips/local
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.http_model_client.HTTPModelClient"
-
-  # Nvidia
-  - name: nvidia/megatron-gpt2
-    model_name: nvidia/megatron-gpt2
-    tokenizer_name: huggingface/gpt2
-    max_sequence_length: 1024
-    client_spec:
-      class_name: "helm.clients.megatron_client.MegatronClient"
-
-  - name: nvidia/nemotron-4-340b-instruct
-    model_name: nvidia/nemotron-4-340b-instruct
-    tokenizer_name: nvidia/nemotron-4-340b-instruct
-    max_sequence_length: 4085
-    client_spec:
-      class_name: "helm.clients.nvidia_nim_client.NvidiaNimClient"
-
-  # OpenAI
-
-  ## GPT 3 Models
-
-  - name: openai/davinci-002
-    model_name: openai/davinci-002
-    tokenizer_name: openai/cl100k_base
-    # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
-    # to provide a margin of error.
-    max_sequence_length: 16000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient"
-
-  - name: openai/babbage-002
-    model_name: openai/babbage-002
-    tokenizer_name: openai/cl100k_base
-    # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
-    # to provide a margin of error.
-    max_sequence_length: 16000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient"
-
-  ## GPT 3.5 Turbo Models
-  # ChatGPT: https://openai.com/blog/chatgpt
-
-  - name: openai/gpt-3.5-turbo-instruct
-    model_name: openai/gpt-3.5-turbo-instruct
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 4096
-    max_request_length: 4097
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAILegacyCompletionsClient"
-
-  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
-  # sequence length is smaller at 4087 with one user input message and one assistant
-  # output message because ChatGPT uses special tokens for message roles and boundaries.
-  # We use a rounded-down sequence length of 4000 to account for these special tokens.
-  - name: openai/gpt-3.5-turbo-0301
-    model_name: openai/gpt-3.5-turbo-0301
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 4000
-    max_request_length: 4001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
-  # sequence length is smaller at 4087 with one user input message and one assistant
-  # output message because ChatGPT uses special tokens for message roles and boundaries.
-  # We use a rounded-down sequence length of 4000 to account for these special tokens.
-  - name: openai/gpt-3.5-turbo-0613
-    model_name: openai/gpt-3.5-turbo-0613
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 4000
-    max_request_length: 4001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
-  # in the openai/gpt-3.5-turbo-0613 comment
-  - name: openai/gpt-3.5-turbo-16k-0613
-    model_name: openai/gpt-3.5-turbo-16k-0613
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 16000
-    max_request_length: 16001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
-  # in the openai/gpt-3.5-turbo-0613 comment
-  - name: openai/gpt-3.5-turbo-1106
-    model_name: openai/gpt-3.5-turbo-1106
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 16000
-    max_request_length: 16001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
-  # in the openai/gpt-3.5-turbo-0613 comment
-  - name: openai/gpt-3.5-turbo-0125
-    model_name: openai/gpt-3.5-turbo-0125
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 16000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  ## GPT 4 Models
-
-  - name: openai/gpt-4-1106-preview
-    model_name: openai/gpt-4-1106-preview
-    tokenizer_name: openai/cl100k_base
-    # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
-    # the maximum number of output tokens for this model is 4096
-    # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
-    max_sequence_length: 128000
-    max_request_length: 128001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-0314
-    model_name: openai/gpt-4-0314
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 8192
-    max_request_length: 8193
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
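The recurring comments above spell out the pattern for the OpenAI chat entries: the advertised window (4,096 or 16,384 tokens) is rounded down (to 4,000 or 16,000) because role and boundary tokens in the chat format eat into the budget. The margin can be sanity-checked with `tiktoken` and the `cl100k_base` encoding these entries declare (the per-message overhead constant below is an assumption for illustration):

    import tiktoken

    enc = tiktoken.get_encoding("cl100k_base")
    prompt = "Answer the following question..."
    overhead_per_message = 4  # assumed allowance for role/boundary tokens
    used = len(enc.encode(prompt)) + overhead_per_message
    assert used <= 4000  # stay inside HELM's rounded-down budget, not the raw 4096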
-  - name: openai/gpt-4-32k-0314
-    model_name: openai/gpt-4-32k-0314
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 32768
-    max_request_length: 32769
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-0613
-    model_name: openai/gpt-4-0613
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 8192
-    max_request_length: 8193
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-32k-0613
-    model_name: openai/gpt-4-32k-0613
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 32768
-    max_request_length: 32769
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-0125-preview
-    model_name: openai/gpt-4-0125-preview
-    tokenizer_name: openai/cl100k_base
-    # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
-    # the maximum number of output tokens for this model is 4096
-    # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
-    max_sequence_length: 128000
-    max_request_length: 128001
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-turbo-2024-04-09
-    model_name: openai/gpt-4-turbo-2024-04-09
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-2024-05-13
-    model_name: openai/gpt-4o-2024-05-13
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-2024-08-06
-    model_name: openai/gpt-4o-2024-08-06
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-2024-11-20
-    model_name: openai/gpt-4o-2024-11-20
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-mini-2024-07-18
-    model_name: openai/gpt-4o-mini-2024-07-18
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4.1-2025-04-14
-    model_name: openai/gpt-4.1-2025-04-14
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 1047576
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4.1-mini-2025-04-14
-    model_name: openai/gpt-4.1-mini-2025-04-14
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 1047576
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4.1-nano-2025-04-14
-    model_name: openai/gpt-4.1-nano-2025-04-14
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 1047576
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-5-2025-08-07
-    model_name: openai/gpt-5-2025-08-07
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 400000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-
-  - name: openai/gpt-5-mini-2025-08-07
-    model_name: openai/gpt-5-mini-2025-08-07
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 400000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-
-  - name: openai/gpt-5-nano-2025-08-07
-    model_name: openai/gpt-5-nano-2025-08-07
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 400000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-
-  - name: openai/whisper-1_gpt-4o-2024-11-20
-    model_name: openai/whisper-1_gpt-4o-2024-11-20
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient"
-
-  - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
-    model_name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient"
-
-  - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
-    model_name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAITranscriptionThenCompletionClient"
-
-  - name: openai/gpt-4o-audio-preview-2024-10-01
-    model_name: openai/gpt-4o-audio-preview-2024-10-01
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-audio-preview-2024-12-17
-    model_name: openai/gpt-4o-audio-preview-2024-12-17
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4o-mini-audio-preview-2024-12-17
-    model_name: openai/gpt-4o-mini-audio-preview-2024-12-17
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-vision-preview
-    model_name: openai/gpt-4-vision-preview
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
-    max_request_length: 128001
-    max_sequence_and_generated_tokens_length: 132096
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/gpt-4-1106-vision-preview
-    model_name: openai/gpt-4-1106-vision-preview
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
-    max_request_length: 128001
-    max_sequence_and_generated_tokens_length: 132096
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  ## GPT-4.5
-  - name: openai/gpt-4.5-preview-2025-02-27
-    model_name: openai/gpt-4.5-preview-2025-02-27
-    tokenizer_name: openai/o200k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  ## o1 Models
-  - name: openai/o1-pro-2025-03-19
-    model_name: openai/o1-pro-2025-03-19
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-
-  - name: openai/o1-pro-2025-03-19-low-reasoning-effort
-    model_name: openai/o1-pro-2025-03-19-low-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-      args:
-        openai_model_name: o1-pro-2025-03-19
-        reasoning_effort: low
-
-  - name: openai/o1-pro-2025-03-19-high-reasoning-effort
-    model_name: openai/o1-pro-2025-03-19-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-      args:
-        openai_model_name: o1-pro-2025-03-19
-        reasoning_effort: high
-
-  - name: openai/o1-2024-12-17
-    model_name: openai/o1-2024-12-17
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/o1-2024-12-17-low-reasoning-effort
-    model_name: openai/o1-2024-12-17-low-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o1-2024-12-17
-        reasoning_effort: low
-
-  - name: openai/o1-2024-12-17-high-reasoning-effort
-    model_name: openai/o1-2024-12-17-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o1-2024-12-17
-        reasoning_effort: high
-
-  - name: openai/o1-preview-2024-09-12
-    model_name: openai/o1-preview-2024-09-12
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/o1-mini-2024-09-12
-    model_name: openai/o1-mini-2024-09-12
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/o3-mini-2025-01-31
-    model_name: openai/o3-mini-2025-01-31
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 200000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/o3-mini-2025-01-31-low-reasoning-effort
-    model_name: openai/o3-mini-2025-01-31-low-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 200000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o3-mini-2025-01-31
-        reasoning_effort: low
-
-  - name: openai/o3-mini-2025-01-31-high-reasoning-effort
-    model_name: openai/o3-mini-2025-01-31-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    max_sequence_length: 200000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o3-mini-2025-01-31
-        reasoning_effort: high
-
-  - name: openai/o3-2025-04-16
-    model_name: openai/o3-2025-04-16
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o3
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  - name: openai/o3-2025-04-16-low-reasoning-effort
-    model_name: openai/o3-2025-04-16-low-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o3
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o3-2025-04-16
-        reasoning_effort: low
-
-  - name: openai/o3-2025-04-16-high-reasoning-effort
-    model_name: openai/o3-2025-04-16-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o3
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o3-2025-04-16
-        reasoning_effort: high
-
-  - name: openai/o4-mini-2025-04-16
-    model_name: openai/o4-mini-2025-04-16
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o4-mini
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
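The `-low-reasoning-effort` and `-high-reasoning-effort` variants above and below are not distinct OpenAI models: `args.openai_model_name` points each back at the underlying model, and `args.reasoning_effort` fixes the effort level the client sends with every request. Presumably that boils down to something like this sketch against the OpenAI Python SDK (illustrative only; HELM's client wiring may differ):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    response = client.chat.completions.create(
        model="o3-mini-2025-01-31",   # args.openai_model_name
        reasoning_effort="low",       # args.reasoning_effort
        messages=[{"role": "user", "content": "How many primes are below 30?"}],
    )
    print(response.choices[0].message.content)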
-  - name: openai/o4-mini-2025-04-16-low-reasoning-effort
-    model_name: openai/o4-mini-2025-04-16-low-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o4-mini
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o4-mini-2025-04-16
-        reasoning_effort: low
-
-
-  - name: openai/o4-mini-2025-04-16-high-reasoning-effort
-    model_name: openai/o4-mini-2025-04-16-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o4-mini
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-      args:
-        openai_model_name: o4-mini-2025-04-16
-        reasoning_effort: high
-
-
-  - name: openai/o3-pro-2025-06-10-high-reasoning-effort
-    model_name: openai/o3-pro-2025-06-10-high-reasoning-effort
-    tokenizer_name: openai/cl100k_base
-    # Source: https://platform.openai.com/docs/models/o3-pro
-    max_sequence_length: 200000
-    # TODO: max_output_tokens: 100000
-    client_spec:
-      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
-      args:
-        openai_model_name: o3-pro-2025-06-10
-        reasoning_effort: high
-
-  ## GPT-OSS
-  - name: together/gpt-oss-20b
-    model_name: openai/gpt-oss-20b
-    tokenizer_name: openai/o200k_harmony
-    # Source: https://platform.openai.com/docs/models/gpt-oss-20b
-    max_sequence_length: 131072
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-
-  - name: together/gpt-oss-120b
-    model_name: openai/gpt-oss-120b
-    tokenizer_name: openai/o200k_harmony
-    # Source: https://platform.openai.com/docs/models/gpt-oss-120b
-    max_sequence_length: 131072
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-
-  ## Text Similarity Models
-  # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
-  # The number of parameters is guessed based on the number of parameters of the
-  # corresponding GPT-3 model.
-
-  # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
-  # "We recommend using text-embedding-ada-002 for nearly all use cases."
-  # Source: https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
-  - name: openai/text-embedding-ada-002
-    model_name: openai/text-embedding-ada-002
-    tokenizer_name: huggingface/gpt2
-    max_sequence_length: 2048
-    max_request_length: 2049
-    client_spec:
-      class_name: "helm.clients.openai_client.OpenAIClient"
-
-  # Text-to-image models
-  - name: openai/dall-e-2
-    model_name: openai/dall-e-2
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 1000
-    client_spec:
-      class_name: "helm.clients.image_generation.dalle2_client.DALLE2Client"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
-
-  - name: openai/dall-e-3
-    model_name: openai/dall-e-3
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 1000
-    client_spec:
-      class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
-
-  - name: openai/dall-e-3-natural
-    model_name: openai/dall-e-3-natural
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 1000
-    client_spec:
-      class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
-
-  - name: openai/dall-e-3-hd
-    model_name: openai/dall-e-3-hd
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 1000
-    client_spec:
-      class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
-
-  - name: openai/dall-e-3-hd-natural
-    model_name: openai/dall-e-3-hd-natural
-    tokenizer_name: openai/clip-vit-large-patch14
-    max_sequence_length: 1000
-    client_spec:
-      class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
-
-  # Together
-  # The list of models served by Together changes often, to check the latest list, visit:
-  # https://docs.together.ai/docs/inference-models
-  # You can also check the playground to check that the live models are working:
-  # https://api.together.xyz/playground
-
-  ## BigScience
-  - name: together/bloom
-    deprecated: true # Removed from Together
-    model_name: bigscience/bloom
-    tokenizer_name: bigscience/bloom
-    max_sequence_length: 2048
-    max_request_length: 2049
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-
-  - name: together/t0pp
-    deprecated: true # Removed from Together
-    model_name: bigscience/t0pp
-    tokenizer_name: bigscience/T0pp
-    max_sequence_length: 1024
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
-
-  ## Google
-  - name: together/t5-11b
-    deprecated: true # Removed from Together
-    model_name: google/t5-11b
-    tokenizer_name: google/t5-11b
-    max_sequence_length: 511
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
-
-  - name: together/flan-t5-xxl
-    deprecated: true # Removed from Together
-    model_name: google/flan-t5-xxl
-    tokenizer_name: google/flan-t5-xxl
-    max_sequence_length: 511
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
-
-  - name: together/ul2
-    deprecated: true # Removed from Together
-    model_name: google/ul2
-    tokenizer_name: google/ul2
-    max_sequence_length: 511
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-    window_service_spec:
-      class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
-
-  ## Meta
-  - name: together/llama-7b
-    model_name: meta/llama-7b
-    tokenizer_name: hf-internal-testing/llama-tokenizer
-    max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: huggyllama/llama-7b
-
-  - name: together/llama-13b
-    model_name: meta/llama-13b
-    tokenizer_name: hf-internal-testing/llama-tokenizer
-    max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: huggyllama/llama-13b
-
-  - name: together/llama-30b
-    model_name: meta/llama-30b
-    tokenizer_name: hf-internal-testing/llama-tokenizer
-    max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: huggyllama/llama-30b
-
-  - name: together/llama-65b
-    model_name: meta/llama-65b
-    tokenizer_name: hf-internal-testing/llama-tokenizer
-    max_sequence_length: 2047 # Subtract 1 tokens to work around a off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: huggyllama/llama-65b
-
-  - name: together/llama-2-7b
-    model_name: meta/llama-2-7b
-    tokenizer_name: meta-llama/Llama-2-7b-hf
-    max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: togethercomputer/llama-2-7b
-
-  - name: together/llama-2-13b
-    model_name: meta/llama-2-13b
-    tokenizer_name: meta-llama/Llama-2-7b-hf
-    max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: togethercomputer/llama-2-13b
-
-  - name: together/llama-2-70b
-    model_name: meta/llama-2-70b
-    tokenizer_name: meta-llama/Llama-2-7b-hf
-    max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: togethercomputer/llama-2-70b
-
-  - name: together/llama-3-8b
-    model_name: meta/llama-3-8b
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Llama-3-8b-hf
-
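The Meta entries above make the Together workaround explicit: `max_sequence_length` is the context window minus one (2048 -> 2047) where the off-by-one counting bug (#2080) applies, and minus two (4096 -> 4094) where the off-by-two variant (#2080 and #2094) does. A trivial helper, hypothetical and purely to make the arithmetic explicit:

    def together_max_sequence_length(context_window: int, off_by: int = 1) -> int:
        """Shrink a context window to dodge Together's off-by-N token counting."""
        return context_window - off_by

    assert together_max_sequence_length(2048) == 2047            # llama-7b ... llama-65b
    assert together_max_sequence_length(4096, off_by=2) == 4094  # llama-2 models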
-  - name: together/llama-3-8b-instruct-turbo
-    model_name: meta/llama-3-8b-instruct-turbo
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3-8B-Instruct-Turbo
-
-  - name: together/llama-3-8b-instruct-lite
-    model_name: meta/llama-3-8b-instruct-lite
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3-8B-Instruct-Lite
-
-  - name: together/llama-3-70b
-    model_name: meta/llama-3-70b
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3-70B
-
-  - name: together/llama-3-70b-instruct-turbo
-    model_name: meta/llama-3-70b-instruct-turbo
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
-
-  - name: together/llama-3-70b-instruct-lite
-    model_name: meta/llama-3-70b-instruct-lite
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 8191
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3-70B-Instruct-Lite
-
-  - name: together/llama-3.1-8b-instruct-turbo
-    model_name: meta/llama-3.1-8b-instruct-turbo
-    tokenizer_name: meta/llama-3.1-8b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
-
-  - name: together/llama-3.1-70b-instruct-turbo
-    model_name: meta/llama-3.1-70b-instruct-turbo
-    tokenizer_name: meta/llama-3.1-8b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
-
-  - name: together/llama-3.1-405b-instruct-turbo
-    model_name: meta/llama-3.1-405b-instruct-turbo
-    tokenizer_name: meta/llama-3.1-8b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
-
-  - name: together/llama-4-scout-17b-16e-instruct
-    model_name: meta/llama-4-scout-17b-16e-instruct
-    tokenizer_name: meta/llama-4-scout-17b-16e-instruct
-    max_sequence_length: 327680
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-4-Scout-17B-16E-Instruct
-
-  - name: together/llama-4-maverick-17b-128e-instruct-fp8
-    model_name: meta/llama-4-maverick-17b-128e-instruct-fp8
-    tokenizer_name: meta/llama-4-scout-17b-16e-instruct
-    max_sequence_length: 524288
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
-
-  - name: together/llama-3-8b-chat
-    model_name: meta/llama-3-8b-chat
-    tokenizer_name: meta/llama-3-8b-instruct
-    max_sequence_length: 8182
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3-8b-chat-hf
-
-  - name: together/llama-3-70b-chat
-    model_name: meta/llama-3-70b-chat
-    tokenizer_name: meta/llama-3-8b-instruct
-    max_sequence_length: 8182
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3-70b-chat-hf
-
-  - name: together/llama-3.2-3b-instruct-turbo
-    model_name: meta/llama-3.2-3b-instruct-turbo
-    tokenizer_name: meta/llama-3.2-3b-instruct
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3.2-3B-Instruct-Turbo
-
-  - name: together/llama-3.2-11b-vision-instruct-turbo
-    model_name: meta/llama-3.2-11b-vision-instruct-turbo
-    tokenizer_name: meta/llama-3.2-11b-vision-instruct
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
-
-  - name: together/llama-3.2-90b-vision-instruct-turbo
-    model_name: meta/llama-3.2-90b-vision-instruct-turbo
-    tokenizer_name: meta/llama-3.2-11b-vision-instruct
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
-
-  - name: together/llama-3.3-70b-instruct-turbo
-    model_name: meta/llama-3.3-70b-instruct-turbo
-    tokenizer_name: meta/llama-3.3-70b-instruct
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: meta-llama/Llama-3.3-70B-Instruct-Turbo
-
-  - name: together/llama-guard-7b
-    model_name: meta/llama-guard-7b
-    tokenizer_name: meta-llama/Llama-2-7b-hf
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/llama-guard-7b
-
-  - name: together/llama-guard-2-8b
-    model_name: meta/llama-guard-2-8b
-    tokenizer_name: meta/llama-3-8b
-    max_sequence_length: 4094
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/llamaguard-2-8b
-
-  - name: together/llama-guard-3-8b
-    model_name: meta/llama-guard-3-8b
-    tokenizer_name: meta/llama-3.1-8b
-    max_sequence_length: 128000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: meta-llama/Meta-Llama-Guard-3-8B
-
-  # NVIDIA
-  - name: together/llama-3.1-nemotron-70b-instruct
-    model_name: nvidia/llama-3.1-nemotron-70b-instruct
-    tokenizer_name: nvidia/llama-3.1-nemotron-70b-instruct
-    max_sequence_length: 32768
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
-
-  # 01.AI
-  - name: together/yi-6b
-    model_name: 01-ai/yi-6b
-    tokenizer_name: 01-ai/Yi-6B
-    max_sequence_length: 4095
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: zero-one-ai/Yi-6B
-
-  - name: together/yi-34b
-    model_name: 01-ai/yi-34b
-    tokenizer_name: 01-ai/Yi-6B
-    max_sequence_length: 4095
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: zero-one-ai/Yi-34B
-
-  - name: together/yi-6b-chat
-    model_name: 01-ai/yi-6b-chat
-    tokenizer_name: 01-ai/Yi-6B
-    max_sequence_length: 4095
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: zero-one-ai/Yi-6B-Chat
-
-  - name: together/yi-34b-chat
-    model_name: 01-ai/yi-34b-chat
-    tokenizer_name: 01-ai/Yi-6B
-    max_sequence_length: 4095
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-      args:
-        together_model: zero-one-ai/Yi-34B-Chat
-
-  - name: 01-ai/yi-large
-    model_name: 01-ai/yi-large
-    tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
-    max_sequence_length: 16000
-    client_spec:
-      class_name: "helm.clients.yi_client.YiChatClient"
-
-  - name: 01-ai/yi-large-preview
-    model_name: 01-ai/yi-large-preview
-    tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
-    max_sequence_length: 16000
-    client_spec:
-      class_name: "helm.clients.yi_client.YiChatClient"
-
-
-  # Allen Institute for AI
-  - name: together/olmo-7b
-    model_name: allenai/olmo-7b
-    tokenizer_name: allenai/olmo-7b
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-
-  - name: together/olmo-7b-twin-2t
-    model_name: allenai/olmo-7b-twin-2t
-    tokenizer_name: allenai/olmo-7b
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-
-  - name: together/olmo-7b-instruct
-    model_name: allenai/olmo-7b-instruct
-    tokenizer_name: allenai/olmo-7b
-    max_sequence_length: 2047
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-
-  - name: huggingface/olmo-1.7-7b
-    model_name: allenai/olmo-1.7-7b
-    tokenizer_name: allenai/OLMo-1.7-7B-hf
-    max_sequence_length: 2048
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        pretrained_model_name_or_path: allenai/OLMo-1.7-7B-hf
-
-  - name: huggingface/olmo-2-1124-7b-instruct
-    model_name: allenai/olmo-2-1124-7b-instruct
-    tokenizer_name: allenai/olmo-2-1124-7b-instruct
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  - name: huggingface/olmo-2-1124-13b-instruct
-    model_name: allenai/olmo-2-1124-13b-instruct
-    tokenizer_name: allenai/olmo-2-1124-7b-instruct
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  - name: huggingface/olmo-2-0325-32b-instruct
-    model_name: allenai/olmo-2-0325-32b-instruct
-    tokenizer_name: allenai/olmo-2-0325-32b-instruct
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  - name: huggingface/olmoe-1b-7b-0125-instruct
-    model_name: allenai/olmoe-1b-7b-0125-instruct
-    tokenizer_name: allenai/olmoe-1b-7b-0125-instruct
-    max_sequence_length: 4096
-    client_spec:
-      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
-      args:
-        device_map: auto
-
-  ## MistralAI
-  - name: together/mistral-7b-v0.1
-    model_name: mistralai/mistral-7b-v0.1
-    tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherClient"
-      args:
-        together_model: mistralai/Mistral-7B-v0.1
-
-  - name: together/mistral-7b-instruct-v0.1
-    model_name: mistralai/mistral-7b-instruct-v0.1
-    tokenizer_name: mistralai/Mistral-7B-Instruct-v0.1
-    max_sequence_length: 4000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-
-  - name: together/mistral-7b-instruct-v0.2
-    model_name: mistralai/mistral-7b-instruct-v0.2
-    tokenizer_name: mistralai/Mistral-7B-Instruct-v0.2
-    max_sequence_length: 32000
-    client_spec:
-      class_name: "helm.clients.together_client.TogetherChatClient"
-
-  - name: huggingface/mistral-7b-instruct-v0.3
-    model_name: mistralai/mistral-7b-instruct-v0.3-hf
mistralai/Mistral-7B-Instruct-v0.3 - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: mistralai/Mistral-7B-Instruct-v0.3 - - - name: together/mistral-7b-instruct-v0.3 - model_name: mistralai/mistral-7b-instruct-v0.3 - tokenizer_name: mistralai/Mistral-7B-Instruct-v0.3 - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - - name: together/mixtral-8x7b-32kseqlen - model_name: mistralai/mixtral-8x7b-32kseqlen - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: mistralai/mixtral-8x7b-32kseqlen - - - name: together/mixtral-8x7b-instruct-v0.1 - model_name: mistralai/mixtral-8x7b-instruct-v0.1 - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/mixtral-8x22b - model_name: mistralai/mixtral-8x22b - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 65535 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - - - name: together/mixtral-8x22b-instruct-v0.1 - model_name: mistralai/mixtral-8x22b-instruct-v0.1 - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 65535 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - ## Snowflake - - name: together/snowflake-arctic-instruct - model_name: snowflake/snowflake-arctic-instruct - tokenizer_name: snowflake/snowflake-arctic-instruct - max_sequence_length: 4000 # Lower than 4096 because of chat tokens - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - ## Stanford - - name: together/alpaca-7b - model_name: stanford/alpaca-7b - tokenizer_name: hf-internal-testing/llama-tokenizer - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/alpaca-7b - - ## Tiiuae - - name: together/falcon-7b - model_name: tiiuae/falcon-7b - tokenizer_name: tiiuae/falcon-7b - max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/falcon-7b - - - name: together/falcon-7b-instruct - model_name: tiiuae/falcon-7b-instruct - tokenizer_name: tiiuae/falcon-7b - max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/falcon-7b-instruct - - - name: together/falcon-40b - model_name: tiiuae/falcon-40b - tokenizer_name: tiiuae/falcon-7b - max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080) - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/falcon-40b - - - name: together/falcon-40b-instruct - model_name: tiiuae/falcon-40b-instruct - tokenizer_name: tiiuae/falcon-7b - max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation
token counting (#2080) - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/falcon-40b-instruct - - ## Together - # These are models fine-tuned by Together (and not simply hosted by Together). - - name: together/gpt-jt-6b-v1 - model_name: together/gpt-jt-6b-v1 - tokenizer_name: EleutherAI/gpt-j-6B - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/GPT-JT-6B-v1 - - - name: together/gpt-neoxt-chat-base-20b - model_name: together/gpt-neoxt-chat-base-20b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/GPT-NeoXT-Chat-Base-20B - - - name: together/redpajama-incite-base-3b-v1 - model_name: together/redpajama-incite-base-3b-v1 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/RedPajama-INCITE-Base-3B-v1 - - - name: together/redpajama-incite-instruct-3b-v1 - model_name: together/redpajama-incite-instruct-3b-v1 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/RedPajama-INCITE-Instruct-3B-v1 - - - name: together/redpajama-incite-base-7b - model_name: together/redpajama-incite-base-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/RedPajama-INCITE-7B-Base - - - name: together/redpajama-incite-instruct-7b - model_name: together/redpajama-incite-instruct-7b - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/RedPajama-INCITE-7B-Instruct - - ## Z.ai - - name: together/glm-4.5-air-fp8 - model_name: zai-org/glm-4.5-air-fp8 - tokenizer_name: zai-org/glm-4.5-air-fp8 - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - args: - parse_thinking: true - - - name: thudm/cogview2 - model_name: thudm/cogview2 - tokenizer_name: openai/clip-vit-large-patch14 - max_sequence_length: 75 - client_spec: - class_name: "helm.clients.image_generation.cogview2_client.CogView2Client" - window_service_spec: - class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService" - - ## Yandex - - name: together/yalm - deprecated: true # Removed from Together - model_name: yandex/yalm - tokenizer_name: Yandex/yalm - max_sequence_length: 2048 - max_request_length: 2049 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - window_service_spec: - class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService" - - # Writer - - name: writer/palmyra-base - model_name: writer/palmyra-base - tokenizer_name: writer/gpt2 - max_sequence_length: 2048 - max_sequence_and_generated_tokens_length: 2048 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-large - model_name: 
writer/palmyra-large - tokenizer_name: writer/gpt2 - max_sequence_length: 2048 - max_sequence_and_generated_tokens_length: 2048 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/silk-road - model_name: writer/silk-road - tokenizer_name: writer/gpt2 - max_sequence_length: 8192 - max_sequence_and_generated_tokens_length: 8192 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-x - model_name: writer/palmyra-x - tokenizer_name: writer/gpt2 - max_sequence_length: 8192 - max_sequence_and_generated_tokens_length: 8192 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-x-v2 - model_name: writer/palmyra-x-v2 - tokenizer_name: writer/gpt2 - max_sequence_length: 6000 - max_sequence_and_generated_tokens_length: 7024 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-x-v3 - model_name: writer/palmyra-x-v3 - tokenizer_name: writer/gpt2 - max_sequence_length: 6000 - max_sequence_and_generated_tokens_length: 7024 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-x-32k - model_name: writer/palmyra-x-32k - tokenizer_name: writer/gpt2 - max_sequence_length: 28000 - max_sequence_and_generated_tokens_length: 30048 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraClient" - - - name: writer/palmyra-vision-003 - model_name: writer/palmyra-vision-003 - tokenizer_name: writer/gpt2 - max_sequence_length: 2048 - max_sequence_and_generated_tokens_length: 2048 - client_spec: - class_name: "helm.clients.vision_language.palmyra_vision_client.PalmyraVisionClient" - - - name: writer/palmyra-x-004 - model_name: writer/palmyra-x-004 - # Actual tokenizer is Llama 2, but it cannot be used in HELM due to this issue: - # https://github.com/stanford-crfm/helm/issues/2467 - # Work around by using Llama 3 tokenizer for now. 
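A note on the substitute-tokenizer pattern used above (Yi-6B standing in for yi-large, Llama 3 standing in for Palmyra's actual Llama 2 tokenizer): the named tokenizer only affects window fitting, that is, counting prompt tokens against max_sequence_length before a request is sent, so a close-enough substitute is acceptable. Below is a minimal sketch of that budgeting logic, assuming tiktoken's cl100k_base as the stand-in encoding; the fit_prompt helper and the one-token slack (mirroring the Together #2080 workaround noted earlier) are illustrative, not HELM's actual implementation.

import tiktoken

def fit_prompt(prompt: str, max_sequence_length: int, max_new_tokens: int) -> str:
    """Trim the prompt so prompt tokens plus generated tokens fit the window."""
    enc = tiktoken.get_encoding("cl100k_base")  # stand-in for the true tokenizer
    # Reserve room for generation, plus 1 token of slack for off-by-one
    # differences in server-side token counting.
    budget = max_sequence_length - max_new_tokens - 1
    ids = enc.encode(prompt)
    return prompt if len(ids) <= budget else enc.decode(ids[:budget])

Because a stand-in encoding can over- or under-count relative to the serving-side tokenizer, the conservative max_sequence_length values in these entries (4095, 2047, and so on) give that budget extra headroom.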
- tokenizer_name: meta/llama-3-8b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraChatClient" - - - name: writer/palmyra-x5 - model_name: writer/palmyra-x5 - # See tokenizer comment for writer/palmyra-x-004 - tokenizer_name: meta/llama-3-8b - max_sequence_length: 1000000 - client_spec: - class_name: "helm.clients.writer_client.WriterClient" - - - name: writer/palmyra-med-32k - model_name: writer/palmyra-med-32k - # Palmyra-Med uses the "<|end_of_text|>" as the end of text token, which is used by meta/llama-3-8b, - # rather than "<|eot_id|>", which is used by meta/llama-3-8b-instruct - tokenizer_name: meta/llama-3-8b - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraChatClient" - - - name: writer/palmyra-med - model_name: writer/palmyra-med - tokenizer_name: meta/llama-3-8b - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.writer_client.WriterClient" - - - name: writer/palmyra-fin-32k - model_name: writer/palmyra-fin-32k - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraChatClient" - - - name: writer/palmyra-fin - model_name: writer/palmyra-fin - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.palmyra_client.PalmyraChatClient" - - - # xAI - - - name: xai/grok-3-beta - model_name: xai/grok-3-beta - tokenizer_name: xai/grok-3-beta - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.grok_client.GrokChatClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: xai/grok-3-mini-beta - model_name: xai/grok-3-mini-beta - tokenizer_name: xai/grok-3-mini-beta - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.grok_client.GrokChatClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - - name: xai/grok-4-0709 - model_name: xai/grok-4-0709 - tokenizer_name: xai/grok-4-0709 - max_sequence_length: 256000 - client_spec: - class_name: "helm.clients.grok_client.GrokChatClient" - window_service_spec: - class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService" - - # Qwen - - - name: together/qwen-7b - model_name: qwen/qwen-7b - tokenizer_name: qwen/qwen-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: togethercomputer/Qwen-7B - - - name: together/qwen1.5-7b - model_name: qwen/qwen1.5-7b - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: Qwen/Qwen1.5-7B - - - name: together/qwen1.5-14b - model_name: qwen/qwen1.5-14b - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: Qwen/Qwen1.5-14B - - - name: together/qwen1.5-32b - model_name: qwen/qwen1.5-32b - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherClient" - args: - together_model: Qwen/Qwen1.5-32B - - - name: together/qwen1.5-72b - model_name: qwen/qwen1.5-72b - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: 
"helm.clients.together_client.TogetherClient" - args: - together_model: Qwen/Qwen1.5-72B - - - name: together/qwen1.5-7b-chat - model_name: qwen/qwen1.5-7b-chat - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen1.5-14b-chat - model_name: qwen/qwen1.5-14b-chat - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen1.5-32b-chat - model_name: qwen/qwen1.5-32b-chat - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen1.5-72b-chat - model_name: qwen/qwen1.5-72b-chat - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen1.5-110b-chat - model_name: qwen/qwen1.5-110b-chat - tokenizer_name: qwen/qwen1.5-7b - max_sequence_length: 32767 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen2-72b-instruct - model_name: qwen/qwen2-72b-instruct - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen2.5-7b-instruct-turbo - model_name: qwen/qwen2.5-7b-instruct-turbo - tokenizer_name: qwen/qwen2.5-7b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen2.5-72b-instruct-turbo - model_name: qwen/qwen2.5-72b-instruct-turbo - tokenizer_name: qwen/qwen2.5-7b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: together/qwen3-235b-a22b-fp8-tput - model_name: qwen/qwen3-235b-a22b-fp8-tput - tokenizer_name: qwen/qwen3-235b-a22b - max_sequence_length: 40960 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - args: - parse_thinking: true - - - name: together/qwen3-235b-a22b-instruct-2507-fp8 - model_name: qwen/qwen3-235b-a22b-instruct-2507-fp8 - tokenizer_name: qwen/qwen3-235b-a22b-instruct-2507-fp8 - max_sequence_length: 262144 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - args: - together_model: Qwen/Qwen3-235B-A22B-Instruct-2507-tput - - - name: huggingface/qwen2.5-7b-instruct-4bit - model_name: qwen/qwen2.5-7b-instruct - tokenizer_name: qwen/qwen2.5-7b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct - torch_dtype: "float16" - quantization_config: - load_in_4bit: true - attn_implementation: "flash_attention_2" - - - name: huggingface/qwen2.5-7b-instruct - model_name: qwen/qwen2.5-7b-instruct - tokenizer_name: qwen/qwen2.5-7b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct - - - name: huggingface/smollm2-135m - model_name: huggingface/smollm2-135m - tokenizer_name: huggingface/smollm2-135m - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M - - - name: 
huggingface/smollm2-360m - model_name: huggingface/smollm2-360m - tokenizer_name: huggingface/smollm2-135m - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-360M - - - name: huggingface/smollm2-1.7b - model_name: huggingface/smollm2-1.7b - tokenizer_name: huggingface/smollm2-135m - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-1.7B - - - name: huggingface/smollm2-135m-instruct - model_name: huggingface/smollm2-135m-instruct - tokenizer_name: huggingface/smollm2-135m-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M-Instruct - - - name: huggingface/smollm2-360m-instruct - model_name: huggingface/smollm2-360m-instruct - tokenizer_name: huggingface/smollm2-135m-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-360M-Instruct - - - name: huggingface/smollm2-1.7b-instruct - model_name: huggingface/smollm2-1.7b-instruct - tokenizer_name: huggingface/smollm2-135m-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_pipeline_client.HuggingFacePipelineClient" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-1.7B-Instruct - - - name: together/qwq-32b-preview - model_name: qwen/qwq-32b-preview - tokenizer_name: qwen/qwq-32b-preview - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.together_client.TogetherChatClient" - - - name: huggingface/qwen-vl - model_name: qwen/qwen-vl - tokenizer_name: qwen/qwen-vl - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient" - - - name: huggingface/qwen-vl-chat - model_name: qwen/qwen-vl-chat - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient" - - - name: huggingface/qwen2-vl-7b-instruct - model_name: qwen/qwen2-vl-7b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name: huggingface/qwen2-vl-72b-instruct - model_name: qwen/qwen2-vl-72b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name: huggingface/qwen2.5-vl-3b-instruct - model_name: qwen/qwen2.5-vl-3b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name: huggingface/qwen2.5-vl-7b-instruct - model_name: qwen/qwen2.5-vl-7b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name: huggingface/qwen2.5-vl-32b-instruct - model_name: qwen/qwen2.5-vl-32b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name:
huggingface/qwen2.5-vl-72b-instruct - model_name: qwen/qwen2.5-vl-72b-instruct - tokenizer_name: qwen/qwen-vl-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.vision_language.qwen2_vlm_client.Qwen2VLMClient" - - - name: huggingface/qwen-audio-chat - model_name: qwen/qwen-audio-chat - tokenizer_name: qwen/qwen-audio-chat - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.audio_language.qwen_audiolm_client.QwenAudioLMClient" - - - name: huggingface/qwen2-audio-7b-instruct - model_name: qwen/qwen2-audio-7b-instruct - tokenizer_name: qwen/qwen2-audio-instruct - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.audio_language.qwen2_audiolm_client.Qwen2AudioLMClient" - - - name: huggingface/qwen2.5-omni-7b - model_name: qwen/qwen2.5-omni-7b - tokenizer_name: qwen/qwen2.5-omni-7b - max_sequence_length: 8191 - client_spec: - class_name: "helm.clients.audio_language.qwen2_5_omni_client.Qwen2_5OmniAudioLMClient" - -# Reka - - name: reka/reka-core - model_name: reka/reka-core - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-core-20240415 - model_name: reka/reka-core-20240415 - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-core-20240501 - model_name: reka/reka-core-20240501 - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-flash - model_name: reka/reka-flash - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-flash-20240226 - model_name: reka/reka-flash-20240226 - tokenizer_name: openai/cl100k_base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-edge - model_name: reka/reka-edge - tokenizer_name: openai/cl100k_base - max_sequence_length: 64000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - - name: reka/reka-edge-20240208 - model_name: reka/reka-edge-20240208 - tokenizer_name: openai/cl100k_base - max_sequence_length: 64000 - client_spec: - class_name: "helm.clients.reka_client.RekaClient" - - # Upstage - - name: upstage/solar-pro-241126 - model_name: upstage/solar-pro-241126 - tokenizer_name: upstage/solar-pro-preview-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.upstage_client.UpstageChatClient" - -# Diva Llama - - name: huggingface/diva-llama - model_name: stanford/diva-llama - # TODO: Set the right tokenizer - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.audio_language.diva_llama_client.DivaLlamaClient" - -# LLaMA-Omni - - name: ictnlp/llama-3.1-8b-omni - model_name: ictnlp/llama-3.1-8b-omni - tokenizer_name: ictnlp/llama-3.1-8b-omni - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.audio_language.llama_omni_client.LlamaOmniAudioLMClient" - -# IBM - Granite 3.0 - - name: huggingface/granite-3.0-2b-base - model_name: ibm-granite/granite-3.0-2b-base - tokenizer_name: ibm-granite/granite-3.0-2b-base - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-base - - - name: 
huggingface/granite-3.0-2b-instruct - model_name: ibm-granite/granite-3.0-2b-instruct - tokenizer_name: ibm-granite/granite-3.0-2b-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-instruct - - - name: huggingface/granite-3.0-8b-instruct - model_name: ibm-granite/granite-3.0-8b-instruct - tokenizer_name: ibm-granite/granite-3.0-8b-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-instruct - - - name: huggingface/granite-3.0-8b-base - model_name: ibm-granite/granite-3.0-8b-base - tokenizer_name: ibm-granite/granite-3.0-8b-base - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-base - - - name: huggingface/granite-3.0-3b-a800m-instruct - model_name: ibm-granite/granite-3.0-3b-a800m-instruct - tokenizer_name: ibm-granite/granite-3.0-3b-a800m-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-instruct - - - name: huggingface/granite-3.0-3b-a800m-base - model_name: ibm-granite/granite-3.0-3b-a800m-base - tokenizer_name: ibm-granite/granite-3.0-3b-a800m-base - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-base - - - name: huggingface/granite-3.0-1b-a400m-instruct - model_name: ibm-granite/granite-3.0-1b-a400m-instruct - tokenizer_name: ibm-granite/granite-3.0-1b-a400m-instruct - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-instruct - - - name: huggingface/granite-3.0-1b-a400m-base - model_name: ibm-granite/granite-3.0-1b-a400m-base - tokenizer_name: ibm-granite/granite-3.0-1b-a400m-base - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-base - -# Maritaca AI - - name: huggingface/sabia-7b - model_name: maritaca-ai/sabia-7b - tokenizer_name: maritaca-ai/sabia-7b - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: maritaca-ai/sabia-7b - - - name: maritaca-ai/sabiazinho-3 - model_name: maritaca-ai/sabiazinho-3 - tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium - max_sequence_length: 32000 - client_spec: - class_name: "helm.clients.openai_client.OpenAIClient" - - - name: maritaca-ai/sabia-3 - model_name: maritaca-ai/sabia-3 - tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.openai_client.OpenAIClient" - - - name: maritaca-ai/sabia-3.1-2025-05-08 - model_name: maritaca-ai/sabia-3.1-2025-05-08 - tokenizer_name: maritaca-ai/sabia-2-tokenizer-medium - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.openai_client.OpenAIClient" - -# Granite-3.1-8b-base - - name: huggingface/granite-3.1-8b-base - model_name: ibm-granite/granite-3.1-8b-base - tokenizer_name: 
ibm-granite/granite-3.1-8b-base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base - -# Granite-3.1-8b-instruct - - name: huggingface/granite-3.1-8b-instruct - model_name: ibm-granite/granite-3.1-8b-instruct - tokenizer_name: ibm-granite/granite-3.1-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct - -# Granite-3.1-2b-instruct - - name: huggingface/granite-3.1-2b-instruct - model_name: ibm-granite/granite-3.1-2b-instruct - tokenizer_name: ibm-granite/granite-3.1-2b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct - -# Granite-3.1-2b-base - - name: huggingface/granite-3.1-2b-base - model_name: ibm-granite/granite-3.1-2b-base - tokenizer_name: ibm-granite/granite-3.1-2b-base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base - -# Granite-3.1-3b-a800m-instruct - - name: huggingface/granite-3.1-3b-a800m-instruct - model_name: ibm-granite/granite-3.1-3b-a800m-instruct - tokenizer_name: ibm-granite/granite-3.1-3b-a800m-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct - -# Granite-3.1-3b-a800m-base - - name: huggingface/granite-3.1-3b-a800m-base - model_name: ibm-granite/granite-3.1-3b-a800m-base - tokenizer_name: ibm-granite/granite-3.1-3b-a800m-base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base - -# Granite-3.1-1b-a400m-instruct - - name: huggingface/granite-3.1-1b-a400m-instruct - model_name: ibm-granite/granite-3.1-1b-a400m-instruct - tokenizer_name: ibm-granite/granite-3.1-1b-a400m-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct - -# Granite-3.1-1b-a400m-base - - name: huggingface/granite-3.1-1b-a400m-base - model_name: ibm-granite/granite-3.1-1b-a400m-base - tokenizer_name: ibm-granite/granite-3.1-1b-a400m-base - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base - -# DeepSeek-R1-Distill-Llama-3.1-8b - - name: huggingface/DeepSeek-R1-Distill-Llama-8B - model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B - tokenizer_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Llama-8B - -# deepseek-ai/deepseek-coder-6.7b-instruct - - name: huggingface/deepseek-coder-6.7b-instruct - model_name: deepseek-ai/deepseek-coder-6.7b-instruct - tokenizer_name: deepseek-ai/deepseek-coder-6.7b-instruct - max_sequence_length: 128000 - client_spec: - class_name: 
"helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: deepseek-ai/deepseek-coder-6.7b-instruct - -# IBM WatsonX - - name: ibm/llama-3.3-70b-instruct - model_name: meta/llama-3.3-70b-instruct - tokenizer_name: meta/llama-3.3-70b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmChatClient" - args: - watsonx_model_name: meta-llama/llama-3-3-70b-instruct - region: Dallas - - - name: ibm/granite-3-2b-instruct - model_name: ibm/granite-3.1-2b-instruct - tokenizer_name: ibm-granite/granite-3.1-2b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmChatClient" - args: - watsonx_model_name: ibm/granite-3-2b-instruct - region: Dallas - - - name: ibm/granite-3-8b-instruct - model_name: ibm/granite-3.1-8b-instruct - tokenizer_name: ibm-granite/granite-3.1-8b-instruct - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmChatClient" - args: - watsonx_model_name: ibm/granite-3-8b-instruct - region: Dallas - - - name: ibm/granite-13b-instruct-v2 - model_name: ibm/granite-13b-instruct-v2 - tokenizer_name: EleutherAI/gpt-neox-20b - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmTextClient" - args: - watsonx_model_name: ibm/granite-13b-instruct-v2 - region: Dallas - - - name: ibm/granite-20b-code-instruct-8k - model_name: ibm/granite-20b-code-instruct-8k - tokenizer_name: ibm-granite/granite-20b-code-instruct-8k - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmChatClient" - args: - watsonx_model_name: ibm/granite-20b-code-instruct - region: Dallas - - - name: ibm/granite-34b-code-instruct - model_name: ibm/granite-34b-code-instruct - tokenizer_name: ibm-granite/granite-34b-code-instruct-8k - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmChatClient" - args: - watsonx_model_name: ibm/granite-34b-code-instruct - region: Dallas - - - name: ibm/granite-3b-code-instruct - model_name: ibm/granite-3b-code-instruct - tokenizer_name: ibm-granite/granite-3b-code-instruct-128k - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmTextClient" - args: - watsonx_model_name: ibm/granite-3b-code-instruct - region: Dallas - - - name: ibm/granite-8b-code-instruct - model_name: ibm/granite-8b-code-instruct - tokenizer_name: ibm-granite/granite-8b-code-instruct-128k - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.ibm_client.IbmTextClient" - args: - watsonx_model_name: ibm/granite-8b-code-instruct - region: Dallas - - - name: ibm/granite-3.3-8b-instruct - model_name: ibm/granite-3.3-8b-instruct - tokenizer_name: ibm/granite-3.3-8b-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.ibm_client.IbmTextClient" - args: - watsonx_model_name: ibm/granite-3-3-8b-instruct - region: Dallas - - # Vietnamese - - name: ura-hcmut/ura-llama-2.1-8b - model_name: ura-hcmut/ura-llama-2.1-8b - tokenizer_name: meta/llama-3.1-8b-instruct - max_sequence_length: 131072 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/ura-llama-2.1-8b - - - name: ura-hcmut/ura-llama-2-8b - model_name: ura-hcmut/ura-llama-2-8b - tokenizer_name: meta/llama-3-8b-instruct - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: 
ura-hcmut/ura-llama-2-8b - - - name: ura-hcmut/ura-llama-7b - model_name: ura-hcmut/ura-llama-7b - tokenizer_name: meta-llama/Llama-2-7b-hf - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/ura-llama-7b - - - name: ura-hcmut/ura-llama-13b - model_name: ura-hcmut/ura-llama-13b - tokenizer_name: meta-llama/Llama-2-7b-hf - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/ura-llama-13b - - - name: ura-hcmut/ura-llama-70b - model_name: ura-hcmut/ura-llama-70b - tokenizer_name: meta-llama/Llama-2-7b-hf - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/ura-llama-70b - - - name: ura-hcmut/GemSUra-7B - model_name: ura-hcmut/GemSUra-7B - tokenizer_name: google/gemma-2b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/GemSUra-7B - - - name: ura-hcmut/GemSUra-2B - model_name: ura-hcmut/GemSUra-2B - tokenizer_name: google/gemma-2b - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/GemSUra-2B - - - name: ura-hcmut/MixSUra - model_name: ura-hcmut/MixSUra - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: ura-hcmut/MixSUra - - - name: vilm/vinallama-7b-chat - model_name: vilm/vinallama-7b-chat - tokenizer_name: vilm/vinallama-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/vinallama-7b-chat - - - name: vilm/vinallama-2.7b-chat - model_name: vilm/vinallama-2.7b-chat - tokenizer_name: vilm/vinallama-2.7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/vinallama-2.7b-chat - - - name: vilm/vietcuna-7b-v3 - model_name: vilm/vietcuna-7b-v3 - tokenizer_name: vilm/vietcuna-7b-v3 - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/vietcuna-7b-v3 - - - name: vilm/vietcuna-3b-v2 - model_name: vilm/vietcuna-3b-v2 - tokenizer_name: vilm/vietcuna-7b-v3 - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/vietcuna-3b-v2 - - - name: vilm/Quyen-v0.1 - model_name: vilm/Quyen-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-v0.1 - - - name: vilm/Quyen-Plus-v0.1 - model_name: vilm/Quyen-Plus-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-Plus-v0.1 - - - name: vilm/Quyen-Pro-v0.1 - model_name: vilm/Quyen-Pro-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - 
class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-Pro-v0.1 - - - name: vilm/Quyen-Pro-Max-v0.1 - model_name: vilm/Quyen-Pro-Max-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-Pro-Max-v0.1 - - - name: vilm/Quyen-Mini-v0.1 - model_name: vilm/Quyen-Mini-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-Mini-v0.1 - - - name: vilm/Quyen-SE-v0.1 - model_name: vilm/Quyen-SE-v0.1 - tokenizer_name: qwen/qwen2-72b-instruct - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vilm/Quyen-SE-v0.1 - - - name: Viet-Mistral/Vistral-7B-Chat - model_name: Viet-Mistral/Vistral-7B-Chat - tokenizer_name: Viet-Mistral/Vistral-7B-Chat - max_sequence_length: 32768 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: Viet-Mistral/Vistral-7B-Chat - - - name: vinai/PhoGPT-7B5-Instruct - model_name: vinai/PhoGPT-7B5-Instruct - tokenizer_name: vinai/PhoGPT-7B5-Instruct - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vinai/PhoGPT-7B5-Instruct - - - name: vinai/PhoGPT-4B-Chat - model_name: vinai/PhoGPT-4B-Chat - tokenizer_name: vinai/PhoGPT-4B-Chat - max_sequence_length: 8192 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: vinai/PhoGPT-4B-Chat - - - name: huggingface/Gemma-3-Gaia-PT-BR-4b-it - model_name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it - tokenizer_name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it - - - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - model_name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - tokenizer_name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - max_sequence_length: 4094 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - - - name: 22h/cabrita_7b_pt_850000 - model_name: 22h/cabrita_7b_pt_850000 - tokenizer_name: 22h/cabrita_7b_pt_850000 - max_sequence_length: 4094 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: 22h/cabrita_7b_pt_850000 - - - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - model_name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - tokenizer_name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - - - name: TucanoBR/Tucano-2b4 - model_name: TucanoBR/Tucano-2b4 - tokenizer_name: TucanoBR/Tucano-2b4 - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: TucanoBR/Tucano-2b4 - - - name: nicholasKluge/TeenyTinyLlama-460m - 
model_name: nicholasKluge/TeenyTinyLlama-460m - tokenizer_name: nicholasKluge/TeenyTinyLlama-460m - max_sequence_length: 2048 - client_spec: - class_name: "helm.clients.huggingface_client.HuggingFaceClient" - args: - pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m - - - name: openrouter/mistral-medium-3.1 - model_name: mistralai/mistral-medium-3.1 - tokenizer_name: mistralai/Mistral-7B-v0.1 - max_sequence_length: 128000 - client_spec: - class_name: "helm.clients.openrouter_client.OpenRouterClient" - args: - model_name: mistralai/mistral-medium-3.1 - - - - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 - model_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 - model_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 - model_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 - model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 - model_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 - model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 - model_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 - tokenizer_name: proxy_tuning/llama-7b-chat - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 - model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b - max_sequence_length: 4096 - client_spec: - class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" \ No newline at end of file From 7c1c4a02298f5dca49d7c5ba68de18ce5b7e6d95 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:11:00 -0700 Subject: [PATCH 18/42] Delete 
prod_env/model_metadata.yaml --- prod_env/model_metadata.yaml | 4934 ---------------------------------- 1 file changed, 4934 deletions(-) delete mode 100644 prod_env/model_metadata.yaml diff --git a/prod_env/model_metadata.yaml b/prod_env/model_metadata.yaml deleted file mode 100644 index 8ac35fe5ba6..00000000000 --- a/prod_env/model_metadata.yaml +++ /dev/null @@ -1,4934 +0,0 @@ -# This file defines all the models officially supported by the Helm API. -# The model names here should match the model names in model_deployments.yaml. - -# If you want to add a new model, you can technically do it here but we recommend -# you do it in prod_env/model_metadata.yaml instead. - -# Follow the template of this file to add a new model. You can copy-paste this to get started: -# # This file contains the metadata for private models -# models: [] # Leave empty to disable private models - - -models: - - - name: simple/model1 - display_name: Simple Model 1 - description: This is a test model. - creator_organization_name: Helm - access: open - release_date: 2023-01-01 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - # Adobe - - name: adobe/giga-gan - display_name: GigaGAN (1B) - description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)). - creator_organization_name: Adobe - access: limited - num_parameters: 1000000000 - release_date: 2023-06-22 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - - # AI21 Labs - - name: ai21/j1-jumbo - display_name: J1-Jumbo v1 (178B) - description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). - creator_organization_name: AI21 Labs - access: limited - num_parameters: 178000000000 - release_date: 2021-08-11 - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: ai21/j1-large - display_name: J1-Large v1 (7.5B) - description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). - creator_organization_name: AI21 Labs - access: limited - num_parameters: 7500000000 - release_date: 2021-08-11 - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: ai21/j1-grande - display_name: J1-Grande v1 (17B) - description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
- creator_organization_name: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2022-05-03 - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: ai21/j1-grande-v2-beta - display_name: J1-Grande v2 beta (17B) - description: Jurassic-1 Grande v2 beta (17B parameters) - creator_organization_name: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2022-10-28 - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: ai21/j2-large - display_name: Jurassic-2 Large (7.5B) - description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization_name: AI21 Labs - access: limited - num_parameters: 7500000000 - release_date: 2023-03-09 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: ai21/j2-grande - display_name: Jurassic-2 Grande (17B) - description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization_name: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2023-03-09 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: ai21/j2-jumbo - display_name: Jurassic-2 Jumbo (178B) - description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization_name: AI21 Labs - access: limited - num_parameters: 178000000000 - release_date: 2023-03-09 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # TODO(1524): Change AI21 model names - # - j2-jumbo -> j2-ultra - # - j2-grande -> j2-mid - # - j2-large -> j2-light - - - name: ai21/jamba-instruct - display_name: Jamba Instruct - description: Jamba Instruct is an instruction tuned version of Jamba, which uses a hybrid Transformer-Mamba mixture-of-experts (MoE) architecture that interleaves blocks of Transformer and Mamba layers. ([blog](https://www.ai21.com/blog/announcing-jamba-instruct)) - creator_organization_name: AI21 Labs - access: limited - num_parameters: 52000000000 - release_date: 2024-05-02 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: ai21/jamba-1.5-mini - display_name: Jamba 1.5 Mini - description: Jamba 1.5 Mini is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family)) - creator_organization_name: AI21 Labs - access: open - num_parameters: 51600000000 - release_date: 2024-08-22 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: ai21/jamba-1.5-large - display_name: Jamba 1.5 Large - description: Jamba 1.5 Large is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. 
([blog](https://www.ai21.com/blog/announcing-jamba-model-family)) - creator_organization_name: AI21 Labs - access: open - num_parameters: 399000000000 - release_date: 2024-08-22 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # AI Singapore - - name: aisingapore/sea-lion-7b - display_name: SEA-LION 7B - description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization. - creator_organization_name: AI Singapore - access: open - num_parameters: 7000000000 - release_date: 2023-02-24 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: aisingapore/sea-lion-7b-instruct - display_name: SEA-LION 7B Instruct - description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization. - creator_organization_name: AI Singapore - access: open - num_parameters: 7000000000 - release_date: 2023-02-24 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: aisingapore/llama3-8b-cpt-sea-lionv2-base - display_name: Llama3 8B CPT SEA-LIONv2 - description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which underwent continued pre-training on 48B additional tokens, including tokens in Southeast Asian languages. - creator_organization_name: AI Singapore - access: open - num_parameters: 8030000000 - release_date: 2024-07-31 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct - display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct - description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese. - creator_organization_name: AI Singapore - access: open - num_parameters: 8030000000 - release_date: 2024-08-21 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: aisingapore/gemma2-9b-cpt-sea-lionv3-base - display_name: Gemma2 9B CPT SEA-LIONv3 - description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese. - creator_organization_name: AI Singapore - access: open - num_parameters: 9240000000 - release_date: 2024-10-30 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct - display_name: Gemma2 9B CPT SEA-LIONv3 Instruct - description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
- creator_organization_name: AI Singapore - access: open - num_parameters: 9240000000 - release_date: 2024-10-30 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base - display_name: Llama3.1 8B CPT SEA-LIONv3 - description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese. - creator_organization_name: AI Singapore - access: open - num_parameters: 8030000000 - release_date: 2024-12-11 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct - display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct - description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese. - creator_organization_name: AI Singapore - access: open - num_parameters: 8030000000 - release_date: 2024-12-11 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base - display_name: Llama3.1 70B CPT SEA-LIONv3 - description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese. - creator_organization_name: AI Singapore - access: open - num_parameters: 70600000000 - release_date: 2024-12-11 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct - display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct - description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
- creator_organization_name: AI Singapore - access: open - num_parameters: 70600000000 - release_date: 2024-12-11 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Aleph Alpha - # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous - # TODO: add Luminous World when it's released - - name: AlephAlpha/luminous-base - display_name: Luminous Base (13B) - description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization_name: Aleph Alpha - access: limited - num_parameters: 13000000000 - # TODO: get exact release date - release_date: 2022-01-01 - # Does not support echo - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - - name: AlephAlpha/luminous-extended - display_name: Luminous Extended (30B) - description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization_name: Aleph Alpha - access: limited - num_parameters: 30000000000 - release_date: 2022-01-01 - # Does not support echo - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - - name: AlephAlpha/luminous-supreme - display_name: Luminous Supreme (70B) - description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization_name: Aleph Alpha - access: limited - num_parameters: 70000000000 - release_date: 2022-01-01 - # Does not support echo. - # Currently, only Luminous-extended and Luminous-base support multimodal inputs - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - # TODO: Uncomment when luminous-world is released. - # - name: AlephAlpha/luminous-world # Not released yet. - # display_name: Luminous World (178B) - # description: Luminous World (178B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - # creator_organization_name: Aleph Alpha - # access: limited - # num_parameters: TBD - # release_date: TBD - # # Does not support echo. - # tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: AlephAlpha/m-vader - display_name: MultiFusion (13B) - description: MultiFusion is a multimodal, multilingual diffusion model that extends the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296)) - creator_organization_name: Aleph Alpha - access: limited - num_parameters: 13000000000 - release_date: 2023-05-24 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - - # Amazon Nova models - # References for Amazon Nova models: - # https://aws.amazon.com/ai/generative-ai/nova/ - - name: amazon/nova-premier-v1:0 - display_name: Amazon Nova Premier - description: Amazon Nova Premier is the most capable model in the Nova family of foundation models.
-  - name: amazon/nova-premier-v1:0
-    display_name: Amazon Nova Premier
-    description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2025-04-30
-    tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: amazon/nova-pro-v1:0
-    display_name: Amazon Nova Pro
-    description: Amazon Nova Pro Model
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2024-12-03
-    tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: amazon/nova-lite-v1:0
-    display_name: Amazon Nova Lite
-    description: Amazon Nova Lite Model
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2024-12-03
-    tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: amazon/nova-micro-v1:0
-    display_name: Amazon Nova Micro
-    description: Amazon Nova Micro Model
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2024-12-03
-    tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  # Titan Models
-  # References for Amazon Titan models:
-  # - https://aws.amazon.com/bedrock/titan/
-  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
-  # - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
-  - name: amazon/titan-text-lite-v1
-    display_name: Amazon Titan Text Lite
-    description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2023-11-29
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: amazon/titan-text-express-v1
-    display_name: Amazon Titan Text Express
-    description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
-    creator_organization_name: Amazon
-    access: limited
-    release_date: 2023-11-29
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-# Mistral Models on Bedrock
-# References for Mistral on Amazon Bedrock
-# https://aws.amazon.com/bedrock/mistral/
-
-  - name: mistralai/amazon-mistral-7b-instruct-v0:2
-    display_name: Mistral 7B Instruct on Amazon Bedrock
-    description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
-    creator_organization_name: Mistral
-    access: limited
-    release_date: 2024-03-23
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
-    display_name: Mixtral 8x7B Instruct on Amazon Bedrock
-    description: An 8x7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code, and a 32k context window.
-    creator_organization_name: Mistral
-    access: limited
-    release_date: 2023-12-11
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/amazon-mistral-large-2402-v1:0
-    display_name: Mistral Large (2402) on Amazon Bedrock
-    description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
-    creator_organization_name: Mistral
-    access: limited
-    release_date: 2024-02-26
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/amazon-mistral-small-2402-v1:0
-    display_name: Mistral Small on Amazon Bedrock
-    description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
-    creator_organization_name: Mistral
-    access: limited
-    release_date: 2024-02-26
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/amazon-mistral-large-2407-v1:0
-    display_name: Mistral Large (2407) on Amazon Bedrock
-    description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling, JSON output, and reasoning capabilities.
-    creator_organization_name: Mistral
-    access: limited
-    release_date: 2024-07-24
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-# Llama3 on Amazon Bedrock
-# References for Llama3 on Amazon Bedrock
-# https://aws.amazon.com/bedrock/llama/
-
-  - name: meta/amazon-llama3-8b-instruct-v1:0
-    display_name: Llama 3 8B Instruct on Amazon Bedrock
-    description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
-    creator_organization_name: Meta
-    access: limited
-    release_date: 2024-04-23
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/amazon-llama3-70b-instruct-v1:0
-    display_name: Llama 3 70B Instruct on Amazon Bedrock
-    description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.
-    creator_organization_name: Meta
-    access: limited
-    release_date: 2024-04-23
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
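The `v0:2`/`v1:0` suffixes mirror Amazon Bedrock's versioned model IDs (e.g. meta.llama3-8b-instruct-v1:0 on the Bedrock side). A hedged sketch of how such a name might be wired to its Bedrock ID in a deployment entry; the class name and args key below are assumptions for illustration, not verbatim repo contents:

  - name: meta/amazon-llama3-8b-instruct-v1:0
    client_spec:
      class_name: "helm.clients.bedrock_client.BedrockClient"   # assumed client class
      args:
        bedrock_model_id: "meta.llama3-8b-instruct-v1:0"        # assumed key; the Bedrock-side ID
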
-  - name: meta/amazon-llama3-1-405b-instruct-v1:0
-    display_name: Llama 3.1 405B Instruct on Amazon Bedrock
-    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
-    creator_organization_name: Meta
-    access: limited
-    release_date: 2024-07-26
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/amazon-llama3-1-70b-instruct-v1:0
-    display_name: Llama 3.1 70B Instruct on Amazon Bedrock
-    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
-    creator_organization_name: Meta
-    access: limited
-    release_date: 2024-07-26
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/amazon-llama3-1-8b-instruct-v1:0
-    display_name: Llama 3.1 8B Instruct on Amazon Bedrock
-    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
-    creator_organization_name: Meta
-    access: limited
-    release_date: 2024-07-26
-    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Anthropic
-  - name: anthropic/claude-v1.3
-    display_name: Claude v1.3
-    description: A 52B parameter language model, trained using reinforcement learning from human feedback ([paper](https://arxiv.org/pdf/2204.05862.pdf)).
-    creator_organization_name: Anthropic
-    access: limited
-    num_parameters: 52000000000
-    release_date: 2023-03-17
-    tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-instant-v1
-    display_name: Claude Instant V1
-    description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2023-03-17
-    tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-instant-1.2
-    display_name: Claude Instant 1.2
-    description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2023-08-09
-    tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-2.0
-    display_name: Claude 2.0
-    description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2023-07-11
-    tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-2.1
-    display_name: Claude 2.1
-    description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2023-11-21
-    tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-haiku-20240307
-    display_name: Claude 3 Haiku (20240307)
-    description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-sonnet-20240229
-    display_name: Claude 3 Sonnet (20240229)
-    description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-opus-20240229
-    display_name: Claude 3 Opus (20240229)
-    description: Claude 3 is a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
-    access: limited
-    creator_organization_name: Anthropic
-    release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-5-haiku-20241022
-    display_name: Claude 3.5 Haiku (20241022)
-    description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2024-11-04 # Released after the blog post
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-5-sonnet-20240620
-    display_name: Claude 3.5 Sonnet (20240620)
-    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2024-06-20
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-5-sonnet-20241022
-    display_name: Claude 3.5 Sonnet (20241022)
-    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2024-10-22
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-7-sonnet-20250219
-    display_name: Claude 3.7 Sonnet (20250219)
-    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-02-24
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
-    display_name: Claude 3.7 Sonnet (20250219, extended thinking)
-    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens.
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-02-24
-    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
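The `-thinking-10k` variants register the same snapshot with extended thinking enabled and a 10,000-token budget. A hedged sketch of how that could surface as client arguments in the matching deployment entry; Anthropic's API expresses this as a thinking block with budget_tokens, but the class name and key below are assumptions, not verbatim repo contents:

  - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
    client_spec:
      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"  # assumed client class
      args:
        thinking_budget_tokens: 10000   # assumed key; maps to Anthropic's thinking.budget_tokens
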
-  - name: anthropic/claude-sonnet-4-20250514
-    display_name: Claude 4 Sonnet (20250514)
-    description: Claude 4 Sonnet is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-sonnet-4-20250514-thinking-10k
-    display_name: Claude 4 Sonnet (20250514, extended thinking)
-    description: Claude 4 Sonnet is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-opus-4-20250514
-    display_name: Claude 4 Opus (20250514)
-    description: Claude 4 Opus is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/claude-opus-4-20250514-thinking-10k
-    display_name: Claude 4 Opus (20250514, extended thinking)
-    description: Claude 4 Opus is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
-    creator_organization_name: Anthropic
-    access: limited
-    release_date: 2025-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: anthropic/stanford-online-all-v4-s3
-    display_name: Anthropic-LM v4-s3 (52B)
-    description: A 52B parameter language model, trained using reinforcement learning from human feedback ([paper](https://arxiv.org/pdf/2204.05862.pdf)).
-    creator_organization_name: Anthropic
-    access: closed
-    num_parameters: 52000000000
-    release_date: 2021-12-01
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
-
-
-  # Berkeley
-  - name: berkeley/koala-13b # NOT SUPPORTED
-    display_name: Koala (13B)
-    description: Koala (13B) is a chatbot fine-tuned from Llama (13B) on dialogue data gathered from the web. ([blog post](https://bair.berkeley.edu/blog/2023/04/03/koala/))
-    creator_organization_name: UC Berkeley
-    access: open
-    num_parameters: 13000000000
-    release_date: 2023-04-03
-    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
-
-
-  # BigScience
-  - name: bigscience/bloom
-    display_name: BLOOM (176B)
-    description: BLOOM (176B parameters) is an autoregressive model trained on 46 natural languages and 13 programming languages ([paper](https://arxiv.org/pdf/2211.05100.pdf)).
-    creator_organization_name: BigScience
-    access: open
-    num_parameters: 176000000000
-    release_date: 2022-06-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
-
-  - name: bigscience/bloomz # NOT SUPPORTED
-    display_name: BLOOMZ (176B)
-    description: BLOOMZ (176B parameters) is BLOOM that has been fine-tuned on natural language instructions ([details](https://huggingface.co/bigscience/bloomz)).
-    creator_organization_name: BigScience
-    access: open
-    num_parameters: 176000000000
-    release_date: 2022-11-03
-    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
-
-  - name: bigscience/t0pp
-    display_name: T0pp (11B)
-    description: T0pp (11B parameters) is an encoder-decoder model trained on a large set of different tasks specified in natural language prompts ([paper](https://arxiv.org/pdf/2110.08207.pdf)).
-    creator_organization_name: BigScience
-    access: open
-    num_parameters: 11000000000
-    release_date: 2021-10-15
-    # Does not support echo.
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
-
-
-  # BigCode
-  - name: bigcode/santacoder
-    display_name: SantaCoder (1.1B)
-    description: SantaCoder (1.1B parameters) model trained on the Python, Java, and JavaScript subset of The Stack (v1.1) ([model card](https://huggingface.co/bigcode/santacoder)).
-    creator_organization_name: BigCode
-    access: open
-    num_parameters: 1100000000
-    release_date: 2023-01-09 # ArXiv submission date
-    tags: [CODE_MODEL_TAG]
-
-  - name: bigcode/starcoder
-    display_name: StarCoder (15.5B)
-    description: The StarCoder (15.5B parameter) model trained on 80+ programming languages from The Stack (v1.2) ([model card](https://huggingface.co/bigcode/starcoder)).
-    creator_organization_name: BigCode
-    access: open
-    num_parameters: 15500000000
-    release_date: 2023-05-09 # ArXiv submission date
-    tags: [CODE_MODEL_TAG]
-
-  # BioMistral
-
-  - name: biomistral/biomistral-7b
-    display_name: BioMistral (7B)
-    description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
-    creator_organization_name: BioMistral
-    access: open
-    num_parameters: 7300000000
-    release_date: 2024-02-15
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-  # Cerebras Systems
-  - name: cerebras/cerebras-gpt-6.7b # NOT SUPPORTED
-    display_name: Cerebras GPT (6.7B)
-    description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
-    creator_organization_name: Cerebras
-    access: limited
-    num_parameters: 6700000000
-    release_date: 2023-04-06
-    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
-
-  - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
-    display_name: Cerebras GPT (13B)
-    description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
-    creator_organization_name: Cerebras
-    access: limited
-    num_parameters: 13000000000
-    release_date: 2023-04-06
-    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
-
-
-  # Cohere
-  # Model versioning and the possible versions are not documented here:
-  # https://docs.cohere.ai/generate-reference#model-optional.
-  # So, instead, we got the names of the models from the Cohere Playground.
-  #
-  # Note that their tokenizer and model were trained on English text and
-  # they do not have a dedicated decode API endpoint, so the adaptation
-  # step for language modeling fails for certain Scenarios:
-  #   the_pile:subset=ArXiv
-  #   the_pile:subset=Github
-  #   the_pile:subset=PubMed Central
-
-  # TODO: Consider renaming to new model names.
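For concreteness, the scenarios named in the note above would be reached through run entries of the following shape (the entries/description/priority layout follows HELM's run-spec conf convention; treat this as illustrative):

entries: [
  # Hits the missing-decode limitation described above when run against a Cohere model:
  {description: "the_pile:subset=ArXiv,model=cohere/command", priority: 1},
]
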
-  - name: cohere/xlarge-20220609
-    display_name: Cohere xlarge v20220609 (52.4B)
-    description: Cohere xlarge v20220609 (52.4B parameters)
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 52400000000
-    release_date: 2022-06-09
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/large-20220720
-    display_name: Cohere large v20220720 (13.1B)
-    description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 13100000000
-    release_date: 2022-07-20
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/medium-20220720
-    display_name: Cohere medium v20220720 (6.1B)
-    description: Cohere medium v20220720 (6.1B parameters)
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 6100000000
-    release_date: 2022-07-20
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/small-20220720
-    display_name: Cohere small v20220720 (410M)
-    description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 410000000
-    release_date: 2022-07-20
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/xlarge-20221108
-    display_name: Cohere xlarge v20221108 (52.4B)
-    description: Cohere xlarge v20221108 (52.4B parameters)
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 52400000000
-    release_date: 2022-11-08
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/medium-20221108
-    display_name: Cohere medium v20221108 (6.1B)
-    description: Cohere medium v20221108 (6.1B parameters)
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 6100000000
-    release_date: 2022-11-08
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: cohere/command-medium-beta
-    display_name: Command beta (6.1B)
-    description: Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 6100000000
-    release_date: 2022-11-08
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: cohere/command-xlarge-beta
-    display_name: Command beta (52.4B)
-    description: Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
-    creator_organization_name: Cohere
-    access: limited
-    num_parameters: 52400000000
-    release_date: 2022-11-08
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: cohere/command
-    display_name: Command
-    description: Command is Cohere's flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
-    creator_organization_name: Cohere
-    access: limited
-    release_date: 2023-09-29
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: cohere/command-light
-    display_name: Command Light
-    description: Command Light is a smaller, faster version of Command, Cohere's flagship text generation model trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
-    creator_organization_name: Cohere
-    access: limited
-    release_date: 2023-09-29
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: cohere/command-r
-    display_name: Command R
-    description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
-    creator_organization_name: Cohere
-    access: open
-    num_parameters: 35000000000
-    release_date: 2024-03-11
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: cohere/command-r-plus
-    display_name: Command R Plus
-    description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
-    creator_organization_name: Cohere
-    access: open
-    num_parameters: 104000000000
-    release_date: 2024-04-04
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Craiyon
-  - name: craiyon/dalle-mini
-    display_name: DALL-E mini (0.4B)
-    description: DALL-E mini is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
-    creator_organization_name: Craiyon
-    access: open
-    num_parameters: 400000000
-    release_date: 2022-04-21
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: craiyon/dalle-mega
-    display_name: DALL-E mega (2.6B)
-    description: DALL-E mega is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
-    creator_organization_name: Craiyon
-    access: open
-    num_parameters: 2600000000
-    release_date: 2022-04-21
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  # DeepFloyd
-  - name: DeepFloyd/IF-I-M-v1.0
-    display_name: DeepFloyd IF Medium (0.4B)
-    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
-    creator_organization_name: DeepFloyd
-    access: open
-    num_parameters: 400000000
-    release_date: 2023-04-28
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: DeepFloyd/IF-I-L-v1.0
-    display_name: DeepFloyd IF Large (0.9B)
-    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
-    creator_organization_name: DeepFloyd
-    access: open
-    num_parameters: 900000000
-    release_date: 2023-04-28
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: DeepFloyd/IF-I-XL-v1.0
-    display_name: DeepFloyd IF X-Large (4.3B)
-    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
-    creator_organization_name: DeepFloyd
-    access: open
-    num_parameters: 4300000000
-    release_date: 2023-04-28
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-
-  # Databricks
-  - name: databricks/dolly-v2-3b
-    display_name: Dolly V2 (3B)
-    description: Dolly V2 (3B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-2.8b.
-    creator_organization_name: Databricks
-    access: open
-    num_parameters: 2517652480
-    release_date: 2023-04-12
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: databricks/dolly-v2-7b
-    display_name: Dolly V2 (7B)
-    description: Dolly V2 (7B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-6.9b.
-    creator_organization_name: Databricks
-    access: open
-    num_parameters: 6444163072
-    release_date: 2023-04-12
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: databricks/dolly-v2-12b
-    display_name: Dolly V2 (12B)
-    description: Dolly V2 (12B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b.
-    creator_organization_name: Databricks
-    access: open
-    num_parameters: 11327027200
-    release_date: 2023-04-12
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: databricks/dbrx-instruct
-    display_name: DBRX Instruct
-    description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm))
-    creator_organization_name: Databricks
-    access: open
-    num_parameters: 132000000000
-    release_date: 2024-03-27
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-
-  # DeepMind
-  - name: deepmind/gopher # NOT SUPPORTED
-    display_name: Gopher (280B)
-    description: Gopher (280B parameters) ([paper](https://arxiv.org/pdf/2112.11446.pdf)).
-    creator_organization_name: DeepMind
-    access: closed
-    num_parameters: 280000000000
-    release_date: 2021-12-08
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: deepmind/chinchilla # NOT SUPPORTED
-    display_name: Chinchilla (70B)
-    description: Chinchilla (70B parameters) ([paper](https://arxiv.org/pdf/2203.15556.pdf)).
-    creator_organization_name: DeepMind
-    access: closed
-    num_parameters: 70000000000
-    release_date: 2022-03-31
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-
-  # Deepseek
-  - name: deepseek-ai/deepseek-llm-67b-chat
-    display_name: DeepSeek LLM Chat (67B)
-    description: DeepSeek LLM Chat is an open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned with supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 67000000000
-    release_date: 2024-01-05
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: deepseek-ai/deepseek-v3
-    display_name: DeepSeek v3
-    description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
-    creator_organization_name: DeepSeek
-    access: open
-    # NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
-    num_parameters: 685000000000
-    release_date: 2024-12-24
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: deepseek-ai/deepseek-r1
-    display_name: DeepSeek R1
-    description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
-    creator_organization_name: DeepSeek
-    access: open
-    # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
-    num_parameters: 685000000000
-    release_date: 2025-01-20
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: deepseek-ai/deepseek-r1-hide-reasoning
-    display_name: DeepSeek R1 (hide reasoning)
-    description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
-    creator_organization_name: DeepSeek
-    access: open
-    # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
-    num_parameters: 685000000000
-    release_date: 2025-01-20
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
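The `-hide-reasoning` variant serves the same weights but strips the reasoning tokens from the returned completion. A hedged sketch of how the two registrations could differ in a deployment entry; the flag and placeholder below are assumptions, not verbatim repo contents:

  - name: deepseek-ai/deepseek-r1-hide-reasoning
    model_name: deepseek-ai/deepseek-r1   # assumed: same underlying model as above
    client_spec:
      class_name: "..."                   # whichever client serves deepseek-r1
      args:
        hide_reasoning: true              # assumed flag: drop the reasoning span from the output
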
-  - name: deepseek-ai/deepseek-r1-0528
-    display_name: DeepSeek-R1-0528
-    description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 685000000000
-    release_date: 2025-05-28
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    display_name: DeepSeek-R1-Distill-Llama-8B
-    description: DeepSeek-R1-Distill-Llama-8B is a dense model based on Llama-3.1-8B, fine-tuned on reasoning data generated by DeepSeek-R1.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 8000000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: deepseek-ai/deepseek-coder-6.7b-instruct
-    display_name: DeepSeek-Coder-6.7b-Instruct
-    description: DeepSeek-Coder-6.7b-Instruct is a code model initialized from deepseek-coder-6.7b-base and fine-tuned on instruction data.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 6740000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # EleutherAI
-  - name: eleutherai/gpt-j-6b # Served by GooseAI, HuggingFace and Together.
-    display_name: GPT-J (6B)
-    description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)).
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 6000000000
-    release_date: 2021-06-04
-    # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
-
-  - name: eleutherai/gpt-neox-20b # Served by GooseAI and Together.
-    display_name: GPT-NeoX (20B)
-    description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)).
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 20000000000
-    release_date: 2022-02-02
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
-
-  - name: eleutherai/pythia-1b-v0
-    display_name: Pythia (1B)
-    description: Pythia (1B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 805736448
-    release_date: 2023-02-13
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: eleutherai/pythia-2.8b-v0
-    display_name: Pythia (2.8B)
-    description: Pythia (2.8B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 2517652480
-    release_date: 2023-02-13
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: eleutherai/pythia-6.9b
-    display_name: Pythia (6.9B)
-    description: Pythia (6.9B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 6444163072
-    release_date: 2023-02-13
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: eleutherai/pythia-12b-v0
-    display_name: Pythia (12B)
-    description: Pythia (12B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
-    creator_organization_name: EleutherAI
-    access: open
-    num_parameters: 11327027200
-    release_date: 2023-02-13
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  # EPFL LLM
-
-  - name: epfl-llm/meditron-7b
-    display_name: Meditron (7B)
-    description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus.
-    creator_organization_name: EPFL LLM
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-11-27
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  # Google
-  - name: google/t5-11b
-    display_name: T5 (11B)
-    description: T5 (11B parameters) is an encoder-decoder model trained on a multi-task mixture, where each task is converted into a text-to-text format ([paper](https://arxiv.org/pdf/1910.10683.pdf)).
-    creator_organization_name: Google
-    access: open
-    num_parameters: 11000000000
-    release_date: 2019-10-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
-
-  - name: google/ul2
-    display_name: UL2 (20B)
-    description: UL2 (20B parameters) is an encoder-decoder model trained on the C4 corpus. It's similar to T5 but trained with a different objective and slightly different scaling knobs ([paper](https://arxiv.org/pdf/2205.05131.pdf)).
-    creator_organization_name: Google
-    access: open
-    num_parameters: 20000000000
-    release_date: 2022-05-10
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, NLG_PREFIX_TAG]
-
-  - name: google/flan-t5-xxl
-    display_name: Flan-T5 (11B)
-    description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).
-    creator_organization_name: Google
-    access: open
-    num_parameters: 11000000000
-    release_date: 2022-12-06 # Paper date
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/palm # NOT SUPPORTED
-    display_name: PaLM (540B)
-    description: Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips ([paper](https://arxiv.org/pdf/2204.02311.pdf)).
-    creator_organization_name: Google
-    access: closed
-    num_parameters: 540000000000
-    release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
-  - name: google/gemini-pro
-    display_name: Gemini Pro
-    description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.0-pro-001
-    display_name: Gemini 1.0 Pro (001)
-    description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.0-pro-002
-    display_name: Gemini 1.0 Pro (002)
-    description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-04-09
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
-  - name: google/gemini-pro-vision
-    display_name: Gemini Pro Vision
-    description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG]
-
-  - name: google/gemini-1.0-pro-vision-001
-    display_name: Gemini 1.0 Pro Vision
-    description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/gemini-1.5-pro-001
-    display_name: Gemini 1.5 Pro (001)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-001
-    display_name: Gemini 1.5 Flash (001)
-    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-pro-preview-0409
-    display_name: Gemini 1.5 Pro (0409 preview)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-pro-preview-0514
-    display_name: Gemini 1.5 Pro (0514 preview)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-preview-0514
-    display_name: Gemini 1.5 Flash (0514 preview)
-    description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-pro-001-safety-default
-    display_name: Gemini 1.5 Pro (001, default safety)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-pro-001-safety-block-none
-    display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-001-safety-default
-    display_name: Gemini 1.5 Flash (001, default safety)
-    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-001-safety-block-none
-    display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety)
-    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
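The `-safety-default` / `-safety-block-none` pairs register one snapshot under two safety configurations; on Vertex AI, BLOCK_NONE is a per-category harm-block threshold. A hedged sketch of what the block-none variant plausibly passes through (class name and key are assumptions, not verbatim repo contents):

  - name: google/gemini-1.5-pro-001-safety-block-none
    client_spec:
      class_name: "helm.clients.vertexai_client.VertexAIChatClient"  # assumed client class
      args:
        safety_threshold: BLOCK_NONE   # assumed key; applied to every harm category
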
-  - name: google/gemini-1.5-pro-002
-    display_name: Gemini 1.5 Pro (002)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-09-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-002
-    display_name: Gemini 1.5 Flash (002)
-    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-09-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-flash-exp
-    display_name: Gemini 2.0 Flash (Experimental)
-    description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-12-11
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-1.5-flash-8b-001
-    display_name: Gemini 1.5 Flash 8B
-    description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2024-10-01
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-flash-001
-    display_name: Gemini 2.0 Flash
-    description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-02-01
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-flash-lite-preview-02-05
-    display_name: Gemini 2.0 Flash Lite (02-05 preview)
-    description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-02-05
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-flash-lite-001
-    display_name: Gemini 2.0 Flash Lite
-    description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-03-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-flash-thinking-exp-01-21
-    display_name: Gemini 2.0 Flash Thinking (01-21 preview)
-    description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-01-21
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.0-pro-exp-02-05
-    display_name: Gemini 2.0 Pro (02-05 preview)
-    description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-02-05
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-flash-lite-preview-06-17
-    display_name: Gemini 2.5 Flash-Lite (06-17 preview)
-    description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-06-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-flash-lite
-    display_name: Gemini 2.5 Flash-Lite
-    description: Gemini 2.5 Flash-Lite ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-07-22
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-flash-preview-04-17
-    display_name: Gemini 2.5 Flash (04-17 preview)
-    description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-04-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-flash-preview-05-20
-    display_name: Gemini 2.5 Flash (05-20 preview)
-    description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-05-20
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-flash
-    display_name: Gemini 2.5 Flash
-    description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-06-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-pro-exp-03-25
-    display_name: Gemini 2.5 Pro (03-25 experimental)
-    description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-03-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-pro-preview-03-25
-    display_name: Gemini 2.5 Pro (03-25 preview)
-    description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-pro-preview-05-06
-    display_name: Gemini 2.5 Pro (05-06 preview)
-    description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemini-2.5-pro
-    display_name: Gemini 2.5 Pro
-    description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2025-06-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemma-2b
-    display_name: Gemma (2B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-02-21
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/gemma-2b-it
-    display_name: Gemma Instruct (2B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-02-21
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemma-7b
-    display_name: Gemma (7B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-02-21
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/gemma-7b-it
-    display_name: Gemma Instruct (7B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-02-21
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemma-2-9b
-    display_name: Gemma 2 (9B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-06-27
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/gemma-2-9b-it
-    display_name: Gemma 2 Instruct (9B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-06-27
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/gemma-2-27b
-    display_name: Gemma 2 (27B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-06-27
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/gemma-2-27b-it
-    display_name: Gemma 2 Instruct (27B)
-    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-06-27
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: google/paligemma-3b-mix-224
-    display_name: PaliGemma (3B) Mix 224
-    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-05-12
-    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/paligemma-3b-mix-448
-    display_name: PaliGemma (3B) Mix 448
-    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
-    creator_organization_name: Google
-    access: open
-    release_date: 2024-05-12
-    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/text-bison@001
-    display_name: PaLM-2 (Bison)
-    description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
-    tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/text-bison@002
-    display_name: PaLM-2 (Bison)
-    description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
-    tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/text-bison-32k
-    display_name: PaLM-2 (Bison)
-    description: The best value PaLM model with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
-    tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/text-unicorn@001
-    display_name: PaLM-2 (Unicorn)
-    description: The largest model in the PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
-    tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/code-bison@001
-    display_name: Codey PaLM-2 (Bison)
-    description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
-    tags: [CODE_MODEL_TAG]
-
-  - name: google/code-bison@002
-    display_name: Codey PaLM-2 (Bison)
-    description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
-    tags: [CODE_MODEL_TAG]
-
-  - name: google/code-bison-32k
-    display_name: Codey PaLM-2 (Bison)
-    description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
-    tags: [CODE_MODEL_TAG]
-
-  - name: google/medlm-medium
-    display_name: MedLM (Medium)
-    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: google/medlm-large
-    display_name: MedLM (Large)
-    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
-    creator_organization_name: Google
-    access: limited
-    release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
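Since these removed entries are plain YAML, the quickest way to inspect a block like the one above is to load the file and index it by name. A minimal sketch, not part of the patch itself, assuming the YAML is saved locally as model_metadata.yaml with a top-level models key and that PyYAML is installed:

    import yaml

    # Parse the metadata file; every model is one dict in the "models" list.
    with open("model_metadata.yaml") as f:
        metadata = yaml.safe_load(f)

    # Index entries by their fully qualified name, e.g. "google/medlm-large".
    by_name = {entry["name"]: entry for entry in metadata["models"]}

    entry = by_name["google/medlm-large"]
    print(entry["display_name"])  # MedLM (Large)
    print(entry["access"])        # limited
    print(entry["tags"])          # ['TEXT_MODEL_TAG', 'PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG']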
-
-  # HuggingFace
-  - name: HuggingFaceM4/idefics2-8b
-    display_name: IDEFICS 2 (8B)
-    description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)).
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-04-15
-    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: HuggingFaceM4/idefics-9b
-    display_name: IDEFICS (9B)
-    description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 9000000000
-    release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: HuggingFaceM4/idefics-9b-instruct
-    display_name: IDEFICS-instruct (9B)
-    description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)).
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 9000000000
-    release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: HuggingFaceM4/idefics-80b
-    display_name: IDEFICS (80B)
-    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 80000000000
-    release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: HuggingFaceM4/idefics-80b-instruct
-    display_name: IDEFICS-instruct (80B)
-    description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)).
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 80000000000
-    release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: huggingface/smollm2-135m
-    display_name: SmolLM2 (135M)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 135000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: huggingface/smollm2-360m
-    display_name: SmolLM2 (360M)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 362000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: huggingface/smollm2-1.7b
-    display_name: SmolLM2 (1.7B)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 1710000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: huggingface/smollm2-135m-instruct
-    display_name: SmolLM2 Instruct (135M)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 135000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: huggingface/smollm2-360m-instruct
-    display_name: SmolLM2 Instruct (360M)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 362000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: huggingface/smollm2-1.7b-instruct
-    display_name: SmolLM2 Instruct (1.7B)
-    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
-    creator_organization_name: HuggingFace
-    access: open
-    num_parameters: 1710000000
-    release_date: 2024-10-31
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
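Many display_name values in this file encode the parameter count (e.g. "SmolLM2 (360M)" next to num_parameters: 362000000), so an automated cross-check is cheap. A rough sketch building on the metadata dict from the earlier example; the regex and the 15% tolerance are arbitrary choices to allow rounded names, not anything HELM itself does:

    import re

    MULTIPLIERS = {"M": 10**6, "B": 10**9}

    def size_from_display_name(display_name):
        # Pull a size like "(1.7B)" or "(135M)" out of the display name, if any.
        match = re.search(r"\(([\d.]+)([MB])\)", display_name)
        if not match:
            return None
        return float(match.group(1)) * MULTIPLIERS[match.group(2)]

    for entry in metadata["models"]:
        claimed = size_from_display_name(entry.get("display_name", ""))
        actual = entry.get("num_parameters")
        # Tolerate rounding in names, e.g. "360M" vs. 362,000,000.
        if claimed and actual and abs(actual - claimed) / claimed > 0.15:
            print(f"possible mismatch in {entry['name']}: {actual} vs {claimed:.0f}")

Names such as "Llama 4 Scout (17Bx16E) Instruct" intentionally fail the regex and are skipped rather than misread.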
-
-  ## Text-to-Image Diffusion Models
-  - name: huggingface/dreamlike-diffusion-v1-0
-    display_name: Dreamlike Diffusion v1.0 (1B)
-    description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine tuned on high quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0))
-    creator_organization_name: dreamlike.art
-    access: open
-    num_parameters: 1000000000
-    release_date: 2023-03-08
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/dreamlike-photoreal-v2-0
-    display_name: Dreamlike Photoreal v2.0 (1B)
-    description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0))
-    creator_organization_name: dreamlike.art
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-23
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/openjourney-v1-0
-    display_name: Openjourney (1B)
-    description: Openjourney is an open-source Stable Diffusion model fine-tuned on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney))
-    creator_organization_name: PromptHero
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-01 # TODO: get the exact date
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/openjourney-v2-0
-    display_name: Openjourney v2 (1B)
-    description: Openjourney v2 is an open-source Stable Diffusion model fine-tuned on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 in Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)).
-    creator_organization_name: PromptHero
-    access: open
-    num_parameters: 1000000000
-    release_date: 2023-01-01 # TODO: get the exact date
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/promptist-stable-diffusion-v1-4
-    display_name: Promptist + Stable Diffusion v1.4 (1B)
-    description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-12-19
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/redshift-diffusion
-    display_name: Redshift Diffusion (1B)
-    description: Redshift Diffusion is an open source Stable Diffusion model fine tuned on high resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion))
-    creator_organization_name: nitrosocke
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-29
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-safe-weak
-    display_name: Safe Stable Diffusion weak (1B)
-    description: Safe Stable Diffusion is an extension to Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
-    creator_organization_name: TU Darmstadt
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-09
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-safe-medium
-    display_name: Safe Stable Diffusion medium (1B)
-    description: Safe Stable Diffusion is an extension to Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
-    creator_organization_name: TU Darmstadt
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-09
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-safe-strong
-    display_name: Safe Stable Diffusion strong (1B)
-    description: Safe Stable Diffusion is an extension to Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
-    creator_organization_name: TU Darmstadt
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-09
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-safe-max
-    display_name: Safe Stable Diffusion max (1B)
-    description: Safe Stable Diffusion is an extension to Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
-    creator_organization_name: TU Darmstadt
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-09
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-v1-4
-    display_name: Stable Diffusion v1.4 (1B)
-    description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752))
-    creator_organization_name: Ludwig Maximilian University of Munich CompVis
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-08-01
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-v1-5
-    display_name: Stable Diffusion v1.5 (1B)
-    description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned for 595k steps at resolution 512x512 on laion-aesthetics v2 5+ with 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752))
-    creator_organization_name: Runway
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-10-20
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-v2-base
-    display_name: Stable Diffusion v2 base (1B)
-    description: The model is trained from scratch for 550k steps at resolution 256x256 on a subset of LAION-5B filtered for explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5. Then it is further trained for 850k steps at resolution 512x512 on the same dataset on images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752))
-    creator_organization_name: Stability AI
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-23
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/stable-diffusion-v2-1-base
-    display_name: Stable Diffusion v2.1 base (1B)
-    description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base with 220k extra steps taken, with punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752))
-    creator_organization_name: Stability AI
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-11-23
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: huggingface/vintedois-diffusion-v0-1
-    display_name: Vintedois (22h) Diffusion model v0.1 (1B)
-    description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 that was finetuned on a large amount of high quality images with simple prompts to generate beautiful images without a lot of prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1))
-    creator_organization_name: 22 Hours
-    access: open
-    num_parameters: 1000000000
-    release_date: 2022-12-27
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: segmind/Segmind-Vega
-    display_name: Segmind Stable Diffusion (0.74B)
-    description: The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega))
-    creator_organization_name: Segmind
-    access: open
-    num_parameters: 740000000
-    release_date: 2023-12-01
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: segmind/SSD-1B
-    display_name: Segmind Stable Diffusion (1B)
-    description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled 50% smaller version of the Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B))
-    creator_organization_name: Segmind
-    access: open
-    num_parameters: 1000000000
-    release_date: 2023-10-20
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  - name: stabilityai/stable-diffusion-xl-base-1.0
-    display_name: Stable Diffusion XL
-    description: Stable Diffusion XL (SDXL) consists of an ensemble of experts pipeline for latent diffusion. ([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0))
-    creator_organization_name: Stability AI
-    access: open
-    num_parameters: 6600000000
-    release_date: 2023-07-26
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  # Kakao
-  - name: kakaobrain/mindall-e
-    display_name: minDALL-E (1.3B)
-    description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E))
-    creator_organization_name: Kakao
-    access: open
-    num_parameters: 1300000000
-    release_date: 2021-12-13
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
-
-  # Lexica
-  - name: lexica/search-stable-diffusion-1.5
-    display_name: Lexica Search with Stable Diffusion v1.5 (1B)
-    description: Retrieves Stable Diffusion v1.5 images Lexica users generated ([docs](https://lexica.art/docs)).
-    creator_organization_name: Lexica
-    access: open
-    release_date: 2023-01-01
-    tags: [TEXT_TO_IMAGE_MODEL_TAG]
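The TEXT_TO_IMAGE_MODEL_TAG entries end here, which makes this a natural point to show how the tags can drive selection. Illustrative only, reusing the parsed metadata dict from the first sketch:

    # Collect every text-to-image entry by tag membership.
    t2i = [
        entry["name"]
        for entry in metadata["models"]
        if "TEXT_TO_IMAGE_MODEL_TAG" in entry.get("tags", [])
    ]
    print(len(t2i), "text-to-image models")
    # e.g. huggingface/stable-diffusion-v1-5, segmind/SSD-1B, kakaobrain/mindall-e, ...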
-
-  # Lightning AI
-  - name: lightningai/lit-gpt
-    display_name: Lit-GPT
-    description: Lit-GPT is an optimized collection of open-source LLMs for finetuning and inference. It supports Falcon, Llama 2, Vicuna, LongChat, and other top-performing open-source large language models.
-    creator_organization_name: Lightning AI
-    access: open
-    release_date: 2023-04-04
-    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  # LMSYS
-  - name: lmsys/vicuna-7b-v1.3
-    display_name: Vicuna v1.3 (7B)
-    description: Vicuna v1.3 (7B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
-    creator_organization_name: LMSYS
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-06-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: lmsys/vicuna-13b-v1.3
-    display_name: Vicuna v1.3 (13B)
-    description: Vicuna v1.3 (13B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
-    creator_organization_name: LMSYS
-    access: open
-    num_parameters: 13000000000
-    release_date: 2023-06-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Marin Community
-  - name: marin-community/marin-8b-instruct
-    display_name: Marin 8B Instruct
-    description: Marin 8B Instruct is an open-source instruction-following model released by the Marin Community.
-    creator_organization_name: Marin Community
-    access: open
-    num_parameters: 8030000000
-    release_date: 2025-05-15
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Meta
-  - name: meta/opt-iml-175b # NOT SUPPORTED
-    display_name: OPT-IML (175B)
-    description: OPT-IML (175B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 175000000000
-    release_date: 2022-12-22
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: meta/opt-iml-30b # NOT SUPPORTED
-    display_name: OPT-IML (30B)
-    description: OPT-IML (30B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 30000000000
-    release_date: 2022-12-22
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: meta/opt-175b
-    display_name: OPT (175B)
-    description: Open Pre-trained Transformers (175B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 175000000000
-    release_date: 2022-05-02
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
-
-  - name: meta/opt-66b
-    display_name: OPT (66B)
-    description: Open Pre-trained Transformers (66B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 66000000000
-    release_date: 2022-05-02
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
-
-  - name: meta/opt-6.7b
-    display_name: OPT (6.7B)
-    description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 6700000000
-    release_date: 2022-05-02
-    # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
-
-  - name: meta/opt-1.3b
-    display_name: OPT (1.3B)
-    description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 1300000000
-    release_date: 2022-05-02
-    # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
-
-  - name: meta/galactica-120b # NOT SUPPORTED
-    display_name: Galactica (120B)
-    description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 120000000000
-    release_date: 2022-11-15
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: meta/galactica-30b # NOT SUPPORTED
-    display_name: Galactica (30B)
-    description: Galactica (30B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 30000000000
-    release_date: 2022-11-15
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: meta/llama-7b
-    display_name: LLaMA (7B)
-    description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-02-24
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-13b
-    display_name: LLaMA (13B)
-    description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 13000000000
-    release_date: 2023-02-24
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-30b
-    display_name: LLaMA (30B)
-    description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 30000000000
-    release_date: 2023-02-24
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-65b
-    display_name: LLaMA (65B)
-    description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 65000000000
-    release_date: 2023-02-24
-    # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-2-7b
-    display_name: Llama 2 (7B)
-    description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-07-18
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-2-13b
-    display_name: Llama 2 (13B)
-    description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 13000000000
-    release_date: 2023-07-18
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-2-70b
-    display_name: Llama 2 (70B)
-    description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2023-07-18
-    # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-3-8b
-    display_name: Llama 3 (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-04-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-3-8b-instruct-turbo
-    display_name: Llama 3 Instruct Turbo (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3-8b-instruct-lite
-    display_name: Llama 3 Instruct Lite (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available anywhere while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3-70b
-    display_name: Llama 3 (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-04-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: meta/llama-3-70b-instruct-turbo
-    display_name: Llama 3 Instruct Turbo (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3-70b-instruct-lite
-    display_name: Llama 3 Instruct Lite (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available anywhere while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-8b-instruct
-    display_name: Llama 3.1 Instruct (8B)
-    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-70b-instruct
-    display_name: Llama 3.1 Instruct (70B)
-    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-405b-instruct
-    display_name: Llama 3.1 Instruct (405B)
-    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 405000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-8b-instruct-turbo
-    display_name: Llama 3.1 Instruct Turbo (8B)
-    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-70b-instruct-turbo
-    display_name: Llama 3.1 Instruct Turbo (70B)
-    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.1-405b-instruct-turbo
-    display_name: Llama 3.1 Instruct Turbo (405B)
-    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 405000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.2-1b-instruct
-    display_name: Llama 3.2 Instruct (1.23B)
-    description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 1230000000
-    release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.2-3b-instruct-turbo
-    display_name: Llama 3.2 Instruct Turbo (3B)
-    description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 3210000000
-    release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.2-11b-vision-instruct-turbo
-    display_name: Llama 3.2 Vision Instruct Turbo (11B)
-    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 10700000000
-    release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.2-90b-vision-instruct-turbo
-    display_name: Llama 3.2 Vision Instruct Turbo (90B)
-    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 88600000000
-    release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.3-70b-instruct-turbo
-    display_name: Llama 3.3 Instruct Turbo (70B)
-    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-12-06
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3.3-70b-instruct
-    display_name: Llama 3.3 Instruct (70B)
-    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-12-06
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-4-scout-17b-16e-instruct
-    display_name: Llama 4 Scout (17Bx16E) Instruct
-    description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 109000000000
-    release_date: 2025-04-05
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-4-maverick-17b-128e-instruct-fp8
-    display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
-    description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 402000000000
-    release_date: 2025-04-05
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3-8b-chat
-    display_name: Llama 3 Instruct (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-04-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-3-70b-chat
-    display_name: Llama 3 Instruct (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-04-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-guard-7b
-    display_name: Llama Guard (7B)
-    description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-12-07
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-guard-2-8b
-    display_name: Llama Guard 2 (8B)
-    description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-04-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: meta/llama-guard-3-8b
-    display_name: Llama Guard 3 (8B)
-    description: Llama Guard 3 is an 8B parameter Llama 3.1-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
-    creator_organization_name: Meta
-    access: open
-    num_parameters: 8000000000
-    release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
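The tags above follow a loose taxonomy: text models generally carry exactly one functionality tag (FULL, PARTIAL, or LIMITED), unsupported or deprecated checkpoints are marked rather than removed, and image, code, and VLM entries use their own tag families. A sketch for auditing that taxonomy, again against the parsed metadata dict from the first example; the grouping rule is an assumption for illustration, not a constraint HELM enforces:

    from collections import defaultdict

    FUNCTIONALITY_TAGS = (
        "FULL_FUNCTIONALITY_TEXT_MODEL_TAG",
        "PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG",
        "LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG",
        "UNSUPPORTED_MODEL_TAG",
        "DEPRECATED_MODEL_TAG",
    )

    groups = defaultdict(list)
    for entry in metadata["models"]:
        # First matching tag wins; entries without one (diffusion, code, ...) land in "other".
        tag = next((t for t in entry.get("tags", []) if t in FUNCTIONALITY_TAGS), "other")
        groups[tag].append(entry["name"])

    for tag in sorted(groups):
        print(f"{tag}: {len(groups[tag])} models")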
-
-  # Microsoft/NVIDIA
-  - name: microsoft/TNLGv2_530B
-    display_name: TNLG v2 (530B)
-    description: TNLG v2 (530B parameters) is an autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
-    creator_organization_name: Microsoft/NVIDIA
-    access: closed
-    num_parameters: 530000000000
-    release_date: 2022-01-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: microsoft/TNLGv2_7B
-    display_name: TNLG v2 (6.7B)
-    description: TNLG v2 (6.7B parameters) is an autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
-    creator_organization_name: Microsoft/NVIDIA
-    access: closed
-    num_parameters: 6700000000
-    release_date: 2022-01-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: microsoft/llava-1.5-7b-hf
-    display_name: LLaVA 1.5 (7B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-10-05
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: microsoft/llava-1.5-13b-hf
-    display_name: LLaVA 1.5 (13B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 13000000000
-    release_date: 2023-10-05
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: uw-madison/llava-v1.6-vicuna-7b-hf
-    display_name: LLaVA 1.6 (7B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-01-01
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: uw-madison/llava-v1.6-vicuna-13b-hf
-    display_name: LLaVA 1.6 (13B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 13000000000
-    release_date: 2024-01-01
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: uw-madison/llava-v1.6-mistral-7b-hf
-    display_name: LLaVA 1.6 + Mistral (7B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-01-01
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: uw-madison/llava-v1.6-34b-hf
-    display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B)
-    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 34000000000
-    release_date: 2024-01-01
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
-    display_name: OpenFlamingo (9B)
-    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)).
-    creator_organization_name: OpenFlamingo
-    access: open
-    num_parameters: 9000000000
-    release_date: 2023-08-02
-    tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: microsoft/phi-2
-    display_name: Phi-2
-    description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 2700000000
-    release_date: 2023-10-05
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: microsoft/phi-3-small-8k-instruct
-    display_name: Phi-3 (7B)
-    description: Phi-3-Small-8K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-05-21
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: microsoft/phi-3-medium-4k-instruct
-    display_name: Phi-3 (14B)
-    description: Phi-3-Medium-4K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 14000000000
-    release_date: 2024-05-21
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: microsoft/phi-3.5-mini-instruct
-    display_name: Phi-3.5-mini-instruct (3.8B)
-    description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
-    creator_organization_name: Microsoft
-    access: open
-    num_parameters: 3800000000
-    release_date: 2024-08-22
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280)) - creator_organization_name: Microsoft - access: open - num_parameters: 41900000000 - release_date: 2024-08-22 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # KAIST AI - - name: kaistai/prometheus-vision-13b-v1.0-hf - display_name: LLaVA + Vicuna-v1.5 (13B) - description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485)) - creator_organization_name: KAIST AI - access: open - num_parameters: 13000000000 - release_date: 2024-01-01 - tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] - - # 01.AI - - name: 01-ai/yi-6b - display_name: Yi (6B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization_name: 01.AI - access: open - num_parameters: 6000000000 - release_date: 2023-11-02 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: 01-ai/yi-34b - display_name: Yi (34B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization_name: 01.AI - access: open - num_parameters: 34000000000 - release_date: 2023-11-02 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: 01-ai/yi-6b-chat - display_name: Yi Chat (6B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization_name: 01.AI - access: open - num_parameters: 6000000000 - release_date: 2023-11-23 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: 01-ai/yi-34b-chat - display_name: Yi Chat (34B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization_name: 01.AI - access: open - num_parameters: 34000000000 - release_date: 2023-11-23 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: 01-ai/yi-large - display_name: Yi Large - description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667)) - creator_organization_name: 01.AI - access: limited - release_date: 2024-05-12 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: 01-ai/yi-large-preview - display_name: Yi Large (Preview) - description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667)) - creator_organization_name: 01.AI - access: limited - release_date: 2024-05-12 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - # Allen Institute for AI - # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580 - - name: allenai/olmo-7b - display_name: OLMo (7B) - description: OLMo is a series of Open Language Models trained on the Dolma dataset. - creator_organization_name: Allen Institute for AI - access: open - num_parameters: 7000000000 - release_date: 2024-02-01 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: allenai/olmo-7b-twin-2t - display_name: OLMo (7B Twin 2T) - description: OLMo is a series of Open Language Models trained on the Dolma dataset. 
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-02-01
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: allenai/olmo-7b-instruct
-    display_name: OLMo (7B Instruct)
-    description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-02-01
-    # TODO: Add instruct tag.
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: allenai/olmo-1.7-7b
-    display_name: OLMo 1.7 (7B)
-    description: OLMo is a series of Open Language Models trained on the Dolma dataset.
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 7000000000
-    release_date: 2024-04-17
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: allenai/olmo-2-1124-7b-instruct
-    display_name: OLMo 2 7B Instruct November 2024
-    description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2024-11-26
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: allenai/olmo-2-1124-13b-instruct
-    display_name: OLMo 2 13B Instruct November 2024
-    description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 13700000000
-    release_date: 2024-11-26
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: allenai/olmo-2-0325-32b-instruct
-    display_name: OLMo 2 32B Instruct March 2025
-    description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 32200000000
-    release_date: 2025-03-13
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: allenai/olmoe-1b-7b-0125-instruct
-    display_name: OLMoE 1B-7B Instruct January 2025
-    description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
-    creator_organization_name: Allen Institute for AI
-    access: open
-    num_parameters: 7000000000
-    release_date: 2025-01-31  # Approximate; this is the January 2025 (0125) release.
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Mistral AI
-  - name: mistralai/mistral-7b-v0.1
-    display_name: Mistral v0.1 (7B)
-    description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2023-09-27
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mistralai/mistral-7b-instruct-v0.1
-    display_name: Mistral Instruct v0.1 (7B)
-    description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fine-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2023-09-27
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-7b-instruct-v0.2
-    display_name: Mistral Instruct v0.2 (7B)
-    description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2024-03-23
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-7b-instruct-v0.3
-    display_name: Mistral Instruct v0.3 (7B)
-    description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.2, v0.3 has an extended vocabulary and supports function calling. ([blog post](https://mistral.ai/news/la-plateforme/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2024-05-22
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-7b-instruct-v0.3-hf
-    display_name: Mistral Instruct v0.3 (7B)
-    description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.2, v0.3 has an extended vocabulary and supports function calling. ([blog post](https://mistral.ai/news/la-plateforme/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7300000000
-    release_date: 2024-05-22
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mixtral-8x7b-32kseqlen
-    display_name: Mixtral (8x7B 32K seqlen)
-    description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 46700000000
-    release_date: 2023-12-08
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mistralai/mixtral-8x7b-instruct-v0.1
-    display_name: Mixtral Instruct (8x7B)
-    description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 46700000000
-    # Blog post: https://mistral.ai/news/mixtral-of-experts/
-    release_date: 2023-12-11
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
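-
-  # Note: the "46.7B total / 12.9B active" figures above reflect Mixtral's sparse
-  # mixture-of-experts design, in which each token is routed through 2 of the 8
-  # expert feed-forward blocks, so only a fraction of the weights participate in
-  # any single forward pass.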
-
-  - name: mistralai/mixtral-8x22b
-    display_name: Mixtral (8x22B)
-    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 176000000000
-    release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mistralai/mixtral-8x22b-instruct-v0.1
-    display_name: Mixtral Instruct (8x22B)
-    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 176000000000
-    release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/bakLlava-v1-hf
-    display_name: BakLLaVA v1 (7B)
-    description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 7000000000
-    release_date: 2023-10-16
-    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
-  - name: mistralai/ministral-3b-2410
-    display_name: Ministral 3B (2410)
-    description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2024-10-16
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/ministral-8b-2410
-    display_name: Ministral 8B (2410)
-    description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases, with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
-    creator_organization_name: Mistral AI
-    access: open
-    release_date: 2024-10-16
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-small-2402
-    display_name: Mistral Small (2402)
-    description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2024-02-26
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-small-2409
-    display_name: Mistral Small (2409)
-    description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2024-09-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-small-2501
-    display_name: Mistral Small 3 (2501)
-    description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks—those that require robust language and instruction following performance, with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 23600000000
-    release_date: 2025-01-30
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-small-2503
-    display_name: Mistral Small 3.1 (2503)
-    description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 23600000000
-    release_date: 2025-03-17
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-medium-2312
-    display_name: Mistral Medium (2312)
-    description: Mistral Medium is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2023-12-11
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-medium-2505
-    display_name: Mistral Medium 3 (2505)
-    description: Mistral Medium 3 (2505) is a language model that is intended to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2025-05-07
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-large-2402
-    display_name: Mistral Large (2402)
-    description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
-    creator_organization_name: Mistral AI
-    access: limited
-    release_date: 2024-02-26
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-large-2407
-    display_name: Mistral Large 2 (2407)
-    description: Mistral Large 2 is a 123 billion parameter model that has a 128k context window and supports dozens of languages and 80+ coding languages. ([blog](https://mistral.ai/news/mistral-large-2407/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 123000000000
-    release_date: 2024-07-24
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/mistral-large-2411
-    display_name: Mistral Large (2411)
-    description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 123000000000
-    release_date: 2024-11-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/open-mistral-nemo-2407
-    display_name: Mistral NeMo (2407)
-    description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
-    creator_organization_name: Mistral AI
-    access: open
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/pixtral-12b-2409
-    display_name: Mistral Pixtral (2409)
-    description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
-    creator_organization_name: Mistral AI
-    access: open
-    release_date: 2024-09-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: mistralai/pixtral-large-2411
-    display_name: Mistral Pixtral Large (2411)
-    description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
-    creator_organization_name: Mistral AI
-    access: open
-    num_parameters: 124000000000
-    release_date: 2024-11-18
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # Moonshot AI
-  - name: moonshotai/kimi-k2-instruct
-    display_name: Kimi K2 Instruct
-    description: Kimi K2 Instruct is a mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters trained with the Muon optimizer on 15.5T tokens. ([blog](https://moonshotai.github.io/Kimi-K2/))
-    creator_organization_name: Moonshot AI
-    access: open
-    num_parameters: 1029173256720
-    release_date: 2025-07-14 # Blog post has no date, so use the date from this news article https://www.cnbc.com/2025/07/14/alibaba-backed-moonshot-releases-kimi-k2-ai-rivaling-chatgpt-claude.html
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # MosaicML
-  - name: mosaicml/mpt-7b
-    display_name: MPT (7B)
-    description: MPT (7B) is a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 6700000000
-    release_date: 2023-05-05
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mosaicml/mpt-7b-chat # NOT SUPPORTED
-    display_name: MPT-Chat (7B)
-    description: MPT-Chat (7B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 6700000000
-    release_date: 2023-05-05
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: mosaicml/mpt-instruct-7b
-    display_name: MPT-Instruct (7B)
-    description: MPT-Instruct (7B) is a model for short-form instruction following. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 6700000000
-    release_date: 2023-05-05
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mosaicml/mpt-30b
-    display_name: MPT (30B)
-    description: MPT (30B) is a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 30000000000
-    release_date: 2023-06-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: mosaicml/mpt-30b-chat # NOT SUPPORTED
-    display_name: MPT-Chat (30B)
-    description: MPT-Chat (30B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 30000000000
-    release_date: 2023-06-22
-    tags: [UNSUPPORTED_MODEL_TAG]
-
-  - name: mosaicml/mpt-instruct-30b
-    display_name: MPT-Instruct (30B)
-    description: MPT-Instruct (30B) is a model for short-form instruction following. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
-    creator_organization_name: MosaicML
-    access: open
-    num_parameters: 30000000000
-    release_date: 2023-06-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-  # NECTEC
-  - name: nectec/Pathumma-llm-text-1.0.0
-    display_name: Pathumma-llm-text-1.0.0 (7B)
-    description: Pathumma-llm-text-1.0.0 (7B) is an instruction-tuned model built on OpenThaiLLM-Prebuilt-7B ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
-    creator_organization_name: nectec
-    access: open
-    num_parameters: 7620000000
-    release_date: 2024-10-28
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: nectec/OpenThaiLLM-Prebuilt-7B
-    display_name: OpenThaiLLM-Prebuilt-7B (7B)
-    description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
-    creator_organization_name: nectec
-    access: open
-    num_parameters: 7620000000
-    release_date: 2024-10-28
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-  # Neurips
-  - name: neurips/local
-    display_name: Neurips Local
-    description: Neurips Local
-    creator_organization_name: Neurips
-    access: open
-    release_date: 2023-06-01
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-  # NVIDIA
-  - name: nvidia/megatron-gpt2
-    display_name: Megatron GPT2
-    description: GPT-2 implemented in Megatron-LM ([paper](https://arxiv.org/abs/1909.08053)).
-    creator_organization_name: NVIDIA
-    access: open
-    release_date: 2019-09-17 # paper date
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
-
-  - name: nvidia/nemotron-4-340b-instruct
-    display_name: Nemotron-4 Instruct (340B)
-    description: Nemotron-4 Instruct (340B) is an open weights model sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. 98% of the data used for model alignment was synthetically generated ([paper](https://arxiv.org/abs/2406.11704)).
-    creator_organization_name: NVIDIA
-    access: open
-    release_date: 2024-06-17
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: nvidia/llama-3.1-nemotron-70b-instruct
-    display_name: Llama 3.1 Nemotron Instruct (70B)
-    description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
-    creator_organization_name: NVIDIA
-    access: open
-    num_parameters: 70000000000
-    release_date: 2024-10-02
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-
-  # OpenAI
-
-  ## GPT 2 Models
-  # Not served by OpenAI, instead served by HuggingFace.
-
-  - name: openai/gpt2
-    display_name: GPT-2 (1.5B)
-    description: GPT-2 (1.5B parameters) is a transformer model trained on a large corpus of English text in a self-supervised fashion ([paper](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)).
-    creator_organization_name: OpenAI
-    access: open
-    num_parameters: 1500000000
-    release_date: 2019-02-14
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
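-
-  # Usage note: the `name` field is the identifier used to select a model in HELM
-  # run entries. A minimal illustrative invocation (suite name and instance count
-  # are placeholders):
-  #   helm-run --run-entries mmlu:subject=anatomy,model=openai/gpt2 --suite my-suite --max-eval-instances 10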
-
-  ## GPT 3 Models
-  # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
-
-  - name: openai/davinci-002
-    display_name: davinci-002
-    description: Replacement for the GPT-3 curie and davinci base models.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-08-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/babbage-002
-    display_name: babbage-002
-    description: Replacement for the GPT-3 ada and babbage base models.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-08-22
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
-
-  - name: openai/davinci
-    display_name: davinci (175B)
-    description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 175000000000
-    release_date: 2020-05-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/curie
-    display_name: curie (6.7B)
-    description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 6700000000
-    release_date: 2020-05-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/babbage
-    display_name: babbage (1.3B)
-    description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 1300000000
-    release_date: 2020-05-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/ada
-    display_name: ada (350M)
-    description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 350000000
-    release_date: 2020-05-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/text-davinci-003
-    display_name: GPT-3.5 (text-davinci-003)
-    description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 175000000000
-    release_date: 2022-11-28
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/text-davinci-002
-    display_name: GPT-3.5 (text-davinci-002)
-    description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 175000000000
-    release_date: 2022-01-27
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/text-davinci-001
-    display_name: GPT-3.5 (text-davinci-001)
-    description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 175000000000
-    release_date: 2022-01-27
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/text-curie-001
-    display_name: text-curie-001
-    description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 6700000000
-    release_date: 2022-01-27
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/text-babbage-001
-    display_name: text-babbage-001
-    description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 1300000000
-    release_date: 2022-01-27
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: openai/text-ada-001
-    display_name: text-ada-001
-    description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
-    creator_organization_name: OpenAI
-    access: limited
-    num_parameters: 350000000
-    release_date: 2022-01-27
-    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-  ## GPT 3.5 Turbo Models
-  # ChatGPT: https://openai.com/blog/chatgpt
-
-  - name: openai/gpt-3.5-turbo-instruct
-    display_name: GPT-3.5 Turbo Instruct
-    description: Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-09-18
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-3.5-turbo-0301
-    display_name: GPT-3.5 Turbo (0301)
-    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-03-01
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-3.5-turbo-0613
-    display_name: GPT-3.5 Turbo (0613)
-    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-06-13
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-3.5-turbo-1106
-    display_name: GPT-3.5 Turbo (1106)
-    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
-    creator_organization_name: OpenAI
-    access: limited
-    # Actual release blog post was published on 2024-01-25:
-    # https://openai.com/blog/new-embedding-models-and-api-updates
-    release_date: 2024-01-25
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-3.5-turbo-0125
-    display_name: GPT-3.5 Turbo (0125)
-    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
-    creator_organization_name: OpenAI
-    access: limited
-    # Release blog post was published on 2024-01-25:
-    # https://openai.com/blog/new-embedding-models-and-api-updates
-    # The actual release date is unclear - it was described as "next week".
-    release_date: 2024-01-25
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-3.5-turbo-16k-0613
-    display_name: gpt-3.5-turbo-16k-0613
-    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-06-13
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-
-  ## GPT-4 and GPT-4 Turbo
-
-  - name: openai/gpt-4-1106-preview
-    display_name: GPT-4 Turbo (1106 preview)
-    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2023-11-06.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-11-06
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-0314
-    display_name: GPT-4 (0314)
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-03-14
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-32k-0314
-    display_name: gpt-4-32k-0314
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-03-14.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-03-14
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-0613
-    display_name: GPT-4 (0613)
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-06-13
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-32k-0613
-    display_name: gpt-4-32k-0613
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-06-13.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-06-13
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-0125-preview
-    display_name: GPT-4 Turbo (0125 preview)
-    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
-    creator_organization_name: OpenAI
-    access: limited
-    # Actual release blog post was published on 2024-01-25:
-    # https://openai.com/blog/new-embedding-models-and-api-updates
-    release_date: 2024-01-25
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4-turbo-2024-04-09
-    display_name: GPT-4 Turbo (2024-04-09)
-    description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-04-09
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  ## GPT-4o
-
-  - name: openai/gpt-4o-2024-05-13
-    display_name: GPT-4o (2024-05-13)
-    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-05-13
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4o-2024-08-06
-    display_name: GPT-4o (2024-08-06)
-    description: GPT-4o (2024-08-06) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-08-06
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4o-2024-11-20
-    display_name: GPT-4o (2024-11-20)
-    description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-11-20
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4o-mini-2024-07-18
-    display_name: GPT-4o mini (2024-07-18)
-    description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-07-18
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4.1-2025-04-14
-    display_name: GPT-4.1 (2025-04-14)
-    description: GPT-4.1 (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-04-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4.1-mini-2025-04-14
-    display_name: GPT-4.1 mini (2025-04-14)
-    description: GPT-4.1 mini (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-04-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4.1-nano-2025-04-14
-    display_name: GPT-4.1 nano (2025-04-14)
-    description: GPT-4.1 nano (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-04-14
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-5-2025-08-07
-    display_name: GPT-5 (2025-08-07)
-    description: GPT-5 (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-08-07
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-5-mini-2025-08-07
-    display_name: GPT-5 mini (2025-08-07)
-    description: GPT-5 mini (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-08-07
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-5-nano-2025-08-07
-    display_name: GPT-5 nano (2025-08-07)
-    description: GPT-5 nano (2025-08-07) is a multimodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-08-07
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/whisper-1_gpt-4o-2024-11-20
-    display_name: Whisper-1 + GPT-4o (2024-11-20)
-    description: Transcribes the audio with Whisper-1 and then uses GPT-4o to generate a response.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-11-20
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
-
-  - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
-    display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
-    description: Transcribes the audio with GPT-4o Transcribe and then uses GPT-4o to generate a response.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-03-20
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
-
-  - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
-    display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
-    description: Transcribes the audio with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-03-20
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
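-
-  # The three entries above are two-stage pipelines rather than single models: audio
-  # is first transcribed by the named transcription model, and the resulting text is
-  # then sent to gpt-4o-2024-11-20 to generate the response.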
-
-  - name: openai/gpt-4o-audio-preview-2024-10-01
-    display_name: GPT-4o Audio (Preview 2024-10-01)
-    description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-10-01
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4o-audio-preview-2024-12-17
-    display_name: GPT-4o Audio (Preview 2024-12-17)
-    description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-12-17
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/gpt-4o-mini-audio-preview-2024-12-17
-    display_name: GPT-4o mini Audio (Preview 2024-12-17)
-    description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-12-17
-    tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # GPT-4V
-
-  - name: openai/gpt-4-vision-preview
-    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model points to gpt-4-1106-vision-preview.
-    display_name: GPT-4V (1106 preview)
-    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-11-06
-    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  - name: openai/gpt-4-1106-vision-preview
-    display_name: GPT-4V (1106 preview)
-    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2023-11-06
-    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
-
-  ## GPT-4.5
-  - name: openai/gpt-4.5-preview-2025-02-27
-    display_name: GPT-4.5 (2025-02-27 preview)
-    description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-02-27
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  ## o1 Models
-  - name: openai/o1-pro-2025-03-19
-    display_name: o1 pro (2025-03-19)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-03-19
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-pro-2025-03-19-low-reasoning-effort
-    display_name: o1 pro (2025-03-19, low reasoning effort)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-03-19
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-pro-2025-03-19-high-reasoning-effort
-    display_name: o1 pro (2025-03-19, high reasoning effort)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-03-19
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-2024-12-17
-    display_name: o1 (2024-12-17)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-12-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-2024-12-17-low-reasoning-effort
-    display_name: o1 (2024-12-17, low reasoning effort)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-12-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-2024-12-17-high-reasoning-effort
-    display_name: o1 (2024-12-17, high reasoning effort)
-    description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-12-17
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-preview-2024-09-12
-    display_name: o1-preview (2024-09-12)
-    description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-09-12
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o1-mini-2024-09-12
-    display_name: o1-mini (2024-09-12)
-    description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2024-09-12
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o3-mini-2025-01-31
-    display_name: o3-mini (2025-01-31)
-    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-01-31
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o3-mini-2025-01-31-low-reasoning-effort
-    display_name: o3-mini (2025-01-31, low reasoning effort)
-    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to low.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-01-31
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  - name: openai/o3-mini-2025-01-31-high-reasoning-effort
-    display_name: o3-mini (2025-01-31, high reasoning effort)
-    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to high.
-    creator_organization_name: OpenAI
-    access: limited
-    release_date: 2025-01-31
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
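-
-  # The -low/-high-reasoning-effort variants above differ only in the reasoning
-  # effort requested per API call. An illustrative request body (assuming OpenAI's
-  # chat completions API; message content is a placeholder):
-  #   {"model": "o3-mini-2025-01-31", "reasoning_effort": "low", "messages": [{"role": "user", "content": "..."}]}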
([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o3-2025-04-16-low-reasoning-effort - display_name: o3 (2025-04-16, low reasoning effort) - description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o3-2025-04-16-high-reasoning-effort - display_name: o3 (2025-04-16, high reasoning effort) - description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o4-mini-2025-04-16 - display_name: o4-mini (2025-04-16) - description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o4-mini-2025-04-16-low-reasoning-effort - display_name: o4-mini (2025-04-16, low reasoning effort) - description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o4-mini-2025-04-16-high-reasoning-effort - display_name: o4-mini (2025-04-16, high reasoning effort) - description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-04-16 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/o3-pro-2025-06-10-high-reasoning-effort - display_name: o3-pro (2025-06-10, high reasoning effort) - description: o3-pro is an o-series model designed to think longer and provide the most reliable responses. 
([blog post](https://help.openai.com/en/articles/9624314-model-release-notes)) - creator_organization_name: OpenAI - access: limited - release_date: 2025-06-10 - tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - ## GPT-OSS - - name: openai/gpt-oss-20b - display_name: gpt-oss-20b - description: gpt-oss-20b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 3.6B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/)) - creator_organization_name: OpenAI - access: open - release_date: 2025-08-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: openai/gpt-oss-120b - display_name: gpt-oss-120b - description: gpt-oss-120b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 5.1B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/)) - creator_organization_name: OpenAI - access: open - release_date: 2025-08-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - ## Codex Models - # DEPRECATED: Codex models have been shut down on March 23 2023. - - - name: openai/code-davinci-002 - display_name: code-davinci-002 - description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)). - creator_organization_name: OpenAI - access: limited - release_date: 2021-07-01 # TODO: Find correct date (this is for v1) - tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] - - - name: openai/code-davinci-001 - display_name: code-davinci-001 - description: code-davinci-001 model - creator_organization_name: OpenAI - access: limited - release_date: 2021-07-01 # Paper date - tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] - - - name: openai/code-cushman-001 - display_name: code-cushman-001 (12B) - description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf). - creator_organization_name: OpenAI - access: limited - num_parameters: 12000000000 - release_date: 2021-07-01 # Paper date - tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG] - - - ## Text Similarity Models - # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings - # The number of parameters is guessed based on the number of parameters of the - # corresponding GPT-3 model. - # DEPRECATED: Announced on July 06 2023 that first generation embeddings models - # will be shut down on January 04 2024. - - - name: openai/text-similarity-davinci-001 - display_name: text-similarity-davinci-001 - description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). - creator_organization_name: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-01-25 # Blog post date - tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG] - - - name: openai/text-similarity-curie-001 - display_name: text-similarity-curie-001 - description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). 
- creator_organization_name: OpenAI - access: limited - num_parameters: 6700000000 - release_date: 2022-01-25 # Blog post date - tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG] - - name: openai/text-similarity-babbage-001 - display_name: text-similarity-babbage-001 - description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). - creator_organization_name: OpenAI - access: limited - num_parameters: 1300000000 - release_date: 2022-01-25 # Blog post date - tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG] - - name: openai/text-similarity-ada-001 - display_name: text-similarity-ada-001 - description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)). - creator_organization_name: OpenAI - access: limited - num_parameters: 350000000 - release_date: 2022-01-25 # Blog post date - tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG] - - name: openai/text-embedding-ada-002 - display_name: text-embedding-ada-002 - description: An improved embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/new-and-improved-embedding-model)). - creator_organization_name: OpenAI - access: limited - release_date: 2022-12-15 # Blog post date - tags: [TEXT_SIMILARITY_MODEL_TAG] - - # Text-to-image models - - name: openai/dall-e-2 - display_name: DALL-E 2 (3.5B) - description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)). - creator_organization_name: OpenAI - access: limited - num_parameters: 3500000000 - release_date: 2022-04-13 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - name: openai/dall-e-3 - display_name: DALL-E 3 - description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)). - creator_organization_name: OpenAI - access: limited - num_parameters: 0 - release_date: 2023-11-06 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - name: openai/dall-e-3-natural - display_name: DALL-E 3 (natural style) - description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)). - creator_organization_name: OpenAI - access: limited - num_parameters: 0 - release_date: 2023-11-06 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - name: openai/dall-e-3-hd - display_name: DALL-E 3 HD - description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
- creator_organization_name: OpenAI - access: limited - num_parameters: 0 - release_date: 2023-11-06 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - name: openai/dall-e-3-hd-natural - display_name: DALL-E 3 HD (natural style) - description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)). - creator_organization_name: OpenAI - access: limited - num_parameters: 0 - release_date: 2023-11-06 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - # OpenThaiGPT - - name: openthaigpt/openthaigpt-1.0.0-7b-chat - display_name: OpenThaiGPT v1.0.0 (7B) - description: OpenThaiGPT v1.0.0 (7B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than)) - creator_organization_name: OpenThaiGPT - access: open - num_parameters: 7000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: openthaigpt/openthaigpt-1.0.0-13b-chat - display_name: OpenThaiGPT v1.0.0 (13B) - description: OpenThaiGPT v1.0.0 (13B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than)) - creator_organization_name: OpenThaiGPT - access: open - num_parameters: 13000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: openthaigpt/openthaigpt-1.0.0-70b-chat - display_name: OpenThaiGPT v1.0.0 (70B) - description: OpenThaiGPT v1.0.0 (70B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than)) - creator_organization_name: OpenThaiGPT - access: open - num_parameters: 70000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Qwen - - - name: qwen/qwen-7b - display_name: Qwen - description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: qwen/qwen1.5-7b - display_name: Qwen1.5 (7B) - description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention.
([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: qwen/qwen1.5-14b - display_name: Qwen1.5 (14B) - description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: qwen/qwen1.5-32b - display_name: Qwen1.5 (32B) - description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/)) - creator_organization_name: Qwen - access: open - release_date: 2024-04-02 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: qwen/qwen1.5-72b - display_name: Qwen1.5 (72B) - description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: qwen/qwen1.5-7b-chat - display_name: Qwen1.5 Chat (7B) - description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen1.5-14b-chat - display_name: Qwen1.5 Chat (14B) - description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen1.5-32b-chat - display_name: Qwen1.5 Chat (32B) - description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/)) - creator_organization_name: Qwen - access: open - release_date: 2024-04-02 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen1.5-72b-chat - display_name: Qwen1.5 Chat (72B) - description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud.
Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-02-05 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen1.5-110b-chat - display_name: Qwen1.5 Chat (110B) - description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/)) - creator_organization_name: Qwen - access: open - release_date: 2024-04-25 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen2-72b-instruct - display_name: Qwen2 Instruct (72B) - description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/)) - creator_organization_name: Qwen - access: open - release_date: 2024-06-07 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen2.5-7b-instruct-turbo - display_name: Qwen2.5 Instruct Turbo (7B) - description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2)) - creator_organization_name: Qwen - access: open - release_date: 2024-09-19 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen2.5-7b-instruct - display_name: Qwen2.5 Instruct (7B) - description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) - creator_organization_name: Qwen - access: open - release_date: 2024-09-19 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen2.5-72b-instruct-turbo - display_name: Qwen2.5 Instruct Turbo (72B) - description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models.
([blog](https://www.together.ai/blog/together-inference-engine-2)) - creator_organization_name: Qwen - access: open - release_date: 2024-09-19 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen3-235b-a22b-fp8-tput - display_name: Qwen3 235B A22B FP8 Throughput - description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)). - creator_organization_name: Qwen - access: open - release_date: 2025-04-29 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen3-235b-a22b-instruct-2507-fp8 - display_name: Qwen3 235B A22B Instruct 2507 FP8 - description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8. - creator_organization_name: Qwen - access: open - release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwq-32b-preview - display_name: QwQ (32B Preview) - description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)). - creator_organization_name: Alibaba Cloud - access: open - num_parameters: 32800000000 - release_date: 2024-11-28 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: qwen/qwen-vl - display_name: Qwen-VL - description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)). - creator_organization_name: Alibaba Cloud - access: open - release_date: 2023-08-24 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen-vl-chat - display_name: Qwen-VL Chat - description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)). - creator_organization_name: Alibaba Cloud - access: open - release_date: 2023-08-24 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2-vl-7b-instruct - display_name: Qwen2-VL Instruct (7B) - description: The second generation of the Qwen-VL model series ([paper](https://arxiv.org/abs/2409.12191)). - creator_organization_name: Alibaba Group - access: open - release_date: 2024-08-29 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2-vl-72b-instruct - display_name: Qwen2-VL Instruct (72B) - description: The second generation of the Qwen-VL model series ([paper](https://arxiv.org/abs/2409.12191)). - creator_organization_name: Alibaba Group - access: open - release_date: 2024-08-29 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2.5-vl-3b-instruct - display_name: Qwen2.5-VL Instruct (3B) - description: The Qwen2.5-VL generation of the Qwen-VL model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)). - creator_organization_name: Alibaba Group - access: open - release_date: 2025-01-26 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2.5-vl-7b-instruct - display_name: Qwen2.5-VL Instruct (7B) - description: The Qwen2.5-VL generation of the Qwen-VL model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
- creator_organization_name: Alibaba Group - access: open - release_date: 2025-01-26 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2.5-vl-32b-instruct - display_name: Qwen2.5-VL Instruct (32B) - description: The Qwen2.5-VL generation of the Qwen-VL model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)). - creator_organization_name: Alibaba Group - access: open - release_date: 2025-01-26 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen2.5-vl-72b-instruct - display_name: Qwen2.5-VL Instruct (72B) - description: The Qwen2.5-VL generation of the Qwen-VL model series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)). - creator_organization_name: Alibaba Group - access: open - release_date: 2025-01-26 - tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - name: qwen/qwen-audio-chat - display_name: Qwen-Audio Chat - description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)). - creator_organization_name: Alibaba Cloud - access: open - release_date: 2023-11-14 - tags: [AUDIO_LANGUAGE_MODEL_TAG] - - name: qwen/qwen2-audio-7b-instruct - display_name: Qwen2-Audio Instruct (7B) - description: The second generation of the auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)). - creator_organization_name: Alibaba Cloud - access: open - release_date: 2024-07-15 - tags: [AUDIO_LANGUAGE_MODEL_TAG] - - name: qwen/qwen2.5-omni-7b - display_name: Qwen2.5-Omni (7B) - description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)). - creator_organization_name: Alibaba Cloud - access: open - release_date: 2025-03-27 - tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG] - - # SAIL (Sea AI Lab) - - name: sail/sailor-7b - display_name: Sailor (7B) - description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608)) - creator_organization_name: SAIL - access: open - num_parameters: 7000000000 - release_date: 2024-04-04 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: sail/sailor-7b-chat - display_name: Sailor Chat (7B) - description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608)) - creator_organization_name: SAIL - access: open - num_parameters: 7000000000 - release_date: 2024-04-04 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: sail/sailor-14b - display_name: Sailor (14B) - description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5.
([paper](https://arxiv.org/abs/2404.03608)) - creator_organization_name: SAIL - access: open - num_parameters: 14000000000 - release_date: 2024-04-04 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: sail/sailor-14b-chat - display_name: Sailor Chat (14B) - description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608)) - creator_organization_name: SAIL - access: open - num_parameters: 14000000000 - release_date: 2024-04-04 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Salesforce - - name: salesforce/codegen # NOT SUPPORTED - display_name: CodeGen (16B) - description: CodeGen (16B parameters) is an open dense code model trained for multi-turn program synthesis ([paper](https://arxiv.org/pdf/2203.13474.pdf)). - creator_organization_name: Salesforce - access: open - num_parameters: 16000000000 - release_date: 2022-03-25 - tags: [UNSUPPORTED_MODEL_TAG] - - # SambaNova - - name: sambanova/sambalingo-thai-base - display_name: SambaLingo-Thai-Base - description: SambaLingo-Thai-Base is a pretrained bi-lingual Thai and English model that adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829)) - creator_organization_name: SambaNova - access: open - num_parameters: 7000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: sambanova/sambalingo-thai-chat - display_name: SambaLingo-Thai-Chat - description: SambaLingo-Thai-Chat is a chat model trained using direct preference optimization on SambaLingo-Thai-Base. SambaLingo-Thai-Base adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829)) - creator_organization_name: SambaNova - access: open - num_parameters: 7000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: sambanova/sambalingo-thai-base-70b - display_name: SambaLingo-Thai-Base-70B - description: SambaLingo-Thai-Base-70B is a pretrained bi-lingual Thai and English model that adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829)) - creator_organization_name: SambaNova - access: open - num_parameters: 70000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: sambanova/sambalingo-thai-chat-70b - display_name: SambaLingo-Thai-Chat-70B - description: SambaLingo-Thai-Chat-70B is a chat model trained using direct preference optimization on SambaLingo-Thai-Base-70B. SambaLingo-Thai-Base-70B adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829)) - creator_organization_name: SambaNova - access: open - num_parameters: 70000000000 - release_date: 2024-04-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # SCB10X - - name: scb10x/typhoon-7b - display_name: Typhoon (7B) - description: Typhoon (7B) is a pretrained Thai large language model with 7 billion parameters based on Mistral 7B.
([paper](https://arxiv.org/abs/2312.13951)) - creator_organization_name: SCB10X - access: open - num_parameters: 7000000000 - release_date: 2023-12-21 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: scb10x/typhoon-v1.5-8b - display_name: Typhoon v1.5 (8B) - description: Typhoon v1.5 (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7)) - creator_organization_name: SCB10X - access: open - num_parameters: 8000000000 - release_date: 2024-05-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: scb10x/typhoon-v1.5-8b-instruct - display_name: Typhoon v1.5 Instruct (8B) - description: Typhoon v1.5 Instruct (8B) is an instruction-tuned Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7)) - creator_organization_name: SCB10X - access: open - num_parameters: 8000000000 - release_date: 2024-05-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: scb10x/typhoon-v1.5-72b - display_name: Typhoon v1.5 (72B) - description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7)) - creator_organization_name: SCB10X - access: open - num_parameters: 72000000000 - release_date: 2024-05-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: scb10x/typhoon-v1.5-72b-instruct - display_name: Typhoon v1.5 Instruct (72B) - description: Typhoon v1.5 Instruct (72B) is an instruction-tuned Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7)) - creator_organization_name: SCB10X - access: open - num_parameters: 72000000000 - release_date: 2024-05-08 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: scb10x/llama-3-typhoon-v1.5x-8b-instruct - display_name: Typhoon 1.5X instruct (8B) - description: Llama-3-Typhoon-1.5X-8B-instruct is an 8 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c)) - creator_organization_name: SCB10X - access: open - num_parameters: 8000000000 - release_date: 2024-05-29 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: scb10x/llama-3-typhoon-v1.5x-70b-instruct - display_name: Typhoon 1.5X instruct (70B) - description: Llama-3-Typhoon-1.5X-70B-instruct is a 70 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c)) - creator_organization_name: SCB10X - access: open - num_parameters: 70000000000 - release_date: 2024-05-29 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Alibaba DAMO Academy - - name: damo/seallm-7b-v2 - display_name: SeaLLM v2 (7B) - description: SeaLLM v2 is a multilingual LLM for Southeast Asian (SEA) languages trained from Mistral (7B).
([website](https://damo-nlp-sg.github.io/SeaLLMs/)) - creator_organization_name: Alibaba DAMO Academy - access: open - num_parameters: 7000000000 - release_date: 2024-02-02 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: damo/seallm-7b-v2.5 - display_name: SeaLLM v2.5 (7B) - description: SeaLLM is a multilingual LLM for Southeast Asian (SEA) languages trained from Gemma (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/)) - creator_organization_name: Alibaba DAMO Academy - access: open - num_parameters: 7000000000 - release_date: 2024-04-12 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG] - - # Snowflake - - name: snowflake/snowflake-arctic-instruct - display_name: Arctic Instruct - description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP resulting in 480B total and 17B active parameters chosen using a top-2 gating. - creator_organization_name: Snowflake - access: open - num_parameters: 482000000000 - release_date: 2024-04-24 - tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - # Stability AI - - name: stabilityai/stablelm-base-alpha-3b - display_name: StableLM-Base-Alpha (3B) - description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models. - creator_organization_name: Stability AI - access: open - num_parameters: 3000000000 - release_date: 2023-04-20 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: stabilityai/stablelm-base-alpha-7b - display_name: StableLM-Base-Alpha (7B) - description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models. - creator_organization_name: Stability AI - access: open - num_parameters: 7000000000 - release_date: 2023-04-20 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - # Stanford - - name: stanford/alpaca-7b - display_name: Alpaca (7B) - description: Alpaca 7B is a model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. - creator_organization_name: Stanford - access: open - num_parameters: 7000000000 - release_date: 2023-03-13 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - - # TII UAE - - name: tiiuae/falcon-7b - display_name: Falcon (7B) - description: Falcon-7B is a 7B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. - creator_organization_name: TII UAE - access: open - num_parameters: 7000000000 - release_date: 2023-03-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: tiiuae/falcon-7b-instruct - display_name: Falcon-Instruct (7B) - description: Falcon-7B-Instruct is a 7B-parameter causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets.
- creator_organization_name: TII UAE - access: open - num_parameters: 7000000000 - release_date: 2023-03-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: tiiuae/falcon-40b - display_name: Falcon (40B) - description: Falcon-40B is a 40B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. - creator_organization_name: TII UAE - access: open - num_parameters: 40000000000 - release_date: 2023-05-25 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: tiiuae/falcon-40b-instruct - display_name: Falcon-Instruct (40B) - description: Falcon-40B-Instruct is a 40B-parameter causal decoder-only model built by TII based on Falcon-40B and finetuned on a mixture of chat/instruct datasets. - creator_organization_name: TII UAE - access: open - num_parameters: 40000000000 - release_date: 2023-05-25 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - - # Together - - name: together/gpt-jt-6b-v1 - display_name: GPT-JT (6B) - description: GPT-JT (6B parameters) is a fork of GPT-J ([blog post](https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai)). - creator_organization_name: Together - access: open - num_parameters: 6700000000 - release_date: 2022-11-29 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: together/gpt-neoxt-chat-base-20b - display_name: GPT-NeoXT-Chat-Base (20B) - description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots. - creator_organization_name: Together - access: open - num_parameters: 20000000000 - release_date: 2023-03-08 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG] - - name: together/redpajama-incite-base-3b-v1 - display_name: RedPajama-INCITE-Base-v1 (3B) - description: RedPajama-INCITE-Base-v1 (3B parameters) is a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization_name: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: together/redpajama-incite-instruct-3b-v1 - display_name: RedPajama-INCITE-Instruct-v1 (3B) - description: RedPajama-INCITE-Instruct-v1 (3B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization_name: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: together/redpajama-incite-chat-3b-v1 # NOT SUPPORTED - display_name: RedPajama-INCITE-Chat-v1 (3B) - description: RedPajama-INCITE-Chat-v1 (3B parameters) is a model fine-tuned on OASST1 and Dolly2 to enhance chatting ability. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization_name: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - tags: [UNSUPPORTED_MODEL_TAG] - - name: together/redpajama-incite-base-7b - display_name: RedPajama-INCITE-Base (7B) - description: RedPajama-INCITE-Base (7B parameters) is a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
- creator_organization_name: Together - access: open - num_parameters: 7000000000 - release_date: 2023-05-05 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: together/redpajama-incite-instruct-7b - display_name: RedPajama-INCITE-Instruct (7B) - description: RedPajama-INCITE-Instruct (7B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base (7B), a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization_name: Together - access: open - num_parameters: 7000000000 - release_date: 2023-05-05 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - - # Tsinghua - - - name: thudm/cogview2 - display_name: CogView2 (6B) - description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290)) - creator_organization_name: Tsinghua - access: open - num_parameters: 6000000000 - release_date: 2022-06-15 - tags: [TEXT_TO_IMAGE_MODEL_TAG] - - name: tsinghua/glm - display_name: GLM (130B) - description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)). - creator_organization_name: Tsinghua - access: open - num_parameters: 130000000000 - release_date: 2022-08-04 - # Inference with echo=True is not feasible -- in the prompt encoding phase, they use - # bidirectional attention and do not perform predictions on them. - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG] - - name: tsinghua/codegeex # NOT SUPPORTED - display_name: CodeGeeX (13B) - description: CodeGeeX (13B parameters) is an open dense code model trained on more than 20 programming languages on a corpus of more than 850B tokens ([blog](http://keg.cs.tsinghua.edu.cn/codegeex/)). - creator_organization_name: Tsinghua - access: open - num_parameters: 13000000000 - release_date: 2022-09-19 - tags: [UNSUPPORTED_MODEL_TAG] - - # Upstage - - name: upstage/solar-pro-preview-instruct - display_name: Solar Pro Preview (22B) - description: Solar Pro Preview (22B) is an open-weights model for single-GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)). - creator_organization_name: Upstage - access: open - num_parameters: 22000000000 - release_date: 2024-09-11 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: upstage/solar-pro-241126 - display_name: Solar Pro - description: Solar Pro is an LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
- creator_organization_name: Upstage - access: limited - num_parameters: 22000000000 - release_date: 2024-11-26 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - # Writer - - name: writer/palmyra-base - display_name: Palmyra Base (5B) - description: Palmyra Base (5B) - creator_organization_name: Writer - access: limited - num_parameters: 5000000000 - release_date: 2022-10-13 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-large - display_name: Palmyra Large (20B) - description: Palmyra Large (20B) - creator_organization_name: Writer - access: limited - num_parameters: 20000000000 - release_date: 2022-12-23 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-instruct-30 - display_name: InstructPalmyra (30B) - description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans. - creator_organization_name: Writer - access: limited - num_parameters: 30000000000 - release_date: 2023-02-16 - # Does not support echo - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-e - display_name: Palmyra E (30B) - description: Palmyra E (30B) - creator_organization_name: Writer - access: limited - num_parameters: 30000000000 - release_date: 2023-03-03 - # Does not support echo - tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/silk-road - display_name: Silk Road (35B) - description: Silk Road (35B) - creator_organization_name: Writer - access: limited - num_parameters: 35000000000 - release_date: 2023-04-13 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-x - display_name: Palmyra X (43B) - description: Palmyra-X (43B parameters) is trained to adhere to instructions using human feedback and utilizes a technique called multiquery attention. Furthermore, a new feature called 'self-instruct' has been introduced, which includes the implementation of an early stopping criterion specifically designed for minimal instruction tuning ([paper](https://dev.writer.com/docs/becoming-self-instruct-introducing-early-stopping-criteria-for-minimal-instruct-tuning)). - creator_organization_name: Writer - access: limited - num_parameters: 43000000000 - release_date: 2023-06-11 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-x-v2 - display_name: Palmyra X V2 (33B) - description: Palmyra-X V2 (33B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. The pre-training data comprises more than 2 trillion tokens of diverse types covering a wide range of areas, and training used FlashAttention-2. - creator_organization_name: Writer - access: limited - num_parameters: 33000000000 - release_date: 2023-12-01 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-x-v3 - display_name: Palmyra X V3 (72B) - description: Palmyra-X V3 (72B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and uses multiquery attention.
- creator_organization_name: Writer - access: limited - num_parameters: 72000000000 - release_date: 2023-12-01 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-x-32k - display_name: Palmyra X-32K (33B) - description: Palmyra-X-32K (33B parameters) is a Transformer-based model, which is trained on large-scale pre-training data. The pre-training data types are diverse and cover a wide range of areas. These data types are used in conjunction with an alignment mechanism to extend the context window. - creator_organization_name: Writer - access: limited - num_parameters: 33000000000 - release_date: 2023-12-01 - # Does not support echo - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: writer/palmyra-vision-003 - display_name: Palmyra Vision 003 - description: Palmyra Vision 003 (internal only) - creator_organization_name: Writer - access: limited - num_parameters: 5000000000 - release_date: 2024-05-24 - # Does not support echo - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG] - - name: writer/palmyra-x-004 - display_name: Palmyra-X-004 - description: Palmyra-X-004 is a language model with a large context window of up to 128,000 tokens that excels in processing and understanding complex tasks. - creator_organization_name: Writer - access: limited - release_date: 2024-09-12 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: writer/palmyra-x5 - display_name: Palmyra X5 - description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/)) - creator_organization_name: Writer - access: limited - release_date: 2025-04-28 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: writer/palmyra-med-32k - display_name: Palmyra-Med 32K (70B) - description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications. - creator_organization_name: Writer - access: open - num_parameters: 70600000000 - release_date: 2024-07-31 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: writer/palmyra-med - display_name: Palmyra Med - description: Palmyra Med is a model intended for medical applications. - creator_organization_name: Writer - access: open - release_date: 2024-07-31 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: writer/palmyra-fin-32k - display_name: Palmyra-Fin 32K (70B) - description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003 intended for financial applications. - creator_organization_name: Writer - access: open - num_parameters: 70600000000 - release_date: 2024-07-31 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: writer/palmyra-fin - display_name: Palmyra Fin - description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
- creator_organization_name: Writer - access: limited - release_date: 2024-07-31 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # xAI - - - name: xai/grok-3-beta - display_name: Grok 3 Beta - description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3)) - creator_organization_name: xAI - access: limited - release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: xai/grok-3-mini-beta - display_name: Grok 3 mini Beta - description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3)) - creator_organization_name: xAI - access: limited - release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: xai/grok-4-0709 - display_name: Grok 4 (0709) - description: Grok 4 (0709) is a model that includes native tool use and real-time search integration. ([blog](https://x.ai/news/grok-4)) - creator_organization_name: xAI - access: limited - release_date: 2025-07-09 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Yandex - - name: yandex/yalm - display_name: YaLM (100B) - description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)). 
- creator_organization_name: Yandex - access: open - num_parameters: 100000000000 - release_date: 2022-06-23 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG] - - # Reka - - name: reka/reka-core - display_name: Reka-Core - description: Reka-Core - creator_organization_name: Reka AI - access: limited - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-core-20240415 - display_name: Reka-Core-20240415 - description: Reka-Core-20240415 - creator_organization_name: Reka AI - access: limited - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-core-20240501 - display_name: Reka-Core-20240501 - description: Reka-Core-20240501 - creator_organization_name: Reka AI - access: limited - release_date: 2024-05-01 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-flash - display_name: Reka-Flash (21B) - description: Reka-Flash (21B) - creator_organization_name: Reka AI - access: limited - num_parameters: 21000000000 - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-flash-20240226 - display_name: Reka-Flash-20240226 (21B) - description: Reka-Flash-20240226 (21B) - creator_organization_name: Reka AI - access: limited - num_parameters: 21000000000 - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-edge - display_name: Reka-Edge (7B) - description: Reka-Edge (7B) - creator_organization_name: Reka AI - access: limited - num_parameters: 7000000000 - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - - name: reka/reka-edge-20240208 - display_name: Reka-Edge-20240208 (7B) - description: Reka-Edge-20240208 (7B) - creator_organization_name: Reka AI - access: limited - num_parameters: 7000000000 - release_date: 2024-04-18 - tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG] - -# Diva Llama - - name: stanford/diva-llama - display_name: Diva Llama 3 (8B) - description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678)) - creator_organization_name: Stanford - access: open - num_parameters: 8000000000 - release_date: 2024-10-03 - tags: [AUDIO_LANGUAGE_MODEL_TAG] - - -# LLaMA-Omni - - name: ictnlp/llama-3.1-8b-omni - display_name: LLaMA-Omni (8B) - description: The speech multimodal version of the LLaMA 3.1 model ([paper](https://arxiv.org/abs/2409.06666)). - creator_organization_name: ICTNLP - access: open - num_parameters: 8000000000 - release_date: 2024-09-10 - tags: [AUDIO_LANGUAGE_MODEL_TAG] - - -# Maritaca AI - - name: maritaca-ai/sabia-7b - display_name: Sabiá 7B - description: Sabiá 7B - creator_organization_name: Maritaca AI - access: open - num_parameters: 6740000000 - release_date: 2023-11-08 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: maritaca-ai/sabiazinho-3 - display_name: Sabiazinho 3 - description: Sabiazinho-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
- creator_organization_name: Maritaca AI - access: limited - release_date: 2025-02-06 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: maritaca-ai/sabia-3 - display_name: Sabiá 3 - description: Sabiá-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023. - creator_organization_name: Maritaca AI - access: limited - release_date: 2024-12-11 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: maritaca-ai/sabia-3.1-2025-05-08 - display_name: Sabiá 3.1 - description: Sabiá-3.1 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to August 2024. - creator_organization_name: Maritaca AI - access: limited - release_date: 2025-05-08 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - # Z.ai - - - name: zai-org/glm-4.5-air-fp8 - display_name: GLM-4.5-Air-FP8 - description: GLM-4.5-Air-FP8 is a hybrid reasoning model designed to unify reasoning, coding, and agentic capabilities into a single model. It has 106 billion total parameters and 12 billion active parameters. The thinking mode is enabled by default. ([blog](https://z.ai/blog/glm-4.5)) - creator_organization_name: Z.ai - access: open - num_parameters: 110000000000 - release_date: 2025-07-28 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - -# Granite - IBM -# https://www.ibm.com/granite -# https://github.com/ibm-granite/granite-3.0-language-models - - - name: ibm-granite/granite-3.0-2b-base - display_name: Granite 3.0 base (2B) - description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks. - creator_organization_name: IBM - access: open - num_parameters: 2530000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.0-2b-instruct - display_name: Granite 3.0 Instruct (2B) - description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. - creator_organization_name: IBM - access: open - num_parameters: 2630000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.0-8b-instruct - display_name: Granite 3.0 instruct (8B) - description: Granite-3.0-8B-Instruct is an 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. - creator_organization_name: IBM - access: open - num_parameters: 8170000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.0-8b-base - display_name: Granite 3.0 base (8B) - description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
- creator_organization_name: IBM - access: open - num_parameters: 8170000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.0-3b-a800m-instruct - display_name: Granite 3.0 A800M instruct (3B) - description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. - creator_organization_name: IBM - access: open - num_parameters: 3370000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.0-3b-a800m-base - display_name: Granite 3.0 A800M base (3B) - description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. - creator_organization_name: IBM - access: open - num_parameters: 3370000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.0-1b-a400m-instruct - display_name: Granite 3.0 A400M instruct (1B) - description: Granite-3.0-1B-A400M-Instruct is a 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. - creator_organization_name: IBM - access: open - num_parameters: 1330000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.0-1b-a400m-base - display_name: Granite 3.0 A400M base (1B) - description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy. - creator_organization_name: IBM - access: open - num_parameters: 1380000000 - release_date: 2024-10-21 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.1-8b-base - display_name: Granite 3.1 - 8B - Base - description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K. - creator_organization_name: IBM - access: open - num_parameters: 8170000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.1-8b-instruct - display_name: Granite 3.1 - 8B - Instruct - description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. - creator_organization_name: IBM - access: open - num_parameters: 8170000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.1-2b-instruct - display_name: Granite 3.1 - 2B - Instruct - description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
- creator_organization_name: IBM - access: open - num_parameters: 2530000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.1-2b-base - display_name: Granite 3.1 - 2B - Base - description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K. - creator_organization_name: IBM - access: open - num_parameters: 2530000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.1-3b-a800m-instruct - display_name: Granite 3.1 - 3B - A800M - Instruct - description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. - creator_organization_name: IBM - access: open - num_parameters: 3300000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.1-3b-a800m-base - display_name: Granite 3.1 - 3B - A800M - Base - description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K. - creator_organization_name: IBM - access: open - num_parameters: 3300000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG] - - name: ibm-granite/granite-3.1-1b-a400m-instruct - display_name: Granite 3.1 - 1B - A400M - Instruct - description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. - creator_organization_name: IBM - access: open - num_parameters: 1330000000 - release_date: 2024-12-18 - tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: ibm-granite/granite-3.1-1b-a400m-base - display_name: Granite 3.1 - 1B - A400M - Base - description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
- creator_organization_name: IBM
- access: open
- num_parameters: 1330000000
- release_date: 2024-12-18
- tags: [TEXT_MODEL_TAG]
-
- - name: ibm/granite-13b-instruct-v2
- display_name: Granite 13b instruct v2
- description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model.
- creator_organization_name: IBM
- access: limited
- num_parameters: 13000000000
- release_date: 2023-11-30
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-20b-code-instruct-8k
- display_name: Granite 20b code instruct (8K)
- description: Granite-20B-Code-Instruct-8K is fine-tuned from Granite-20B-Code-Base-8K, a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.) and trained from scratch with a two-phase training strategy. In phase 1, the model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, the model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the model's ability to reason and follow instructions.
- creator_organization_name: IBM
- access: limited
- num_parameters: 20000000000
- release_date: 2024-04-18
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-34b-code-instruct
- display_name: Granite 34b code instruct
- description: Granite Base (34B) Code Instruct is a 34B parameter model fine-tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction-following capabilities, including logical reasoning and problem-solving skills.
- creator_organization_name: IBM
- access: open
- num_parameters: 34000000000
- release_date: 2024-06-05
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-3b-code-instruct
- display_name: Granite 3b code instruct
- description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine-tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
- creator_organization_name: IBM
- access: open
- num_parameters: 3000000000
- release_date: 2024-06-18
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-8b-code-instruct
- display_name: Granite 8b code instruct
- description: Granite-8B-Code-Instruct-128K is an 8B parameter long-context instruct model fine-tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
- creator_organization_name: IBM
- access: open
- num_parameters: 8000000000
- release_date: 2024-06-18
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-3.1-8b-instruct
- display_name: Granite 3.1 - 8B - Instruct
- description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
- creator_organization_name: IBM
- access: open
- num_parameters: 8170000000
- release_date: 2024-12-18
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-3.1-2b-instruct
- display_name: Granite 3.1 - 2B - Instruct
- description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
- creator_organization_name: IBM
- access: open
- num_parameters: 2530000000
- release_date: 2024-12-18
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-3.3-8b-instruct
- display_name: IBM Granite 3.3 8B Instruct
- description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
- creator_organization_name: IBM
- access: open
- num_parameters: 8170000000
- release_date: 2025-04-16
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ibm/granite-3.3-8b-instruct-with-guardian
- display_name: IBM Granite 3.3 8B Instruct (with guardian)
- description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. All prompts were first evaluated for risk by [IBM Granite Guardian 3.2 5B](https://www.ibm.com/granite/docs/models/guardian/) and prompts that were deemed risky (with a risk threshold of 0.8) received the response "I'm very sorry, but I can't assist with that." ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
- creator_organization_name: IBM
- access: open
- num_parameters: 8170000000
- release_date: 2025-04-16
- # Unfortunately this setup is not easily reproducible, so we mark it with DEPRECATED_MODEL_TAG
- tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/ura-llama-2.1-8b
- display_name: URA-Llama 2.1 (8B)
- description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 8000000000
- release_date: 2024-08-04
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/ura-llama-2-8b
- display_name: URA-Llama 2 (8B)
- description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 8000000000
- release_date: 2024-08-04
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/ura-llama-7b
- display_name: URA-Llama 7B (7B)
- description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 7000000000
- release_date: 2023-10-10
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/ura-llama-13b
- display_name: URA-Llama 13B (13B)
- description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 13000000000
- release_date: 2023-10-10
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/ura-llama-70b
- display_name: URA-Llama 70B (70B)
- description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 70000000000
- release_date: 2023-10-10
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/GemSUra-7B
- display_name: GemSUra 7B
- description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 7000000000
- release_date: 2024-03-12
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/GemSUra-2B
- display_name: GemSUra 2B
- description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: URA
- access: open
- num_parameters: 2000000000
- release_date: 2024-03-12
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: ura-hcmut/MixSUra
- display_name: MixSUra
- description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture-of-experts model with 8 experts.
- creator_organization_name: URA
- access: open
- num_parameters: 46700000000
- release_date: 2024-03-12
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: vilm/vinallama-7b-chat
- display_name: VinaLLaMa
- description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: ViLM - access: open - num_parameters: 7000000000 - release_date: 2024-03-12 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/vinallama-2.7b-chat - display_name: VinaLLaMa 2.7B - description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 2700000000 - release_date: 2024-03-12 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/vietcuna-7b-v3 - display_name: VietCuna 7B (v3) - description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 7000000000 - release_date: 2023-08-07 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/vietcuna-3b-v2 - display_name: VietCuna 3B (v2) - description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 3000000000 - release_date: 2023-07-15 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/Quyen-v0.1 - display_name: Quyen (v0.1) - description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 4000000000 - release_date: 2024-02-26 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/Quyen-Plus-v0.1 - display_name: Quyen Plus (v0.1) - description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 7000000000 - release_date: 2024-02-26 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/Quyen-Pro-v0.1 - display_name: Quyen Pro (v0.1) - description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. - creator_organization_name: ViLM - access: open - num_parameters: 14000000000 - release_date: 2024-02-26 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - - name: vilm/Quyen-Pro-Max-v0.1 - display_name: Quyen Pro Max (v0.1) - description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. 
- creator_organization_name: ViLM
- access: open
- num_parameters: 72000000000
- release_date: 2024-02-26
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: vilm/Quyen-Mini-v0.1
- display_name: Quyen Mini (v0.1)
- description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: ViLM
- access: open
- num_parameters: 1800000000
- release_date: 2024-02-26
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: vilm/Quyen-SE-v0.1
- display_name: Quyen SE (v0.1)
- description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: ViLM
- access: open
- num_parameters: 500000000
- release_date: 2024-02-26
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: Viet-Mistral/Vistral-7B-Chat
- display_name: Vistral 7B Chat
- description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: Viet-Mistral
- access: open
- num_parameters: 7000000000
- release_date: 2024-02-28
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: vinai/PhoGPT-7B5-Instruct
- display_name: PhoGPT 7B5 Instruct
- description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: VinAI
- access: open
- num_parameters: 7500000000
- release_date: 2024-02-19
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: vinai/PhoGPT-4B-Chat
- display_name: PhoGPT 4B Chat
- description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
- creator_organization_name: VinAI
- access: open
- num_parameters: 4000000000
- release_date: 2024-04-02
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
- display_name: Gemma-3 Gaia PT-BR 4b Instruct
- description: Gemma-3 Gaia PT-BR 4b Instruct is a model trained by CEIA-UFG for understanding and generating Brazilian Portuguese text.
- creator_organization_name: CEIA-UFG
- access: open
- num_parameters: 4000000000
- release_date: 2025-06-01
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
- display_name: Bode 13B Alpaca PT-BR
- description: Bode is a large language model (LLM) for Portuguese, based on LLaMA 2 and fine-tuned with the Alpaca dataset translated into Portuguese. It is suitable for instruction following, text generation, translation, and other tasks in Portuguese.
- creator_organization_name: Recogna NLP
- access: open
- num_parameters: 13000000000
- release_date: 2024-01-05
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: 22h/cabrita_7b_pt_850000
- display_name: Cabrita PT-BR 7B
- description: Cabrita is an OpenLLaMA-based model, continuously trained in Portuguese (mC4-pt subset) for 850,000 steps with efficient tokenization adapted to the language.
- creator_organization_name: 22h
- access: open
- num_parameters: 7000000000
- release_date: 2023-08-23
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
- display_name: Gervásio PT-BR/PT-PT 7B Decoder
- description: Gervásio PT* is a 7B parameter decoder model, adapted from LLaMA-2 7B, trained for both Brazilian and European Portuguese. Fine-tuned with translated data from benchmarks such as GLUE and SuperGLUE.
- creator_organization_name: PORTULAN (University of Lisbon NLX)
- access: open
- num_parameters: 6740000000
- release_date: 2024-02-29
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: TucanoBR/Tucano-2b4
- display_name: Tucano PT-BR 2b4
- description: Tucano is a series of decoder models based on LLaMA 2, natively pre-trained in Portuguese using the GigaVerbo dataset (200B tokens), with the 2B model trained for 1.96M steps over 845h (515B tokens, 4 epochs).
- creator_organization_name: TucanoBR (University of Bonn)
- access: open
- num_parameters: 2444618240
- release_date: 2024-12-11
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
- - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
- display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20
- description: llama-70b-chat.
- creator_organization_name: Sasha Ronaghi
- access: open
- num_parameters: 70000000000
- release_date: 2025-10-15
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
- - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20
- display_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20
- description: mellama-70b-chat.
- creator_organization_name: Sasha Ronaghi
- access: open
- num_parameters: 70000000000
- release_date: 2025-10-15
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
- - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20
- display_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20
- description: mellama-13b-chat.
- creator_organization_name: Sasha Ronaghi
- access: open
- num_parameters: 13000000000
- release_date: 2025-10-15
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
- - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20
- display_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20
- description: Qwen3-30b.
- creator_organization_name: Sasha Ronaghi
- access: open
- num_parameters: 30000000000
- release_date: 2025-10-15
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
- - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20
- display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20
- description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and llama-13b-base antiexpert.
- creator_organization_name: Sasha Ronaghi - access: open - num_parameters: 30000000000 - release_date: 2025-10-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 - display_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 - description: Proxy tuned Qwen3-30b with mellama-13b-base expert and llama-13b-base antiexpert. - creator_organization_name: Sasha Ronaghi - access: open - num_parameters: 30000000000 - release_date: 2025-10-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 - display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 - description: Proxy tuned Qwen3-30b with mellama-13b-chat expert and mellama-13b-base antiexpert. - creator_organization_name: Sasha Ronaghi - access: open - num_parameters: 30000000000 - release_date: 2025-10-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 - display_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 - description: Proxy tuned Llama2-70b-chat with mellama-13b-chat expert and llama-13b-base antiexpert. - creator_organization_name: Sasha Ronaghi - access: open - num_parameters: 70000000000 - release_date: 2025-10-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] - - - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 - display_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 - description: Unite of Qwen3-30b with mellama-13b-chat expert. - creator_organization_name: Sasha Ronaghi - access: open - num_parameters: 30000000000 - release_date: 2025-10-15 - tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] From 47d9aaac2d3633526fd51e4a520ef848ff1f6d77 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:11:09 -0700 Subject: [PATCH 19/42] Delete src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf --- ..._entries_medhelm_private_proxy_tuning.conf | 192 ------------------ 1 file changed, 192 deletions(-) delete mode 100644 src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf diff --git a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf deleted file mode 100644 index 99c7e36968c..00000000000 --- a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf +++ /dev/null @@ -1,192 +0,0 @@ -# MedHELM RunSpecs for the private benchmarks from Stanford. 
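-
-# Note on model names: the proxy_tuning/* identifiers used throughout these
-# entries appear to follow the ProxyTuningClient naming convention (an
-# assumption inferred from the entries and their metadata, not documented here):
-#   proxy_tuning/<base>_<expert>_<antiexpert>_<alpha>_<logits|logprobs>_<topk>
-# A "none" expert or antiexpert drops that term, so *_none_none_* entries run
-# the plain base model, and an expert without an antiexpert is the "unite"
-# setup. Under standard proxy tuning, per-token scores would be combined
-# roughly as base + alpha * (expert - antiexpert) over the top-<topk>
-# candidate tokens.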
- -entries: [ - - ########## Clinical Decision Support ########## - - ### Supporting Diagnostic Decisions ### - - #Alcohol Dependence - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Attention Deficit Hyperactivity Disorder - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Bipolar Disorder - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Chronic Pain - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Homelessness - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - 
{description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Liver Disease - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Major Depression - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - 
{description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Personality Disorder - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Post-Traumatic Stress Disorder - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Substance Use Disorder - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Suicidal Behavior - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Tobacco Dependence - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - #Unemployment - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - {description: 
"clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, - - ### Planning Treatments ### - - ### Predicting Patient Risks and Outcomes ### - - - ########## Clinical Note Generation ########## - - ### Documenting Patient Visits ### - - ### Recording Procedures ### - - ### Documenting Diagnostic Reports ### - - ### Documenting Care Plans ### - {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, - - ########## Patient Communication and Education ########## - - ### Providing Patient Education Resources ### - - ### Delivering Personalized Care Instructions ### - {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: "starr_patient_instructions:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: "starr_patient_instructions:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: 
"starr_patient_instructions:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, - - ### Patient-Provider Messaging ### - - - ### Enhancing Patient Understanding and Accessibility in Health Communication ### - - ### Facilitating Patient Engagement and Support ### - - ########## Medical Research Assistance ########## - - ### Conducting Literature Research ### - - ### Analyzing Clinical Research Data ### - - ### Recording Research Processes ### - - - ### Ensuring Clinical Research Quality ### - - ### Managing Research Enrollment ### - - ########## Administration and Workflow ########## - - ### Scheduling Resources and Staff ### - - - ### Overseeing Financial Activities ### - - ### Care Coordination and Planning ### - - ### Organizing Workflow Processes ### - -] From bf0181e50067c3418ee400708043f6ae7d97f8c5 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:11:33 -0700 Subject: [PATCH 20/42] Delete prod_env/tokenizer_configs.yaml --- prod_env/tokenizer_configs.yaml | 1287 ------------------------------- 1 file changed, 1287 deletions(-) delete mode 100644 prod_env/tokenizer_configs.yaml diff --git a/prod_env/tokenizer_configs.yaml b/prod_env/tokenizer_configs.yaml deleted file mode 100644 index e3431118217..00000000000 --- a/prod_env/tokenizer_configs.yaml +++ /dev/null @@ -1,1287 +0,0 @@ -# This file defines all the tokenizers that are supported by the Helm API. - -# If you want to add a new tokenizer, you can technically do it here but we recommend -# you to do it in prod_env/tokenizer_configs.yaml instead. - -# Follow the template of this file to add a new tokenizer. 
You can copy paste this to get started: -# # This file contains the tokenizer configs for the private tokenizers -# tokenizer_configs: [] # Leave empty to disable private tokenizers - - -tokenizer_configs: - - - name: simple/tokenizer1 - tokenizer_spec: - class_name: "helm.tokenizers.simple_tokenizer.SimpleTokenizer" - end_of_text_token: "" - prefix_token: "" - - # AI21 - - name: ai21/j2-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - name: ai21/jamba-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - name: ai21/jamba-instruct-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - name: ai21/jamba-1.5-mini-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - name: ai21/jamba-1.5-large-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - # AlephAlpha - - name: AlephAlpha/luminous-base - tokenizer_spec: - class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: AlephAlpha/luminous-extended - tokenizer_spec: - class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: AlephAlpha/luminous-supreme - tokenizer_spec: - class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: AlephAlpha/luminous-world - tokenizer_spec: - class_name: "helm.tokenizers.aleph_alpha_tokenizer.AlephAlphaTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Alibaba DAMO Academy - - - name: damo/seallm-7b-v2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2 - end_of_text_token: "" - prefix_token: "" - - - name: damo/seallm-7b-v2.5 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5 - end_of_text_token: "" - prefix_token: "" - - # Anthropic - - name: anthropic/claude - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Xenova/claude-tokenizer - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # Bigcode - - name: bigcode/santacoder - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - name: bigcode/starcoder - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # Bigscience - - name: bigscience/bloom - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: bigscience/T0pp - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Cohere - - name: 
cohere/command - tokenizer_spec: - class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: cohere/command-light - tokenizer_spec: - class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: cohere/command-r - tokenizer_spec: - class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: cohere/command-r-plus - tokenizer_spec: - class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: cohere/c4ai-command-r-v01 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: CohereForAI/c4ai-command-r-v01 - end_of_text_token: "" - prefix_token: "" - - - name: cohere/c4ai-command-r-plus - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: CohereForAI/c4ai-command-r-plus - end_of_text_token: "" - prefix_token: "" - - # Databricks - - name: databricks/dbrx-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # DeepSeek - - name: deepseek-ai/deepseek-llm-67b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|end▁of▁sentence|>" - prefix_token: "<|begin▁of▁sentence|>" - - - name: deepseek-ai/deepseek-v3 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|end▁of▁sentence|>" - prefix_token: "<|begin▁of▁sentence|>" - - - name: deepseek-ai/deepseek-r1 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|end▁of▁sentence|>" - prefix_token: "<|begin▁of▁sentence|>" - - # EleutherAI - - name: EleutherAI/gpt-j-6B - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: EleutherAI/gpt-neox-20b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # Facebook - - name: facebook/opt-66b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Google - - name: google/t5-11b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: google-t5/t5-11b - end_of_text_token: "" - prefix_token: "" - - name: google/flan-t5-xxl - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/ul2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/mt5-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/text-bison@001 - tokenizer_spec: - class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/text-bison@002 - tokenizer_spec: - class_name: 
"helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/text-unicorn@001 - tokenizer_spec: - class_name: "helm.tokenizers.vertexai_tokenizer.VertexAITokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/gemma-2b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: google/gemma-2-9b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Grok - - name: xai/grok-3-beta - tokenizer_spec: - class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: xai/grok-3-mini-beta - tokenizer_spec: - class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: xai/grok-4-0709 - tokenizer_spec: - class_name: "helm.tokenizers.grok_tokenizer.GrokAPITokenizer" - end_of_text_token: "" - prefix_token: "" - - # Hf-internal-testing - - # Tokenizer name hf-internal-testing/llama-tokenizer is taken from: - # https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example - - name: hf-internal-testing/llama-tokenizer - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # HuggingFaceM4 - - name: HuggingFaceM4/idefics-9b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: HuggingFaceM4/idefics-9b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: HuggingFaceM4/idefics-80b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - name: HuggingFaceM4/idefics-80b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: anas-awadalla/mpt-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - # Huggingface - - name: huggingface/gpt2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: openai-community/gpt2 - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: huggingface/smollm2-135m - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: huggingface/smollm2-135m-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: HuggingFaceTB/SmolLM2-135M-Instruct - end_of_text_token: "<|endoftext|>" - prefix_token: "<|im_end|>" - - # Lighting AI - - name: lightningai/lit-gpt - tokenizer_spec: - class_name: "helm.tokenizers.lit_gpt_tokenizer.LitGPTTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # Meta-llama - - # To use the Llama-2 tokenizer: - # - # 1. Accept the license agreement: https://ai.meta.com/resources/models-and-libraries/llama-downloads/ - # 2. 
Request to access the Hugging Face repository: https://huggingface.co/meta-llama/Llama-2-7b - # 3. Run `huggingface-cli login` - # - # If you encounter the following error, complete the above steps and try again: - # - # meta-llama/Llama-2-70b-hf is not a local folder and is not a valid model identifier listed on - # 'https://huggingface.co/models' - - name: meta-llama/Llama-2-7b-hf - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: meta/llama-3-8b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Meta-Llama-3-8B - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|end_of_text|>" - - - name: meta/llama-3-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.1-8b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|end_of_text|>" - - - name: meta/llama-3.1-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.2-3b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.2-1b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.1-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.2-11b-vision-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.2-11B-Vision-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-3.3-70b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-3.3-70B-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - - name: meta/llama-4-scout-17b-16e-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|end_of_text|>" - - # 01-ai - - name: 01-ai/Yi-6B - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # AI Singapore - - name: aisingapore/sea-lion-7b - 
tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - trust_remote_code: true - use_fast: false - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - - # Allen Institute for AI - # The allenai/olmo-7b requires Python 3.9 or newer. - # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first. - - name: allenai/olmo-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - trust_remote_code: true - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: allenai/OLMo-1.7-7B-hf - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: allenai/olmo-2-1124-7b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: allenai/olmo-2-0325-32b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: allenai/olmoe-1b-7b-0125-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "|||IP_ADDRESS|||" - prefix_token: "|||IP_ADDRESS|||" - - # Marin Community - - name: marin-community/marin-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|eot_id|>" - prefix_token: "<|begin_of_text|>" - - # Microsoft - - name: microsoft/phi-2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: microsoft/phi-3-small-8k-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - trust_remote_code: true - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: microsoft/phi-3-medium-4k-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: microsoft/phi-3.5-mini-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: microsoft/phi-3.5-mini-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - # Mistralai - - name: mistralai/Mistral-7B-v0.1 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-7B-Instruct-v0.1 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-7B-Instruct-v0.2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-7B-Instruct-v0.3 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-Nemo-Base-2407 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - 
prefix_token: "" - - - name: mistralai/Mistral-Large-Instruct-2407 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-Large-Instruct-2411 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Ministral-8B-Instruct-2410 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - - name: mistralai/Mistral-Small-24B-Instruct-2501 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Moonshot AI - - name: moonshotai/kimi-k2-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: moonshotai/Kimi-K2-Instruct - trust_remote_code: true - revision: 4f239503ad9d1a042f0a4bacac457931ab972cfc - end_of_text_token: "[EOS]" - prefix_token: "[BOS]" - - # Nectec - - name: nectec/OpenThaiLLM-Prebuilt-7B - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|im_end|>" - prefix_token: "" - - - name: nectec/Pathumma-llm-text-1.0.0 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" - - # Neurips - - name: neurips/local - tokenizer_spec: - class_name: "helm.tokenizers.http_model_tokenizer.HTTPModelTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - # NVIDIA - - name: nvidia/nemotron-4-340b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Xenova/Nemotron-4-340B-Instruct-Tokenizer - revision: b7aa0de92cda9f9e722d58d6ca90f46ae17d4701 - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: nvidia/llama-3.1-nemotron-70b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF - end_of_text_token: "<|eot_id|>" - prefix_token: "<|begin_of_text|>" - - # OpenAI - - name: openai/cl100k_base - tokenizer_spec: - class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: openai/o200k_base - tokenizer_spec: - class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|endoftext|>" - - - name: openai/o200k_harmony - tokenizer_spec: - class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "<|startoftext|>" - - - name: openai/clip-vit-large-patch14 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # OpenThaiGPT - - name: openthaigpt/openthaigpt-1.0.0-7b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Qwen - - name: qwen/qwen-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen-7B - trust_remote_code: true - end_of_text_token: 
"<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen1.5-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen1.5-7B - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen2-72b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen2-72B-Instruct - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" - - - name: qwen/qwen2.5-7b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" - - - name: qwen/qwen3-235b-a22b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen3-235B-A22B - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" - - - name: qwen/qwen3-235b-a22b-instruct-2507-fp8 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|im_end|>" - prefix_token: "" - - - name: qwen/qwq-32b-preview - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|im_end|>" - prefix_token: "" - - - name: qwen/qwen-vl - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen-VL - trust_remote_code: true - # Source: https://github.com/QwenLM/Qwen-VL - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen-vl-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen-VL-Chat - trust_remote_code: true - # Source: https://github.com/QwenLM/Qwen-VL - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen-audio-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen-Audio-Chat - trust_remote_code: true - # Source: https://github.com/QwenLM/Qwen-Audio - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen2-audio-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen2-Audio-7B-Instruct - trust_remote_code: false - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - name: qwen/qwen2.5-omni-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Qwen/Qwen2.5-Omni-7B - trust_remote_code: false - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - # SambaLingo - - name: sambanova/sambalingo-thai-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base - end_of_text_token: "" - prefix_token: "" - - # Snowflake - - name: snowflake/snowflake-arctic-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Snowflake/snowflake-arctic-instruct - trust_remote_code: true - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" - - # Tiiuae - - name: tiiuae/falcon-7b - 
tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - # TsinghuaKEG - - name: TsinghuaKEG/ice - tokenizer_spec: - class_name: "helm.tokenizers.ice_tokenizer.ICETokenizer" - end_of_text_token: "" - prefix_token: "" - - # Typhoon - - name: scb10x/typhoon-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Upstage - - name: upstage/solar-pro-preview-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - trust_remote_code: true - end_of_text_token: "<|im_end|>" - prefix_token: "<|startoftext|>" - - # Writer - - name: writer/gpt2 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: openai-community/gpt2 - end_of_text_token: "" - prefix_token: "" - - # Yandex - - name: Yandex/yalm - tokenizer_spec: - class_name: "helm.tokenizers.yalm_tokenizer.YaLMTokenizer" - end_of_text_token: "" - prefix_token: "" - - # Diva Llama - - name: stanford/diva-llama - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: WillHeld/DiVA-llama-3-v0-8b - trust_remote_code: true - prefix_token: "<|begin_of_text|>" - end_of_text_token: "<|eot_id|>" - - # LLaMA-Omni - - name: ictnlp/llama-3.1-8b-omni - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ICTNLP/Llama-3.1-8B-Omni - trust_remote_code: false - end_of_text_token: "<|eot_id|>" - prefix_token: "<|begin_of_text|>" - - # IBM - Granite 3.0 - - name: ibm-granite/granite-3.0-2b-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-base - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-2b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-2b-instruct - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-instruct - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-8b-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-8b-base - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-3b-a800m-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-instruct - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-3b-a800m-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-3b-a800m-base - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-1b-a400m-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: 
ibm-granite/granite-3.0-1b-a400m-instruct - end_of_text_token: "" - prefix_token: "" - - - name: ibm-granite/granite-3.0-1b-a400m-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.0-1b-a400m-base - end_of_text_token: "" - prefix_token: "" - -# Maritaca AI - - name: maritaca-ai/sabia-7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: maritaca-ai/sabia-7b - end_of_text_token: "" - prefix_token: "" - - - name: maritaca-ai/sabia-2-tokenizer-medium - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: maritaca-ai/sabia-2-tokenizer-medium - end_of_text_token: "" - prefix_token: "" - -# Granite-3.1-8b-base - - name: ibm-granite/granite-3.1-8b-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base - prefix_token: "" - end_of_text_token: "<|endoftext|>" - -# Granite-3.1-8b-instruct - - name: ibm-granite/granite-3.1-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct - prefix_token: "" - end_of_text_token: "<|endoftext|>" - -# Granite-3.1-2b-instruct - - name: ibm-granite/granite-3.1-2b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct - prefix_token: "" - end_of_text_token: "" - -# Granite-3.1-2b-base - - name: ibm-granite/granite-3.1-2b-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base - prefix_token: "" - end_of_text_token: "" - -# Granite-3.1-3b-a800m-instruct - - name: ibm-granite/granite-3.1-3b-a800m-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct - prefix_token: "" - end_of_text_token: "" - -# Granite-3.1-3b-a800m-base - - name: ibm-granite/granite-3.1-3b-a800m-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base - prefix_token: "" - end_of_text_token: "" - -# Granite-3.1-1b-a400m-instruct - - name: ibm-granite/granite-3.1-1b-a400m-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct - prefix_token: "" - end_of_text_token: "" - -# Granite-3.1-1b-a400m-base - - name: ibm-granite/granite-3.1-1b-a400m-base - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base - prefix_token: "" - end_of_text_token: "" - - - name: ibm-granite/granite-20b-code-instruct-8k - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-20b-code-instruct-8k - prefix_token: "" - end_of_text_token: "" - - - name: ibm-granite/granite-3b-code-instruct-128k - 
tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3b-code-instruct-128k - prefix_token: "" - end_of_text_token: "" - - - name: ibm-granite/granite-34b-code-instruct-8k - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-34b-code-instruct-8k - prefix_token: "" - end_of_text_token: "" - - - name: ibm-granite/granite-8b-code-instruct-128k - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-8b-code-instruct-128k - prefix_token: "" - end_of_text_token: "" - - - - name: ibm-granite/granite-guardian-3.1-2b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-guardian-3.1-2b - prefix_token: "" - end_of_text_token: "" - - - name: ibm-granite/granite-guardian-3.1-8b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-guardian-3.1-8b - prefix_token: "" - end_of_text_token: "" - - # IBM Granite 3.3 - - name: ibm/granite-3.3-8b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: ibm-granite/granite-3.3-8b-instruct - end_of_text_token: "<|end_of_text|>" - prefix_token: "<|end_of_text|>" - - # Z.ai GLM-4.5-AIR-FP8 - - name: zai-org/glm-4.5-air-fp8 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - end_of_text_token: "<|endoftext|>" - prefix_token: "" - - - - # DeepSeek-R1-Distill-Llama-3.1-8b - - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Llama-8B - end_of_text_token: "<|end▁of▁sentence|>" - prefix_token: "<|begin▁of▁sentence|>" - -# deepseek-ai/deepseek-coder-6.7b-instruct - - name: deepseek-ai/deepseek-coder-6.7b-instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: deepseek-ai/deepseek-coder-6.7b-instruct - end_of_text_token: "<|end▁of▁sentence|>" - prefix_token: "<|begin▁of▁sentence|>" - - -# vilm/vinallama-2.7b-chat - - name: vilm/vinallama-2.7b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: vilm/vinallama-2.7b-chat - end_of_text_token: "" - prefix_token: "" - -# vilm/vinallama-7b-chat - - name: vilm/vinallama-7b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: vilm/vinallama-7b-chat - end_of_text_token: "" - prefix_token: "" - -# vilm/vietcuna-7b-v3 - - name: vilm/vietcuna-7b-v3 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: vilm/vietcuna-7b-v3 - end_of_text_token: "" - prefix_token: "" - -# Viet-Mistral/Vistral-7B-Chat - - name: Viet-Mistral/Vistral-7B-Chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: Viet-Mistral/Vistral-7B-Chat - end_of_text_token: "" - 
prefix_token: "" - -# vinai/PhoGPT-7B5-Instruct - - name: vinai/PhoGPT-7B5-Instruct - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: vinai/PhoGPT-7B5-Instruct - end_of_text_token: "" - prefix_token: "" - -# vinai/PhoGPT-4B-Chat - - name: vinai/PhoGPT-4B-Chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: vinai/PhoGPT-4B-Chat - end_of_text_token: "" - prefix_token: "" - -# Gemma-3-Gaia-PT-BR-4b-it - - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it - end_of_text_token: "" - prefix_token: "" - -# Bode 13B Alpaca PT-BR - - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: recogna-nlp/bode-13b-alpaca-pt-br-no-peft - end_of_text_token: "" - prefix_token: "" - -# Cabrita 7B PT-BR tokenizer - - name: 22h/cabrita_7b_pt_850000 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: 22h/cabrita_7b_pt_850000 - end_of_text_token: "" - prefix_token: "" - -# Gervásio 7B PT‑BR/PT‑PT tokenizer - - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: PORTULAN/gervasio-7b-portuguese-ptbr-decoder - end_of_text_token: "" - prefix_token: "" - -# Tucano 2b4 PT-BR tokenizer - - name: TucanoBR/Tucano-2b4 - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: TucanoBR/Tucano-2b4 - end_of_text_token: "" - prefix_token: "" - -# TeenyTinyLlama 460M PT-BR tokenizer - - name: nicholasKluge/TeenyTinyLlama-460m - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/llama7b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-hf - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/qwen3-30b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507 - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/qwen3-80b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-Next-80B-A3B-Instruct - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/gemma-3-27b-it - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/google/gemma-3-27b-it - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/medgemma-27b-it - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-27b-it - 
end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/medgemma-4b-it - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-4b-it - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/medgemma-4b-pt - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/google/medgemma-4b-pt - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/gemma-3-4b-pt - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/google/gemma-3-4b-pt - end_of_text_token: "" - prefix_token: "" - - - - - name: proxy_tuning/llama-7b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf - end_of_text_token: "" - prefix_token: "" - - - name: proxy_tuning/qwen3-30b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507 - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" \ No newline at end of file From 3809df5a142a8baffb8544149cf61bc51c2b3429 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:23:40 -0700 Subject: [PATCH 21/42] Add files via upload --- ...ries_medhelm_private_proxy_tuning (1).conf | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf diff --git a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf new file mode 100644 index 00000000000..99c7e36968c --- /dev/null +++ b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf @@ -0,0 +1,192 @@ +# MedHELM RunSpecs for the private benchmarks from Stanford. 
+ +entries: [ + + ########## Clinical Decision Support ########## + + ### Supporting Diagnostic Decisions ### + + #Alcohol Dependence + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=alcohol_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Attention Deficit Hyperactivity Disorder + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=attention_deficit_hyperactivity_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Bipolar Disorder + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=bipolar_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Chronic Pain + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=chronic_pain,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Homelessness + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + 
{description: "clear:condition=homelessness,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Liver Disease + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=liver_disease,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Major Depression + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + 
{description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=major_depression,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Personality Disorder + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=personality_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Post-Traumatic Stress Disorder + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=post_traumatic_stress_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Substance Use Disorder + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=substance_use_disorder,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Suicidal Behavior + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=suicidal_behavior,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Tobacco Dependence + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=tobacco_dependence,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + #Unemployment + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: "clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + {description: 
"clear:condition=unemployment,max_eval_instances=100,model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}, + + ### Planning Treatments ### + + ### Predicting Patient Risks and Outcomes ### + + + ########## Clinical Note Generation ########## + + ### Documenting Patient Visits ### + + ### Recording Procedures ### + + ### Documenting Diagnostic Reports ### + + ### Documenting Care Plans ### + {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + {description: "chw_care_plan:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/datasets/CHW_Dataset.csv", priority: 1}, + + ########## Patient Communication and Education ########## + + ### Providing Patient Education Resources ### + + ### Delivering Personalized Care Instructions ### + {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_none_none_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: 
"starr_patient_instructions:model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,model_deployment=proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + {description: "starr_patient_instructions:model=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,model_deployment=proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20,data_path=/share/pi/nigam/suhana/medhelm/data/starr-omop-personalized-care-instr/dataset_cases_qc.csv", priority: 1}, + + ### Patient-Provider Messaging ### + + + ### Enhancing Patient Understanding and Accessibility in Health Communication ### + + ### Facilitating Patient Engagement and Support ### + + ########## Medical Research Assistance ########## + + ### Conducting Literature Research ### + + ### Analyzing Clinical Research Data ### + + ### Recording Research Processes ### + + + ### Ensuring Clinical Research Quality ### + + ### Managing Research Enrollment ### + + ########## Administration and Workflow ########## + + ### Scheduling Resources and Staff ### + + + ### Overseeing Financial Activities ### + + ### Care Coordination and Planning ### + + ### Organizing Workflow Processes ### + +] From 1b6398b17ea0b71e753184c8c03419019ba7a954 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:24:49 -0700 Subject: [PATCH 22/42] Rename run_entries_medhelm_private_proxy_tuning (1).conf to run_entries_medhelm_private_proxy_tuning.conf --- ...ing (1).conf => run_entries_medhelm_private_proxy_tuning.conf} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/helm/benchmark/presentation/{run_entries_medhelm_private_proxy_tuning (1).conf => run_entries_medhelm_private_proxy_tuning.conf} (100%) diff --git a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf similarity index 100% rename from src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning (1).conf rename to src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf From 3a450bab80dcebf61313245e47d3a90fc4ca903e Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Fri, 24 Oct 2025 00:09:45 -0700 Subject: [PATCH 23/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 93 +++++++++++++++++-------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index fb5cd721b63..5fed9c32ba8 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -24,14 +24,14 @@ from datetime import datetime MODEL_PATHS = { - # download from huggingface 
"llama-70b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-70b-chat-hf", + "llama-7b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf", + "llama-7b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-hf", "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", - "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", - # download from physionet -- https://physionet.org/content/me-llama/1.0.0/ "mellama-13b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B-chat", + "mellama-13b-base": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B", "mellama-70b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-70B-chat", - + "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", } LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm" @@ -103,7 +103,6 @@ def get_union_vocab(v1, v2): unique_tokens = [] for v1_tokens, v2_tokens in zip(v1,v2): unique_tokens.append(list(set(v1_tokens.keys()) | set(v2_tokens.keys()))) - return unique_tokens @@ -217,26 +216,26 @@ def __init__( self.antiexpert = None self.tok_anti = None - print("loading base") + print("loading base model") self.base = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) self.base.eval() self.tok_base = base_tokenizer - print("done loading base") + print("done loading base model") if proxy or unite: - print("loading exp") + print("loading exp model") self.expert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) self.expert.eval() self.tok_exp = expert_tokenizer - print("done loading exp") + print("done loading exp model") if proxy: - print("loading anti") + print("loading anti model") self.antiexpert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) self.antiexpert.eval() self.tok_anti = anti_tokenizer - print("done loading anti") + print("done loading anti model") self.alpha = alpha @@ -310,8 +309,9 @@ def generate( base_kwargs["attention_mask"] = base_attn base_kwargs["use_cache"] = True original_prompt_len = base_input_ids.shape[1] - print("1") + # this allows for generation using huggingface's generate function -- + # it doesn't make a difference, but since i do manual generation in classification tasks in order to implement constrained decoding, i don't do this. 
# if not proxy and not unite: # gen = self.base.generate( # input_ids=base_input_ids, @@ -330,12 +330,22 @@ def generate( expert_input_ids, expert_attn, expert_text = self._encode_for_gen(self.tok_exp, prompt, device=self.expert.device) expert_kwargs = kwargs.copy() expert_kwargs["attention_mask"] = expert_attn - expert_kwargs["use_cache"] = True + expert_kwargs["use_cache"] = False + original_prompt_len_expert = expert_input_ids.shape[1] + expert_prompt_ids = expert_input_ids[0, :original_prompt_len_expert] + expert_prompt_decoded = self.tok_exp.decode(expert_prompt_ids, skip_special_tokens=True) if proxy: antiexpert_input_ids, anti_attn, anto = self._encode_for_gen(self.tok_anti, prompt, device=self.antiexpert.device) antiexpert_kwargs = kwargs.copy() antiexpert_kwargs["attention_mask"] = anti_attn - antiexpert_kwargs["use_cache"] = True + antiexpert_kwargs["use_cache"] = False + original_prompt_len_antiexpert = antiexpert_input_ids.shape[1] + antiexpert_prompt_ids = antiexpert_input_ids[0, :original_prompt_len_antiexpert] + antiexpert_prompt_decoded = self.tok_anti.decode(antiexpert_prompt_ids, skip_special_tokens=True) + + if proxy and score_type == "logits": + expert_kwargs["use_cache"] = True + antiexpert_kwargs["use_cache"] = True # keep track of which sequences are already finished unfinished_sequences = torch.ones(1, dtype=torch.long, device=base_input_ids.device) @@ -359,8 +369,10 @@ def generate( token_ids_out = torch.empty(T, device=device, dtype=torch.int32) t_write = 0 print("3") - - for step in range(max_new_tokens): + + for step in range(max_new_tokens): + if step == max_new_tokens - 1: + print("hit max tokens") base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) base_outputs = self.base(**base_inputs, return_dict=True) base_next_token_logits = base_outputs.logits[..., -1, :] @@ -432,28 +444,43 @@ def generate( next_token_id1 = next_tokens.tolist() next_token_id2 = list(next_token_id1) next_token_id3 = list(next_token_id1) - + exp_step_ids = torch.as_tensor([next_token_id2], device=expert_input_ids.device, dtype=torch.long) + expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) + expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) + anti_step_ids = torch.as_tensor([next_token_id3], device=antiexpert_input_ids.device, dtype=torch.long) + antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) + antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) + step_ids = torch.as_tensor(next_token_id1, device=base_input_ids.device, dtype=torch.long) base_input_ids = torch.cat([base_input_ids, step_ids[:, None]], dim=-1) base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) - - if proxy or unite: - exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) - expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) - expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) - if proxy: - anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) - antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) - antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) - + if (proxy and score_type == "logprobs") or unite: + base_gen_ids = base_input_ids[0, original_prompt_len:] + 
base_gen_decoded = self.tok_base.decode(base_gen_ids, skip_special_tokens=True) + expert_input_decoded = expert_prompt_decoded + base_gen_decoded + expert_input_ids, expert_kwargs["attention_mask"], expert_text = self._encode_for_gen(self.tok_exp, expert_input_decoded, device=self.expert.device) + if proxy: + antiexpert_input_decoded = antiexpert_prompt_decoded + base_gen_decoded + antiexpert_input_ids, antiexpert_kwargs["attention_mask"], antiexpert_text = self._encode_for_gen(self.tok_exp, antiexpert_input_decoded, device=self.expert.device) + + if step < 10: + print(f"\n=== Step {step} ===") + print(f"Base decoded: {self.tok_base.decode(base_input_ids[0], skip_special_tokens=False)}") + if proxy or unite: + print(f"Expert decoded: {self.tok_exp.decode(expert_input_ids[0], skip_special_tokens=False)}") + if proxy: + print(f"Anti-expert decoded: {self.tok_exp.decode(antiexpert_input_ids[0], skip_special_tokens=False)}") + print(f"---") + + + at_eos = (step_ids == eos_token_id_tensor[0]).long() unfinished_sequences = unfinished_sequences * (1 - at_eos) if unfinished_sequences.max() == 0: break - print("4") gen_ids = base_input_ids[0, original_prompt_len:] generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) @@ -473,7 +500,6 @@ def generate( }] return generation, results - print("5") return generation def ensure_dir(d): @@ -661,15 +687,22 @@ def __init__( def make_request(self, request: Request) -> RequestResult: prompt_text = request.prompt + max_new_tokens=750 + + + if request.max_tokens: + max_new_tokens=request.max_tokens + print("max_new_tokens: ", max_new_tokens) if request.messages: + print(request.messages) prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") # progress = tqdm.tqdm(total=1, desc="Generating Completions") print("doing a generation", flush=True) generation = self.any_model.generate( prompt = prompt_text, - max_new_tokens = 700, + max_new_tokens = max_new_tokens, alpha = self.alpha, return_logits_for_analysis = False, score_type = self.score_type, From 1cde06af643a0417867549d7fc5901830b006fb6 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:20:48 -0700 Subject: [PATCH 24/42] Update medhelm_run_specs.py --- .../benchmark/run_specs/medhelm_run_specs.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/helm/benchmark/run_specs/medhelm_run_specs.py b/src/helm/benchmark/run_specs/medhelm_run_specs.py index 8ec30bcd698..0ec0f0642ac 100644 --- a/src/helm/benchmark/run_specs/medhelm_run_specs.py +++ b/src/helm/benchmark/run_specs/medhelm_run_specs.py @@ -122,7 +122,7 @@ def get_medcalc_bench_spec() -> RunSpec: @run_spec_function("clear") -def get_clear_spec(condition: str, data_path: str) -> RunSpec: +def get_clear_spec(condition: str, data_path: str, f1: bool) -> RunSpec: scenario_spec = ScenarioSpec( class_name="helm.benchmark.scenarios.clear_scenario.CLEARScenario", args={ @@ -146,13 +146,22 @@ def get_clear_spec(condition: str, data_path: str) -> RunSpec: max_tokens=1, ) - return RunSpec( - name=f"clear:condition={condition}", - scenario_spec=scenario_spec, - adapter_spec=adapter_spec, - metric_specs=get_f1_metric_specs(), - groups=["clear"], - ) + if f1: + return RunSpec( + name=f"clear:condition={condition}", + scenario_spec=scenario_spec, + adapter_spec=adapter_spec, + metric_specs=get_f1_metric_specs(), + groups=["clear"], + ) + else: + return RunSpec( + name=f"clear:condition={condition}", + 
scenario_spec=scenario_spec, + adapter_spec=adapter_spec, + metric_specs=get_exact_match_metric_specs(), + groups=["clear"], + ) @run_spec_function("mtsamples_replicate") From 5d92c1bf70548570c82b967ad0fe5c44a9ddb148 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:24:02 -0700 Subject: [PATCH 25/42] Update clear_scenario.py --- src/helm/benchmark/scenarios/clear_scenario.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/helm/benchmark/scenarios/clear_scenario.py b/src/helm/benchmark/scenarios/clear_scenario.py index 2d6bdb5d3d1..01f76e8c35b 100644 --- a/src/helm/benchmark/scenarios/clear_scenario.py +++ b/src/helm/benchmark/scenarios/clear_scenario.py @@ -78,7 +78,7 @@ class CLEARScenario(Scenario): "unemployment": "unemployment", } - def __init__(self, condition: str, data_path: str): + def __init__(self, condition: str, data_path: str, f1: bool): """Initialize the scenario with a specific medical condition""" super().__init__() From 74bb784eca340c1a2d4131354a812d14b58e8d06 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:30:17 -0700 Subject: [PATCH 26/42] Update clear_scenario.py --- src/helm/benchmark/scenarios/clear_scenario.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/helm/benchmark/scenarios/clear_scenario.py b/src/helm/benchmark/scenarios/clear_scenario.py index 01f76e8c35b..2d6bdb5d3d1 100644 --- a/src/helm/benchmark/scenarios/clear_scenario.py +++ b/src/helm/benchmark/scenarios/clear_scenario.py @@ -78,7 +78,7 @@ class CLEARScenario(Scenario): "unemployment": "unemployment", } - def __init__(self, condition: str, data_path: str, f1: bool): + def __init__(self, condition: str, data_path: str): """Initialize the scenario with a specific medical condition""" super().__init__() From e22366d27356736b7ffe95ac7a7c85452e42fecc Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:30:59 -0700 Subject: [PATCH 27/42] Update medhelm_run_specs.py --- .../benchmark/run_specs/medhelm_run_specs.py | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/helm/benchmark/run_specs/medhelm_run_specs.py b/src/helm/benchmark/run_specs/medhelm_run_specs.py index 0ec0f0642ac..b7920a3a3c9 100644 --- a/src/helm/benchmark/run_specs/medhelm_run_specs.py +++ b/src/helm/benchmark/run_specs/medhelm_run_specs.py @@ -122,7 +122,7 @@ def get_medcalc_bench_spec() -> RunSpec: @run_spec_function("clear") -def get_clear_spec(condition: str, data_path: str, f1: bool) -> RunSpec: +def get_clear_spec(condition: str, data_path: str) -> RunSpec: scenario_spec = ScenarioSpec( class_name="helm.benchmark.scenarios.clear_scenario.CLEARScenario", args={ @@ -146,22 +146,13 @@ def get_clear_spec(condition: str, data_path: str, f1: bool) -> RunSpec: max_tokens=1, ) - if f1: - return RunSpec( - name=f"clear:condition={condition}", - scenario_spec=scenario_spec, - adapter_spec=adapter_spec, - metric_specs=get_f1_metric_specs(), - groups=["clear"], - ) - else: - return RunSpec( - name=f"clear:condition={condition}", - scenario_spec=scenario_spec, - adapter_spec=adapter_spec, - metric_specs=get_exact_match_metric_specs(), - groups=["clear"], - ) + return RunSpec( + name=f"clear:condition={condition}", + scenario_spec=scenario_spec, + adapter_spec=adapter_spec, + metric_specs=get_exact_match_metric_specs(), + groups=["clear"], + ) 
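+
+# Illustrative mapping (assumed, based on the run entries conf above): an entry
+# such as
+#   {description: "clear:condition=unemployment,max_eval_instances=100,
+#       model=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,
+#       model_deployment=proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20,
+#       data_path=/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/", priority: 1}
+# is resolved by HELM to this run spec function, roughly as
+#   get_clear_spec(condition="unemployment",
+#                  data_path="/share/pi/nigam/suhana/medhelm/data/CLEAR/human_labeled/")
+# while model= / model_deployment= select the deployment (here, a
+# ProxyTuningClient) and max_eval_instances caps the evaluated instances.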
@run_spec_function("mtsamples_replicate") From b089298351585c5dfee3e03b6a5ff74f8be0af54 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:34:07 -0700 Subject: [PATCH 28/42] Update tokenizer_configs.yaml --- src/helm/config/tokenizer_configs.yaml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index be908a7e93d..8edbf45fefe 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -1286,19 +1286,4 @@ tokenizer_configs: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" end_of_text_token: "" prefix_token: "" - - - name: proxy_tuning/llama-7b-chat - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf - end_of_text_token: "" - prefix_token: "" - - name: proxy_tuning/qwen3-30b - tokenizer_spec: - class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" - args: - pretrained_model_name_or_path: /share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507 - end_of_text_token: "<|im_end|>" - prefix_token: "<|im_start|>" From 396bbeec0af10941ffdd6c56631eb3108ced94c2 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:35:19 -0700 Subject: [PATCH 29/42] Update model_deployments.yaml --- src/helm/config/model_deployments.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index 318b8912c66..946c58fa3e5 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -5111,63 +5111,63 @@ model_deployments: - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 - tokenizer_name: proxy_tuning/llama-7b-chat + tokenizer_name: meta-llama/Llama-2-7b-hf max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 model_name: proxy_tuning/mellama-70b-chat_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat + tokenizer_name: meta-llama/Llama-2-7b-hf max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 model_name: proxy_tuning/mellama-13b-chat_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/llama-7b-chat + tokenizer_name: meta-llama/Llama-2-7b-hf max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 model_name: proxy_tuning/qwen3-30b_none_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b + tokenizer_name: qwen/qwen2.5-7b-instruct max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b + tokenizer_name: qwen/qwen2.5-7b-instruct max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: 
proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 model_name: proxy_tuning/qwen3-30b_mellama-13b-base_llama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b + tokenizer_name: qwen/qwen2.5-7b-instruct max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_mellama-13b-base_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b + tokenizer_name: qwen/qwen2.5-7b-instruct max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 model_name: proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_1.0_logits_20 - tokenizer_name: proxy_tuning/llama-7b-chat + tokenizer_name: meta-llama/Llama-2-7b-hf max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" - name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 model_name: proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 - tokenizer_name: proxy_tuning/qwen3-30b + tokenizer_name: qwen/qwen2.5-7b-instruct max_sequence_length: 4096 client_spec: class_name: "helm.clients.proxy_tuning_client.ProxyTuningClient" From b24bda5c77c2b9ebd28d8da976327972ab413414 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:36:34 -0700 Subject: [PATCH 30/42] Update tokenizer_configs.yaml --- src/helm/config/tokenizer_configs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index 8edbf45fefe..7232fc6b7d1 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -1286,4 +1286,3 @@ tokenizer_configs: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" end_of_text_token: "" prefix_token: "" - From b4ce130d29779b6c5acb508df126cca1ffc94cc2 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 12:42:37 -0700 Subject: [PATCH 31/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 316 +++++++++--------------- 1 file changed, 121 insertions(+), 195 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index 5fed9c32ba8..0df0bb12e2d 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -1,8 +1,5 @@ # File: helm/clients/proxy_tuning_client.py from helm.clients.client import Client -from helm.common.cache import CacheConfig -from helm.tokenizers.tokenizer import Tokenizer -from helm.common.cache import Cache from helm.common.request import Request, RequestResult, GeneratedOutput from typing import Optional, Dict, Any, List @@ -15,12 +12,7 @@ import tqdm from transformers import BitsAndBytesConfig import math - -# from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn, build_token_enforcer_tokenizer_data -# from pydantic import BaseModel - from typing import Literal - from datetime import datetime MODEL_PATHS = { @@ -153,18 +145,16 @@ def unite_add(v1, v2, lamda, tokenizer): next_token_id2.append(element_v2[item1][1]) i+=1 return next_token_id1, next_token_id2 - -# capt logit probability arithmetic -def capt_add(v1, v2, v3, 
tokenizer, alpha, device=None): +def capt_add(v1, v2, v3, alpha): next_token_id1, next_token_id2, next_token_id3 = [], [], [] - comb_ids_per_batch, comb_scores_per_batch = [], [] + base_lp_chosen = dexpert_lp_chosen = None for element_v1, element_v2, element_v3 in zip(v1, v2, v3): v_new = {} - + for token1 in element_v1: v_new[token1] = [ element_v1[token1][0] + @@ -180,18 +170,21 @@ def capt_add(v1, v2, v3, tokenizer, alpha, device=None): next_token_id1.append(element_v1[item1][1]) next_token_id2.append(element_v2[item1][1]) next_token_id3.append(element_v3[item1][1]) + base_lp_chosen = element_v1[item1][0] + dexpert_lp_chosen = v_new[item1][0] + if torch.is_tensor(dexpert_lp_chosen): + dexpert_lp_chosen = dexpert_lp_chosen.item() + + i += 1 - ids = torch.tensor([v_new[t][1] for t in v_new], dtype=torch.long, device=device) - scores = torch.tensor([v_new[t][0] for t in v_new], dtype=torch.float32, device=device) - comb_ids_per_batch.append(ids) - comb_scores_per_batch.append(scores) - return next_token_id1, next_token_id2, next_token_id3, comb_ids_per_batch, comb_scores_per_batch + + print("capt add is returning: " , next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen) + return next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen def add_pad_token(tok, padding_side="left"): # Ensure pad token exists and set padding side if tok.pad_token_id is None: - # Prefer to reuse eos as pad when no pad is defined tok.pad_token = tok.eos_token tok.padding_side = padding_side return tok @@ -202,9 +195,6 @@ def __init__( base_name, expert_name, antiexpert_name, - base_tokenizer, - expert_tokenizer, - anti_tokenizer, alpha: float = 1.0, unite: bool = False, proxy: bool = False, @@ -218,33 +208,50 @@ def __init__( print("loading base model") + if base_name in ["mellama-13b-chat", "mellama-13b-base"]: + self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) + elif base_name in ["mellama-70b-chat"]: + self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=True) + else: + self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=True) + + self.tok_base = add_pad_token(self.tok_base) + + print("done loading base tok", flush=True) + self.base = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) self.base.eval() - self.tok_base = base_tokenizer + print("done loading base model") if proxy or unite: + print("loading exp tok", flush=True) + self.tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) + self.tok_exp = add_pad_token(self.tok_exp) + print("done loading exp tok") print("loading exp model") self.expert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) self.expert.eval() - self.tok_exp = expert_tokenizer print("done loading exp model") if proxy: + print("loading anti tok", flush=True) + self.tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) + self.tok_anti = add_pad_token(self.tok_anti) + print("done loading anti tok", flush=True) + print("loading anti model") self.antiexpert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) self.antiexpert.eval() - self.tok_anti = anti_tokenizer print("done loading anti model") - self.alpha = alpha self.device = self.base.device - def _encode_for_gen(self, tok, prompt: str, device=None): + def _encode_for_gen(self, tok, prompt: str, device=None, no_chat=False): text = 
prompt - if getattr(tok, "chat_template", None): + if not no_chat and getattr(tok, "chat_template", None): messages = [{"role": "user", "content": prompt}] text = tok.apply_chat_template( messages, @@ -298,11 +305,9 @@ def generate( k=20, unite: bool = False, proxy: bool = False, - prefix_allowed_tokens_fn=None, - prefix_allowed_tokens_fn_exp=None, **kwargs ): - # print("prompt: ", prompt) + logit_results = None base_input_ids, base_attn, text = self._encode_for_gen(self.tok_base, prompt, device=self.base.device) print("prompt with (potential) instruction tag: ", text) base_kwargs = kwargs.copy() @@ -310,20 +315,19 @@ def generate( base_kwargs["use_cache"] = True original_prompt_len = base_input_ids.shape[1] - # this allows for generation using huggingface's generate function -- - # it doesn't make a difference, but since i do manual generation in classification tasks in order to implement constrained decoding, i don't do this. -# if not proxy and not unite: -# gen = self.base.generate( -# input_ids=base_input_ids, -# attention_mask=base_attn, -# max_new_tokens=max_new_tokens, -# do_sample=False, -# eos_token_id=self.tok_base.eos_token_id, -# pad_token_id=self.tok_base.pad_token_id, -# ) -# gen_ids = gen[0, original_prompt_len:] -# generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) -# return generation + # if not proxy or unite, do generation using huggingface's generate function -- + if not proxy and not unite: + gen = self.base.generate( + input_ids=base_input_ids, + attention_mask=base_attn, + max_new_tokens=max_new_tokens, + do_sample=False, + eos_token_id=self.tok_base.eos_token_id, + pad_token_id=self.tok_base.pad_token_id, + ) + gen_ids = gen[0, original_prompt_len:] + generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) + return generation, logit_results if proxy or unite: @@ -350,41 +354,23 @@ def generate( # keep track of which sequences are already finished unfinished_sequences = torch.ones(1, dtype=torch.long, device=base_input_ids.device) eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id], device=base_input_ids.device) - print("2") if return_logits_for_analysis: T = max_new_tokens - device = base_input_ids.device - # 1 x T buffers on GPU - p_dexperts = torch.empty(T, device=device, dtype=torch.bfloat16) - p_base = torch.empty(T, device=device, dtype=torch.bfloat16) - p_expert = torch.empty(T, device=device, dtype=torch.bfloat16) - p_anti = torch.empty(T, device=device, dtype=torch.bfloat16) - - preds_dexperts = torch.empty(T, device=device, dtype=torch.int32) - preds_base = torch.empty(T, device=device, dtype=torch.int32) - preds_expert = torch.empty(T, device=device, dtype=torch.int32) - preds_anti = torch.empty(T, device=device, dtype=torch.int32) - - token_ids_out = torch.empty(T, device=device, dtype=torch.int32) - t_write = 0 - print("3") - + tok_ids = torch.empty(T, dtype=torch.int32, device="cpu") + base_tok_ids = torch.empty(T, dtype=torch.int32, device="cpu") + base_lps = torch.empty(T, dtype=torch.float32, device="cpu") + dexpert_lps = torch.empty(T, dtype=torch.float32, device="cpu") + argdiffs = torch.empty(T, dtype=torch.int8, device="cpu") # 0 or 1 + t = 0 + + for step in range(max_new_tokens): if step == max_new_tokens - 1: print("hit max tokens") base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) base_outputs = self.base(**base_inputs, return_dict=True) base_next_token_logits = base_outputs.logits[..., -1, :] - - if prefix_allowed_tokens_fn: - mask = 
torch.full_like(base_next_token_logits, -math.inf) - sent = base_input_ids[0] - prefix_allowed_tokens = prefix_allowed_tokens_fn(0, sent) - if len(prefix_allowed_tokens) == 0: - raise ValueError("prefix_allowed_tokens_fn returned an empty list.") - mask[0, prefix_allowed_tokens] = 0 - base_next_token_logits = base_next_token_logits + mask next_token_id1 = next_token_id2 = next_token_id3 = None @@ -433,7 +419,17 @@ def generate( v_anti = get_top_k_tokens(antiexpert_lp, self.tok_anti, k=0) v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_lp, 'llama') - next_token_id1, next_token_id2, next_token_id3, _, _ = capt_add(v_base, v_exp, v_anti, self.tok_base, alpha, device=base_input_ids.device) + next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen = capt_add(v_base, v_exp, v_anti, alpha) + if return_logits_for_analysis: + base_argmax_id = int(torch.argmax(base_lp[0]).item()) + capt_choice_id = int(next_token_id1[0]) + tok_ids[t] = capt_choice_id + base_tok_ids[t] = base_argmax_id + base_lps[t] = float(base_lp_chosen) + dexpert_lps[t] = float(dexpert_lp_chosen) + argdiffs[t] = 1 if base_argmax_id != capt_choice_id else 0 + t += 1 + elif score_type == "logits": # regular proxy tuning expert_next_token_logits = expert_next_token_logits[:, :base_next_token_logits.shape[-1]] next_token_logits = ( @@ -445,10 +441,10 @@ def generate( next_token_id2 = list(next_token_id1) next_token_id3 = list(next_token_id1) - exp_step_ids = torch.as_tensor([next_token_id2], device=expert_input_ids.device, dtype=torch.long) + exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) - anti_step_ids = torch.as_tensor([next_token_id3], device=antiexpert_input_ids.device, dtype=torch.long) + anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) @@ -460,18 +456,18 @@ def generate( base_gen_ids = base_input_ids[0, original_prompt_len:] base_gen_decoded = self.tok_base.decode(base_gen_ids, skip_special_tokens=True) expert_input_decoded = expert_prompt_decoded + base_gen_decoded - expert_input_ids, expert_kwargs["attention_mask"], expert_text = self._encode_for_gen(self.tok_exp, expert_input_decoded, device=self.expert.device) + expert_input_ids, expert_kwargs["attention_mask"], expert_text = self._encode_for_gen(self.tok_exp, expert_input_decoded, device=self.expert.device, no_chat=True) if proxy: antiexpert_input_decoded = antiexpert_prompt_decoded + base_gen_decoded - antiexpert_input_ids, antiexpert_kwargs["attention_mask"], antiexpert_text = self._encode_for_gen(self.tok_exp, antiexpert_input_decoded, device=self.expert.device) + antiexpert_input_ids, antiexpert_kwargs["attention_mask"], antiexpert_text = self._encode_for_gen(self.tok_anti, antiexpert_input_decoded, device=self.antiexpert.device, no_chat=True) - if step < 10: + if step < 15: print(f"\n=== Step {step} ===") print(f"Base decoded: {self.tok_base.decode(base_input_ids[0], skip_special_tokens=False)}") if proxy or unite: print(f"Expert decoded: {self.tok_exp.decode(expert_input_ids[0], skip_special_tokens=False)}") if proxy: - print(f"Anti-expert decoded: 
{self.tok_exp.decode(antiexpert_input_ids[0], skip_special_tokens=False)}") + print(f"Anti-expert decoded: {self.tok_anti.decode(antiexpert_input_ids[0], skip_special_tokens=False)}") print(f"---") @@ -484,102 +480,23 @@ def generate( gen_ids = base_input_ids[0, original_prompt_len:] generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) + if proxy and return_logits_for_analysis: - sl = slice(0, t_write) - results = [{ - 'token_ids': token_ids_out[sl], # [T’] int32 (GPU) - 'p_dexperts': p_dexperts[sl], # [T’] fp16 (GPU) - 'preds_dexperts': preds_dexperts[sl], # [T’] int32 (GPU) - 'p_base': p_base[sl], - 'preds_base': preds_base[sl], - 'p_expert': p_expert[sl], - 'preds_expert': preds_expert[sl], - 'p_antiexpert': p_anti[sl], - 'preds_antiexpert': preds_anti[sl], - # (optional) decode later if you want strings - }] - return generation, results + logit_results = { + "token_ids": tok_ids[:t], + "base_tok_ids": base_tok_ids[:t], + "base_lp": base_lps[:t], + "dexpert_lp": dexpert_lps[:t], + "argdiff": argdiffs[:t], + } + - return generation + return generation, logit_results def ensure_dir(d): if not os.path.exists(d): os.makedirs(d, exist_ok=True) -def load_model_and_tokenizer( - base_name: str, - expert_name: str, - antiexpert_name: str, - device_map: str = "auto", - alpha: float = 1.0, - system_prompt: Optional[str] = None, - use_fast_tokenizer: bool = True, - padding_side: str = "left", - proxy: bool = False, - unite: bool = False, -): - bnb_cfg = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16, - ) - - model_kwargs = { - 'device_map': device_map, - 'dtype': torch.bfloat16, - 'quantization_config': bnb_cfg, - 'low_cpu_mem_usage': True, - 'trust_remote_code': True, - } - - print("loading base tok", flush=True) - - - if base_name in ["mellama-13b-chat", "mellama-13b-base", "mellama-70b-chat"]: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - else: - tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=use_fast_tokenizer) - - tok_base = add_pad_token(tok_base, padding_side) - - print("done loading base tok", flush=True) - - tok_exp = tok_anti = None - - if proxy or unite: - print("loading exp tok", flush=True) - tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - tok_exp = add_pad_token(tok_exp, padding_side) - print("done loading exp tok") - if proxy: - print("loading anti tok", flush=True) - tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-13b-base"], use_fast=use_fast_tokenizer) - tok_anti = add_pad_token(tok_anti, padding_side) - print("done loading anti tok", flush=True) - - - print ("creating any model", flush=True) - model = AnyModel( - base_name=base_name, - expert_name=expert_name, - antiexpert_name=antiexpert_name, - base_tokenizer=tok_base, - expert_tokenizer=tok_exp, - anti_tokenizer=tok_anti, - alpha=alpha, - proxy=proxy, - unite=unite, - model_kwargs=model_kwargs, - ) - print ("created any model", flush=True) - - print(f"[Loader] Base : {base_name}", flush=True) - print(f"[Loader] Expert : {expert_name}", flush=True) - print(f"[Loader] Anti : {antiexpert_name}", flush=True) - - return model, tok_base - # proxy tuning helpers def _safe_tag(model_name: str) -> str: @@ -630,28 +547,18 @@ class ProxyTuningClient(Client): def __init__( self, - tokenizer: Tokenizer, - tokenizer_name: str, - cache_config: CacheConfig, model_name: str = None, - 
api_base: str = None, - api_key: str = None, ): - self.cache = Cache(cache_config) """ Initializes the ProxyTuningClient. - Args: - tokenizer (Tokenizer): Tokenizer instance (unused but required by HELM interface). - tokenizer_name (str): Name of the tokenizer (unused but required by HELM interface). - cache_config (CacheConfig): Configuration for caching. - """ self.run_dir, self.token_log_path, self.logits_dir = setup_run_dirs(model_name) self.model_name = model_name self.run_id = datetime.now().strftime("%Y%m%d_%H%M%S") self.req_seq = 0 tag = model_name.split("/")[-1] + self.return_logits_for_analysis = True parts = tag.split("_") @@ -672,24 +579,43 @@ def __init__( else: self.is_proxy = True - print ("loading model", flush=True) - self.any_model, self.hf_tokenizer = load_model_and_tokenizer( + + bnb_cfg = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + model_kwargs = { + 'device_map': "auto", + 'dtype': torch.bfloat16, + 'quantization_config': bnb_cfg, + 'low_cpu_mem_usage': True, + 'trust_remote_code': True, + } + + print ("creating any model", flush=True) + self.any_model = AnyModel( base_name=base_name, expert_name=expert_name, antiexpert_name=antiexpert_name, - device_map='auto', - proxy=self.is_proxy, - unite=self.is_unite - + alpha=self.alpha, + proxy=self.is_proxy, + unite=self.is_unite, + model_kwargs=model_kwargs, ) - print ("loaded model", flush=True) + + print(f"[Loader] Base : {base_name}", flush=True) + print(f"[Loader] Expert : {expert_name}", flush=True) + print(f"[Loader] Anti : {antiexpert_name}", flush=True) + def make_request(self, request: Request) -> RequestResult: prompt_text = request.prompt max_new_tokens=750 - if request.max_tokens: max_new_tokens=request.max_tokens print("max_new_tokens: ", max_new_tokens) @@ -697,21 +623,21 @@ def make_request(self, request: Request) -> RequestResult: if request.messages: print(request.messages) prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") - + # progress = tqdm.tqdm(total=1, desc="Generating Completions") print("doing a generation", flush=True) - generation = self.any_model.generate( + + generation, logit_results = self.any_model.generate( prompt = prompt_text, max_new_tokens = max_new_tokens, alpha = self.alpha, - return_logits_for_analysis = False, + return_logits_for_analysis = self.return_logits_for_analysis, score_type = self.score_type, k = self.k, unite = self.is_unite, proxy = self.is_proxy, - prefix_allowed_tokens_fn=None, - prefix_allowed_tokens_fn_exp=None, ) + print("generation: ", generation, flush=True) @@ -719,10 +645,10 @@ def make_request(self, request: Request) -> RequestResult: request_id = f"{self.run_id}_r{self.req_seq:04d}" logits_path = None -# if self.is_proxy and all_results: -# logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") -# torch.save(all_results, logits_path) -# print(f"[Logits] wrote {logits_path}") + if self.return_logits_for_analysis and logit_results: + logits_path = os.path.join(self.logits_dir, f"logits_{request_id}.pt") + torch.save(logit_results, logits_path) + print(f"[Logits] wrote {logits_path}") append_request_row( csv_path=self.token_log_path, From db0ac7e08d8a5c12200389646ff705de3fdcfe0a Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 13:04:02 -0700 Subject: [PATCH 32/42] Update proxy_tuning_client.py --- 
 src/helm/clients/proxy_tuning_client.py | 63 +++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py
index 0df0bb12e2d..b20d689ccf0 100644
--- a/src/helm/clients/proxy_tuning_client.py
+++ b/src/helm/clients/proxy_tuning_client.py
@@ -1,4 +1,67 @@
 # File: helm/clients/proxy_tuning_client.py
+"""
+Proxy-tuned HELM client
+=======================
+
+This module implements a HELM Client that routes generation through
+decoding-time strategies for domain-level adaptation.
+It runs multiple models (base, expert, anti-expert).
+This is experimental code to test different decoding-time strategies.
+
+Main classes
+------------
+- AnyModel: Thin wrapper that loads one or more HF models/tokenizers and
+  performs step-wise generation under three modes:
+  1) **Base** (single base model, runs using HF generate() function),
+  2) **Unite** (merge base + expert via vocabulary union arithmetic):
+     - adapted from [this codebase](https://github.com/starrYYxuan/UniTE/)
+  3) **Proxy**:
+     - Original method adapted from [this codebase](https://github.com/alisawuffles/proxy-tuning/tree/main):
+       - base + alpha(expert − anti-expert) at the logit level with models of the same vocabulary.
+     - Cross-Architecture Proxy Tuning (our novel method)
+       - same formula as above using log-probs with models of differing vocabulary
+
+- ProxyTuningClient: A HELM client that parses the deployment tag to
+  configure `AnyModel`, runs generation for a given `Request`, and logs
+  per-request outputs and token-wise outputs for the proxy tuning method.
+
+Deployment/tag format
+---------------------
+The `model_name` (a.k.a. deployment tag) is expected to be of the form:
+    "proxy_tuning/{base}_{expert}_{antiexpert}_{alpha}_{score_type}_{k}"
+
+Examples:
+    proxy_tuning/mellama-13b-chat_none_none_1.0_logits_20 (base)
+    proxy_tuning/qwen3-30b_mellama-13b-chat_none_1.0_logprobs_20 (unite)
+    proxy_tuning/llama-70b-chat_mellama-13b-chat_llama-13b-base_0.7_logits_10 (Original proxy, logits)
+    proxy_tuning/qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_10 (CAPT proxy, logprobs)
+
+Meaning of each sub-tag:
+- base / expert / antiexpert: keys that must exist in `MODEL_PATHS` below
+  (use "none" to disable that role).
+  - if only base is not "none" --> base method
+  - if base and expert are not "none" --> unite method
+  - if base, expert, and antiexpert are not "none" --> proxy method
+- alpha: float, strength of expert vs anti-expert adjustment.
+- score_type: "logits" (original proxy tuning) or "logprobs" (CAPT).
+- k: top-k token pool size when building the union vocabulary (for Unite
+  and CAPT).
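+
+Parsing sketch (illustrative only; mirrors the tag handling in
+ProxyTuningClient.__init__, with hypothetical variable names):
+
+    tag = "qwen3-30b_mellama-13b-chat_llama-13b-base_0.7_logprobs_20"
+    base, expert, antiexpert, alpha, score_type, k = tag.split("_")
+    alpha, k = float(alpha), int(k)   # 0.7, 20
+    unite = expert != "none" and antiexpert == "none"
+    proxy = expert != "none" and antiexpert != "none"
+    # proxy with score_type == "logits"   -> original proxy tuning
+    # proxy with score_type == "logprobs" -> CAPT (cross-vocabulary)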
+ +Artifacts & logging +------------------- +A results directory is created under: + + LOCAL_RESULTS_DIR/_/ + +Files inside: +- `_.csv` : One row per HELM request with columns: + timestamp, request_id, model_name, prompt, output, logits_path +- `logits_analysis/` : Optional per-request tensors (when + `return_logits_for_analysis=True`) saved via `torch.save(...)` as: + logits__r####.pt + +""" + from helm.clients.client import Client from helm.common.request import Request, RequestResult, GeneratedOutput From 0eb706fd9e4eda08dd332b1e1e094c2255eeed82 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 14:13:00 -0700 Subject: [PATCH 33/42] Update run_entries_medhelm_private_proxy_tuning.conf --- .../presentation/run_entries_medhelm_private_proxy_tuning.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf index 99c7e36968c..0c10f8b8443 100644 --- a/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf +++ b/src/helm/benchmark/presentation/run_entries_medhelm_private_proxy_tuning.conf @@ -1,4 +1,4 @@ -# MedHELM RunSpecs for the private benchmarks from Stanford. +# MedHELM RunSpecs for the private benchmarks from Stanford with the inference-time domain adaptation models in the Proxy Tuning class. entries: [ From ff38c743bda0dcd8b87eafbcc614a02327ca0108 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 14:14:20 -0700 Subject: [PATCH 34/42] Update model_deployments.yaml --- src/helm/config/model_deployments.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index 946c58fa3e5..c4813d1267d 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -8,6 +8,8 @@ # # This file defines all the model deployments that you do not want to be public. # model_deployments: [] # Leave empty to disable private model deployments +# This file contains deployments for the Proxy Tuning class. + model_deployments: - name: simple/model1 model_name: simple/model1 @@ -5107,7 +5109,6 @@ model_deployments: dspy_module: ChainOfThought dspy_api_model: openai/o3-mini-2025-01-31 dspy_api_base: null - - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 model_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 From 7ee25c6b626fdcb79eff305196a2fcf91dbda886 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 14:15:26 -0700 Subject: [PATCH 35/42] Update model_metadata.yaml --- src/helm/config/model_metadata.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index d6c172cf142..6d616eb0cab 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -8,6 +8,8 @@ # # This file contains the metadata for private models # models: [] # Leave empty to disable private models +# This file contains model metadata for models using the ProxyTuning Class. 
+ + models: From 15398f43ab7c6a936a352ac08a3ac4ab35fcb3b0 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Tue, 28 Oct 2025 14:15:58 -0700 Subject: [PATCH 36/42] Update model_metadata.yaml --- src/helm/config/model_metadata.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index 6d616eb0cab..8cd416d58c9 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -5232,7 +5232,6 @@ models: release_date: 2025-01-31 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - - name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 display_name: proxy_tuning/llama-70b-chat_none_none_1.0_logits_20 description: llama-70b-chat. From 9856941eba4cbb38cf64eaab6838a1c3445e93b5 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 29 Oct 2025 13:46:40 -0700 Subject: [PATCH 37/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 576 ++---------------------- 1 file changed, 39 insertions(+), 537 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index b20d689ccf0..f3e9a6e5ef1 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -1,4 +1,3 @@ -# File: helm/clients/proxy_tuning_client.py """ Proxy-tuned HELM client ======================= @@ -10,8 +9,7 @@ Main classes ------------ -- AnyModel: Thin wrapper that loads one or more HF models/tokenizers and - performs step-wise generation under three modes: +- AnyModel: Imported from any_model.py in the proxy_tuning directory. Performs step-wise generation under three modes: 1) **Base** (single base model, runs using the HF generate() function), 2) **Unite** (merge base + expert via vocabulary union arithmetic): - adapted from [this codebase](https://github.com/starrYYxuan/UniTE/) 3) **Proxy**: - Original method adapted from [this codebase](https://github.com/alisawuffles/proxy-tuning/tree/main): - base + alpha(expert − anti-expert) at the logit level with models of same vocabulary. - Cross-Architecture Proxy Tuning (our novel method) - same formula as above using log-probs with models of differing vocabulary - ProxyTuningClient: A HELM client that parses the deployment tag to @@ -47,8 +45,8 @@ - k: top-k token pool size when building the union vocabulary (for Unite and CAPT).
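Since patch 37 moves the per-step arithmetic out to any_model.py, the two proxy variants are easiest to see stripped down. A minimal single-step sketch (function names are illustrative, not the any_model.py API): the logits variant assumes all three models share one tokenizer, as original proxy tuning requires, and the CAPT variant is shown on already-aligned tensors, eliding the top-k union-vocabulary projection the real client uses to reconcile differing vocabularies:

    import torch
    import torch.nn.functional as F

    def proxy_step_logits(base_logits, expert_logits, antiexpert_logits, alpha=1.0):
        # Original proxy tuning: base + alpha * (expert - antiexpert) on raw
        # logits over a shared vocabulary, then greedy selection.
        combined = base_logits + alpha * (expert_logits - antiexpert_logits)
        return torch.argmax(combined, dim=-1)

    def capt_step_logprobs(base_logits, expert_logits, antiexpert_logits, alpha=1.0):
        # CAPT: the same formula applied to log-probabilities instead of logits.
        combined = F.log_softmax(base_logits, dim=-1) + alpha * (
            F.log_softmax(expert_logits, dim=-1) - F.log_softmax(antiexpert_logits, dim=-1)
        )
        return torch.argmax(combined, dim=-1)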
-Artifacts & logging -------------------- +Artifacts +--------- A results directory is created under: LOCAL_RESULTS_DIR/_/ @@ -64,508 +62,32 @@ from helm.clients.client import Client from helm.common.request import Request, RequestResult, GeneratedOutput - -from typing import Optional, Dict, Any, List -import torch, os, json -from transformers import AutoModelForCausalLM, AutoTokenizer -import torch.nn.functional as F -from transformers.generation.utils import ( - ModelOutput, -) -import tqdm -from transformers import BitsAndBytesConfig -import math -from typing import Literal +import os +import sys +import torch from datetime import datetime -MODEL_PATHS = { - "llama-70b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-70b-chat-hf", - "llama-7b-chat": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-chat-hf", - "llama-7b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-7b-hf", - "llama-13b-base": "/share/pi/ema2016/models/meta-llama/Llama-2-13b-hf", - "mellama-13b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B-chat", - "mellama-13b-base": "/share/pi/ema2016/models/me-llama/MeLLaMA-13B", - "mellama-70b-chat": "/share/pi/ema2016/models/me-llama/MeLLaMA-70B-chat", - "qwen3-30b": "/share/pi/ema2016/models/Qwen3-30B-A3B-Instruct-2507", -} - -LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm" -# helpers adapted from unite - -def update_vocab(v1, vu, tokenizer, logits, model_name): - for vu_token, v1_token, logit_ele in zip(vu,v1,logits): - v1_token_ids = [] - for item in v1_token.values(): - v1_token_ids.append(item[1]) - for token in vu_token: - if token not in v1_token.keys(): - if 'llama' in model_name.lower(): - token = token.replace('Ġ','▁') - if token != '': - subtoken_id = tokenizer.convert_tokens_to_ids(token) - if subtoken_id != 0 and subtoken_id != None: #Mistral and Llama2 oov id 0 - logit = logit_ele[subtoken_id] - else: - subtokens = tokenizer.tokenize(token) - for token_id in tokenizer.convert_tokens_to_ids(subtokens): - #if 'llama2' in model_name: - if 'llama' in model_name.lower(): - if token_id != 29871: - subtoken_id = token_id - break - else: - subtoken_id = token_id - break - logit = logit_ele[subtoken_id] - else: - if 'qwen' in model_name.lower(): - logit = logit_ele[220] - subtoken_id = 220 - if 'llama' in model_name.lower(): - logit = logit_ele[29871] - subtoken_id = 29871 - - if 'llama' in model_name.lower(): - v1_token[token.replace('▁', 'Ġ')] = [logit, subtoken_id] - else: - if subtoken_id not in v1_token_ids: - v1_token[token] = [logit, subtoken_id] - v1_token_ids.append(subtoken_id) - else: - v1_token[token] = [0, subtoken_id] - v1_new = v1 - return v1_new - -def vocab_softmax(v1): - v1_new = [] - for element in v1: - ele = {} - ele_values = list(element.values()) - ele_values0, ele_values1 = [], [] - for item in ele_values: - ele_values0.append(item[0]) - ele_values1.append(item[1]) - ele_values0 = torch.softmax(torch.tensor(ele_values0), dim=0) - for token, prob, ids in zip(element.keys(),ele_values0,ele_values1): - ele[token] = [prob, ids] - v1_new.append(ele) - - return v1_new - - -def get_union_vocab(v1, v2): - # Extract unique tokens from both dictionaries - unique_tokens = [] - for v1_tokens, v2_tokens in zip(v1,v2): - unique_tokens.append(list(set(v1_tokens.keys()) | set(v2_tokens.keys()))) - return unique_tokens - +# Make the top-level proxy_tuning dir importable +sys.path.insert(0, "/share/pi/ema2016/users/sronaghi/proxy_tuning") -def get_top_k_tokens(logits, tokenizer, k=10): - probs = logits - - top_k_indices = 
torch.topk(probs, k).indices - probs = probs.tolist() - top_k_probs = [] - for idx, prob in zip(top_k_indices,probs): - prob_item = [] - for i in idx: - prob_item.append(prob[i]) - top_k_probs.append(prob_item) - - top_k_tokens = [] - for indices in top_k_indices: - token_item = [] - for idx in indices: - token_item.append(tokenizer.convert_ids_to_tokens(idx.item(), skip_special_tokens=True)) - top_k_tokens.append(token_item) - - v1 = [] - for token, prob, id in zip(top_k_tokens, top_k_probs, top_k_indices): - v1.append( - {token.replace('▁','Ġ').replace('<0x0A>','/n').replace('Ċ','/n'): [prob, int(id)] for token, prob, id in zip(token, prob, id)}) - - return v1 - -# unite logit probability arithmetic - -def unite_add(v1, v2, lamda, tokenizer): - next_token_id1, next_token_id2 = [], [] - for element_v1, element_v2 in zip(v1, v2): - assert len(element_v1) == len(element_v2) - v_new = {} - for token1 in element_v1: - v_new[token1] = [lamda * element_v1[token1][0] + (1 - lamda) * element_v2[token1][0], - element_v1[token1][1]] - probs = [] - for item in v_new.values(): - probs.append(item[0]) - sample_index = probs.index(max(probs)) - i = 0 - for item1 in v_new.keys(): - if i == sample_index: - next_token_id1.append(element_v1[item1][1]) - next_token_id2.append(element_v2[item1][1]) - i+=1 - return next_token_id1, next_token_id2 - - -def capt_add(v1, v2, v3, alpha): - next_token_id1, next_token_id2, next_token_id3 = [], [], [] - base_lp_chosen = dexpert_lp_chosen = None - - for element_v1, element_v2, element_v3 in zip(v1, v2, v3): - - v_new = {} - - for token1 in element_v1: - v_new[token1] = [ - element_v1[token1][0] + - (alpha * (element_v2[token1][0] - element_v3[token1][0])), - element_v1[token1][1] - ] - - probs = [item[0] for item in v_new.values()] - sample_index = probs.index(max(probs)) - i = 0 - for item1 in v_new.keys(): - if i == sample_index: - next_token_id1.append(element_v1[item1][1]) - next_token_id2.append(element_v2[item1][1]) - next_token_id3.append(element_v3[item1][1]) - base_lp_chosen = element_v1[item1][0] - dexpert_lp_chosen = v_new[item1][0] - if torch.is_tensor(dexpert_lp_chosen): - dexpert_lp_chosen = dexpert_lp_chosen.item() - - - i += 1 - - print("capt add is returning: " , next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen) - return next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen - - -def add_pad_token(tok, padding_side="left"): - # Ensure pad token exists and set padding side - if tok.pad_token_id is None: - tok.pad_token = tok.eos_token - tok.padding_side = padding_side - return tok - -class AnyModel: - def __init__( - self, - base_name, - expert_name, - antiexpert_name, - alpha: float = 1.0, - unite: bool = False, - proxy: bool = False, - model_kwargs: Dict[str, Any] = None - ): - - self.expert = None - self.tok_exp = None - self.antiexpert = None - self.tok_anti = None - - print("loading base model") - - if base_name in ["mellama-13b-chat", "mellama-13b-base"]: - self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) - elif base_name in ["mellama-70b-chat"]: - self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-chat"], use_fast=True) - else: - self.tok_base = AutoTokenizer.from_pretrained(MODEL_PATHS[base_name], use_fast=True) - - self.tok_base = add_pad_token(self.tok_base) - - print("done loading base tok", flush=True) - - self.base = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[base_name], **model_kwargs) - self.base.eval() - - 
print("done loading base model") - - if proxy or unite: - print("loading exp tok", flush=True) - self.tok_exp = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) - self.tok_exp = add_pad_token(self.tok_exp) - print("done loading exp tok") - print("loading exp model") - self.expert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[expert_name], **model_kwargs) - self.expert.eval() - print("done loading exp model") - - if proxy: - print("loading anti tok", flush=True) - self.tok_anti = AutoTokenizer.from_pretrained(MODEL_PATHS["llama-7b-base"], use_fast=True) - self.tok_anti = add_pad_token(self.tok_anti) - print("done loading anti tok", flush=True) - - print("loading anti model") - self.antiexpert = AutoModelForCausalLM.from_pretrained(MODEL_PATHS[antiexpert_name], **model_kwargs) - self.antiexpert.eval() - print("done loading anti model") - - self.alpha = alpha - self.device = self.base.device +from any_model import load_any_model - - def _encode_for_gen(self, tok, prompt: str, device=None, no_chat=False): - text = prompt - if not no_chat and getattr(tok, "chat_template", None): - messages = [{"role": "user", "content": prompt}] - text = tok.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True, - ) - enc = tok(text, return_tensors="pt", add_special_tokens=True) - input_ids = enc["input_ids"] - attention_mask = enc.get("attention_mask", (input_ids != tok.pad_token_id).long()) - if device is not None: - input_ids = input_ids.to(device) - attention_mask = attention_mask.to(device) - return input_ids, attention_mask, text - - - def _update_model_kwargs_for_generation( - self, - outputs: ModelOutput, - kwargs: Dict[str, Any], - ) -> Dict[str, Any]: - # update past_key_values - kwargs["past_key_values"] = outputs.past_key_values - - # update attention mask - if "attention_mask" in kwargs: - attention_mask = kwargs["attention_mask"] - kwargs["attention_mask"] = torch.cat( - [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 - ) - if getattr(outputs, "cache_position", None) is not None: - # some models already return it - kwargs["cache_position"] = outputs.cache_position - else: - if "cache_position" in kwargs: - kwargs["cache_position"] = kwargs["cache_position"] + 1 - else: - # first step: position is sequence-length-1 - seq_len = kwargs["attention_mask"].shape[1] - kwargs["cache_position"] = torch.arange(seq_len - 1, seq_len, device=kwargs["attention_mask"].device) +LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm" - return kwargs - - @torch.inference_mode() - def generate( - self, - prompt, - max_new_tokens: Optional[int] = 700, - alpha: float = 1.0, - return_logits_for_analysis: bool = False, - score_type=None, - k=20, - unite: bool = False, - proxy: bool = False, - **kwargs - ): - logit_results = None - base_input_ids, base_attn, text = self._encode_for_gen(self.tok_base, prompt, device=self.base.device) - print("prompt with (potential) instruction tag: ", text) - base_kwargs = kwargs.copy() - base_kwargs["attention_mask"] = base_attn - base_kwargs["use_cache"] = True - original_prompt_len = base_input_ids.shape[1] - - # if not proxy or unite, do generation using huggingface's generate function -- - if not proxy and not unite: - gen = self.base.generate( - input_ids=base_input_ids, - attention_mask=base_attn, - max_new_tokens=max_new_tokens, - do_sample=False, - eos_token_id=self.tok_base.eos_token_id, - pad_token_id=self.tok_base.pad_token_id, - ) - gen_ids = gen[0, 
original_prompt_len:] - generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) - return generation, logit_results - - - if proxy or unite: - expert_input_ids, expert_attn, expert_text = self._encode_for_gen(self.tok_exp, prompt, device=self.expert.device) - expert_kwargs = kwargs.copy() - expert_kwargs["attention_mask"] = expert_attn - expert_kwargs["use_cache"] = False - original_prompt_len_expert = expert_input_ids.shape[1] - expert_prompt_ids = expert_input_ids[0, :original_prompt_len_expert] - expert_prompt_decoded = self.tok_exp.decode(expert_prompt_ids, skip_special_tokens=True) - if proxy: - antiexpert_input_ids, anti_attn, anto = self._encode_for_gen(self.tok_anti, prompt, device=self.antiexpert.device) - antiexpert_kwargs = kwargs.copy() - antiexpert_kwargs["attention_mask"] = anti_attn - antiexpert_kwargs["use_cache"] = False - original_prompt_len_antiexpert = antiexpert_input_ids.shape[1] - antiexpert_prompt_ids = antiexpert_input_ids[0, :original_prompt_len_antiexpert] - antiexpert_prompt_decoded = self.tok_anti.decode(antiexpert_prompt_ids, skip_special_tokens=True) - - if proxy and score_type == "logits": - expert_kwargs["use_cache"] = True - antiexpert_kwargs["use_cache"] = True - - # keep track of which sequences are already finished - unfinished_sequences = torch.ones(1, dtype=torch.long, device=base_input_ids.device) - eos_token_id_tensor = torch.tensor([self.tok_base.eos_token_id], device=base_input_ids.device) - - if return_logits_for_analysis: - T = max_new_tokens - tok_ids = torch.empty(T, dtype=torch.int32, device="cpu") - base_tok_ids = torch.empty(T, dtype=torch.int32, device="cpu") - base_lps = torch.empty(T, dtype=torch.float32, device="cpu") - dexpert_lps = torch.empty(T, dtype=torch.float32, device="cpu") - argdiffs = torch.empty(T, dtype=torch.int8, device="cpu") # 0 or 1 - t = 0 - - - for step in range(max_new_tokens): - if step == max_new_tokens - 1: - print("hit max tokens") - base_inputs = self.base.prepare_inputs_for_generation(base_input_ids, **base_kwargs) - base_outputs = self.base(**base_inputs, return_dict=True) - base_next_token_logits = base_outputs.logits[..., -1, :] - - next_token_id1 = next_token_id2 = next_token_id3 = None - - if not unite and not proxy: - next_tokens = torch.argmax(base_next_token_logits, dim=-1) # indices of top tokens - next_token_id1 = next_tokens.tolist() - - if proxy or unite: - expert_inputs = self.expert.prepare_inputs_for_generation(expert_input_ids, **expert_kwargs) - expert_outputs = self.expert(**expert_inputs, return_dict=True) - expert_next_token_logits = expert_outputs.logits[..., -1, :] - - if unite and prefix_allowed_tokens_fn_exp: - mask = torch.full_like(expert_next_token_logits, -math.inf) - sent = expert_input_ids[0] - allowed = prefix_allowed_tokens_fn_exp(0, sent) - if len(allowed) == 0: - raise ValueError("prefix_allowed_tokens_fn returned an empty list.") - mask[0, allowed] = 0 - expert_next_token_logits = expert_next_token_logits + mask - - if unite: - v_base = get_top_k_tokens(base_next_token_logits, self.tok_base, k=k) - v_exp = get_top_k_tokens(expert_next_token_logits, self.tok_exp, k=k) - vu = get_union_vocab(v_base, v_exp) - v_base = update_vocab(v_base, vu, self.tok_base, base_next_token_logits,'qwen') - v_base = vocab_softmax(v_base) - v_exp = update_vocab(v_exp, vu, self.tok_exp, expert_next_token_logits,'llama') - v_exp = vocab_softmax(v_exp) - - next_token_id1, next_token_id2 = unite_add(v_base,v_exp, 0.5, self.tok_base) - - elif proxy: - antiexpert_inputs = 
self.antiexpert.prepare_inputs_for_generation(antiexpert_input_ids, **antiexpert_kwargs) - antiexpert_outputs = self.antiexpert(**antiexpert_inputs, return_dict=True) - antiexpert_next_token_logits = antiexpert_outputs.logits[..., -1, :] - - if score_type == "logprobs": #capt - base_lp = F.log_softmax(base_next_token_logits, dim=-1) - expert_lp = F.log_softmax(expert_next_token_logits, dim=-1) - antiexpert_lp = F.log_softmax(antiexpert_next_token_logits, dim=-1) - - v_base = get_top_k_tokens(base_lp, self.tok_base, k=k) - v_exp = get_top_k_tokens(expert_lp, self.tok_exp, k=0) - v_exp = update_vocab(v_exp, v_base, self.tok_exp, expert_lp,'llama') - v_anti = get_top_k_tokens(antiexpert_lp, self.tok_anti, k=0) - v_anti = update_vocab(v_anti, v_base, self.tok_anti, antiexpert_lp, 'llama') - - next_token_id1, next_token_id2, next_token_id3, base_lp_chosen, dexpert_lp_chosen = capt_add(v_base, v_exp, v_anti, alpha) - if return_logits_for_analysis: - base_argmax_id = int(torch.argmax(base_lp[0]).item()) - capt_choice_id = int(next_token_id1[0]) - tok_ids[t] = capt_choice_id - base_tok_ids[t] = base_argmax_id - base_lps[t] = float(base_lp_chosen) - dexpert_lps[t] = float(dexpert_lp_chosen) - argdiffs[t] = 1 if base_argmax_id != capt_choice_id else 0 - t += 1 - - elif score_type == "logits": # regular proxy tuning - expert_next_token_logits = expert_next_token_logits[:, :base_next_token_logits.shape[-1]] - next_token_logits = ( - base_next_token_logits + - self.alpha * (expert_next_token_logits - antiexpert_next_token_logits) - ) - next_tokens = torch.argmax(next_token_logits, dim=-1) # indices of top tokens - next_token_id1 = next_tokens.tolist() - next_token_id2 = list(next_token_id1) - next_token_id3 = list(next_token_id1) - - exp_step_ids = torch.as_tensor(next_token_id2, device=expert_input_ids.device, dtype=torch.long) - expert_input_ids = torch.cat([expert_input_ids, exp_step_ids[:, None]], dim=-1) - expert_kwargs = self._update_model_kwargs_for_generation(expert_outputs, expert_kwargs) - anti_step_ids = torch.as_tensor(next_token_id3, device=antiexpert_input_ids.device, dtype=torch.long) - antiexpert_input_ids = torch.cat([antiexpert_input_ids, anti_step_ids[:, None]], dim=-1) - antiexpert_kwargs= self._update_model_kwargs_for_generation(antiexpert_outputs,antiexpert_kwargs) - - step_ids = torch.as_tensor(next_token_id1, device=base_input_ids.device, dtype=torch.long) - base_input_ids = torch.cat([base_input_ids, step_ids[:, None]], dim=-1) - base_kwargs = self._update_model_kwargs_for_generation(base_outputs, base_kwargs) - - if (proxy and score_type == "logprobs") or unite: - base_gen_ids = base_input_ids[0, original_prompt_len:] - base_gen_decoded = self.tok_base.decode(base_gen_ids, skip_special_tokens=True) - expert_input_decoded = expert_prompt_decoded + base_gen_decoded - expert_input_ids, expert_kwargs["attention_mask"], expert_text = self._encode_for_gen(self.tok_exp, expert_input_decoded, device=self.expert.device, no_chat=True) - if proxy: - antiexpert_input_decoded = antiexpert_prompt_decoded + base_gen_decoded - antiexpert_input_ids, antiexpert_kwargs["attention_mask"], antiexpert_text = self._encode_for_gen(self.tok_anti, antiexpert_input_decoded, device=self.antiexpert.device, no_chat=True) - - if step < 15: - print(f"\n=== Step {step} ===") - print(f"Base decoded: {self.tok_base.decode(base_input_ids[0], skip_special_tokens=False)}") - if proxy or unite: - print(f"Expert decoded: {self.tok_exp.decode(expert_input_ids[0], skip_special_tokens=False)}") - if proxy: - 
print(f"Anti-expert decoded: {self.tok_anti.decode(antiexpert_input_ids[0], skip_special_tokens=False)}") - print(f"---") - - - - at_eos = (step_ids == eos_token_id_tensor[0]).long() - unfinished_sequences = unfinished_sequences * (1 - at_eos) - if unfinished_sequences.max() == 0: - break - - gen_ids = base_input_ids[0, original_prompt_len:] - generation = self.tok_base.decode(gen_ids, skip_special_tokens=True) - - - if proxy and return_logits_for_analysis: - logit_results = { - "token_ids": tok_ids[:t], - "base_tok_ids": base_tok_ids[:t], - "base_lp": base_lps[:t], - "dexpert_lp": dexpert_lps[:t], - "argdiff": argdiffs[:t], - } - - - return generation, logit_results def ensure_dir(d): if not os.path.exists(d): os.makedirs(d, exist_ok=True) + # proxy tuning helpers + def _safe_tag(model_name: str) -> str: # e.g. "proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat" return model_name.replace("/", "_").replace(" ", "").replace(".", "").replace("-", "") + def setup_run_dirs(model_name: str, root=LOCAL_RESULTS_DIR): """ Creates: @@ -594,11 +116,16 @@ def setup_run_dirs(model_name: str, root=LOCAL_RESULTS_DIR): return run_dir, csv_path, logits_dir -def append_request_row(csv_path: str, request_id: str, model_name: str, prompt: str, output: str, logits_path: str | None): +def append_request_row( + csv_path: str, request_id: str, model_name: str, prompt: str, output: str, logits_path: str | None +): ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + def esc(s: str) -> str: - if s is None: return "" + if s is None: + return "" return s.replace("\n", "\\n").replace(",", ",") + with open(csv_path, "a") as f: f.write(f"{ts},{request_id},{esc(model_name)},{esc(prompt)},{esc(output)},{esc(logits_path or '')}\n") @@ -623,15 +150,14 @@ def __init__( tag = model_name.split("/")[-1] self.return_logits_for_analysis = True - parts = tag.split("_") - base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = ( + base_name, expert_name, antiexpert_name, self.alpha, self.score_type, k_str = ( parts[0], parts[1], parts[2], float(parts[3]), parts[4], - parts[5] + parts[5], ) self.k = int(k_str) self.is_unite = False @@ -641,69 +167,45 @@ def __init__( self.is_unite = True else: self.is_proxy = True - - - bnb_cfg = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16, - ) - - model_kwargs = { - 'device_map': "auto", - 'dtype': torch.bfloat16, - 'quantization_config': bnb_cfg, - 'low_cpu_mem_usage': True, - 'trust_remote_code': True, - } - print ("creating any model", flush=True) - self.any_model = AnyModel( + self.any_model = load_any_model( base_name=base_name, expert_name=expert_name, antiexpert_name=antiexpert_name, alpha=self.alpha, proxy=self.is_proxy, unite=self.is_unite, - model_kwargs=model_kwargs, ) - print(f"[Loader] Base : {base_name}", flush=True) - print(f"[Loader] Expert : {expert_name}", flush=True) - print(f"[Loader] Anti : {antiexpert_name}", flush=True) - - def make_request(self, request: Request) -> RequestResult: - + prompt_text = request.prompt - max_new_tokens=750 - + max_new_tokens = 750 + if request.max_tokens: - max_new_tokens=request.max_tokens + max_new_tokens = request.max_tokens print("max_new_tokens: ", max_new_tokens) if request.messages: print(request.messages) prompt_text = " ".join(msg["content"] for msg in request.messages if msg.get("role") != "system") - + # progress = tqdm.tqdm(total=1, desc="Generating Completions") print("doing a 
generation", flush=True) - + generation, logit_results = self.any_model.generate( - prompt = prompt_text, - max_new_tokens = max_new_tokens, - alpha = self.alpha, - return_logits_for_analysis = self.return_logits_for_analysis, - score_type = self.score_type, - k = self.k, - unite = self.is_unite, - proxy = self.is_proxy, + prompt=prompt_text, + max_new_tokens=max_new_tokens, + alpha=self.alpha, + return_logits_for_analysis=self.return_logits_for_analysis, + score_type=self.score_type, + k=self.k, + unite=self.is_unite, + proxy=self.is_proxy, ) - print("generation: ", generation, flush=True) - + self.req_seq += 1 request_id = f"{self.run_id}_r{self.req_seq:04d}" @@ -721,7 +223,7 @@ def make_request(self, request: Request) -> RequestResult: output=generation, logits_path=logits_path, ) - + # Return a HELM-compatible RequestResult output = GeneratedOutput(text=generation, logprob=0.0, tokens=[]) return RequestResult(success=True, cached=False, completions=[output], embedding=[]) From 8a2bb7e5076f0cee414c785efe861c7005d6ce7d Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Wed, 29 Oct 2025 13:50:26 -0700 Subject: [PATCH 38/42] Update pyproject.toml This addition allows for proxy tuning class to run for MedHelm scenarios. After creating conda environment, only need to run pip install -U "crfm-helm[proxy_tuning]" --- pyproject.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ac742a44510..3ffb9858c56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -352,6 +352,20 @@ medhelm = [ "transformers~=4.45,<4.50", ] +proxy_tuning = [ + "transformers~=4.51.0,<4.53.0", #for qwen MoE models + "bitsandbytes>=0.43", + "accelerate~=0.25", + + "crfm-helm[openai]", + "crfm-helm[yandex]", + "crfm-helm[scenarios]", + "lxml~=5.3", + "openpyxl~=3.1", + "python-docx~=1.1", + "lm-format-enforcer~=0.11.3" +] + audiolm = [ "crfm-helm[openai]", "crfm-helm[google]", From 2e60d94ac547f66f464ad0bf41ad7afbbb350f04 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Fri, 31 Oct 2025 10:17:30 -0700 Subject: [PATCH 39/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index f3e9a6e5ef1..c3f19cabeb5 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -66,6 +66,7 @@ import sys import torch from datetime import datetime +from typing import Optional # Make the top-level proxy_tuning dir importable sys.path.insert(0, "/share/pi/ema2016/users/sronaghi/proxy_tuning") From 19d24b09555ab0c97da25aa84cbd35a804e5440e Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Fri, 31 Oct 2025 11:03:33 -0700 Subject: [PATCH 40/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index c3f19cabeb5..8f20075a7a3 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -65,8 +65,8 @@ import os import sys import torch +from typing import Optional from datetime import datetime -from typing import Optional # Make the top-level proxy_tuning dir importable sys.path.insert(0, "/share/pi/ema2016/users/sronaghi/proxy_tuning") @@ -82,8 +82,6 
@@ def ensure_dir(d): # proxy tuning helpers - - def _safe_tag(model_name: str) -> str: # e.g. "proxy_tuning/llama70b_mellama13bchat" -> "proxy_tuning_llama70b_mellama13bchat" return model_name.replace("/", "_").replace(" ", "").replace(".", "").replace("-", "") From 4577725a60db52404bcb528f4a394c97c41a0d22 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Fri, 31 Oct 2025 11:48:43 -0700 Subject: [PATCH 41/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index 8f20075a7a3..5d2d5f074fc 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -68,7 +68,6 @@ from typing import Optional from datetime import datetime -# Make the top-level proxy_tuning dir importable sys.path.insert(0, "/share/pi/ema2016/users/sronaghi/proxy_tuning") from any_model import load_any_model @@ -136,7 +135,7 @@ class ProxyTuningClient(Client): def __init__( self, - model_name: str = None, + model_name: str, ): """ Initializes the ProxyTuningClient. From fa38ee26a7aba57e2e894a1c9c47672bc83c3fb8 Mon Sep 17 00:00:00 2001 From: Sasha Ronaghi <106111965+sronaghi@users.noreply.github.com> Date: Fri, 31 Oct 2025 13:20:07 -0700 Subject: [PATCH 42/42] Update proxy_tuning_client.py --- src/helm/clients/proxy_tuning_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/helm/clients/proxy_tuning_client.py b/src/helm/clients/proxy_tuning_client.py index 5d2d5f074fc..9b30456565c 100644 --- a/src/helm/clients/proxy_tuning_client.py +++ b/src/helm/clients/proxy_tuning_client.py @@ -3,9 +3,9 @@ ======================= This module implements a HELM Client that routes generation through -decoding-time strategies for domain-level adaptation. +decoding-time strategies for domain-level adaptation. It runs multiple models (base, expert, anti-expert). -This is experimental code to test different decoding-time strategies. +This is experimental code to test different decoding-time strategies. Main classes ------------ @@ -16,7 +16,7 @@ 3) **Proxy**: - Original method adapted from [this codebase](https://github.com/alisawuffles/proxy-tuning/tree/main): - base + alpha(expert − anti-expert) at the logit level with models of same vocabulary. - - Cross-Architecture Proxy Tuning (our novel method) + - Cross-Architecture Proxy Tuning (our novel method) - same formula as above using log-probs with models of differing vocabulary - ProxyTuningClient: A HELM client that parses the deployment tag to @@ -57,7 +57,7 @@ - `logits_analysis/` : Optional per-request tensors (when `return_logits_for_analysis=True`) saved via `torch.save(...)` as: logits__r####.pt - + """ from helm.clients.client import Client @@ -65,12 +65,11 @@ import os import sys import torch -from typing import Optional from datetime import datetime sys.path.insert(0, "/share/pi/ema2016/users/sronaghi/proxy_tuning") -from any_model import load_any_model +from any_model import load_any_model # noqa: E402 LOCAL_RESULTS_DIR = "/share/pi/ema2016/users/sronaghi/proxy_tuning/results/medhelm"
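For completeness, once both models' scores have been projected onto the union of their top-k token pools, the UniTE merge used by the unite mode reduces to a linear interpolation of probabilities. A minimal sketch, assuming the token -> (probability, per-model token id) dict layout of the helpers this series removed from the client and presumably now keeps in any_model.py (`unite_step` is an illustrative name, not the any_model.py API):

    def unite_step(p_base: dict, p_expert: dict, lam: float = 0.5):
        # p_base / p_expert: token -> (probability, token id in that model's
        # vocabulary), both softmax-normalized over the same union vocabulary.
        best = max(p_base, key=lambda t: lam * p_base[t][0] + (1.0 - lam) * p_expert[t][0])
        # Return each model's own id for the chosen surface token, so that each
        # model can extend its context with a token it actually knows.
        return p_base[best][1], p_expert[best][1]

The removed in-file `unite_add` helper computed the same interpolation per batch element and was called with lamda = 0.5, which matches the default here.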