diff --git a/.gitignore b/.gitignore index aaae2d2db..a9b67e00e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ fastspeech_output .bash_history.local # Byte-compiled / optimized / DLL files -__pycache__/ +**/__pycache__/ *.py[cod] *$py.class **.pyc diff --git a/nemo_gym/config_types.py b/nemo_gym/config_types.py index b340081c7..dd1b18831 100644 --- a/nemo_gym/config_types.py +++ b/nemo_gym/config_types.py @@ -312,6 +312,7 @@ class DatasetConfig(BaseModel): Literal["MIT"], Literal["Creative Commons Attribution 4.0 International"], Literal["Creative Commons Attribution-ShareAlike 4.0 International"], + Literal["NVIDIA Internal Use Only, Do Not Distribute"], Literal["TBD"], Literal["MIT"], ] diff --git a/resources_servers/translation_bleu/README.md b/resources_servers/translation_bleu/README.md new file mode 100644 index 000000000..6a7d6bffc --- /dev/null +++ b/resources_servers/translation_bleu/README.md @@ -0,0 +1,11 @@ +# Description + +Data links: ? + +# Licensing information +Code: Apache 2.0 +Data: NVIDIA Internal Use Only, Do Not Distribute + +# Dependencies +- nemo_gym: Apache 2.0 +- sacrebleu: Apache 2.0 diff --git a/resources_servers/translation_bleu/app.py b/resources_servers/translation_bleu/app.py new file mode 100644 index 000000000..6cb85a1bb --- /dev/null +++ b/resources_servers/translation_bleu/app.py @@ -0,0 +1,110 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from fastapi import FastAPI +from sacrebleu.metrics import BLEU + +from nemo_gym.base_resources_server import ( + BaseResourcesServerConfig, + BaseVerifyRequest, + BaseVerifyResponse, + SimpleResourcesServer, +) + + +class TranslationBleuResourcesServerConfig(BaseResourcesServerConfig): + reasoning_split_word: str = "</think>"  # delimiter marking the end of the reasoning block + + +class TranslationBleuVerifyRequest(BaseVerifyRequest): + trg_txt: str + trg_lang: str + + +class TranslationBleuVerifyResponse(BaseVerifyResponse): + trg_txt: str + trg_lang: str + extracted_answer: str + + +class TranslationBleuResourcesServer(SimpleResourcesServer): + config: TranslationBleuResourcesServerConfig + + TOKENIZER_MAP: Dict[str, str] = { + "zh": "zh", + "zh-cn": "zh", + "zh-tw": "zh", + "zho-CN": "zh", + "zho_simpl": "zh", + "ja": "ja-mecab", + "jpn": "ja-mecab", + "th": "flores200", + "ko": "ko-mecab", + } + + def setup_webserver(self) -> FastAPI: + app = super().setup_webserver() + + # Additional server routes go here! 
e.g.: + # app.post("/get_weather")(self.get_weather) + + return app + + async def verify(self, body: TranslationBleuVerifyRequest) -> TranslationBleuVerifyResponse: + assistant_responses = [] + for output_item in body.response.output: + if output_item.type != "message": + continue + + for content_item in output_item.content: + if content_item.type != "output_text": + continue + + assistant_responses.append(content_item.text) + + combined_response = "".join(assistant_responses) + + (reward, extracted_answer) = self._verify_answer( + ground_truth=body.trg_txt, target_lang=body.trg_lang, model_response=combined_response + ) + + return TranslationBleuVerifyResponse(**body.model_dump(), extracted_answer=extracted_answer, reward=reward) + + def _verify_answer(self, ground_truth: str, target_lang: str, model_response: str) -> tuple[float, str]: + extracted_answer = self._extract_answer(model_response) + + if target_lang in self.TOKENIZER_MAP: + tokenize = self.TOKENIZER_MAP[target_lang] + else: + tokenize = None + # Use effective_order for sentence-level BLEU + bleu = BLEU(trg_lang=target_lang, effective_order=True, tokenize=tokenize) + + bleu_output = bleu.sentence_score(extracted_answer, [ground_truth]) + # TODO Do we want to report any other BLEU outputs? + bleu_score = bleu_output.score + reward = bleu_score / 100.0 + + return reward, extracted_answer + + def _extract_answer(self, model_response: str) -> str: + # Strip any thinking + no_think_response = model_response.split(self.config.reasoning_split_word)[-1] + no_think_response = no_think_response.strip() + return no_think_response + + +if __name__ == "__main__": + TranslationBleuResourcesServer.run_webserver() diff --git a/resources_servers/translation_bleu/configs/translation_bleu.yaml b/resources_servers/translation_bleu/configs/translation_bleu.yaml new file mode 100644 index 000000000..81b004aac --- /dev/null +++ b/resources_servers/translation_bleu/configs/translation_bleu.yaml @@ -0,0 +1,38 @@ +translation_bleu: + resources_servers: + translation_bleu: + entrypoint: app.py + domain: translation +translation_bleu_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: translation_bleu + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: train + type: train + jsonl_fpath: resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_train.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_train + version: 0.0.3 + artifact_fpath: riva_mt_v3_nothinkInSys_train.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: validation + type: validation + jsonl_fpath: resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_validation.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_validation + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_validation.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: example + type: example + jsonl_fpath: resources_servers/translation_bleu/data/example.jsonl + num_repeats: 1 diff --git a/resources_servers/translation_bleu/data/.gitignore b/resources_servers/translation_bleu/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/resources_servers/translation_bleu/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git 
a/resources_servers/translation_bleu/data/example.jsonl b/resources_servers/translation_bleu/data/example.jsonl new file mode 100644 index 000000000..2c921e654 --- /dev/null +++ b/resources_servers/translation_bleu/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_bleu/data/example_nothink.jsonl b/resources_servers/translation_bleu/data/example_nothink.jsonl new file mode 100644 index 000000000..42529ab9a --- /dev/null +++ b/resources_servers/translation_bleu/data/example_nothink.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into English. 
Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_bleu/data/example_nothink_rollouts.jsonl b/resources_servers/translation_bleu/data/example_nothink_rollouts.jsonl new file mode 100644 index 000000000..7c7c2c018 --- /dev/null +++ b/resources_servers/translation_bleu/data/example_nothink_rollouts.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_de822fae9a3048609f9d29124711ef7e", "created_at": 1761320055.0, "error": null, "incomplete_details": 
null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_ca1ab146369449f6af314513647808ef", "content": [{"annotations": [], "text": "The Conservatory and the theater were dedicated to him, in which every year the \"Rossini Opera Festival\" is held, attracting opera lovers from around the world.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.1826249361348376, "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "trg_lang": "en", "extracted_answer": "The Conservatory and the theater were dedicated to him, in which every year the \"Rossini Opera Festival\" is held, attracting opera lovers from around the world."} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_1c8244efef744b38a8adb1e3265e6ca9", "created_at": 1761320055.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_ff0e183cd1f6448a8840104b9beb235c", "content": [{"annotations": [], "text": "次の図は、原子が光子を放出している様子を示しています。もちろん、実際には光子はその図の中のものよりもずっと小さいです。\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.41614625199196853, "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "trg_lang": "ja", "extracted_answer": "次の図は、原子が光子を放出している様子を示しています。もちろん、実際には光子はその図の中のものよりもずっと小さいです。"} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into English. 
Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_ac13960214d348b1b341ab0c8db10cfc", "created_at": 1761320055.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_94ba9b4835954fceaa534381996ace6c", "content": [{"annotations": [], "text": "Why waste time looking for the keys?\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.6752918218126552, "trg_txt": "Why waste your time looking for the keys?", "trg_lang": "en", "extracted_answer": "Why waste time looking for the keys?"} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_a8cda57b843444168553e60a61b6d0e7", "created_at": 1761320055.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_4da079740c9440169da96073a99113bc", "content": [{"annotations": [], "text": "Toutes les questions ont été répondues correctement par le trésorier Thomas Kräuchi.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.15727800941615358, "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "trg_lang": "fr", "extracted_answer": "Toutes les questions ont été répondues correctement par le trésorier Thomas Kräuchi."} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Translate the following 
text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_027e2b003199426d88afbf0be7b18ab0", "created_at": 1761320055.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_8f45cceaf9c741fb8dda838c68338f3b", "content": [{"annotations": [], "text": "The competition takes place on the grass, and the grass around the holes is cut shorter, referred to as the green.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.3204144198812986, "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "trg_lang": "en", "extracted_answer": "The competition takes place on the grass, and the grass around the holes is cut shorter, referred to as the green."} diff --git a/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_train_metrics.json b/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_train_metrics.json new file mode 100644 index 000000000..369e43852 --- /dev/null +++ b/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_train_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "train", + "type": "train", + "jsonl_fpath": "resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_train.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_train", + "version": "0.0.3", + "artifact_fpath": "riva_mt_v3_nothinkInSys_train.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 770000, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 770000, + "Average": 26.18, + "Min": 10.0, + "Max": 267.0, + "Median": 24.44, + "Standard deviation": 9.86 + }, + "Number of turns": { + "Total # non-null values": 770000, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 767619, + "total_count": 770000 + }, + "trg_txt": { + "unique_count": 767567, + "total_count": 770000 + }, + "src_lang": { + "unique_count": 12, + "total_count": 770000 + }, + "trg_lang": { + "unique_count": 12, + "total_count": 770000 + } +} \ No newline at end of file diff --git a/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_validation_metrics.json 
b/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_validation_metrics.json new file mode 100644 index 000000000..1d6645b38 --- /dev/null +++ b/resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_validation_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "validation", + "type": "validation", + "jsonl_fpath": "resources_servers/translation_bleu/data/riva_mt_v3_nothinkInSys_validation.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_validation", + "version": "0.0.1", + "artifact_fpath": "riva_mt_v3_nothinkInSys_validation.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 19940, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 19940, + "Average": 32.68, + "Min": 10.0, + "Max": 82.0, + "Median": 33.05, + "Standard deviation": 10.38 + }, + "Number of turns": { + "Total # non-null values": 19940, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "trg_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "src_lang": { + "unique_count": 11, + "total_count": 19940 + }, + "trg_lang": { + "unique_count": 11, + "total_count": 19940 + } +} \ No newline at end of file diff --git a/resources_servers/translation_bleu/requirements.txt b/resources_servers/translation_bleu/requirements.txt new file mode 100644 index 000000000..f3ee63442 --- /dev/null +++ b/resources_servers/translation_bleu/requirements.txt @@ -0,0 +1,3 @@ +-e nemo-gym[dev] @ ../../ +mecab-ko==1.0.2 +sacrebleu[ja,ko]>=2.0.0 \ No newline at end of file diff --git a/resources_servers/translation_bleu/tests/test_app.py b/resources_servers/translation_bleu/tests/test_app.py new file mode 100644 index 000000000..06dff1afc --- /dev/null +++ b/resources_servers/translation_bleu/tests/test_app.py @@ -0,0 +1,143 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from copy import deepcopy +from typing import Any +from unittest.mock import MagicMock + +from pytest import approx, fixture + +from nemo_gym.openai_utils import ( + NeMoGymResponse, + NeMoGymResponseCreateParamsNonStreaming, + NeMoGymResponseOutputMessage, + NeMoGymResponseOutputText, +) +from nemo_gym.server_utils import ServerClient +from resources_servers.translation_bleu.app import ( + TranslationBleuResourcesServer, + TranslationBleuResourcesServerConfig, + TranslationBleuVerifyRequest, +) + + +class TestApp: + @fixture + def config(self) -> TranslationBleuResourcesServerConfig: + return TranslationBleuResourcesServerConfig( + host="0.0.0.0", + port=8080, + entrypoint="", + name="", + ) + + def _create_response(self, id: str, model_response_text: str) -> dict[str, Any]: + return NeMoGymResponse( + id=id, + created_at=1234.5, + model="response_model", + object="response", + parallel_tool_calls=False, + tool_choice="none", + tools=[], + output=[ + NeMoGymResponseOutputMessage( + id=f"ID for {model_response_text}", + role="assistant", + status="in_progress", + type="message", + content=[NeMoGymResponseOutputText(annotations=[], text=model_response_text, type="output_text")], + ) + ], + ).model_dump() + + async def test_verify_identical(self, config: TranslationBleuResourcesServerConfig) -> None: + server_mock = MagicMock(spec=ServerClient) + resources_server = TranslationBleuResourcesServer(config=config, server_client=server_mock) + + source_text = "What is the name of your cat?" + ground_truth = "Was ist der Name deiner Katze?" + target_lang = "de" + target_lang_name = "German" + model_create_params = NeMoGymResponseCreateParamsNonStreaming( + input=[ + { + "role": "user", + "content": f'Translate this into {target_lang_name}: "{source_text}"', + } + ] + ) + model_response = NeMoGymResponse(**self._create_response("model_response_id", ground_truth)) + identical_verify_request = TranslationBleuVerifyRequest( + responses_create_params=deepcopy(model_create_params), + response=model_response.model_copy(deep=True), + trg_txt=ground_truth, + trg_lang=target_lang, + ) + identical_verify_response = await resources_server.verify(identical_verify_request) + assert identical_verify_response.responses_create_params == model_create_params + assert identical_verify_response.response == model_response + assert identical_verify_response.trg_txt == ground_truth + assert identical_verify_response.trg_lang == target_lang + assert identical_verify_response.reward == approx(1.0) + assert identical_verify_response.extracted_answer == ground_truth + + assert sorted(list(identical_verify_response.model_dump())) == [ + "extracted_answer", + "response", + "responses_create_params", + "reward", + "trg_lang", + "trg_txt", + ] + + def test_verify_answer_identical(self, config: TranslationBleuResourcesServerConfig) -> None: + resources_server = TranslationBleuResourcesServer(config=config, server_client=MagicMock(spec=ServerClient)) + + # source_text = "What is the name of your cat?" + ground_truth = "Was ist der Name deiner Katze?" + target_lang = "de" + model_response_text = ground_truth + + assert resources_server._verify_answer(ground_truth, target_lang, model_response_text) == ( + approx(1.0), + ground_truth, + ) + + def test_verify_answer_think_tags(self, config: TranslationBleuResourcesServerConfig) -> None: + resources_server = TranslationBleuResourcesServer(config=config, server_client=MagicMock(spec=ServerClient)) + + # source_text = "What is the name of your cat?" 
+ ground_truth = "Was ist der Name deiner Katze?" + target_lang = "de" + model_response_text = f"<think></think>\n\n{ground_truth}"  # empty reasoning block, stripped by _extract_answer + + assert resources_server._verify_answer(ground_truth, target_lang, model_response_text) == ( + approx(1.0), + ground_truth, + ) + + def test_verify_answer_no_match(self, config: TranslationBleuResourcesServerConfig) -> None: + resources_server = TranslationBleuResourcesServer(config=config, server_client=MagicMock(spec=ServerClient)) + + # source_text = "What is the name of your cat?" + ground_truth = "Was ist der Name deiner Katze?" + target_lang = "de" + model_response_text = "Incorrect translation." + + assert resources_server._verify_answer(ground_truth, target_lang, model_response_text) == ( + approx(0.0), + model_response_text, + ) diff --git a/resources_servers/translation_comet/README.md b/resources_servers/translation_comet/README.md new file mode 100644 index 000000000..cf6920b56 --- /dev/null +++ b/resources_servers/translation_comet/README.md @@ -0,0 +1,11 @@ +# Description + +Data links: ? + +# Licensing information +Code: Apache 2.0 +Data: NVIDIA Internal Use Only, Do Not Distribute + +# Dependencies +- nemo_gym: Apache 2.0 +- unbabel-comet: Apache 2.0 diff --git a/resources_servers/translation_comet/app.py b/resources_servers/translation_comet/app.py new file mode 100644 index 000000000..83ae1fc12 --- /dev/null +++ b/resources_servers/translation_comet/app.py @@ -0,0 +1,139 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
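+"""Resources server that scores candidate translations with a COMET model. + +Illustrative sketch of a verify payload (field names follow the TranslationCometVerifyRequest model defined below; the values here are hypothetical examples): + + {"responses_create_params": {"input": [{"role": "user", "content": "..."}]}, + "response": {...}, + "src_txt": "What is the name of your cat?", + "trg_txt": "Was ist der Name deiner Katze?"} +""" +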
+from typing import Any, List, Optional, Union + +from comet.models import download_model, load_from_checkpoint +from fastapi import FastAPI + +from nemo_gym.base_resources_server import ( + BaseResourcesServerConfig, + BaseVerifyRequest, + BaseVerifyResponse, + SimpleResourcesServer, +) + + +class TranslationCometResourcesServerConfig(BaseResourcesServerConfig): + use_reference: bool = True # Must match the model set in comet_model_name + comet_model_name: str = "Unbabel/wmt22-comet-da" + comet_gpu_count: Optional[int] = None # None means CPU only + comet_gpu_devices: Union[List[int], str, int] = "auto" + model_cache_dir: Optional[str] = None + reasoning_split_word: str = "</think>"  # delimiter marking the end of the reasoning block + + +class TranslationCometVerifyRequest(BaseVerifyRequest): + src_txt: str + trg_txt: Optional[str] = None + + +class TranslationCometVerifyResponse(BaseVerifyResponse): + src_txt: str + trg_txt: Optional[str] = None + extracted_answer: str + + +class TranslationCometResourcesServer(SimpleResourcesServer): + config: TranslationCometResourcesServerConfig + batch_size: int = 1 # We only process one item at a time so this is always 1 + + def model_post_init(self, context: Any) -> None: + super().model_post_init(context) + + # # Manually load the model without the Comet wrapper class so we can control the GPU allocation + # # https://stackoverflow.com/questions/75879866/how-to-load-unbabel-comet-model-without-nested-wrapper-initialization + + # model_path = snapshot_download(repo_id=self.config.comet_model_name) + # model_checkpoint_path = f'{model_path}/checkpoints/model.ckpt' + # if self.config.use_reference: + # self._comet_model = RegressionMetric.load_from_checkpoint(model_checkpoint_path, layer_transformation='softmax') + # else: + # self._comet_model = ReferencelessRegression.load_from_checkpoint(model_checkpoint_path, layer_transformation='softmax') + + model_path = download_model(model=self.config.comet_model_name, saving_directory=self.config.model_cache_dir) + self._comet_model = load_from_checkpoint(model_path) + + # TODO long-term we want to pull logic out of predict in base.py in COMET + # so that we keep one PTL Trainer for the whole server, and don't make a new one for every datapoint + # since each PTL trainer moves the model weights from CPU to GPU each time which will be slow + + def setup_webserver(self) -> FastAPI: + app = super().setup_webserver() + + # Additional server routes go here! 
e.g.: + # app.post("/get_weather")(self.get_weather) + + return app + + async def verify(self, body: TranslationCometVerifyRequest) -> TranslationCometVerifyResponse: + assistant_responses = [] + for output_item in body.response.output: + if output_item.type != "message": + continue + + for content_item in output_item.content: + if content_item.type != "output_text": + continue + + assistant_responses.append(content_item.text) + + combined_response = "".join(assistant_responses) + + (reward, extracted_answer) = self._verify_answer( + source_text=body.src_txt, target_text=body.trg_txt, model_response=combined_response + ) + + return TranslationCometVerifyResponse(**body.model_dump(), extracted_answer=extracted_answer, reward=reward) + + def _verify_answer(self, source_text: str, target_text: str, model_response: str) -> tuple[float, str]: + extracted_answer = self._extract_answer(model_response) + + if self.config.use_reference: + comet_data = [{"src": source_text, "mt": extracted_answer, "ref": target_text}] + else: + comet_data = [{"src": source_text, "mt": extracted_answer}] + + # TODO this is inefficient and sets up a new PTL Trainer each time + # It's designed to be run on a whole dataset at once + # This means the weights get moved from CPU to GPU (if applicable) each time this is called + model_output = self._comet_model.predict( + comet_data, + batch_size=self.batch_size, + gpus=self.config.comet_gpu_count, + devices=self.config.comet_gpu_devices, + progress_bar=False, + ) + reward = model_output.system_score + + return reward, extracted_answer + + def _extract_answer(self, model_response: str) -> str: + # Strip any thinking + no_think_response = model_response.split(self.config.reasoning_split_word)[-1] + no_think_response = no_think_response.strip() + return no_think_response + + +if __name__ == "__main__": + TranslationCometResourcesServer.run_webserver() diff --git a/resources_servers/translation_comet/configs/translation_comet.yaml b/resources_servers/translation_comet/configs/translation_comet.yaml new file mode 100644 index 000000000..e0f5c5e75 --- /dev/null +++ b/resources_servers/translation_comet/configs/translation_comet.yaml @@ -0,0 +1,40 @@ +translation_comet: + resources_servers: + translation_comet: + entrypoint: app.py + domain: translation + use_reference: true + comet_model_name: Unbabel/wmt22-comet-da +translation_comet_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: translation_comet + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: train + type: train + jsonl_fpath: resources_servers/translation_comet/data/riva_mt_v3_nothinkInSys_train.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_train + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_train.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: validation + type: validation + jsonl_fpath: resources_servers/translation_comet/data/riva_mt_v3_nothinkInSys_validation.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_validation + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_validation.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: example + type: example + jsonl_fpath: resources_servers/translation_comet/data/example.jsonl + num_repeats: 1 diff --git a/resources_servers/translation_comet/configs/translation_comet_referencefree.yaml 
b/resources_servers/translation_comet/configs/translation_comet_referencefree.yaml new file mode 100644 index 000000000..944df2982 --- /dev/null +++ b/resources_servers/translation_comet/configs/translation_comet_referencefree.yaml @@ -0,0 +1,40 @@ +translation_comet: + resources_servers: + translation_comet: + entrypoint: app.py + domain: translation + use_reference: false + comet_model_name: Unbabel/wmt22-cometkiwi-da +translation_comet_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: translation_comet + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: train + type: train + jsonl_fpath: resources_servers/translation_comet/data/riva_mt_v3_nothinkInSys_train.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_train + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_train.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: validation + type: validation + jsonl_fpath: resources_servers/translation_comet/data/riva_mt_v3_nothinkInSys_validation.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_validation + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_validation.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: example + type: example + jsonl_fpath: resources_servers/translation_comet/data/example.jsonl + num_repeats: 1 diff --git a/resources_servers/translation_comet/data/.gitignore b/resources_servers/translation_comet/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/resources_servers/translation_comet/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/resources_servers/translation_comet/data/example.jsonl b/resources_servers/translation_comet/data/example.jsonl new file mode 100644 index 000000000..2c921e654 --- /dev/null +++ b/resources_servers/translation_comet/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into French. 
Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_comet/data/example_nothink.jsonl b/resources_servers/translation_comet/data/example_nothink.jsonl new file mode 100644 index 000000000..42529ab9a --- /dev/null +++ b/resources_servers/translation_comet/data/example_nothink.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. 
Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_comet/data/example_nothink_rollouts.jsonl b/resources_servers/translation_comet/data/example_nothink_rollouts.jsonl new file mode 100644 index 000000000..8b253272b --- /dev/null +++ b/resources_servers/translation_comet/data/example_nothink_rollouts.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_9ba0dbafb376409e8a7ed772adab1ecf", "created_at": 1761320110.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_f0f3a1e9b0784933bdd719fd569e6077", "content": [{"annotations": [], "text": "Why waste time looking for the keys?\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.9511789679527283, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "extracted_answer": "Why waste time looking for the keys?"} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_411f1d1acdeb4190ac860db659abc3cf", "created_at": 1761320110.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": 
"nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_edbfc6e1503143dc82f1b94a9ea51bdc", "content": [{"annotations": [], "text": "The competition was held on a lawn, where the grass around the hole was cut shorter and is called the putting green.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.8277285695075989, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "extracted_answer": "The competition was held on a lawn, where the grass around the hole was cut shorter and is called the putting green."} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_4b200f77ec6d47cebbd6f4dec45239e8", "created_at": 1761320110.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_eb5d7cb1c8f74a4ead4d9d1d8d0e7cd5", "content": [{"annotations": [], "text": "Toutes les questions ont été répondues correctement par le trésorier Thomas Kräuchi.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.9093431234359741, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "extracted_answer": "Toutes les questions ont été répondues correctement par le trésorier Thomas Kräuchi."} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. 
Of course, in reality photons are a lot smaller than those in the picture.\n```", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_64b8e9cb25a8414b9a61615c90d63b68", "created_at": 1761320110.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_71c84f51f02c437d9421550f6870d827", "content": [{"annotations": [], "text": "画像の次に原子が光子を放出している様子が示されています。もちろん、実際の光子はそのようにはとても小さいです。\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.9037765264511108, "src_txt": "The next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "extracted_answer": "画像の次に原子が光子を放出している様子が示されています。もちろん、実際の光子はそのようにはとても小さいです。"} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "/no_think", "role": "system", "type": "message"}, {"content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": 4096, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_ef4433e72c5d46ed8a39460bf8039518", "created_at": 1761320110.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "nvidia/nvidia-nemotron-nano-9b-v2", "object": "response", "output": [{"id": "msg_d60da844dc104c3aa41836580f4970bc", "content": [{"annotations": [], "text": "The conservatory and the theater were dedicated to him, in which every year the \"Rossini Opera Festival\" takes place, attracting opera lovers from all over the world.\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [], "top_p": null, "background": null, "max_output_tokens": 4096, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "top_logprobs": null, "truncation": null, "usage": null, "user": null}, "reward": 0.8553621172904968, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem 
jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "extracted_answer": "The conservatory and the theater were dedicated to him, in which every year the \"Rossini Opera Festival\" takes place, attracting opera lovers from all over the world."} diff --git a/resources_servers/translation_comet/requirements.txt b/resources_servers/translation_comet/requirements.txt new file mode 100644 index 000000000..302adca33 --- /dev/null +++ b/resources_servers/translation_comet/requirements.txt @@ -0,0 +1,2 @@ +-e nemo-gym[dev] @ ../../ +unbabel-comet>=2.2.4 \ No newline at end of file diff --git a/resources_servers/translation_comet/tests/test_app.py b/resources_servers/translation_comet/tests/test_app.py new file mode 100644 index 000000000..68b2477b4 --- /dev/null +++ b/resources_servers/translation_comet/tests/test_app.py @@ -0,0 +1,211 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from copy import deepcopy +from os.path import dirname, join +from typing import Any +from unittest.mock import MagicMock + +from pytest import approx, fixture + +from nemo_gym import CACHE_DIR +from nemo_gym.openai_utils import ( + NeMoGymResponse, + NeMoGymResponseCreateParamsNonStreaming, + NeMoGymResponseOutputMessage, + NeMoGymResponseOutputText, +) +from nemo_gym.server_utils import ServerClient +from resources_servers.translation_comet.app import ( + TranslationCometResourcesServer, + TranslationCometResourcesServerConfig, + TranslationCometVerifyRequest, +) + + +logger = logging.getLogger(__name__) + + +class TestApp: + @fixture(scope="class") + def resources_server(self) -> TranslationCometResourcesServer: + """We only want to spin up the server once since it has to load the comet model. 
+        That said, the slowest part is actually `import comet` :)
+        """
+        logger.info("Spinning up server with COMET model...")
+
+        server = TranslationCometResourcesServer(
+            config=TranslationCometResourcesServerConfig(
+                host="0.0.0.0",
+                port=8080,
+                entrypoint="",
+                name="",
+                use_reference=True,
+                comet_model_name="Unbabel/wmt22-comet-da",  # 0.5B parameter model runs fine on CPU
+                # Need to use the actual model as the cometinho model does not return values in [0,1]
+                comet_gpu_count=0,  # CPU
+                comet_gpu_devices="auto",  # CPU
+                model_cache_dir=join(CACHE_DIR, "ptl_cache"),
+            ),
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        logger.info("Model loaded and server started successfully")
+        return server
+
+    def reference_free_resources_server(self) -> TranslationCometResourcesServer:
+        logger.info("Spinning up server with reference-free COMET model...")
+
+        server = TranslationCometResourcesServer(
+            config=TranslationCometResourcesServerConfig(
+                host="0.0.0.0",
+                port=8080,
+                entrypoint="",
+                name="",
+                use_reference=False,
+                comet_model_name="Unbabel/wmt22-cometkiwi-da",  # reference-free COMET
+                comet_gpu_count=0,  # CPU
+                comet_gpu_devices="auto",  # CPU
+                model_cache_dir=join(dirname(__file__), "..", "..", "..", "cache", "ptl_cache"),
+            ),
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        logger.info("Reference-free COMET model loaded and server started successfully")
+        return server
+
+    def _create_response(self, id: str, model_response_text: str) -> dict[str, Any]:
+        return NeMoGymResponse(
+            id=id,
+            created_at=1234.5,
+            model="response_model",
+            object="response",
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            output=[
+                NeMoGymResponseOutputMessage(
+                    id=f"ID for {model_response_text}",
+                    role="assistant",
+                    status="in_progress",
+                    type="message",
+                    content=[NeMoGymResponseOutputText(annotations=[], text=model_response_text, type="output_text")],
+                )
+            ],
+        ).model_dump()
+
+    async def test_verify_identical(self, resources_server: TranslationCometResourcesServer) -> None:
+        source_text = "What is the name of your cat?"
+        target_text = "Was ist der Name deiner Katze?"
+        target_lang_name = "German"
+        model_create_params = NeMoGymResponseCreateParamsNonStreaming(
+            input=[
+                {
+                    "role": "user",
+                    "content": f'Translate this into {target_lang_name}: "{source_text}"',
+                }
+            ]
+        )
+        model_response = NeMoGymResponse(**self._create_response("model_response_id", target_text))
+        identical_verify_request = TranslationCometVerifyRequest(
+            responses_create_params=deepcopy(model_create_params),
+            response=model_response.model_copy(deep=True),
+            src_txt=source_text,
+            trg_txt=target_text,
+        )
+        identical_verify_response = await resources_server.verify(identical_verify_request)
+        assert identical_verify_response.responses_create_params == model_create_params
+        assert identical_verify_response.response == model_response
+        assert identical_verify_response.src_txt == source_text
+        assert identical_verify_response.trg_txt == target_text
+        assert identical_verify_response.reward == approx(1.0, abs=0.05)
+        assert identical_verify_response.extracted_answer == target_text
+
+        assert sorted(list(identical_verify_response.model_dump())) == [
+            "extracted_answer",
+            "response",
+            "responses_create_params",
+            "reward",
+            "src_txt",
+            "trg_txt",
+        ]
+
+    def test_verify_answer_identical(self, resources_server: TranslationCometResourcesServer) -> None:
+        source_text = "What is the name of your cat?"
+        target_text = "Was ist der Name deiner Katze?"
+        model_response_text = target_text
+
+        assert resources_server._verify_answer(source_text, target_text, model_response_text) == (
+            approx(1.0, abs=0.05),  # It's a model output so it won't be exact
+            target_text,
+        )
+
+    def test_verify_answer_think_tags(self, resources_server: TranslationCometResourcesServer) -> None:
+        source_text = "What is the name of your cat?"
+        target_text = "Was ist der Name deiner Katze?"
+        model_response_text = f"<think>Some internal reasoning.</think>\n\n{target_text}"
+
+        assert resources_server._verify_answer(source_text, target_text, model_response_text) == (
+            approx(1.0, abs=0.05),  # It's a model output so it won't be exact
+            target_text,
+        )
+
+    def test_verify_answer_no_match(self, resources_server: TranslationCometResourcesServer) -> None:
+        source_text = "What is the name of your cat?"
+        target_text = "Was ist der Name deiner Katze?"
+        model_response_text = "Incorrect translation."
+
+        assert resources_server._verify_answer(source_text, target_text, model_response_text) == (
+            approx(0.0, abs=0.5),  # This returns about 0.3 in practice but it's fine as long as it's low
+            model_response_text,
+        )
+
+    async def test_verify_identical_reference_free(self) -> None:
+        reference_free_resources_server = self.reference_free_resources_server()
+
+        source_text = "two three"
+        target_text = "zwei drei"
+        target_lang_name = "German"
+        model_create_params = NeMoGymResponseCreateParamsNonStreaming(
+            input=[
+                {
+                    "role": "user",
+                    "content": f'Translate this into {target_lang_name}: "{source_text}"',
+                }
+            ]
+        )
+        model_response = NeMoGymResponse(**self._create_response("model_response_id", target_text))
+        identical_verify_request = TranslationCometVerifyRequest(
+            responses_create_params=deepcopy(model_create_params),
+            response=model_response.model_copy(deep=True),
+            src_txt=source_text,
+        )
+        identical_verify_response = await reference_free_resources_server.verify(identical_verify_request)
+        assert identical_verify_response.responses_create_params == model_create_params
+        assert identical_verify_response.response == model_response
+        assert identical_verify_response.src_txt == source_text
+        assert identical_verify_response.trg_txt is None
+        assert identical_verify_response.reward == approx(
+            1.0, abs=0.25
+        )  # It's hard to get a score near 1.0 with the reference-free model
+        assert identical_verify_response.extracted_answer == target_text
+
+        assert sorted(list(identical_verify_response.model_dump())) == [
+            "extracted_answer",
+            "response",
+            "responses_create_params",
+            "reward",
+            "src_txt",
+            "trg_txt",  # Should be present but None
+        ]
diff --git a/resources_servers/translation_llm_judge/README.md b/resources_servers/translation_llm_judge/README.md
new file mode 100644
index 000000000..c427b79a4
--- /dev/null
+++ b/resources_servers/translation_llm_judge/README.md
@@ -0,0 +1,10 @@
+# Description
+
+Data links: ?
+
+# Licensing information
+Code: Apache 2.0
+Data: NVIDIA Internal Use Only, Do Not Distribute
+
+Dependencies
+- nemo_gym: Apache 2.0
diff --git a/resources_servers/translation_llm_judge/app.py b/resources_servers/translation_llm_judge/app.py
new file mode 100644
index 000000000..b5e3923f5
--- /dev/null
+++ b/resources_servers/translation_llm_judge/app.py
@@ -0,0 +1,207 @@
+"""
+LLM-as-judge resources server.
+
+Compares a model's generated answer to an expected answer using an LLM judge.
+The judge prompt is fully configurable via server config.
+"""
+
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import re
+from typing import Optional
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+from nemo_gym.base_resources_server import (
+    BaseResourcesServerConfig,
+    BaseRunRequest,
+    BaseVerifyRequest,
+    BaseVerifyResponse,
+    SimpleResourcesServer,
+)
+from nemo_gym.config_types import ModelServerRef
+from nemo_gym.openai_utils import (
+    NeMoGymEasyInputMessage,
+    NeMoGymResponse,
+    NeMoGymResponseCreateParamsNonStreaming,
+)
+
+
+class TranslationLLMJudgeResourcesServerConfig(BaseResourcesServerConfig):
+    """Configuration for the LLM judge server.
+
+    - judge_model_server: target model server to use as the judge.
+    - judge_responses_create_params: base create params; input will be set per request.
+    - judge_system_message: optional custom system message for the judge.
+    - judge_prompt_template: prompt template for the judge. Supported placeholders:
+      {generated_text}, {src_txt}, {src_lang}, {trg_lang}, and {trg_txt}; {trg_txt} is
+      only substituted when use_reference is True and a reference is supplied.
+    - judge_score_extract_regex: regex whose first capture group extracts the judge's score.
+    - judge_max_score: maximum score the judge can award; used to normalize the reward.
+    - reasoning_split_word: delimiter marking the end of a reasoning block; everything up
+      to and including it is stripped from model output.
+    """
+
+    name: str = "translation_llm_judge"
+    judge_model_server: ModelServerRef
+    judge_responses_create_params: NeMoGymResponseCreateParamsNonStreaming
+
+    judge_system_message: Optional[str] = None
+    judge_prompt_template: str
+    judge_score_extract_regex: str
+    judge_max_score: int
+    use_reference: bool = True  # If True, judge_prompt_template should include {trg_txt}
+    reasoning_split_word: str = "</think>"
+
+
+class TranslationLLMJudgeRunRequest(BaseRunRequest):
+    src_txt: str
+    src_lang: str  # TODO should this be optional? Not all judge prompts will use it
+    trg_lang: str  # TODO should this be optional? Not all judge prompts will use it
+    trg_txt: Optional[str] = None
+
+
+class TranslationLLMJudgeVerifyRequest(TranslationLLMJudgeRunRequest, BaseVerifyRequest):
+    pass
+
+
+class TranslationLLMJudgeEvaluation(BaseModel):
+    responses_create_params: NeMoGymResponseCreateParamsNonStreaming
+    response: NeMoGymResponse
+    # Extracted score from judge output
+    score: Optional[float] = None
+
+
+class TranslationLLMJudgeVerifyResponse(BaseVerifyResponse):
+    src_txt: str
+    src_lang: str
+    trg_lang: str
+    trg_txt: Optional[str] = None
+    judge_evaluation: TranslationLLMJudgeEvaluation
+
+
+class TranslationLLMJudgeResourcesServer(SimpleResourcesServer):
+    """Judge-only verifier using an LLM to evaluate translation quality."""
+
+    config: TranslationLLMJudgeResourcesServerConfig
+
+    def setup_webserver(self) -> FastAPI:
+        app = super().setup_webserver()
+        return app
+
+    def _extract_last_assistant_text(self, body: BaseVerifyRequest) -> str:
+        """Extract the last assistant message text from the response.
+
+        - If the assistant message has multiple text blocks, they are joined with newlines.
+        - Returns an empty string when no assistant text is available.
+ """ + # Return only the last assistant message's text content. + for o in reversed(body.response.output): + text = "" + if getattr(o, "type", None) == "message" and getattr(o, "role", None) == "assistant": + content = getattr(o, "content", None) + if isinstance(content, list): + # Some providers split a single assistant message into multiple text blocks. + # Join all text blocks to reconstruct the full message text. + texts: list[str] = [] + for c in content: + t = getattr(c, "text", None) + if isinstance(t, str): + texts.append(t) + text = "\n".join(texts).strip() + elif isinstance(content, str): + text = content.strip() + + # Strip thinking if not already removed by reasoning parser + text = self._strip_thinking(text) + return text + + def _strip_thinking(self, model_response: str) -> str: + # Strip any thinking + no_think_response = model_response.split(self.config.reasoning_split_word)[-1] + no_think_response = no_think_response.strip() + return no_think_response + + async def verify(self, body: TranslationLLMJudgeVerifyRequest) -> TranslationLLMJudgeVerifyResponse: + generated = self._extract_last_assistant_text(body) + + eval = await self._generate_judge_evaluation( + generated_text=generated, + src_txt=body.src_txt, + src_lang=body.src_lang, + trg_lang=body.trg_lang, + trg_txt=body.trg_txt, + ) + + payload = body.model_dump() + reward = eval.score / self.config.judge_max_score + return TranslationLLMJudgeVerifyResponse(**payload, reward=reward, judge_evaluation=eval) + + async def _generate_judge_evaluation( + self, *, generated_text: str, src_txt: str, src_lang: str, trg_lang: str, trg_txt: Optional[str] = None + ) -> TranslationLLMJudgeEvaluation: + cfg = self.config + + responses_create_params = cfg.judge_responses_create_params.model_copy(deep=True) + prompt_template = cfg.judge_prompt_template + system_message = cfg.judge_system_message + + if self.config.use_reference and trg_txt is not None: + user_prompt = prompt_template.format( + generated_text=generated_text, src_txt=src_txt, src_lang=src_lang, trg_lang=trg_lang, trg_txt=trg_txt + ) + else: + user_prompt = prompt_template.format( + generated_text=generated_text, src_txt=src_txt, src_lang=src_lang, trg_lang=trg_lang + ) + + msgs: list[NeMoGymEasyInputMessage] = [] + if system_message is not None and system_message != "": + msgs.append(NeMoGymEasyInputMessage(role="system", content=system_message)) + msgs.append(NeMoGymEasyInputMessage(role="user", content=user_prompt)) + responses_create_params.input = msgs + + response = await self.server_client.post( + server_name=cfg.judge_model_server.name, + url_path="/v1/responses", + json=responses_create_params, + ) + judge_response = NeMoGymResponse.model_validate(await response.json()) + eval_record = TranslationLLMJudgeEvaluation( + responses_create_params=responses_create_params, + response=judge_response, + score=0.0, + ) + + # Parse the last output; fall back to a score of 0 if unexpected. 
+ try: + last_output = judge_response.output[-1] + if getattr(last_output, "type", None) != "message": + return eval_record + last_content = last_output.content[-1] + text = getattr(last_content, "text", "") + except Exception: + return eval_record + + # Extract the score from the judge output + match = re.search(cfg.judge_score_extract_regex, text) + if match is None: + return eval_record + score = int(match.group(1)) + eval_record.score = score + return eval_record + + +if __name__ == "__main__": + TranslationLLMJudgeResourcesServer.run_webserver() diff --git a/resources_servers/translation_llm_judge/configs/translation_llm_judge_vip.yaml b/resources_servers/translation_llm_judge/configs/translation_llm_judge_vip.yaml new file mode 100644 index 000000000..8a752c9e5 --- /dev/null +++ b/resources_servers/translation_llm_judge/configs/translation_llm_judge_vip.yaml @@ -0,0 +1,66 @@ +translation_llm_judge: + resources_servers: + translation_llm_judge: + entrypoint: app.py + domain: translation + judge_model_server: + type: responses_api_models + name: policy_model + judge_responses_create_params: + input: [] + judge_system_message: "You are a professional translation evaluator." + # This uses the VIP process from Cheng et al. 2024 (https://arxiv.org/abs/2407.21646) + # as implemented by Ouyang et al. 2025 (NVIDIA internal, not yet published) + judge_prompt_template: |- + Your task is to assess whether a translation segment successfully conveys the semantic content of the original speech according to the following criteria: + + 1. Key Information Recognition: Identify whether the key information in the source (e.g., proper nouns, keywords, terminologies, or sentence structures) is present in the translation. + 2. Correctness Assessment: Determine whether the translation accurately conveys the speaker’s intention, without misinterpretation or contextual errors. + 3. Expressiveness Assessment: Evaluate whether the translation is fluent, clear, and intuitive to human readers. It should avoid unnecessary verbosity, ambiguous phrases, or awkward grammar. + + Given a source text and its translation, answer "1" if the translation meets all three criteria and answer "0" otherwise. Only output the answer, no other text. 
+ + + {src_txt} + + + + {generated_text} + + judge_score_extract_regex: "(1|0)" + judge_max_score: 1 +translation_llm_judge_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: translation_llm_judge + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: train + type: train + jsonl_fpath: resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_train.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_train + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_train.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: validation + type: validation + jsonl_fpath: resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_validation.jsonl + num_repeats: 1 + gitlab_identifier: + dataset_name: riva_mt_v3_nothinkInSys_validation + version: 0.0.1 + artifact_fpath: riva_mt_v3_nothinkInSys_validation.jsonl + license: NVIDIA Internal Use Only, Do Not Distribute + - name: example + type: example + license: NVIDIA Internal Use Only, Do Not Distribute + jsonl_fpath: resources_servers/translation_llm_judge/data/example.jsonl + + diff --git a/resources_servers/translation_llm_judge/data/example.jsonl b/resources_servers/translation_llm_judge/data/example.jsonl new file mode 100644 index 000000000..2c921e654 --- /dev/null +++ b/resources_servers/translation_llm_judge/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. 
Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_llm_judge/data/example_nothink.jsonl b/resources_servers/translation_llm_judge/data/example_nothink.jsonl new file mode 100644 index 000000000..42529ab9a --- /dev/null +++ b/resources_servers/translation_llm_judge/data/example_nothink.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. 
Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_train_metrics.json b/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_train_metrics.json new file mode 100644 index 000000000..0d0c53e2e --- /dev/null +++ b/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_train_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "train", + "type": "train", + "jsonl_fpath": "resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_train.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_train", + "version": "0.0.1", + "artifact_fpath": "riva_mt_v3_nothinkInSys_train.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 770000, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 770000, + "Average": 26.18, + "Min": 10.0, + "Max": 267.0, + "Median": 24.44, + "Standard deviation": 9.86 + }, + "Number of turns": { + "Total # non-null values": 770000, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 767619, + "total_count": 770000 + }, + "trg_txt": { + "unique_count": 767567, + "total_count": 770000 + }, + "src_lang": { + "unique_count": 12, + "total_count": 770000 + }, + "trg_lang": { + "unique_count": 12, + "total_count": 770000 + } +} \ No newline at end of file diff --git a/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_validation_metrics.json b/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_validation_metrics.json new file mode 100644 index 000000000..bc35bb4d6 --- /dev/null +++ b/resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_validation_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "validation", + "type": "validation", + "jsonl_fpath": "resources_servers/translation_llm_judge/data/riva_mt_v3_nothinkInSys_validation.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_validation", + "version": "0.0.1", + "artifact_fpath": "riva_mt_v3_nothinkInSys_validation.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 19940, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 19940, + "Average": 32.68, + "Min": 10.0, + "Max": 82.0, + "Median": 33.05, + "Standard deviation": 10.38 + }, + "Number of turns": { + "Total # 
non-null values": 19940, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "trg_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "src_lang": { + "unique_count": 11, + "total_count": 19940 + }, + "trg_lang": { + "unique_count": 11, + "total_count": 19940 + } +} \ No newline at end of file diff --git a/resources_servers/translation_llm_judge/requirements.txt b/resources_servers/translation_llm_judge/requirements.txt new file mode 100644 index 000000000..5042d4648 --- /dev/null +++ b/resources_servers/translation_llm_judge/requirements.txt @@ -0,0 +1,2 @@ +-e nemo-gym[dev] @ ../../ + diff --git a/resources_servers/translation_llm_judge/tests/test_app.py b/resources_servers/translation_llm_judge/tests/test_app.py new file mode 100644 index 000000000..3f5f3ade3 --- /dev/null +++ b/resources_servers/translation_llm_judge/tests/test_app.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from copy import deepcopy +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +from omegaconf import OmegaConf +from pytest import approx, fixture + +from nemo_gym.config_types import ModelServerRef +from nemo_gym.openai_utils import ( + NeMoGymResponse, + NeMoGymResponseCreateParamsNonStreaming, + NeMoGymResponseOutputItem, + NeMoGymResponseOutputMessage, + NeMoGymResponseOutputText, +) +from nemo_gym.server_utils import ServerClient +from resources_servers.translation_llm_judge.app import ( + TranslationLLMJudgeResourcesServer, + TranslationLLMJudgeResourcesServerConfig, + TranslationLLMJudgeVerifyRequest, +) + + +class TestApp: + @fixture + def config(self) -> TranslationLLMJudgeResourcesServerConfig: + # Load judge template from YAML so tests mirror runtime config + yaml_path = Path(__file__).resolve().parents[1] / "configs" / "translation_llm_judge_vip.yaml" + yaml_cfg = OmegaConf.load(str(yaml_path)) + judge_template: str = ( + yaml_cfg.translation_llm_judge.resources_servers.translation_llm_judge.judge_prompt_template + ) + judge_score_extract_regex: str = ( + yaml_cfg.translation_llm_judge.resources_servers.translation_llm_judge.judge_score_extract_regex + ) + judge_max_score: int = yaml_cfg.translation_llm_judge.resources_servers.translation_llm_judge.judge_max_score + + cfg = TranslationLLMJudgeResourcesServerConfig( + host="0.0.0.0", + port=8080, + entrypoint="", + judge_model_server=ModelServerRef(type="responses_api_models", name="judge"), + judge_responses_create_params=NeMoGymResponseCreateParamsNonStreaming(input=[]), + judge_prompt_template=judge_template, + judge_score_extract_regex=judge_score_extract_regex, + judge_max_score=judge_max_score, + ) + return cfg + + def _create_response(self, id: str, 
output_item: NeMoGymResponseOutputItem) -> dict[str, Any]: + return NeMoGymResponse( + id=id, + created_at=123.0, + model="judge_model", + object="response", + output=[output_item], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ).model_dump() + + def _msg(self, text: str) -> NeMoGymResponseOutputMessage: + return NeMoGymResponseOutputMessage( + id="msg_id", + content=[NeMoGymResponseOutputText(annotations=[], text=text, type="output_text")], + role="assistant", + status="completed", + type="message", + ) + + async def test_verify_identical(self, config: TranslationLLMJudgeResourcesServerConfig) -> None: + server_mock = MagicMock(spec=ServerClient) + rs = TranslationLLMJudgeResourcesServer(config=config, server_client=server_mock) + + post_mock = MagicMock() + post_mock.json = AsyncMock() + server_mock.post = AsyncMock(return_value=post_mock) + + post_mock.json.side_effect = [ + self._create_response("first", self._msg("1")), + ] + + src_txt = "One two three" + trg_txt = "Eins zwei drei" + src_lang = "en" + trg_lang = "de" + + model_create_params = NeMoGymResponseCreateParamsNonStreaming(input=[{"role": "user", "content": "Q: 1+1?"}]) + model_response = NeMoGymResponse( + id="resp", + created_at=0.0, + model="m", + object="response", + output=[self._msg(trg_txt)], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ) + + req = TranslationLLMJudgeVerifyRequest( + responses_create_params=deepcopy(model_create_params), + response=model_response.model_copy(deep=True), + src_txt=src_txt, + trg_txt=trg_txt, + src_lang=src_lang, + trg_lang=trg_lang, + ) + res = await rs.verify(req) + assert res.reward == approx(1.0) + assert res.src_txt == src_txt + assert res.trg_txt == trg_txt + assert res.src_lang == src_lang + assert res.trg_lang == trg_lang + assert res.judge_evaluation.score == 1 + + async def test_unexpected_judge_output_defaults_to_zero( + self, config: TranslationLLMJudgeResourcesServerConfig + ) -> None: + server_mock = MagicMock(spec=ServerClient) + rs = TranslationLLMJudgeResourcesServer(config=config, server_client=server_mock) + + post_mock = MagicMock() + post_mock.json = AsyncMock(return_value=self._create_response("f", self._msg("no label present"))) + server_mock.post = AsyncMock(return_value=post_mock) + + req = TranslationLLMJudgeVerifyRequest( + responses_create_params=NeMoGymResponseCreateParamsNonStreaming(input=[]), + response=NeMoGymResponse( + id="r", + created_at=0.0, + model="m", + object="response", + output=[self._msg("text")], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ), + src_txt="One two three", + trg_txt="Eins zwei drei", + src_lang="en", + trg_lang="de", + ) + res = await rs.verify(req) + assert res.reward == approx(0.0) diff --git a/resources_servers/translation_metricx/README.md b/resources_servers/translation_metricx/README.md new file mode 100644 index 000000000..654132fd8 --- /dev/null +++ b/resources_servers/translation_metricx/README.md @@ -0,0 +1,11 @@ +# Description + +Data links: ? + +# Licensing information +Code: Apache 2.0 +Data: NVIDIA Internal Use Only, Do Not Distribute + +Dependencies +- nemo_gym: Apache 2.0 +- google-research/metricx: Apache 2.0 diff --git a/resources_servers/translation_metricx/app.py b/resources_servers/translation_metricx/app.py new file mode 100644 index 000000000..dcb17d9bf --- /dev/null +++ b/resources_servers/translation_metricx/app.py @@ -0,0 +1,196 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from pathlib import Path +from typing import Any, Optional + +import datasets +import transformers +from fastapi import FastAPI +from metricx24.models import MT5ForRegression + +from nemo_gym import CACHE_DIR +from nemo_gym.base_resources_server import ( + BaseResourcesServerConfig, + BaseVerifyRequest, + BaseVerifyResponse, + SimpleResourcesServer, +) + + +class TranslationMetricxResourcesServerConfig(BaseResourcesServerConfig): + """ + Configuration for the TranslationMetricxResourcesServer. + + Attributes: + use_reference (bool): Whether to use a reference translation + metricx_model_name (str): The MetricX model name to use. The default "google/metricx-24-hybrid-large-v2p6-bfloat16" + is the smallest model at 1.2B parameters. + tokenizer_name (str): The name of the mT5 tokenizer to use with the MetricX model. Size must match MetricX model. + For the default model above, use "google/mt5-large". + device_map (str): Device placement for the model. Options include "cpu", specific GPU (e.g., "cuda:1"), + "auto", "balanced", "balanced_low_0", "sequential". + max_input_length (int): Maximum input sequence length (see MetricX documentation, default 1536) + output_dir (str): Output directory for Trainer class. Nothing is actually output during prediction, but it's mandatory to supply. 
+ """ + + use_reference: bool = True + metricx_model_name: str = "google/metricx-24-hybrid-large-v2p6-bfloat16" + tokenizer_name: str = "google/mt5-large" + device_map: str = "cpu" + max_input_length: int = 1536 + output_dir: str = str(Path(CACHE_DIR) / "metricx_output") + reasoning_split_word: str = "" + + +class TranslationMetricxVerifyRequest(BaseVerifyRequest): + src_txt: str + trg_txt: Optional[str] = None + + +class TranslationMetricxVerifyResponse(BaseVerifyResponse): + src_txt: str + trg_txt: Optional[str] = None + extracted_answer: str + + +class TranslationMetricxResourcesServer(SimpleResourcesServer): + config: TranslationMetricxResourcesServerConfig + + def model_post_init(self, context: Any) -> None: + super().model_post_init(context) + + # Load tokenizer (MetricX models use MT5 tokenizers, separate from the model name) + tokenizer = transformers.AutoTokenizer.from_pretrained(self.config.tokenizer_name) + self._tokenizer = tokenizer + + # Load model with device placement + model = MT5ForRegression.from_pretrained( + self.config.metricx_model_name, torch_dtype="auto", device_map=self.config.device_map + ) + # Inputs should go to the device where the first layer is + # Get device from the first model parameter + self._inputs_device = next(model.parameters()).device + + model.eval() + self._metricx_model = model + + # Ensure output directory exists (following predict.py lines 167-169) + os.makedirs(self.config.output_dir, exist_ok=True) + + # Create trainer + training_args = transformers.TrainingArguments( + output_dir=self.config.output_dir, + per_device_eval_batch_size=1, + dataloader_pin_memory=False, + ) + trainer = transformers.Trainer( + model=model, + args=training_args, + ) + self._metricx_trainer = trainer + + def setup_webserver(self) -> FastAPI: + app = super().setup_webserver() + + # Additional server routes go here! 
e.g.: + # app.post("/get_weather")(self.get_weather) + + return app + + async def verify(self, body: TranslationMetricxVerifyRequest) -> TranslationMetricxVerifyResponse: + assistant_responses = [] + for output_item in body.response.output: + if output_item.type != "message": + continue + + for content_item in output_item.content: + if content_item.type != "output_text": + continue + + assistant_responses.append(content_item.text) + + combined_response = "".join(assistant_responses) + + (reward, extracted_answer) = self._verify_answer( + model_response=combined_response, source_text=body.src_txt, target_text=body.trg_txt + ) + + return TranslationMetricxVerifyResponse(**body.model_dump(), extracted_answer=extracted_answer, reward=reward) + + def _verify_answer( + self, model_response: str, source_text: str, target_text: Optional[str] = None + ) -> tuple[float, str]: + extracted_answer = self._extract_answer(model_response) + ds = self._create_dataset_from_example(extracted_answer, source_text, target_text) + predictions, _, _ = self._metricx_trainer.predict(test_dataset=ds) + score = float(predictions[0]) + + # MetricX scores are between 0 and 25, where 25 is worst, so we normalize to 0 to 1 where 0 is worst + reward = (25 - score) / 25.0 + return reward, extracted_answer + + def _create_dataset_from_example( + self, hypothesis: str, source_text: str, reference_text: Optional[str] = None + ) -> datasets.Dataset: + """Create a dataset from a single example, following get_dataset logic from predict.py.""" + # Create input string based on reference ("QE") mode (QE is when use_reference is False) + if not self.config.use_reference or reference_text is None: + input_text = "source: " + source_text + " candidate: " + hypothesis + else: + input_text = "source: " + source_text + " candidate: " + hypothesis + " reference: " + reference_text + + # Tokenize (returns dict with lists) + tokenized = self._tokenizer( + input_text, + max_length=self.config.max_input_length, + truncation=True, + padding=False, + ) + + # Create a single example dict (matching the format that predict.py creates) + example_dict = { + "input_ids": tokenized["input_ids"], + "attention_mask": tokenized["attention_mask"], + } + + # If last token is EOS, remove it (following predict.py _remove_eos function logic) + eos_token_id = self._tokenizer.eos_token_id + if eos_token_id is not None and example_dict["input_ids"][-1] == eos_token_id: + example_dict["input_ids"] = example_dict["input_ids"][:-1] + example_dict["attention_mask"] = example_dict["attention_mask"][:-1] + + # Create dataset from a list of examples (each example is one dict) + # Following predict.py structure: ds.map() operations create per-example dicts + ds = datasets.Dataset.from_list([example_dict]) + + # Set format to torch and move to device (following predict.py line 119-124) + ds.set_format( + type="torch", + columns=["input_ids", "attention_mask"], + device=self._inputs_device, + output_all_columns=True, + ) + + return ds + + def _extract_answer(self, model_response: str) -> str: + # Strip any thinking + no_think_response = model_response.split(self.config.reasoning_split_word)[-1] + no_think_response = no_think_response.strip() + return no_think_response + + +if __name__ == "__main__": + TranslationMetricxResourcesServer.run_webserver() diff --git a/resources_servers/translation_metricx/configs/translation_metricx.yaml b/resources_servers/translation_metricx/configs/translation_metricx.yaml new file mode 100644 index 000000000..8a8b18bcc --- /dev/null +++ 
b/resources_servers/translation_metricx/configs/translation_metricx.yaml
@@ -0,0 +1,42 @@
+translation_metricx:
+  resources_servers:
+    translation_metricx:
+      entrypoint: app.py
+      domain: translation
+      use_reference: true
+      metricx_model_name: google/metricx-24-hybrid-xl-v2p6
+      tokenizer_name: google/mt5-xl  # tokenizer size must match the MetricX model size
+      max_input_length: 1536
+translation_metricx_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: translation_metricx
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: train
+        type: train
+        jsonl_fpath: resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_train.jsonl
+        num_repeats: 1
+        gitlab_identifier:
+          dataset_name: riva_mt_v3_nothinkInSys_train
+          version: 0.0.1
+          artifact_fpath: riva_mt_v3_nothinkInSys_train.jsonl
+        license: NVIDIA Internal Use Only, Do Not Distribute
+      - name: validation
+        type: validation
+        jsonl_fpath: resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_validation.jsonl
+        num_repeats: 1
+        gitlab_identifier:
+          dataset_name: riva_mt_v3_nothinkInSys_validation
+          version: 0.0.1
+          artifact_fpath: riva_mt_v3_nothinkInSys_validation.jsonl
+        license: NVIDIA Internal Use Only, Do Not Distribute
+      - name: example
+        type: example
+        jsonl_fpath: resources_servers/translation_metricx/data/example.jsonl
+        num_repeats: 1
diff --git a/resources_servers/translation_metricx/data/.gitignore b/resources_servers/translation_metricx/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/resources_servers/translation_metricx/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/resources_servers/translation_metricx/data/example.jsonl b/resources_servers/translation_metricx/data/example.jsonl
new file mode 100644
index 000000000..2c921e654
--- /dev/null
+++ b/resources_servers/translation_metricx/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"}
+{"responses_create_params": {"input": [{"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"}
+{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"}
+{"responses_create_params": {"input": [{"role": "user", "content": "Convert the following text into Japanese.
Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_metricx/data/example_nothink.jsonl b/resources_servers/translation_metricx/data/example_nothink.jsonl new file mode 100644 index 000000000..42529ab9a --- /dev/null +++ b/resources_servers/translation_metricx/data/example_nothink.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into English. Text:\n```\n¿Por qué malgastar el tiempo buscando las llaves?\n```"}]}, "src_txt": "¿Por qué malgastar el tiempo buscando las llaves?", "trg_txt": "Why waste your time looking for the keys?", "src_lang": "es-us", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Translate this into English: Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt."}]}, "src_txt": "Ihm wurden das Konservatorium und das Theater gewidmet, in dem jedes Jahr das „Rossini Opera Festival“ stattfindet, das Opernliebhaber aus aller Welt anlockt.", "trg_txt": "Every year, the conservatory and theatre, both named after him, host the Rossini Opera Festival, drawing enthusiasts from around the world.", "src_lang": "de", "trg_lang": "en"} +{"responses_create_params": {"input": [{"role": "system", "content":"/no_think"}, {"role": "user", "content": "Convert the following text into French. Text:\n```\nAll questions have been correctly answered by the treasurer Thomas Kräuchi.\n```"}]}, "src_txt": "All questions have been correctly answered by the treasurer Thomas Kräuchi.", "trg_txt": "Le trésorier Thomas Kräuchi a répondu correctement à toutes les questions.", "src_lang": "en", "trg_lang": "fr"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Convert the following text into Japanese. Text:\n```\nThe next picture shows the atoms emitting photons. Of course, in reality photons are a lot smaller than those in the picture.\n```"}]}, "src_txt": "The next picture shows the atoms emitting photons. 
Of course, in reality photons are a lot smaller than those in the picture.", "trg_txt": "次の写真は、原子が光子を放出している様子です。もちろん、実際には写真よりもはるかに微小です。", "src_lang": "en", "trg_lang": "ja"} +{"responses_create_params": {"input": [{"role": "system", "content": "/no_think"}, {"role": "user", "content": "Translate the following text from Simplified Chinese to English: 比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。"}]}, "src_txt": "比赛是在草地上进行的,洞周围的草被修剪得更短,被称为果岭。", "trg_txt": "The game is played on grass, and the grass around the hole is mown shorter and called the green.", "src_lang": "zh-cn", "trg_lang": "en"} \ No newline at end of file diff --git a/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_train_metrics.json b/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_train_metrics.json new file mode 100644 index 000000000..146d1f6f9 --- /dev/null +++ b/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_train_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "train", + "type": "train", + "jsonl_fpath": "resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_train.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_train", + "version": "0.0.1", + "artifact_fpath": "riva_mt_v3_nothinkInSys_train.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 770000, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 770000, + "Average": 26.18, + "Min": 10.0, + "Max": 267.0, + "Median": 24.44, + "Standard deviation": 9.86 + }, + "Number of turns": { + "Total # non-null values": 770000, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 767619, + "total_count": 770000 + }, + "trg_txt": { + "unique_count": 767567, + "total_count": 770000 + }, + "src_lang": { + "unique_count": 12, + "total_count": 770000 + }, + "trg_lang": { + "unique_count": 12, + "total_count": 770000 + } +} \ No newline at end of file diff --git a/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_validation_metrics.json b/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_validation_metrics.json new file mode 100644 index 000000000..9070a8dee --- /dev/null +++ b/resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_validation_metrics.json @@ -0,0 +1,61 @@ +{ + "name": "validation", + "type": "validation", + "jsonl_fpath": "resources_servers/translation_metricx/data/riva_mt_v3_nothinkInSys_validation.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "riva_mt_v3_nothinkInSys_validation", + "version": "0.0.1", + "artifact_fpath": "riva_mt_v3_nothinkInSys_validation.jsonl" + }, + "license": "NVIDIA Internal Use Only, Do Not Distribute", + "Number of examples": 19940, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 19940, + "Average": 32.68, + "Min": 10.0, + "Max": 82.0, + "Median": 33.05, + "Standard deviation": 10.38 + }, + "Number of turns": { + "Total # non-null values": 
19940, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "src_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "trg_txt": { + "unique_count": 10965, + "total_count": 19940 + }, + "src_lang": { + "unique_count": 11, + "total_count": 19940 + }, + "trg_lang": { + "unique_count": 11, + "total_count": 19940 + } +} \ No newline at end of file diff --git a/resources_servers/translation_metricx/requirements.txt b/resources_servers/translation_metricx/requirements.txt new file mode 100644 index 000000000..3950124ba --- /dev/null +++ b/resources_servers/translation_metricx/requirements.txt @@ -0,0 +1,2 @@ +-e nemo-gym[dev] @ ../../ +metricx @ git+https://github.com/hrossnv/metricx.git@main \ No newline at end of file diff --git a/resources_servers/translation_metricx/tests/test_app.py b/resources_servers/translation_metricx/tests/test_app.py new file mode 100644 index 000000000..0cafd5f9a --- /dev/null +++ b/resources_servers/translation_metricx/tests/test_app.py @@ -0,0 +1,199 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
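+
+# NOTE: these tests download the real MetricX checkpoint from Hugging Face on
+# first run, so they need network access and a few GB of disk for the cache.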
+import logging
+import os
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+from unittest.mock import MagicMock
+
+from pytest import approx, fixture
+
+from nemo_gym import CACHE_DIR
+
+# Point the Hugging Face cache at the repo cache dir. This must happen before
+# importing the app module, which imports transformers/huggingface_hub; both
+# read HF_HOME when they are first imported.
+os.environ["HF_HOME"] = str(Path(CACHE_DIR) / "hf_cache")
+
+from nemo_gym.openai_utils import (  # noqa: E402
+    NeMoGymResponse,
+    NeMoGymResponseCreateParamsNonStreaming,
+    NeMoGymResponseOutputMessage,
+    NeMoGymResponseOutputText,
+)
+from nemo_gym.server_utils import ServerClient  # noqa: E402
+from resources_servers.translation_metricx.app import (  # noqa: E402
+    TranslationMetricxResourcesServer,
+    TranslationMetricxResourcesServerConfig,
+    TranslationMetricxVerifyRequest,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+class TestApp:
+    @fixture(scope="class")
+    def resources_server(self) -> TranslationMetricxResourcesServer:
+        """We only want to spin up the server once since it has to load the model."""
+        logger.info("Spinning up server with MetricX model...")
+
+        server = TranslationMetricxResourcesServer(
+            config=TranslationMetricxResourcesServerConfig(
+                host="0.0.0.0",
+                port=8080,
+                entrypoint="",
+                name="",
+                use_reference=True,
+                # 1.2B parameter model runs fine on CPU, though tests will take a couple of minutes
+                metricx_model_name="google/metricx-24-hybrid-large-v2p6-bfloat16",
+                tokenizer_name="google/mt5-large",
+                device_map="cpu",
+                max_input_length=1536,
+                output_dir=str(Path(CACHE_DIR) / "metricx_output"),
+            ),
+            server_client=MagicMock(spec=ServerClient),
+        )
+
+        logger.info("Model loaded and server started successfully")
+        return server
+
+    def _create_response(self, id: str, model_response_text: str) -> dict[str, Any]:
+        return NeMoGymResponse(
+            id=id,
+            created_at=1234.5,
+            model="response_model",
+            object="response",
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            output=[
+                NeMoGymResponseOutputMessage(
+                    id=f"ID for {model_response_text}",
+                    role="assistant",
+                    status="in_progress",
+                    type="message",
+                    content=[NeMoGymResponseOutputText(annotations=[], text=model_response_text, type="output_text")],
+                )
+            ],
+        ).model_dump()
+
+    async def test_verify_identical(self, resources_server: TranslationMetricxResourcesServer) -> None:
+        source_text = "What is the name of your cat?"
+        target_text = "Was ist der Name deiner Katze?"
+ target_lang_name = "German" + model_create_params = NeMoGymResponseCreateParamsNonStreaming( + input=[ + { + "role": "user", + "content": f'Translate this into {target_lang_name}: "{source_text}"', + } + ] + ) + model_response = NeMoGymResponse(**self._create_response("model_response_id", target_text)) + identical_verify_request = TranslationMetricxVerifyRequest( + responses_create_params=deepcopy(model_create_params), + response=model_response.model_copy(deep=True), + src_txt=source_text, + trg_txt=target_text, + ) + identical_verify_response = await resources_server.verify(identical_verify_request) + assert identical_verify_response.responses_create_params == model_create_params + assert identical_verify_response.response == model_response + assert identical_verify_response.src_txt == source_text + assert identical_verify_response.trg_txt == target_text + assert identical_verify_response.reward == approx(1.0, abs=0.1) + assert identical_verify_response.extracted_answer == target_text + + assert sorted(list(identical_verify_response.model_dump())) == [ + "extracted_answer", + "response", + "responses_create_params", + "reward", + "src_txt", + "trg_txt", + ] + + async def test_verify_identical_without_reference( + self, resources_server: TranslationMetricxResourcesServer + ) -> None: + source_text = "two three" + target_text = "zwei drei" + target_lang_name = "German" + model_create_params = NeMoGymResponseCreateParamsNonStreaming( + input=[ + { + "role": "user", + "content": f'Translate this into {target_lang_name}: "{source_text}"', + } + ] + ) + model_response = NeMoGymResponse(**self._create_response("model_response_id", target_text)) + identical_verify_request = TranslationMetricxVerifyRequest( + responses_create_params=deepcopy(model_create_params), + response=model_response.model_copy(deep=True), + src_txt=source_text, + trg_txt=None, # Technically the model config is set up to use a reference but this triggers the same behavior + ) + identical_verify_response = await resources_server.verify(identical_verify_request) + assert identical_verify_response.responses_create_params == model_create_params + assert identical_verify_response.response == model_response + assert identical_verify_response.src_txt == source_text + assert identical_verify_response.trg_txt is None + assert identical_verify_response.reward == approx(1.0, abs=0.1) + assert identical_verify_response.extracted_answer == target_text + + assert sorted(list(identical_verify_response.model_dump())) == [ + "extracted_answer", + "response", + "responses_create_params", + "reward", + "src_txt", + "trg_txt", + ] + + def test_verify_answer_identical(self, resources_server: TranslationMetricxResourcesServer) -> None: + source_text = "two three" + target_text = "zwei drei" + model_response_text = target_text + + assert resources_server._verify_answer(model_response_text, source_text, target_text) == ( + approx(1.0, abs=0.1), # It's a model output so it won't be exact + target_text, + ) + + def test_verify_answer_think_tags(self, resources_server: TranslationMetricxResourcesServer) -> None: + source_text = "What is the name of your cat?" + target_text = "Was ist der Name deiner Katze?" 
+ model_response_text = f"\n\n{target_text}" + + assert resources_server._verify_answer(model_response_text, source_text, target_text) == ( + approx(1.0, abs=0.1), # It's a model output so it won't be exact + target_text, + ) + + def test_verify_answer_no_match(self, resources_server: TranslationMetricxResourcesServer) -> None: + source_text = "What is the name of your cat?" + target_text = "Was ist der Name deiner Katze?" + model_response_text = "Incorrect translation." + + reward, extracted_answer = resources_server._verify_answer(model_response_text, source_text, target_text) + assert reward <= 0.6 # Raw score is around 10 for this example, where 25 is worst + assert extracted_answer == model_response_text + + def test_verify_answer_without_reference(self, resources_server: TranslationMetricxResourcesServer) -> None: + source_text = "two three" + model_response_text = "zwei drei" + + assert resources_server._verify_answer(model_response_text, source_text) == ( + approx(1.0, abs=0.1), # It's a model output so it won't be exact + model_response_text, + )