From f1f4834e53fd2327f726323e1f3d97c99fe8bc69 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Wed, 24 Dec 2025 13:43:50 +0100 Subject: [PATCH 1/7] Implement new translation tasks for google WMT24++ datasets ref: https://huggingface.co/datasets/google/wmt24pp --- lm_eval/tasks/wmt24pp/README.md | 176 +++++++++++++++++++ lm_eval/tasks/wmt24pp/utils.py | 46 +++++ lm_eval/tasks/wmt24pp/wmt24pp_common.yaml | 38 ++++ lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml | 12 ++ 
lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml | 12 ++ lm_eval/tasks/wmt24pp/wmt24pp_group.yaml | 68 +++++++ 59 files changed, 988 insertions(+) create mode 100644 lm_eval/tasks/wmt24pp/README.md create mode 100644 lm_eval/tasks/wmt24pp/utils.py create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_common.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml create 
mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml create mode 100644 
lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml create mode 100644 lm_eval/tasks/wmt24pp/wmt24pp_group.yaml diff --git a/lm_eval/tasks/wmt24pp/README.md b/lm_eval/tasks/wmt24pp/README.md new file mode 100644 index 00000000000..2a87b6d56da --- /dev/null +++ b/lm_eval/tasks/wmt24pp/README.md @@ -0,0 +1,176 @@ +# WMT24++ Translation Tasks + +This directory provides YAML-based tasks for evaluating English→X machine +translation on the **WMT24++** benchmark hosted on the Hugging Face Hub as +[`google/wmt24pp`](https://huggingface.co/datasets/google/wmt24pp). + +Each language pair is exposed as a separate task, using consistent +WMT-style generation and metrics. + +## Dataset + +- **HF ID**: `google/wmt24pp` +- **Configs**: one per language pair (e.g. `en-de_DE`, `en-pl_PL`, `en-pt_BR`, ...) +- **Split**: single split (`train`), used here as the evaluation split +- **Fields (per example)**: + - `lp`: language pair, e.g. 
`"en-de_DE"` + - `domain`: text domain (canary, news, social, speech, literary) + - `document_id`: document identifier + - `segment_id`: global segment identifier + - `is_bad_source`: boolean flag for low-quality sources + - `source`: English source sentence + - `target`: post-edit of `original_target` (recommended reference) + - `original_target`: original reference translation + +In this task family, we: +- **always evaluate English→X** using `source` as input and `target` as reference +- **drop all examples with `is_bad_source == True`** +- **use all domains** (no filtering on `domain`). + +## Tasks + +Common configuration is defined in `wmt24pp_common.yaml` (the shared base +file referenced by `include: wmt24pp_common.yaml` in every +per-language YAML): + +- `dataset_path: google/wmt24pp` +- `test_split: train` +- `output_type: generate_until` +- `doc_to_text: "Translate English to the target language: {{source}}"` +- `doc_to_target: "{{target}}"` +- `custom_dataset: !function utils.load_wmt24pp_dataset` +- Metrics: **BLEU**, **TER**, **ChrF** (same triple as classic WMT tasks) + +The `lang_pair` in `metadata` is passed to `utils.load_wmt24pp_dataset`, which +loads the corresponding HF config and filters out bad sources. + +Each language pair has its own YAML including the common config, e.g.: + +```yaml +include: wmt24pp_common.yaml + +task: wmt24pp-en-de_DE + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-de_DE" +``` + +The `lang_pair` in `metadata` is passed to `utils.load_wmt24pp_dataset`, which +loads the corresponding HF config and filters out bad sources. + +All available language pairs are listed in the dataset card; in this repo they +are instantiated as tasks named `wmt24pp-{lang_pair}`, where `{lang_pair}` matches the HF +config (e.g. `wmt24pp-en-pt_BR`).
+ +### Group + +`wmt24pp_group.yaml` defines a group: + +- `group: wmt24pp` +- `group_alias: WMT24++` +- `task: [wmt24pp-en-de_DE, wmt24pp-en-pl_PL, ...]` +- `aggregate_metric_list` aggregating **ChrF** across all subtasks using + `mean` (weighted by dataset size). + +You can run all WMT24++ tasks via: + +```bash +python -m lm_eval run \ + --model hf --model_args pretrained=... \ + --tasks wmt24pp +``` + +or select any subset of language pairs explicitly: + +```bash +python -m lm_eval run \ + --model hf --model_args pretrained=... \ + --tasks wmt24pp-en-de_DE wmt24pp-en-pl_PL +``` + +You can also provide a chat template: + +```bash +python -m lm_eval run \ + --model hf --model_args pretrained=... \ + --tasks wmt24pp-en-de_DE wmt24pp-en-pl_PL \ + --apply_chat_template ... +``` + +## Example evaluation config + +You can run a subset of language pairs using a YAML config. + +```yaml +model: hf +model_args: + pretrained: Qwen/Qwen2.5-7B-Instruct + dtype: float16 + +tasks: + - wmt24pp-en-pl_PL + +num_fewshot: 0 +batch_size: 1 +max_batch_size: 1 +# device: cuda +limit: 10 + +gen_kwargs: + temperature: 0.0 + max_gen_toks: 1400 + +output_path: ./results/ +log_samples: true + +system_instruction: >- + You are a translation engine. Given an English sentence and a target language, + output ONLY the translation in the specified target language, with no + explanations, quotes, or extra text. + +wandb_args: {} +hf_hub_log_args: {} +``` + +With the configuration in the YAML file, you can run an experiment with the following command: + +```bash +lm_eval run \ + --config my-tasks-config.yaml \ + --apply_chat_template ... +``` + +## Metrics + +We follow the same metric setup as the other WMT translation tasks in this +repository, exposing three standard MT metrics: + +- **BLEU** (`bleu`) – via SacreBLEU +- **TER** (`ter`) – Translation Error Rate +- **ChrF** (`chrf`) – primary metric of interest for WMT24++ (character n‑gram + F-score), matching common reporting practices (e.g.
Nemotron-3 Nano 30B). + +All metrics are implemented via `lm_eval.api.metrics` and use SacreBLEU under +the hood. + +## Citation + +Please cite the original WMT24++ paper and the lm-evaluation-harness project +as appropriate when using these tasks in publications. + +``` +@misc{deutsch2025wmt24expandinglanguagecoverage, + title={{WMT24++: Expanding the Language Coverage of WMT24 to 55 Languages \& Dialects}}, + author={Daniel Deutsch and Eleftheria Briakou and Isaac Caswell and Mara Finkelstein and Rebecca Galor and Juraj Juraska and Geza Kovacs and Alison Lui and Ricardo Rei and Jason Riesa and Shruti Rijhwani and Parker Riley and Elizabeth Salesky and Firas Trabelsi and Stephanie Winkler and Biao Zhang and Markus Freitag}, + year={2025}, + eprint={2502.12404}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2502.12404}, +} +``` diff --git a/lm_eval/tasks/wmt24pp/utils.py b/lm_eval/tasks/wmt24pp/utils.py new file mode 100644 index 00000000000..849ca86e740 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/utils.py @@ -0,0 +1,46 @@ +"""Utilities for the WMT24++ translation tasks. + +This module provides a small helper used as `custom_dataset` in YAML-based +ConfigurableTasks. It loads the `google/wmt24pp` dataset for a specific +English→X language pair, filters bad sources, and returns a split dict +compatible with `ConfigurableTask`. +""" + +from __future__ import annotations + +from typing import Any, Dict + +from datasets import Dataset, load_dataset + + +def load_wmt24pp_dataset(*, lang_pair: str, split: str = "train", **kwargs: Any) -> Dict[str, Dataset]: + """Load and filter the WMT24++ dataset for a specific language pair. + + Parameters + ---------- + lang_pair: + Exact value of the `lp` field / HF config name, e.g. "en-de_DE". + split: + Dataset split name to load. WMT24++ exposes a single split ("train"), + which we treat as the evaluation split. + **kwargs: + Extra keyword arguments; accepted for compatibility and currently ignored.
+ + Returns + ------- + dict[str, datasets.Dataset] + A mapping from split name to filtered dataset, as expected by + `ConfigurableTask.custom_dataset`. + """ + # For WMT24++, the config name is the language pair (`lang_pair`). + # Ignore extraneous kwargs coming from global metadata (e.g. model args + # like `pretrained`, `dtype`, etc.). We only pass arguments that + # `load_dataset` for this builder actually expects. + _ = kwargs # intentionally unused for now + + ds = load_dataset("google/wmt24pp", lang_pair, split=split) + + # Filter out bad sources as recommended by the dataset authors. + ds = ds.filter(lambda ex: not ex["is_bad_source"]) + + return {split: ds} diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml new file mode 100644 index 00000000000..092becff29d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml @@ -0,0 +1,38 @@ +# Common configuration for WMT24++ English→X translation tasks + +# HF dataset information +# Note: we actually load via `custom_dataset`, but this documents the source. +dataset_path: google/wmt24pp +# Each language pair is a separate HF config; see per-language YAMLs. + +# We treat the single available split ("train") as the evaluation split. 
+training_split: null +validation_split: null +test_split: train + +output_type: generate_until + +# Plain translation: English source → target language +doc_to_text: "Translate English to the target language: {{source}}" +doc_to_target: "{{target}}" + +# Load and filter data via Python helper +custom_dataset: !function utils.load_wmt24pp_dataset + +# WMT-style metrics: BLEU, TER, ChrF +metric_list: + - metric: bleu + - metric: ter + - metric: chrf + +# Greedy decoding, stop at newline (mirrors translation/wmt_common_yaml) +generation_kwargs: + until: + - "\n" + do_sample: false + temperature: 0.0 + +# Zero-shot translation by default +num_fewshot: 0 + +repeats: 1 diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml new file mode 100644 index 00000000000..dad533da212 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ar_EG + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ar_EG" +doc_to_text: "Translate English to Arabic (Egypt): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml new file mode 100644 index 00000000000..797ec29a524 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ar_SA + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ar_SA" +doc_to_text: "Translate English to Arabic (Saudi Arabia): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml new file mode 100644 index 00000000000..d5766159e78 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-bg_BG + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-bg_BG" +doc_to_text: "Translate English to Bulgarian: 
{{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml new file mode 100644 index 00000000000..68021140614 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-bn_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-bn_IN" +doc_to_text: "Translate English to Bengali (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml new file mode 100644 index 00000000000..6be46fba346 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ca_ES + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ca_ES" +doc_to_text: "Translate English to Catalan (Spain): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml new file mode 100644 index 00000000000..a0ed7ef2584 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-cs_CZ + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-cs_CZ" +doc_to_text: "Translate English to Czech: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml new file mode 100644 index 00000000000..c9f3bcd644d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-da_DK + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-da_DK" +doc_to_text: "Translate English to Danish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml new file mode 100644 index 00000000000..9207bab3fbc --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml @@ -0,0 
+1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-de_DE + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-de_DE" +doc_to_text: "Translate English to German: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml new file mode 100644 index 00000000000..0172f023fe8 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-el_GR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-el_GR" +doc_to_text: "Translate English to Greek: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml new file mode 100644 index 00000000000..b34800fa4dc --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-es_MX + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-es_MX" +doc_to_text: "Translate English to Spanish (Mexico): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml new file mode 100644 index 00000000000..919e65f895c --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-et_EE + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-et_EE" +doc_to_text: "Translate English to Estonian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml new file mode 100644 index 00000000000..38e99de7953 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-fa_IR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-fa_IR" +doc_to_text: "Translate English to Persian (Iran): {{source}}" diff --git 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml new file mode 100644 index 00000000000..e376a2e43bd --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-fi_FI + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-fi_FI" +doc_to_text: "Translate English to Finnish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml new file mode 100644 index 00000000000..8c6e49d3cdb --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-fil_PH + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-fil_PH" +doc_to_text: "Translate English to Filipino: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml new file mode 100644 index 00000000000..94c12acbd61 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-fr_CA + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-fr_CA" +doc_to_text: "Translate English to French (Canada): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml new file mode 100644 index 00000000000..9bbe00e1871 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-fr_FR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-fr_FR" +doc_to_text: "Translate English to French (France): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml new file mode 100644 index 00000000000..c84dc5b6cc5 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml @@ -0,0 +1,12 @@ 
+include: wmt24pp_common.yaml + +task: wmt24pp-en-gu_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-gu_IN" +doc_to_text: "Translate English to Gujarati (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml new file mode 100644 index 00000000000..ff72d99489c --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-he_IL + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-he_IL" +doc_to_text: "Translate English to Hebrew: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml new file mode 100644 index 00000000000..82c87ed2d7d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-hi_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-hi_IN" +doc_to_text: "Translate English to Hindi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml new file mode 100644 index 00000000000..f99f18a2b01 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-hr_HR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-hr_HR" +doc_to_text: "Translate English to Croatian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml new file mode 100644 index 00000000000..af0e2843ec8 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-hu_HU + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-hu_HU" +doc_to_text: "Translate English to Hungarian: {{source}}" diff --git 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml new file mode 100644 index 00000000000..dc8ac5b60ec --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-id_ID + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-id_ID" +doc_to_text: "Translate English to Indonesian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml new file mode 100644 index 00000000000..8a7a1030c80 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-is_IS + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-is_IS" +doc_to_text: "Translate English to Icelandic: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml new file mode 100644 index 00000000000..7bd2bcefcf4 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-it_IT + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-it_IT" +doc_to_text: "Translate English to Italian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml new file mode 100644 index 00000000000..e09ced1971e --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ja_JP + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ja_JP" +doc_to_text: "Translate English to Japanese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml new file mode 100644 index 00000000000..5599aa0209d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml @@ -0,0 +1,12 @@ +include: 
wmt24pp_common.yaml + +task: wmt24pp-en-kn_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-kn_IN" +doc_to_text: "Translate English to Kannada (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml new file mode 100644 index 00000000000..6bb33a6ad79 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ko_KR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ko_KR" +doc_to_text: "Translate English to Korean: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml new file mode 100644 index 00000000000..67387c6e5b7 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-lt_LT + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-lt_LT" +doc_to_text: "Translate English to Lithuanian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml new file mode 100644 index 00000000000..28397204da7 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-lv_LV + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-lv_LV" +doc_to_text: "Translate English to Latvian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml new file mode 100644 index 00000000000..f822aea5082 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ml_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ml_IN" +doc_to_text: "Translate English to Malayalam (India): {{source}}" diff --git 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml new file mode 100644 index 00000000000..80c50f9d2cf --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-mr_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-mr_IN" +doc_to_text: "Translate English to Marathi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml new file mode 100644 index 00000000000..7b9f6f97823 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-nl_NL + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-nl_NL" +doc_to_text: "Translate English to Dutch (Netherlands): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml new file mode 100644 index 00000000000..82aca4296ca --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-no_NO + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-no_NO" +doc_to_text: "Translate English to Norwegian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml new file mode 100644 index 00000000000..9282d474df2 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-pa_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-pa_IN" +doc_to_text: "Translate English to Punjabi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml new file mode 100644 index 00000000000..46422720c7a --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml @@ -0,0 +1,12 
@@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-pl_PL + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-pl_PL" +doc_to_text: "Translate English to Polish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml new file mode 100644 index 00000000000..1ddebc0dc66 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-pt_BR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-pt_BR" +doc_to_text: "Translate English to Brazilian Portuguese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml new file mode 100644 index 00000000000..a16efd4eb47 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-pt_PT + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-pt_PT" +doc_to_text: "Translate English to European Portuguese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml new file mode 100644 index 00000000000..be7964046f0 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ro_RO + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ro_RO" +doc_to_text: "Translate English to Romanian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml new file mode 100644 index 00000000000..ea66158b74d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ru_RU + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ru_RU" +doc_to_text: "Translate English to Russian: {{source}}" diff --git 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml new file mode 100644 index 00000000000..59c7dec64ad --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-sk_SK + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sk_SK" +doc_to_text: "Translate English to Slovak: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml new file mode 100644 index 00000000000..86d31c61893 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-sl_SI + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sl_SI" +doc_to_text: "Translate English to Slovenian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml new file mode 100644 index 00000000000..d3293f95bc9 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-sr_RS + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sr_RS" +doc_to_text: "Translate English to Serbian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml new file mode 100644 index 00000000000..b1084b2eb29 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-sv_SE + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sv_SE" +doc_to_text: "Translate English to Swedish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml new file mode 100644 index 00000000000..0e42064cc99 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml @@ -0,0 +1,12 @@ +include: 
wmt24pp_common.yaml + +task: wmt24pp-en-sw_KE + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sw_KE" +doc_to_text: "Translate English to Swahili (Kenya): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml new file mode 100644 index 00000000000..0ed7733c259 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-sw_TZ + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-sw_TZ" +doc_to_text: "Translate English to Swahili (Tanzania): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml new file mode 100644 index 00000000000..b1007bbad31 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ta_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ta_IN" +doc_to_text: "Translate English to Tamil (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml new file mode 100644 index 00000000000..0ed5f234f9a --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-te_IN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-te_IN" +doc_to_text: "Translate English to Telugu (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml new file mode 100644 index 00000000000..ff861b866a3 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-th_TH + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-th_TH" +doc_to_text: "Translate English to Thai: {{source}}" diff --git 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml new file mode 100644 index 00000000000..197084ed8f6 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-tr_TR + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-tr_TR" +doc_to_text: "Translate English to Turkish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml new file mode 100644 index 00000000000..72ea95523a6 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-uk_UA + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-uk_UA" +doc_to_text: "Translate English to Ukrainian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml new file mode 100644 index 00000000000..01940dd991b --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-ur_PK + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-ur_PK" +doc_to_text: "Translate English to Urdu (Pakistan): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml new file mode 100644 index 00000000000..aa0c27ade9b --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-vi_VN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-vi_VN" +doc_to_text: "Translate English to Vietnamese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml new file mode 100644 index 00000000000..d45019d34b6 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml @@ -0,0 +1,12 @@ +include: 
wmt24pp_common.yaml + +task: wmt24pp-en-zh_CN + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-zh_CN" +doc_to_text: "Translate English to Chinese (Simplified, China): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml new file mode 100644 index 00000000000..ac7b10d983d --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-zh_TW + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-zh_TW" +doc_to_text: "Translate English to Chinese (Traditional, Taiwan): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml new file mode 100644 index 00000000000..f3a7cf0bd9c --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml @@ -0,0 +1,12 @@ +include: wmt24pp_common.yaml + +task: wmt24pp-en-zu_ZA + +tag: + - translation + - wmt24pp + +metadata: + version: 1.0 + lang_pair: "en-zu_ZA" +doc_to_text: "Translate English to Zulu (South Africa): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_group.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_group.yaml new file mode 100644 index 00000000000..ef18d85fe31 --- /dev/null +++ b/lm_eval/tasks/wmt24pp/wmt24pp_group.yaml @@ -0,0 +1,68 @@ +group: wmt24pp +group_alias: WMT24++ + +task: + - wmt24pp-en-ar_EG + - wmt24pp-en-ar_SA + - wmt24pp-en-bg_BG + - wmt24pp-en-bn_IN + - wmt24pp-en-ca_ES + - wmt24pp-en-cs_CZ + - wmt24pp-en-da_DK + - wmt24pp-en-de_DE + - wmt24pp-en-el_GR + - wmt24pp-en-es_MX + - wmt24pp-en-et_EE + - wmt24pp-en-fa_IR + - wmt24pp-en-fi_FI + - wmt24pp-en-fil_PH + - wmt24pp-en-fr_CA + - wmt24pp-en-fr_FR + - wmt24pp-en-gu_IN + - wmt24pp-en-he_IL + - wmt24pp-en-hi_IN + - wmt24pp-en-hr_HR + - wmt24pp-en-hu_HU + - wmt24pp-en-id_ID + - wmt24pp-en-is_IS + - wmt24pp-en-it_IT + - wmt24pp-en-ja_JP + - wmt24pp-en-kn_IN + - wmt24pp-en-ko_KR + - wmt24pp-en-lt_LT + - 
wmt24pp-en-lv_LV + - wmt24pp-en-ml_IN + - wmt24pp-en-mr_IN + - wmt24pp-en-nl_NL + - wmt24pp-en-no_NO + - wmt24pp-en-pa_IN + - wmt24pp-en-pl_PL + - wmt24pp-en-pt_BR + - wmt24pp-en-pt_PT + - wmt24pp-en-ro_RO + - wmt24pp-en-ru_RU + - wmt24pp-en-sk_SK + - wmt24pp-en-sl_SI + - wmt24pp-en-sr_RS + - wmt24pp-en-sv_SE + - wmt24pp-en-sw_KE + - wmt24pp-en-sw_TZ + - wmt24pp-en-ta_IN + - wmt24pp-en-te_IN + - wmt24pp-en-th_TH + - wmt24pp-en-tr_TR + - wmt24pp-en-uk_UA + - wmt24pp-en-ur_PK + - wmt24pp-en-vi_VN + - wmt24pp-en-zh_CN + - wmt24pp-en-zh_TW + - wmt24pp-en-zu_ZA + +aggregate_metric_list: + - metric: chrf + aggregation: mean + higher_is_better: true + weight_by_size: true + +metadata: + version: 1.0 From 14ce5d83c9b520262860c437612e837c65460d77 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Thu, 25 Dec 2025 16:50:46 +0100 Subject: [PATCH 2/7] Implement evaluation prompt consistent with the wmt24++ paper --- lm_eval/tasks/wmt24pp/utils.py | 129 ++++++++++++++++--- lm_eval/tasks/wmt24pp/wmt24pp_common.yaml | 5 +- lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml | 1 - 
lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml | 1 - lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml | 1 - 57 files changed, 116 insertions(+), 73 deletions(-) diff --git a/lm_eval/tasks/wmt24pp/utils.py b/lm_eval/tasks/wmt24pp/utils.py index 849ca86e740..14294bfa9c7 100644 --- 
a/lm_eval/tasks/wmt24pp/utils.py +++ b/lm_eval/tasks/wmt24pp/utils.py @@ -1,9 +1,9 @@ """Utilities for the WMT24++ translation tasks. -This module provides a small helper used as `custom_dataset` in YAML-based -ConfigurableTasks. It loads the `google/wmt24pp` dataset for a specific -English→X language pair, filters bad sources, and returns a split dict -compatible with `ConfigurableTask`. +This module provides helpers used by YAML-configured ConfigurableTasks. It +exposes the `custom_dataset` loader, along with logic to render the official +WMT24++ prompt template so that all language-pair YAMLs can share a single +`doc_to_text` implementation. """ from __future__ import annotations @@ -12,6 +12,110 @@ from datasets import Dataset, load_dataset +SRC_LANG = "English" + +TARGET_METADATA = { + "ar_EG": {"tgt_lang": "Arabic", "tgt_region": "Egypt"}, + "ar_SA": {"tgt_lang": "Arabic", "tgt_region": "Saudi Arabia"}, + "bg_BG": {"tgt_lang": "Bulgarian", "tgt_region": "Bulgaria"}, + "bn_IN": {"tgt_lang": "Bengali", "tgt_region": "India"}, + "ca_ES": {"tgt_lang": "Catalan", "tgt_region": "Spain"}, + "cs_CZ": {"tgt_lang": "Czech", "tgt_region": "Czechia"}, + "da_DK": {"tgt_lang": "Danish", "tgt_region": "Denmark"}, + "de_DE": {"tgt_lang": "German", "tgt_region": "Germany"}, + "el_GR": {"tgt_lang": "Greek", "tgt_region": "Greece"}, + "es_MX": {"tgt_lang": "Spanish", "tgt_region": "Mexico"}, + "et_EE": {"tgt_lang": "Estonian", "tgt_region": "Estonia"}, + "fa_IR": {"tgt_lang": "Persian", "tgt_region": "Iran"}, + "fi_FI": {"tgt_lang": "Finnish", "tgt_region": "Finland"}, + "fil_PH": {"tgt_lang": "Filipino", "tgt_region": "Philippines"}, + "fr_CA": {"tgt_lang": "French", "tgt_region": "Canada"}, + "fr_FR": {"tgt_lang": "French", "tgt_region": "France"}, + "gu_IN": {"tgt_lang": "Gujarati", "tgt_region": "India"}, + "he_IL": {"tgt_lang": "Hebrew", "tgt_region": "Israel"}, + "hi_IN": {"tgt_lang": "Hindi", "tgt_region": "India"}, + "hr_HR": {"tgt_lang": "Croatian", "tgt_region": 
"Croatia"}, + "hu_HU": {"tgt_lang": "Hungarian", "tgt_region": "Hungary"}, + "id_ID": {"tgt_lang": "Indonesian", "tgt_region": "Indonesia"}, + "is_IS": {"tgt_lang": "Icelandic", "tgt_region": "Iceland"}, + "it_IT": {"tgt_lang": "Italian", "tgt_region": "Italy"}, + "ja_JP": {"tgt_lang": "Japanese", "tgt_region": "Japan"}, + "kn_IN": {"tgt_lang": "Kannada", "tgt_region": "India"}, + "ko_KR": {"tgt_lang": "Korean", "tgt_region": "South Korea"}, + "lt_LT": {"tgt_lang": "Lithuanian", "tgt_region": "Lithuania"}, + "lv_LV": {"tgt_lang": "Latvian", "tgt_region": "Latvia"}, + "ml_IN": {"tgt_lang": "Malayalam", "tgt_region": "India"}, + "mr_IN": {"tgt_lang": "Marathi", "tgt_region": "India"}, + "nl_NL": {"tgt_lang": "Dutch", "tgt_region": "Netherlands"}, + "no_NO": {"tgt_lang": "Norwegian", "tgt_region": "Norway"}, + "pa_IN": {"tgt_lang": "Punjabi", "tgt_region": "India"}, + "pl_PL": {"tgt_lang": "Polish", "tgt_region": "Poland"}, + "pt_BR": {"tgt_lang": "Portuguese", "tgt_region": "Brazil"}, + "pt_PT": {"tgt_lang": "Portuguese", "tgt_region": "Portugal"}, + "ro_RO": {"tgt_lang": "Romanian", "tgt_region": "Romania"}, + "ru_RU": {"tgt_lang": "Russian", "tgt_region": "Russia"}, + "sk_SK": {"tgt_lang": "Slovak", "tgt_region": "Slovakia"}, + "sl_SI": {"tgt_lang": "Slovenian", "tgt_region": "Slovenia"}, + "sr_RS": {"tgt_lang": "Serbian", "tgt_region": "Serbia"}, + "sv_SE": {"tgt_lang": "Swedish", "tgt_region": "Sweden"}, + "sw_KE": {"tgt_lang": "Swahili", "tgt_region": "Kenya"}, + "sw_TZ": {"tgt_lang": "Swahili", "tgt_region": "Tanzania"}, + "ta_IN": {"tgt_lang": "Tamil", "tgt_region": "India"}, + "te_IN": {"tgt_lang": "Telugu", "tgt_region": "India"}, + "th_TH": {"tgt_lang": "Thai", "tgt_region": "Thailand"}, + "tr_TR": {"tgt_lang": "Turkish", "tgt_region": "Turkey"}, + "uk_UA": {"tgt_lang": "Ukrainian", "tgt_region": "Ukraine"}, + "ur_PK": {"tgt_lang": "Urdu", "tgt_region": "Pakistan"}, + "vi_VN": {"tgt_lang": "Vietnamese", "tgt_region": "Vietnam"}, + "zh_CN": {"tgt_lang": 
"Chinese", "tgt_region": "China"}, + "zh_TW": {"tgt_lang": "Chinese", "tgt_region": "Taiwan"}, + "zu_ZA": {"tgt_lang": "Zulu", "tgt_region": "South Africa"}, +} + +PROMPT_TEMPLATE = ( + "You are a professional {src_lang} to {tgt_lang} translator, tasked with providing " + "translations suitable for use in {tgt_region} ({tgt_code}). Your goal is to accurately " + "convey the meaning and nuances of the original {src_lang} text while adhering to {tgt_lang} " + "grammar, vocabulary, and cultural sensitivities.\n" + "Please translate the following {src_lang} text into {tgt_lang} ({tgt_code}):\n\n" + "{input_text}\n\n" + "Produce only the {tgt_lang} translation, without any additional explanations or commentary." +) + + +def render_prompt(*, lang_pair: str, source_text: str) -> str: + """Render the official WMT24++ translation prompt for a given language pair.""" + if "-" not in lang_pair: + msg = f"lang_pair must be of the form 'en-XX_YY', got {lang_pair}" + raise ValueError(msg) + + _, tgt_code = lang_pair.split("-", maxsplit=1) + info = TARGET_METADATA.get(tgt_code) + if info is None: + msg = ( + f"Unknown WMT24++ target code '{tgt_code}'. Please add metadata to" + " TARGET_METADATA to render the prompt." + ) + raise KeyError(msg) + + return PROMPT_TEMPLATE.format( + src_lang=SRC_LANG, + tgt_lang=info["tgt_lang"], + tgt_region=info["tgt_region"], + tgt_code=tgt_code, + input_text=source_text, + ) + + +def doc_to_text(doc: Dict[str, Any]) -> str: + """Shared doc_to_text function that renders the WMT24++ prompt.""" + lang_pair = doc.get("lp") + if not lang_pair: + raise KeyError("Expected 'lp' field in WMT24++ example.") + + source = doc.get("source", "") + return render_prompt(lang_pair=lang_pair, source_text=source) + def load_wmt24pp_dataset(*, lang_pair: str, split: str = "train", **kwargs: Any) -> Dict[str, Dataset]: """Load and filter the WMT24++ dataset for a specific language pair. 
@@ -24,23 +128,16 @@ def load_wmt24pp_dataset(*, lang_pair: str, split: str = "train", **kwargs: Any) Dataset split name to load. WMT24++ exposes a single split ("train"), which we treat as the evaluation split. **kwargs: - Extra keyword arguments forwarded to `load_dataset`. + Extra keyword arguments; currently ignored (not forwarded to `load_dataset`) + but accepted for compatibility with ConfigurableTask metadata plumbing. Returns ------- - dict[str, datasets.Dataset] - A mapping from split name to filtered dataset, as expected by - `ConfigurableTask.custom_dataset`. + dict[str, Dataset] + Mapping from the requested split name to the filtered dataset. """ - # For WMT24++, the config name is the language pair (`lang_pair`). - # Ignore extraneous kwargs coming from global metadata (e.g. model args - # like `pretrained`, `dtype`, etc.). We only pass arguments that - # `load_dataset` for this builder actually expects. - _ = kwargs # intentionally unused for now + _ = kwargs # ignore extraneous metadata ds = load_dataset("google/wmt24pp", lang_pair, split=split) - - # Filter out bad sources as recommended by the dataset authors. 
ds = ds.filter(lambda ex: not ex["is_bad_source"]) - return {split: ds} diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml index 092becff29d..d9ffeb6add2 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml @@ -12,8 +12,9 @@ test_split: train output_type: generate_until -# Plain translation: English source → target language -doc_to_text: "Translate English to the target language: {{source}}" +# Shared prompt renderer: official WMT24++ instructions +doc_to_text: !function utils.doc_to_text + doc_to_target: "{{target}}" # Load and filter data via Python helper diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml index dad533da212..fb9e0287461 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_EG.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ar_EG" -doc_to_text: "Translate English to Arabic (Egypt): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml index 797ec29a524..ed6bcf75f7e 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ar_SA.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ar_SA" -doc_to_text: "Translate English to Arabic (Saudi Arabia): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml index d5766159e78..d14cc317afb 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-bg_BG.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-bg_BG" -doc_to_text: "Translate English to Bulgarian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml index 68021140614..8c2973a1b1c 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml +++ 
b/lm_eval/tasks/wmt24pp/wmt24pp_en-bn_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-bn_IN" -doc_to_text: "Translate English to Bengali (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml index 6be46fba346..c22a9f9208e 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ca_ES.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ca_ES" -doc_to_text: "Translate English to Catalan (Spain): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml index a0ed7ef2584..eacb13e32f0 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-cs_CZ.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-cs_CZ" -doc_to_text: "Translate English to Czech: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml index c9f3bcd644d..98a5112dafe 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-da_DK.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-da_DK" -doc_to_text: "Translate English to Danish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml index 9207bab3fbc..65d381f2460 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-de_DE.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-de_DE" -doc_to_text: "Translate English to German: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml index 0172f023fe8..6f59395d341 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-el_GR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-el_GR" -doc_to_text: "Translate English to Greek: {{source}}" 
diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml index b34800fa4dc..be200a37120 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-es_MX.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-es_MX" -doc_to_text: "Translate English to Spanish (Mexico): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml index 919e65f895c..77d3dcc2ab4 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-et_EE.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-et_EE" -doc_to_text: "Translate English to Estonian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml index 38e99de7953..8147e2cabcc 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fa_IR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-fa_IR" -doc_to_text: "Translate English to Persian (Iran): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml index e376a2e43bd..965e0a5e03e 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fi_FI.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-fi_FI" -doc_to_text: "Translate English to Finnish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml index 8c6e49d3cdb..ff34f333028 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fil_PH.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-fil_PH" -doc_to_text: "Translate English to Filipino: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml index 94c12acbd61..2f1d0c1f228 100644 --- 
a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_CA.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-fr_CA" -doc_to_text: "Translate English to French (Canada): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml index 9bbe00e1871..6e5ba544c8e 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-fr_FR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-fr_FR" -doc_to_text: "Translate English to French (France): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml index c84dc5b6cc5..d477aaff870 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-gu_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-gu_IN" -doc_to_text: "Translate English to Gujarati (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml index ff72d99489c..cb667e6e2a8 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-he_IL.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-he_IL" -doc_to_text: "Translate English to Hebrew: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml index 82c87ed2d7d..92221f1f89f 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hi_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-hi_IN" -doc_to_text: "Translate English to Hindi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml index f99f18a2b01..6d1fafd8270 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hr_HR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 
lang_pair: "en-hr_HR" -doc_to_text: "Translate English to Croatian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml index af0e2843ec8..ae2ec2a394a 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-hu_HU.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-hu_HU" -doc_to_text: "Translate English to Hungarian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml index dc8ac5b60ec..a0c6e045d5f 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-id_ID.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-id_ID" -doc_to_text: "Translate English to Indonesian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml index 8a7a1030c80..0123617f36c 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-is_IS.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-is_IS" -doc_to_text: "Translate English to Icelandic: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml index 7bd2bcefcf4..2dea3852cbc 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-it_IT.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-it_IT" -doc_to_text: "Translate English to Italian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml index e09ced1971e..af60ea57244 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ja_JP.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ja_JP" -doc_to_text: "Translate English to Japanese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml 
b/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml index 5599aa0209d..f4e0db0e18c 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-kn_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-kn_IN" -doc_to_text: "Translate English to Kannada (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml index 6bb33a6ad79..84b6ef8b674 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ko_KR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ko_KR" -doc_to_text: "Translate English to Korean: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml index 67387c6e5b7..cea6b406ca7 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-lt_LT.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-lt_LT" -doc_to_text: "Translate English to Lithuanian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml index 28397204da7..9bd5aa55857 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-lv_LV.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-lv_LV" -doc_to_text: "Translate English to Latvian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml index f822aea5082..33f2988eadb 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ml_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ml_IN" -doc_to_text: "Translate English to Malayalam (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml index 80c50f9d2cf..042e7e75567 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml +++ 
b/lm_eval/tasks/wmt24pp/wmt24pp_en-mr_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-mr_IN" -doc_to_text: "Translate English to Marathi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml index 7b9f6f97823..9a7866ce346 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-nl_NL.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-nl_NL" -doc_to_text: "Translate English to Dutch (Netherlands): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml index 82aca4296ca..e93346c388d 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-no_NO.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-no_NO" -doc_to_text: "Translate English to Norwegian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml index 9282d474df2..0bb9666dc4c 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pa_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-pa_IN" -doc_to_text: "Translate English to Punjabi (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml index 46422720c7a..d34b7f18022 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pl_PL.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-pl_PL" -doc_to_text: "Translate English to Polish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml index 1ddebc0dc66..fb60c9c7f84 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_BR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-pt_BR" -doc_to_text: "Translate English to 
Brazilian Portuguese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml index a16efd4eb47..c1055e824e1 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-pt_PT.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-pt_PT" -doc_to_text: "Translate English to European Portuguese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml index be7964046f0..04dbf23e9ca 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ro_RO.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ro_RO" -doc_to_text: "Translate English to Romanian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml index ea66158b74d..8a2d4127f95 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ru_RU.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ru_RU" -doc_to_text: "Translate English to Russian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml index 59c7dec64ad..2e1d50f13b3 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sk_SK.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sk_SK" -doc_to_text: "Translate English to Slovak: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml index 86d31c61893..b48b028da8c 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sl_SI.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sl_SI" -doc_to_text: "Translate English to Slovenian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml index 
d3293f95bc9..a5931168814 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sr_RS.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sr_RS" -doc_to_text: "Translate English to Serbian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml index b1084b2eb29..bb5718e1734 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sv_SE.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sv_SE" -doc_to_text: "Translate English to Swedish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml index 0e42064cc99..ecca35e664f 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_KE.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sw_KE" -doc_to_text: "Translate English to Swahili (Kenya): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml index 0ed7733c259..1da95c2d7f3 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-sw_TZ.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-sw_TZ" -doc_to_text: "Translate English to Swahili (Tanzania): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml index b1007bbad31..d4dd58e1e0e 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ta_IN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ta_IN" -doc_to_text: "Translate English to Tamil (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml index 0ed5f234f9a..33a313376e3 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-te_IN.yaml @@ -9,4 +9,3 @@ tag: 
metadata: version: 1.0 lang_pair: "en-te_IN" -doc_to_text: "Translate English to Telugu (India): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml index ff861b866a3..b13c7bd11d2 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-th_TH.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-th_TH" -doc_to_text: "Translate English to Thai: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml index 197084ed8f6..2ec2d88fd63 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-tr_TR.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-tr_TR" -doc_to_text: "Translate English to Turkish: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml index 72ea95523a6..09504d0203a 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-uk_UA.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-uk_UA" -doc_to_text: "Translate English to Ukrainian: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml index 01940dd991b..8eb5bd9eb53 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-ur_PK.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-ur_PK" -doc_to_text: "Translate English to Urdu (Pakistan): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml index aa0c27ade9b..d10894c66dc 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-vi_VN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-vi_VN" -doc_to_text: "Translate English to Vietnamese: {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml 
b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml index d45019d34b6..7ed98d870b4 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_CN.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-zh_CN" -doc_to_text: "Translate English to Chinese (Simplified, China): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml index ac7b10d983d..00f164aa3e3 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zh_TW.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-zh_TW" -doc_to_text: "Translate English to Chinese (Traditional, Taiwan): {{source}}" diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml index f3a7cf0bd9c..e37e3cda554 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_en-zu_ZA.yaml @@ -9,4 +9,3 @@ tag: metadata: version: 1.0 lang_pair: "en-zu_ZA" -doc_to_text: "Translate English to Zulu (South Africa): {{source}}" From 9ffdbe326cd9dd7b1040b31a60a3a782a786f8b9 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Thu, 25 Dec 2025 16:51:06 +0100 Subject: [PATCH 3/7] Document new task --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/wmt24pp/README.md | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 28bdfa3bf8d..52c647b0108 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -190,6 +190,7 @@ provided to the individual README.md files for each subfolder. | [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | | [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. 
| English | | [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wmt24pp](wmt24pp/README.md) | English→55 language/dialect translation benchmark built from the Google WMT24++ dataset, evaluated with BLEU/TER/ChrF per language pair. | English→Arabic, European, Indic, East Asian, African, and other WMT24++ target languages (55 total) | | [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | | [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | | [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | diff --git a/lm_eval/tasks/wmt24pp/README.md b/lm_eval/tasks/wmt24pp/README.md index 2a87b6d56da..e74282c4acd 100644 --- a/lm_eval/tasks/wmt24pp/README.md +++ b/lm_eval/tasks/wmt24pp/README.md @@ -36,7 +36,7 @@ per-language YAML): - `dataset_path: google/wmt24pp` - `test_split: train` - `output_type: generate_until` -- `doc_to_text: "Translate English to the target language: {{source}}"` +- `doc_to_text: !function utils.doc_to_text` - `doc_to_target: "{{target}}"` - `custom_dataset: !function utils.load_wmt24pp_dataset` - Metrics: **BLEU**, **TER**, **ChrF** (same triple as classic WMT tasks) @@ -158,6 +158,31 @@ repository, exposing three standard MT metrics: All metrics are implemented via `lm_eval.api.metrics` and use SacreBLEU under the hood. +## Task Validity Checklist + +For adding novel benchmarks/datasets to the library: + +- [x] **Is the task an existing benchmark in the literature?** + Yes. 
WMT24++ extends the official WMT24 benchmark to 55 languages/dialects as +described by Deutsch et al. (2025). +- [x] **Have you referenced the original paper that introduced the task?** + The citation for the WMT24++ paper is provided in the section below. +- [ ] **If yes, does the original paper provide a reference implementation?** + Prompt template and dataset filtering match the reference release, but we did not replicate the full original implementation. + +If other tasks on this dataset are already supported: + +- [x] **Is the "Main" variant of this task clearly denoted?** + Yes. Every YAML task is `wmt24pp-en-` to emphasize the English→X +setup, and the group config exposes the complete benchmark as `wmt24pp`. +- [x] **Have you provided a short sentence on what each new variant adds / evaluates?** + The README explains that each YAML corresponds to a single HF config / language +pair; they all evaluate the same translation direction with identical metrics. +- [x] **Have you noted which published evaluation setups are matched by this variant?** + Yes. See the section above for the specific alignment with the WMT24++ dataset +card: same split (`train`), same bad-source filtering, same post-edited reference, +and the BLEU/TER/ChrF metric trio used in the paper/MTME release. + ## Citation Please cite the original WMT24++ paper and the lm-evaluation-harness project From d3ae7b388b9e543eef8280b7d1a44735e08343e3 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Thu, 25 Dec 2025 18:07:35 +0100 Subject: [PATCH 4/7] Update README: remove instruction as it's included in user's prompt already.
--- lm_eval/tasks/wmt24pp/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lm_eval/tasks/wmt24pp/README.md b/lm_eval/tasks/wmt24pp/README.md index e74282c4acd..53747a66283 100644 --- a/lm_eval/tasks/wmt24pp/README.md +++ b/lm_eval/tasks/wmt24pp/README.md @@ -128,11 +128,6 @@ gen_kwargs: output_path: ./results/ log_samples: true -system_instruction: >- - You are a translation engine. Given an English sentence and a target language, - output ONLY the translation in the specified target language, with no - explanations, quotes, or extra text. - wandb_args: {} hf_hub_log_args: {} ``` From cb1a102954634a2bb0af4b4a995c0abf5399e271 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Sat, 27 Dec 2025 15:45:38 +0100 Subject: [PATCH 5/7] Configure explicitly aggregation metrics --- lm_eval/tasks/wmt24pp/wmt24pp_common.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml index d9ffeb6add2..edc36a23aa6 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml @@ -23,8 +23,14 @@ custom_dataset: !function utils.load_wmt24pp_dataset # WMT-style metrics: BLEU, TER, ChrF metric_list: - metric: bleu + aggregation: bleu + higher_is_better: true - metric: ter + aggregation: ter + higher_is_better: false - metric: chrf + aggregation: chrf + higher_is_better: true # Greedy decoding, stop at newline (mirrors translation/wmt_common_yaml) generation_kwargs: From 6b059fb040ef2ef7f28bf9228f209d6feea34e45 Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Sun, 28 Dec 2025 13:03:10 +0100 Subject: [PATCH 6/7] Improve prompt --- lm_eval/tasks/wmt24pp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/wmt24pp/utils.py b/lm_eval/tasks/wmt24pp/utils.py index 14294bfa9c7..27339e79551 100644 --- a/lm_eval/tasks/wmt24pp/utils.py +++ b/lm_eval/tasks/wmt24pp/utils.py @@ -79,7 +79,7 @@ "grammar, vocabulary, and 
cultural sensitivities.\n" "Please translate the following {src_lang} text into {tgt_lang} ({tgt_code}):\n\n" "{input_text}\n\n" - "Produce only the {tgt_lang} translation, without any additional explanations or commentary." + "Produce only the {tgt_lang} translation, without any additional explanations or commentary:\n\n" ) From e2e8186cb2e1073dafadda375c140cd5d31504be Mon Sep 17 00:00:00 2001 From: Grzegorz Aniol Date: Tue, 30 Dec 2025 13:08:23 +0100 Subject: [PATCH 7/7] Do not override default settings for generation --- lm_eval/tasks/wmt24pp/wmt24pp_common.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml index edc36a23aa6..4491f82d81d 100644 --- a/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml +++ b/lm_eval/tasks/wmt24pp/wmt24pp_common.yaml @@ -31,15 +31,3 @@ metric_list: - metric: chrf aggregation: chrf higher_is_better: true - -# Greedy decoding, stop at newline (mirrors translation/wmt_common_yaml) -generation_kwargs: - until: - - "\n" - do_sample: false - temperature: 0.0 - -# Zero-shot translation by default -num_fewshot: 0 - -repeats: 1