diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py
index 09c558cd082..ee7c1115092 100644
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -1069,18 +1069,20 @@ def build_qa_turn(
         whether an answer or gen_prefix is provided.
         """
         assert isinstance(q, str), f"Context is not a string! : {q}"
+        # Check if answer is provided (handle a=0 as valid answer index)
+        has_answer = a is not None and a != ""
         msgs = [
             Message(
                 "user",
                 q,
                 tgt_delim
-                if a and not gen_prefix
+                if has_answer and not gen_prefix
                 else tgt_delim
                 if gen_prefix and requires_delimiter(q, gen_prefix)
                 else "",
             )
         ]
-        if a is not None and a != "":
+        if has_answer:
             answer_text = (
                 c[a]
                 if (c and isinstance(a, int))
diff --git a/tests/test_fewshot_context.py b/tests/test_fewshot_context.py
index 7f569163eff..54a6bd61df1 100644
--- a/tests/test_fewshot_context.py
+++ b/tests/test_fewshot_context.py
@@ -421,6 +421,36 @@ def test_raises_on_non_string_question(self, task):
         with pytest.raises(AssertionError, match="not a string"):
             ConfigurableTask.build_qa_turn(task, q=123, a="A")  # type: ignore
 
+    def test_answer_index_zero_uses_delimiter(self, task):
+        """Answer index 0 should still use target delimiter (regression test for #3452).
+
+        When answer is an integer index into choices, a=0 should be treated as a valid
+        answer, not as falsy. Previously, a=0 caused the target delimiter to be skipped.
+        """
+        choices = ["A", "B", "C", "D"]
+        msgs = ConfigurableTask.build_qa_turn(
+            task, q="Question?", c=choices, a=0, tgt_delim=" ", few_delim="\n\n"
+        )
+
+        # Should have 2 messages: user question with delimiter, assistant answer
+        assert len(msgs) == 2
+        assert msgs[0].role == "user"
+        assert msgs[0]._delimiter == " "  # delimiter should be applied
+        assert msgs[1].role == "assistant"
+        assert msgs[1].content == "A"  # choices[0]
+        assert messages_to_text(msgs) == "Question? A\n\n"
+
+    def test_answer_index_nonzero_uses_delimiter(self, task):
+        """Answer index > 0 should use target delimiter."""
+        choices = ["A", "B", "C", "D"]
+        msgs = ConfigurableTask.build_qa_turn(
+            task, q="Question?", c=choices, a=2, tgt_delim=" ", few_delim="\n\n"
+        )
+
+        assert msgs[0]._delimiter == " "
+        assert msgs[1].content == "C"  # choices[2]
+        assert messages_to_text(msgs) == "Question? C\n\n"
+
 
 # =============================================================================
 # Fewshot Context Tests