# Patch notes (free text ahead of the first "diff --git" header).
# * garak/attempt.py: last_message(role) looped over range(len(self.turns), 0),
#   which is an empty range, so a lookup by role always fell through to the
#   ValueError; the loop now walks the turn indices from last to first.
# * garak/resources/red_team/evaluation.py: judge_score and on_topic_score now
#   build one Turn per entry returned by _create_conv (using its "role" and
#   "content" keys) instead of wrapping the whole return value in a single
#   "user" Message.
# * tests: assert the judge conversation has more than one turn and contains a
#   system message, and add test_last_message for Conversation.last_message.
# NOTE(review): the leading indentation inside the hunks below was rebuilt from
# a whitespace-collapsed copy of this patch - confirm against the target tree.

diff --git a/garak/attempt.py b/garak/attempt.py
index d7bf061ad..d39e3ecd0 100644
--- a/garak/attempt.py
+++ b/garak/attempt.py
@@ -129,7 +129,7 @@ def last_message(self, role=None) -> Message:
             raise ValueError("No messages available")
         if not role:
             return self.turns[-1].content
-        for idx in range(len(self.turns), 0):
+        for idx in range(len(self.turns) - 1, -1, -1):
             if role == self.turns[idx].role:
                 return self.turns[idx].content
         raise ValueError(f"No messages for role: {role}")
diff --git a/garak/resources/red_team/evaluation.py b/garak/resources/red_team/evaluation.py
index 06dc38ef3..173500285 100644
--- a/garak/resources/red_team/evaluation.py
+++ b/garak/resources/red_team/evaluation.py
@@ -122,10 +122,11 @@ def judge_score(self, attack_prompt_list, target_response_list):
             Conversation(
                 [
                     Turn(
-                        "user",
-                        Message(
-                            self._create_conv(get_evaluator_prompt(prompt, response))
-                        ),
+                        role=turn["role"],
+                        content=Message(turn["content"]),
+                    )
+                    for turn in self._create_conv(
+                        get_evaluator_prompt(prompt, response)
                     )
                 ]
             )
@@ -142,13 +143,12 @@ def on_topic_score(self, attempt_list):
             Conversation(
                 [
                     Turn(
-                        "user",
-                        Message(
-                            self._create_conv(
-                                get_evaluator_prompt_on_topic(prompt),
-                                system_prompt=self.system_prompt_on_topic,
-                            )
-                        ),
+                        role=turn["role"],
+                        content=Message(turn["content"]),
+                    )
+                    for turn in self._create_conv(
+                        get_evaluator_prompt_on_topic(prompt),
+                        system_prompt=self.system_prompt_on_topic,
                     )
                 ]
             )
diff --git a/tests/detectors/test_detectors_judge.py b/tests/detectors/test_detectors_judge.py
index 298fad3b4..c129e79a5 100644
--- a/tests/detectors/test_detectors_judge.py
+++ b/tests/detectors/test_detectors_judge.py
@@ -69,5 +69,11 @@ def test_klass_detect(judge_config, mocker, klass, response, res_val):
     a.outputs = [Message("fake")]
     res = d.detect(a)
     mock_generate.assert_called_once()
+    assert (
+        len(mock_generate.call_args_list[0].args[0].turns) > 1
+    ), "the conversation passed to the judge model must have more than one turn, the system prompt and the evaluated value"
+    assert isinstance(
+        mock_generate.call_args_list[0].args[0].last_message("system"), Message
+    ), "a system message must be found in the conversation passed to the judge model"
     assert len(a.all_outputs) == len(res)
     assert [res_val * len(a.all_outputs)] == res
diff --git a/tests/test_attempt.py b/tests/test_attempt.py
index 9f062cf65..872e28d32 100644
--- a/tests/test_attempt.py
+++ b/tests/test_attempt.py
@@ -105,6 +105,30 @@ def test_conversation_internal_serialize():
     assert src_conv == dest
 
 
+def test_last_message():
+    test_system_msg = garak.attempt.Message("the system is under control")
+    test_user_msg = garak.attempt.Message(
+        "But the point is, if you lie all the time, nobody's going to believe you, even when you're telling the truth."
+    )
+    test_assistant_msg = garak.attempt.Message("AI does not understand")
+    test_user_msg_2 = garak.attempt.Message("That figures")
+
+    turns = [
+        garak.attempt.Turn("system", test_system_msg),
+        garak.attempt.Turn("user", test_user_msg),
+        garak.attempt.Turn("assistant", test_assistant_msg),
+    ]
+    conv = garak.attempt.Conversation(turns)
+    assert conv.last_message() == test_assistant_msg
+    assert conv.last_message("system") == test_system_msg
+    assert conv.last_message("user") == test_user_msg
+
+    new_turn = garak.attempt.Turn("user", test_user_msg_2)
+    conv.turns.append(new_turn)
+    assert conv.last_message("user") == test_user_msg_2
+    assert conv.last_message() == test_user_msg_2
+
+
 ##########################
 # Test Attempt LifeCycle #
 ##########################