randomization (#3710)

Add randomization to Adversarial Simulator via 2 new keyword arguments. Enabled by default. The Jailbreak sim internally selects a randomization seed for both its child simulators to ensure consistency. Also added some tests for this.
microsoft · Sep 4, 2024 · 5d9c715 · 5d9c715
1 parent 6e1d23d
commit 5d9c715
Show file tree

Hide file tree

Showing 7 changed files with 1,094,508 additions and 3 deletions.
diff --git a/src/promptflow-evals/CHANGELOG.md b/src/promptflow-evals/CHANGELOG.md
@@ -9,6 +9,7 @@
 
 ### Improvements
 - Set the PF_EVALS_BATCH_USE_ASYNC environment variable to True by default to enable asynchronous batch run for async-enabled built-in evaluators, improving performance.
+- The `AdversarialSimulator` class now supports randomization of simulation prompts, as well as seeding of said randomization for consistency via two new arguments: `randomize_order` and `randomization_seed`. Randomization is enabled by default. The child class `JailbreakAdversarialSimulator` always uses a `randomization_seed` to synchronize sub-simulators, even if none is provided.
 
 ## v0.3.2 (2024-08-13)
 ### Features Added

diff --git a/src/promptflow-evals/promptflow/evals/synthetic/adversarial_simulator.py b/src/promptflow-evals/promptflow/evals/synthetic/adversarial_simulator.py
@@ -6,7 +6,7 @@
 import functools
 import logging
 import random
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict, List, Optional
 
 from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
 from azure.identity import DefaultAzureCredential
@@ -116,6 +116,8 @@ async def __call__(
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
         jailbreak: bool = False,
+        randomize_order: bool = True,
+        randomization_seed: Optional[int] = None,
     ):
         """
         Executes the adversarial simulation against a specified target function asynchronously.
@@ -150,6 +152,11 @@ async def __call__(
         :keyword jailbreak: If set to True, allows breaking out of the conversation flow defined by the scenario.
             Defaults to False.
         :paramtype jailbreak: bool
+        :keyword randomize_order: Whether or not the order of the prompts should be randomized. Defaults to True.
+        :paramtype randomize_order: bool
+        :keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
+            default seed is used. Defaults to None.
+        :paramtype randomization_seed: Optional[int]
         :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
 
          - 'template_parameters': A dictionary with parameters used in the conversation template,
@@ -220,7 +227,16 @@ async def __call__(
             unit="simulations",
         )
         for template in templates:
-            for parameter in template.template_parameters:
+            parameter_order = list(range(len(template.template_parameters)))
+            if randomize_order:
+                # The template parameter lists are persistent across sim runs within a session,
+                # so randomize the selection order instead of shuffling the parameter list directly
+                # or making a potentially large deep copy.
+                if randomization_seed is not None:
+                    random.seed(randomization_seed)
+                random.shuffle(parameter_order)
+            for index in parameter_order:
+                parameter = template.template_parameters[index].copy()
                 if jailbreak:
                     parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
                 tasks.append(

diff --git a/src/promptflow-evals/promptflow/evals/synthetic/jailbreak_adversarial_simulator.py b/src/promptflow-evals/promptflow/evals/synthetic/jailbreak_adversarial_simulator.py
@@ -4,7 +4,8 @@
 # noqa: E501
 import functools
 import logging
-from typing import Any, Callable, Dict
+from random import randint
+from typing import Any, Callable, Dict, Optional
 
 from azure.identity import DefaultAzureCredential
 
@@ -102,6 +103,7 @@ async def __call__(
         api_call_retry_sleep_sec: int = 1,
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
+        randomization_seed: Optional[int] = None,
     ):
         """
         Executes the adversarial simulation and jailbreak adversarial simulation
@@ -134,6 +136,10 @@ async def __call__(
         :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
             Defaults to 3.
         :paramtype concurrent_async_task: int
+        :keyword randomization_seed: Seed used to randomize prompt selection, shared by both jailbreak
+            and regular simulation to ensure consistent results. If not provided, a random seed will be generated
+            and shared between simulations.
+        :paramtype randomization_seed: Optional[int]
         :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
 
          - 'template_parameters': A dictionary with parameters used in the conversation template,
@@ -186,6 +192,10 @@ async def __call__(
         """
         if scenario not in AdversarialScenario.__members__.values():
             raise ValueError("Invalid adversarial scenario")
+
+        if not randomization_seed:
+            randomization_seed = randint(0, 1000000)
+
         regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         regular_sim_results = await regular_sim(
             scenario=scenario,
@@ -197,6 +207,8 @@ async def __call__(
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
             jailbreak=False,
+            randomize_order=True,
+            randomization_seed=randomization_seed,
         )
         jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
         jb_sim_results = await jb_sim(
@@ -209,5 +221,7 @@ async def __call__(
             api_call_delay_sec=api_call_delay_sec,
             concurrent_async_task=concurrent_async_task,
             jailbreak=True,
+            randomize_order=True,
+            randomization_seed=randomization_seed,
         )
         return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
diff --git a/src/promptflow-evals/tests/evals/e2etests/test_adv_simulator.py b/src/promptflow-evals/tests/evals/e2etests/test_adv_simulator.py
@@ -360,3 +360,233 @@ async def callback(
             )
         )
         assert len(outputs) == 1
+
+    @pytest.mark.usefixtures("vcr_recording")
+    def test_adv_sim_order_randomness_with_jailbreak(self, azure_cred, project_scope):
+        """Check that seeded prompt ordering is reproducible when jailbreak is enabled.
+
+        The simulator is run three times (seeds 1, 1 and 2) and the first message
+        of the first conversation is compared across the runs.
+        """
+        os.environ.pop("RAI_SVC_URL", None)
+        from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator
+
+        azure_ai_project = {
+            field: project_scope[field] for field in ("subscription_id", "resource_group_name", "project_name")
+        }
+
+        async def callback(
+            messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
+        ) -> dict:
+            # Echo the content of the first message back as the assistant's reply.
+            echoed_reply = {"content": messages["messages"][0]["content"], "role": "assistant"}
+            messages["messages"].append(echoed_reply)
+            return {
+                "messages": messages["messages"],
+                "stream": stream,
+                "session_state": session_state,
+                "context": context,
+            }
+
+        simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)
+
+        def run_with_seed(seed):
+            # Every run shares the same configuration; only the randomization seed varies.
+            return asyncio.run(
+                simulator(
+                    scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
+                    max_conversation_turns=1,
+                    max_simulation_results=1,
+                    target=callback,
+                    api_call_retry_limit=3,
+                    api_call_retry_sleep_sec=1,
+                    api_call_delay_sec=30,
+                    concurrent_async_task=1,
+                    jailbreak=True,
+                    randomization_seed=seed,
+                )
+            )
+
+        # All three simulations complete before any result is inspected.
+        seed_one_run, seed_one_rerun, seed_two_run = [run_with_seed(seed) for seed in (1, 1, 2)]
+
+        # Make sure that both seed-1 runs are identical, but not identical to the seed-2 run
+        assert seed_one_run[0]["messages"][0] == seed_one_rerun[0]["messages"][0]
+        assert seed_one_run[0]["messages"][0] != seed_two_run[0]["messages"][0]
+
+    @pytest.mark.usefixtures("vcr_recording")
+    def test_adv_sim_order_randomness(self, azure_cred, project_scope):
+        """Check that seeded prompt ordering is reproducible without jailbreak.
+
+        The simulator is run three times (seeds 1, 1 and 2) and the first message
+        of the first conversation is compared across the runs.
+        """
+        os.environ.pop("RAI_SVC_URL", None)
+        from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator
+
+        azure_ai_project = {
+            field: project_scope[field] for field in ("subscription_id", "resource_group_name", "project_name")
+        }
+
+        async def callback(
+            messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
+        ) -> dict:
+            # Echo the content of the first message back as the assistant's reply.
+            echoed_reply = {"content": messages["messages"][0]["content"], "role": "assistant"}
+            messages["messages"].append(echoed_reply)
+            return {
+                "messages": messages["messages"],
+                "stream": stream,
+                "session_state": session_state,
+                "context": context,
+            }
+
+        simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)
+
+        # Configuration common to every run; only the randomization seed differs.
+        shared_options = dict(
+            scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
+            max_conversation_turns=1,
+            max_simulation_results=1,
+            target=callback,
+            api_call_retry_limit=3,
+            api_call_retry_sleep_sec=1,
+            api_call_delay_sec=30,
+            concurrent_async_task=1,
+            jailbreak=False,
+        )
+
+        # All three simulations complete before any result is inspected.
+        runs = []
+        for seed in (1, 1, 2):
+            runs.append(asyncio.run(simulator(randomization_seed=seed, **shared_options)))
+        seed_one_run, seed_one_rerun, seed_two_run = runs
+
+        # Make sure that both seed-1 runs are identical, but not identical to the seed-2 run
+        assert seed_one_run[0]["messages"][0] == seed_one_rerun[0]["messages"][0]
+        assert seed_one_run[0]["messages"][0] != seed_two_run[0]["messages"][0]
+
+    @pytest.mark.usefixtures("vcr_recording")
+    def test_jailbreak_sim_order_randomness(self, azure_cred, project_scope):
+        """Check that the jailbreak simulator keeps its regular and jailbreak runs in sync.
+
+        The simulator is run three times (seeds 1, 1 and 2). For each result the first
+        regular prompt must be contained in, but not equal to, the first jailbreak prompt;
+        equal seeds must reproduce the same prompts and a different seed must change them.
+        """
+        os.environ.pop("RAI_SVC_URL", None)
+        from promptflow.evals.synthetic import AdversarialScenario, JailbreakAdversarialSimulator
+
+        azure_ai_project = {
+            "subscription_id": project_scope["subscription_id"],
+            "resource_group_name": project_scope["resource_group_name"],
+            "project_name": project_scope["project_name"],
+        }
+
+        async def callback(
+            messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
+        ) -> dict:
+            # Echo the content of the first message back as the assistant's reply.
+            question = messages["messages"][0]["content"]
+
+            formatted_response = {"content": question, "role": "assistant"}
+            messages["messages"].append(formatted_response)
+            return {
+                "messages": messages["messages"],
+                "stream": stream,
+                "session_state": session_state,
+                "context": context,
+            }
+
+        simulator = JailbreakAdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)
+
+        outputs1 = asyncio.run(
+            simulator(
+                scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
+                max_conversation_turns=1,
+                max_simulation_results=1,
+                target=callback,
+                api_call_retry_limit=3,
+                api_call_retry_sleep_sec=1,
+                api_call_delay_sec=30,
+                concurrent_async_task=1,
+                randomization_seed=1,
+            )
+        )
+
+        outputs2 = asyncio.run(
+            simulator(
+                scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
+                max_conversation_turns=1,
+                max_simulation_results=1,
+                target=callback,
+                api_call_retry_limit=3,
+                api_call_retry_sleep_sec=1,
+                api_call_delay_sec=30,
+                concurrent_async_task=1,
+                randomization_seed=1,
+            )
+        )
+
+        outputs3 = asyncio.run(
+            simulator(
+                scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
+                max_conversation_turns=1,
+                max_simulation_results=1,
+                target=callback,
+                api_call_retry_limit=3,
+                api_call_retry_sleep_sec=1,
+                api_call_delay_sec=30,
+                concurrent_async_task=1,
+                randomization_seed=2,
+            )
+        )
+        # NOTE: each comparison below must be an `assert`; a bare comparison expression is
+        # evaluated and discarded, so it could never fail the test.
+        # Make sure the regular prompt exists within the jailbroken equivalent, but also that they aren't identical.
+        assert outputs1["regular"][0]["messages"][0]["content"] in outputs1["jailbreak"][0]["messages"][0]["content"]
+        assert outputs1["regular"][0]["messages"][0]["content"] != outputs1["jailbreak"][0]["messages"][0]["content"]
+        # Check that outputs1 and outputs2 are identical, but not identical to outputs3
+        assert outputs1["regular"][0]["messages"][0]["content"] == outputs2["regular"][0]["messages"][0]["content"]
+        assert outputs1["jailbreak"][0]["messages"][0]["content"] == outputs2["jailbreak"][0]["messages"][0]["content"]
+        assert outputs1["regular"][0]["messages"][0]["content"] != outputs3["regular"][0]["messages"][0]["content"]
+        assert outputs1["jailbreak"][0]["messages"][0]["content"] != outputs3["jailbreak"][0]["messages"][0]["content"]
+        # Check that outputs3 (run with a different seed) has the same regular/jailbreak relationship as outputs1.
+        assert outputs3["regular"][0]["messages"][0]["content"] in outputs3["jailbreak"][0]["messages"][0]["content"]
+        assert outputs3["regular"][0]["messages"][0]["content"] != outputs3["jailbreak"][0]["messages"][0]["content"]