Skip to content

Commit

Permalink
randomization (#3710)
Browse files Browse the repository at this point in the history
Add randomization to Adversarial Simulator via 2 new keyword arguments.
Enabled by default. The Jailbreak sim internally selects a randomization
seed for both its child simulators to ensure consistency.

Also added some tests for this.
  • Loading branch information
MilesHolland authored Sep 4, 2024
1 parent 6e1d23d commit 5d9c715
Show file tree
Hide file tree
Showing 7 changed files with 1,094,508 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/promptflow-evals/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

### Improvements
- Set the PF_EVALS_BATCH_USE_ASYNC environment variable to True by default to enable asynchronous batch run for async-enabled built-in evaluators, improving performance.
- The `AdversarialSimulator` class now supports randomization of simulation prompts, as well as seeding of said randomization for consistency, via two new arguments: `randomize_order` and `randomization_seed`. Randomization is enabled by default. The `JailbreakAdversarialSimulator` class always uses a `randomization_seed` to synchronize its sub-simulators, generating one itself if none is provided.

## v0.3.2 (2024-08-13)
### Features Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import functools
import logging
import random
from typing import Any, Callable, Dict, List
from typing import Any, Callable, Dict, List, Optional

from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
from azure.identity import DefaultAzureCredential
Expand Down Expand Up @@ -116,6 +116,8 @@ async def __call__(
api_call_delay_sec: int = 0,
concurrent_async_task: int = 3,
jailbreak: bool = False,
randomize_order: bool = True,
randomization_seed: Optional[int] = None,
):
"""
Executes the adversarial simulation against a specified target function asynchronously.
Expand Down Expand Up @@ -150,6 +152,11 @@ async def __call__(
:keyword jailbreak: If set to True, allows breaking out of the conversation flow defined by the scenario.
Defaults to False.
:paramtype jailbreak: bool
:keyword randomize_order: Whether or not the order of the prompts should be randomized. Defaults to True.
:paramtype randomize_order: bool
:keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
default seed is used. Defaults to None.
:paramtype randomization_seed: Optional[int]
:return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
- 'template_parameters': A dictionary with parameters used in the conversation template,
Expand Down Expand Up @@ -220,7 +227,16 @@ async def __call__(
unit="simulations",
)
for template in templates:
for parameter in template.template_parameters:
parameter_order = list(range(len(template.template_parameters)))
if randomize_order:
# The template parameter lists are persistent across sim runs within a session,
# so shuffle a list of indices instead of shuffling the parameter list in place
# or shuffling a potentially large deep copy of it.
if randomization_seed is not None:
random.seed(randomization_seed)
random.shuffle(parameter_order)
for index in parameter_order:
parameter = template.template_parameters[index].copy()
if jailbreak:
parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
tasks.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# noqa: E501
import functools
import logging
from typing import Any, Callable, Dict
from random import randint
from typing import Any, Callable, Dict, Optional

from azure.identity import DefaultAzureCredential

Expand Down Expand Up @@ -102,6 +103,7 @@ async def __call__(
api_call_retry_sleep_sec: int = 1,
api_call_delay_sec: int = 0,
concurrent_async_task: int = 3,
randomization_seed: Optional[int] = None,
):
"""
Executes the adversarial simulation and jailbreak adversarial simulation
Expand Down Expand Up @@ -134,6 +136,10 @@ async def __call__(
:keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
Defaults to 3.
:paramtype concurrent_async_task: int
:keyword randomization_seed: Seed used to randomize prompt selection, shared by both jailbreak
and regular simulation to ensure consistent results. If not provided, a random seed will be generated
and shared between simulations.
:paramtype randomization_seed: Optional[int]
:return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
- 'template_parameters': A dictionary with parameters used in the conversation template,
Expand Down Expand Up @@ -186,6 +192,10 @@ async def __call__(
"""
if scenario not in AdversarialScenario.__members__.values():
raise ValueError("Invalid adversarial scenario")

# Only generate a seed when the caller supplied none. A plain truthiness check would also
# discard an explicit seed of 0 and silently replace it with a random one, which breaks
# reproducibility for that seed value.
if randomization_seed is None:
    randomization_seed = randint(0, 1000000)

regular_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
regular_sim_results = await regular_sim(
scenario=scenario,
Expand All @@ -197,6 +207,8 @@ async def __call__(
api_call_delay_sec=api_call_delay_sec,
concurrent_async_task=concurrent_async_task,
jailbreak=False,
randomize_order=True,
randomization_seed=randomization_seed,
)
jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
jb_sim_results = await jb_sim(
Expand All @@ -209,5 +221,7 @@ async def __call__(
api_call_delay_sec=api_call_delay_sec,
concurrent_async_task=concurrent_async_task,
jailbreak=True,
randomize_order=True,
randomization_seed=randomization_seed,
)
return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
230 changes: 230 additions & 0 deletions src/promptflow-evals/tests/evals/e2etests/test_adv_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,233 @@ async def callback(
)
)
assert len(outputs) == 1

@pytest.mark.usefixtures("vcr_recording")
def test_adv_sim_order_randomness_with_jailbreak(self, azure_cred, project_scope):
    """Seeded prompt ordering is reproducible when ``jailbreak=True``.

    Runs the simulator three times (seeds 1, 1 and 2) and compares the first
    message of the first result: equal seeds must match, different seeds must not.
    """
    os.environ.pop("RAI_SVC_URL", None)
    from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator

    azure_ai_project = {
        key: project_scope[key] for key in ("subscription_id", "resource_group_name", "project_name")
    }

    async def callback(
        messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
    ) -> dict:
        # Echo target: reply with the content of the first message in the conversation.
        history = messages["messages"]
        history.append({"content": history[0]["content"], "role": "assistant"})
        return {
            "messages": history,
            "stream": stream,
            "session_state": session_state,
            "context": context,
        }

    simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)

    def simulate(seed):
        # Every run shares the same settings; only the randomization seed varies.
        return asyncio.run(
            simulator(
                scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
                max_conversation_turns=1,
                max_simulation_results=1,
                target=callback,
                api_call_retry_limit=3,
                api_call_retry_sleep_sec=1,
                api_call_delay_sec=30,
                concurrent_async_task=1,
                jailbreak=True,
                randomization_seed=seed,
            )
        )

    first_run = simulate(1)
    repeat_run = simulate(1)
    other_seed_run = simulate(2)

    # Make sure that the two seed-1 runs are identical, but not identical to the seed-2 run
    assert first_run[0]["messages"][0] == repeat_run[0]["messages"][0]
    assert first_run[0]["messages"][0] != other_seed_run[0]["messages"][0]

@pytest.mark.usefixtures("vcr_recording")
def test_adv_sim_order_randomness(self, azure_cred, project_scope):
    """Seeded prompt ordering is reproducible when ``jailbreak=False``.

    The simulator is invoked with seeds 1, 1 and 2 in that order. The first
    message of the first result must be equal for the two seed-1 runs and
    different for the seed-2 run.
    """
    os.environ.pop("RAI_SVC_URL", None)
    from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator

    azure_ai_project = dict(
        subscription_id=project_scope["subscription_id"],
        resource_group_name=project_scope["resource_group_name"],
        project_name=project_scope["project_name"],
    )

    async def callback(
        messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
    ) -> dict:
        # Echo target: the assistant reply repeats the first message's content.
        first_content = messages["messages"][0]["content"]
        reply = {"content": first_content, "role": "assistant"}
        messages["messages"].append(reply)
        return {
            "messages": messages["messages"],
            "stream": stream,
            "session_state": session_state,
            "context": context,
        }

    simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)

    # Settings shared by all three runs; the seed is supplied per run below.
    shared_settings = dict(
        scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
        max_conversation_turns=1,
        max_simulation_results=1,
        target=callback,
        api_call_retry_limit=3,
        api_call_retry_sleep_sec=1,
        api_call_delay_sec=30,
        concurrent_async_task=1,
        jailbreak=False,
    )

    # Runs execute sequentially in list order: seed 1, seed 1, seed 2.
    runs = [asyncio.run(simulator(**shared_settings, randomization_seed=seed)) for seed in (1, 1, 2)]
    outputs1, outputs2, outputs3 = runs

    # Make sure that outputs 1 and 2 are identical, but not identical to 3
    assert outputs1[0]["messages"][0] == outputs2[0]["messages"][0]
    assert outputs1[0]["messages"][0] != outputs3[0]["messages"][0]

@pytest.mark.usefixtures("vcr_recording")
def test_jailbreak_sim_order_randomness(self, azure_cred, project_scope):
    """Check seed handling of ``JailbreakAdversarialSimulator``.

    The simulator returns a dict with ``"regular"`` and ``"jailbreak"`` result
    lists. It is run with seeds 1, 1 and 2, and the test verifies that:

    * within one run, the regular prompt is contained in, but not equal to,
      the jailbreak prompt (both sub-simulations picked the same prompt);
    * runs with the same seed produce the same prompts;
    * runs with different seeds produce different prompts.

    The comparisons were previously bare expressions without ``assert`` and
    therefore verified nothing.
    """
    os.environ.pop("RAI_SVC_URL", None)
    from promptflow.evals.synthetic import AdversarialScenario, JailbreakAdversarialSimulator

    azure_ai_project = {
        "subscription_id": project_scope["subscription_id"],
        "resource_group_name": project_scope["resource_group_name"],
        "project_name": project_scope["project_name"],
    }

    async def callback(
        messages: List[Dict], stream: bool = False, session_state: Any = None, context: Dict[str, Any] = None
    ) -> dict:
        # Echo target: reply with the content of the first message in the conversation.
        question = messages["messages"][0]["content"]

        formatted_response = {"content": question, "role": "assistant"}
        messages["messages"].append(formatted_response)
        return {
            "messages": messages["messages"],
            "stream": stream,
            "session_state": session_state,
            "context": context,
        }

    simulator = JailbreakAdversarialSimulator(azure_ai_project=azure_ai_project, credential=azure_cred)

    def simulate(seed):
        # All runs share the same settings; only the randomization seed varies.
        return asyncio.run(
            simulator(
                scenario=AdversarialScenario.ADVERSARIAL_REWRITE,
                max_conversation_turns=1,
                max_simulation_results=1,
                target=callback,
                api_call_retry_limit=3,
                api_call_retry_sleep_sec=1,
                api_call_delay_sec=30,
                concurrent_async_task=1,
                randomization_seed=seed,
            )
        )

    def first_prompt(outputs, kind):
        # Content of the first message of the first simulated conversation.
        return outputs[kind][0]["messages"][0]["content"]

    outputs1 = simulate(1)
    outputs2 = simulate(1)
    outputs3 = simulate(2)

    # Make sure the regular prompt exists within the jailbroken equivalent, but also that they aren't identical.
    assert first_prompt(outputs1, "regular") in first_prompt(outputs1, "jailbreak")
    assert first_prompt(outputs1, "regular") != first_prompt(outputs1, "jailbreak")
    # Check that outputs1 and outputs2 are identical, but not identical to outputs3
    assert first_prompt(outputs1, "regular") == first_prompt(outputs2, "regular")
    assert first_prompt(outputs1, "jailbreak") == first_prompt(outputs2, "jailbreak")
    assert first_prompt(outputs1, "regular") != first_prompt(outputs3, "regular")
    assert first_prompt(outputs1, "jailbreak") != first_prompt(outputs3, "jailbreak")
    # Check that outputs3 has the same regular/jailbreak equivalency as outputs1 under a different seed.
    assert first_prompt(outputs3, "regular") in first_prompt(outputs3, "jailbreak")
    assert first_prompt(outputs3, "regular") != first_prompt(outputs3, "jailbreak")
Loading

0 comments on commit 5d9c715

Please sign in to comment.