Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding tool handling sample and the recruiting scenario for the CallAutomation_AzOpenAI_Voice sample #181

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,6 @@ MigrationBackup/

# Ionide (cross platform F# VS Code tools) working folder
.ionide/

# Azure OpenAI Voice Call Automation developer settings
**/appsettings.Development.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
using Azure.AI.OpenAI;
using System.ClientModel;
using Azure.Communication.CallAutomation;
using CliWrap.Builders;
using System.Text.Json;

#pragma warning disable OPENAI002
namespace CallAutomationOpenAI
{
/// <summary>
/// Arguments of the <c>accept_job_offer</c> tool call, deserialized from the
/// JSON argument string supplied with the function call.
/// </summary>
/// <remarks>
/// Both properties default to <see cref="string.Empty"/> so that a freshly
/// constructed instance, or one deserialized from a payload that omits a
/// property, never exposes <c>null</c> through a non-nullable <c>string</c>.
/// </remarks>
public class JobOfferParameters
{
    /// <summary>Identifier of the candidate (<c>candidateId</c> in the tool schema).</summary>
    public string CandidateId { get; set; } = string.Empty;

    /// <summary>Identifier of the job offer (<c>jobOfferId</c> in the tool schema).</summary>
    public string JobOfferId { get; set; } = string.Empty;
}

public class AzureOpenAIService
{
private WebSocket m_webSocket;
Expand All @@ -18,10 +26,10 @@ public class AzureOpenAIService
private string m_answerPromptSystemTemplate = "You are an AI assistant that helps people find information.";

/// <summary>
/// Creates the service, storing the media streaming handler and opening the
/// realtime AI session described by <paramref name="configuration"/>.
/// </summary>
/// <param name="mediaStreaming">Handler stored for sending messages back to the call.</param>
/// <param name="configuration">Application configuration passed to <c>CreateAISessionAsync</c>.</param>
public AzureOpenAIService(AcsMediaStreamingHandler mediaStreaming, IConfiguration configuration)
{
    m_mediaStreaming = mediaStreaming;
    m_cts = new CancellationTokenSource();

    // Constructors cannot await, so this blocks until the session is ready.
    // The session is created exactly once: a second call would open another
    // session and orphan the first one.
    m_aiSession = CreateAISessionAsync(configuration).GetAwaiter().GetResult();
    m_memoryStream = new MemoryStream();
}

Expand All @@ -40,14 +48,15 @@ private async Task<RealtimeConversationSession> CreateAISessionAsync(IConfigurat

var aiClient = new AzureOpenAIClient(new Uri(openAiUri), new ApiKeyCredential(openAiKey));
var RealtimeCovnClient = aiClient.GetRealtimeConversationClient(openAiModelName);
var session = await RealtimeCovnClient.StartConversationSessionAsync();
var session = await RealtimeCovnClient.StartConversationSessionAsync();

// Session options control connection-wide behavior shared across all conversations,
// including audio input format and voice activity detection settings.
ConversationSessionOptions sessionOptions = new()
{
Instructions = systemPrompt,
Voice = ConversationVoice.Alloy,
Tools = { AcceptJobOfferTool() },
InputAudioFormat = ConversationAudioFormat.Pcm16,
OutputAudioFormat = ConversationAudioFormat.Pcm16,
InputTranscriptionOptions = new()
Expand All @@ -58,6 +67,9 @@ private async Task<RealtimeConversationSession> CreateAISessionAsync(IConfigurat
};

await session.ConfigureSessionAsync(sessionOptions);
// get user profile here
await session.AddItemAsync(
ConversationItem.CreateUserMessage([GetUserProfileJson(), GetUserJobOfferJson()]));
return session;
}

Expand Down Expand Up @@ -90,9 +102,68 @@ private async Task GetOpenAiStreamResponseAsync()
$" -- Voice activity detection ended at {speechFinishedUpdate.AudioEndTime} ms");
}

// Item finished updates arrive when all streamed data for an item has arrived and the
// accumulated results are available. In the case of function calls, this is the point
// where all arguments are expected to be present.
if (update is ConversationItemStreamingFinishedUpdate itemFinishedUpdate)
{
Console.WriteLine();

if (itemFinishedUpdate.FunctionCallId is not null)
{
Console.WriteLine($" + Responding to tool invoked by item: {itemFinishedUpdate.FunctionName}");
string parameters = itemFinishedUpdate.FunctionCallArguments;
string functionName = itemFinishedUpdate.FunctionName;
string toolOutput = string.Empty;
switch (functionName)
{
case "accept_job_offer":
Console.WriteLine($" + Tool parameters: {parameters}");
// Set up JsonSerializerOptions to ignore case
var options = new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
};

// Deserialize the JSON string into a JobOfferParameters object
JobOfferParameters jobOfferParameters = JsonSerializer.Deserialize<JobOfferParameters>(parameters, options);
// Extract the values
string candidateId = jobOfferParameters.CandidateId;
string jobOfferId = jobOfferParameters.JobOfferId;
toolOutput = AcceptJobOffer(candidateId, jobOfferId);
Console.WriteLine($" + Tool parameters: {parameters}");
break;
default:
Console.WriteLine($" + Tool parameters: {parameters}");
break;
}

ConversationItem functionOutputItem = ConversationItem.CreateFunctionCallOutput(
callId: itemFinishedUpdate.FunctionCallId,
output: toolOutput);

await m_aiSession.AddItemAsync(functionOutputItem);
}
else if (itemFinishedUpdate.MessageContentParts?.Count > 0)
{
Console.Write($" + [{itemFinishedUpdate.MessageRole}]: ");
foreach (ConversationContentPart contentPart in itemFinishedUpdate.MessageContentParts)
{
Console.Write(contentPart.AudioTranscript);
}
Console.WriteLine();
}
Console.WriteLine($" -- Item streaming finished, response_id={itemFinishedUpdate.ResponseId}");
}


if (update is ConversationItemStreamingStartedUpdate itemStartedUpdate)
{
Console.WriteLine($" -- Begin streaming of new item");
if (!string.IsNullOrEmpty(itemStartedUpdate.FunctionName))
{
Console.Write($" {itemStartedUpdate.FunctionName}: ");
}
}

// Audio transcript updates contain the incremental text matching the generated
Expand All @@ -106,17 +177,18 @@ private async Task GetOpenAiStreamResponseAsync()
// audio, matching the output audio format configured for the session.
if (update is ConversationItemStreamingPartDeltaUpdate deltaUpdate)
{
if( deltaUpdate.AudioBytes != null)
if (deltaUpdate.AudioBytes != null)
{
Console.Write(deltaUpdate.FunctionArguments);
var jsonString = OutStreamingData.GetAudioDataForOutbound(deltaUpdate.AudioBytes.ToArray());
await m_mediaStreaming.SendMessageAsync(jsonString);
}
}

if (update is ConversationItemStreamingTextFinishedUpdate itemFinishedUpdate)
if (update is ConversationItemStreamingTextFinishedUpdate itemFinishedTextUpdate)
{
Console.WriteLine();
Console.WriteLine($" -- Item streaming finished, response_id={itemFinishedUpdate.ResponseId}");
Console.WriteLine($" -- Item streaming finished, response_id={itemFinishedTextUpdate.ResponseId}");
}

if (update is ConversationInputTranscriptionFinishedUpdate transcriptionCompletedUpdate)
Expand All @@ -129,6 +201,15 @@ private async Task GetOpenAiStreamResponseAsync()
if (update is ConversationResponseFinishedUpdate turnFinishedUpdate)
{
Console.WriteLine($" -- Model turn generation finished. Status: {turnFinishedUpdate.Status}");

// Here, if we processed tool calls in the course of the model turn, we finish the
// client turn to resume model generation. The next model turn will reflect the tool
// responses that were already provided.
if (turnFinishedUpdate.CreatedItems.Any(item => item.FunctionName?.Length > 0))
{
Console.WriteLine($" -- Ending client turn for pending tool responses");
await m_aiSession.StartResponseAsync();
}
}

if (update is ConversationErrorUpdate errorUpdate)
Expand Down Expand Up @@ -165,5 +246,47 @@ public void Close()
m_cts.Dispose();
m_aiSession.Dispose();
}

/// <summary>
/// Builds the <c>accept_job_offer</c> function tool definition: its name, the
/// natural-language description, and the JSON schema of its two required
/// string arguments (<c>candidateId</c> and <c>jobOfferId</c>).
/// </summary>
/// <returns>A new <see cref="ConversationFunctionTool"/> describing the tool.</returns>
private static ConversationFunctionTool AcceptJobOfferTool()
{
    // JSON schema for the tool arguments, kept separate from the tool metadata.
    BinaryData argumentSchema = BinaryData.FromString("""
        {
        "type": "object",
        "properties": {
        "candidateId": {
        "type": "string",
        "description": "Candidate Id from the user profile"
        },
        "jobOfferId": {
        "type": "string",
        "description": "this is the job offer id from the job offer"
        }
        },
        "required": ["candidateId","jobOfferId"]
        }
        """);

    ConversationFunctionTool acceptTool = new()
    {
        Name = "accept_job_offer",
        Description = "This tool accepts a job offer for the candidate id and job id. The user can aslo say 'I want to apply for the job'. Please repeat the job offer details back to the user.",
        Parameters = argumentSchema
    };

    return acceptTool;
}

/// <summary>
/// Returns the hard-coded sample candidate profile as a JSON object. The
/// session setup adds it to the conversation as part of a user message.
/// </summary>
/// <returns>
/// A strict JSON document with the keys <c>candidateId</c>, <c>name</c>,
/// <c>email</c>, <c>phone</c> and <c>address</c>.
/// </returns>
private static string GetUserProfileJson()
{
    // A raw string literal keeps the payload as standard double-quoted JSON
    // with balanced braces, so any JSON parser can read it.
    return """
        {
          "candidateId": "4711",
          "name": "Joe Smith",
          "email": "[email protected]",
          "phone": "555-555-5555",
          "address": "123 Main St, Seattle, WA 98101"
        }
        """;
}

/// <summary>
/// Returns the hard-coded sample job offer as a JSON object. The session
/// setup adds it to the conversation as part of a user message.
/// </summary>
/// <returns>
/// A strict JSON document describing job offer <c>4711-4711</c>: title,
/// company, location, employment type, description, and the
/// <c>responsibilities</c>, <c>qualifications</c> and <c>benefits</c> arrays.
/// </returns>
private static string GetUserJobOfferJson()
{
    // Double-quoted JSON in a raw string literal: apostrophes such as the one
    // in "Bachelor's" no longer collide with the quoting character.
    return """
        {
          "jobOfferId": "4711-4711",
          "jobTitle": "Software Engineer",
          "company": "Tech Solutions Inc.",
          "location": "Seattle, WA",
          "employmentType": "Full-time",
          "jobDescription": "We are looking for a skilled Software Engineer to join our dynamic team. The ideal candidate will have experience in developing high-quality software solutions and a passion for technology.",
          "responsibilities": [
            "Design, develop, and maintain software applications",
            "Collaborate with cross-functional teams to define and implement new features",
            "Write clean, scalable, and efficient code",
            "Perform code reviews and provide constructive feedback",
            "Troubleshoot and debug software issues",
            "Stay up-to-date with the latest industry trends and technologies"
          ],
          "qualifications": [
            "Bachelor's degree in Computer Science or related field",
            "3+ years of experience in software development",
            "Proficiency in C#, .NET, and JavaScript",
            "Experience with cloud platforms such as Azure or AWS",
            "Strong problem-solving skills",
            "Excellent communication and teamwork abilities"
          ],
          "benefits": [
            "Competitive salary",
            "Health, dental, and vision insurance",
            "401(k) with company match",
            "Flexible work hours",
            "Remote work options",
            "Professional development opportunities"
          ],
          "applicationInstructions": "To apply, please send your resume and cover letter to [email protected]."
        }
        """;
}

/// <summary>
/// Sample tool implementation: writes the acceptance to the console and
/// returns the confirmation text that is used as the tool output.
/// </summary>
/// <param name="candidateId">Identifier of the candidate accepting the offer.</param>
/// <param name="jobOfferId">Identifier of the job offer being accepted.</param>
/// <returns>A confirmation sentence naming the offer and the candidate.</returns>
private static string AcceptJobOffer(string candidateId, string jobOfferId)
{
    string logLine = "accepting the job offer for candidate " + candidateId
        + " with job offer id " + jobOfferId;
    Console.WriteLine(logLine);

    string confirmation = string.Concat(
        "You have successfully accepted the job offer with id ",
        jobOfferId,
        " for candidate ",
        candidateId,
        ".");
    return confirmation;
}

}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
//Call Automation Client
var client = new CallAutomationClient(acsConnectionString);
var app = builder.Build();
var appBaseUrl = Environment.GetEnvironmentVariable("VS_TUNNEL_URL")?.TrimEnd('/');
//var appBaseUrl = Environment.GetEnvironmentVariable("VS_TUNNEL_URL")?.TrimEnd('/');
var appBaseUrl = builder.Configuration.GetValue<string>("DevTunnelUri")?.TrimEnd('/');

if (string.IsNullOrEmpty(appBaseUrl))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"applicationUrl": "https://localhost:49411;http://localhost:49412"
"applicationUrl": "https://localhost:5165;http://localhost:5166"
}
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
"AzureOpenAIServiceKey": "AZURE_OPENAI_SERVICE_KEY",
"AzureOpenAIServiceEndpoint": "AZURE_OPENAI_ENDPOINT",
"AzureOpenAIDeploymentModelName": "AZURE_OPENAI_MODEL",
"SystemPrompt": "AI_SYSTEM_PROMPT"
"DevTunnelUri": "DEV_TUNNEL_URI",
"SystemPrompt": "You are the virtual job recruiting agent. You only talk about the job recommendations you offer to users based on their profiles and the available job offers. You answer user questions about the jobs recommended to them and help users apply to one of these jobs. NEVER TELL users that they need to apply for a job by email or that you will help them to create a CV - all user profiles are already available to you! You speak English, German and French. You are polite, concise, and informative. You do not make jokes or use slang. You do not say 'I am a virtual agent' or 'I am a bot'."
}
6 changes: 4 additions & 2 deletions CallAutomation_AzOpenAI_Voice/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ products:

This is a sample application demonstrated during Microsoft Ignite 2024. It highlights an integration of Azure Communication Services with Azure OpenAI Service to enable intelligent conversational agents.

This sample has been extended with tool handling for the Contoso recruiting scenario.

## Prerequisites

- Create an Azure account with an active subscription. For details, see [Create an account for free](https://azure.microsoft.com/free/)
Expand All @@ -30,14 +32,14 @@ Before running this sample, you'll need to setup the resources above with the fo

```bash
devtunnel create --allow-anonymous
devtunnel port create -p 5165
devtunnel port create -p 5166
devtunnel host
```

##### 2. Add the required API Keys and endpoints
Open the appsettings.json file to configure the following settings:

- `DevTunnelUri`: your dev tunnel endpoint
- `DevTunnelUri`: your dev tunnel endpoint. Please add '/api/incomingCall' to the end of your DevTunnelUri when you register your EventGrid webhook.
- `AcsConnectionString`: Azure Communication Service resource's connection string.
- `AzureOpenAIServiceKey`: Open AI's Service Key
- `AzureOpenAIServiceEndpoint`: Open AI's Service Endpoint
Expand Down