hypermodeinc
diff --git a/‎.trunk/configs/cspell.json
Lines changed: 2 additions & 0 deletions b/‎.trunk/configs/cspell.json
Lines changed: 2 additions & 0 deletions
diff --git a/‎sdk/go/examples/textgeneration/main.go
Lines changed: 7 additions & 151 deletions b/‎sdk/go/examples/textgeneration/main.go
Lines changed: 7 additions & 151 deletions
diff --git a/‎sdk/go/examples/textgeneration/media.go
Lines changed: 207 additions & 0 deletions b/‎sdk/go/examples/textgeneration/media.go
Lines changed: 207 additions & 0 deletions
@@ -134,6 +134,7 @@
     "omitif",
     "omitnull",
     "openai",
+    "openspeech",
     "operationreport",
     "PEMS",
     "pgconn",
@@ -175,6 +176,7 @@
     "textgeneration",
     "tidwall",
     "tinygo",
+    "toolcalling",
     "tseslint",
     "tsrv",
     "typedarray",
 
@@ -6,154 +6,10 @@
 
 package main
 
-import (
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/hypermodeinc/modus/sdk/go/pkg/models"
-	"github.com/hypermodeinc/modus/sdk/go/pkg/models/openai"
-)
-
-// In this example, we will generate text using the OpenAI Chat model.
-// See https://platform.openai.com/docs/api-reference/chat/create for more details
-// about the options available on the model, which you can set on the input object.
-
-// This model name should match the one defined in the modus.json manifest file.
-const modelName = "text-generator"
-
-// This function generates some text based on the instruction and prompt provided.
-func GenerateText(instruction, prompt string) (string, error) {
-
-	// The imported ChatModel type follows the OpenAI Chat completion model input format.
-	model, err := models.GetModel[openai.ChatModel](modelName)
-	if err != nil {
-		return "", err
-	}
-
-	// We'll start by creating an input object using the instruction and prompt provided.
-	input, err := model.CreateInput(
-		openai.NewSystemMessage(instruction),
-		openai.NewUserMessage(prompt),
-		// ... if we wanted to add more messages, we could do so here.
-	)
-	if err != nil {
-		return "", err
-	}
-
-	// This is one of many optional parameters available for the OpenAI Chat model.
-	input.Temperature = 0.7
-
-	// Here we invoke the model with the input we created.
-	output, err := model.Invoke(input)
-	if err != nil {
-		return "", err
-	}
-
-	// The output is also specific to the ChatModel interface.
-	// Here we return the trimmed content of the first choice.
-	return strings.TrimSpace(output.Choices[0].Message.Content), nil
-}
-
-// This function generates a single product.
-func GenerateProduct(category string) (*Product, error) {
-
-	// We can get creative with the instruction and prompt to guide the model
-	// in generating the desired output.  Here we provide a sample JSON of the
-	// object we want the model to generate.
-	instruction := "Generate a product for the category provided.\n" +
-		"Only respond with valid JSON object in this format:\n" + sampleProductJson
-	prompt := fmt.Sprintf(`The category is "%s".`, category)
-
-	// Set up the input for the model, creating messages for the instruction and prompt.
-	model, err := models.GetModel[openai.ChatModel](modelName)
-	if err != nil {
-		return nil, err
-	}
-	input, err := model.CreateInput(
-		openai.NewSystemMessage(instruction),
-		openai.NewUserMessage(prompt),
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	// Let's increase the temperature to get more creative responses.
-	// Be careful though, if the temperature is too high, the model may generate invalid JSON.
-	input.Temperature = 1.2
-
-	// This model also has a response format parameter that can be set to JSON,
-	// Which, along with the instruction, can help guide the model in generating valid JSON output.
-	input.ResponseFormat = openai.ResponseFormatJson
-
-	// Here we invoke the model with the input we created.
-	output, err := model.Invoke(input)
-	if err != nil {
-		return nil, err
-	}
-
-	// The output should contain the JSON string we asked for.
-	content := strings.TrimSpace(output.Choices[0].Message.Content)
-
-	// We can now parse the JSON string as a Product object.
-	var product Product
-	if err := json.Unmarshal([]byte(content), &product); err != nil {
-		return nil, fmt.Errorf("failed to parse JSON: %w", err)
-	}
-
-	return &product, nil
-}
-
-// This function generates multiple product.
-func GenerateProducts(category string, quantity int) ([]Product, error) {
-
-	// Similar to the previous example above, we can tailor the instruction and prompt
-	// to guide the model in generating the desired output.  Note that understanding the behavior
-	// of the model is important to get the desired results.  In this case, we need the model
-	// to return an _object_ containing an array, not an array of objects directly.
-	// That's because the model will not reliably generate an array of objects directly.
-	instruction := fmt.Sprintf("Generate %d products for the category provided.\n"+
-		"Only respond with a valid JSON object containing a valid JSON array named 'list', in this format:\n"+
-		`{"list":[%s]}`, quantity, sampleProductJson)
-	prompt := fmt.Sprintf(`The category is "%s".`, category)
-
-	// Set up the input for the model, creating messages for the instruction and prompt.
-	model, err := models.GetModel[openai.ChatModel](modelName)
-	if err != nil {
-		return nil, err
-	}
-	input, err := model.CreateInput(
-		openai.NewSystemMessage(instruction),
-		openai.NewUserMessage(prompt),
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	// Adjust the model inputs, just like in the previous example.
-	// Be careful, if the temperature is too high, the model may generate invalid JSON.
-	input.Temperature = 1.2
-	input.ResponseFormat = openai.ResponseFormatJson
-
-	// Here we invoke the model with the input we created.
-	output, err := model.Invoke(input)
-	if err != nil {
-		return nil, err
-	}
-
-	// The output should contain the JSON string we asked for.
-	content := strings.TrimSpace(output.Choices[0].Message.Content)
-
-	// We can parse that JSON to a compatible object, to get the data we're looking for.
-	var data map[string][]Product
-	if err := json.Unmarshal([]byte(content), &data); err != nil {
-		return nil, fmt.Errorf("failed to parse JSON: %w", err)
-	}
-
-	// Now we can extract the list of products from the data.
-	products, found := data["list"]
-	if !found {
-		return nil, fmt.Errorf("expected 'list' key in JSON object")
-	}
-	return products, nil
-}
+// The examples have been split into separate files for clarity.
+// See each of the following files for more details about the specific example.
+//
+// - simple.go
+// - products.go
+// - media.go
+// - toolcalling.go
@@ -0,0 +1,207 @@
+/*
+ * This example is part of the Modus project, licensed under the Apache License 2.0.
+ * You may modify and use this example in accordance with the license.
+ * See the LICENSE file that accompanied this code for further details.
+ */
+
+package main
+
+import (
+	"fmt"
+	"math/rand/v2"
+	"strings"
+
+	"github.com/hypermodeinc/modus/sdk/go/pkg/http"
+	"github.com/hypermodeinc/modus/sdk/go/pkg/models"
+	"github.com/hypermodeinc/modus/sdk/go/pkg/models/openai"
+)
+
+// These examples demonstrate how to use audio or image data with OpenAI chat models.
+// Currently, audio can be used for input or output, but images can be used only for input.
+
+// Media represents an image or audio clip, as used by the examples in this file.
+type Media struct {
+
+	// ContentType is the content type of the media (the examples in this file set values such as "audio/wav").
+	ContentType string
+
+	// Data holds the binary data of the media.
+	// This value will be base64 encoded when used in an API response.
+	Data []byte
+
+	// Text is a text description or transcription of the media.
+	Text string
+}
+
+// GenerateAudio generates an audio response based on the instruction and prompt provided.
+// It returns the audio as WAV data, along with the transcript of what was spoken.
+func GenerateAudio(instruction, prompt string) (*Media, error) {
+
+	// Note, this is similar to the GenerateText example (moved out of main.go), but with audio output requested.
+	// We'll generate the audio using an audio-enabled OpenAI chat model.
+	model, err := models.GetModel[openai.ChatModel]("audio-model")
+	if err != nil {
+		return nil, err
+	}
+
+	input, err := model.CreateInput(
+		openai.NewSystemMessage(instruction),
+		openai.NewUserMessage(prompt),
+	)
+	if err != nil {
+		return nil, err
+	}
+	input.Temperature = 0.7
+
+	// Request audio output from the model.
+	// Note, this is a convenience method that requests audio modality and sets the voice and format.
+	// You can also set these values manually on the input object, if you prefer.
+	input.RequestAudioOutput("ash", "wav")
+
+	output, err := model.Invoke(input)
+	if err != nil {
+		return nil, err
+	}
+
+	// Report a response without audio as an error, rather than panicking on it below.
+	// Note that the message Content field will be empty for audio responses.
+	// Instead, the text will be in the Message.Audio.Transcript field.
+	if len(output.Choices) == 0 || output.Choices[0].Message.Audio == nil {
+		return nil, fmt.Errorf("model response did not include any audio output")
+	}
+	audio := output.Choices[0].Message.Audio
+
+	return &Media{
+		ContentType: "audio/wav",
+		Data:        audio.Data,
+		Text:        strings.TrimSpace(audio.Transcript),
+	}, nil
+}
+
+// DescribeImage asks the model to describe the image found at the provided url.
+// The url itself is handed to the model, which then retrieves the image on its own.
+func DescribeImage(url string) (string, error) {
+
+	// Since the model does the retrieval, any URL will work here.
+	// The trade-off is a risk of sending data to an unauthorized host, unless the URL is hardcoded or sanitized.
+	// The DescribeRandomImage function below shows a safer approach.
+
+	chat, err := models.GetModel[openai.ChatModel]("text-generator")
+	if err != nil {
+		return "", err
+	}
+
+	// The user message has two parts: the text request, and the image reference.
+	question := openai.NewTextContentPart("Describe this image.")
+	picture := openai.NewImageContentPartFromUrl(url)
+	request, err := chat.CreateInput(openai.NewUserMessageFromParts(question, picture))
+	if err != nil {
+		return "", err
+	}
+
+	reply, err := chat.Invoke(request)
+	if err != nil {
+		return "", err
+	}
+
+	// Only the first choice is used, with surrounding whitespace removed.
+	description := strings.TrimSpace(reply.Choices[0].Message.Content)
+	return description, nil
+}
+
+// DescribeRandomImage fetches a random image, and then generates text that describes it.
+// In this example the image is retrieved by the function before passing it as data to the model.
+func DescribeRandomImage() (*Media, error) {
+
+	// Because this approach fetches the image directly, it is safer than the DescribeImage function above.
+	// The host URL is allow-listed in the modus.json file, so we can trust the image source.
+
+	// Fetch a random image from the Picsum API.  We'll just hardcode the size to make the demo simple to call.
+	response, err := http.Fetch("https://picsum.photos/640/480")
+	if err != nil {
+		return nil, err
+	}
+
+	// Get returns a pointer here; check it before dereferencing (presumably nil when the header is missing).
+	header := response.Headers.Get("Content-Type")
+	if header == nil {
+		return nil, fmt.Errorf("image response did not include a Content-Type header")
+	}
+	data, contentType := response.Body, *header
+
+	// Describe the image using the OpenAI chat model.
+	model, err := models.GetModel[openai.ChatModel]("text-generator")
+	if err != nil {
+		return nil, err
+	}
+	input, err := model.CreateInput(
+		openai.NewUserMessageFromParts(
+			openai.NewTextContentPart("Describe this image."),
+			openai.NewImageContentPartFromData(data, contentType),
+		),
+	)
+	if err != nil {
+		return nil, err
+	}
+	output, err := model.Invoke(input)
+	if err != nil {
+		return nil, err
+	}
+
+	// Return the image and its generated description.
+	return &Media{
+		ContentType: contentType,
+		Data:        data,
+		Text:        strings.TrimSpace(output.Choices[0].Message.Content),
+	}, nil
+}
+
+// TranscribeRandomSpeech fetches a random "Harvard Sentences" speech file from OpenSpeech, and then generates a transcript from it.
+// The sentences are from https://www.cs.columbia.edu/~hgs/audio/harvard.html
+func TranscribeRandomSpeech() (*Media, error) {
+
+	// Pick a random file number from the list of files available here:
+	// https://www.voiptroubleshooter.com/open_speech/american.html
+	numbers := []int{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 57, 58, 59, 60, 61}
+	num := numbers[rand.IntN(len(numbers))]
+
+	// Fetch the speech file corresponding to the number.
+	url := fmt.Sprintf("https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_%04d_8k.wav", num)
+	response, err := http.Fetch(url)
+	if err != nil {
+		return nil, err
+	}
+
+	// Reject non-success responses, so an error page is never sent to the model as audio (assumes Fetch does not error on them - confirm).
+	if !response.Ok() {
+		return nil, fmt.Errorf("fetching speech file: unexpected response %d %s", response.Status, response.StatusText)
+	}
+	data := response.Body
+
+	// Transcribe the audio using an audio-enabled OpenAI chat model.
+	model, err := models.GetModel[openai.ChatModel]("audio-model")
+	if err != nil {
+		return nil, err
+	}
+	input, err := model.CreateInput(
+		openai.NewDeveloperMessage("Do not include any newlines or surrounding quotation marks in the response. Omit any explanation beyond the request."),
+		openai.NewUserMessageFromParts(
+			openai.NewTextContentPart("Provide an exact transcription of the contents of this audio file."),
+			openai.NewAudioContentPartFromData(data, "wav"),
+		),
+	)
+	if err != nil {
+		return nil, err
+	}
+	output, err := model.Invoke(input)
+	if err != nil {
+		return nil, err
+	}
+
+	// Return the audio file and its transcript.
+	return &Media{
+		ContentType: "audio/wav",
+		Data:        data,
+		Text:        strings.TrimSpace(output.Choices[0].Message.Content),
+	}, nil
+}