From acbc8f3d8e84d480957e3db6ebfdcbd333217f62 Mon Sep 17 00:00:00 2001
From: codingl2k1 <codingl2k1@outlook.com>
Date: Sat, 25 Jan 2025 22:27:39 +0100
Subject: [PATCH] Add LLM audio example

---
 .../models/model_abilities/multimodal.rst     | 80 ++++++++++---------
 1 file changed, 42 insertions(+), 38 deletions(-)

diff --git a/doc/source/models/model_abilities/multimodal.rst b/doc/source/models/model_abilities/multimodal.rst
index 6ac810aaab..6f11dae962 100644
--- a/doc/source/models/model_abilities/multimodal.rst
+++ b/doc/source/models/model_abilities/multimodal.rst
@@ -147,44 +147,48 @@ Images are made available to the model in two main ways: by passing a link to th
 audio url directly in the request.
 
 
-Uploading base 64 encoded images
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Chat with audio
+~~~~~~~~~~~~~~~
 
 .. code-block:: python
 
-    import openai
-    import base64
-
-    # Function to encode the image
-    def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
-
-    # Path to your image
-    image_path = "path_to_your_image.jpg"
-
-    # Getting the base64 string
-    b64_img = encode_image(image_path)
-
-    client = openai.Client(
-        api_key="cannot be empty",
-        base_url=f"http://<XINFERENCE_HOST>:<XINFERENCE_PORT>/v1"
-    )
-    response = client.chat.completions.create(
-        model="<MODEL_UID>",
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "What’s in this image?"},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{b64_img}",
-                        },
-                    },
-                ],
-            }
-        ],
-    )
-    print(response.choices[0])
+    from xinference.client import Client
+
+    client = Client("http://<XINFERENCE_HOST>:<XINFERENCE_PORT>")
+    model = client.get_model("<MODEL_UID>")
+
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "audio",
+                    "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/glass-breaking-151256.mp3",
+                },
+                {"type": "text", "text": "What's that sound?"},
+            ],
+        },
+        {"role": "assistant", "content": "It is the sound of glass shattering."},
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What can you do when you hear that?"},
+            ],
+        },
+        {
+            "role": "assistant",
+            "content": "Stay alert and cautious, and check if anyone is hurt or if there is any damage to property.",
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "audio",
+                    "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac",
+                },
+                {"type": "text", "text": "What does the person say?"},
+            ],
+        },
+    ]
+    print(model.chat(messages))