From acbc8f3d8e84d480957e3db6ebfdcbd333217f62 Mon Sep 17 00:00:00 2001 From: codingl2k1 Date: Sat, 25 Jan 2025 22:27:39 +0100 Subject: [PATCH] Add LLM audio example --- .../models/model_abilities/multimodal.rst | 80 ++++++++++--------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/doc/source/models/model_abilities/multimodal.rst b/doc/source/models/model_abilities/multimodal.rst index 6ac810aaab..6f11dae962 100644 --- a/doc/source/models/model_abilities/multimodal.rst +++ b/doc/source/models/model_abilities/multimodal.rst @@ -147,44 +147,48 @@ Images are made available to the model in two main ways: by passing a link to th audio url directly in the request. -Uploading base 64 encoded images -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Chat with audio +~~~~~~~~~~~~~~~ .. code-block:: python - import openai - import base64 - - # Function to encode the image - def encode_image(image_path): - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode('utf-8') - - # Path to your image - image_path = "path_to_your_image.jpg" - - # Getting the base64 string - b64_img = encode_image(image_path) - - client = openai.Client( - api_key="cannot be empty", - base_url=f"http://<XINFERENCE_HOST>:<XINFERENCE_PORT>/v1" - ) - response = client.chat.completions.create( - model="<MODEL_UID>", - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What’s in this image?"}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{b64_img}", - }, - }, - ], - } - ], - ) - print(response.choices[0]) + from xinference.client import Client + + client = Client("http://<XINFERENCE_HOST>:<XINFERENCE_PORT>") + model = client.get_model(<MODEL_UID>) + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": [ + { + "type": "audio", + "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/glass-breaking-151256.mp3", + }, + {"type": "text", "text": "What's that sound?"}, + ], + }, + {"role": "assistant", "content": "It is 
the sound of glass shattering."}, + { + "role": "user", + "content": [ + {"type": "text", "text": "What can you do when you hear that?"}, + ], + }, + { + "role": "assistant", + "content": "Stay alert and cautious, and check if anyone is hurt or if there is any damage to property.", + }, + { + "role": "user", + "content": [ + { + "type": "audio", + "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac", + }, + {"type": "text", "text": "What does the person say?"}, + ], + }, + ] + print(model.chat(messages))