diff --git a/examples/LangChain.Samples.HuggingFace/Program.cs b/examples/LangChain.Samples.HuggingFace/Program.cs
index a351b7e0..0dc272d2 100644
--- a/examples/LangChain.Samples.HuggingFace/Program.cs
+++ b/examples/LangChain.Samples.HuggingFace/Program.cs
@@ -1,10 +1,29 @@
-using LangChain.Providers.HuggingFace;
+using LangChain.Providers;
+using LangChain.Providers.HuggingFace;
using LangChain.Providers.HuggingFace.Predefined;
using var client = new HttpClient();
var provider = new HuggingFaceProvider(apiKey: string.Empty, client);
var gpt2Model = new Gpt2Model(provider);
-var response = await gpt2Model.GenerateAsync("What would be a good company name be for name a company that makes colorful socks?");
+var gpt2ModelResponse = await gpt2Model.GenerateAsync("What would be a good company name be for name a company that makes colorful socks?");
-Console.WriteLine(response);
\ No newline at end of file
+Console.WriteLine("### GPT2 Response");
+Console.WriteLine(gpt2ModelResponse);
+
+const string imageToTextModel = "Salesforce/blip-image-captioning-base";
+var model = new HuggingFaceImageToTextModel(provider, imageToTextModel);
+// Reads an existing PNG from the temp folder, so the media type sent with the bytes is image/png.
+var path = Path.Combine(Path.GetTempPath(), "solar_system.png");
+var imageData = await File.ReadAllBytesAsync(path);
+var binaryData = new BinaryData(imageData, "image/png");
+
+var imageToTextResponse = await model.GenerateTextFromImageAsync(new ImageToTextRequest
+{
+    Image = binaryData
+});
+
+Console.WriteLine("\n\n### ImageToText Response");
+Console.WriteLine(imageToTextResponse.Text);
+
+Console.ReadLine();
diff --git a/src/Core/src/Chains/Chain.cs b/src/Core/src/Chains/Chain.cs
index a4256475..4ffd2496 100644
--- a/src/Core/src/Chains/Chain.cs
+++ b/src/Core/src/Chains/Chain.cs
@@ -4,6 +4,7 @@
using LangChain.Chains.StackableChains.Agents.Crew;
using LangChain.Chains.StackableChains.Files;
using LangChain.Chains.StackableChains.ImageGeneration;
+using LangChain.Chains.StackableChains.ImageToTextGeneration;
using LangChain.Chains.StackableChains.ReAct;
using LangChain.Indexes;
using LangChain.Memory;
@@ -298,4 +299,19 @@ public static ExtractCodeChain ExtractCode(
{
return new ExtractCodeChain(inputKey, outputKey);
}
+
+ /// <summary>
+ /// Creates a chain step that runs an image-to-text model on a fixed image.
+ /// </summary>
+ /// <param name="model">Model handed to the new <see cref="ImageToTextGenerationChain"/>.</param>
+ /// <param name="image">Image data handed to the new chain.</param>
+ /// <param name="outputKey">Output key handed to the new chain; defaults to "text".</param>
+ /// <returns>A new <see cref="ImageToTextGenerationChain"/> built from the three arguments.</returns>
+ public static ImageToTextGenerationChain GenerateImageToText(
+ IImageToTextModel model,
+ BinaryData image,
+ string outputKey = "text")
+ {
+ return new ImageToTextGenerationChain(model, image, outputKey);
+ }
}
diff --git a/src/Core/src/Chains/StackableChains/ImageToTextGeneration/ImageToTextGenerationChain.cs b/src/Core/src/Chains/StackableChains/ImageToTextGeneration/ImageToTextGenerationChain.cs
new file mode 100644
index 00000000..a83e6e38
--- /dev/null
+++ b/src/Core/src/Chains/StackableChains/ImageToTextGeneration/ImageToTextGenerationChain.cs
@@ -0,0 +1,40 @@
+using LangChain.Abstractions.Schema;
+using LangChain.Chains.HelperChains;
+using LangChain.Providers;
+
+namespace LangChain.Chains.StackableChains.ImageToTextGeneration;
+
+/// <summary>
+/// Chain step that sends a fixed image to an image-to-text model and stores the model's response.
+/// </summary>
+public class ImageToTextGenerationChain : BaseStackableChain
+{
+    private readonly IImageToTextModel _model;
+    private readonly BinaryData _image;
+
+    /// <summary>
+    /// Creates the chain step for the given model and image.
+    /// </summary>
+    /// <param name="model">Model that generates the text; must not be null.</param>
+    /// <param name="image">Image sent to the model on every call; must not be null.</param>
+    /// <param name="outputKey">Key under which the response is stored in the chain values.</param>
+    public ImageToTextGenerationChain(
+        IImageToTextModel model,
+        BinaryData image,
+        string outputKey = "text")
+    {
+        _model = model ?? throw new ArgumentNullException(nameof(model));
+        _image = image ?? throw new ArgumentNullException(nameof(image));
+        OutputKeys = new[] { outputKey };
+    }
+
+    /// <inheritdoc />
+    protected override async Task<IChainValues> InternalCall(IChainValues values)
+    {
+        values = values ?? throw new ArgumentNullException(nameof(values));
+        var response = await _model.GenerateTextFromImageAsync(new ImageToTextRequest { Image = _image }).ConfigureAwait(false);
+        // The response object itself (not only its Text) is stored under the output key.
+        values.Value[OutputKeys[0]] = response;
+        return values;
+    }
+}
diff --git a/src/Directory.Packages.props b/src/Directory.Packages.props
index 2cd937a2..7cbd45be 100644
--- a/src/Directory.Packages.props
+++ b/src/Directory.Packages.props
@@ -54,6 +54,7 @@
runtime; build; native; contentfiles; analyzers; buildtransitive
+
diff --git a/src/Providers/Abstractions/src/Common/Provider.cs b/src/Providers/Abstractions/src/Common/Provider.cs
index 0e9c48be..4856b84c 100644
--- a/src/Providers/Abstractions/src/Common/Provider.cs
+++ b/src/Providers/Abstractions/src/Common/Provider.cs
@@ -21,4 +21,7 @@ public abstract class Provider(string id) : Model(id), IProvider
///
public TextToSpeechSettings? TextToSpeechSettings { get; init; }
+
+ /// <summary>Image to text settings configured on the provider; null when none are set.</summary>
+ public ImageToTextSettings? ImageToTextSettings { get; init; }
}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/IImageToTextModel.cs b/src/Providers/Abstractions/src/ImageToText/IImageToTextModel.cs
new file mode 100644
index 00000000..8ff51071
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/IImageToTextModel.cs
@@ -0,0 +1,19 @@
+namespace LangChain.Providers;
+
+/// <summary>
+/// Defines a large language model that can be used for image to text generation.
+/// </summary>
+public interface IImageToTextModel : IModel
+{
+ /// <summary>
+ /// Run the LLM on the given image.
+ /// </summary>
+ /// <param name="request">Request describing the image to run the model on.</param>
+ /// <param name="settings">Optional settings for this call; defaults to null.</param>
+ /// <param name="cancellationToken">Cancellation token for the call; defaults to <c>default</c>.</param>
+ /// <returns>A task representing the asynchronous generation.</returns>
+ public Task GenerateTextFromImageAsync(
+ ImageToTextRequest request,
+ ImageToTextSettings? settings = null,
+ CancellationToken cancellationToken = default);
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/IImageToTextModel`2.cs b/src/Providers/Abstractions/src/ImageToText/IImageToTextModel`2.cs
new file mode 100644
index 00000000..f6c3142b
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/IImageToTextModel`2.cs
@@ -0,0 +1,19 @@
+namespace LangChain.Providers;
+
+/// <summary>
+/// Defines a large language model that can be used for image to text generation.
+/// </summary>
+public interface IImageToTextModel : IImageToTextModel
+{
+ /// <summary>
+ /// Run the LLM on the image.
+ /// </summary>
+ /// <param name="request">Request of type TRequest describing the image to run the model on.</param>
+ /// <param name="settings">Optional settings of type TSettings for this call; defaults to <c>default</c>.</param>
+ /// <param name="cancellationToken">Cancellation token for the call; defaults to <c>default</c>.</param>
+ /// <returns>A task representing the asynchronous generation.</returns>
+ public Task GenerateTextFromImageAsync(
+ TRequest request,
+ TSettings? settings = default,
+ CancellationToken cancellationToken = default);
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/ImageToTextGenerationResponse.cs b/src/Providers/Abstractions/src/ImageToText/ImageToTextGenerationResponse.cs
new file mode 100644
index 00000000..b36868db
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/ImageToTextGenerationResponse.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace LangChain.Providers;
+/// <summary>List of generated-text items (presumably the JSON array returned by the image-to-text endpoint; confirm).</summary>
+public class ImageToTextGenerationResponse : List
+{
+ public sealed class GeneratedTextItem
+ {
+ /// <summary>
+ /// The generated text, read from the JSON property <c>generated_text</c>.
+ /// </summary>
+ [JsonPropertyName("generated_text")]
+ public string? GeneratedText { get; set; }
+ }
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/ImageToTextModel.cs b/src/Providers/Abstractions/src/ImageToText/ImageToTextModel.cs
new file mode 100644
index 00000000..c09b7054
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/ImageToTextModel.cs
@@ -0,0 +1,10 @@
+// ReSharper disable once CheckNamespace
+namespace LangChain.Providers;
+/// <summary>Base class for image to text models; derived classes implement <see cref="GenerateTextFromImageAsync"/>.</summary>
+public abstract class ImageToTextModel(string id) : Model(id), IImageToTextModel
+{
+ public abstract Task GenerateTextFromImageAsync(
+ ImageToTextRequest request,
+ ImageToTextSettings? settings = default,
+ CancellationToken cancellationToken = default);
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/ImageToTextRequest.cs b/src/Providers/Abstractions/src/ImageToText/ImageToTextRequest.cs
new file mode 100644
index 00000000..505a9382
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/ImageToTextRequest.cs
@@ -0,0 +1,13 @@
+// ReSharper disable once CheckNamespace
+namespace LangChain.Providers;
+
+/// <summary>
+/// Base class for image to text requests.
+/// </summary>
+public class ImageToTextRequest
+{
+ /// <summary>
+ /// Image to upload. Required: it must be set in the object initializer.
+ /// </summary>
+ public required BinaryData Image { get; init; }
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/ImageToTextResponse.cs b/src/Providers/Abstractions/src/ImageToText/ImageToTextResponse.cs
new file mode 100644
index 00000000..12839b19
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/ImageToTextResponse.cs
@@ -0,0 +1,27 @@
+// ReSharper disable once CheckNamespace
+// ReSharper disable ConditionalAccessQualifierIsNonNullableAccordingToAPIContract
+namespace LangChain.Providers;
+
+#pragma warning disable CA2225
+
+/// <summary>
+/// Result of an image to text generation request.
+/// </summary>
+public class ImageToTextResponse
+{
+ /// <summary>
+ /// Settings that were used for the request. Required: it must be set in the object initializer.
+ /// </summary>
+ public required ImageToTextSettings UsedSettings { get; init; }
+
+ /// <summary>
+ /// Usage recorded for the request; defaults to <c>Usage.Empty</c>.
+ /// </summary>
+ public Usage Usage { get; init; } = Usage.Empty;
+
+
+ /// <summary>
+ /// Generated text
+ /// </summary>
+ public string? Text { get; set; }
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/ImageToText/ImageToTextSettings.cs b/src/Providers/Abstractions/src/ImageToText/ImageToTextSettings.cs
new file mode 100644
index 00000000..fd791454
--- /dev/null
+++ b/src/Providers/Abstractions/src/ImageToText/ImageToTextSettings.cs
@@ -0,0 +1,55 @@
+// ReSharper disable once CheckNamespace
+namespace LangChain.Providers;
+
+/// <summary>
+/// Base class for image to text request settings.
+/// </summary>
+public class ImageToTextSettings
+{
+    /// <summary>Fallback values that <see cref="Calculate"/> uses when no other level provides one.</summary>
+    public static ImageToTextSettings Default { get; } = new()
+    {
+        User = string.Empty,
+        Endpoint = "https://api-inference.huggingface.co/models/"
+    };
+
+    /// <summary>
+    /// Unique user identifier.
+    /// </summary>
+    public string? User { get; init; }
+
+    /// <summary>
+    /// Endpoint url for api. Null means this settings level does not override the endpoint.
+    /// </summary>
+    public string? Endpoint { get; set; }
+
+    /// <summary>
+    /// Calculate the settings to use for the request.
+    /// </summary>
+    /// <param name="requestSettings">Settings passed with the call; checked first.</param>
+    /// <param name="modelSettings">Settings configured on the model; checked second.</param>
+    /// <param name="providerSettings">Settings configured on the provider; checked third.</param>
+    /// <returns>New settings where each value comes from the first level that sets it, then <see cref="Default"/>.</returns>
+    /// <exception cref="InvalidOperationException">A value is set at no level, including <see cref="Default"/>.</exception>
+    public static ImageToTextSettings Calculate(
+        ImageToTextSettings? requestSettings,
+        ImageToTextSettings? modelSettings,
+        ImageToTextSettings? providerSettings)
+    {
+        return new ImageToTextSettings
+        {
+            User =
+                requestSettings?.User ??
+                modelSettings?.User ??
+                providerSettings?.User ??
+                Default.User ??
+                throw new InvalidOperationException("Default User is not set."),
+            Endpoint =
+                requestSettings?.Endpoint ??
+                modelSettings?.Endpoint ??
+                providerSettings?.Endpoint ??
+                Default.Endpoint ??
+                throw new InvalidOperationException("Default Endpoint is not set."),
+        };
+    }
+}
\ No newline at end of file
diff --git a/src/Providers/Abstractions/src/LangChain.Providers.Abstractions.csproj b/src/Providers/Abstractions/src/LangChain.Providers.Abstractions.csproj
index db5842fe..a39eaddf 100644
--- a/src/Providers/Abstractions/src/LangChain.Providers.Abstractions.csproj
+++ b/src/Providers/Abstractions/src/LangChain.Providers.Abstractions.csproj
@@ -16,6 +16,7 @@
all
runtime; build; native; contentfiles; analyzers; buildtransitive
+
diff --git a/src/Providers/Amazon.Bedrock/src/Internal/BedrockExtensions.cs b/src/Providers/Amazon.Bedrock/src/Internal/BedrockExtensions.cs
index 0cba107a..657b8611 100644
--- a/src/Providers/Amazon.Bedrock/src/Internal/BedrockExtensions.cs
+++ b/src/Providers/Amazon.Bedrock/src/Internal/BedrockExtensions.cs
@@ -1,4 +1,5 @@
using System.Text;
+using System.Text.Json;
using System.Text.Json.Nodes;
using Amazon.BedrockRuntime;
using Amazon.BedrockRuntime.Model;
@@ -16,10 +17,10 @@ public static IReadOnlyList Split(
{
var inputText = string.Join(" ", strings);
var textSplitter = new RecursiveCharacterTextSplitter(chunkSize: chunkSize);
-
+
return textSplitter.SplitText(inputText);
}
-
+
internal static async Task InvokeModelAsync(
this AmazonBedrockRuntimeClient client,
string id,
@@ -27,7 +28,7 @@ public static IReadOnlyList Split(
CancellationToken cancellationToken = default)
{
memoryStream = memoryStream ?? throw new ArgumentNullException(nameof(memoryStream));
-
+
var response = await client.InvokeModelAsync(new InvokeModelRequest
{
ModelId = id,
@@ -50,7 +51,7 @@ public static IReadOnlyList Split(
cancellationToken: cancellationToken).ConfigureAwait(false);
#endif
}
-
+
public static async Task InvokeModelAsync(
this AmazonBedrockRuntimeClient client,
string id,
@@ -58,13 +59,13 @@ public static IReadOnlyList Split(
CancellationToken cancellationToken = default)
{
using var stream = new MemoryStream(bytes);
-
+
return await client.InvokeModelAsync(
id: id,
memoryStream: stream,
cancellationToken).ConfigureAwait(false);
}
-
+
public static async Task InvokeModelAsync(
this AmazonBedrockRuntimeClient client,
string id,
@@ -72,17 +73,45 @@ public static IReadOnlyList Split(
CancellationToken cancellationToken = default)
{
using var stream = AWSSDKUtils.GenerateMemoryStreamFromString(jsonObject.ToJsonString());
-
+
return await client.InvokeModelAsync(
id: id,
memoryStream: stream,
cancellationToken).ConfigureAwait(false);
}
-
+
+    /// <summary>
+    /// Invokes a Bedrock model with a JSON request body and deserializes the JSON response body.
+    /// </summary>
+    /// <typeparam name="T">Type the response body is deserialized into.</typeparam>
+    /// <param name="client">Bedrock runtime client used to send the request.</param>
+    /// <param name="id">Model identifier, assigned to the request's <c>ModelId</c>.</param>
+    /// <param name="jsonObject">Request payload; sent as the text produced by <c>ToJsonString()</c>.</param>
+    /// <param name="cancellationToken">Token passed to the SDK call and to the deserializer.</param>
+    /// <returns>The deserialized response body; may be null.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="client"/> or <paramref name="jsonObject"/> is null.</exception>
+    /// <exception cref="InvalidOperationException">The response status code is not 200 OK.</exception>
+    public static async Task<T?> InvokeModelAsync<T>(
+        this AmazonBedrockRuntimeClient client,
+        string id,
+        JsonObject jsonObject,
+        CancellationToken cancellationToken = default)
+    {
+        client = client ?? throw new ArgumentNullException(nameof(client));
+        jsonObject = jsonObject ?? throw new ArgumentNullException(nameof(jsonObject));
+
+        // The stream backs the request body, so it is disposed only when the method is left.
+        using var stream = AWSSDKUtils.GenerateMemoryStreamFromString(jsonObject.ToJsonString());
+
+        var response = await client.InvokeModelAsync(
+            new InvokeModelRequest
+            {
+                ContentType = "application/json",
+                Accept = "application/json",
+                ModelId = id,
+                Body = stream,
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        if (response.HttpStatusCode != System.Net.HttpStatusCode.OK)
+        {
+            throw new InvalidOperationException(
+                $"InvokeModelAsync failed with status code: {response.HttpStatusCode}");
+        }
+
+        return await JsonSerializer.DeserializeAsync<T>(response.Body, cancellationToken: cancellationToken)
+            .ConfigureAwait(false);
+    }
+
public static string ToSimplePrompt(this IReadOnlyCollection messages)
{
messages = messages ?? throw new ArgumentNullException(nameof(messages));
-
+
var sb = new StringBuilder();
foreach (var item in messages)
@@ -91,11 +120,11 @@ public static string ToSimplePrompt(this IReadOnlyCollection messages)
}
return sb.ToString();
}
-
+
public static string ToRolePrompt(this IReadOnlyCollection messages)
{
messages = messages ?? throw new ArgumentNullException(nameof(messages));
-
+
var sb = new StringBuilder();
foreach (var item in messages)
diff --git a/src/Providers/Amazon.Bedrock/src/LangChain.Providers.Amazon.Bedrock.csproj b/src/Providers/Amazon.Bedrock/src/LangChain.Providers.Amazon.Bedrock.csproj
index 642178a0..74ec4e8c 100644
--- a/src/Providers/Amazon.Bedrock/src/LangChain.Providers.Amazon.Bedrock.csproj
+++ b/src/Providers/Amazon.Bedrock/src/LangChain.Providers.Amazon.Bedrock.csproj
@@ -22,7 +22,7 @@
- AWS Bedrock model provider.
+ Amazon Bedrock model provider.
$(PackageTags);aws;amazon;bedrock;api
diff --git a/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanImageGenerationModel.cs b/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanImageGenerationModel.cs
index b04a4a6a..7b6b6bf7 100644
--- a/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanImageGenerationModel.cs
+++ b/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanImageGenerationModel.cs
@@ -1,4 +1,5 @@
using System.Diagnostics;
+using System.Net.Mime;
using System.Text.Json.Nodes;
using LangChain.Providers.Amazon.Bedrock.Internal;
@@ -23,7 +24,7 @@ public async Task GenerateImageAsync(
requestSettings: settings,
modelSettings: Settings,
providerSettings: provider.TextToImageSettings);
- var response = await provider.Api.InvokeModelAsync(
+ var response = await provider.Api.InvokeModelAsync(
Id,
new JsonObject
{
@@ -44,7 +45,7 @@ public async Task GenerateImageAsync(
},
cancellationToken).ConfigureAwait(false);
- var generatedText = response?["images"]?[0]?.GetValue() ?? "";
+        // The deserialized response and its Images list can both be null; fall back to an empty list.
+        var images = response?.Images?.Select(image => Data.FromBase64(image)).ToList() ?? new List<Data>();
var usage = Usage.Empty with
{
@@ -55,7 +56,7 @@ public async Task GenerateImageAsync(
return new TextToImageResponse
{
- Images = [Data.FromBase64(generatedText)],
+ Images = images,
UsedSettings = TextToImageSettings.Default,
Usage = usage,
};
diff --git a/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanTextToImageResponse.cs b/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanTextToImageResponse.cs
new file mode 100644
index 00000000..4f732b85
--- /dev/null
+++ b/src/Providers/Amazon.Bedrock/src/TextToImage/AmazonTitanTextToImageResponse.cs
@@ -0,0 +1,9 @@
+using System.Text.Json.Serialization;
+
+namespace LangChain.Providers.Amazon.Bedrock;
+/// <summary>Response body with the JSON property <c>images</c>; each entry is read as a base64 string by the caller.</summary>
+public class AmazonTitanTextToImageResponse
+{
+    [JsonPropertyName("images")]
+    public IList<string> Images { get; set; } = new List<string>();
+}
\ No newline at end of file
diff --git a/src/Providers/Amazon.Bedrock/test/BedrockTests.cs b/src/Providers/Amazon.Bedrock/test/BedrockTests.cs
index f3112f3a..a8a9634f 100644
--- a/src/Providers/Amazon.Bedrock/test/BedrockTests.cs
+++ b/src/Providers/Amazon.Bedrock/test/BedrockTests.cs
@@ -22,7 +22,6 @@ namespace LangChain.Providers.Amazon.Bedrock.Tests;
[TestFixture, Explicit]
public class BedrockTests
{
-
[Test]
public async Task Chains()
{
@@ -217,10 +216,11 @@ public async Task CanGetImage()
{
var provider = new BedrockProvider();
var model = new TitanImageGeneratorV1Model(provider);
- var response = await model.GenerateImageAsync(
- "create a picture of the solar system");
+ var response = await model.GenerateImageAsync("create a picture of the solar system");
var path = Path.Combine(Path.GetTempPath(), "solar_system.png");
+ Data image = response.Images[0];
+ var images = response.Images.Select(x => x.ToByteArray()).ToList();
await File.WriteAllBytesAsync(path, response.Images[0].ToByteArray());
@@ -261,7 +261,7 @@ you are a comic book writer. you will be given a question and you will answer i
if (useChatSettings)
{
- var response = await llm.GenerateAsync(prompt, new BedrockChatSettings { UseStreaming = useStreaming});
+ var response = await llm.GenerateAsync(prompt, new BedrockChatSettings { UseStreaming = useStreaming });
response.LastMessageContent.Should().NotBeNull();
}
else
diff --git a/src/Providers/Amazon.Bedrock/test/EmbeddedResource.cs b/src/Providers/Amazon.Bedrock/test/EmbeddedResource.cs
new file mode 100644
index 00000000..fa8cfa4a
--- /dev/null
+++ b/src/Providers/Amazon.Bedrock/test/EmbeddedResource.cs
@@ -0,0 +1,67 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Configuration;
+using System.IO;
+using System.Reflection;
+using System.Threading.Tasks;
+
+namespace Resources;
+
+/// <summary>
+/// Resource helper to load resources embedded in the assembly, either as text
+/// (<see cref="Read"/>) or as raw bytes (<see cref="ReadStream"/>, <see cref="ReadAllAsync"/>).
+///
+/// You can find information about embedded resources here:
+/// * https://learn.microsoft.com/dotnet/core/extensions/create-resource-files
+/// * https://learn.microsoft.com/dotnet/api/system.reflection.assembly.getmanifestresourcestream?view=net-7.0
+///
+/// To know which resources are embedded, check the csproj file.
+/// </summary>
+internal static class EmbeddedResource
+{
+    private static readonly string? s_namespace = typeof(EmbeddedResource).Namespace;
+
+    /// <summary>Reads an embedded resource and returns its content as text.</summary>
+    /// <param name="fileName">Resource file name; the namespace of this class is prepended.</param>
+    /// <exception cref="ConfigurationErrorsException">No resource with that name is embedded.</exception>
+    internal static string Read(string fileName)
+    {
+        using Stream resource = OpenRequired(fileName);
+        using var reader = new StreamReader(resource);
+        return reader.ReadToEnd();
+    }
+
+    /// <summary>Opens an embedded resource as a stream; the caller disposes it.</summary>
+    /// <param name="fileName">Resource file name; the namespace of this class is prepended.</param>
+    /// <returns>The resource stream, or null when no resource with that name is embedded.</returns>
+    internal static Stream? ReadStream(string fileName)
+    {
+        // This class is in the same assembly where the embedded resources are stored.
+        Assembly assembly = typeof(EmbeddedResource).Assembly;
+        return assembly.GetManifestResourceStream(GetResourceName(fileName));
+    }
+
+    /// <summary>Reads an embedded resource fully into memory.</summary>
+    /// <param name="fileName">Resource file name; the namespace of this class is prepended.</param>
+    /// <returns>A copy of the resource bytes.</returns>
+    /// <exception cref="ConfigurationErrorsException">No resource with that name is embedded.</exception>
+    internal static async Task<ReadOnlyMemory<byte>> ReadAllAsync(string fileName)
+    {
+        // A missing resource is reported by name here instead of failing with a null dereference.
+        await using Stream resourceStream = OpenRequired(fileName);
+        using var memoryStream = new MemoryStream();
+        await resourceStream.CopyToAsync(memoryStream);
+
+        // ToArray() copies the buffer, so the result stays valid after the stream is disposed.
+        return new ReadOnlyMemory<byte>(memoryStream.ToArray());
+    }
+
+    // Resources are mapped like types, using the namespace and appending "." (dot) and the file name.
+    private static string GetResourceName(string fileName) => $"{s_namespace}." + fileName;
+
+    // Opens a resource that is required to exist.
+    private static Stream OpenRequired(string fileName) =>
+        ReadStream(fileName) ??
+        throw new ConfigurationErrorsException($"{GetResourceName(fileName)} resource not found");
+}
diff --git a/src/Providers/Amazon.Bedrock/test/LangChain.Providers.Amazon.Bedrock.Tests.csproj b/src/Providers/Amazon.Bedrock/test/LangChain.Providers.Amazon.Bedrock.Tests.csproj
index 971432e9..a76eaac8 100644
--- a/src/Providers/Amazon.Bedrock/test/LangChain.Providers.Amazon.Bedrock.Tests.csproj
+++ b/src/Providers/Amazon.Bedrock/test/LangChain.Providers.Amazon.Bedrock.Tests.csproj
@@ -7,6 +7,8 @@
+
+
@@ -14,6 +16,7 @@
+
diff --git a/src/Providers/HuggingFace/src/HuggingFaceImageToTextModel.cs b/src/Providers/HuggingFace/src/HuggingFaceImageToTextModel.cs
new file mode 100644
index 00000000..00edd2fd
--- /dev/null
+++ b/src/Providers/HuggingFace/src/HuggingFaceImageToTextModel.cs
@@ -0,0 +1,65 @@
+using System.Diagnostics;
+using System.Text.Json;
+
+namespace LangChain.Providers.HuggingFace;
+
+/// <summary>
+/// Image to text model that posts the image bytes to the configured endpoint followed by the model id.
+/// </summary>
+public class HuggingFaceImageToTextModel(
+    HuggingFaceProvider provider,
+    string id)
+    : ImageToTextModel(id), IImageToTextModel
+{
+    /// <inheritdoc />
+    public override async Task<ImageToTextResponse> GenerateTextFromImageAsync(
+        ImageToTextRequest request,
+        ImageToTextSettings? settings = default,
+        CancellationToken cancellationToken = default)
+    {
+        request = request ?? throw new ArgumentNullException(nameof(request));
+
+        var watch = Stopwatch.StartNew();
+
+        var usedSettings = ImageToTextSettings.Calculate(
+            requestSettings: settings,
+            modelSettings: Settings,
+            providerSettings: provider.ImageToTextSettings);
+
+        // The content is owned by httpRequest and is disposed together with it.
+        var imageContent = new ByteArrayContent(request.Image.ToArray());
+        if (request.Image.MediaType != null) imageContent.Headers.ContentType = new(request.Image.MediaType);
+
+        using var httpRequest = new HttpRequestMessage(HttpMethod.Post, usedSettings.Endpoint + id)
+        {
+            Content = imageContent
+        };
+        using var response = await provider.HttpClient.SendAsync(httpRequest, HttpCompletionOption.ResponseContentRead, cancellationToken).ConfigureAwait(false);
+        var body = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
+        if (!response.IsSuccessStatusCode)
+        {
+            // Report the failure with its body instead of trying to parse it as a caption list.
+            throw new HttpRequestException($"Image to text request failed with status code {(int)response.StatusCode}: {body}");
+        }
+
+        var captions = DeserializeResponse<ImageToTextGenerationResponse>(body);
+        var usage = Usage.Empty with { Time = watch.Elapsed };
+        AddUsage(usage);
+        provider.AddUsage(usage);
+
+        return new ImageToTextResponse
+        {
+            // The first caption is used; an empty list yields a null Text.
+            Text = captions.FirstOrDefault()?.GeneratedText,
+            UsedSettings = usedSettings,
+            Usage = usage,
+        };
+    }
+
+    // Parses the body as T; a body that deserializes to null is reported as a JsonException.
+    private static T DeserializeResponse<T>(string body)
+    {
+        body = body ?? throw new ArgumentNullException(nameof(body));
+        return JsonSerializer.Deserialize<T>(body) ?? throw new JsonException("Response is null");
+    }
+}
\ No newline at end of file