-
-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
…152) * fix: changed Titan's TextToImage to support images * ImageToText working. needs to get refactored and cleaned * feat: Added ImageToText abstractions and HuggingFace implementation. also added example to HF sample * fix: remove postgres tests from bedrock tests * feat: Added ImageToTextGenerationChain
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,29 @@ | ||
using LangChain.Providers.HuggingFace; | ||
using LangChain.Providers; | ||
using LangChain.Providers.HuggingFace; | ||
using LangChain.Providers.HuggingFace.Predefined; | ||
|
||
// Sample program: runs a GPT-2 text generation request and an image captioning
// request against Hugging Face, printing both results to the console.

// One HttpClient and one provider are shared by both models below.
using var client = new HttpClient();
var provider = new HuggingFaceProvider(apiKey: string.Empty, client);

// --- Text generation -------------------------------------------------------
var gpt2Model = new Gpt2Model(provider);
var gp2ModelResponse = await gpt2Model.GenerateAsync("What would be a good company name be for name a company that makes colorful socks?");

Console.WriteLine("### GP2 Response");
Console.WriteLine(gp2ModelResponse);

// --- Image to text ---------------------------------------------------------
const string imageToTextModel = "Salesforce/blip-image-captioning-base";
var model = new HuggingFaceImageToTextModel(provider, imageToTextModel);

// The sample reads "solar_system.png" from the temp directory; the file must already exist there.
var path = Path.Combine(Path.GetTempPath(), "solar_system.png");
var imageData = await File.ReadAllBytesAsync(path);

// The file is a PNG, so the media type has to say so ("image/jpg" is not a registered media type).
var binaryData = new BinaryData(imageData, "image/png");

var imageToTextResponse = await model.GenerateTextFromImageAsync(new ImageToTextRequest
{
    Image = binaryData
});

Console.WriteLine("\n\n### ImageToText Response");
Console.WriteLine(imageToTextResponse.Text);

// Block until a line is entered before the program ends.
Console.ReadLine();
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
using LangChain.Abstractions.Schema; | ||
using LangChain.Chains.HelperChains; | ||
using LangChain.Providers; | ||
|
||
namespace LangChain.Chains.StackableChains.ImageToTextGeneration; | ||
|
||
/// <summary>
/// Stackable chain that sends a fixed image to an <see cref="IImageToTextModel"/>
/// and writes the generated text into the chain values under the output key.
/// </summary>
public class ImageToTextGenerationChain : BaseStackableChain
{
    private readonly IImageToTextModel _model;
    private readonly BinaryData _image;

    /// <summary>
    /// Creates a chain bound to one model and one image.
    /// </summary>
    /// <param name="model">Model that generates text from the image.</param>
    /// <param name="image">Image passed to the model each time the chain runs.</param>
    /// <param name="outputKey">Key under which the generated text is stored; defaults to <c>"text"</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="model"/> or <paramref name="image"/> is <see langword="null"/>.
    /// </exception>
    public ImageToTextGenerationChain(
        IImageToTextModel model,
        BinaryData image,
        string outputKey = "text")
    {
        // Fail at construction time instead of with a NullReferenceException during the call.
        _model = model ?? throw new ArgumentNullException(nameof(model));
        _image = image ?? throw new ArgumentNullException(nameof(image));
        OutputKeys = new[] { outputKey };
    }

    /// <inheritdoc />
    protected override async Task<IChainValues> InternalCall(IChainValues values)
    {
        values = values ?? throw new ArgumentNullException(nameof(values));

        var response = await _model
            .GenerateTextFromImageAsync(new ImageToTextRequest { Image = _image })
            .ConfigureAwait(false);

        // Store the generated text itself rather than the response wrapper, so that
        // consumers reading the output key receive a string.
        values.Value[OutputKeys[0]] = response.Text ?? string.Empty;
        return values;
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// Contract for a model that generates text from an image and is configured
/// through <see cref="ImageToTextSettings"/>.
/// </summary>
public interface IImageToTextModel : IModel<ImageToTextSettings>
{
    /// <summary>
    /// Generates text for the image carried by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Request holding the image to process.</param>
    /// <param name="settings">Optional settings for this call; <see langword="null"/> when omitted.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the operation.</param>
    /// <returns>A task producing the <see cref="ImageToTextResponse"/> for the request.</returns>
    Task<ImageToTextResponse> GenerateTextFromImageAsync(
        ImageToTextRequest request,
        ImageToTextSettings? settings = null,
        CancellationToken cancellationToken = default);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// Generic variant of <see cref="IImageToTextModel"/> that lets an implementation
/// choose its own request, response and settings types.
/// </summary>
/// <typeparam name="TRequest">Type of the request accepted by the model.</typeparam>
/// <typeparam name="TResponse">Type of the response produced by the model.</typeparam>
/// <typeparam name="TSettings">Type of the per-call settings accepted by the model.</typeparam>
public interface IImageToTextModel<in TRequest, TResponse, in TSettings> : IImageToTextModel
{
    /// <summary>
    /// Generates text for the image described by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Request to process.</param>
    /// <param name="settings">Optional settings for this call; the type's default value when omitted.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the operation.</param>
    /// <returns>A task producing the <typeparamref name="TResponse"/> for the request.</returns>
    Task<TResponse> GenerateTextFromImageAsync(
        TRequest request,
        TSettings? settings = default,
        CancellationToken cancellationToken = default);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
using System.Text.Json.Serialization; | ||
|
||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// JSON payload shape for an image to text generation result: a list of
/// <see cref="GeneratedTextItem"/> entries.
/// </summary>
public class ImageToTextGenerationResponse
    : List<ImageToTextGenerationResponse.GeneratedTextItem>
{
    /// <summary>
    /// Single entry of the response list.
    /// </summary>
    public sealed class GeneratedTextItem
    {
        /// <summary>
        /// The generated text, mapped from the <c>generated_text</c> JSON property.
        /// </summary>
        [JsonPropertyName("generated_text")]
        public string? GeneratedText { get; set; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// ReSharper disable once CheckNamespace | ||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// Abstract base for image to text models that work with <see cref="ImageToTextRequest"/>,
/// <see cref="ImageToTextResponse"/> and <see cref="ImageToTextSettings"/>.
/// </summary>
/// <param name="id">Model identifier forwarded to the base model constructor.</param>
public abstract class ImageToTextModel(string id)
    : Model<ImageToTextSettings>(id),
      IImageToTextModel<ImageToTextRequest, ImageToTextResponse, ImageToTextSettings>
{
    /// <summary>
    /// Generates text for the image carried by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Request holding the image to process.</param>
    /// <param name="settings">Optional settings for this call; <see langword="null"/> when omitted.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the operation.</param>
    /// <returns>A task producing the <see cref="ImageToTextResponse"/> for the request.</returns>
    public abstract Task<ImageToTextResponse> GenerateTextFromImageAsync(
        ImageToTextRequest request,
        ImageToTextSettings? settings = default,
        CancellationToken cancellationToken = default);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// ReSharper disable once CheckNamespace | ||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// Base request type for image to text generation.
/// </summary>
public class ImageToTextRequest
{
    /// <summary>
    /// The image to upload. Must be supplied when the request is created.
    /// </summary>
    public required BinaryData Image { get; init; }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// ReSharper disable once CheckNamespace | ||
// ReSharper disable ConditionalAccessQualifierIsNonNullableAccordingToAPIContract | ||
namespace LangChain.Providers; | ||
|
||
#pragma warning disable CA2225 | ||
|
||
/// <summary>
/// Result of an image to text generation request.
/// </summary>
public class ImageToTextResponse
{
    /// <summary>
    /// Settings attached to this response. Must be supplied when the response is created.
    /// </summary>
    public required ImageToTextSettings UsedSettings { get; init; }

    /// <summary>
    /// Usage information for the request; <see cref="Usage.Empty"/> unless set by the creator.
    /// </summary>
    public Usage Usage { get; init; } = Usage.Empty;

    /// <summary>
    /// The generated text, or <see langword="null"/> when none was set.
    /// </summary>
    public string? Text { get; set; }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// ReSharper disable once CheckNamespace | ||
namespace LangChain.Providers; | ||
|
||
/// <summary>
/// Base class for image to text request settings.
/// </summary>
public class ImageToTextSettings
{
    /// <summary>
    /// Fallback values that <see cref="Calculate"/> uses when neither the request,
    /// the model nor the provider settings supply a value.
    /// </summary>
    public static ImageToTextSettings Default { get; } = new()
    {
        User = string.Empty,
        Endpoint = "https://api-inference.huggingface.co/models/",
    };

    /// <summary>
    /// Unique user identifier.
    /// </summary>
    public string? User { get; init; }

    /// <summary>
    /// Endpoint url for api. <see langword="null"/> means "not specified at this level",
    /// which lets <see cref="Calculate"/> fall through to the next settings object.
    /// </summary>
    public string? Endpoint { get; set; }

    /// <summary>
    /// Calculate the settings to use for the request. For each property the first
    /// non-null value wins, checked in this order: request, model, provider, <see cref="Default"/>.
    /// </summary>
    /// <param name="requestSettings">Settings passed with the individual request, if any.</param>
    /// <param name="modelSettings">Settings configured on the model, if any.</param>
    /// <param name="providerSettings">Settings configured on the provider, if any.</param>
    /// <returns>A new settings instance with every property resolved.</returns>
    /// <exception cref="InvalidOperationException">
    /// No level supplies a value and the corresponding <see cref="Default"/> property is <see langword="null"/>.
    /// </exception>
    public static ImageToTextSettings Calculate(
        ImageToTextSettings? requestSettings,
        ImageToTextSettings? modelSettings,
        ImageToTextSettings? providerSettings)
    {
        return new ImageToTextSettings
        {
            User =
                requestSettings?.User ??
                modelSettings?.User ??
                providerSettings?.User ??
                Default.User ??
                throw new InvalidOperationException("Default User is not set."),
            Endpoint =
                requestSettings?.Endpoint ??
                modelSettings?.Endpoint ??
                providerSettings?.Endpoint ??
                Default.Endpoint ??
                throw new InvalidOperationException("Default Endpoint is not set."),
        };
    }
}