Program.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.Json;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.PixelFormats;
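
// Runs the CLIP ViT-B/32 image encoder on a single image and prints the resulting embedding as JSON.
// Usage (a sketch, assuming a console project that references the Microsoft.ML.OnnxRuntime and
// SixLabors.ImageSharp NuGet packages): dotnet run -- path/to/image.jpg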
class CLIP {
    static void Main(string[] args) {
        // Download the model weights if we don't already have them in the current directory
        if (!File.Exists("clip-image-vit-32-float32.onnx"))
        {
            WebClient webClient = new WebClient();
            webClient.DownloadFile(
                "https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx",
                @"clip-image-vit-32-float32.onnx"
            );
        }
        // Load the model
        // Model sourced from: https://huggingface.co/rocca/openai-clip-js/tree/main
        var clipModel = new InferenceSession("clip-image-vit-32-float32.onnx");
        // Load the image specified as a command-line argument
        var image = Image.Load<Rgba32>(File.ReadAllBytes(args[0]));
        // Find the shortest side and use it to extract a square from the center of the image.
        // Other image libraries call this a center crop; AFAIK a built-in center crop is not
        // available in SixLabors.ImageSharp, so we do it manually.
        var smallestSide = Math.Min(image.Width, image.Height);
        image.Mutate(x => x.Crop(
            new Rectangle(
                (image.Width - smallestSide) / 2,
                (image.Height - smallestSide) / 2,
                smallestSide,
                smallestSide
            )));
        // Resize to 224 x 224 (bicubic resampling is the default)
        image.Mutate(x => x.Resize(224, 224));
        // Create an input tensor for 1 image, 3 channels (RGB), and a height and width of 224 pixels
        var inputTensor = new DenseTensor<float>(new[] {1, 3, 224, 224});
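        // The tensor layout is NCHW (batch, channel, height, width), which is why the
        // loop below indexes the tensor as [0, channel, y, x]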
        // Put all the pixels into the input tensor
        for (var x = 0; x < 224; x++)
        {
            for (var y = 0; y < 224; y++)
            {
                // Normalize from bytes (0-255) to floats (constants borrowed from the CLIP repository)
                inputTensor[0, 0, y, x] = Convert.ToSingle((((float) image[x, y].R / 255) - 0.48145466) / 0.26862954);
                inputTensor[0, 1, y, x] = Convert.ToSingle((((float) image[x, y].G / 255) - 0.4578275 ) / 0.26130258);
                inputTensor[0, 2, y, x] = Convert.ToSingle((((float) image[x, y].B / 255) - 0.40821073) / 0.27577711);
            }
        }
        // Prepare the input as a named ONNX value; this model expects the name "input"
        var inputs = new List<NamedOnnxValue> {NamedOnnxValue.CreateFromTensor("input", inputTensor)};
        // Run the model, and get the output back as an array of floats
        var outputData = clipModel.Run(inputs).ToList().Last().AsTensor<float>().ToArray();
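        // For the ViT-B/32 image encoder this output should be the image embedding
        // (a vector of 512 floats for this model)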
        // Write the array serialized as JSON
        Console.WriteLine(JsonSerializer.Serialize(outputData));
    }
}