Skip to content

Commit 2dbcc42

Browse files
authored
Merge pull request #97 from unum-cloud/main-dev
Apple Neural Engine optimizations
2 parents 2c15cec + 00c92f2 commit 2dbcc42

File tree

2 files changed

+64
-18
lines changed

2 files changed

+64
-18
lines changed

swift/Encoders.swift

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -94,20 +94,26 @@ func readConfig(fromPath path: String) throws -> [String: Any] {
9494
}
9595

9696
/// Compiles and loads a machine learning model from a URL.
97-
/// - Parameter modelURL: The URL where the model package is located.
97+
/// - Parameters:
98+
/// - modelURL: The URL where the model package is located.
99+
/// - computeUnits: The hardware devices to use for model computation.
98100
/// - Returns: An instance of `MLModel`.
99-
func readModel(fromURL modelURL: URL) throws -> MLModel {
101+
func readModel(fromURL modelURL: URL, computeUnits: MLComputeUnits = .all) throws -> MLModel {
100102
let compiledModelURL = try MLModel.compileModel(at: modelURL)
101-
return try MLModel(contentsOf: compiledModelURL)
103+
let config = MLModelConfiguration()
104+
config.computeUnits = computeUnits
105+
return try MLModel(contentsOf: compiledModelURL, configuration: config)
102106
}
103107

104108
/// Loads a machine learning model from a local file path.
105-
/// - Parameter path: The file path where the model file is located.
109+
/// - Parameters:
110+
/// - path: The file path where the model file is located.
111+
/// - computeUnits: The hardware devices to use for model computation.
106112
/// - Returns: An instance of `MLModel`.
107-
func readModel(fromPath path: String) throws -> MLModel {
113+
func readModel(fromPath path: String, computeUnits: MLComputeUnits = .all) throws -> MLModel {
108114
let absPath = path.hasPrefix("/") ? path : FileManager.default.currentDirectoryPath + "/" + path
109115
let modelURL = URL(fileURLWithPath: absPath, isDirectory: true)
110-
return try readModel(fromURL: modelURL)
116+
return try readModel(fromURL: modelURL, computeUnits: computeUnits)
111117
}
112118

113119
/// Encodes text input into embeddings using a machine learning model.
@@ -120,10 +126,16 @@ public class TextEncoder {
120126
/// - modelPath: The path to the directory containing the machine learning model.
121127
/// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory.
122128
/// - tokenizerPath: Optional. The path to the tokenizer file. Defaults to tokenizer.json in the model directory.
123-
public init(modelPath: String, configPath: String? = nil, tokenizerPath: String? = nil) throws {
129+
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
130+
public init(
131+
modelPath: String,
132+
configPath: String? = nil,
133+
tokenizerPath: String? = nil,
134+
computeUnits: MLComputeUnits = .all
135+
) throws {
124136
let finalConfigPath = configPath ?? modelPath + "/config.json"
125137
let finalTokenizerPath = tokenizerPath ?? modelPath + "/tokenizer.json"
126-
self.model = try readModel(fromPath: modelPath)
138+
self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits)
127139
self.processor = try TextProcessor(
128140
configPath: finalConfigPath,
129141
tokenizerPath: finalTokenizerPath,
@@ -135,16 +147,20 @@ public class TextEncoder {
135147
/// - Parameters:
136148
/// - modelName: The identifier for the model repository.
137149
/// - hubApi: The API object to interact with the model hub. Defaults to a shared instance.
138-
public init(modelName: String, hubApi: HubApi = .shared) async throws {
150+
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
151+
public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws {
139152
let repo = Hub.Repo(id: modelName)
153+
let encoderMask =
154+
computeUnits == .cpuAndNeuralEngine ? "text_encoder_neural.mlpackage" : "text_encoder.mlpackage"
140155
let modelURL = try await hubApi.snapshot(
141156
from: repo,
142-
matching: ["text_encoder.mlpackage/*", "config.json", "tokenizer.json"]
157+
matching: ["\(encoderMask)/*", "config.json", "tokenizer.json"]
143158
)
144159
let configPath = modelURL.appendingPathComponent("config.json").path
145160
let tokenizerPath = modelURL.appendingPathComponent("tokenizer.json").path
146161
self.model = try readModel(
147-
fromURL: modelURL.appendingPathComponent("text_encoder.mlpackage", isDirectory: true)
162+
fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true),
163+
computeUnits: computeUnits
148164
)
149165
self.processor = try TextProcessor(configPath: configPath, tokenizerPath: tokenizerPath, model: self.model)
150166
}
@@ -174,22 +190,26 @@ public class ImageEncoder {
174190
/// - Parameters:
175191
/// - modelPath: The path to the directory containing the machine learning model.
176192
/// - configPath: Optional. The path to the configuration file. Defaults to config.json in the model directory.
177-
public init(modelPath: String, configPath: String? = nil) throws {
193+
public init(modelPath: String, configPath: String? = nil, computeUnits: MLComputeUnits = .all) throws {
178194
let finalConfigPath = configPath ?? modelPath + "/config.json"
179-
self.model = try readModel(fromPath: modelPath)
195+
self.model = try readModel(fromPath: modelPath, computeUnits: computeUnits)
180196
self.processor = try ImageProcessor(configPath: finalConfigPath)
181197
}
182198

183199
/// Initializes an `ImageEncoder` using a model name and an API for fetching models.
184200
/// - Parameters:
185201
/// - modelName: The identifier for the model repository.
186202
/// - hubApi: The API object to interact with the model hub. Defaults to a shared instance.
187-
public init(modelName: String, hubApi: HubApi = .shared) async throws {
203+
/// - computeUnits: The hardware devices to use for model computation. Use `.cpuAndNeuralEngine` for best performance.
204+
public init(modelName: String, hubApi: HubApi = .shared, computeUnits: MLComputeUnits = .all) async throws {
188205
let repo = Hub.Repo(id: modelName)
189-
let modelURL = try await hubApi.snapshot(from: repo, matching: ["image_encoder.mlpackage/*", "config.json"])
206+
let encoderMask =
207+
computeUnits == .cpuAndNeuralEngine ? "image_encoder_neural.mlpackage" : "image_encoder.mlpackage"
208+
let modelURL = try await hubApi.snapshot(from: repo, matching: ["\(encoderMask)/*", "config.json"])
190209
let configPath = modelURL.appendingPathComponent("config.json").path
191210
self.model = try readModel(
192-
fromURL: modelURL.appendingPathComponent("image_encoder.mlpackage", isDirectory: true)
211+
fromURL: modelURL.appendingPathComponent(encoderMask, isDirectory: true),
212+
computeUnits: computeUnits
193213
)
194214
self.processor = try ImageProcessor(configPath: configPath)
195215
}

swift/README.md

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ import UForm
1919
### Text Embeddings
2020

2121
```swift
22-
let textModel = try await TextEncoder(modelName: "unum-cloud/uform3-image-text-english-small")
22+
let textModel = try await TextEncoder(
23+
modelName: "unum-cloud/uform3-image-text-english-small",
24+
computeUnits: .cpuAndNeuralEngine
25+
)
2326
let text = "A group of friends enjoy a barbecue on a sandy beach, with one person grilling over a large black grill, while the other sits nearby, laughing and enjoying the camaraderie."
2427
let textEmbedding: Embedding = try textModel.encode(text)
2528
let textVector: [Float32] = textEmbedding.asFloats()
@@ -28,7 +31,10 @@ let textVector: [Float32] = textEmbedding.asFloats()
2831
### Image Embeddings
2932

3033
```swift
31-
let imageModel = try await ImageEncoder(modelName: "unum-cloud/uform3-image-text-english-small")
34+
let imageModel = try await ImageEncoder(
35+
modelName: "unum-cloud/uform3-image-text-english-small",
36+
computeUnits: .cpuAndNeuralEngine
37+
)
3238
let imageURL = "https://github.com/ashvardanian/ashvardanian/blob/master/demos/bbq-on-beach.jpg?raw=true"
3339
guard let url = URL(string: imageURL),
3440
let imageSource = CGImageSourceCreateWithURL(url as CFURL, nil),
@@ -40,6 +46,26 @@ var imageEmbedding: Embedding = try imageModel.encode(cgImage)
4046
var imageVector: [Float32] = imageEmbedding.asFloats()
4147
```
4248

49+
### Choosing Target Device
50+
51+
Apple chips provide several functional units capable of high-throughput matrix multiplication and AI inference.
52+
Those `computeUnits` include the CPU, GPU, and Neural Engine.
53+
For maximum compatibility, the `.all` option is used by default.
54+
Sadly, Apple's scheduler is not always optimal, and it might be beneficial to specify the target device explicitly, especially if the models are pre-compiled for the Apple Neural Engine, as it may yield significant performance gains.
55+
56+
| Model | GPU Text E. | ANE Text E. | GPU Image E. | ANE Image E. |
57+
| :------------------ | ----------: | ----------: | -----------: | -----------: |
58+
| `english-small` | 2.53 ms | 0.53 ms | 6.57 ms | 1.23 ms |
59+
| `english-base` | 2.54 ms | 0.61 ms | 18.90 ms | 3.79 ms |
60+
| `english-large` | 2.30 ms | 0.61 ms | 79.68 ms | 20.94 ms |
61+
| `multilingual-base` | 2.34 ms | 0.50 ms | 18.98 ms | 3.77 ms |
62+
63+
> On Apple M4 iPad, running iOS 18.2.
64+
> Batch size is 1, and the model is pre-loaded into memory.
65+
> The original encoders use `f32` single-precision numbers for maximum compatibility, and mostly rely on __GPU__ for computation.
66+
> The quantized encoders use a mixture of `i8`, `f16`, and `f32` numbers for maximum performance, and mostly rely on the Apple Neural Engine (__ANE__) for computation.
67+
> The median latency is reported.
68+
4369
### Computing Distances
4470

4571
There are several ways to compute distances between embeddings, once you have them.

0 commit comments

Comments
 (0)