Skip to content

Commit

Permalink
[WebNN EP] Enable IO Bindings with MLTensor (microsoft#21301)
Browse files Browse the repository at this point in the history
### Description
Enables using an MLTensor to pass data between models.


### Motivation and Context
Using MLTensor instead of ArrayBuffers reduces the number of copies
between the CPU and devices, as well as between the renderer and GPU
processes in Chromium.
  • Loading branch information
egalli authored and Ishwar Raut committed Nov 19, 2024
1 parent 8763941 commit f15202a
Show file tree
Hide file tree
Showing 33 changed files with 1,287 additions and 73 deletions.
1 change: 1 addition & 0 deletions include/onnxruntime/core/framework/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
constexpr const char* OpenVINO_RT = "OpenVINO_RT";
constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";

constexpr size_t kAllocAlignment = 256;

Expand Down
12 changes: 12 additions & 0 deletions js/common/lib/tensor-factory-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLTensorOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
} from './tensor-factory.js';
Expand Down Expand Up @@ -310,6 +311,17 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes
return new Tensor({ location: 'gpu-buffer', type: dataType ?? 'float32', gpuBuffer, dims, download, dispose });
};

/**
 * implementation of Tensor.fromMLTensor().
 *
 * Wraps an externally-created WebNN MLTensor in a Tensor whose location is 'ml-tensor'.
 */
export const tensorFromMLTensor = <T extends TensorInterface.MLTensorDataTypes>(
  mlTensor: TensorInterface.MLTensorType,
  options: TensorFromMLTensorOptions<T>,
): Tensor => {
  // Default to 'float32' when no data type is given, matching the other tensorFrom* factories.
  const type = options.dataType ?? 'float32';
  return new Tensor({
    location: 'ml-tensor',
    type,
    mlTensor,
    dims: options.dims,
    download: options.download,
    dispose: options.dispose,
  });
};

/**
* implementation of Tensor.fromPinnedBuffer().
*/
Expand Down
46 changes: 46 additions & 0 deletions js/common/lib/tensor-factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ export interface GpuBufferConstructorParameters<T extends Tensor.GpuBufferDataTy
readonly gpuBuffer: Tensor.GpuBufferType;
}

/**
 * parameters for constructing a Tensor whose data lives in a WebNN MLTensor.
 *
 * Mirrors GpuBufferConstructorParameters; the data stays on-device until downloaded.
 */
export interface MLTensorConstructorParameters<T extends Tensor.MLTensorDataTypes = Tensor.MLTensorDataTypes>
  extends CommonConstructorParameters<T>,
    GpuResourceConstructorParameters<T> {
  /**
   * Specify the location of the data to be 'ml-tensor'.
   */
  readonly location: 'ml-tensor';

  /**
   * Specify the WebNN MLTensor that holds the tensor data.
   */
  readonly mlTensor: Tensor.MLTensorType;
}

// #endregion

// the following region contains type definitions of each individual options.
Expand Down Expand Up @@ -219,6 +233,15 @@ export interface TensorFromGpuBufferOptions<T extends Tensor.GpuBufferDataTypes>
dataType?: T;
}

/**
 * options for creating a tensor from a WebNN MLTensor (see Tensor.fromMLTensor()).
 */
export interface TensorFromMLTensorOptions<T extends Tensor.MLTensorDataTypes>
  extends Pick<Tensor, 'dims'>,
    GpuResourceConstructorParameters<T> {
  /**
   * Describes the data type of the tensor. If omitted, 'float32' is assumed.
   */
  dataType?: T;
}

// #endregion

/**
Expand Down Expand Up @@ -336,6 +359,29 @@ export interface TensorFactory {
options: TensorFromGpuBufferOptions<T>,
): TypedTensor<T>;

/**
 * create a tensor from a WebNN MLTensor
 *
 * @param tensor - the MLTensor object to create tensor from
 * @param options - An optional object representing options for creating tensor from a WebNN MLTensor.
 *
 * The options include following properties:
 * - `dataType`: the data type of the tensor. If omitted, assume 'float32'.
 * - `dims`: the dimension of the tensor. Required.
 * - `download`: an optional function to download the tensor data from the MLTensor to CPU. If omitted, the MLTensor
 *   data cannot be downloaded. Usually, this is provided by the WebNN backend for the inference outputs. Users don't
 *   need to provide this function.
 * - `dispose`: an optional function to dispose the tensor data on the WebNN MLTensor. If omitted, the MLTensor will
 *   not be disposed. Usually, this is provided by the WebNN backend for the inference outputs. Users don't need to
 *   provide this function.
 *
 * @returns a tensor object
 */
fromMLTensor<T extends Tensor.MLTensorDataTypes>(
  tensor: Tensor.MLTensorType,
  options: TensorFromMLTensorOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a pre-allocated buffer. The buffer will be used as a pinned buffer.
*
Expand Down
59 changes: 57 additions & 2 deletions js/common/lib/tensor-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ import { TensorToDataUrlOptions, TensorToImageDataOptions } from './tensor-conve
import {
tensorFromGpuBuffer,
tensorFromImage,
tensorFromMLTensor,
tensorFromPinnedBuffer,
tensorFromTexture,
} from './tensor-factory-impl.js';
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLTensorConstructorParameters,
TensorFromGpuBufferOptions,
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLTensorOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
TextureConstructorParameters,
Expand All @@ -37,6 +40,7 @@ type TensorDataType = TensorInterface.DataType;
type TensorDataLocation = TensorInterface.DataLocation;
type TensorTextureType = TensorInterface.TextureType;
type TensorGpuBufferType = TensorInterface.GpuBufferType;
type TensorMLTensorType = TensorInterface.MLTensorType;

/**
* the implementation of Tensor interface.
Expand Down Expand Up @@ -86,6 +90,15 @@ export class Tensor implements TensorInterface {
*/
constructor(params: GpuBufferConstructorParameters);

/**
 * Construct a new tensor object from the WebNN MLTensor with the given type and dims.
 *
 * Tensor's location will be set to 'ml-tensor'. The data stays in the MLTensor until
 * downloaded via the optional `download` callback supplied in the parameters.
 *
 * @param params - Specify the parameters to construct the tensor.
 */
constructor(params: MLTensorConstructorParameters);

/**
* implementation.
*/
Expand All @@ -98,7 +111,8 @@ export class Tensor implements TensorInterface {
| readonly boolean[]
| CpuPinnedConstructorParameters
| TextureConstructorParameters
| GpuBufferConstructorParameters,
| GpuBufferConstructorParameters
| MLTensorConstructorParameters,
arg1?: TensorDataType | Uint8ClampedArray | readonly number[] | readonly string[] | readonly boolean[],
arg2?: readonly number[],
) {
Expand Down Expand Up @@ -155,6 +169,25 @@ export class Tensor implements TensorInterface {
this.disposer = arg0.dispose;
break;
}
case 'ml-tensor': {
if (
type !== 'float32' &&
type !== 'float16' &&
type !== 'int32' &&
type !== 'int64' &&
type !== 'uint32' &&
type !== 'uint64' &&
type !== 'int8' &&
type !== 'uint8' &&
type !== 'bool'
) {
throw new TypeError(`unsupported type "${type}" to create tensor from MLTensor`);
}
this.mlTensorData = arg0.mlTensor;
this.downloader = arg0.download;
this.disposer = arg0.dispose;
break;
}
default:
throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`);
}
Expand Down Expand Up @@ -325,6 +358,13 @@ export class Tensor implements TensorInterface {
return tensorFromGpuBuffer(gpuBuffer, options);
}

static fromMLTensor<T extends TensorInterface.MLTensorDataTypes>(
  mlTensor: TensorMLTensorType,
  options: TensorFromMLTensorOptions<T>,
): TensorInterface {
  // Delegate to the shared factory helper so construction logic lives in one place.
  const tensor = tensorFromMLTensor(mlTensor, options);
  return tensor;
}

static fromPinnedBuffer<T extends TensorInterface.CpuPinnedDataTypes>(
type: T,
buffer: TensorInterface.DataTypeMap[T],
Expand Down Expand Up @@ -373,6 +413,11 @@ export class Tensor implements TensorInterface {
*/
private gpuBufferData?: TensorGpuBufferType;

/**
* stores the underlying WebNN MLTensor when location is 'ml-tensor'. otherwise empty.
*/
private mlTensorData?: TensorMLTensorType;

/**
* stores an optional downloader function to download data from GPU to CPU.
*/
Expand Down Expand Up @@ -420,6 +465,14 @@ export class Tensor implements TensorInterface {
}
return this.gpuBufferData;
}

// Accessor for the underlying WebNN MLTensor; throws if the tensor was disposed
// or its data is not stored as an MLTensor (location !== 'ml-tensor').
get mlTensor(): TensorMLTensorType {
  this.ensureValid();
  const tensor = this.mlTensorData;
  if (!tensor) {
    throw new Error('The data is not stored as a WebNN MLTensor.');
  }
  return tensor;
}
// #endregion

// #region methods
Expand All @@ -431,7 +484,8 @@ export class Tensor implements TensorInterface {
case 'cpu-pinned':
return this.data;
case 'texture':
case 'gpu-buffer': {
case 'gpu-buffer':
case 'ml-tensor': {
if (!this.downloader) {
throw new Error('The current tensor is not created with a specified data downloader.');
}
Expand Down Expand Up @@ -472,6 +526,7 @@ export class Tensor implements TensorInterface {
this.cpuData = undefined;
this.gpuTextureData = undefined;
this.gpuBufferData = undefined;
this.mlTensorData = undefined;
this.downloader = undefined;
this.isDownloading = undefined;

Expand Down
8 changes: 8 additions & 0 deletions js/common/lib/tensor-utils-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLTensorConstructorParameters,
TextureConstructorParameters,
} from './tensor-factory.js';
import { Tensor } from './tensor-impl.js';
Expand Down Expand Up @@ -56,6 +57,13 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
type: tensor.type as GpuBufferConstructorParameters['type'],
dims,
});
case 'ml-tensor':
return new Tensor({
location: 'ml-tensor',
mlTensor: tensor.mlTensor,
type: tensor.type as MLTensorConstructorParameters['type'],
dims,
});
default:
throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`);
}
Expand Down
30 changes: 29 additions & 1 deletion js/common/lib/tensor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ interface TypedTensorBase<T extends Tensor.Type> {
*/
readonly gpuBuffer: Tensor.GpuBufferType;

/**
 * Get the WebNN MLTensor that holds the tensor data.
 *
 * If the data is not stored in a WebNN MLTensor, an error is thrown.
 */
readonly mlTensor: Tensor.MLTensorType;

/**
* Get the buffer data of the tensor.
*
Expand Down Expand Up @@ -136,15 +143,36 @@ export declare namespace Tensor {
*/
export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };

/**
 * type alias for WebNN MLTensor
 *
 * The specification for WebNN's MLTensor is currently in flux, so the type is intentionally
 * left as `unknown`; callers should treat the value as opaque.
 */
export type MLTensorType = unknown;

/**
* supported data types for constructing a tensor from a WebGPU buffer
*/
export type GpuBufferDataTypes = 'float32' | 'float16' | 'int32' | 'int64' | 'uint32' | 'uint8' | 'bool';

/**
 * supported data types for constructing a tensor from a WebNN MLTensor
 */
export type MLTensorDataTypes =
  | 'float32'
  | 'float16'
  | 'int8'
  | 'uint8'
  | 'int32'
  | 'uint32'
  | 'int64'
  | 'uint64'
  | 'bool';

/**
 * represent where the tensor data is stored
 * ('ml-tensor' indicates the data is held in a WebNN MLTensor)
 */
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer' | 'ml-tensor';

/**
* represent the data type of a tensor
Expand Down
Loading

0 comments on commit f15202a

Please sign in to comment.