diff --git a/js/common/lib/tensor-factory-impl.ts b/js/common/lib/tensor-factory-impl.ts
index ad255999cb96c..16adbe408b403 100644
--- a/js/common/lib/tensor-factory-impl.ts
+++ b/js/common/lib/tensor-factory-impl.ts
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {OptionsDimensions, OptionsFormat, OptionsNormalizationParameters, OptionsTensorFormat, OptionsTensorLayout, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions} from './tensor-factory.js';
+import {OptionsDimensions, OptionsFormat, OptionsNormalizationParameters, OptionsTensorFormat, OptionsTensorLayout, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromMLBufferOptions, TensorFromTextureOptions, TensorFromUrlOptions} from './tensor-factory.js';
 import {Tensor} from './tensor-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';
 
@@ -277,8 +277,8 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes>(
-export const tensorFromMLBuffer = <T extends TensorInterface.GpuBufferDataTypes>(
-    mlBuffer: TensorInterface.MLBufferType, options: TensorFromGpuBufferOptions<T>): Tensor => {
+export const tensorFromMLBuffer = <T extends TensorInterface.MLBufferDataTypes>(
+    mlBuffer: TensorInterface.MLBufferType, options: TensorFromMLBufferOptions<T>): Tensor => {
   const {dataType, dims, download, dispose} = options;
   return new Tensor({location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose});
 };
diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts
index a0a03074e83fb..2b80362798033 100644
--- a/js/common/lib/tensor-impl.ts
+++ b/js/common/lib/tensor-impl.ts
@@ -4,7 +4,7 @@
 import {tensorToDataURL, tensorToImageData} from './tensor-conversion-impl.js';
 import {TensorToDataUrlOptions, TensorToImageDataOptions} from './tensor-conversion.js';
 import {tensorFromGpuBuffer, tensorFromImage, tensorFromMLBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromMLBufferOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
 import {checkTypedArray, NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP, NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, SupportedTypedArray, SupportedTypedArrayConstructors} from './tensor-impl-type-mapping.js';
 import {calculateSize, tensorReshape} from './tensor-utils-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';
 
@@ -273,8 +273,8 @@ export class Tensor implements TensorInterface {
     return tensorFromGpuBuffer(gpuBuffer, options);
   }
 
-  static fromMLBuffer<T extends TensorInterface.GpuBufferDataTypes>(
-      mlBuffer: TensorMLBufferType, options: TensorFromGpuBufferOptions<T>): TensorInterface {
+  static fromMLBuffer<T extends TensorInterface.MLBufferDataTypes>(
+      mlBuffer: TensorMLBufferType, options: TensorFromMLBufferOptions<T>): TensorInterface {
     return tensorFromMLBuffer(mlBuffer, options);
   }
 
diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts
index 6cad192479625..db8afcf3b775c 100644
--- a/js/web/lib/wasm/jsep/backend-webnn.ts
+++ b/js/web/lib/wasm/jsep/backend-webnn.ts
@@ -99,10 +99,6 @@ export class WebNNBackend {
     this.bufferManager.releaseBufferId(bufferId);
   }
 
-  public getBuffer(bufferId: BufferId): MLBuffer {
-    return this.bufferManager.getBuffer(bufferId);
-  }
-
   public ensureBuffer(bufferId: BufferId, onnxDataType: number|MLOperandDataType, dimensions: number[]): MLBuffer {
     let dataType: MLOperandDataType;
     if (typeof onnxDataType === 'number') {
@@ -129,7 +125,7 @@ export class WebNNBackend {
     return this.bufferManager.download(bufferId);
   }
 
-  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]> {
+  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.MLBufferDataTypes): () => Promise<Tensor.DataTypeMap[Tensor.MLBufferDataTypes]> {
     return async () => {
       const data = await this.bufferManager.download(bufferId);
       return createView(data, type);
diff --git a/js/web/lib/wasm/jsep/webnn/buffer-manager.ts b/js/web/lib/wasm/jsep/webnn/buffer-manager.ts
index 6351ae184974d..7d13aa760504b 100644
--- a/js/web/lib/wasm/jsep/webnn/buffer-manager.ts
+++ b/js/web/lib/wasm/jsep/webnn/buffer-manager.ts
@@ -22,10 +22,6 @@ export interface BufferManager {
    * Release a BufferId.
    */
  releaseBufferId(bufferId: BufferId): void;
-  /**
-   * Get MLBuffer by BufferId.
-   */
-  getBuffer(bufferId: BufferId): MLBuffer;
  /**
   * Ensure a MLBuffer is created for the BufferId.
   */
@@ -155,16 +151,6 @@ class BufferManagerImpl implements BufferManager {
     }
   }
 
-  public getBuffer(bufferId: BufferId): MLBuffer {
-    if (!this.buffersById.has(bufferId)) {
-      throw new Error('BufferID not found.');
-    }
-    if (!this.buffersById.get(bufferId)!.buffer) {
-      throw new Error('Buffer has not been created.');
-    }
-    return this.buffersById.get(bufferId)!.buffer!;
-  }
-
   public ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): MLBuffer {
     const buffer = this.buffersById.get(bufferId);
     if (!buffer) {
diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts
index cb8faa1675e51..42a0fac923037 100644
--- a/js/web/lib/wasm/session-handler-inference.ts
+++ b/js/web/lib/wasm/session-handler-inference.ts
@@ -37,7 +37,7 @@ export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => {
     case 'ml-buffer': {
       const dataType = tensor[0];
       if (!isMLBufferSupportedType(dataType)) {
-        throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`);
+        throw new Error(`not supported data type: ${dataType} for deserializing MLBuffer tensor`);
       }
       const {mlBuffer, download, dispose} = tensor[2];
       return Tensor.fromMLBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts
index 783620b146364..ea237e7ac0391 100644
--- a/js/web/lib/wasm/wasm-types.ts
+++ b/js/web/lib/wasm/wasm-types.ts
@@ -160,14 +160,6 @@ export declare namespace JSEP {
      * @param bufferId - specify the MLBuffer ID.
      * @returns the MLBuffer.
      */
-    jsepGetMLBuffer: (bufferId: number) => MLBuffer;
-    /**
-     * [exported from pre-jsep.js] Ensure MLBuffer has been created with the correct type and dimensions.
-     * @param bufferId - specify the MLBuffer ID.
-     * @param dataType - specify the data type.
-     * @param dimensions - specify the dimensions.
-     * @returns the MLBuffer.
-     */
     jsepEnsureBuffer: (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
     /**
      * [exported from pre-jsep.js] Upload data to MLBuffer.
diff --git a/js/web/script/test-runner-cli.ts b/js/web/script/test-runner-cli.ts
index 6718dcb639a47..bf9518d7a49f2 100644
--- a/js/web/script/test-runner-cli.ts
+++ b/js/web/script/test-runner-cli.ts
@@ -359,7 +359,7 @@ async function main() {
       }
 
       let ioBinding: Test.IOBindingMode;
-      if (backend !== 'webgpu' && args.ioBindingMode !== 'none') {
+      if (!['webgpu', 'webnn'].includes(backend) && args.ioBindingMode !== 'none') {
         npmlog.warn(
             'TestRunnerCli.Init.Model', `Ignoring IO Binding Mode "${args.ioBindingMode}" for backend "${backend}".`);
         ioBinding = 'none';
diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts
index dcf1330b2d481..9bbb6caf682db 100644
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@@ -204,6 +204,7 @@ export class ModelTestContext {
       readonly perfData: ModelTestContext.ModelTestPerfData,
       readonly ioBinding: Test.IOBindingMode,
       private readonly profile: boolean,
+      public readonly mlContext?: MLContext,
   ) {}
 
   /**
@@ -254,7 +255,21 @@ export class ModelTestContext {
      const initStart = now();
 
      const executionProviderConfig =
-          modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
+          modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || {name: 'webnn'}) : modelTest.backend!;
+      let mlContext: MLContext|undefined;
+      if (modelTest.ioBinding.includes('ml-tensor') || modelTest.ioBinding.includes('ml-location')) {
+        const webnnOptions = executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption;
+        const deviceType = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.deviceType;
+        const numThreads = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.numThreads;
+        const powerPreference = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.powerPreference;
+
+        mlContext = await navigator.ml.createContext({
+          deviceType,
+          numThreads,
+          powerPreference,
+        });
+        (executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption).context = mlContext;
+      }
      const session = await initializeSession(
          modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData,
          testOptions?.sessionOptions || {}, this.cache);
@@ -271,6 +286,7 @@ export class ModelTestContext {
          {init: initEnd - initStart, firstRun: -1, runs: [], count: 0},
          modelTest.ioBinding,
          profile,
+          mlContext,
      );
    } finally {
      this.initializing = false;
@@ -565,46 +581,34 @@ function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]
  });
 }
 
-const getContext = (() => {
-  let context: MLContext|undefined;
-
-  return async(): Promise<MLContext> => {
-    if (!context) {
-      context = await navigator.ml.createContext();
-    }
-    return context;
-  };
-})();
-
-async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
+async function createMLTensorForOutput(mlContext: MLContext, type: ort.Tensor.Type, dims: readonly number[]) {
   if (!isMLBufferSupportedType(type)) {
-    throw new Error(`createMlTensorForOutput can not work with ${type} tensor`);
+    throw new Error(`createMLTensorForOutput can not work with ${type} tensor`);
   }
 
   const dataType = type === 'bool' ? 'uint8' : type;
-  const context = await getContext();
-  const mlBuffer = context.createBuffer({dataType, dimensions: dims as number[]});
+  const mlBuffer = mlContext.createBuffer({dataType, dimensions: dims as number[]});
 
   return ort.Tensor.fromMLBuffer(mlBuffer, {
     dataType: type,
     dims,
     dispose: () => mlBuffer.destroy(),
     download: async () => {
-      const arrayBuffer = await context.readBuffer(mlBuffer);
-      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.GpuBufferDataTypes];
+      const arrayBuffer = await mlContext.readBuffer(mlBuffer);
+      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.MLBufferDataTypes];
     }
   });
 }
 
-async function createMlTensorForInput(cpuTensor: ort.Tensor): Promise<ort.Tensor> {
+async function createMLTensorForInput(mlContext: MLContext, cpuTensor: ort.Tensor): Promise<ort.Tensor> {
   if (!isMLBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
-    throw new Error(`createMlTensorForInput can not work with ${cpuTensor.type} tensor`);
+    throw new Error(`createMLTensorForInput can not work with ${cpuTensor.type} tensor`);
   }
-  const context = await getContext();
   const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
-  const mlBuffer = context.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
-  context.writeBuffer(mlBuffer, cpuTensor.data);
+  const mlBuffer = mlContext.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
+  mlContext.writeBuffer(mlBuffer, cpuTensor.data);
   return ort.Tensor.fromMLBuffer(
       mlBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => mlBuffer.destroy()});
 }
 
@@ -613,6 +617,7 @@ export async function sessionRun(options: {
   session: ort.InferenceSession; feeds: Record<string, ort.Tensor>;
   outputsMetaInfo: Record<string, Pick<ort.Tensor, 'dims'|'type'>>; ioBinding: Test.IOBindingMode;
+  mlContext?: MLContext;
 }): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> {
   const session = options.session;
   const feeds = options.feeds;
@@ -633,7 +638,7 @@ export async function sessionRun(options: {
     if (Object.hasOwnProperty.call(feeds, name)) {
       if (feeds[name].size > 0) {
         if (options.ioBinding === 'ml-location' || options.ioBinding === 'ml-tensor') {
-          feeds[name] = await createMlTensorForInput(feeds[name]);
+          feeds[name] = await createMLTensorForInput(options.mlContext!, feeds[name]);
         } else {
           feeds[name] = createGpuTensorForInput(feeds[name]);
         }
@@ -650,7 +655,7 @@ export async function sessionRun(options: {
           fetches[name] = new ort.Tensor(type, [], dims);
         } else {
           if (options.ioBinding === 'ml-tensor') {
-            fetches[name] = await createMlTensorForOutput(type, dims);
+            fetches[name] = await createMLTensorForOutput(options.mlContext!, type, dims);
           } else {
             fetches[name] = createGpuTensorForOutput(type, dims);
           }
@@ -701,8 +706,8 @@ export async function runModelTestSet(
   const outputsMetaInfo: Record<string, ort.Tensor> = {};
   testCase.inputs!.forEach((tensor) => feeds[tensor.name] = tensor);
   testCase.outputs!.forEach((tensor) => outputsMetaInfo[tensor.name] = tensor);
-  const [start, end, outputs] =
-      await sessionRun({session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding});
+  const [start, end, outputs] = await sessionRun(
+      {session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding, mlContext: context.mlContext});
   if (context.perfData.count === 0) {
     context.perfData.firstRun = end - start;
   } else {
diff --git a/onnxruntime/core/providers/webnn/data_transfer.cc b/onnxruntime/core/providers/webnn/data_transfer.cc
index 3ba9d1171191a..fa0c73ee2690a 100644
--- a/onnxruntime/core/providers/webnn/data_transfer.cc
+++ b/onnxruntime/core/providers/webnn/data_transfer.cc
@@ -16,6 +16,11 @@ bool DataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_dev
 }
 
 common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const {
+  if (!emscripten::val::module_property("shouldTransferToMLBuffer").as<bool>()) {
+    // We don't need to transfer the buffer to an MLBuffer, so we don't need to copy the buffer.
+    return Status::OK();
+  }
+
   size_t bytes = src.SizeInBytes();
   if (bytes > 0) {
     const void* src_data = src.DataRaw();
@@ -23,11 +28,6 @@ common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const {
 
     const auto& dst_device = dst.Location().device;
 
-    if (!emscripten::val::module_property("shouldTransferToMLBuffer").as<bool>()) {
-      // We don't need to transfer the buffer to an MLBuffer, so we don't need to copy the buffer.
-      return Status::OK();
-    }
-
     if (dst_device.Type() == OrtDevice::GPU) {
       EM_ASM({
             Module.jsepUploadBuffer($0, HEAPU8.subarray($1, $1 + $2));
diff --git a/onnxruntime/wasm/pre-jsep.js b/onnxruntime/wasm/pre-jsep.js
index c5bc4cd8c6a59..ba1c78c8be71c 100644
--- a/onnxruntime/wasm/pre-jsep.js
+++ b/onnxruntime/wasm/pre-jsep.js
@@ -201,11 +201,11 @@ Module['jsepInit'] = (name, params) => {
   } else if(name === 'webnn') {
     // Functions called from EM_ASM need to be assigned in a way that can be minified.
     [Module.jsepBackend,
-     Module.jsepReserveBufferId,
-     Module.jsepReleaseBufferId,
-     Module.jsepEnsureBuffer,
-     Module.jsepUploadBuffer,
-     Module.jsepDownloadBuffer,
+     Module.jsepReserveBufferId,
+     Module.jsepReleaseBufferId,
+     Module.jsepEnsureBuffer,
+     Module.jsepUploadBuffer,
+     Module.jsepDownloadBuffer,
     ] = params;
 
@@ -225,12 +225,10 @@ Module['jsepInit'] = (name, params) => {
     Module['jsepOnReleaseSession'] = sessionId => {
       backend['onReleaseSession'](sessionId);
     };
+    Module['jsepReleaseBufferId'] = Module.jsepReleaseBufferId;
     Module['jsepGetMLContext'] = sessionId => {
       return backend['getMLContext'](sessionId);
    };
-    Module['jsepGetMLBuffer'] = (bufferId) => {
-      return backend['getBuffer'](bufferId);
-    }
    Module['jsepCreateMLBufferDownloader'] = (bufferId, type) => {
      return backend['createMLBufferDownloader'](bufferId, type);
    }
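
A minimal sketch of the IO-binding flow this patch enables, assuming a WebNN-capable browser with the experimental MLBuffer API (`createBuffer`/`writeBuffer`/`readBuffer`, as used above) and WebNN type declarations available; the model URL, the `input`/`output` names, and the 2x2 float32 shapes are placeholders, not part of this diff:

```ts
import * as ort from 'onnxruntime-web';

async function runWithMLBuffers() {
  // Create the MLContext up front and hand it to the WebNN EP,
  // mirroring what test-runner.ts now does for 'ml-tensor'/'ml-location' modes.
  const mlContext = await navigator.ml.createContext({deviceType: 'gpu'});
  const session = await ort.InferenceSession.create('./model.onnx', {
    executionProviders: [{name: 'webnn', context: mlContext}],
  });

  // Bind the input: upload CPU data into an MLBuffer, then wrap it as a tensor,
  // following the createMLTensorForInput pattern above.
  const inputBuffer = mlContext.createBuffer({dataType: 'float32', dimensions: [2, 2]});
  mlContext.writeBuffer(inputBuffer, new Float32Array([1, 2, 3, 4]));
  const input = ort.Tensor.fromMLBuffer(inputBuffer, {
    dataType: 'float32',
    dims: [2, 2],
    dispose: () => inputBuffer.destroy(),
  });

  // Bind the output: pre-allocate an MLBuffer and supply a download callback,
  // following the createMLTensorForOutput pattern above.
  const outputBuffer = mlContext.createBuffer({dataType: 'float32', dimensions: [2, 2]});
  const output = ort.Tensor.fromMLBuffer(outputBuffer, {
    dataType: 'float32',
    dims: [2, 2],
    dispose: () => outputBuffer.destroy(),
    download: async () => new Float32Array(await mlContext.readBuffer(outputBuffer)),
  });

  const results = await session.run({input}, {output});
  // getData(true) reads back through the download callback and releases the data.
  console.log(await results.output.getData(true));

  input.dispose();
  output.dispose();
}
```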