Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 59 additions & 19 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ using Exception = ov::Exception;
namespace onnxruntime {
namespace openvino_ep {

// Opens the external-weights file and caches its size; throws via ORT_THROW on failure.
SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename)
    : file_(filename, std::ios::in | std::ios::binary), file_path_(filename) {
  try {
    // Promote stream failures to exceptions so a bad open surfaces in the catch below.
    file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    // Query the size from the filesystem instead of seeking the stream.
    weights_size_ = std::filesystem::file_size(filename);
  } catch (std::exception& e) {
    ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what());
  }
}
Expand All @@ -35,6 +35,32 @@ void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset,
file_.read(reinterpret_cast<char*>(data), size);
}

// Returns a pointer to a mapping of the whole weights file appropriate for the
// device of `remote_context`, creating and caching it on first request.
// Returns nullptr when no mapping could be created for that device.
void* SharedContext::SharedWeights::WeightsFile::TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context) {
  // Empty device name keys the host/CPU mapping.
  std::string device_name{};
  if (remote_context) {
    device_name = remote_context->get_device_name();
  }

  auto [it, inserted] = imported_device_tensors_.emplace(device_name, MappingContainer{});
  if (inserted) {
    // First request for this device: try to build a mapping.
    if (device_name == "NPU") {
#if OPENVINO_VERSION_AT_LEAST(2025, 3)
      // Import the memory-mapped file as a level-zero remote tensor.
      ORT_ENFORCE(remote_context, "Error: Remote context is required for NPU device.");
      auto npu_context = remote_context->as<ov::intel_npu::level_zero::ZeroContext>();
      auto&& l0_tensor = npu_context.create_tensor(ov::element::Type_t::u8, {weights_size_}, ov::intel_npu::FileDescriptor(file_path_));
      it->second = MappingContainer{.ptr_ = l0_tensor.get(), .tensor_ = l0_tensor};
#endif
    } else if (device_name.empty()) {
      // CPU/virtual device case, create a CPU tensor memory mapped from file
      auto&& mmapped_tensor = ov::read_tensor_data(file_path_);
      it->second = MappingContainer{.ptr_ = mmapped_tensor.data(), .tensor_ = mmapped_tensor};
    }
  }

  // Cached nullptr means "no mapping available for this device" — caller must handle it.
  return it->second.ptr_;
}

std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
try {
stream << metadata.size();
Expand Down Expand Up @@ -405,29 +431,43 @@ ov::element::Type GetOpenVINOElementType(ONNX_NAMESPACE::TensorProto_DataType dt
// Populates `value.tensor` for every metadata entry, preferring a zero-copy view
// into the memory-mapped weights file and falling back to a remote host tensor
// filled by reading from the file.
void CreateOVTensors(const std::string& device_name,
                     SharedContext::SharedWeights::Metadata::Map& metadata_map,
                     SharedContext::SharedWeights::WeightsFile& weights) {
  // Fetch the device's default remote context if one exists; absence is not an error.
  std::optional<ov::RemoteContext> opt_remote_ctx;
  try {
    opt_remote_ctx = OVCore::Get()->core.get_default_context(device_name);
  } catch (const std::exception&) {
    // Remote context not available
  }

  for (auto& [key, value] : metadata_map) {
    if (value.tensor) continue;  // already materialized

    // Map the ONNX element type onto its OpenVINO counterpart.
    auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);

    ov::Tensor tensor;
    uint8_t* mmapped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));

    if (mmapped_weights) {
      // We have memory mapped weights. Create a Tensor view into it for this value.
      // Overflow-safe bounds check: offset and size must both lie inside the file.
      ORT_ENFORCE(value.data_offset < weights.Size() &&
                      value.size <= weights.Size() &&
                      (value.data_offset <= weights.Size() - value.size),
                  "File offset + size outside of external initializer file");
      void* mmapped_offset = static_cast<void*>(mmapped_weights + value.data_offset);
      tensor = ov::Tensor(ov_elementType, value.dimensions, mmapped_offset);
    } else {
      ORT_ENFORCE(opt_remote_ctx, "Unexpected: Don't have remote context and memory mapped weights is null!");
      // Can't mmap the file to device tensor, create a host tensor and copy the data
      tensor = opt_remote_ctx->create_host_tensor(ov_elementType, value.dimensions);
      ORT_ENFORCE(tensor.get_byte_size() == value.size, "Remote tensor size mismatch");
      weights.load_weights(value.data_offset, tensor.data(), value.size);
    }

    ORT_ENFORCE(tensor.get_byte_size() == value.size, "Unexpected tensor size mismatch");
    value.tensor = std::make_shared<ov::Tensor>(std::move(tensor));
  }
}

Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,18 @@ class SharedContext : public WeakSingleton<SharedContext> {
explicit WeightsFile(std::filesystem::path filename);

// Reads `size` bytes starting at `file_offset` from the weights file into `data`.
void load_weights(size_t file_offset, void* data, size_t size);
// Returns a pointer to a cached per-device mapping of the whole weights file,
// creating it on first use; may return nullptr when no mapping is available
// for the context's device.
void* TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context);
// Total size of the weights file in bytes.
size_t Size() const { return weights_size_; }

private:
std::ifstream file_;  // open stream used by load_weights
std::filesystem::path file_path_;  // kept for mmap/remote-tensor import
size_t weights_size_;  // cached file size in bytes
// One mapping of the file: raw pointer plus the ov::Tensor that owns it.
struct MappingContainer {
void* ptr_{nullptr};
ov::Tensor tensor_;
};
// Mappings keyed by device name ("" = host/CPU mapping).
std::map<std::string, MappingContainer> imported_device_tensors_;
};

fs::path external_weight_filename;
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@

#include <string>

// Compile-time OpenVINO version gate.
// Usage: #if OPENVINO_VERSION_AT_LEAST(2025, 3)
// Evaluates to 0 whenever OPENVINO_VERSION_MAJOR/MINOR are unavailable, so
// version-gated code is simply compiled out on unknown toolchains.
#if defined(OPENVINO_VERSION_MAJOR) && defined(OPENVINO_VERSION_MINOR)
#define OPENVINO_VERSION_AT_LEAST(major, minor)      \
  (((OPENVINO_VERSION_MAJOR) > (major)) ||           \
   (((OPENVINO_VERSION_MAJOR) == (major)) &&         \
    ((OPENVINO_VERSION_MINOR) >= (minor))))
#else
#define OPENVINO_VERSION_AT_LEAST(major, minor) 0
#endif

namespace onnxruntime {
namespace openvino_ep {
class OVCore;
Expand Down
Loading