From ecadd5cd8d5f56b7529fab1ed11886aff0bb1bc4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 22 Nov 2024 15:36:56 -0800 Subject: [PATCH] Start graph saving --- include/onnxruntime/core/graph/graph.h | 41 ++---- .../core/graph/model_saving_options.h | 44 ++++++ onnxruntime/core/framework/session_state.h | 4 + onnxruntime/core/graph/graph.cc | 129 ++++++++++++++++-- onnxruntime/core/graph/model.cc | 25 ++-- onnxruntime/core/graph/model.h | 35 +---- .../shared_library/provider_interfaces.h | 8 +- .../shared_library/provider_wrappedtypes.h | 9 +- .../core/providers/vitisai/imp/graph.cc | 5 +- onnxruntime/core/session/inference_session.cc | 9 +- .../core/session/provider_bridge_ort.cc | 9 +- .../save_model_with_external_initializers.cc | 43 ++++-- .../core/session/training_session.cc | 4 +- .../orttraining/training_api/module.cc | 4 +- 14 files changed, 257 insertions(+), 112 deletions(-) create mode 100644 include/onnxruntime/core/graph/model_saving_options.h diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index eb9581e8018d1..7e0d74eb343d0 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -41,6 +41,7 @@ namespace onnxruntime { class Graph; struct IndexedSubGraph; class Model; +struct ModelSavingOptions; class OpSignature; #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) @@ -1153,29 +1154,6 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi const ONNX_NAMESPACE::GraphProto& ToGraphProto(); ONNX_NAMESPACE::GraphProto ToGraphProto() const; - // Options to align external initializer offset. - // For models running on CPU, ORT will try to use mmap to load external initializers. - // To use mmap, external initializer need to be offset aligned. 
- // ORT saves external initializers into signle data file, each initializer is accessed with - // offset(start position of initializer) and length(byte length of initializer) of the data file. - // To use mmap, each offset need to be aligned which means offset need to divisible by - // allocation granularity(64KB for windows and 4K for other OSes). - // With align_offset to true, ORT will align offset for large initializer when - // save ONNX model with external data file. - struct OffsetAlignmentInfo { - // Offset will always be page aligned and allocation granularity aligned for mmap support. - // This is done by padding previous tensor data with zeros keeping same length. - bool align_offset = false; - // Alignment threshold for size of data. - // Having a low threshold will waste file space for small initializers. - // Only when tensor's data size is > the page_align_threshold it will be force aligned. - // Default to 1MB. - int64_t align_threshold = 1048576; - // The allocation Granularity for mmap() support. - // Typically 64KB for Windows & 4KB for other OSes. Default to 64KB. - int64_t allocation_granularity = 65536; - }; - /** Gets the GraphProto representation of this Graph @param external_file_path File path of the binary file to use for initializers. @param model_file_path path of the model file. 
@@ -1186,15 +1164,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi */ ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, const std::filesystem::path& model_file_path, - size_t initializer_size_threshold, - const OffsetAlignmentInfo& align_info) const; - - ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, - const std::filesystem::path& model_file_path, - size_t initializer_size_threshold) const { - OffsetAlignmentInfo default_options; - return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options); - } + const ModelSavingOptions& model_saving_options) const; /** Gets the ISchemaRegistry instances being used with this Graph. */ IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const; @@ -1519,6 +1489,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto, std::optional new_name); + ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitiallizersImpl(const std::filesystem::path& model_path, + const std::filesystem::path& external_file_path, + const ModelSavingOptions& model_saving_options, + ONNX_NAMESPACE::GraphProto& graph_proto, + std::ostream& external_stream, + int64_t& external_offset) const; + #endif Version IrVersion() const noexcept { diff --git a/include/onnxruntime/core/graph/model_saving_options.h b/include/onnxruntime/core/graph/model_saving_options.h new file mode 100644 index 0000000000000..d4ed2d0668f87 --- /dev/null +++ b/include/onnxruntime/core/graph/model_saving_options.h @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +namespace onnxruntime { + +class PrepackedForSerialization; + +// These options affect how the model initializers are saved. +// This includes options to align external initializer offset. +// For models running on CPU, ORT will try to use mmap to load external +// initializers. To use mmap, external initializers need to be offset aligned. +// ORT saves external initializers into a single data file, each initializer is +// accessed with offset(start position of initializer) and length(byte length of +// initializer) of the data file. To use mmap, each offset needs to be aligned +// which means offset needs to be divisible by allocation granularity(64KB for +// windows and 4K for other OSes). With align_offset set to true, ORT will align +// offset for large initializers when saving ONNX model with external data file. +struct ModelSavingOptions { + explicit ModelSavingOptions(size_t size_threshold) + : initializer_size_threshold(size_threshold) {} + + // Minimal initializer size in bytes to be externalized on disk + size_t initializer_size_threshold; + // Offset will always be page aligned and allocation granularity aligned for + // mmap support. This is done by padding previous tensor data with zeros + // keeping same length. + bool align_offset = false; + // Alignment threshold for size of data. + // Having a low threshold will waste file space for small initializers. + // Only when tensor's data size is > align_threshold it will be force + // aligned. Default to 1MB. + int64_t align_threshold = 1048576; + // The allocation Granularity for mmap() support. + // Typically 64KB for Windows & 4KB for other OSes. Default to 64KB. + int64_t allocation_granularity = 65536; + // Optional pointer to a container of pre-packed initializers to be + // embedded into the external initializers, so they can also be loaded + // from disk.
+ const PrepackedForSerialization* prepacked_for_save = nullptr; +}; + +} diff --git a/onnxruntime/core/framework/session_state.h b/onnxruntime/core/framework/session_state.h index c038b7c058a66..a08af36a8ae5a 100644 --- a/onnxruntime/core/framework/session_state.h +++ b/onnxruntime/core/framework/session_state.h @@ -374,6 +374,10 @@ class SessionState { void SetSaveModeForPrepacks(bool saving_model, bool saving_ort_format); + const PrepackedForSerialization& GetPrepackedForSerialization() const { + return prepacked_weights_for_serialization_; + } + private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SessionState); diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 7da4db1ecf92e..b7353ca3875bf 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -26,6 +26,7 @@ #include "core/graph/indexed_sub_graph.h" #include "core/graph/model.h" #include "core/graph/model_load_utils.h" +#include "core/graph/model_saving_options.h" #include "core/graph/node_attr_utils.h" #include "core/graph/op.h" #include "core/graph/runtime_optimization_record_container.h" @@ -4085,16 +4086,128 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const { return result; } -ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, - const std::filesystem::path& model_file_path, - size_t initializer_size_threshold, - const OffsetAlignmentInfo& align_info) const { +// Create a recursive function that does bottom up with subgraphs +ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl( + const std::filesystem::path& model_path, + const std::filesystem::path& external_file_path, + const ModelSavingOptions& model_saving_options, + ONNX_NAMESPACE::GraphProto& output_graph_proto, + std::ostream& external_stream, + int64_t& external_offset) const { + // update external_offset for alignment + // need to do padding before write actual tensor data as we do offset 
alignment at the begin of + // large tensors (offset need to be page aligned and allocation granularity aligned) like below: + // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX + // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| + auto compute_and_pad = [&external_stream](int64_t allocation_granularity, int64_t& external_offset) { + // Align to the larger of the page size or the allocation granularity + int64_t alignment_factor = std::max(static_cast(4096), allocation_granularity); + // Align to the next page or alloc granularity boundary + int64_t new_external_offset = static_cast( + std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * + alignment_factor; + + // padding tensor with zeros for alignment + for (int64_t index = external_offset; index != new_external_offset; ++index) { + external_stream << '\0'; + } + external_offset = new_external_offset; + }; + + // Process subgraphs + for (const auto& node : Nodes()) { + if (node.ContainsSubgraph()) { + // Let find this node in the output_graph_proto + auto hit = std::find_if(output_graph_proto.node().begin(), + output_graph_proto.node().end(), + [&node](const ONNX_NAMESPACE::NodeProto& proto) { + return proto.name() == node.Name(); + }); + ORT_ENFORCE(hit != output_graph_proto.node().end(), "Node ", node.Name(), + " not found in output_graph_proto"); + auto& result_node = *hit; + for (const auto& [name, subgraph] : node.GetAttributeNameToSubgraphMap()) { + // Lets find this subgraph in the result_node + auto sub_hit = std::find_if(result_node.attribute().begin(), + result_node.attribute().end(), + [&name](const ONNX_NAMESPACE::AttributeProto& proto) { + return proto.name() == name; + }); + ORT_ENFORCE(sub_hit != result_node.attribute().end(), "Subgraph ", name, + " not found in node ", node.Name()); + } + } + } + + // Add the initializers to the result graph. 
+ for (const auto& initializer : graph_proto_->initializer()) { +#if !defined(DISABLE_SPARSE_TENSORS) + if (IsSparseInitializer(initializer.name())) { + // Sparse tensors are added to the ONNX file. + auto& sparse_initializer = *output_graph_proto.add_sparse_initializer(); + auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer); + ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse"); + } else { +#endif + // Dense tensors larger than the threshold are added to the external file. + TensorProto* output_proto = output_graph_proto.add_initializer(); + + std::vector raw_data; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data)); + size_t tensor_bytes_size = raw_data.size(); + if (tensor_bytes_size < model_saving_options.initializer_size_threshold) { + *output_proto = initializer; + continue; + } + + // update external_offset for alignment + // need to do padding before write actual tensor data as we do offset alignment at the begin of + // large tensors (offset need to be page aligned and allocation granularity aligned) like below: + // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX + // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| + if (model_saving_options.align_offset && static_cast(tensor_bytes_size) > + model_saving_options.align_threshold) { + compute_and_pad(model_saving_options.allocation_granularity, external_offset); + } + + if (!external_stream.write(reinterpret_cast(raw_data.data()), tensor_bytes_size)) { + ORT_THROW("Failed to write external initializers to file: ", modified_external_file_path); + } + + ExternalDataInfo::SetExternalLocationToProto(external_file_path, external_offset, + tensor_bytes_size, *output_proto); + + output_proto->set_name(initializer.name()); + output_proto->set_data_type(initializer.data_type()); + for (int i = 0; i != 
initializer.dims_size(); ++i) { + output_proto->add_dims(initializer.dims(i)); + } + output_proto->set_doc_string(initializer.doc_string()); + + external_offset += tensor_bytes_size; + + const PrepackedForSerialization::Subgraph* prepacked_subgraph = nullptr; + if (model_saving_options.prepacked_for_save != nullptr) { + prepacked_subgraph = *model_saving_options.prepacked_for_save->FindOrCreateSubgraph(*this); + } + +#if !defined(DISABLE_SPARSE_TENSORS) + } +#endif + } +} + +ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers( + const std::filesystem::path& external_file_path, + const std::filesystem::path& model_file_path, + const ModelSavingOptions& model_saving_options) const { GraphProto result; ToGraphProtoInternal(result); ORT_ENFORCE(external_file_path.is_relative()); // If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could // be empty. Else, save external data file in same directory as the model. const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path; + const auto& model_path = ModelPath(); // Create the external file. std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary); @@ -4122,7 +4235,6 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std }; // Add the initializers to the result graph. 
- const auto& model_path = ModelPath(); #if !defined(DISABLE_SPARSE_TENSORS) const auto sparse_end = sparse_tensor_names_.end(); #endif @@ -4142,7 +4254,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std std::vector raw_data; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data)); size_t tensor_bytes_size = raw_data.size(); - if (tensor_bytes_size < initializer_size_threshold) { + if (tensor_bytes_size < model_saving_options.initializer_size_threshold) { *output_proto = initializer; continue; } @@ -4152,8 +4264,9 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std // large tensors (offset need to be page aligned and allocation granularity aligned) like below: // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| - if (align_info.align_offset && static_cast(tensor_bytes_size) > align_info.align_threshold) { - compute_and_pad(align_info.allocation_granularity, external_offset); + if (model_saving_options.align_offset && static_cast(tensor_bytes_size) > + model_saving_options.align_threshold) { + compute_and_pad(model_saving_options.allocation_granularity, external_offset); } if (!external_stream.write(reinterpret_cast(raw_data.data()), tensor_bytes_size)) { diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index 1bae63b510563..be0531e6473fb 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -383,14 +383,12 @@ ModelProto Model::ToProto() const { ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) const { + const ModelSavingOptions& model_saving_options) const { ModelProto 
result(model_proto_); const auto& graph = *graph_; *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name, file_path, - initializer_size_threshold, - align_info); + model_saving_options); return result; } @@ -607,16 +605,13 @@ template static Status SaveModelWithExternalInitializers(Model& model, const T& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) { + const ModelSavingOptions& save_options) { int fd = 0; Status status = Env::Default().FileOpenWr(file_path, fd); ORT_RETURN_IF_ERROR(status); ORT_TRY { - status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name, - initializer_size_threshold, - align_info); + status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name, save_options); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -646,10 +641,8 @@ Status Model::Load(const PathString& file_path, std::shared_ptr& p_model, Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) { - return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold, - align_info); + const ModelSavingOptions& save_options) { + return SaveModelWithExternalInitializers(model, file_path, external_file_name, save_options); } Status Model::LoadFromBytes(int count, const void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) { @@ -765,8 +758,7 @@ Status Model::SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) { + const ModelSavingOptions& model_saving_options) { if (fd < 0) { return 
Status(ONNXRUNTIME, INVALID_ARGUMENT, " is less than 0."); } @@ -774,8 +766,7 @@ Status Model::SaveWithExternalInitializers(Model& model, ORT_RETURN_IF_ERROR(model.MainGraph().Resolve()); auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path, - initializer_size_threshold, - align_info); + model_saving_options); google::protobuf::io::FileOutputStream output(fd); const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush(); if (result) { diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index 9bcec6f78ca08..21fa5a2ff97bf 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -20,6 +20,8 @@ namespace onnxruntime { +class PrepackedForSerialization; + namespace fbs { struct Model; } // namespace fbs @@ -190,15 +192,7 @@ class Model { // initializer offset could be page aligned and allocation granularity aligned for mmap support. ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) const; - - ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, - const std::filesystem::path& file_path, - size_t initializer_size_threshold) const { - Graph::OffsetAlignmentInfo default_align_info; - return ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold, default_align_info); - } + const ModelSavingOptions& model_saving_options) const; static common::Status Save(Model& model, const PathString& file_path); @@ -209,32 +203,13 @@ class Model { static common::Status SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, const std::filesystem::path& external_file_path, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info); - - static 
common::Status SaveWithExternalInitializers(Model& model, - const std::filesystem::path& file_path, - const std::filesystem::path& external_file_path, - size_t initializer_size_threshold) { - Graph::OffsetAlignmentInfo default_align_info; - return SaveWithExternalInitializers(model, file_path, external_file_path, initializer_size_threshold, default_align_info); - } - - static common::Status SaveWithExternalInitializers(Model& model, - int fd, - const std::filesystem::path& file_path, - const std::filesystem::path& external_file_path, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info); + const ModelSavingOptions& save_options); static common::Status SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, const std::filesystem::path& external_file_path, - size_t initializer_size_threshold) { - Graph::OffsetAlignmentInfo default_align_info; - return SaveWithExternalInitializers(model, fd, file_path, external_file_path, initializer_size_threshold, default_align_info); - } + const ModelSavingOptions& save_options); static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto); diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 1436afa41c2f8..b67e4748f570c 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -42,6 +42,8 @@ using ProviderType = const std::string&; class RandomGenerator; class IOnnxRuntimeOpSchemaCollection; +struct ModelSavingOptions; + #ifdef ENABLE_TRAINING_TORCH_INTEROP namespace contrib { class PythonOpBase; @@ -899,7 +901,11 @@ struct ProviderHost { virtual void Model__operator_delete(Model* p) = 0; virtual Graph& Model__MainGraph(Model* p) = 0; virtual std::unique_ptr Model__ToProto(Model* p) = 0; - virtual std::unique_ptr 
Model__ToGraphProtoWithExternalInitializers(Model* p, const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) = 0; + virtual std::unique_ptr Model__ToGraphProtoWithExternalInitializers( + Model* p, + const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, + const ModelSavingOptions&) = 0; virtual const ModelMetaData& Model__MetaData(const Model* p) const noexcept = 0; virtual Status Model__Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 5e8996d590db8..0b7f9258a38ae 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -933,6 +933,8 @@ struct NodeUnit final { Node::EdgeConstIterator OutputEdgesEnd() const { return g_host->NodeUnit__OutputEdgesEnd(this); } }; +struct ModelSavingOptions; + struct Model final { static std::unique_ptr Create(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger) { @@ -944,7 +946,12 @@ struct Model final { Graph& MainGraph() { return g_host->Model__MainGraph(this); } std::unique_ptr ToProto() { return g_host->Model__ToProto(this); } - std::unique_ptr ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) { return g_host->Model__ToGraphProtoWithExternalInitializers(this, external_file_name, file_path, initializer_size_threshold); } + std::unique_ptr ToGraphProtoWithExternalInitializers( + const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, const ModelSavingOptions& model_saving_options) { + return 
g_host->Model__ToGraphProtoWithExternalInitializers(this, external_file_name, file_path, + model_saving_options); + } const ModelMetaData& MetaData() const noexcept { return g_host->Model__MetaData(this); } Model() = delete; diff --git a/onnxruntime/core/providers/vitisai/imp/graph.cc b/onnxruntime/core/providers/vitisai/imp/graph.cc index 191d26f3ab269..e7b39546fda6a 100644 --- a/onnxruntime/core/providers/vitisai/imp/graph.cc +++ b/onnxruntime/core/providers/vitisai/imp/graph.cc @@ -9,6 +9,7 @@ #include #include +#include "core/graph/model_saving_options.h" #include "core/providers/shared_library/provider_api.h" #include "./vai_assert.h" @@ -111,7 +112,9 @@ void graph_save(const Graph& graph, const std::string& filename, const std::stri if (initializer_size_threshold == std::numeric_limits::max()) { model_proto = model->ToProto(); } else { - model_proto = model->ToGraphProtoWithExternalInitializers(ToPathString(filename_dat), ToPathString(filename), initializer_size_threshold); + ModelSavingOptions model_saving_options{initializer_size_threshold}; + model_proto = model->ToGraphProtoWithExternalInitializers(ToPathString(filename_dat), ToPathString(filename), + model_saving_options); } auto& metadata = model->MetaData(); if (!metadata.empty()) { diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 66dd7f6187903..3ca45e143af1c 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -38,6 +38,7 @@ #include "core/framework/utils.h" #include "core/graph/graph_viewer.h" #include "core/graph/model.h" +#include "core/graph/model_saving_options.h" #include "core/optimizer/graph_transformer_utils.h" #include "core/optimizer/graph_transformer.h" #include "core/optimizer/layout_transformation/layout_transformation.h" @@ -2100,13 +2101,13 @@ common::Status InferenceSession::Initialize() { const size_t optimized_model_external_initializers_min_size_in_bytes = 
ParseStringWithClassicLocale(session_options_.config_options.GetConfigOrDefault( kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "1024")); - Graph::OffsetAlignmentInfo align_info; - align_info.align_offset = true; + ModelSavingOptions model_saving_options{optimized_model_external_initializers_min_size_in_bytes}; + model_saving_options.align_offset = true; + model_saving_options.prepacked_for_save = &session_state_->GetPrepackedForSerialization(); ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_, session_options_.optimized_model_filepath, optimized_model_external_initializers_file_name, - optimized_model_external_initializers_min_size_in_bytes, - align_info)); + model_saving_options)); } } } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 0aa93bce354e8..5da5ef5fecaa2 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1070,7 +1070,14 @@ struct ProviderHostImpl : ProviderHost { void Model__operator_delete(Model* p) override { delete p; } Graph& Model__MainGraph(Model* p) override { return p->MainGraph(); } std::unique_ptr Model__ToProto(Model* p) override { return std::make_unique(p->ToProto()); } - std::unique_ptr Model__ToGraphProtoWithExternalInitializers(Model* p, const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) override { return std::make_unique(p->ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold)); }; + std::unique_ptr Model__ToGraphProtoWithExternalInitializers(Model* p, + const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, + const ModelSavingOptions& model_saving_options) override { + return std::make_unique(p->ToGraphProtoWithExternalInitializers(external_file_name, + file_path, + model_saving_options)); + }; const 
ModelMetaData& Model__MetaData(const Model* p) const noexcept override { return p->MetaData(); }; Status Model__Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) override { return Model::Load(file_path, model_proto); } diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 294463464e771..98874874d50e9 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -6,6 +6,7 @@ #include "core/common/path_string.h" #include "core/framework/data_types.h" #include "core/graph/model.h" +#include "core/graph/model_saving_options.h" #include "core/framework/tensorprotoutils.h" #include "test/test_environment.h" #include "test_utils.h" @@ -23,16 +24,14 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, const std::filesystem::path& input_external_init_file, const std::filesystem::path& output_onnx, const std::filesystem::path& output_external_init_file, - size_t initializer_size_threshold, - const Graph::OffsetAlignmentInfo& align_info) { + const ModelSavingOptions& model_saving_options) { auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel"); std::shared_ptr model; ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger)); std::filesystem::remove(output_onnx); std::filesystem::remove(output_external_init_file); ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, - initializer_size_threshold, - align_info)); + model_saving_options)); std::shared_ptr model_from_external; ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger)); @@ -68,7 +67,7 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, 
from_external_tensor_proto_data)); size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size(); - if (from_external_tensor_proto_size < initializer_size_threshold) { + if (from_external_tensor_proto_size < model_saving_options.initializer_size_threshold) { // 'Small' tensors should be embedded in the onnx file. ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_DEFAULT, "location mismatch"); } else { @@ -79,13 +78,14 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch"); ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch"); - if (align_info.align_offset) { + if (model_saving_options.align_offset) { for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) { if (entry.has_key() && entry.has_value() && entry.key() == "offset") { size_t tensor_offset; std::stringstream stream(entry.value()); stream >> tensor_offset; - ORT_RETURN_IF_NOT(tensor_offset % align_info.allocation_granularity == 0, "tensor offset not align"); + ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.allocation_granularity == 0, + "tensor offset not align"); } } } @@ -98,22 +98,35 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, // Original model does not have external initializers TEST(SaveWithExternalInitializers, Mnist) { - Graph::OffsetAlignmentInfo align_info; - ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100, align_info)); + ModelSavingOptions model_saving_options{100}; + ASSERT_STATUS_OK(LoadSaveAndCompareModel( + ORT_TSTR("testdata/mnist.onnx"), + ORT_TSTR(""), 
ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), + ORT_TSTR("mnist_external_initializers.bin"), + model_saving_options)); } // Original model has external initializers TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) { - Graph::OffsetAlignmentInfo align_info; - ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info)); + ModelSavingOptions model_saving_options{0}; + ASSERT_STATUS_OK(LoadSaveAndCompareModel( + ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), + ORT_TSTR("model_with_orig_ext_data.onnx.data"), + ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), + ORT_TSTR("model_with_new_external_initializers.bin"), + model_saving_options)); } // Original model has external initializers, align offset TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) { - Graph::OffsetAlignmentInfo align_info; - align_info.align_offset = true; - align_info.align_threshold = 0; - ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info)); + ModelSavingOptions model_saving_options{0}; + model_saving_options.align_offset = true; + model_saving_options.align_threshold = 0; + ASSERT_STATUS_OK(LoadSaveAndCompareModel( + ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), + ORT_TSTR("model_with_orig_ext_data.onnx.data"), + ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), + ORT_TSTR("model_with_new_external_initializers.bin"), model_saving_options)); } } // namespace test diff --git a/orttraining/orttraining/core/session/training_session.cc 
b/orttraining/orttraining/core/session/training_session.cc index 87a7cbc0375a4..35ca6be8577fe 100644 --- a/orttraining/orttraining/core/session/training_session.cc +++ b/orttraining/orttraining/core/session/training_session.cc @@ -5,6 +5,7 @@ #include "core/framework/data_transfer_utils.h" #include "core/graph/model.h" +#include "core/graph/model_saving_options.h" #include "core/session/IOBinding.h" #include "core/optimizer/rule_based_graph_transformer.h" #include "core/providers/cpu/controlflow/utils.h" @@ -1002,7 +1003,8 @@ Status TrainingSession::SaveWithExternalInitializers(const PathString& model_uri std::remove(ToUTF8String(model_uri).c_str()); std::remove(external_file_name.c_str()); - return Model::SaveWithExternalInitializers(*model_, model_uri, external_file_name, initializer_size_threshold); + ModelSavingOptions model_saving_options{initializer_size_threshold}; + return Model::SaveWithExternalInitializers(*model_, model_uri, external_file_name, model_saving_options); } Status TrainingSession::Save(const PathString& model_uri, TrainingSession::SaveOption opt) { diff --git a/orttraining/orttraining/training_api/module.cc b/orttraining/orttraining/training_api/module.cc index 939e1de334e52..8f2d0f6531500 100644 --- a/orttraining/orttraining/training_api/module.cc +++ b/orttraining/orttraining/training_api/module.cc @@ -689,8 +689,10 @@ Status Module::ExportModelForInferencing(const std::string& inference_model_path std::string external_data_name = ORT_TSTR_CONVERT_TO_PRINTABLE_STRING(ExternalCheckpointDataPath(ToPathString(inference_model_path))); PathString inference_model_pathstring = ToPathString(inference_model_path); + ModelSavingOptions model_saving_options{64}; ORT_THROW_IF_ERROR( - Model::SaveWithExternalInitializers(*inference_model, inference_model_pathstring, external_data_name, 64)); + Model::SaveWithExternalInitializers(*inference_model, inference_model_pathstring, external_data_name, + model_saving_options)); } else { 
ORT_THROW_IF_ERROR(Model::Save(*inference_model, ToPathString(inference_model_path))); }