Skip to content

Commit

Permalink
Start graph saving
Browse files Browse the repository at this point in the history
  • Loading branch information
yuslepukhin committed Nov 22, 2024
1 parent c00d462 commit ecadd5c
Show file tree
Hide file tree
Showing 14 changed files with 257 additions and 112 deletions.
41 changes: 9 additions & 32 deletions include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace onnxruntime {
class Graph;
struct IndexedSubGraph;
class Model;
struct ModelSavingOptions;
class OpSignature;

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
Expand Down Expand Up @@ -1153,29 +1154,6 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
ONNX_NAMESPACE::GraphProto ToGraphProto() const;

// Options to align external initializer offset.
// For models running on CPU, ORT will try to use mmap to load external initializers.
// To use mmap, external initializer need to be offset aligned.
// ORT saves external initializers into a single data file; each initializer is accessed with
// offset(start position of initializer) and length(byte length of initializer) of the data file.
// To use mmap, each offset need to be aligned which means offset need to divisible by
// allocation granularity(64KB for windows and 4K for other OSes).
// With align_offset to true, ORT will align offset for large initializer when
// save ONNX model with external data file.
struct OffsetAlignmentInfo {
// Offset will always be page aligned and allocation granularity aligned for mmap support.
// This is done by padding previous tensor data with zeros keeping same length.
bool align_offset = false;
// Alignment threshold for size of data.
// Having a low threshold will waste file space for small initializers.
// Only when tensor's data size is > the page_align_threshold it will be force aligned.
// Default to 1MB.
int64_t align_threshold = 1048576;
// The allocation Granularity for mmap() support.
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
int64_t allocation_granularity = 65536;
};

/** Gets the GraphProto representation of this Graph
@param external_file_path File path of the binary file to use for initializers.
@param model_file_path path of the model file.
Expand All @@ -1186,15 +1164,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const;

ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
OffsetAlignmentInfo default_options;
return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options);
}
const ModelSavingOptions& model_saving_options) const;

/** Gets the ISchemaRegistry instances being used with this Graph. */
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
Expand Down Expand Up @@ -1519,6 +1489,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
std::optional<std::string_view> new_name);

ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitiallizersImpl(const std::filesystem::path& model_path,
const std::filesystem::path& external_file_path,
const ModelSavingOptions& model_saving_options,
ONNX_NAMESPACE::GraphProto& graph_proto,
std::ostream& external_stream,
int64_t& external_offset) const;

#endif

Version IrVersion() const noexcept {
Expand Down
44 changes: 44 additions & 0 deletions include/onnxruntime/core/graph/model_saving_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <cstddef>  // size_t
#include <cstdint>  // int64_t

namespace onnxruntime {

class PrepackedForSerialization;

// Options that affect how model initializers are saved.
// This includes options to align external initializer offsets.
// For models running on CPU, ORT will try to use mmap to load external
// initializers. To use mmap, an external initializer needs to be offset
// aligned. ORT saves external initializers into a single data file; each
// initializer is accessed with an offset (start position of the initializer)
// and a length (byte length of the initializer) within the data file. To use
// mmap, each offset needs to be divisible by the allocation granularity
// (64KB for Windows and 4KB for other OSes). With align_offset set to true,
// ORT will align the offsets of large initializers when saving an ONNX model
// with an external data file.
struct ModelSavingOptions {
  explicit ModelSavingOptions(size_t size_threshold)
      : initializer_size_threshold(size_threshold) {}

  // Minimal initializer size in bytes to be externalized on disk.
  size_t initializer_size_threshold;
  // Offset will always be page aligned and allocation granularity aligned for
  // mmap support. This is done by padding previous tensor data with zeros,
  // keeping the same length.
  bool align_offset = false;
  // Alignment threshold for size of data.
  // Having a low threshold will waste file space for small initializers.
  // Only when a tensor's data size is > align_threshold will it be force
  // aligned. Defaults to 1MB.
  int64_t align_threshold = 1048576;
  // The allocation granularity for mmap() support.
  // Typically 64KB for Windows and 4KB for other OSes. Defaults to 64KB.
  int64_t allocation_granularity = 65536;
  // Optional pointer to a container of pre-packed initializers to be
  // embedded into the external initializers file, so they can also be loaded
  // from disk.
  const PrepackedForSerialization* prepacked_for_save = nullptr;
};

}  // namespace onnxruntime
4 changes: 4 additions & 0 deletions onnxruntime/core/framework/session_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,10 @@ class SessionState {
void SetSaveModeForPrepacks(bool saving_model,
bool saving_ort_format);

// Read-only accessor for the container of pre-packed weights kept for
// serialization (returns the prepacked_weights_for_serialization_ member).
// NOTE(review): presumably populated when the session is put into model-saving
// mode via SetSaveModeForPrepacks — confirm against the full class definition.
const PrepackedForSerialization& GetPrepackedForSerialization() const {
return prepacked_weights_for_serialization_;
}

private:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SessionState);

Expand Down
129 changes: 121 additions & 8 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "core/graph/indexed_sub_graph.h"
#include "core/graph/model.h"
#include "core/graph/model_load_utils.h"
#include "core/graph/model_saving_options.h"
#include "core/graph/node_attr_utils.h"
#include "core/graph/op.h"
#include "core/graph/runtime_optimization_record_container.h"
Expand Down Expand Up @@ -4085,16 +4086,128 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
return result;
}

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const {
// Create a recursive function that does bottom up with subgraphs
// Serializes this graph's initializers for a model saved with external data:
// dense initializers at or above model_saving_options.initializer_size_threshold
// are written to `external_stream`, and `output_graph_proto` records each
// tensor's external location (file path, offset, length). `external_offset`
// tracks the running write position and is advanced (and optionally padded for
// mmap alignment) as tensors are written.
//
// NOTE(review): this listing appears truncated by the diff view — the function
// is declared to return a GraphProto but no return statement is visible, and
// the subgraph loop below locates subgraph attributes without visibly recursing
// into them. Confirm against the complete source before relying on this text.
ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl(
const std::filesystem::path& model_path,
const std::filesystem::path& external_file_path,
const ModelSavingOptions& model_saving_options,
ONNX_NAMESPACE::GraphProto& output_graph_proto,
std::ostream& external_stream,
int64_t& external_offset) const {
// update external_offset for alignment
// need to do padding before write actual tensor data as we do offset alignment at the begin of
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
// Pads `external_stream` with zero bytes up to the next aligned boundary and
// updates `external_offset` to that boundary.
auto compute_and_pad = [&external_stream](int64_t allocation_granularity, int64_t& external_offset) {
// Align to the larger of the page size or the allocation granularity
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
// Align to the next page or alloc granularity boundary
// NOTE(review): the division below is integer division, which already
// truncates; std::floor on its result is redundant (harmless, not wrong).
int64_t new_external_offset = static_cast<int64_t>(
std::floor((external_offset + alignment_factor - 1) / alignment_factor)) *
alignment_factor;

// padding tensor with zeros for alignment
for (int64_t index = external_offset; index != new_external_offset; ++index) {
external_stream << '\0';
}
external_offset = new_external_offset;
};

// Process subgraphs
// NOTE(review): in the visible code this loop only validates that each node
// with subgraphs, and each of its subgraph attributes, exists in
// output_graph_proto — the recursive call into the subgraph is not shown.
for (const auto& node : Nodes()) {
if (node.ContainsSubgraph()) {
// Let find this node in the output_graph_proto
auto hit = std::find_if(output_graph_proto.node().begin(),
output_graph_proto.node().end(),
[&node](const ONNX_NAMESPACE::NodeProto& proto) {
return proto.name() == node.Name();
});
ORT_ENFORCE(hit != output_graph_proto.node().end(), "Node ", node.Name(),
" not found in output_graph_proto");
auto& result_node = *hit;
for (const auto& [name, subgraph] : node.GetAttributeNameToSubgraphMap()) {
// Lets find this subgraph in the result_node
auto sub_hit = std::find_if(result_node.attribute().begin(),
result_node.attribute().end(),
[&name](const ONNX_NAMESPACE::AttributeProto& proto) {
return proto.name() == name;
});
ORT_ENFORCE(sub_hit != result_node.attribute().end(), "Subgraph ", name,
" not found in node ", node.Name());
}
}
}

// Add the initializers to the result graph.
for (const auto& initializer : graph_proto_->initializer()) {
#if !defined(DISABLE_SPARSE_TENSORS)
if (IsSparseInitializer(initializer.name())) {
// Sparse tensors are added to the ONNX file.
auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer);
ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse");
} else {
#endif
// Dense tensors larger than the threshold are added to the external file.
TensorProto* output_proto = output_graph_proto.add_initializer();

std::vector<uint8_t> raw_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
size_t tensor_bytes_size = raw_data.size();
// Small tensors stay inline in the GraphProto.
if (tensor_bytes_size < model_saving_options.initializer_size_threshold) {
*output_proto = initializer;
continue;
}

// update external_offset for alignment
// need to do padding before write actual tensor data as we do offset alignment at the begin of
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
model_saving_options.align_threshold) {
compute_and_pad(model_saving_options.allocation_granularity, external_offset);
}

if (!external_stream.write(reinterpret_cast<const char*>(raw_data.data()), tensor_bytes_size)) {
// NOTE(review): modified_external_file_path is not declared in this
// function's scope (it exists in the non-Impl caller); this likely should
// reference external_file_path — confirm against the full source.
ORT_THROW("Failed to write external initializers to file: ", modified_external_file_path);
}

// Record where this tensor lives in the external file.
ExternalDataInfo::SetExternalLocationToProto(external_file_path, external_offset,
tensor_bytes_size, *output_proto);

output_proto->set_name(initializer.name());
output_proto->set_data_type(initializer.data_type());
for (int i = 0; i != initializer.dims_size(); ++i) {
output_proto->add_dims(initializer.dims(i));
}
output_proto->set_doc_string(initializer.doc_string());

external_offset += tensor_bytes_size;

const PrepackedForSerialization::Subgraph* prepacked_subgraph = nullptr;
if (model_saving_options.prepacked_for_save != nullptr) {
// NOTE(review): the dereference result of FindOrCreateSubgraph(*this) is
// assigned to a pointer — verify the return type (it may return a
// pointer-like handle); this line may be mangled by the diff view.
prepacked_subgraph = *model_saving_options.prepacked_for_save->FindOrCreateSubgraph(*this);
}

// Closes the dense-tensor `else` branch opened above when sparse tensors
// are enabled; when DISABLE_SPARSE_TENSORS is defined there is no `else`.
#if !defined(DISABLE_SPARSE_TENSORS)
}
#endif
}
}

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(
const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
const ModelSavingOptions& model_saving_options) const {
GraphProto result;
ToGraphProtoInternal(result);
ORT_ENFORCE(external_file_path.is_relative());
// If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could
// be empty. Else, save external data file in same directory as the model.
const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path;
const auto& model_path = ModelPath();

// Create the external file.
std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary);
Expand Down Expand Up @@ -4122,7 +4235,6 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
};

// Add the initializers to the result graph.
const auto& model_path = ModelPath();
#if !defined(DISABLE_SPARSE_TENSORS)
const auto sparse_end = sparse_tensor_names_.end();
#endif
Expand All @@ -4142,7 +4254,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
std::vector<uint8_t> raw_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
size_t tensor_bytes_size = raw_data.size();
if (tensor_bytes_size < initializer_size_threshold) {
if (tensor_bytes_size < model_saving_options.initializer_size_threshold) {
*output_proto = initializer;
continue;
}
Expand All @@ -4152,8 +4264,9 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
if (align_info.align_offset && static_cast<int64_t>(tensor_bytes_size) > align_info.align_threshold) {
compute_and_pad(align_info.allocation_granularity, external_offset);
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
model_saving_options.align_threshold) {
compute_and_pad(model_saving_options.allocation_granularity, external_offset);
}

if (!external_stream.write(reinterpret_cast<const char*>(raw_data.data()), tensor_bytes_size)) {
Expand Down
Loading

0 comments on commit ecadd5c

Please sign in to comment.