diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md deleted file mode 100644 index d28f47186cbea..0000000000000 --- a/onnxruntime/lora/lora_format/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Lora Parameters Flatbuffer Schemas -This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the -Lora Parameters file format. This file format is defined as means to deliver Lora parameters so it can read by ONNXRuntime C++ code. - -The format format is generally designed to house a single Lora adapter named Lora parameters. - -[ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library. - -Please do not directly modify the generated C++ header file for [ONNXRuntime Lora Parameter file format]((lora_schema.fbs.h)). - -Use flatc compiler for the purpose. - -e.g. - - Windows Debug build - - \build\Windows\Debug\_deps\flatbuffers-build\Debug\flatc.exe - - Linux Debug build - - /build/Linux/Debug/_deps/flatbuffers-build/flatc - -It is possible to use another flatc as well, e.g., from a separate installation. - -To update the flatbuffers schemas and generated files: -1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs). -2. Run [compile_schema.py](./compile_schema.py) to generate the C++ bindings. - - ``` - python onnxruntime/lora/lora_format/compile_schema.py --flatc - ``` -# Lora format version history -In [lora_format_version.h](../lora_format_version.h), see `IsLoraParameterslVersionSupported()` for the supported versions and -`kLoraParametersVersion` for the current version. - -## Version 1 -History begins. - -Initial support for FlatBuffers that Lora Parameters support. This includes a definition of Tensor entity -so it can be saved in a tensor per file format. diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py deleted file mode 100644 index f98db367ae83b..0000000000000 --- a/onnxruntime/lora/lora_format/compile_schema.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import argparse -import pathlib -import subprocess - -SCRIPT_DIR = pathlib.Path(__file__).parent.resolve() - - -def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path): - # run flatc to generate C++ code - cmd = [str(flatc), "--cpp", "--scoped-enums", "--filename-suffix", ".fbs", str(schema_path)] - subprocess.run(cmd, check=True, cwd=SCRIPT_DIR) - - -def main(): - parser = argparse.ArgumentParser( - description="Generate language bindings for the ORT flatbuffers schema.", - usage="Provide the path to the flatbuffers flatc executable. " - "Script can be executed from anywhere but must be located in its original " - "directory in the ONNX Runtime enlistment.", - ) - - parser.add_argument( - "-f", - "--flatc", - required=True, - type=pathlib.Path, - help="Path to flatbuffers flatc executable. " - "Can be found in the build directory under _deps/flatbuffers-build//", - ) - - all_languages = ["cpp"] - parser.add_argument( - "-l", - "--language", - action="append", - dest="languages", - choices=all_languages, - help="Specify which language bindings to generate.", - ) - - args = parser.parse_args() - languages = args.languages if args.languages is not None else all_languages - flatc = args.flatc.resolve(strict=True) - schema_path = SCRIPT_DIR / "lora_schema.fbs" - - if "cpp" in languages: - generate_cpp(flatc, schema_path) - - -if __name__ == "__main__": - main() diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs deleted file mode 100644 index 37e8195dab6f2..0000000000000 --- a/onnxruntime/lora/lora_format/lora_schema.fbs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -namespace onnxruntime.lora; - -// Tensor -enum TensorDataType : int32 { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, -} - -// For simplicity, we will have only have one data field -// - raw_data for all primitive types. -// We do not foresee strings as parameters. -table Parameter { - name:string; - - dims:[int64]; - data_type:TensorDataType; - - raw_data:[uint8] (force_align : 8); -} - -table Adapter { - format_version:int; - adapter_version:int; - model_version:int; - parameters:[Parameter]; -} - -root_type Adapter; -file_identifier "GAIL"; diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h deleted file mode 100644 index 097528d854bf8..0000000000000 --- a/onnxruntime/lora/lora_format/lora_schema.fbs.h +++ /dev/null @@ -1,338 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ -#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ - -#include "flatbuffers/flatbuffers.h" - -// Ensure the included flatbuffers.h is the same version as when this file was -// generated, otherwise it may not be compatible. -static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && - FLATBUFFERS_VERSION_MINOR == 5 && - FLATBUFFERS_VERSION_REVISION == 26, - "Non-compatible flatbuffers version included"); - -namespace onnxruntime { -namespace lora { - -struct Parameter; -struct ParameterBuilder; - -struct Adapter; -struct AdapterBuilder; - -enum class TensorDataType : int32_t { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16, - FLOAT8E4M3FN = 17, - FLOAT8E4M3FNUZ = 18, - FLOAT8E5M2 = 19, - FLOAT8E5M2FNUZ = 20, - MIN = UNDEFINED, - MAX = FLOAT8E5M2FNUZ -}; - -inline const TensorDataType (&EnumValuesTensorDataType())[21] { - static const TensorDataType values[] = { - TensorDataType::UNDEFINED, - TensorDataType::FLOAT, - TensorDataType::UINT8, - TensorDataType::INT8, - TensorDataType::UINT16, - TensorDataType::INT16, - TensorDataType::INT32, - TensorDataType::INT64, - TensorDataType::STRING, - TensorDataType::BOOL, - TensorDataType::FLOAT16, - TensorDataType::DOUBLE, - TensorDataType::UINT32, - TensorDataType::UINT64, - TensorDataType::COMPLEX64, - TensorDataType::COMPLEX128, - TensorDataType::BFLOAT16, - TensorDataType::FLOAT8E4M3FN, - TensorDataType::FLOAT8E4M3FNUZ, - TensorDataType::FLOAT8E5M2, - TensorDataType::FLOAT8E5M2FNUZ}; - return values; -} - -inline const char* const* EnumNamesTensorDataType() { - static const char* const names[22] = { - "UNDEFINED", - "FLOAT", - "UINT8", - "INT8", - "UINT16", - "INT16", - "INT32", - "INT64", - "STRING", - "BOOL", - "FLOAT16", - "DOUBLE", - "UINT32", - "UINT64", - "COMPLEX64", - "COMPLEX128", - "BFLOAT16", - "FLOAT8E4M3FN", - "FLOAT8E4M3FNUZ", - "FLOAT8E5M2", - "FLOAT8E5M2FNUZ", - nullptr}; - return names; -} - -inline const char* EnumNameTensorDataType(TensorDataType e) { - if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return ""; - const size_t index = static_cast(e); - return EnumNamesTensorDataType()[index]; -} - -struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { - typedef ParameterBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_NAME = 4, - VT_DIMS = 6, - VT_DATA_TYPE = 8, - VT_RAW_DATA = 10 - }; - const ::flatbuffers::String* name() const { - return GetPointer(VT_NAME); - } - const ::flatbuffers::Vector* dims() const { - return GetPointer*>(VT_DIMS); - } - onnxruntime::lora::TensorDataType data_type() const { - return static_cast(GetField(VT_DATA_TYPE, 0)); - } - const ::flatbuffers::Vector* raw_data() const { - return GetPointer*>(VT_RAW_DATA); - } - bool Verify(::flatbuffers::Verifier& verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_NAME) && - verifier.VerifyString(name()) && - VerifyOffset(verifier, VT_DIMS) && - verifier.VerifyVector(dims()) && - VerifyField(verifier, VT_DATA_TYPE, 4) && - VerifyOffset(verifier, VT_RAW_DATA) && - verifier.VerifyVector(raw_data()) && - verifier.EndTable(); - } -}; - -struct ParameterBuilder { - typedef Parameter Table; - ::flatbuffers::FlatBufferBuilder& fbb_; - ::flatbuffers::uoffset_t start_; - void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { - fbb_.AddOffset(Parameter::VT_NAME, name); - } - void add_dims(::flatbuffers::Offset<::flatbuffers::Vector> dims) { - fbb_.AddOffset(Parameter::VT_DIMS, dims); - } - void add_data_type(onnxruntime::lora::TensorDataType data_type) { - fbb_.AddElement(Parameter::VT_DATA_TYPE, static_cast(data_type), 0); - } - void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector> raw_data) { - fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data); - } - explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = ::flatbuffers::Offset(end); - return o; - } -}; - -inline ::flatbuffers::Offset CreateParameter( - ::flatbuffers::FlatBufferBuilder& _fbb, - ::flatbuffers::Offset<::flatbuffers::String> name = 0, - ::flatbuffers::Offset<::flatbuffers::Vector> dims = 0, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, - ::flatbuffers::Offset<::flatbuffers::Vector> raw_data = 0) { - ParameterBuilder builder_(_fbb); - builder_.add_raw_data(raw_data); - builder_.add_data_type(data_type); - builder_.add_dims(dims); - builder_.add_name(name); - return builder_.Finish(); -} - -inline ::flatbuffers::Offset CreateParameterDirect( - ::flatbuffers::FlatBufferBuilder& _fbb, - const char* name = nullptr, - const std::vector* dims = nullptr, - onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED, - const std::vector* raw_data = nullptr) { - auto name__ = name ? _fbb.CreateString(name) : 0; - auto dims__ = dims ? _fbb.CreateVector(*dims) : 0; - if (raw_data) { - _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8); - } - auto raw_data__ = raw_data ? _fbb.CreateVector(*raw_data) : 0; - return onnxruntime::lora::CreateParameter( - _fbb, - name__, - dims__, - data_type, - raw_data__); -} - -struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { - typedef AdapterBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FORMAT_VERSION = 4, - VT_ADAPTER_VERSION = 6, - VT_MODEL_VERSION = 8, - VT_PARAMETERS = 10 - }; - int32_t format_version() const { - return GetField(VT_FORMAT_VERSION, 0); - } - int32_t adapter_version() const { - return GetField(VT_ADAPTER_VERSION, 0); - } - int32_t model_version() const { - return GetField(VT_MODEL_VERSION, 0); - } - const ::flatbuffers::Vector<::flatbuffers::Offset>* parameters() const { - return GetPointer>*>(VT_PARAMETERS); - } - bool Verify(::flatbuffers::Verifier& verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_FORMAT_VERSION, 4) && - VerifyField(verifier, VT_ADAPTER_VERSION, 4) && - VerifyField(verifier, VT_MODEL_VERSION, 4) && - VerifyOffset(verifier, VT_PARAMETERS) && - verifier.VerifyVector(parameters()) && - verifier.VerifyVectorOfTables(parameters()) && - verifier.EndTable(); - } -}; - -struct AdapterBuilder { - typedef Adapter Table; - ::flatbuffers::FlatBufferBuilder& fbb_; - ::flatbuffers::uoffset_t start_; - void add_format_version(int32_t format_version) { - fbb_.AddElement(Adapter::VT_FORMAT_VERSION, format_version, 0); - } - void add_adapter_version(int32_t adapter_version) { - fbb_.AddElement(Adapter::VT_ADAPTER_VERSION, adapter_version, 0); - } - void add_model_version(int32_t model_version) { - fbb_.AddElement(Adapter::VT_MODEL_VERSION, model_version, 0); - } - void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters) { - fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters); - } - explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder& _fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = ::flatbuffers::Offset(end); - return o; - } -}; - -inline ::flatbuffers::Offset CreateAdapter( - ::flatbuffers::FlatBufferBuilder& _fbb, - int32_t format_version = 0, - int32_t adapter_version = 0, - int32_t model_version = 0, - ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> parameters = 0) { - AdapterBuilder builder_(_fbb); - builder_.add_parameters(parameters); - builder_.add_model_version(model_version); - builder_.add_adapter_version(adapter_version); - builder_.add_format_version(format_version); - return builder_.Finish(); -} - -inline ::flatbuffers::Offset CreateAdapterDirect( - ::flatbuffers::FlatBufferBuilder& _fbb, - int32_t format_version = 0, - int32_t adapter_version = 0, - int32_t model_version = 0, - const std::vector<::flatbuffers::Offset>* parameters = nullptr) { - auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset>(*parameters) : 0; - return onnxruntime::lora::CreateAdapter( - _fbb, - format_version, - adapter_version, - model_version, - parameters__); -} - -inline const onnxruntime::lora::Adapter* GetAdapter(const void* buf) { - return ::flatbuffers::GetRoot(buf); -} - -inline const onnxruntime::lora::Adapter* GetSizePrefixedAdapter(const void* buf) { - return ::flatbuffers::GetSizePrefixedRoot(buf); -} - -inline const char* AdapterIdentifier() { - return "GAIL"; -} - -inline bool AdapterBufferHasIdentifier(const void* buf) { - return ::flatbuffers::BufferHasIdentifier( - buf, AdapterIdentifier()); -} - -inline bool SizePrefixedAdapterBufferHasIdentifier(const void* buf) { - return ::flatbuffers::BufferHasIdentifier( - buf, AdapterIdentifier(), true); -} - -inline bool VerifyAdapterBuffer( - ::flatbuffers::Verifier& verifier) { - return verifier.VerifyBuffer(AdapterIdentifier()); -} - -inline bool VerifySizePrefixedAdapterBuffer( - ::flatbuffers::Verifier& verifier) { - return verifier.VerifySizePrefixedBuffer(AdapterIdentifier()); -} - -inline void FinishAdapterBuffer( - ::flatbuffers::FlatBufferBuilder& fbb, - ::flatbuffers::Offset root) { - fbb.Finish(root, AdapterIdentifier()); -} - -inline void FinishSizePrefixedAdapterBuffer( - ::flatbuffers::FlatBufferBuilder& fbb, - ::flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root, AdapterIdentifier()); -} - -} // namespace lora -} // namespace onnxruntime - -#endif // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_ diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc deleted file mode 100644 index 9a4c1ce6f2415..0000000000000 --- a/onnxruntime/lora/lora_format_utils.cc +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "lora_format_utils.h" -#include "lora_format_version.h" - -#include "core/common/common.h" -#include "core/common/span_utils.h" -#include "core/framework/ortdevice.h" -#include "core/framework/ortmemoryinfo.h" -#include "core/framework/ort_value.h" -#include "core/framework/tensor.h" - -#include - -namespace onnxruntime { -namespace lora { -namespace utils { - -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) { - return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory - AdapterBufferHasIdentifier(bytes); -} - -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src) { - if (has_string) return builder.CreateString(src); - - // If the string does not exist, return 0 (the string does not exist in flatbuffer) - return 0; -} - -void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) { - if (fbs_string) { - dst = fbs_string->str(); - } -} - -std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path) { - Env& env = Env::Default(); - - size_t file_size = 0; - ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); - - std::vector result; - result.resize(file_size); - - // The API accepts char span, so we need to reinterpret the uint8_t span as char span - auto dest_span = ReinterpretAsSpan(AsSpan(result)); - ORT_THROW_IF_ERROR(env.ReadFileIntoBuffer(file_path.c_str(), 0, file_size, dest_span)); - - return result; -} - -std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path) { - Env& env = Env::Default(); - - size_t file_size = 0; - ORT_THROW_IF_ERROR(env.GetFileLength(file_path.c_str(), file_size)); - - Env::MappedMemoryPtr result; - ORT_THROW_IF_ERROR(env.MapFileIntoMemory(file_path.c_str(), 0, file_size, result)); - - return {std::move(result), file_size}; -} - -const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes) { - if (!IsLoraFormatModelBytes(bytes.data(), bytes.size())) { - ORT_THROW("The buffer does not appear to be a valid lora parameter format"); - } - - flatbuffers::Verifier verifier(bytes.data(), bytes.size()); - if (!VerifyAdapterBuffer(verifier)) { - ORT_THROW("The buffer fails lora adapter format verification"); - } - - auto* adapter = GetAdapter(bytes.data()); - if (!IsLoraFormatVersionSupported(adapter->format_version())) { - ORT_THROW("Unsupported lora format version"); - } - - return adapter; -} - -void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, - TensorDataType data_type, gsl::span shape, - gsl::span data, - flatbuffers::Offset& fbs_tensor) { - auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size()); - auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size()); - auto data_vec = flat_builder.CreateVector(data.data(), data.size()); - - fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec); -} - -std::pair CreateOrtValueOverLoraParameter(const Parameter& param) { - OrtValue result; - - std::string name; - LoadStringFromLoraFormat(name, param.name()); - - const auto data_type = param.data_type(); - gsl::span shape_span(param.dims()->data(), param.dims()->size()); - - static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator); - - auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast(data_type))->GetElementType(); - // const_cast is necessery due to Tensor class API - Tensor::InitOrtValue(elem_type, - TensorShape(shape_span), - const_cast(param.raw_data()->data()), - cpu_meminfo, - result); - - return std::make_pair(std::move(name), std::move(result)); -} - -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) { - OrtValue result; - - const auto& tensor = ort_value_mapped.Get(); - Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator); - - return result; -} - -void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type, - gsl::span shape, gsl::span data) { - flatbuffers::Offset fbs_param; - SaveLoraParameter(builder_, name, data_type, shape, data, fbs_param); - params_.push_back(fbs_param); -} - -std::vector AdapterFormatBuilder::Finish(int adapter_version, int model_version) { - FinishImpl(adapter_version, model_version); - - std::vector result; - result.reserve(builder_.GetSize()); - gsl::span buffer(builder_.GetBufferPointer(), builder_.GetSize()); - std::copy(buffer.begin(), buffer.end(), std::back_inserter(result)); - return result; -} - -gsl::span AdapterFormatBuilder::FinishWithSpan(int adapter_version, int model_version) { - FinishImpl(adapter_version, model_version); - return gsl::make_span(builder_.GetBufferPointer(), builder_.GetSize()); -} - -void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) { - auto fbs_params = builder_.CreateVector(params_); - auto fbs_adapter = lora::CreateAdapter(builder_, lora::kLoraFormatVersion, adapter_version, - model_version, fbs_params); - builder_.Finish(fbs_adapter, lora::AdapterIdentifier()); -} - -} // namespace utils -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h deleted file mode 100644 index e7e341945f2ca..0000000000000 --- a/onnxruntime/lora/lora_format_utils.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include "core/common/flatbuffers.h" -#include "core/framework/allocator.h" -#include "core/platform/env.h" - -#include -#include - -#include "lora_format/lora_schema.fbs.h" - -#include -#include -#include -#include - -struct OrtValue; - -namespace onnxruntime { -namespace lora { -namespace utils { - -/// -/// Helper class to serialize Lora adapter -/// -class AdapterFormatBuilder { - public: - AdapterFormatBuilder() = default; - - /// - /// Appends parameter tensor to the adapter builder - /// - /// parameter name - /// - /// - /// - void AddParameter(const std::string& name, lora::TensorDataType data_type, - gsl::span shape, gsl::span data); - - /// - /// Finishes serialization and returns a serialized byte vector - /// - /// - /// - /// - std::vector Finish(int adapter_version, int model_version); - - /// - /// Finishes serialization and returns a span to internal buffer. - /// - /// - /// - /// - gsl::span FinishWithSpan(int adapter_version, int model_version); - - private: - void FinishImpl(int adapter_version, int model_version); - - flatbuffers::FlatBufferBuilder builder_; - std::vector> params_; -}; - -/// -/// -/// -/// -/// -/// -bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes); - -// Will only create string in flatbuffers when has_string is true -flatbuffers::Offset SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder, - bool has_string, const std::string& src); - -void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string); - -/// -/// The function loads the lora adapter bytes from the file system -/// -/// file path -/// bytes in a vector -/// If the path can not be found -std::vector LoadLoraAdapterBytes(const std::filesystem::path& file_path); - -/// -/// This function memory maps the adapter file in memory -/// -/// -/// memory handle and file size in a tuple -std::pair MemoryMapAdapterFile(const std::filesystem::path& file_path); - -/// -/// Validates underlying format and the format version -/// -/// -/// Adapter ptr -const Adapter* ValidateAndGetAdapterFromBytes(gsl::span bytes); - -/// -/// Serializes tensor data into flatbuffer -/// -/// -/// parameter name -/// doc, optional -/// -/// -/// -/// output offset -void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name, - lora::TensorDataType data_type, - gsl::span shape, gsl::span data, - flatbuffers::Offset& fbs_tensor); - -/// -/// Create an OrtValue on top of the flatbuffer tensor -/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer -/// structures. -/// -/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying. -/// -/// -/// -std::pair CreateOrtValueOverLoraParameter(const Parameter& param); - -/// -/// Allocates OrtValue on specified device and copies data there -/// -/// parameter on CPU -/// supplied device allocator -/// -OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator); - -} // namespace utils -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h deleted file mode 100644 index 9c90a86b16382..0000000000000 --- a/onnxruntime/lora/lora_format_version.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include -#include - -namespace onnxruntime { -namespace lora { - -// The current model versions for saving lora parameters in flatbuffers -// Once this version is updated, the kSupportedLoraFormatVersions in IsGenAiLoraFormatModelBytes -// below will also need to be updated. -// See src/flatbuffers/schema/README.md for more details on versioning. -// Version 1 - history begins -constexpr const int kLoraFormatVersion = 1; - -// Check if the given lora format version is supported in this build -inline bool IsLoraFormatVersionSupported(const int lora_format_version) { - // The lora format versions we will support in this build - // This may contain more versions than the kLoraFormatVersion, based on the compatibilities - static constexpr std::array kSupportedLoraFormatVersions{ - kLoraFormatVersion, - }; - - const auto it = - std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version); - return it != kSupportedLoraFormatVersions.cend(); -} - -} // namespace lora -} // namespace onnxruntime diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc index c99236498e5b6..72c92abcf0539 100644 --- a/onnxruntime/python/onnxruntime_pybind_lora.cc +++ b/onnxruntime/python/onnxruntime_pybind_lora.cc @@ -144,7 +144,8 @@ void addAdapterFormatMethods(pybind11::module& m) { py::class_ lora_adapter_binding(m, "LoraAdapter"); lora_adapter_binding.def(py::init()) - .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); + .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); }, + R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc"); } } // namespace python