diff --git a/onnxruntime/lora/lora_format/README.md b/onnxruntime/lora/lora_format/README.md
deleted file mode 100644
index d28f47186cbea..0000000000000
--- a/onnxruntime/lora/lora_format/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Lora Parameters Flatbuffer Schemas
-This directory contains [ONNXRuntime Lora Parameter format schema](lora_schema.fbs) and [the generated C++ header file](lora_schema.fbs.h) for the
-Lora Parameters file format. This file format is defined as a means to deliver Lora parameters so they can be read by ONNXRuntime C++ code.
-
-The format is generally designed to house a single Lora adapter with named Lora parameters.
-
-[ONNXRuntime Lora Parameter file format schema](lora_schema.fbs) uses the [FlatBuffers](https://github.com/google/flatbuffers) serialization library.
-
-Please do not directly modify the generated C++ header file for [ONNXRuntime Lora Parameter file format](lora_schema.fbs.h).
-
-Use the flatc compiler to regenerate it.
-
-e.g.
-  - Windows Debug build
-    - \build\Windows\Debug\_deps\flatbuffers-build\Debug\flatc.exe
-  - Linux Debug build
-    - /build/Linux/Debug/_deps/flatbuffers-build/flatc
-
-It is possible to use another flatc as well, e.g., from a separate installation.
-
-To update the flatbuffers schemas and generated files:
-1. Modify [ONNXRuntime Lora Parameter file format schema](lora_schema.fbs).
-2. Run [compile_schema.py](./compile_schema.py) to generate the C++ bindings.
-
-    ```
-    python onnxruntime/lora/lora_format/compile_schema.py --flatc <path to flatc>
-    ```
-# Lora format version history
-In [lora_format_version.h](../lora_format_version.h), see `IsLoraFormatVersionSupported()` for the supported versions and
-`kLoraFormatVersion` for the current version.
-
-## Version 1
-History begins.
-
-Initial FlatBuffers support for Lora Parameters. This includes a definition of the Tensor entity
-so it can be saved in a tensor per file format.
diff --git a/onnxruntime/lora/lora_format/compile_schema.py b/onnxruntime/lora/lora_format/compile_schema.py
deleted file mode 100644
index f98db367ae83b..0000000000000
--- a/onnxruntime/lora/lora_format/compile_schema.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import argparse
-import pathlib
-import subprocess
-
-SCRIPT_DIR = pathlib.Path(__file__).parent.resolve()
-
-
-def generate_cpp(flatc: pathlib.Path, schema_path: pathlib.Path):
-    """Invoke flatc to generate the C++ bindings for schema_path.
-
-    flatc is executed with SCRIPT_DIR as its working directory; check=True makes
-    a non-zero exit status raise subprocess.CalledProcessError.
-    """
-    flatc_options = ["--cpp", "--scoped-enums", "--filename-suffix", ".fbs"]
-    command = [str(flatc), *flatc_options, str(schema_path)]
-    subprocess.run(command, check=True, cwd=SCRIPT_DIR)
-
-
-def main():
-    """Parse the command line and generate the requested language bindings."""
-    supported_languages = ["cpp"]
-
-    arg_parser = argparse.ArgumentParser(
-        description="Generate language bindings for the ORT flatbuffers schema.",
-        usage="Provide the path to the flatbuffers flatc executable. "
-        "Script can be executed from anywhere but must be located in its original "
-        "directory in the ONNX Runtime enlistment.",
-    )
-    arg_parser.add_argument(
-        "-f",
-        "--flatc",
-        type=pathlib.Path,
-        required=True,
-        help="Path to flatbuffers flatc executable. "
-        "Can be found in the build directory under _deps/flatbuffers-build/<config>/",
-    )
-    arg_parser.add_argument(
-        "-l",
-        "--language",
-        dest="languages",
-        action="append",
-        choices=supported_languages,
-        help="Specify which language bindings to generate.",
-    )
-
-    parsed = arg_parser.parse_args()
-    # Without any --language option every supported binding is generated.
-    requested = supported_languages if parsed.languages is None else parsed.languages
-    # strict=True makes resolve() raise when the flatc path does not exist.
-    flatc_executable = parsed.flatc.resolve(strict=True)
-
-    if "cpp" not in requested:
-        return
-    generate_cpp(flatc_executable, SCRIPT_DIR / "lora_schema.fbs")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs b/onnxruntime/lora/lora_format/lora_schema.fbs
deleted file mode 100644
index 37e8195dab6f2..0000000000000
--- a/onnxruntime/lora/lora_format/lora_schema.fbs
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace onnxruntime.lora;
-
-// Tensor
-enum TensorDataType : int32 {
-  UNDEFINED = 0,
-  FLOAT = 1,
-  UINT8 = 2,
-  INT8 = 3,
-  UINT16 = 4,
-  INT16 = 5,
-  INT32 = 6,
-  INT64 = 7,
-  STRING = 8,
-  BOOL = 9,
-  FLOAT16 = 10,
-  DOUBLE = 11,
-  UINT32 = 12,
-  UINT64 = 13,
-  COMPLEX64 = 14,
-  COMPLEX128 = 15,
-  BFLOAT16 = 16,
-  FLOAT8E4M3FN = 17,
-  FLOAT8E4M3FNUZ = 18,
-  FLOAT8E5M2 = 19,
-  FLOAT8E5M2FNUZ = 20,
-}
-
-// For simplicity, we will only have one data field
-// - raw_data for all primitive types.
-// We do not foresee strings as parameters.
-table Parameter {
-  name:string;
-
-  dims:[int64];
-  data_type:TensorDataType;
-
-  raw_data:[uint8] (force_align : 8);
-}
-
-table Adapter {
-  format_version:int;
-  adapter_version:int;
-  model_version:int;
-  parameters:[Parameter];
-}
-
-root_type Adapter;
-file_identifier "GAIL";
diff --git a/onnxruntime/lora/lora_format/lora_schema.fbs.h b/onnxruntime/lora/lora_format/lora_schema.fbs.h
deleted file mode 100644
index 097528d854bf8..0000000000000
--- a/onnxruntime/lora/lora_format/lora_schema.fbs.h
+++ /dev/null
@@ -1,338 +0,0 @@
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#ifndef FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_
-#define FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-// Ensure the included flatbuffers.h is the same version as when this file was
-// generated, otherwise it may not be compatible.
-static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
-                  FLATBUFFERS_VERSION_MINOR == 5 &&
-                  FLATBUFFERS_VERSION_REVISION == 26,
-              "Non-compatible flatbuffers version included");
-
-namespace onnxruntime {
-namespace lora {
-
-struct Parameter;
-struct ParameterBuilder;
-
-struct Adapter;
-struct AdapterBuilder;
-
-enum class TensorDataType : int32_t {
-  UNDEFINED = 0,
-  FLOAT = 1,
-  UINT8 = 2,
-  INT8 = 3,
-  UINT16 = 4,
-  INT16 = 5,
-  INT32 = 6,
-  INT64 = 7,
-  STRING = 8,
-  BOOL = 9,
-  FLOAT16 = 10,
-  DOUBLE = 11,
-  UINT32 = 12,
-  UINT64 = 13,
-  COMPLEX64 = 14,
-  COMPLEX128 = 15,
-  BFLOAT16 = 16,
-  FLOAT8E4M3FN = 17,
-  FLOAT8E4M3FNUZ = 18,
-  FLOAT8E5M2 = 19,
-  FLOAT8E5M2FNUZ = 20,
-  MIN = UNDEFINED,
-  MAX = FLOAT8E5M2FNUZ
-};
-
-inline const TensorDataType (&EnumValuesTensorDataType())[21] {
-  static const TensorDataType values[] = {
-      TensorDataType::UNDEFINED,
-      TensorDataType::FLOAT,
-      TensorDataType::UINT8,
-      TensorDataType::INT8,
-      TensorDataType::UINT16,
-      TensorDataType::INT16,
-      TensorDataType::INT32,
-      TensorDataType::INT64,
-      TensorDataType::STRING,
-      TensorDataType::BOOL,
-      TensorDataType::FLOAT16,
-      TensorDataType::DOUBLE,
-      TensorDataType::UINT32,
-      TensorDataType::UINT64,
-      TensorDataType::COMPLEX64,
-      TensorDataType::COMPLEX128,
-      TensorDataType::BFLOAT16,
-      TensorDataType::FLOAT8E4M3FN,
-      TensorDataType::FLOAT8E4M3FNUZ,
-      TensorDataType::FLOAT8E5M2,
-      TensorDataType::FLOAT8E5M2FNUZ};
-  return values;
-}
-
-inline const char* const* EnumNamesTensorDataType() {
-  static const char* const names[22] = {
-      "UNDEFINED",
-      "FLOAT",
-      "UINT8",
-      "INT8",
-      "UINT16",
-      "INT16",
-      "INT32",
-      "INT64",
-      "STRING",
-      "BOOL",
-      "FLOAT16",
-      "DOUBLE",
-      "UINT32",
-      "UINT64",
-      "COMPLEX64",
-      "COMPLEX128",
-      "BFLOAT16",
-      "FLOAT8E4M3FN",
-      "FLOAT8E4M3FNUZ",
-      "FLOAT8E5M2",
-      "FLOAT8E5M2FNUZ",
-      nullptr};
-  return names;
-}
-
-inline const char* EnumNameTensorDataType(TensorDataType e) {
-  if (::flatbuffers::IsOutRange(e, TensorDataType::UNDEFINED, TensorDataType::FLOAT8E5M2FNUZ)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesTensorDataType()[index];
-}
-
-struct Parameter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
-  typedef ParameterBuilder Builder;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NAME = 4,
-    VT_DIMS = 6,
-    VT_DATA_TYPE = 8,
-    VT_RAW_DATA = 10
-  };
-  const ::flatbuffers::String* name() const {
-    return GetPointer<const ::flatbuffers::String*>(VT_NAME);
-  }
-  const ::flatbuffers::Vector<int64_t>* dims() const {
-    return GetPointer<const ::flatbuffers::Vector<int64_t>*>(VT_DIMS);
-  }
-  onnxruntime::lora::TensorDataType data_type() const {
-    return static_cast<onnxruntime::lora::TensorDataType>(GetField<int32_t>(VT_DATA_TYPE, 0));
-  }
-  const ::flatbuffers::Vector<uint8_t>* raw_data() const {
-    return GetPointer<const ::flatbuffers::Vector<uint8_t>*>(VT_RAW_DATA);
-  }
-  bool Verify(::flatbuffers::Verifier& verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.VerifyString(name()) &&
-           VerifyOffset(verifier, VT_DIMS) &&
-           verifier.VerifyVector(dims()) &&
-           VerifyField<int32_t>(verifier, VT_DATA_TYPE, 4) &&
-           VerifyOffset(verifier, VT_RAW_DATA) &&
-           verifier.VerifyVector(raw_data()) &&
-           verifier.EndTable();
-  }
-};
-
-struct ParameterBuilder {
-  typedef Parameter Table;
-  ::flatbuffers::FlatBufferBuilder& fbb_;
-  ::flatbuffers::uoffset_t start_;
-  void add_name(::flatbuffers::Offset<::flatbuffers::String> name) {
-    fbb_.AddOffset(Parameter::VT_NAME, name);
-  }
-  void add_dims(::flatbuffers::Offset<::flatbuffers::Vector<int64_t>> dims) {
-    fbb_.AddOffset(Parameter::VT_DIMS, dims);
-  }
-  void add_data_type(onnxruntime::lora::TensorDataType data_type) {
-    fbb_.AddElement<int32_t>(Parameter::VT_DATA_TYPE, static_cast<int32_t>(data_type), 0);
-  }
-  void add_raw_data(::flatbuffers::Offset<::flatbuffers::Vector<uint8_t>> raw_data) {
-    fbb_.AddOffset(Parameter::VT_RAW_DATA, raw_data);
-  }
-  explicit ParameterBuilder(::flatbuffers::FlatBufferBuilder& _fbb)
-      : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ::flatbuffers::Offset<Parameter> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = ::flatbuffers::Offset<Parameter>(end);
-    return o;
-  }
-};
-
-inline ::flatbuffers::Offset<Parameter> CreateParameter(
-    ::flatbuffers::FlatBufferBuilder& _fbb,
-    ::flatbuffers::Offset<::flatbuffers::String> name = 0,
-    ::flatbuffers::Offset<::flatbuffers::Vector<int64_t>> dims = 0,
-    onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED,
-    ::flatbuffers::Offset<::flatbuffers::Vector<uint8_t>> raw_data = 0) {
-  ParameterBuilder builder_(_fbb);
-  builder_.add_raw_data(raw_data);
-  builder_.add_data_type(data_type);
-  builder_.add_dims(dims);
-  builder_.add_name(name);
-  return builder_.Finish();
-}
-
-inline ::flatbuffers::Offset<Parameter> CreateParameterDirect(
-    ::flatbuffers::FlatBufferBuilder& _fbb,
-    const char* name = nullptr,
-    const std::vector<int64_t>* dims = nullptr,
-    onnxruntime::lora::TensorDataType data_type = onnxruntime::lora::TensorDataType::UNDEFINED,
-    const std::vector<uint8_t>* raw_data = nullptr) {
-  auto name__ = name ? _fbb.CreateString(name) : 0;
-  auto dims__ = dims ? _fbb.CreateVector<int64_t>(*dims) : 0;
-  if (raw_data) {
-    _fbb.ForceVectorAlignment(raw_data->size(), sizeof(uint8_t), 8);
-  }
-  auto raw_data__ = raw_data ? _fbb.CreateVector<uint8_t>(*raw_data) : 0;
-  return onnxruntime::lora::CreateParameter(
-      _fbb,
-      name__,
-      dims__,
-      data_type,
-      raw_data__);
-}
-
-struct Adapter FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
-  typedef AdapterBuilder Builder;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FORMAT_VERSION = 4,
-    VT_ADAPTER_VERSION = 6,
-    VT_MODEL_VERSION = 8,
-    VT_PARAMETERS = 10
-  };
-  int32_t format_version() const {
-    return GetField<int32_t>(VT_FORMAT_VERSION, 0);
-  }
-  int32_t adapter_version() const {
-    return GetField<int32_t>(VT_ADAPTER_VERSION, 0);
-  }
-  int32_t model_version() const {
-    return GetField<int32_t>(VT_MODEL_VERSION, 0);
-  }
-  const ::flatbuffers::Vector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>* parameters() const {
-    return GetPointer<const ::flatbuffers::Vector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>*>(VT_PARAMETERS);
-  }
-  bool Verify(::flatbuffers::Verifier& verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_FORMAT_VERSION, 4) &&
-           VerifyField<int32_t>(verifier, VT_ADAPTER_VERSION, 4) &&
-           VerifyField<int32_t>(verifier, VT_MODEL_VERSION, 4) &&
-           VerifyOffset(verifier, VT_PARAMETERS) &&
-           verifier.VerifyVector(parameters()) &&
-           verifier.VerifyVectorOfTables(parameters()) &&
-           verifier.EndTable();
-  }
-};
-
-struct AdapterBuilder {
-  typedef Adapter Table;
-  ::flatbuffers::FlatBufferBuilder& fbb_;
-  ::flatbuffers::uoffset_t start_;
-  void add_format_version(int32_t format_version) {
-    fbb_.AddElement<int32_t>(Adapter::VT_FORMAT_VERSION, format_version, 0);
-  }
-  void add_adapter_version(int32_t adapter_version) {
-    fbb_.AddElement<int32_t>(Adapter::VT_ADAPTER_VERSION, adapter_version, 0);
-  }
-  void add_model_version(int32_t model_version) {
-    fbb_.AddElement<int32_t>(Adapter::VT_MODEL_VERSION, model_version, 0);
-  }
-  void add_parameters(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>> parameters) {
-    fbb_.AddOffset(Adapter::VT_PARAMETERS, parameters);
-  }
-  explicit AdapterBuilder(::flatbuffers::FlatBufferBuilder& _fbb)
-      : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ::flatbuffers::Offset<Adapter> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = ::flatbuffers::Offset<Adapter>(end);
-    return o;
-  }
-};
-
-inline ::flatbuffers::Offset<Adapter> CreateAdapter(
-    ::flatbuffers::FlatBufferBuilder& _fbb,
-    int32_t format_version = 0,
-    int32_t adapter_version = 0,
-    int32_t model_version = 0,
-    ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>> parameters = 0) {
-  AdapterBuilder builder_(_fbb);
-  builder_.add_parameters(parameters);
-  builder_.add_model_version(model_version);
-  builder_.add_adapter_version(adapter_version);
-  builder_.add_format_version(format_version);
-  return builder_.Finish();
-}
-
-inline ::flatbuffers::Offset<Adapter> CreateAdapterDirect(
-    ::flatbuffers::FlatBufferBuilder& _fbb,
-    int32_t format_version = 0,
-    int32_t adapter_version = 0,
-    int32_t model_version = 0,
-    const std::vector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>* parameters = nullptr) {
-  auto parameters__ = parameters ? _fbb.CreateVector<::flatbuffers::Offset<onnxruntime::lora::Parameter>>(*parameters) : 0;
-  return onnxruntime::lora::CreateAdapter(
-      _fbb,
-      format_version,
-      adapter_version,
-      model_version,
-      parameters__);
-}
-
-inline const onnxruntime::lora::Adapter* GetAdapter(const void* buf) {
-  return ::flatbuffers::GetRoot<onnxruntime::lora::Adapter>(buf);
-}
-
-inline const onnxruntime::lora::Adapter* GetSizePrefixedAdapter(const void* buf) {
-  return ::flatbuffers::GetSizePrefixedRoot<onnxruntime::lora::Adapter>(buf);
-}
-
-inline const char* AdapterIdentifier() {
-  return "GAIL";
-}
-
-inline bool AdapterBufferHasIdentifier(const void* buf) {
-  return ::flatbuffers::BufferHasIdentifier(
-      buf, AdapterIdentifier());
-}
-
-inline bool SizePrefixedAdapterBufferHasIdentifier(const void* buf) {
-  return ::flatbuffers::BufferHasIdentifier(
-      buf, AdapterIdentifier(), true);
-}
-
-inline bool VerifyAdapterBuffer(
-    ::flatbuffers::Verifier& verifier) {
-  return verifier.VerifyBuffer<onnxruntime::lora::Adapter>(AdapterIdentifier());
-}
-
-inline bool VerifySizePrefixedAdapterBuffer(
-    ::flatbuffers::Verifier& verifier) {
-  return verifier.VerifySizePrefixedBuffer<onnxruntime::lora::Adapter>(AdapterIdentifier());
-}
-
-inline void FinishAdapterBuffer(
-    ::flatbuffers::FlatBufferBuilder& fbb,
-    ::flatbuffers::Offset<onnxruntime::lora::Adapter> root) {
-  fbb.Finish(root, AdapterIdentifier());
-}
-
-inline void FinishSizePrefixedAdapterBuffer(
-    ::flatbuffers::FlatBufferBuilder& fbb,
-    ::flatbuffers::Offset<onnxruntime::lora::Adapter> root) {
-  fbb.FinishSizePrefixed(root, AdapterIdentifier());
-}
-
-}  // namespace lora
-}  // namespace onnxruntime
-
-#endif  // FLATBUFFERS_GENERATED_LORASCHEMA_ONNXRUNTIME_LORA_H_
diff --git a/onnxruntime/lora/lora_format_utils.cc b/onnxruntime/lora/lora_format_utils.cc
deleted file mode 100644
index 9a4c1ce6f2415..0000000000000
--- a/onnxruntime/lora/lora_format_utils.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#include "lora_format_utils.h"
-#include "lora_format_version.h"
-
-#include "core/common/common.h"
-#include "core/common/span_utils.h"
-#include "core/framework/ortdevice.h"
-#include "core/framework/ortmemoryinfo.h"
-#include "core/framework/ort_value.h"
-#include "core/framework/tensor.h"
-
-#include <fstream>
-
-namespace onnxruntime {
-namespace lora {
-namespace utils {
-
-// Returns true when the buffer is longer than 8 bytes and carries the Lora adapter
-// flatbuffers file identifier.
-bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes) {
-  // Reject short buffers first so the identifier check never reads random memory.
-  if (num_bytes <= 8) {
-    return false;
-  }
-
-  return AdapterBufferHasIdentifier(bytes);
-}
-
-// Serializes src into the builder when has_string is set. Otherwise returns offset 0,
-// which means the string does not exist in the flatbuffer.
-flatbuffers::Offset<flatbuffers::String> SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder,
-                                                                bool has_string, const std::string& src) {
-  if (!has_string) {
-    return 0;
-  }
-
-  return builder.CreateString(src);
-}
-
-// Copies the flatbuffers string into dst. A null fbs_string leaves dst untouched.
-void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) {
-  if (fbs_string == nullptr) {
-    return;
-  }
-
-  dst = fbs_string->str();
-}
-
-// Reads the whole adapter file into a byte vector.
-// Throws if the file length cannot be obtained or the read fails.
-std::vector<uint8_t> LoadLoraAdapterBytes(const std::filesystem::path& file_path) {
-  const auto* native_path = file_path.c_str();
-  Env& platform_env = Env::Default();
-
-  size_t byte_count = 0;
-  ORT_THROW_IF_ERROR(platform_env.GetFileLength(native_path, byte_count));
-
-  std::vector<uint8_t> file_bytes(byte_count);
-
-  // ReadFileIntoBuffer takes a span of char, so view the uint8_t storage as char.
-  auto char_view = ReinterpretAsSpan<char>(AsSpan(file_bytes));
-  ORT_THROW_IF_ERROR(platform_env.ReadFileIntoBuffer(native_path, 0, byte_count, char_view));
-
-  return file_bytes;
-}
-
-// Memory maps the whole adapter file and returns the mapping together with the file size.
-// Throws if the file length cannot be obtained or the mapping fails.
-std::pair<Env::MappedMemoryPtr, size_t> MemoryMapAdapterFile(const std::filesystem::path& file_path) {
-  const auto* native_path = file_path.c_str();
-  Env& platform_env = Env::Default();
-
-  size_t byte_count = 0;
-  ORT_THROW_IF_ERROR(platform_env.GetFileLength(native_path, byte_count));
-
-  Env::MappedMemoryPtr mapping;
-  ORT_THROW_IF_ERROR(platform_env.MapFileIntoMemory(native_path, 0, byte_count, mapping));
-
-  return std::make_pair(std::move(mapping), byte_count);
-}
-
-// Checks the file identifier, runs the flatbuffers verifier and confirms that the format
-// version is supported. Throws on any failure; otherwise returns the Adapter root located
-// inside bytes.
-const Adapter* ValidateAndGetAdapterFromBytes(gsl::span<const uint8_t> bytes) {
-  const uint8_t* const buffer = bytes.data();
-  const size_t buffer_size = bytes.size();
-
-  const bool has_identifier = IsLoraFormatModelBytes(buffer, buffer_size);
-  if (!has_identifier) {
-    ORT_THROW("The buffer does not appear to be a valid lora parameter format");
-  }
-
-  flatbuffers::Verifier buffer_verifier(buffer, buffer_size);
-  const bool verified = VerifyAdapterBuffer(buffer_verifier);
-  if (!verified) {
-    ORT_THROW("The buffer fails lora adapter format verification");
-  }
-
-  const Adapter* root = GetAdapter(buffer);
-  const bool version_supported = IsLoraFormatVersionSupported(root->format_version());
-  if (!version_supported) {
-    ORT_THROW("Unsupported lora format version");
-  }
-
-  return root;
-}
-
-// Serializes one parameter tensor into flat_builder and writes the offset of the created
-// Parameter table to fbs_tensor.
-//   name       parameter name; an empty name is stored as an absent string
-//   data_type  element type recorded for the parameter
-//   shape      tensor dimensions
-//   data       raw tensor bytes
-void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
-                       TensorDataType data_type, gsl::span<const int64_t> shape,
-                       gsl::span<const uint8_t> data,
-                       flatbuffers::Offset<Parameter>& fbs_tensor) {
-  auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size());
-  auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size());
-
-  // The schema declares raw_data with force_align : 8. That alignment has to be requested
-  // explicitly right before the vector is written, the same way the generated
-  // CreateParameterDirect() does it.
-  flat_builder.ForceVectorAlignment(data.size(), sizeof(uint8_t), 8);
-  auto data_vec = flat_builder.CreateVector(data.data(), data.size());
-
-  fbs_tensor = CreateParameter(flat_builder, name_str, shape_vec, data_type, data_vec);
-}
-
-// Creates an OrtValue whose tensor buffer points directly at the parameter's raw_data
-// inside the flatbuffer. No data is copied here.
-// Returns the parameter name (empty when the name is absent) paired with the OrtValue.
-// Throws if the parameter carries no dims or no raw_data.
-std::pair<std::string, OrtValue> CreateOrtValueOverLoraParameter(const Parameter& param) {
-  std::string name;
-  LoadStringFromLoraFormat(name, param.name());
-
-  // Neither vector is marked required in the schema, so the accessors may return null.
-  // Fail with a clear error instead of dereferencing a null pointer.
-  const auto* dims = param.dims();
-  ORT_ENFORCE(dims != nullptr, "Lora parameter '", name, "' is missing dims");
-  const auto* raw_data = param.raw_data();
-  ORT_ENFORCE(raw_data != nullptr, "Lora parameter '", name, "' is missing raw_data");
-
-  OrtValue result;
-
-  const auto data_type = param.data_type();
-  gsl::span<const int64_t> shape_span(dims->data(), dims->size());
-
-  static const OrtMemoryInfo cpu_meminfo(CPU, OrtAllocatorType::OrtDeviceAllocator);
-
-  auto elem_type = DataTypeImpl::TensorTypeFromONNXEnum(static_cast<int32_t>(data_type))->GetElementType();
-  // const_cast is necessary due to Tensor class API
-  Tensor::InitOrtValue(elem_type,
-                       TensorShape(shape_span),
-                       const_cast<uint8_t*>(raw_data->data()),
-                       cpu_meminfo,
-                       result);
-
-  return std::make_pair(std::move(name), std::move(result));
-}
-
-// Meant to produce a copy of ort_value_mapped in memory obtained from device_allocator.
-// NOTE(review): as written, the tensor created with device_allocator is a local that is
-// destroyed on return, nothing is copied into it, and the OrtValue that is returned is
-// default-constructed. Callers therefore receive an empty OrtValue - TODO confirm whether
-// the implementation is still pending.
-OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator) {
-  OrtValue result;
-
-  const auto& tensor = ort_value_mapped.Get<Tensor>();
-  // Same element type and shape as the source tensor, constructed with device_allocator.
-  Tensor on_device(tensor.DataType(), tensor.Shape(), device_allocator);
-
-  return result;
-}
-
-// Serializes the parameter into the underlying builder and records its offset in params_.
-void AdapterFormatBuilder::AddParameter(const std::string& name, lora::TensorDataType data_type,
-                                        gsl::span<const int64_t> shape, gsl::span<const uint8_t> data) {
-  flatbuffers::Offset<Parameter> param_offset;
-  SaveLoraParameter(builder_, name, data_type, shape, data, param_offset);
-  params_.emplace_back(param_offset);
-}
-
-// Finalizes the adapter and returns a copy of the serialized bytes.
-std::vector<uint8_t> AdapterFormatBuilder::Finish(int adapter_version, int model_version) {
-  FinishImpl(adapter_version, model_version);
-
-  const uint8_t* const first = builder_.GetBufferPointer();
-  const uint8_t* const last = first + builder_.GetSize();
-  return std::vector<uint8_t>(first, last);
-}
-
-// Finalizes the adapter and returns a span over the builder's own buffer (no copy is made).
-gsl::span<uint8_t> AdapterFormatBuilder::FinishWithSpan(int adapter_version, int model_version) {
-  FinishImpl(adapter_version, model_version);
-
-  uint8_t* const buffer_start = builder_.GetBufferPointer();
-  const auto buffer_size = builder_.GetSize();
-  return gsl::span<uint8_t>(buffer_start, buffer_size);
-}
-
-// Writes the collected parameter offsets as a vector, creates the Adapter root table with
-// the current format version and finishes the buffer with the adapter file identifier.
-void AdapterFormatBuilder::FinishImpl(int adapter_version, int model_version) {
-  const auto params_vector = builder_.CreateVector(params_);
-  const auto adapter_root = lora::CreateAdapter(builder_, lora::kLoraFormatVersion,
-                                                adapter_version, model_version, params_vector);
-  builder_.Finish(adapter_root, lora::AdapterIdentifier());
-}
-
-}  // namespace utils
-}  // namespace lora
-}  // namespace onnxruntime
diff --git a/onnxruntime/lora/lora_format_utils.h b/onnxruntime/lora/lora_format_utils.h
deleted file mode 100644
index e7e341945f2ca..0000000000000
--- a/onnxruntime/lora/lora_format_utils.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-
-#include "core/common/flatbuffers.h"
-#include "core/framework/allocator.h"
-#include "core/platform/env.h"
-
-#include <gsl/gsl>
-#include <filesystem>
-
-#include "lora_format/lora_schema.fbs.h"
-
-#include <string>
-#include <string_view>
-#include <unordered_map>
-#include <vector>
-
-struct OrtValue;
-
-namespace onnxruntime {
-namespace lora {
-namespace utils {
-
-/// <summary>
-/// Helper class to serialize a Lora adapter into the flatbuffers based Lora format.
-/// Parameters are appended one at a time and then one of the Finish methods is called.
-/// </summary>
-class AdapterFormatBuilder {
- public:
-  AdapterFormatBuilder() = default;
-
-  /// <summary>
-  /// Appends parameter tensor to the adapter builder
-  /// </summary>
-  /// <param name="name">parameter name</param>
-  /// <param name="data_type">element type of the parameter tensor</param>
-  /// <param name="shape">tensor dimensions</param>
-  /// <param name="data">raw tensor bytes</param>
-  void AddParameter(const std::string& name, lora::TensorDataType data_type,
-                    gsl::span<const int64_t> shape, gsl::span<const uint8_t> data);
-
-  /// <summary>
-  /// Finishes serialization and returns a serialized byte vector
-  /// </summary>
-  /// <param name="adapter_version">adapter version to store in the serialized adapter</param>
-  /// <param name="model_version">model version to store in the serialized adapter</param>
-  /// <returns>a copy of the serialized bytes</returns>
-  std::vector<uint8_t> Finish(int adapter_version, int model_version);
-
-  /// <summary>
-  /// Finishes serialization and returns a span to internal buffer.
-  /// </summary>
-  /// <param name="adapter_version">adapter version to store in the serialized adapter</param>
-  /// <param name="model_version">model version to store in the serialized adapter</param>
-  /// <returns>span over the buffer held by this builder; no copy is made</returns>
-  gsl::span<uint8_t> FinishWithSpan(int adapter_version, int model_version);
-
- private:
-  // Creates the Adapter root table from the collected parameters and finishes the buffer.
-  void FinishImpl(int adapter_version, int model_version);
-
-  flatbuffers::FlatBufferBuilder builder_;
-  // Offsets of the parameters added so far
-  std::vector<flatbuffers::Offset<lora::Parameter>> params_;
-};
-
-/// <summary>
-/// Checks whether the buffer is longer than 8 bytes and carries the Lora adapter
-/// flatbuffers file identifier.
-/// </summary>
-/// <param name="bytes">start of the buffer</param>
-/// <param name="num_bytes">size of the buffer in bytes</param>
-/// <returns>true if the identifier is present</returns>
-bool IsLoraFormatModelBytes(const void* bytes, size_t num_bytes);
-
-// Will only create string in flatbuffers when has_string is true
-flatbuffers::Offset<flatbuffers::String> SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder,
-                                                                bool has_string, const std::string& src);
-
-void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string);
-
-/// <summary>
-/// The function loads the lora adapter bytes from the file system
-/// </summary>
-/// <param name="file_path">file path</param>
-/// <returns>bytes in a vector</returns>
-/// <throw>If the path can not be found</throw>
-std::vector<uint8_t> LoadLoraAdapterBytes(const std::filesystem::path& file_path);
-
-/// <summary>
-/// This function memory maps the adapter file in memory
-/// </summary>
-/// <param name="file_path"></param>
-/// <returns>memory handle and file size in a pair</returns>
-std::pair<Env::MappedMemoryPtr, size_t> MemoryMapAdapterFile(const std::filesystem::path& file_path);
-
-/// <summary>
-/// Validates underlying format and the format version
-/// </summary>
-/// <param name="bytes"></param>
-/// <returns>Adapter ptr</returns>
-const Adapter* ValidateAndGetAdapterFromBytes(gsl::span<const uint8_t> bytes);
-
-/// <summary>
-/// Serializes tensor data into flatbuffer
-/// </summary>
-/// <param name="flat_builder">builder the parameter is written to</param>
-/// <param name="name">parameter name</param>
-/// <param name="data_type">element type of the parameter tensor</param>
-/// <param name="shape">tensor dimensions</param>
-/// <param name="data">raw tensor bytes</param>
-/// <param name="fbs_tensor">output offset</param>
-void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
-                       lora::TensorDataType data_type,
-                       gsl::span<const int64_t> shape, gsl::span<const uint8_t> data,
-                       flatbuffers::Offset<Parameter>& fbs_tensor);
-
-/// <summary>
-/// Create an OrtValue on top of the flatbuffer tensor
-/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer
-/// structures.
-///
-/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying.
-/// </summary>
-/// <param name="param">flatbuffer parameter to create the OrtValue over</param>
-/// <returns>parameter name and the OrtValue in a pair</returns>
-std::pair<std::string, OrtValue> CreateOrtValueOverLoraParameter(const Parameter& param);
-
-/// <summary>
-/// Allocates OrtValue on specified device and copies data there
-/// </summary>
-/// <param name="ort_value_mapped">parameter on CPU</param>
-/// <param name="device_allocator">supplied device allocator</param>
-/// <returns></returns>
-OrtValue CreateOrtValueOnDevice(const OrtValue& ort_value_mapped, const AllocatorPtr& device_allocator);
-
-}  // namespace utils
-}  // namespace lora
-}  // namespace onnxruntime
diff --git a/onnxruntime/lora/lora_format_version.h b/onnxruntime/lora/lora_format_version.h
deleted file mode 100644
index 9c90a86b16382..0000000000000
--- a/onnxruntime/lora/lora_format_version.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-
-#include <algorithm>
-#include <array>
-
-namespace onnxruntime {
-namespace lora {
-
-// The current model versions for saving lora parameters in flatbuffers
-// Once this version is updated, the kSupportedLoraFormatVersions in IsLoraFormatVersionSupported
-// below will also need to be updated.
-// See lora_format/README.md for more details on versioning.
-// Version 1 - history begins
-constexpr const int kLoraFormatVersion = 1;
-
-// Returns true when this build can read the given lora format version.
-inline bool IsLoraFormatVersionSupported(const int lora_format_version) {
-  // Versions readable by this build. Depending on compatibility, the list may hold
-  // more entries than just kLoraFormatVersion.
-  static constexpr std::array<int, 1U> kSupportedLoraFormatVersions{
-      kLoraFormatVersion,
-  };
-
-  for (const int supported_version : kSupportedLoraFormatVersions) {
-    if (supported_version == lora_format_version) {
-      return true;
-    }
-  }
-  return false;
-}
-
-}  // namespace lora
-}  // namespace onnxruntime
diff --git a/onnxruntime/python/onnxruntime_pybind_lora.cc b/onnxruntime/python/onnxruntime_pybind_lora.cc
index c99236498e5b6..72c92abcf0539 100644
--- a/onnxruntime/python/onnxruntime_pybind_lora.cc
+++ b/onnxruntime/python/onnxruntime_pybind_lora.cc
@@ -144,7 +144,8 @@ void addAdapterFormatMethods(pybind11::module& m) {
 
   py::class_<lora::LoraAdapter> lora_adapter_binding(m, "LoraAdapter");
   lora_adapter_binding.def(py::init())
-      .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->Load(file_path); }, R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc");
+      .def("Load", [](lora::LoraAdapter* adapter, const std::wstring& file_path) { adapter->MemoryMap(file_path); },
+        R"pbdoc(Memory map the specified file as LoraAdapter)pbdoc");
 }
 
 }  // namespace python