From e6b3b4a8077c94c434cf7529d621778e9c2c2f6b Mon Sep 17 00:00:00 2001 From: Rinne Date: Wed, 24 Apr 2024 00:20:15 +0800 Subject: [PATCH 01/11] feat: support auto-download for native libraries. --- LLama.Examples/Program.cs | 25 +- LLama/Abstractions/INativeLibrary.cs | 30 + .../INativeLibrarySelectingPolicy.cs | 24 + LLama/LLamaSharp.csproj | 5 +- .../DefaultNativeLibrarySelectingPolicy.cs | 69 ++ LLama/Native/Load/NativeLibraryConfig.cs | 664 ++++++++++++++++++ .../Load/NativeLibraryDownloadManager.cs | 285 ++++++++ LLama/Native/Load/NativeLibraryFromPath.cs | 31 + LLama/Native/Load/NativeLibraryMetadata.cs | 43 ++ LLama/Native/Load/NativeLibraryUtils.cs | 182 +++++ LLama/Native/Load/NativeLibraryWithAvx.cs | 75 ++ .../Native/Load/NativeLibraryWithCpuOrMac.cs | 77 ++ LLama/Native/Load/NativeLibraryWithCuda.cs | 91 +++ LLama/Native/Load/SystemInfo.cs | 129 ++++ LLama/Native/NativeApi.Load.cs | 358 +--------- LLama/Native/NativeApi.cs | 3 - LLama/Native/NativeLibraryConfig.cs | 332 --------- LLama/Native/NativeLogConfig.cs | 2 +- 18 files changed, 1735 insertions(+), 690 deletions(-) create mode 100644 LLama/Abstractions/INativeLibrary.cs create mode 100644 LLama/Abstractions/INativeLibrarySelectingPolicy.cs create mode 100644 LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs create mode 100644 LLama/Native/Load/NativeLibraryConfig.cs create mode 100644 LLama/Native/Load/NativeLibraryDownloadManager.cs create mode 100644 LLama/Native/Load/NativeLibraryFromPath.cs create mode 100644 LLama/Native/Load/NativeLibraryMetadata.cs create mode 100644 LLama/Native/Load/NativeLibraryUtils.cs create mode 100644 LLama/Native/Load/NativeLibraryWithAvx.cs create mode 100644 LLama/Native/Load/NativeLibraryWithCpuOrMac.cs create mode 100644 LLama/Native/Load/NativeLibraryWithCuda.cs create mode 100644 LLama/Native/Load/SystemInfo.cs delete mode 100644 LLama/Native/NativeLibraryConfig.cs diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index 
b24ef406b..cee9540be 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -1,5 +1,6 @@ using LLama.Native; using Spectre.Console; +using System.Runtime.InteropServices; AnsiConsole.MarkupLineInterpolated( $""" @@ -16,23 +17,23 @@ __ __ ____ __ """); -// Configure native library to use. This must be done before any other llama.cpp methods are called! -NativeLibraryConfig - .Instance - .WithCuda(); - // Configure logging. Change this to `true` to see log messages from llama.cpp var showLLamaCppLogs = false; NativeLibraryConfig - .Instance + .All .WithLogCallback((level, message) => - { - if (showLLamaCppLogs) - Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); - }); + { + if (showLLamaCppLogs) + Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); + }); + +// Configure native library to use. This must be done before any other llama.cpp methods are called! +NativeLibraryConfig + .All + .WithCuda() + .WithAutoDownload().DryRun(); // Calling this method forces loading to occur now. NativeApi.llama_empty_call(); -await ExampleRunner.Run(); - +await ExampleRunner.Run(); \ No newline at end of file diff --git a/LLama/Abstractions/INativeLibrary.cs b/LLama/Abstractions/INativeLibrary.cs new file mode 100644 index 000000000..64ba182e0 --- /dev/null +++ b/LLama/Abstractions/INativeLibrary.cs @@ -0,0 +1,30 @@ +using LLama.Native; +using System; +using System.Collections.Generic; +using System.Text; + +namespace LLama.Abstractions +{ + /// + /// Descriptor of a native library. + /// + public interface INativeLibrary + { + /// + /// Metadata of this library. + /// + NativeLibraryMetadata? Metadata { get; } + + /// + /// Prepare the native library file and returns the local path of it. + /// If it's a relative path, LLamaSharp will search the path in the search directies you set. + /// + /// The system information of the current machine. + /// Whether it's allowed to download from remote. + /// The log callback. 
+ /// + /// The relative paths of the library. You could return multiple paths to try them one by one. If no file is available, please return an empty array. + /// + IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote = false, NativeLogConfig.LLamaLogCallback? logCallback = null); + } +} diff --git a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs new file mode 100644 index 000000000..f57ae6d1a --- /dev/null +++ b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs @@ -0,0 +1,24 @@ +using LLama.Native; +using System; +using System.Collections.Generic; +using System.Text; + +namespace LLama.Abstractions +{ +#if NET6_0_OR_GREATER + /// + /// Decides the selected native library that should be loaded according to the configurations. + /// + public interface INativeLibrarySelectingPolicy + { + /// + /// Select the native library. + /// + /// + /// The system information of the current machine. + /// The log callback. + /// The information of the selected native library files, in order by priority from the beginning to the end. + IEnumerable Select(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
logCallback = null); + } +#endif +} diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 3947b7c31..8277ce9eb 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -1,9 +1,9 @@ - netstandard2.0;net6.0;net7.0;net8.0 + net6.0;net7.0;net8.0;netstandard2.0 LLama enable - 10 + 12 AnyCPU;x64;Arm64 True @@ -49,6 +49,7 @@ + diff --git a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs new file mode 100644 index 000000000..42433cecb --- /dev/null +++ b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs @@ -0,0 +1,69 @@ +using LLama.Abstractions; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ +#if NET6_0_OR_GREATER + /// + public class DefaultNativeLibrarySelectingPolicy: INativeLibrarySelectingPolicy + { + /// + public IEnumerable Select(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) + { + List results = new(); + + // Show the configuration we're working with + Log(description.ToString(), LLamaLogLevel.Info, logCallback); + + // If a specific path is requested, only use it, no fall back. + if (!string.IsNullOrEmpty(description.Path)) + { + yield return new NativeLibraryFromPath(description.Path); + } + else + { + if (description.UseCuda) + { + yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.SkipCheck, description.DownloadSettings); + } + + if(!description.UseCuda || description.AllowFallback) + { + if (description.AllowFallback) + { + // Try all of the AVX levels we can support. 
+ if (description.AvxLevel >= AvxLevel.Avx512) + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx512, description.SkipCheck, description.DownloadSettings); + + if (description.AvxLevel >= AvxLevel.Avx2) + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx2, description.SkipCheck, description.DownloadSettings); + + if (description.AvxLevel >= AvxLevel.Avx) + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx, description.SkipCheck, description.DownloadSettings); + + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.None, description.SkipCheck, description.DownloadSettings); + } + else + { + yield return new NativeLibraryWithAvx(description.Library, description.AvxLevel, description.SkipCheck, description.DownloadSettings); + } + } + + if(systemInfo.OSPlatform == OSPlatform.OSX || description.AllowFallback) + { + yield return new NativeLibraryWithCpuOrMac(description.Library, description.SkipCheck, description.DownloadSettings); + } + } + } + + private void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCallback? logCallback) + { + if (!message.EndsWith("\n")) + message += "\n"; + + logCallback?.Invoke(level, message); + } + } +#endif +} diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs new file mode 100644 index 000000000..6f77927d5 --- /dev/null +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -0,0 +1,664 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using LLama.Abstractions; +using Microsoft.Extensions.Logging; + +namespace LLama.Native +{ +#if NET6_0_OR_GREATER + /// + /// Allows configuration of the native llama.cpp libraries to load and use. + /// All configuration must be done before using **any** other LLamaSharp methods! + /// + public sealed partial class NativeLibraryConfig + { + private string? 
_libraryPath; + + private bool _useCuda = true; + private AvxLevel _avxLevel; + private bool _allowFallback = true; + private bool _skipCheck = false; + private bool _allowAutoDownload = false; + private NativeLibraryDownloadSettings _downloadSettings = NativeLibraryDownloadSettings.Create(); + + /// + /// search directory -> priority level, 0 is the lowest. + /// + private readonly List _searchDirectories = new List(); + + internal INativeLibrarySelectingPolicy SelectingPolicy { get; private set; } = new DefaultNativeLibrarySelectingPolicy(); + + internal bool AllowAutoDownload => _allowAutoDownload; + + #region configurators + /// + /// Load a specified native library as backend for LLamaSharp. + /// When this method is called, all the other configurations will be ignored. + /// + /// The full path to the native library to load. + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithLibrary(string? libraryPath) + { + ThrowIfLoaded(); + + _libraryPath = libraryPath; + return this; + } + + /// + /// Configure whether to use cuda backend if possible. Default is true. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithCuda(bool enable = true) + { + ThrowIfLoaded(); + + _useCuda = enable; + return this; + } + + /// + /// Configure the prefferred avx support level of the backend. + /// Default value is detected automatically due to your operating system. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithAvx(AvxLevel level) + { + ThrowIfLoaded(); + + _avxLevel = level; + return this; + } + + /// + /// Configure whether to allow fallback when there's no match for preferred settings. Default is true. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. 
+ public NativeLibraryConfig WithAutoFallback(bool enable = true) + { + ThrowIfLoaded(); + + _allowFallback = enable; + return this; + } + + /// + /// Whether to skip the check when you don't allow fallback. This option + /// may be useful under some complex conditions. For example, you're sure + /// you have your cublas configured but LLamaSharp take it as invalid by mistake. Default is false; + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig SkipCheck(bool enable = true) + { + ThrowIfLoaded(); + + _skipCheck = enable; + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) + { + ThrowIfLoaded(); + + _searchDirectories.AddRange(directories); + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectory(string directory) + { + ThrowIfLoaded(); + + _searchDirectories.Add(directory); + return this; + } + + /// + /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load. + /// You could add a setting here to customize the behavior of the download. + /// + /// If auto-download is enabled, please call after you have finished setting your configurations. + /// + /// + /// + /// + public NativeLibraryConfig WithAutoDownload(bool enable = true, NativeLibraryDownloadSettings? 
settings = null) + { + ThrowIfLoaded(); + + _allowAutoDownload = enable; + if (settings is not null) + _downloadSettings = settings; + return this; + } + + /// + /// Set the policy which decides how to select the desired native libraries and order them by priority. + /// By default we use . + /// + /// + /// + public NativeLibraryConfig WithSelectingPolicy(INativeLibrarySelectingPolicy policy) + { + ThrowIfLoaded(); + + SelectingPolicy = policy; + return this; + } + + #endregion + + internal Description CheckAndGatherDescription() + { + if (_allowFallback && _skipCheck) + throw new ArgumentException("Cannot skip the check when fallback is allowed."); + + var path = _libraryPath; + + // Don't modify and pass the original object to `Description`, create a new one instead. + // Also, we need to set the default local directory if the user does not. + var defaultLocalDir = NativeLibraryDownloadSettings.GetDefaultLocalDir(GetCommitHash(_downloadSettings.Tag)); + var downloadSettings = NativeLibraryDownloadSettings.Create() + .WithEndpoint(_downloadSettings.Endpoint).WithEndpointFallbacks(_downloadSettings.EndpointFallbacks ?? []) + .WithRepoId(_downloadSettings.RepoId).WithToken(_downloadSettings.Token).WithTag(_downloadSettings.Tag) + .WithTimeout(_downloadSettings.Timeout).WithLocalDir(_downloadSettings.LocalDir ?? 
defaultLocalDir); + + return new Description( + path, + NativeLibraryName, + _useCuda, + _avxLevel, + _allowFallback, + _skipCheck, + _searchDirectories.Concat(new[] { "./" }).ToArray(), + _allowAutoDownload, + downloadSettings + ); + } + + internal static string AvxLevelToString(AvxLevel level) + { + return level switch + { + AvxLevel.None => string.Empty, + AvxLevel.Avx => "avx", + AvxLevel.Avx2 => "avx2", + AvxLevel.Avx512 => "avx512", + _ => throw new ArgumentException($"Unknown AvxLevel '{level}'") + }; + } + + /// + /// Private constructor prevents new instances of this class being created + /// + private NativeLibraryConfig(NativeLibraryName nativeLibraryName) + { + NativeLibraryName = nativeLibraryName; + // This value should be changed when we're going to publish new release. (any better approach?) + _downloadSettings = new NativeLibraryDownloadSettings().WithTag(GetCommitHash("master")); + + // Automatically detect the highest supported AVX level + if (System.Runtime.Intrinsics.X86.Avx.IsSupported) + _avxLevel = AvxLevel.Avx; + if (System.Runtime.Intrinsics.X86.Avx2.IsSupported) + _avxLevel = AvxLevel.Avx2; + + if (CheckAVX512()) + _avxLevel = AvxLevel.Avx512; + } + + private static bool CheckAVX512() + { + if (!System.Runtime.Intrinsics.X86.X86Base.IsSupported) + return false; + + // ReSharper disable UnusedVariable (ebx is used when < NET8) + var (_, ebx, ecx, _) = System.Runtime.Intrinsics.X86.X86Base.CpuId(7, 0); + // ReSharper restore UnusedVariable + + var vnni = (ecx & 0b_1000_0000_0000) != 0; + +#if NET8_0_OR_GREATER + var f = System.Runtime.Intrinsics.X86.Avx512F.IsSupported; + var bw = System.Runtime.Intrinsics.X86.Avx512BW.IsSupported; + var vbmi = System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported; +#else + var f = (ebx & (1 << 16)) != 0; + var bw = (ebx & (1 << 30)) != 0; + var vbmi = (ecx & 0b_0000_0000_0010) != 0; +#endif + + return vnni && vbmi && bw && f; + } + + /// + /// The description of the native library configurations that's 
already specified. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public record Description(string? Path, NativeLibraryName Library, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, + string[] SearchDirectories, bool AllowAutoDownload, NativeLibraryDownloadSettings DownloadSettings) + { + /// + public override string ToString() + { + string avxLevelString = AvxLevel switch + { + AvxLevel.None => "NoAVX", + AvxLevel.Avx => "AVX", + AvxLevel.Avx2 => "AVX2", + AvxLevel.Avx512 => "AVX512", + _ => "Unknown" + }; + + string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }"; + + return $"NativeLibraryConfig Description:\n" + + $"- LibraryName: {Library}\n" + + $"- Path: '{Path}'\n" + + $"- PreferCuda: {UseCuda}\n" + + $"- PreferredAvxLevel: {avxLevelString}\n" + + $"- AllowFallback: {AllowFallback}\n" + + $"- SkipCheck: {SkipCheck}\n" + + $"- SearchDirectories and Priorities: {searchDirectoriesString}" + + $"- AllowAutoDownload: {AllowAutoDownload}\n" + + $"- DownloadSettings: {DownloadSettings}\n"; + } + } + } +#endif + + public sealed partial class NativeLibraryConfig + { + /// + /// Set configurations for all the native libraries, including LLama and LLava + /// + [Obsolete("Please use NativeLibraryConfig.All instead, or set configurations for NativeLibraryConfig.LLama and NativeLibraryConfig.LLavaShared respectively.")] + public static NativeLibraryConfigContainer Instance { get; } + + /// + /// Set configurations for all the native libraries, including LLama and LLava + /// + public static NativeLibraryConfigContainer All { get; } + + /// + /// Configuration for LLama native library + /// + public static NativeLibraryConfig LLama { get; } + + /// + /// Configuration for LLava native library + /// + public static NativeLibraryConfig LLavaShared { get; } + + /// + /// A dictionary mapping from version to corresponding llama.cpp commit hash. + /// The version should be formatted int `[major].[minor].[patch]`. 
But there's an exceptance that you can + /// use `master` as a version to get the llama.cpp commit hash from the master branch. + /// + public static Dictionary VersionMap { get; } = new Dictionary() + // This value should be changed when we're going to publish new release. (any better approach?) + { + {"master", "f7001c"} + }; + + internal static string GetCommitHash(string version) + { + if(VersionMap.TryGetValue(version, out var hash)) + { + return hash; + } + else + { + return version; + } + } + + static NativeLibraryConfig() + { + LLama = new(NativeLibraryName.Llama); + LLavaShared = new(NativeLibraryName.LlavaShared); + All = new(LLama, LLavaShared); + Instance = All; + } + +#if NETSTANDARD2_0 + private NativeLibraryConfig(NativeLibraryName nativeLibraryName) + { + NativeLibraryName = nativeLibraryName; + } +#endif + + /// + /// Check if the native library has already been loaded. Configuration cannot be modified if this is true. + /// + public bool LibraryHasLoaded { get; internal set; } + + /// + /// Whether has been called. + /// + internal bool HasCalledDryRun { get; private set; } = false; + + internal NativeLibraryName NativeLibraryName { get; } + + internal NativeLogConfig.LLamaLogCallback? LogCallback { get; private set; } = null; + + private void ThrowIfLoaded() + { + if (LibraryHasLoaded) + throw new InvalidOperationException("NativeLibraryConfig must be configured before using **any** other LLamaSharp methods!"); + } + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfig WithLogCallback(NativeLogConfig.LLamaLogCallback? callback) + { + ThrowIfLoaded(); + + LogCallback = callback; + return this; + } + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfig WithLogCallback(ILogger? logger) + { + ThrowIfLoaded(); + + // Redirect to llama_log_set. 
This will wrap the logger in a delegate and bind that as the log callback instead. + NativeLogConfig.llama_log_set(logger); + + return this; + } + + /// + /// Try to load the native library with the current configurations, + /// but do not actually set it to . + /// + /// You can still modify the configuration after this calling but only before any call from . + /// + /// Whether the running is successful. + public bool DryRun() + { + LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); + HasCalledDryRun = true; + return NativeLibraryUtils.TryLoadLibrary(this) != IntPtr.Zero; + } + } + + /// + /// A class to set same configurations to multiple libraries at the same time. + /// + public sealed partial class NativeLibraryConfigContainer + { + private NativeLibraryConfig[] _configs; + + internal NativeLibraryConfigContainer(params NativeLibraryConfig[] configs) + { + _configs = configs; + } + + #region configurators + +#if NET6_0_OR_GREATER + /// + /// Load a specified native library as backend for LLamaSharp. + /// When this method is called, all the other configurations will be ignored. + /// + /// The full path to the llama library to load. + /// The full path to the llava library to load. + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfigContainer WithLibrary(string? llamaPath, string? llavaPath) + { + foreach(var config in _configs) + { + if(config.NativeLibraryName == NativeLibraryName.Llama && llamaPath is not null) + { + config.WithLibrary(llamaPath); + } + if(config.NativeLibraryName == NativeLibraryName.LlavaShared && llavaPath is not null) + { + config.WithLibrary(llavaPath); + } + } + + return this; + } + + /// + /// Configure whether to use cuda backend if possible. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. 
+ public NativeLibraryConfigContainer WithCuda(bool enable = true) + { + foreach(var config in _configs) + { + config.WithCuda(enable); + } + return this; + } + + /// + /// Configure the prefferred avx support level of the backend. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfigContainer WithAvx(AvxLevel level) + { + foreach (var config in _configs) + { + config.WithAvx(level); + } + return this; + } + + /// + /// Configure whether to allow fallback when there's no match for preferred settings. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfigContainer WithAutoFallback(bool enable = true) + { + foreach (var config in _configs) + { + config.WithAutoFallback(enable); + } + return this; + } + + /// + /// Whether to skip the check when you don't allow fallback. This option + /// may be useful under some complex conditions. For example, you're sure + /// you have your cublas configured but LLamaSharp take it as invalid by mistake. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfigContainer SkipCheck(bool enable = true) + { + foreach (var config in _configs) + { + config.SkipCheck(enable); + } + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfigContainer WithSearchDirectories(IEnumerable directories) + { + foreach (var config in _configs) + { + config.WithSearchDirectories(directories); + } + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. 
+ /// + /// + /// + public NativeLibraryConfigContainer WithSearchDirectory(string directory) + { + foreach (var config in _configs) + { + config.WithSearchDirectory(directory); + } + return this; + } + + /// + /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load. + /// You could add a setting here to customize the behavior of the download. + /// + /// If auto-download is enabled, please call after you have finished setting your configurations. + /// + /// + /// + /// + public NativeLibraryConfigContainer WithAutoDownload(bool enable = true, NativeLibraryDownloadSettings? settings = null) + { + foreach (var config in _configs) + { + config.WithAutoDownload(enable, settings); + } + return this; + } + + /// + /// Set the policy which decides how to select the desired native libraries and order them by priority. + /// By default we use . + /// + /// + /// + public NativeLibraryConfigContainer WithSelectingPolicy(INativeLibrarySelectingPolicy policy) + { + foreach (var config in _configs) + { + config.WithSelectingPolicy(policy); + } + return this; + } +#endif + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfigContainer WithLogCallback(NativeLogConfig.LLamaLogCallback? callback) + { + foreach (var config in _configs) + { + config.WithLogCallback(callback); + } + return this; + } + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfigContainer WithLogCallback(ILogger? logger) + { + foreach (var config in _configs) + { + config.WithLogCallback(logger); + } + return this; + } + + #endregion + + /// + /// Try to load the native library with the current configurations, + /// but do not actually set it to . + /// + /// You can still modify the configuration after this calling but only before any call from . 
+ /// + /// Whether the running is successful. + public bool DryRun() + { + return _configs.All(config => config.DryRun()); + } + } + + /// + /// The name of the native library + /// + public enum NativeLibraryName + { + /// + /// The native library compiled from llama.cpp. + /// + Llama, + /// + /// The native library compiled from the LLaVA example of llama.cpp. + /// + LlavaShared + } + + internal static class LibraryNameExtensions + { + public static string GetLibraryName(this NativeLibraryName name) + { + switch (name) + { + case NativeLibraryName.Llama: + return NativeApi.libraryName; + case NativeLibraryName.LlavaShared: + return NativeApi.llavaLibraryName; + default: + throw new ArgumentOutOfRangeException(nameof(name), name, null); + } + } + } +} diff --git a/LLama/Native/Load/NativeLibraryDownloadManager.cs b/LLama/Native/Load/NativeLibraryDownloadManager.cs new file mode 100644 index 000000000..b9a136991 --- /dev/null +++ b/LLama/Native/Load/NativeLibraryDownloadManager.cs @@ -0,0 +1,285 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using Huggingface; + +namespace LLama.Native +{ + internal class NativeLibraryDownloadManager + { + /// + /// Download the library file + /// + /// + /// + /// + /// The local path of the file if successful otherwise null. + public static async Task DownloadLibraryFile(NativeLibraryDownloadSettings settings, string remoteFilePath, NativeLogConfig.LLamaLogCallback? 
logCallback = null) + { + HFGlobalConfig.DefaultDownloadTimeout = settings.Timeout; + + HashSet endpointSet = new([settings.Endpoint]); + if (settings.EndpointFallbacks is not null) + { + foreach (var endpoint in settings.EndpointFallbacks) + { + endpointSet.Add(endpoint); + } + } + var endpoints = endpointSet.ToArray(); + + Dictionary exceptionMap = new(); + foreach(var endpoint in endpoints) + { + logCallback?.Invoke(LLamaLogLevel.Debug, $"Downloading the native library file '{remoteFilePath}' from {endpoint} with repo = {settings.RepoId}, tag = {settings.Tag}"); + var path = await HFDownloader.DownloadFileAsync(settings.RepoId, remoteFilePath, revision: settings.Tag, cacheDir: settings.CacheDir, + localDir: settings.LocalDir, token: settings.Token, endpoint: endpoint); + if (path is not null) + { + logCallback?.Invoke(LLamaLogLevel.Debug, $"Successfully downloaded the native library file to {path}"); + return path; + } + else + { + logCallback?.Invoke(LLamaLogLevel.Warning, "The download failed without an explicit error, please check your configuration or report an issue to LLamaSharp."); + } + //try + //{ + // logCallback?.Invoke(LLamaLogLevel.Debug, $"Downloading the native library file '{remoteFilePath}' from {endpoint} with repo = {settings.RepoId}, tag = {settings.Tag}"); + // var path = await HFDownloader.DownloadFileAsync(settings.RepoId, remoteFilePath, revision: settings.Tag, cacheDir: settings.CacheDir, + // localDir: settings.LocalDir, token: settings.Token, endpoint: endpoint); + // if(path is not null) + // { + // logCallback?.Invoke(LLamaLogLevel.Debug, $"Successfully downloaded the native library file to {path}"); + // return path; + // } + // else + // { + // logCallback?.Invoke(LLamaLogLevel.Warning, "The download failed without an explicit error, please check your configuration or report an issue to LLamaSharp."); + // } + //} + //catch(Exception ex) + //{ + // logCallback?.Invoke(LLamaLogLevel.Warning, $"An exception was thrown when 
downloading the native library file from {endpoint}: {ex.Message}"); + //} + } + + // means that the download finally fails. + return null; + } + } + + /// + /// Settings for downloading the native library. + /// + public class NativeLibraryDownloadSettings + { + /// + /// The endpoint to download from, by default the official site of HuggingFace. + /// + public string Endpoint { get; private set; } = "https://huggingface.co"; + + /// + /// Endpoints to fallback to if downloading with the main endpoint fails. + /// + /// Generally this is an option for those countries or regions where the main endpoint is blocked. + /// You should not put too many endpoints here, as it will slow down the downloading process. + /// + public string[]? EndpointFallbacks { get; private set; } = null; + + /// + /// The version of the library to download. Please use LLamaSharp version in format `[major].[minor].[patch]` as tag + /// or go to https://huggingface.co/AsakusaRinne/LLamaSharpNative + /// to see all available tags, or use your own repo and tags. + /// + public string Tag { get; private set; } = string.Empty; + + /// + /// The repo id to download the native library files. + /// + public string RepoId { get; private set; } = "AsakusaRinne/LLamaSharpNative"; + + /// + /// The directory to cache the downloaded files. If you only want to make the downloaded files appear in a directory, + /// regardless of whether the file will have a copy in another place, please set instead. + /// + public string CacheDir { get; private set; } + + /// + /// If provided, the downloaded file will be placed under this directory, + /// either as a symlink (default) or a regular file. + /// + public string? LocalDir { get; private set; } = null; + + /// + /// If you are using your own private repo as remote source, you could set the token to get the access. + /// + public string? Token { get; private set; } = null; + + /// + /// The timeout (second) of the native library file download. 
+ /// + public int Timeout { get; private set; } = 10; + + /// + /// Extra search directories. They will only be used when finding files downloaded from remote. + /// Generally it will be useful when you wnat to replace the downloading process with your custom implementation. + /// If you are not sure how it works, please leave it empty. + /// + public string[]? ExtraSearchDirectories { get;private set; } = null; + + internal NativeLibraryDownloadSettings() + { + var home = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".cache"); + CacheDir = Path.Combine(home, "llama_sharp"); + } + + internal static string GetDefaultLocalDir(string tag) + { + var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); + return Path.Combine(home, ".llama_sharp", tag); + } + + /// + /// Create a with default settings. + /// + /// + public static NativeLibraryDownloadSettings Create() + { + return new NativeLibraryDownloadSettings(); + } + + /// + /// Set the default endpoint to download file from. + /// + /// + /// + public NativeLibraryDownloadSettings WithEndpoint(string endpoint) + { + Endpoint = endpoint; + return this; + } + + /// + /// Set the endpoints to try when the download fails with the default endpoint. + /// + /// + /// + public NativeLibraryDownloadSettings WithEndpointFallbacks(params string[] endpoints) + { + EndpointFallbacks = endpoints; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithTag(string tag) + { + Tag = tag; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithRepoId(string repoId) + { + RepoId = repoId; + return this; + } + + /// + /// Set the . If you only want to make the downloaded files appear in a directory, + /// regardless of whether the file may have a copy in another place, please use instead. 
+ /// + /// + /// + public NativeLibraryDownloadSettings WithCacheDir(string cacheDir) + { + CacheDir = cacheDir; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithLocalDir(string localDir) + { + LocalDir = localDir; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithToken(string token) + { + Token = token; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithTimeout(int timeout) + { + Timeout = timeout; + return this; + } + + /// + /// Set . They will only be used when finding files downloaded from remote. + /// Generally it will be useful when you wnat to replace the downloading process with your custom implementation. + /// If you are not sure how it works, please ignore this method. + /// + /// + /// + public NativeLibraryDownloadSettings WithExtraSearchDirectories(string[] directories) + { + ExtraSearchDirectories = directories; + return this; + } + + /// + public override string ToString() + { + // Token should be hidden when printing it. + string hiddenToken = ""; + if(Token is not null) + { + if(Token.Length <= 10) + { + hiddenToken = new string('*', Token.Length - 1) + Token.Last(); + } + else + { + hiddenToken += Token.Substring(0, 2); + hiddenToken += new string('*', Token.Length - 3); + hiddenToken += Token.Last(); + } + } + + return $"(Endpoint = {Endpoint}, " + + $"EndpointFallbacks = {string.Join(", ", EndpointFallbacks ?? 
new string[0])}, " + + $"Tag = {Tag}, " + + $"RepoId = {RepoId}, " + + $"CacheDir = {CacheDir}, " + + $"LocalDir = {LocalDir}, " + + $"Token = {hiddenToken}, " + + $"Timeout = {Timeout}s)"; + } + } +} diff --git a/LLama/Native/Load/NativeLibraryFromPath.cs b/LLama/Native/Load/NativeLibraryFromPath.cs new file mode 100644 index 000000000..45c7e48ec --- /dev/null +++ b/LLama/Native/Load/NativeLibraryFromPath.cs @@ -0,0 +1,31 @@ +using LLama.Abstractions; +using System.Collections.Generic; + +namespace LLama.Native +{ + /// + /// A native library specified with a lcoal file path. + /// + public class NativeLibraryFromPath: INativeLibrary + { + private string _path; + + /// + public NativeLibraryMetadata? Metadata => null; + + /// + /// + /// + /// + public NativeLibraryFromPath(string path) + { + _path = path; + } + + /// + public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + { + return fromRemote? [] : [_path]; + } + } +} diff --git a/LLama/Native/Load/NativeLibraryMetadata.cs b/LLama/Native/Load/NativeLibraryMetadata.cs new file mode 100644 index 000000000..654c9002f --- /dev/null +++ b/LLama/Native/Load/NativeLibraryMetadata.cs @@ -0,0 +1,43 @@ + +namespace LLama.Native +{ + /// + /// Information of a native library file. + /// + /// Which kind of library it is. + /// Whether it's compiled with cublas. + /// Which AvxLevel it's compiled with. 
+ public record class NativeLibraryMetadata(NativeLibraryName NativeLibraryName, bool UseCuda, AvxLevel AvxLevel) + { + public override string ToString() + { + return $"(NativeLibraryName: {NativeLibraryName}, UseCuda: {UseCuda}, AvxLevel: {AvxLevel})"; + } + } + + /// + /// Avx support configuration + /// + public enum AvxLevel + { + /// + /// No AVX + /// + None, + + /// + /// Advanced Vector Extensions (supported by most processors after 2011) + /// + Avx, + + /// + /// AVX2 (supported by most processors after 2013) + /// + Avx2, + + /// + /// AVX512 (supported by some processors after 2016, not widely supported) + /// + Avx512, + } +} diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs new file mode 100644 index 000000000..513f6ccc2 --- /dev/null +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -0,0 +1,182 @@ +using LLama.Exceptions; +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ + internal static class NativeLibraryUtils + { + /// + /// Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible + /// + /// The library handle to unload later, or IntPtr.Zero if no library was loaded + internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) + { +#if NET6_0_OR_GREATER + var description = config.CheckAndGatherDescription(); + var systemInfo = SystemInfo.Get(); + Log($"Loading library: '{config.NativeLibraryName.GetLibraryName()}'", LLamaLogLevel.Debug, config.LogCallback); + + // Get platform specific parts of the path (e.g. 
.so/.dll/.dylib, libName prefix or not) + NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var ext, out var libPrefix); + Log($"Detected OS Platform: '{systemInfo.OSPlatform}'", LLamaLogLevel.Info, config.LogCallback); + Log($"Detected OS string: '{os}'", LLamaLogLevel.Debug, config.LogCallback); + Log($"Detected extension string: '{ext}'", LLamaLogLevel.Debug, config.LogCallback); + Log($"Detected prefix string: '{libPrefix}'", LLamaLogLevel.Debug, config.LogCallback); + + // Set the flag to ensure this config can no longer be modified + config.LibraryHasLoaded = true; + + // Show the configuration we're working with + Log(description.ToString(), LLamaLogLevel.Info, config.LogCallback); + + // Get the libraries ordered by priority from the selecting policy. + var libraries = config.SelectingPolicy.Select(description, systemInfo, config.LogCallback); + + foreach (var library in libraries) + { + // Prepare the local library file and get the path. + var paths = library.Prepare(systemInfo, false, config.LogCallback); + foreach (var path in paths) + { + Log($"Got relative library path '{path}' from local with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); + + var result = TryLoad(path, description.SearchDirectories, config.LogCallback); + if (result != IntPtr.Zero) + { + return result; + } + } + // If we failed but auto-download is allowed, try to prepare the file from remote. + if (description.AllowAutoDownload) + { + paths = library.Prepare(systemInfo, true, config.LogCallback); + if (description.DownloadSettings.LocalDir is null) + { + // Null local directory is not expected here (it will make things more complex if we want to handle it). + // It should always be set when gathering the description. + throw new RuntimeError("Auto-download is enabled for native library but the `LocalDir` is null. 
" + + "It's an unexpected behavior and please report an issue to LLamaSharp."); + } + // When using auto-download, this should be the only search directory. + List searchDirectoriesForDownload = [description.DownloadSettings.LocalDir]; + // unless extra search paths are added by the user. + searchDirectoriesForDownload.AddRange(description.DownloadSettings.ExtraSearchDirectories ?? []); + + foreach (var path in paths) + { + Log($"Got relative library path '{path}' from remote with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); + + var result = TryLoad(path, searchDirectoriesForDownload, config.LogCallback); + if (result != IntPtr.Zero) + { + return result; + } + } + } + } + + // If fallback is allowed, we will make the last try (the default system loading) when calling the native api. + // Otherwise we throw an exception here. + if (!description.AllowFallback) + { + throw new RuntimeError("Failed to load the native library. Please check the log for more information."); + } +#endif + + Log($"No library was loaded before calling native apis. " + + $"This is not an error under netstandard2.0 but needs attention with net6 or higher.", LLamaLogLevel.Warning, config.LogCallback); + return IntPtr.Zero; + +#if NET6_0_OR_GREATER + // Try to load a DLL from the path. + // Returns null if nothing is loaded. + static IntPtr TryLoad(string path, IEnumerable searchDirectories, NativeLogConfig.LLamaLogCallback? 
logCallback) + { + var fullPath = TryFindPath(path, searchDirectories); + Log($"Found full path file '{fullPath}' for relative path '{path}'", LLamaLogLevel.Debug, logCallback); + if (NativeLibrary.TryLoad(fullPath, out var handle)) + { + Log($"Successfully loaded '{fullPath}'", LLamaLogLevel.Info, logCallback); + return handle; + } + + Log($"Failed Loading '{fullPath}'", LLamaLogLevel.Info, logCallback); + return IntPtr.Zero; + } +#endif + } + + // Try to find the given file in any of the possible search paths + private static string TryFindPath(string filename, IEnumerable searchDirectories) + { + // Try the configured search directories in the configuration + foreach (var path in searchDirectories) + { + var candidate = Path.Combine(path, filename); + if (File.Exists(candidate)) + return candidate; + } + + // Try a few other possible paths + var possiblePathPrefix = new[] { + AppDomain.CurrentDomain.BaseDirectory, + Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? "" + }; + + foreach (var path in possiblePathPrefix) + { + var candidate = Path.Combine(path, filename); + if (File.Exists(candidate)) + return candidate; + } + + return filename; + } + + private static void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCallback? logCallback) + { + if (!message.EndsWith("\n")) + message += "\n"; + + logCallback?.Invoke(level, message); + } + +#if NET6_0_OR_GREATER + public static void GetPlatformPathParts(OSPlatform platform, out string os, out string fileExtension, out string libPrefix) + { + if (platform == OSPlatform.Windows) + { + os = "win-x64"; + fileExtension = ".dll"; + libPrefix = ""; + return; + } + + if (platform == OSPlatform.Linux) + { + os = "linux-x64"; + fileExtension = ".so"; + libPrefix = "lib"; + return; + } + + if (platform == OSPlatform.OSX) + { + fileExtension = ".dylib"; + + os = System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported + ? 
"osx-arm64" + : "osx-x64"; + libPrefix = "lib"; + } + else + { + throw new RuntimeError("Your operating system is not supported, please open an issue in LLamaSharp."); + } + } +#endif + } +} diff --git a/LLama/Native/Load/NativeLibraryWithAvx.cs b/LLama/Native/Load/NativeLibraryWithAvx.cs new file mode 100644 index 000000000..5b1f65142 --- /dev/null +++ b/LLama/Native/Load/NativeLibraryWithAvx.cs @@ -0,0 +1,75 @@ +using LLama.Abstractions; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ +#if NET6_0_OR_GREATER + /// + /// A native library compiled with avx support but without cuda/cublas. + /// + public class NativeLibraryWithAvx : INativeLibrary + { + private NativeLibraryName _libraryName; + private AvxLevel _avxLevel; + private bool _skipCheck; + private NativeLibraryDownloadSettings _downloadSettings; + + /// + public NativeLibraryMetadata? Metadata + { + get + { + return new NativeLibraryMetadata(_libraryName, false, _avxLevel); + } + } + + /// + /// + /// + /// + /// + /// + /// + public NativeLibraryWithAvx(NativeLibraryName libraryName, AvxLevel avxLevel, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + { + _libraryName = libraryName; + _avxLevel = avxLevel; + _skipCheck = skipCheck; + _downloadSettings = downloadSettings; + } + + /// + public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + { + if (systemInfo.OSPlatform != OSPlatform.Windows && systemInfo.OSPlatform != OSPlatform.Linux && !_skipCheck) + { + // Not supported on systems other than Windows and Linux. + return []; + } + var path = GetAvxPath(systemInfo, _avxLevel, fromRemote, logCallback); + return path is null ? [] : [path]; + } + + private string? GetAvxPath(SystemInfo systemInfo, AvxLevel avxLevel, bool fromRemote, NativeLogConfig.LLamaLogCallback? 
logCallback) + { + NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); + var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel); + if (!string.IsNullOrEmpty(avxStr)) + avxStr += "/"; + var relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + + if (fromRemote) + { + // Download and return the local path. + // We make it sychronize because we c'd better not use async method when loading library later. + return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; + } + else + { + return relativePath; + } + } + } +#endif +} diff --git a/LLama/Native/Load/NativeLibraryWithCpuOrMac.cs b/LLama/Native/Load/NativeLibraryWithCpuOrMac.cs new file mode 100644 index 000000000..3bc09b13e --- /dev/null +++ b/LLama/Native/Load/NativeLibraryWithCpuOrMac.cs @@ -0,0 +1,77 @@ +using LLama.Abstractions; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ +#if NET6_0_OR_GREATER + /// + /// A native library compiled on Mac, or fallbacks from all other libraries in the selection. + /// + public class NativeLibraryWithCpuOrMac + : INativeLibrary + { + private NativeLibraryName _libraryName; + private bool _skipCheck; + private NativeLibraryDownloadSettings _downloadSettings; + + /// + public NativeLibraryMetadata? Metadata + { + get + { + return new NativeLibraryMetadata(_libraryName, false, AvxLevel.None); + } + } + + /// + /// + /// + /// + /// + /// + public NativeLibraryWithCpuOrMac(NativeLibraryName libraryName, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + { + _libraryName = libraryName; + _skipCheck = skipCheck; + _downloadSettings = downloadSettings; + } + + /// + public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? 
logCallback) + { + var path = GetPath(systemInfo, AvxLevel.None, fromRemote, logCallback); + return path is null ?[] : [path]; + } + + private string? GetPath(SystemInfo systemInfo, AvxLevel avxLevel, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + { + NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); + string relativePath; + if (systemInfo.OSPlatform == OSPlatform.OSX) + { + relativePath = $"runtimes/{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + } + else + { + var avxStr = NativeLibraryConfig.AvxLevelToString(AvxLevel.None); + if (!string.IsNullOrEmpty(avxStr)) + avxStr += "/"; + + relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + } + + if (fromRemote) + { + // Download and return the local path. + // We make it sychronize because we c'd better not use async method when loading library later. + return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; + } + else + { + return relativePath; + } + } + } +#endif +} diff --git a/LLama/Native/Load/NativeLibraryWithCuda.cs b/LLama/Native/Load/NativeLibraryWithCuda.cs new file mode 100644 index 000000000..1e2840d5e --- /dev/null +++ b/LLama/Native/Load/NativeLibraryWithCuda.cs @@ -0,0 +1,91 @@ +using LLama.Abstractions; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ +#if NET6_0_OR_GREATER + /// + /// A native library compiled with cublas/cuda. + /// + public class NativeLibraryWithCuda : INativeLibrary + { + private int _majorCudaVersion; + private NativeLibraryName _libraryName; + private AvxLevel _avxLevel; + private bool _skipCheck; + private NativeLibraryDownloadSettings _downloadSettings; + + /// + public NativeLibraryMetadata? 
Metadata + { + get + { + return new NativeLibraryMetadata(_libraryName, true, _avxLevel); + } + } + + /// + /// + /// + /// + /// + /// + /// + public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + { + _majorCudaVersion = majorCudaVersion; + _libraryName = libraryName; + _skipCheck = skipCheck; + _downloadSettings = downloadSettings; + } + + /// + public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + { + // TODO: Avx level is ignored now, needs to be implemented in the future. + if (systemInfo.OSPlatform == OSPlatform.Windows || systemInfo.OSPlatform == OSPlatform.Linux || _skipCheck) + { + if (_majorCudaVersion == -1 && _skipCheck) + { + // Currently only 11 and 12 are supported. + var cuda12LibraryPath = GetCudaPath(systemInfo, 12, fromRemote, logCallback); + if (cuda12LibraryPath is not null) + { + yield return cuda12LibraryPath; + } + var cuda11LibraryPath = GetCudaPath(systemInfo, 11, fromRemote, logCallback); + if (cuda11LibraryPath is not null) + { + yield return cuda11LibraryPath; + } + } + else if (_majorCudaVersion != -1) + { + var cudaLibraryPath = GetCudaPath(systemInfo, _majorCudaVersion, fromRemote, logCallback); + if (cudaLibraryPath is not null) + { + yield return cudaLibraryPath; + } + } + } + } + + private string? GetCudaPath(SystemInfo systemInfo, int cudaVersion, bool remote, NativeLogConfig.LLamaLogCallback? logCallback) + { + NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); + var relativePath = $"runtimes/{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + if (remote) + { + // Download and return the local path. + // We make it sychronize because we c'd better not use async method when loading library later. 
+ return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; + } + else + { + return relativePath; + } + } + } +#endif +} diff --git a/LLama/Native/Load/SystemInfo.cs b/LLama/Native/Load/SystemInfo.cs new file mode 100644 index 000000000..0ffc67e91 --- /dev/null +++ b/LLama/Native/Load/SystemInfo.cs @@ -0,0 +1,129 @@ +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Text.Json; + +namespace LLama.Native +{ + /// + /// Operating system information. + /// + /// + /// + public record class SystemInfo(OSPlatform OSPlatform, int CudaMajorVersion) + { + /// + /// Get the system information of the current machine. + /// + /// + /// + public static SystemInfo Get() + { + OSPlatform platform; + if(RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + platform = OSPlatform.Windows; + } + else if(RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + platform = OSPlatform.Linux; + } + else if(RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + platform = OSPlatform.OSX; + } + else + { + throw new PlatformNotSupportedException(); + } + + return new SystemInfo(platform, GetCudaMajorVersion()); + } + + #region CUDA version + private static int GetCudaMajorVersion() + { + string? cudaPath; + string version = ""; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + cudaPath = Environment.GetEnvironmentVariable("CUDA_PATH"); + if (cudaPath is null) + { + return -1; + } + + //Ensuring cuda bin path is reachable. Especially for MAUI environment. 
+ string cudaBinPath = Path.Combine(cudaPath, "bin"); + + if (Directory.Exists(cudaBinPath)) + { + AddDllDirectory(cudaBinPath); + } + + version = GetCudaVersionFromPath(cudaPath); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + // Try the default first + cudaPath = "/usr/local/bin/cuda"; + version = GetCudaVersionFromPath(cudaPath); + if (string.IsNullOrEmpty(version)) + { + cudaPath = Environment.GetEnvironmentVariable("LD_LIBRARY_PATH"); + if (cudaPath is null) + { + return -1; + } + foreach (var path in cudaPath.Split(':')) + { + version = GetCudaVersionFromPath(Path.Combine(path, "..")); + if (string.IsNullOrEmpty(version)) + { + break; + } + } + } + } + + if (string.IsNullOrEmpty(version)) + return -1; + + version = version.Split('.')[0]; + if (int.TryParse(version, out var majorVersion)) + return majorVersion; + + return -1; + } + + private static string GetCudaVersionFromPath(string cudaPath) + { + try + { + string json = File.ReadAllText(Path.Combine(cudaPath, cudaVersionFile)); + using (JsonDocument document = JsonDocument.Parse(json)) + { + JsonElement root = document.RootElement; + JsonElement cublasNode = root.GetProperty("libcublas"); + JsonElement versionNode = cublasNode.GetProperty("version"); + if (versionNode.ValueKind == JsonValueKind.Undefined) + { + return string.Empty; + } + return versionNode.GetString() ?? ""; + } + } + catch (Exception) + { + return string.Empty; + } + } + + // Put it here to avoid calling NativeApi when getting the cuda version. 
+ [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + internal static extern int AddDllDirectory(string NewDirectory); + + private const string cudaVersionFile = "version.json"; + #endregion + } +} diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index 4b4beea2e..b3c9a6ad2 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -18,7 +18,8 @@ static NativeApi() SetDllImportResolver(); // Set flag to indicate that this point has been passed. No native library config can be done after this point. - NativeLibraryConfig.LibraryHasLoaded = true; + NativeLibraryConfig.LLama.LibraryHasLoaded = true; + NativeLibraryConfig.LLavaShared.LibraryHasLoaded = true; // Immediately make a call which requires loading the llama DLL. This method call // can't fail unless the DLL hasn't been loaded. @@ -38,8 +39,8 @@ static NativeApi() } // Now that the "loaded" flag is set configure logging in llama.cpp - if (NativeLibraryConfig.Instance.LogCallback != null) - NativeLogConfig.llama_log_set(NativeLibraryConfig.Instance.LogCallback); + if (NativeLibraryConfig.LLama.LogCallback != null) + NativeLogConfig.llama_log_set(NativeLibraryConfig.LLama.LogCallback); // Init llama.cpp backend llama_backend_init(); @@ -63,8 +64,14 @@ private static void SetDllImportResolver() if (_loadedLlamaHandle != IntPtr.Zero) return _loadedLlamaHandle; + // We don't allow downloading in static constructor to avoid potentially uncertain behaviors now. 
+ if (NativeLibraryConfig.LLama.AllowAutoDownload && !NativeLibraryConfig.LLama.HasCalledDryRun) + { + throw new RuntimeError("When using auto-download, please call NativeLibraryConfig.DryRun() right after finishing you configurations."); + } + // Try to load a preferred library, based on CPU feature detection - _loadedLlamaHandle = TryLoadLibraries(LibraryName.Llama); + _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama); return _loadedLlamaHandle; } @@ -74,8 +81,14 @@ private static void SetDllImportResolver() if (_loadedLlavaSharedHandle != IntPtr.Zero) return _loadedLlavaSharedHandle; + // We don't allow downloading in static constructor to avoid potentially uncertain behaviors now. + if (NativeLibraryConfig.LLavaShared.AllowAutoDownload && !NativeLibraryConfig.LLavaShared.HasCalledDryRun) + { + throw new RuntimeError("When using auto-download, please call NativeLibraryConfig.DryRun() right after finishing you configurations."); + } + // Try to load a preferred library, based on CPU feature detection - _loadedLlavaSharedHandle = TryLoadLibraries(LibraryName.LlavaShared); + _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared); return _loadedLlavaSharedHandle; } @@ -85,341 +98,6 @@ private static void SetDllImportResolver() #endif } - private static void Log(string message, LLamaLogLevel level) - { - if (!message.EndsWith("\n")) - message += "\n"; - - NativeLibraryConfig.Instance.LogCallback?.Invoke(level, message); - } - - #region CUDA version - private static int GetCudaMajorVersion() - { - string? cudaPath; - string version = ""; - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - cudaPath = Environment.GetEnvironmentVariable("CUDA_PATH"); - if (cudaPath is null) - { - return -1; - } - - //Ensuring cuda bin path is reachable. Especially for MAUI environment. 
- string cudaBinPath = Path.Combine(cudaPath, "bin"); - - if (Directory.Exists(cudaBinPath)) - { - AddDllDirectory(cudaBinPath); - } - - version = GetCudaVersionFromPath(cudaPath); - } - else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - { - // Try the default first - cudaPath = "/usr/local/bin/cuda"; - version = GetCudaVersionFromPath(cudaPath); - if (string.IsNullOrEmpty(version)) - { - cudaPath = Environment.GetEnvironmentVariable("LD_LIBRARY_PATH"); - if (cudaPath is null) - { - return -1; - } - foreach (var path in cudaPath.Split(':')) - { - version = GetCudaVersionFromPath(Path.Combine(path, "..")); - if (string.IsNullOrEmpty(version)) - { - break; - } - } - } - } - - if (string.IsNullOrEmpty(version)) - return -1; - - version = version.Split('.')[0]; - if (int.TryParse(version, out var majorVersion)) - return majorVersion; - - return -1; - } - - private static string GetCudaVersionFromPath(string cudaPath) - { - try - { - string json = File.ReadAllText(Path.Combine(cudaPath, cudaVersionFile)); - using (JsonDocument document = JsonDocument.Parse(json)) - { - JsonElement root = document.RootElement; - JsonElement cublasNode = root.GetProperty("libcublas"); - JsonElement versionNode = cublasNode.GetProperty("version"); - if (versionNode.ValueKind == JsonValueKind.Undefined) - { - return string.Empty; - } - return versionNode.GetString() ?? ""; - } - } - catch (Exception) - { - return string.Empty; - } - } - #endregion - -#if NET6_0_OR_GREATER - private static IEnumerable GetLibraryTryOrder(NativeLibraryConfig.Description configuration) - { - var loadingName = configuration.Library.GetLibraryName(); - Log($"Loading library: '{loadingName}'", LLamaLogLevel.Debug); - - // Get platform specific parts of the path (e.g. 
.so/.dll/.dylib, libName prefix or not) - GetPlatformPathParts(out var platform, out var os, out var ext, out var libPrefix); - Log($"Detected OS Platform: '{platform}'", LLamaLogLevel.Info); - Log($"Detected OS string: '{os}'", LLamaLogLevel.Debug); - Log($"Detected extension string: '{ext}'", LLamaLogLevel.Debug); - Log($"Detected prefix string: '{libPrefix}'", LLamaLogLevel.Debug); - - if (configuration.UseCuda && (platform == OSPlatform.Windows || platform == OSPlatform.Linux)) - { - var cudaVersion = GetCudaMajorVersion(); - Log($"Detected cuda major version {cudaVersion}.", LLamaLogLevel.Info); - - if (cudaVersion == -1 && !configuration.AllowFallback) - { - // if check skipped, we just try to load cuda libraries one by one. - if (configuration.SkipCheck) - { - yield return GetCudaLibraryPath(loadingName, "cuda12"); - yield return GetCudaLibraryPath(loadingName, "cuda11"); - } - else - { - throw new RuntimeError("Configured to load a cuda library but no cuda detected on your device."); - } - } - else if (cudaVersion == 11) - { - yield return GetCudaLibraryPath(loadingName, "cuda11"); - } - else if (cudaVersion == 12) - { - yield return GetCudaLibraryPath(loadingName, "cuda12"); - } - else if (cudaVersion > 0) - { - throw new RuntimeError($"Cuda version {cudaVersion} hasn't been supported by LLamaSharp, please open an issue for it."); - } - - // otherwise no cuda detected but allow fallback - } - - // Add the CPU/Metal libraries - if (platform == OSPlatform.OSX) - { - // On Mac it's very simple, there's no AVX to consider. - yield return GetMacLibraryPath(loadingName); - } - else - { - if (configuration.AllowFallback) - { - // Try all of the AVX levels we can support. 
- if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx512) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx512); - - if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx2) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx2); - - if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx); - - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.None); - } - else - { - // Fallback is not allowed - use the exact specified AVX level - yield return GetAvxLibraryPath(loadingName, configuration.AvxLevel); - } - } - } - - private static string GetMacLibraryPath(string libraryName) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - return $"runtimes/{os}/native/{libPrefix}{libraryName}{fileExtension}"; - } - - /// - /// Given a CUDA version and some path parts, create a complete path to the library file - /// - /// Library being loaded (e.g. "llama") - /// CUDA version (e.g. "cuda11") - /// - private static string GetCudaLibraryPath(string libraryName, string cuda) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - return $"runtimes/{os}/native/{cuda}/{libPrefix}{libraryName}{fileExtension}"; - } - - /// - /// Given an AVX level and some path parts, create a complete path to the library file - /// - /// Library being loaded (e.g. 
"llama") - /// - /// - private static string GetAvxLibraryPath(string libraryName, NativeLibraryConfig.AvxLevel avx) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - var avxStr = NativeLibraryConfig.AvxLevelToString(avx); - if (!string.IsNullOrEmpty(avxStr)) - avxStr += "/"; - - return $"runtimes/{os}/native/{avxStr}{libPrefix}{libraryName}{fileExtension}"; - } - - private static void GetPlatformPathParts(out OSPlatform platform, out string os, out string fileExtension, out string libPrefix) - { - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - platform = OSPlatform.Windows; - os = "win-x64"; - fileExtension = ".dll"; - libPrefix = ""; - return; - } - - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - { - platform = OSPlatform.Linux; - os = "linux-x64"; - fileExtension = ".so"; - libPrefix = "lib"; - return; - } - - if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) - { - platform = OSPlatform.OSX; - fileExtension = ".dylib"; - - os = System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported - ? 
"osx-arm64" - : "osx-x64"; - libPrefix = "lib"; - } - else - { - throw new RuntimeError("Your operating system is not supported, please open an issue in LLamaSharp."); - } - } -#endif - - /// - /// Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible - /// - /// The library handle to unload later, or IntPtr.Zero if no library was loaded - private static IntPtr TryLoadLibraries(LibraryName lib) - { -#if NET6_0_OR_GREATER - var configuration = NativeLibraryConfig.CheckAndGatherDescription(lib); - - // Set the flag to ensure the NativeLibraryConfig can no longer be modified - NativeLibraryConfig.LibraryHasLoaded = true; - - // Show the configuration we're working with - Log(configuration.ToString(), LLamaLogLevel.Info); - - // If a specific path is requested, load that or immediately fail - if (!string.IsNullOrEmpty(configuration.Path)) - { - if (!NativeLibrary.TryLoad(configuration.Path, out var handle)) - throw new RuntimeError($"Failed to load the native library [{configuration.Path}] you specified."); - - Log($"Successfully loaded the library [{configuration.Path}] specified by user", LLamaLogLevel.Info); - return handle; - } - - // Get a list of locations to try loading (in order of preference) - var libraryTryLoadOrder = GetLibraryTryOrder(configuration); - - foreach (var libraryPath in libraryTryLoadOrder) - { - var fullPath = TryFindPath(libraryPath); - Log($"Trying '{fullPath}'", LLamaLogLevel.Debug); - - var result = TryLoad(fullPath); - if (result != IntPtr.Zero) - { - Log($"Loaded '{fullPath}'", LLamaLogLevel.Info); - return result; - } - - Log($"Failed Loading '{fullPath}'", LLamaLogLevel.Info); - } - - if (!configuration.AllowFallback) - { - throw new RuntimeError("Failed to load the library that match your rule, please" + - " 1) check your rule." + - " 2) try to allow fallback." 
+ - " 3) or open an issue if it's expected to be successful."); - } -#endif - - Log($"No library was loaded before calling native apis. " + - $"This is not an error under netstandard2.0 but needs attention with net6 or higher.", LLamaLogLevel.Warning); - return IntPtr.Zero; - -#if NET6_0_OR_GREATER - // Try to load a DLL from the path. - // Returns null if nothing is loaded. - static IntPtr TryLoad(string path) - { - if (NativeLibrary.TryLoad(path, out var handle)) - return handle; - - return IntPtr.Zero; - } - - // Try to find the given file in any of the possible search paths - string TryFindPath(string filename) - { - // Try the configured search directories in the configuration - foreach (var path in configuration.SearchDirectories) - { - var candidate = Path.Combine(path, filename); - if (File.Exists(candidate)) - return candidate; - } - - // Try a few other possible paths - var possiblePathPrefix = new[] { - AppDomain.CurrentDomain.BaseDirectory, - Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? 
"" - }; - - foreach (var path in possiblePathPrefix) - { - var candidate = Path.Combine(path, filename); - if (File.Exists(candidate)) - return candidate; - } - - return filename; - } -#endif - } - internal const string libraryName = "llama"; internal const string llavaLibraryName = "llava_shared"; private const string cudaVersionFile = "version.json"; diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index ed4561517..d57b00e77 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -19,9 +19,6 @@ public static void llama_empty_call() llama_max_devices(); } - [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] - private static extern int AddDllDirectory(string NewDirectory); - /// /// Get the maximum number of devices supported by llama.cpp /// diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs deleted file mode 100644 index ef7cd7c19..000000000 --- a/LLama/Native/NativeLibraryConfig.cs +++ /dev/null @@ -1,332 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.Extensions.Logging; - -namespace LLama.Native -{ -#if NET6_0_OR_GREATER - /// - /// Allows configuration of the native llama.cpp libraries to load and use. - /// All configuration must be done before using **any** other LLamaSharp methods! - /// - public sealed partial class NativeLibraryConfig - { - private string? _libraryPath; - private string? _libraryPathLLava; - - private bool _useCuda = true; - private AvxLevel _avxLevel; - private bool _allowFallback = true; - private bool _skipCheck = false; - - /// - /// search directory -> priority level, 0 is the lowest. - /// - private readonly List _searchDirectories = new List(); - - #region configurators - /// - /// Load a specified native library as backend for LLamaSharp. - /// When this method is called, all the other configurations will be ignored. - /// - /// The full path to the llama library to load. 
- /// The full path to the llava library to load. - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithLibrary(string? llamaPath, string? llavaPath) - { - ThrowIfLoaded(); - - _libraryPath = llamaPath; - _libraryPathLLava = llavaPath; - return this; - } - - /// - /// Configure whether to use cuda backend if possible. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithCuda(bool enable = true) - { - ThrowIfLoaded(); - - _useCuda = enable; - return this; - } - - /// - /// Configure the prefferred avx support level of the backend. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithAvx(AvxLevel level) - { - ThrowIfLoaded(); - - _avxLevel = level; - return this; - } - - /// - /// Configure whether to allow fallback when there's no match for preferred settings. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithAutoFallback(bool enable = true) - { - ThrowIfLoaded(); - - _allowFallback = enable; - return this; - } - - /// - /// Whether to skip the check when you don't allow fallback. This option - /// may be useful under some complex conditions. For example, you're sure - /// you have your cublas configured but LLamaSharp take it as invalid by mistake. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig SkipCheck(bool enable = true) - { - ThrowIfLoaded(); - - _skipCheck = enable; - return this; - } - - /// - /// Add self-defined search directories. Note that the file stucture of the added - /// directories must be the same as the default directory. Besides, the directory - /// won't be used recursively. - /// - /// - /// - public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) - { - ThrowIfLoaded(); - - _searchDirectories.AddRange(directories); - return this; - } - - /// - /// Add self-defined search directories. 
Note that the file stucture of the added - /// directories must be the same as the default directory. Besides, the directory - /// won't be used recursively. - /// - /// - /// - public NativeLibraryConfig WithSearchDirectory(string directory) - { - ThrowIfLoaded(); - - _searchDirectories.Add(directory); - return this; - } - #endregion - - internal static Description CheckAndGatherDescription(LibraryName library) - { - if (Instance._allowFallback && Instance._skipCheck) - throw new ArgumentException("Cannot skip the check when fallback is allowed."); - - var path = library switch - { - LibraryName.Llama => Instance._libraryPath, - LibraryName.LlavaShared => Instance._libraryPathLLava, - _ => throw new ArgumentException($"Unknown library name '{library}'", nameof(library)), - }; - - return new Description( - path, - library, - Instance._useCuda, - Instance._avxLevel, - Instance._allowFallback, - Instance._skipCheck, - Instance._searchDirectories.Concat(new[] { "./" }).ToArray() - ); - } - - internal static string AvxLevelToString(AvxLevel level) - { - return level switch - { - AvxLevel.None => string.Empty, - AvxLevel.Avx => "avx", - AvxLevel.Avx2 => "avx2", - AvxLevel.Avx512 => "avx512", - _ => throw new ArgumentException($"Unknown AvxLevel '{level}'") - }; - } - - /// - /// Private constructor prevents new instances of this class being created - /// - private NativeLibraryConfig() - { - // Automatically detect the highest supported AVX level - if (System.Runtime.Intrinsics.X86.Avx.IsSupported) - _avxLevel = AvxLevel.Avx; - if (System.Runtime.Intrinsics.X86.Avx2.IsSupported) - _avxLevel = AvxLevel.Avx2; - - if (CheckAVX512()) - _avxLevel = AvxLevel.Avx512; - } - - private static bool CheckAVX512() - { - if (!System.Runtime.Intrinsics.X86.X86Base.IsSupported) - return false; - - // ReSharper disable UnusedVariable (ebx is used when < NET8) - var (_, ebx, ecx, _) = System.Runtime.Intrinsics.X86.X86Base.CpuId(7, 0); - // ReSharper restore UnusedVariable - - var vnni = 
(ecx & 0b_1000_0000_0000) != 0; - -#if NET8_0_OR_GREATER - var f = System.Runtime.Intrinsics.X86.Avx512F.IsSupported; - var bw = System.Runtime.Intrinsics.X86.Avx512BW.IsSupported; - var vbmi = System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported; -#else - var f = (ebx & (1 << 16)) != 0; - var bw = (ebx & (1 << 30)) != 0; - var vbmi = (ecx & 0b_0000_0000_0010) != 0; -#endif - - return vnni && vbmi && bw && f; - } - - /// - /// Avx support configuration - /// - public enum AvxLevel - { - /// - /// No AVX - /// - None, - - /// - /// Advanced Vector Extensions (supported by most processors after 2011) - /// - Avx, - - /// - /// AVX2 (supported by most processors after 2013) - /// - Avx2, - - /// - /// AVX512 (supported by some processors after 2016, not widely supported) - /// - Avx512, - } - - internal record Description(string? Path, LibraryName Library, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, string[] SearchDirectories) - { - public override string ToString() - { - string avxLevelString = AvxLevel switch - { - AvxLevel.None => "NoAVX", - AvxLevel.Avx => "AVX", - AvxLevel.Avx2 => "AVX2", - AvxLevel.Avx512 => "AVX512", - _ => "Unknown" - }; - - string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }"; - - return $"NativeLibraryConfig Description:\n" + - $"- LibraryName: {Library}\n" + - $"- Path: '{Path}'\n" + - $"- PreferCuda: {UseCuda}\n" + - $"- PreferredAvxLevel: {avxLevelString}\n" + - $"- AllowFallback: {AllowFallback}\n" + - $"- SkipCheck: {SkipCheck}\n" + - $"- SearchDirectories and Priorities: {searchDirectoriesString}"; - } - } - } -#endif - - public sealed partial class NativeLibraryConfig - { - /// - /// Get the config instance - /// - public static NativeLibraryConfig Instance { get; } = new(); - - /// - /// Check if the native library has already been loaded. Configuration cannot be modified if this is true. 
- /// - public static bool LibraryHasLoaded { get; internal set; } - - internal NativeLogConfig.LLamaLogCallback? LogCallback; - - private static void ThrowIfLoaded() - { - if (LibraryHasLoaded) - throw new InvalidOperationException("NativeLibraryConfig must be configured before using **any** other LLamaSharp methods!"); - } - - /// - /// Set the log callback that will be used for all llama.cpp log messages - /// - /// - /// - public NativeLibraryConfig WithLogCallback(NativeLogConfig.LLamaLogCallback? callback) - { - ThrowIfLoaded(); - - LogCallback = callback; - return this; - } - - /// - /// Set the log callback that will be used for all llama.cpp log messages - /// - /// - /// - public NativeLibraryConfig WithLogCallback(ILogger? logger) - { - ThrowIfLoaded(); - - // Redirect to llama_log_set. This will wrap the logger in a delegate and bind that as the log callback instead. - NativeLogConfig.llama_log_set(logger); - - return this; - } - } - - internal enum LibraryName - { - Llama, - LlavaShared - } - - internal static class LibraryNameExtensions - { - public static string GetLibraryName(this LibraryName name) - { - switch (name) - { - case LibraryName.Llama: - return NativeApi.libraryName; - case LibraryName.LlavaShared: - return NativeApi.llavaLibraryName; - default: - throw new ArgumentOutOfRangeException(nameof(name), name, null); - } - } - } -} diff --git a/LLama/Native/NativeLogConfig.cs b/LLama/Native/NativeLogConfig.cs index ebcd23d47..82b097fb3 100644 --- a/LLama/Native/NativeLogConfig.cs +++ b/LLama/Native/NativeLogConfig.cs @@ -37,7 +37,7 @@ public static class NativeLogConfig public static void llama_log_set(LLamaLogCallback? 
logCallback) #pragma warning restore IDE1006 // Naming Styles { - if (NativeLibraryConfig.LibraryHasLoaded) + if (NativeLibraryConfig.LLama.LibraryHasLoaded) { // The library is loaded, just pass the callback directly to llama.cpp native_llama_log_set(logCallback); From 6e28b21da3de91a473328e7ed3b5d4dfc9f727bf Mon Sep 17 00:00:00 2001 From: Rinne Date: Thu, 25 Apr 2024 02:39:25 +0800 Subject: [PATCH 02/11] refactor: remove the auto-download related parts. --- LLama.Examples/Program.cs | 5 +- LLama/Abstractions/INativeLibrary.cs | 3 +- .../INativeLibrarySelectingPolicy.cs | 2 +- LLama/LLamaSharp.csproj | 1 - .../DefaultNativeLibrarySelectingPolicy.cs | 16 +- LLama/Native/Load/NativeLibraryConfig.cs | 97 ++---- .../Load/NativeLibraryDownloadManager.cs | 285 ------------------ LLama/Native/Load/NativeLibraryFromPath.cs | 4 +- LLama/Native/Load/NativeLibraryUtils.cs | 31 +- LLama/Native/Load/NativeLibraryWithAvx.cs | 23 +- LLama/Native/Load/NativeLibraryWithCuda.cs | 26 +- ...c.cs => NativeLibraryWithMacOrFallback.cs} | 25 +- LLama/Native/NativeApi.Load.cs | 12 - 13 files changed, 54 insertions(+), 476 deletions(-) delete mode 100644 LLama/Native/Load/NativeLibraryDownloadManager.cs rename LLama/Native/Load/{NativeLibraryWithCpuOrMac.cs => NativeLibraryWithMacOrFallback.cs} (63%) diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index cee9540be..53edfb3be 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -18,7 +18,7 @@ __ __ ____ __ """); // Configure logging. Change this to `true` to see log messages from llama.cpp -var showLLamaCppLogs = false; +var showLLamaCppLogs = true; NativeLibraryConfig .All .WithLogCallback((level, message) => @@ -31,7 +31,8 @@ __ __ ____ __ NativeLibraryConfig .All .WithCuda() - .WithAutoDownload().DryRun(); + //.WithAutoDownload() // An experimental feature + .DryRun(); // Calling this method forces loading to occur now. 
NativeApi.llama_empty_call(); diff --git a/LLama/Abstractions/INativeLibrary.cs b/LLama/Abstractions/INativeLibrary.cs index 64ba182e0..a7e00b753 100644 --- a/LLama/Abstractions/INativeLibrary.cs +++ b/LLama/Abstractions/INativeLibrary.cs @@ -20,11 +20,10 @@ public interface INativeLibrary /// If it's a relative path, LLamaSharp will search the path in the search directies you set. /// /// The system information of the current machine. - /// Whether it's allowed to download from remote. /// The log callback. /// /// The relative paths of the library. You could return multiple paths to try them one by one. If no file is available, please return an empty array. /// - IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote = false, NativeLogConfig.LLamaLogCallback? logCallback = null); + IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback = null); } } diff --git a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs index f57ae6d1a..41335202e 100644 --- a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs +++ b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs @@ -18,7 +18,7 @@ public interface INativeLibrarySelectingPolicy /// The system information of the current machine. /// The log callback. /// The information of the selected native library files, in order by priority from the beginning to the end. - IEnumerable Select(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback = null); + IEnumerable Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
logCallback = null); } #endif } diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 8277ce9eb..2d8bbefd4 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -49,7 +49,6 @@ - diff --git a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs index 42433cecb..5cb3b0c5a 100644 --- a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs +++ b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs @@ -9,7 +9,7 @@ namespace LLama.Native public class DefaultNativeLibrarySelectingPolicy: INativeLibrarySelectingPolicy { /// - public IEnumerable Select(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) + public IEnumerable Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { List results = new(); @@ -25,7 +25,7 @@ public IEnumerable Select(NativeLibraryConfig.Description descri { if (description.UseCuda) { - yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.SkipCheck); } if(!description.UseCuda || description.AllowFallback) @@ -34,25 +34,25 @@ public IEnumerable Select(NativeLibraryConfig.Description descri { // Try all of the AVX levels we can support. 
if (description.AvxLevel >= AvxLevel.Avx512) - yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx512, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx512, description.SkipCheck); if (description.AvxLevel >= AvxLevel.Avx2) - yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx2, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx2, description.SkipCheck); if (description.AvxLevel >= AvxLevel.Avx) - yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx, description.SkipCheck); - yield return new NativeLibraryWithAvx(description.Library, AvxLevel.None, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithAvx(description.Library, AvxLevel.None, description.SkipCheck); } else { - yield return new NativeLibraryWithAvx(description.Library, description.AvxLevel, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithAvx(description.Library, description.AvxLevel, description.SkipCheck); } } if(systemInfo.OSPlatform == OSPlatform.OSX || description.AllowFallback) { - yield return new NativeLibraryWithCpuOrMac(description.Library, description.SkipCheck, description.DownloadSettings); + yield return new NativeLibraryWithMacOrFallback(description.Library, description.SkipCheck); } } } diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 6f77927d5..69ae7745a 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -19,8 +19,6 @@ public sealed partial class NativeLibraryConfig private AvxLevel _avxLevel; private bool _allowFallback = true; private bool _skipCheck = false; - 
private bool _allowAutoDownload = false; - private NativeLibraryDownloadSettings _downloadSettings = NativeLibraryDownloadSettings.Create(); /// /// search directory -> priority level, 0 is the lowest. @@ -29,8 +27,6 @@ public sealed partial class NativeLibraryConfig internal INativeLibrarySelectingPolicy SelectingPolicy { get; private set; } = new DefaultNativeLibrarySelectingPolicy(); - internal bool AllowAutoDownload => _allowAutoDownload; - #region configurators /// /// Load a specified native library as backend for LLamaSharp. @@ -135,25 +131,6 @@ public NativeLibraryConfig WithSearchDirectory(string directory) return this; } - /// - /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load. - /// You could add a setting here to customize the behavior of the download. - /// - /// If auto-download is enabled, please call after you have finished setting your configurations. - /// - /// - /// - /// - public NativeLibraryConfig WithAutoDownload(bool enable = true, NativeLibraryDownloadSettings? settings = null) - { - ThrowIfLoaded(); - - _allowAutoDownload = enable; - if (settings is not null) - _downloadSettings = settings; - return this; - } - /// /// Set the policy which decides how to select the desired native libraries and order them by priority. /// By default we use . @@ -177,13 +154,6 @@ internal Description CheckAndGatherDescription() var path = _libraryPath; - // Don't modify and pass the original object to `Description`, create a new one instead. - // Also, we need to set the default local directory if the user does not. - var defaultLocalDir = NativeLibraryDownloadSettings.GetDefaultLocalDir(GetCommitHash(_downloadSettings.Tag)); - var downloadSettings = NativeLibraryDownloadSettings.Create() - .WithEndpoint(_downloadSettings.Endpoint).WithEndpointFallbacks(_downloadSettings.EndpointFallbacks ?? 
[]) - .WithRepoId(_downloadSettings.RepoId).WithToken(_downloadSettings.Token).WithTag(_downloadSettings.Tag) - .WithTimeout(_downloadSettings.Timeout).WithLocalDir(_downloadSettings.LocalDir ?? defaultLocalDir); return new Description( path, @@ -192,9 +162,7 @@ internal Description CheckAndGatherDescription() _avxLevel, _allowFallback, _skipCheck, - _searchDirectories.Concat(new[] { "./" }).ToArray(), - _allowAutoDownload, - downloadSettings + _searchDirectories.Concat(new[] { "./" }).ToArray() ); } @@ -216,8 +184,6 @@ internal static string AvxLevelToString(AvxLevel level) private NativeLibraryConfig(NativeLibraryName nativeLibraryName) { NativeLibraryName = nativeLibraryName; - // This value should be changed when we're going to publish new release. (any better approach?) - _downloadSettings = new NativeLibraryDownloadSettings().WithTag(GetCommitHash("master")); // Automatically detect the highest supported AVX level if (System.Runtime.Intrinsics.X86.Avx.IsSupported) @@ -263,10 +229,8 @@ private static bool CheckAVX512() /// /// /// - /// - /// public record Description(string? 
Path, NativeLibraryName Library, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, - string[] SearchDirectories, bool AllowAutoDownload, NativeLibraryDownloadSettings DownloadSettings) + string[] SearchDirectories) { /// public override string ToString() @@ -289,9 +253,7 @@ public override string ToString() $"- PreferredAvxLevel: {avxLevelString}\n" + $"- AllowFallback: {AllowFallback}\n" + $"- SkipCheck: {SkipCheck}\n" + - $"- SearchDirectories and Priorities: {searchDirectoriesString}" + - $"- AllowAutoDownload: {AllowAutoDownload}\n" + - $"- DownloadSettings: {DownloadSettings}\n"; + $"- SearchDirectories and Priorities: {searchDirectoriesString}"; } } } @@ -331,17 +293,10 @@ public sealed partial class NativeLibraryConfig {"master", "f7001c"} }; - internal static string GetCommitHash(string version) - { - if(VersionMap.TryGetValue(version, out var hash)) - { - return hash; - } - else - { - return version; - } - } + /// + /// The current version. + /// + public static readonly string CurrentVersion = "master"; // This should be changed before publishing new version. TODO: any better approach? static NativeLibraryConfig() { @@ -363,11 +318,6 @@ private NativeLibraryConfig(NativeLibraryName nativeLibraryName) /// public bool LibraryHasLoaded { get; internal set; } - /// - /// Whether has been called. - /// - internal bool HasCalledDryRun { get; private set; } = false; - internal NativeLibraryName NativeLibraryName { get; } internal NativeLogConfig.LLamaLogCallback? LogCallback { get; private set; } = null; @@ -375,7 +325,10 @@ private NativeLibraryConfig(NativeLibraryName nativeLibraryName) private void ThrowIfLoaded() { if (LibraryHasLoaded) - throw new InvalidOperationException("NativeLibraryConfig must be configured before using **any** other LLamaSharp methods!"); + throw new InvalidOperationException("The library has already loaded, you can't change the configurations. 
" + + "Please finish the configuration setting before any call to LLamaSharp native APIs." + + "Please use NativeLibraryConfig.DryRun if you want to see whether it's loaded " + + "successfully but still have chance to modify the configurations."); } /// @@ -416,7 +369,6 @@ public NativeLibraryConfig WithLogCallback(ILogger? logger) public bool DryRun() { LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); - HasCalledDryRun = true; return NativeLibraryUtils.TryLoadLibrary(this) != IntPtr.Zero; } } @@ -424,10 +376,17 @@ public bool DryRun() /// /// A class to set same configurations to multiple libraries at the same time. /// - public sealed partial class NativeLibraryConfigContainer + public sealed class NativeLibraryConfigContainer { private NativeLibraryConfig[] _configs; + /// + /// All the configurations in this container. + /// Please avoid calling this property explicitly, use + /// and instead. + /// + public NativeLibraryConfig[] Configs => _configs; + internal NativeLibraryConfigContainer(params NativeLibraryConfig[] configs) { _configs = configs; @@ -554,24 +513,6 @@ public NativeLibraryConfigContainer WithSearchDirectory(string directory) return this; } - /// - /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load. - /// You could add a setting here to customize the behavior of the download. - /// - /// If auto-download is enabled, please call after you have finished setting your configurations. - /// - /// - /// - /// - public NativeLibraryConfigContainer WithAutoDownload(bool enable = true, NativeLibraryDownloadSettings? settings = null) - { - foreach (var config in _configs) - { - config.WithAutoDownload(enable, settings); - } - return this; - } - /// /// Set the policy which decides how to select the desired native libraries and order them by priority. /// By default we use . 
diff --git a/LLama/Native/Load/NativeLibraryDownloadManager.cs b/LLama/Native/Load/NativeLibraryDownloadManager.cs deleted file mode 100644 index b9a136991..000000000 --- a/LLama/Native/Load/NativeLibraryDownloadManager.cs +++ /dev/null @@ -1,285 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading.Tasks; -using Huggingface; - -namespace LLama.Native -{ - internal class NativeLibraryDownloadManager - { - /// - /// Download the library file - /// - /// - /// - /// - /// The local path of the file if successful otherwise null. - public static async Task DownloadLibraryFile(NativeLibraryDownloadSettings settings, string remoteFilePath, NativeLogConfig.LLamaLogCallback? logCallback = null) - { - HFGlobalConfig.DefaultDownloadTimeout = settings.Timeout; - - HashSet endpointSet = new([settings.Endpoint]); - if (settings.EndpointFallbacks is not null) - { - foreach (var endpoint in settings.EndpointFallbacks) - { - endpointSet.Add(endpoint); - } - } - var endpoints = endpointSet.ToArray(); - - Dictionary exceptionMap = new(); - foreach(var endpoint in endpoints) - { - logCallback?.Invoke(LLamaLogLevel.Debug, $"Downloading the native library file '{remoteFilePath}' from {endpoint} with repo = {settings.RepoId}, tag = {settings.Tag}"); - var path = await HFDownloader.DownloadFileAsync(settings.RepoId, remoteFilePath, revision: settings.Tag, cacheDir: settings.CacheDir, - localDir: settings.LocalDir, token: settings.Token, endpoint: endpoint); - if (path is not null) - { - logCallback?.Invoke(LLamaLogLevel.Debug, $"Successfully downloaded the native library file to {path}"); - return path; - } - else - { - logCallback?.Invoke(LLamaLogLevel.Warning, "The download failed without an explicit error, please check your configuration or report an issue to LLamaSharp."); - } - //try - //{ - // logCallback?.Invoke(LLamaLogLevel.Debug, $"Downloading the native library file '{remoteFilePath}' from {endpoint} with repo = 
{settings.RepoId}, tag = {settings.Tag}"); - // var path = await HFDownloader.DownloadFileAsync(settings.RepoId, remoteFilePath, revision: settings.Tag, cacheDir: settings.CacheDir, - // localDir: settings.LocalDir, token: settings.Token, endpoint: endpoint); - // if(path is not null) - // { - // logCallback?.Invoke(LLamaLogLevel.Debug, $"Successfully downloaded the native library file to {path}"); - // return path; - // } - // else - // { - // logCallback?.Invoke(LLamaLogLevel.Warning, "The download failed without an explicit error, please check your configuration or report an issue to LLamaSharp."); - // } - //} - //catch(Exception ex) - //{ - // logCallback?.Invoke(LLamaLogLevel.Warning, $"An exception was thrown when downloading the native library file from {endpoint}: {ex.Message}"); - //} - } - - // means that the download finally fails. - return null; - } - } - - /// - /// Settings for downloading the native library. - /// - public class NativeLibraryDownloadSettings - { - /// - /// The endpoint to download from, by default the official site of HuggingFace. - /// - public string Endpoint { get; private set; } = "https://huggingface.co"; - - /// - /// Endpoints to fallback to if downloading with the main endpoint fails. - /// - /// Generally this is an option for those countries or regions where the main endpoint is blocked. - /// You should not put too many endpoints here, as it will slow down the downloading process. - /// - public string[]? EndpointFallbacks { get; private set; } = null; - - /// - /// The version of the library to download. Please use LLamaSharp version in format `[major].[minor].[patch]` as tag - /// or go to https://huggingface.co/AsakusaRinne/LLamaSharpNative - /// to see all available tags, or use your own repo and tags. - /// - public string Tag { get; private set; } = string.Empty; - - /// - /// The repo id to download the native library files. 
- /// - public string RepoId { get; private set; } = "AsakusaRinne/LLamaSharpNative"; - - /// - /// The directory to cache the downloaded files. If you only want to make the downloaded files appear in a directory, - /// regardless of whether the file will have a copy in another place, please set instead. - /// - public string CacheDir { get; private set; } - - /// - /// If provided, the downloaded file will be placed under this directory, - /// either as a symlink (default) or a regular file. - /// - public string? LocalDir { get; private set; } = null; - - /// - /// If you are using your own private repo as remote source, you could set the token to get the access. - /// - public string? Token { get; private set; } = null; - - /// - /// The timeout (second) of the native library file download. - /// - public int Timeout { get; private set; } = 10; - - /// - /// Extra search directories. They will only be used when finding files downloaded from remote. - /// Generally it will be useful when you wnat to replace the downloading process with your custom implementation. - /// If you are not sure how it works, please leave it empty. - /// - public string[]? ExtraSearchDirectories { get;private set; } = null; - - internal NativeLibraryDownloadSettings() - { - var home = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".cache"); - CacheDir = Path.Combine(home, "llama_sharp"); - } - - internal static string GetDefaultLocalDir(string tag) - { - var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); - return Path.Combine(home, ".llama_sharp", tag); - } - - /// - /// Create a with default settings. - /// - /// - public static NativeLibraryDownloadSettings Create() - { - return new NativeLibraryDownloadSettings(); - } - - /// - /// Set the default endpoint to download file from. 
- /// - /// - /// - public NativeLibraryDownloadSettings WithEndpoint(string endpoint) - { - Endpoint = endpoint; - return this; - } - - /// - /// Set the endpoints to try when the download fails with the default endpoint. - /// - /// - /// - public NativeLibraryDownloadSettings WithEndpointFallbacks(params string[] endpoints) - { - EndpointFallbacks = endpoints; - return this; - } - - /// - /// Set the - /// - /// - /// - public NativeLibraryDownloadSettings WithTag(string tag) - { - Tag = tag; - return this; - } - - /// - /// Set the - /// - /// - /// - public NativeLibraryDownloadSettings WithRepoId(string repoId) - { - RepoId = repoId; - return this; - } - - /// - /// Set the . If you only want to make the downloaded files appear in a directory, - /// regardless of whether the file may have a copy in another place, please use instead. - /// - /// - /// - public NativeLibraryDownloadSettings WithCacheDir(string cacheDir) - { - CacheDir = cacheDir; - return this; - } - - /// - /// Set the - /// - /// - /// - public NativeLibraryDownloadSettings WithLocalDir(string localDir) - { - LocalDir = localDir; - return this; - } - - /// - /// Set the - /// - /// - /// - public NativeLibraryDownloadSettings WithToken(string token) - { - Token = token; - return this; - } - - /// - /// Set the - /// - /// - /// - public NativeLibraryDownloadSettings WithTimeout(int timeout) - { - Timeout = timeout; - return this; - } - - /// - /// Set . They will only be used when finding files downloaded from remote. - /// Generally it will be useful when you wnat to replace the downloading process with your custom implementation. - /// If you are not sure how it works, please ignore this method. - /// - /// - /// - public NativeLibraryDownloadSettings WithExtraSearchDirectories(string[] directories) - { - ExtraSearchDirectories = directories; - return this; - } - - /// - public override string ToString() - { - // Token should be hidden when printing it. 
- string hiddenToken = ""; - if(Token is not null) - { - if(Token.Length <= 10) - { - hiddenToken = new string('*', Token.Length - 1) + Token.Last(); - } - else - { - hiddenToken += Token.Substring(0, 2); - hiddenToken += new string('*', Token.Length - 3); - hiddenToken += Token.Last(); - } - } - - return $"(Endpoint = {Endpoint}, " + - $"EndpointFallbacks = {string.Join(", ", EndpointFallbacks ?? new string[0])}, " + - $"Tag = {Tag}, " + - $"RepoId = {RepoId}, " + - $"CacheDir = {CacheDir}, " + - $"LocalDir = {LocalDir}, " + - $"Token = {hiddenToken}, " + - $"Timeout = {Timeout}s)"; - } - } -} diff --git a/LLama/Native/Load/NativeLibraryFromPath.cs b/LLama/Native/Load/NativeLibraryFromPath.cs index 45c7e48ec..c3e62a223 100644 --- a/LLama/Native/Load/NativeLibraryFromPath.cs +++ b/LLama/Native/Load/NativeLibraryFromPath.cs @@ -23,9 +23,9 @@ public NativeLibraryFromPath(string path) } /// - public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { - return fromRemote? [] : [_path]; + return [_path]; } } } diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index 513f6ccc2..e3ca06e14 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -33,12 +33,12 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) Log(description.ToString(), LLamaLogLevel.Info, config.LogCallback); // Get the libraries ordered by priority from the selecting policy. - var libraries = config.SelectingPolicy.Select(description, systemInfo, config.LogCallback); + var libraries = config.SelectingPolicy.Apply(description, systemInfo, config.LogCallback); foreach (var library in libraries) { // Prepare the local library file and get the path. 
- var paths = library.Prepare(systemInfo, false, config.LogCallback); + var paths = library.Prepare(systemInfo, config.LogCallback); foreach (var path in paths) { Log($"Got relative library path '{path}' from local with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); @@ -49,33 +49,6 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) return result; } } - // If we failed but auto-download is allowed, try to prepare the file from remote. - if (description.AllowAutoDownload) - { - paths = library.Prepare(systemInfo, true, config.LogCallback); - if (description.DownloadSettings.LocalDir is null) - { - // Null local directory is not expected here (it will make things more complex if we want to handle it). - // It should always be set when gathering the description. - throw new RuntimeError("Auto-download is enabled for native library but the `LocalDir` is null. " + - "It's an unexpected behavior and please report an issue to LLamaSharp."); - } - // When using auto-download, this should be the only search directory. - List searchDirectoriesForDownload = [description.DownloadSettings.LocalDir]; - // unless extra search paths are added by the user. - searchDirectoriesForDownload.AddRange(description.DownloadSettings.ExtraSearchDirectories ?? []); - - foreach (var path in paths) - { - Log($"Got relative library path '{path}' from remote with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); - - var result = TryLoad(path, searchDirectoriesForDownload, config.LogCallback); - if (result != IntPtr.Zero) - { - return result; - } - } - } } // If fallback is allowed, we will make the last try (the default system loading) when calling the native api. 
diff --git a/LLama/Native/Load/NativeLibraryWithAvx.cs b/LLama/Native/Load/NativeLibraryWithAvx.cs index 5b1f65142..7b5421b4d 100644 --- a/LLama/Native/Load/NativeLibraryWithAvx.cs +++ b/LLama/Native/Load/NativeLibraryWithAvx.cs @@ -13,7 +13,6 @@ public class NativeLibraryWithAvx : INativeLibrary private NativeLibraryName _libraryName; private AvxLevel _avxLevel; private bool _skipCheck; - private NativeLibraryDownloadSettings _downloadSettings; /// public NativeLibraryMetadata? Metadata @@ -30,45 +29,33 @@ public NativeLibraryMetadata? Metadata /// /// /// - /// - public NativeLibraryWithAvx(NativeLibraryName libraryName, AvxLevel avxLevel, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + public NativeLibraryWithAvx(NativeLibraryName libraryName, AvxLevel avxLevel, bool skipCheck) { _libraryName = libraryName; _avxLevel = avxLevel; _skipCheck = skipCheck; - _downloadSettings = downloadSettings; } /// - public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { if (systemInfo.OSPlatform != OSPlatform.Windows && systemInfo.OSPlatform != OSPlatform.Linux && !_skipCheck) { // Not supported on systems other than Windows and Linux. return []; } - var path = GetAvxPath(systemInfo, _avxLevel, fromRemote, logCallback); + var path = GetAvxPath(systemInfo, _avxLevel, logCallback); return path is null ? [] : [path]; } - private string? GetAvxPath(SystemInfo systemInfo, AvxLevel avxLevel, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + private string? GetAvxPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? 
logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel); if (!string.IsNullOrEmpty(avxStr)) avxStr += "/"; var relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; - - if (fromRemote) - { - // Download and return the local path. - // We make it sychronize because we c'd better not use async method when loading library later. - return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; - } - else - { - return relativePath; - } + return relativePath; } } #endif diff --git a/LLama/Native/Load/NativeLibraryWithCuda.cs b/LLama/Native/Load/NativeLibraryWithCuda.cs index 1e2840d5e..d3b06b864 100644 --- a/LLama/Native/Load/NativeLibraryWithCuda.cs +++ b/LLama/Native/Load/NativeLibraryWithCuda.cs @@ -14,7 +14,6 @@ public class NativeLibraryWithCuda : INativeLibrary private NativeLibraryName _libraryName; private AvxLevel _avxLevel; private bool _skipCheck; - private NativeLibraryDownloadSettings _downloadSettings; /// public NativeLibraryMetadata? Metadata @@ -31,17 +30,15 @@ public NativeLibraryMetadata? Metadata /// /// /// - /// - public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, bool skipCheck) { _majorCudaVersion = majorCudaVersion; _libraryName = libraryName; _skipCheck = skipCheck; - _downloadSettings = downloadSettings; } /// - public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { // TODO: Avx level is ignored now, needs to be implemented in the future. 
if (systemInfo.OSPlatform == OSPlatform.Windows || systemInfo.OSPlatform == OSPlatform.Linux || _skipCheck) @@ -49,12 +46,12 @@ public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, Nativ if (_majorCudaVersion == -1 && _skipCheck) { // Currently only 11 and 12 are supported. - var cuda12LibraryPath = GetCudaPath(systemInfo, 12, fromRemote, logCallback); + var cuda12LibraryPath = GetCudaPath(systemInfo, 12, logCallback); if (cuda12LibraryPath is not null) { yield return cuda12LibraryPath; } - var cuda11LibraryPath = GetCudaPath(systemInfo, 11, fromRemote, logCallback); + var cuda11LibraryPath = GetCudaPath(systemInfo, 11, logCallback); if (cuda11LibraryPath is not null) { yield return cuda11LibraryPath; @@ -62,7 +59,7 @@ public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, Nativ } else if (_majorCudaVersion != -1) { - var cudaLibraryPath = GetCudaPath(systemInfo, _majorCudaVersion, fromRemote, logCallback); + var cudaLibraryPath = GetCudaPath(systemInfo, _majorCudaVersion, logCallback); if (cudaLibraryPath is not null) { yield return cudaLibraryPath; @@ -71,20 +68,11 @@ public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, Nativ } } - private string? GetCudaPath(SystemInfo systemInfo, int cudaVersion, bool remote, NativeLogConfig.LLamaLogCallback? logCallback) + private string? GetCudaPath(SystemInfo systemInfo, int cudaVersion, NativeLogConfig.LLamaLogCallback? logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); var relativePath = $"runtimes/{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; - if (remote) - { - // Download and return the local path. - // We make it sychronize because we c'd better not use async method when loading library later. 
- return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; - } - else - { - return relativePath; - } + return relativePath; } } #endif diff --git a/LLama/Native/Load/NativeLibraryWithCpuOrMac.cs b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs similarity index 63% rename from LLama/Native/Load/NativeLibraryWithCpuOrMac.cs rename to LLama/Native/Load/NativeLibraryWithMacOrFallback.cs index 3bc09b13e..5df339307 100644 --- a/LLama/Native/Load/NativeLibraryWithCpuOrMac.cs +++ b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs @@ -8,12 +8,10 @@ namespace LLama.Native /// /// A native library compiled on Mac, or fallbacks from all other libraries in the selection. /// - public class NativeLibraryWithCpuOrMac - : INativeLibrary + public class NativeLibraryWithMacOrFallback : INativeLibrary { private NativeLibraryName _libraryName; private bool _skipCheck; - private NativeLibraryDownloadSettings _downloadSettings; /// public NativeLibraryMetadata? Metadata @@ -29,22 +27,20 @@ public NativeLibraryMetadata? Metadata /// /// /// - /// - public NativeLibraryWithCpuOrMac(NativeLibraryName libraryName, bool skipCheck, NativeLibraryDownloadSettings downloadSettings) + public NativeLibraryWithMacOrFallback(NativeLibraryName libraryName, bool skipCheck) { _libraryName = libraryName; _skipCheck = skipCheck; - _downloadSettings = downloadSettings; } /// - public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { - var path = GetPath(systemInfo, AvxLevel.None, fromRemote, logCallback); + var path = GetPath(systemInfo, AvxLevel.None, logCallback); return path is null ?[] : [path]; } - private string? GetPath(SystemInfo systemInfo, AvxLevel avxLevel, bool fromRemote, NativeLogConfig.LLamaLogCallback? logCallback) + private string? 
GetPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); string relativePath; @@ -61,16 +57,7 @@ public IEnumerable Prepare(SystemInfo systemInfo, bool fromRemote, Nativ relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; } - if (fromRemote) - { - // Download and return the local path. - // We make it sychronize because we c'd better not use async method when loading library later. - return NativeLibraryDownloadManager.DownloadLibraryFile(_downloadSettings, relativePath, logCallback).Result; - } - else - { - return relativePath; - } + return relativePath; } } #endif diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index b3c9a6ad2..277555e7b 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -64,12 +64,6 @@ private static void SetDllImportResolver() if (_loadedLlamaHandle != IntPtr.Zero) return _loadedLlamaHandle; - // We don't allow downloading in static constructor to avoid potentially uncertain behaviors now. - if (NativeLibraryConfig.LLama.AllowAutoDownload && !NativeLibraryConfig.LLama.HasCalledDryRun) - { - throw new RuntimeError("When using auto-download, please call NativeLibraryConfig.DryRun() right after finishing you configurations."); - } - // Try to load a preferred library, based on CPU feature detection _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama); return _loadedLlamaHandle; @@ -81,12 +75,6 @@ private static void SetDllImportResolver() if (_loadedLlavaSharedHandle != IntPtr.Zero) return _loadedLlavaSharedHandle; - // We don't allow downloading in static constructor to avoid potentially uncertain behaviors now. 
- if (NativeLibraryConfig.LLavaShared.AllowAutoDownload && !NativeLibraryConfig.LLavaShared.HasCalledDryRun) - { - throw new RuntimeError("When using auto-download, please call NativeLibraryConfig.DryRun() right after finishing you configurations."); - } - // Try to load a preferred library, based on CPU feature detection _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared); return _loadedLlavaSharedHandle; From f0e2a3dc457643ab33c95866302e1e647b7f5df6 Mon Sep 17 00:00:00 2001 From: Rinne Date: Fri, 26 Apr 2024 09:18:52 +0800 Subject: [PATCH 03/11] Update LLama/Native/Load/NativeLibraryFromPath.cs Co-authored-by: Martin Evans --- LLama/Native/Load/NativeLibraryFromPath.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LLama/Native/Load/NativeLibraryFromPath.cs b/LLama/Native/Load/NativeLibraryFromPath.cs index c3e62a223..8cd99c308 100644 --- a/LLama/Native/Load/NativeLibraryFromPath.cs +++ b/LLama/Native/Load/NativeLibraryFromPath.cs @@ -4,7 +4,7 @@ namespace LLama.Native { /// - /// A native library specified with a lcoal file path. + /// A native library specified with a local file path. /// public class NativeLibraryFromPath: INativeLibrary { From 31ff3636305f15a360cc2a8b96d7a570864bc61d Mon Sep 17 00:00:00 2001 From: Rinne Date: Sat, 27 Apr 2024 02:23:36 +0800 Subject: [PATCH 04/11] fix: resolve comments. --- LLama.Examples/Program.cs | 4 +- LLama/Native/Load/NativeLibraryConfig.cs | 59 ++++++++++++++--------- LLama/Native/Load/NativeLibraryUtils.cs | 9 +++- LLama/Native/Load/UnknownNativeLibrary.cs | 25 ++++++++++ LLama/Native/NativeApi.Load.cs | 4 +- 5 files changed, 72 insertions(+), 29 deletions(-) create mode 100644 LLama/Native/Load/UnknownNativeLibrary.cs diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index 53edfb3be..63114120d 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -18,7 +18,7 @@ __ __ ____ __ """); // Configure logging. 
Change this to `true` to see log messages from llama.cpp -var showLLamaCppLogs = true; +var showLLamaCppLogs = false; NativeLibraryConfig .All .WithLogCallback((level, message) => @@ -32,7 +32,7 @@ __ __ ____ __ .All .WithCuda() //.WithAutoDownload() // An experimental feature - .DryRun(); + .DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); // Calling this method forces loading to occur now. NativeApi.llama_empty_call(); diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 69ae7745a..3d9fe69ac 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -265,7 +265,7 @@ public sealed partial class NativeLibraryConfig /// Set configurations for all the native libraries, including LLama and LLava /// [Obsolete("Please use NativeLibraryConfig.All instead, or set configurations for NativeLibraryConfig.LLama and NativeLibraryConfig.LLavaShared respectively.")] - public static NativeLibraryConfigContainer Instance { get; } + public static NativeLibraryConfigContainer Instance => All; /// /// Set configurations for all the native libraries, including LLama and LLava @@ -282,28 +282,26 @@ public sealed partial class NativeLibraryConfig /// public static NativeLibraryConfig LLavaShared { get; } + /// - /// A dictionary mapping from version to corresponding llama.cpp commit hash. - /// The version should be formatted int `[major].[minor].[patch]`. But there's an exceptance that you can - /// use `master` as a version to get the llama.cpp commit hash from the master branch. + /// The current version. /// - public static Dictionary VersionMap { get; } = new Dictionary() - // This value should be changed when we're going to publish new release. (any better approach?) - { - {"master", "f7001c"} - }; + public static string CurrentVersion => VERSION; // This should be changed before publishing new version. TODO: any better approach? 
+ + private const string COMMIT_HASH = "f7001c"; + private const string VERSION = "master"; /// - /// The current version. + /// Get the llama.cpp commit hash of the current version. /// - public static readonly string CurrentVersion = "master"; // This should be changed before publishing new version. TODO: any better approach? + /// + public static string GetNativeLibraryCommitHash() => COMMIT_HASH; static NativeLibraryConfig() { LLama = new(NativeLibraryName.Llama); LLavaShared = new(NativeLibraryName.LlavaShared); All = new(LLama, LLavaShared); - Instance = All; } #if NETSTANDARD2_0 @@ -365,11 +363,15 @@ public NativeLibraryConfig WithLogCallback(ILogger? logger) /// /// You can still modify the configuration after this calling but only before any call from . /// + /// + /// The loaded livrary. When the loading failed, this will be null. + /// However if you are using .NET standard2.0, this will never return null. + /// /// Whether the running is successful. - public bool DryRun() + public bool DryRun(out INativeLibrary? loadedLibrary) { LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); - return NativeLibraryUtils.TryLoadLibrary(this) != IntPtr.Zero; + return NativeLibraryUtils.TryLoadLibrary(this, out loadedLibrary) != IntPtr.Zero; } } @@ -380,13 +382,6 @@ public sealed class NativeLibraryConfigContainer { private NativeLibraryConfig[] _configs; - /// - /// All the configurations in this container. - /// Please avoid calling this property explicitly, use - /// and instead. - /// - public NativeLibraryConfig[] Configs => _configs; - internal NativeLibraryConfigContainer(params NativeLibraryConfig[] configs) { _configs = configs; @@ -566,9 +561,27 @@ public NativeLibraryConfigContainer WithLogCallback(ILogger? logger) /// You can still modify the configuration after this calling but only before any call from . /// /// Whether the running is successful. 
- public bool DryRun() + public bool DryRun(out INativeLibrary? loadedLLamaNativeLibrary, out INativeLibrary? loadedLLavaNativeLibrary) { - return _configs.All(config => config.DryRun()); + bool success = true; + loadedLLamaNativeLibrary = loadedLLavaNativeLibrary = null; + foreach(var config in _configs) + { + success &= config.DryRun(out var loadedLibrary); + if(config.NativeLibraryName == NativeLibraryName.Llama) + { + loadedLLamaNativeLibrary = loadedLibrary; + } + else if(config.NativeLibraryName == NativeLibraryName.LlavaShared) + { + loadedLLavaNativeLibrary = loadedLibrary; + } + else + { + throw new Exception("Unknown native library config during the dry run."); + } + } + return success; + } } diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index e3ca06e14..9dd7c8af1 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -1,4 +1,5 @@ -using LLama.Exceptions; +using LLama.Abstractions; +using LLama.Exceptions; using System; using System.Collections.Generic; using System.IO; @@ -12,7 +13,7 @@ internal static class NativeLibraryUtils /// Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible /// /// The library handle to unload later, or IntPtr.Zero if no library was loaded - internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) + internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLibrary? loadedLibrary) { #if NET6_0_OR_GREATER var description = config.CheckAndGatherDescription(); @@ -46,6 +47,7 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) var result = TryLoad(path, description.SearchDirectories, config.LogCallback); if (result != IntPtr.Zero) { + loadedLibrary = library; return result; } } @@ -57,6 +59,9 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config) { throw new RuntimeError("Failed to load the native library.
Please check the log for more information."); } + loadedLibrary = null; +#else + loadedLibrary = new UnknownNativeLibrary(); #endif Log($"No library was loaded before calling native apis. " + diff --git a/LLama/Native/Load/UnknownNativeLibrary.cs b/LLama/Native/Load/UnknownNativeLibrary.cs new file mode 100644 index 000000000..823e77a38 --- /dev/null +++ b/LLama/Native/Load/UnknownNativeLibrary.cs @@ -0,0 +1,25 @@ +using LLama.Abstractions; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace LLama.Native +{ + /// + /// When you are using .NET standard2.0, dynamic native library loading is not supported. + /// This class will be returned in . + /// + public class UnknownNativeLibrary: INativeLibrary + { + /// + public NativeLibraryMetadata? Metadata => null; + + /// + public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback = null) + { + throw new NotImplementedException("This class is only a placeholder and should not be used to load native library."); + } + } +} diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index 277555e7b..3c3edcb03 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -65,7 +65,7 @@ private static void SetDllImportResolver() return _loadedLlamaHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama); + _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out var _); return _loadedLlamaHandle; } @@ -76,7 +76,7 @@ private static void SetDllImportResolver() return _loadedLlavaSharedHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared); + _loadedLlavaSharedHandle = 
NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared, out var _); return _loadedLlavaSharedHandle; } From 2c19b8b8311d7a0ad158eb2d4c33d74282b22591 Mon Sep 17 00:00:00 2001 From: Rinne Date: Sat, 27 Apr 2024 03:13:24 +0800 Subject: [PATCH 05/11] rename the llava library name. --- LLama/Native/Load/NativeLibraryConfig.cs | 6 +++--- LLama/Native/NativeApi.Load.cs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 3d9fe69ac..720ad772f 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -280,7 +280,7 @@ public sealed partial class NativeLibraryConfig /// /// Configuration for LLava native library /// - public static NativeLibraryConfig LLavaShared { get; } + public static NativeLibraryConfig LLava { get; } /// @@ -300,8 +300,8 @@ public sealed partial class NativeLibraryConfig static NativeLibraryConfig() { LLama = new(NativeLibraryName.Llama); - LLavaShared = new(NativeLibraryName.LlavaShared); - All = new(LLama, LLavaShared); + LLava = new(NativeLibraryName.LlavaShared); + All = new(LLama, LLava); } #if NETSTANDARD2_0 diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index 3c3edcb03..927f63e82 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -19,7 +19,7 @@ static NativeApi() // Set flag to indicate that this point has been passed. No native library config can be done after this point. NativeLibraryConfig.LLama.LibraryHasLoaded = true; - NativeLibraryConfig.LLavaShared.LibraryHasLoaded = true; + NativeLibraryConfig.LLava.LibraryHasLoaded = true; // Immediately make a call which requires loading the llama DLL. This method call // can't fail unless the DLL hasn't been loaded. 
@@ -76,7 +76,7 @@ private static void SetDllImportResolver() return _loadedLlavaSharedHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared, out var _); + _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out var _); return _loadedLlavaSharedHandle; } From a86f14d1fb6c215bd92094181664c39c8f8c1f20 Mon Sep 17 00:00:00 2001 From: Rinne Date: Thu, 2 May 2024 07:44:25 +0800 Subject: [PATCH 06/11] Add an API to get the loaded native library. --- LLama/Native/Load/NativeLibraryConfig.cs | 20 +++++++++--------- LLama/Native/NativeApi.Load.cs | 27 ++++++++++++++++++++---- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 720ad772f..4d158ebff 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -299,8 +299,8 @@ public sealed partial class NativeLibraryConfig static NativeLibraryConfig() { - LLama = new(NativeLibraryName.Llama); - LLava = new(NativeLibraryName.LlavaShared); + LLama = new(NativeLibraryName.LLama); + LLava = new(NativeLibraryName.LLava); All = new(LLama, LLava); } @@ -401,11 +401,11 @@ public NativeLibraryConfigContainer WithLibrary(string? llamaPath, string? llava { foreach(var config in _configs) { - if(config.NativeLibraryName == NativeLibraryName.Llama && llamaPath is not null) + if(config.NativeLibraryName == NativeLibraryName.LLama && llamaPath is not null) { config.WithLibrary(llamaPath); } - if(config.NativeLibraryName == NativeLibraryName.LlavaShared && llavaPath is not null) + if(config.NativeLibraryName == NativeLibraryName.LLava && llavaPath is not null) { config.WithLibrary(llavaPath); } @@ -567,11 +567,11 @@ public bool DryRun(out INativeLibrary? 
loadedLLamaNativeLibrary, out INativeLibr foreach(var config in _configs) { success &= config.DryRun(out var loadedLibrary); - if(config.NativeLibraryName == NativeLibraryName.Llama) + if(config.NativeLibraryName == NativeLibraryName.LLama) { loadedLLamaNativeLibrary = loadedLibrary; } - else if(config.NativeLibraryName == NativeLibraryName.LlavaShared) + else if(config.NativeLibraryName == NativeLibraryName.LLava) { loadedLLavaNativeLibrary = loadedLibrary; } @@ -593,11 +593,11 @@ public enum NativeLibraryName /// /// The native library compiled from llama.cpp. /// - Llama, + LLama, /// /// The native library compiled from the LLaVA example of llama.cpp. /// - LlavaShared + LLava } internal static class LibraryNameExtensions @@ -606,9 +606,9 @@ public static string GetLibraryName(this NativeLibraryName name) { switch (name) { - case NativeLibraryName.Llama: + case NativeLibraryName.LLama: return NativeApi.libraryName; - case NativeLibraryName.LlavaShared: + case NativeLibraryName.LLava: return NativeApi.llavaLibraryName; default: throw new ArgumentOutOfRangeException(nameof(name), name, null); diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index 927f63e82..cdf4c8e97 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -4,6 +4,7 @@ using System.Runtime.InteropServices; using System.Text.Json; using System.Collections.Generic; +using LLama.Abstractions; namespace LLama.Native { @@ -65,7 +66,7 @@ private static void SetDllImportResolver() return _loadedLlamaHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out var _); + _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out _loadedLLamaLibrary); return _loadedLlamaHandle; } @@ -76,7 +77,7 @@ private static void SetDllImportResolver() return _loadedLlavaSharedHandle; // Try to load a preferred library, based on CPU 
feature detection - _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out var _); + _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out _loadedLLavaLibrary); return _loadedLlavaSharedHandle; } @@ -86,8 +87,26 @@ private static void SetDllImportResolver() #endif } + /// + /// Get the loaded native library. If you are using netstandard2.0, it will always return null. + /// + /// + /// + /// + public static INativeLibrary? GetLoadedNativeLibrary(NativeLibraryName name) + { + return name switch + { + NativeLibraryName.LLama => _loadedLLamaLibrary, + NativeLibraryName.LLava => _loadedLLavaLibrary, + _ => throw new ArgumentException($"Library name {name} is not found.") + }; + } + internal const string libraryName = "llama"; - internal const string llavaLibraryName = "llava_shared"; - private const string cudaVersionFile = "version.json"; + internal const string llavaLibraryName = "llava_shared"; + + private static INativeLibrary? _loadedLLamaLibrary = null; + private static INativeLibrary? _loadedLLavaLibrary = null; } } From 9d977b6ee322ef6e75a360a606495045e3d710f1 Mon Sep 17 00:00:00 2001 From: Rinne Date: Fri, 3 May 2024 09:13:17 +0800 Subject: [PATCH 07/11] Update LLama/Native/Load/UnknownNativeLibrary.cs Co-authored-by: Martin Evans --- LLama/Native/Load/UnknownNativeLibrary.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LLama/Native/Load/UnknownNativeLibrary.cs b/LLama/Native/Load/UnknownNativeLibrary.cs index 823e77a38..fa29ac0d4 100644 --- a/LLama/Native/Load/UnknownNativeLibrary.cs +++ b/LLama/Native/Load/UnknownNativeLibrary.cs @@ -19,7 +19,7 @@ public class UnknownNativeLibrary: INativeLibrary /// public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
logCallback = null) { - throw new NotImplementedException("This class is only a placeholder and should not be used to load native library."); + throw new NotSupportedException("This class is only a placeholder and should not be used to load native library."); } } } From eab674995348a59d172fbb01963a420de9e8be54 Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 14 May 2024 04:57:46 +0800 Subject: [PATCH 08/11] feat: support native library dynamic loading in .NET standard2.0. --- .../INativeLibrarySelectingPolicy.cs | 4 +- LLama/LLamaSharp.csproj | 1 + LLama/Native/LLamaContextParams.cs | 21 +- LLama/Native/LLamaKvCacheView.cs | 47 ++ LLama/Native/LLamaModelQuantizeParams.cs | 21 +- .../DefaultNativeLibrarySelectingPolicy.cs | 4 +- LLama/Native/Load/NativeLibraryConfig.cs | 57 +- LLama/Native/Load/NativeLibraryUtils.cs | 68 +- LLama/Native/Load/NativeLibraryWithAvx.cs | 16 +- LLama/Native/Load/NativeLibraryWithCuda.cs | 37 +- .../Load/NativeLibraryWithMacOrFallback.cs | 23 +- LLama/Native/NativeApi.BeamSearch.cs | 2 + LLama/Native/NativeApi.Grammar.cs | 18 +- LLama/Native/NativeApi.LLava.cs | 3 +- LLama/Native/NativeApi.Load.cs | 75 +- LLama/Native/NativeApi.NetStandard.cs | 799 ++++++++++++++++++ LLama/Native/NativeApi.Quantize.cs | 4 +- LLama/Native/NativeApi.Sampling.cs | 42 +- LLama/Native/NativeApi.cs | 27 +- LLama/Native/NativeLogConfig.cs | 22 +- .../SafeLLamaContextHandle.NetStandard.cs | 200 +++++ LLama/Native/SafeLLamaContextHandle.cs | 5 +- .../SafeLlamaModelHandle.NetStandard.cs | 360 ++++++++ LLama/Native/SafeLlamaModelHandle.cs | 6 +- LLama/Native/SafeLlavaModelHandle.cs | 37 +- LLamaSharp.sln | 16 +- NetStandardTest/App.config | 14 + NetStandardTest/NetStandardTest.csproj | 109 +++ NetStandardTest/Program.cs | 184 ++++ NetStandardTest/Properties/AssemblyInfo.cs | 36 + .../Properties/Settings.Designer.cs | 26 + NetStandardTest/Properties/Settings.settings | 6 + NetStandardTest/packages.config | 17 + 33 files changed, 2145 insertions(+), 162 deletions(-) 
create mode 100644 LLama/Native/NativeApi.NetStandard.cs create mode 100644 LLama/Native/SafeLLamaContextHandle.NetStandard.cs create mode 100644 LLama/Native/SafeLlamaModelHandle.NetStandard.cs create mode 100644 NetStandardTest/App.config create mode 100644 NetStandardTest/NetStandardTest.csproj create mode 100644 NetStandardTest/Program.cs create mode 100644 NetStandardTest/Properties/AssemblyInfo.cs create mode 100644 NetStandardTest/Properties/Settings.Designer.cs create mode 100644 NetStandardTest/Properties/Settings.settings create mode 100644 NetStandardTest/packages.config diff --git a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs index 41335202e..6e7d83bc6 100644 --- a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs +++ b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs @@ -1,11 +1,10 @@ -using LLama.Native; +using LLama.Native; using System; using System.Collections.Generic; using System.Text; namespace LLama.Abstractions { -#if NET6_0_OR_GREATER /// /// Decides the selected native library that should be loaded according to the configurations. /// @@ -20,5 +19,4 @@ public interface INativeLibrarySelectingPolicy /// The information of the selected native library files, in order by priority from the beginning to the end. IEnumerable Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
logCallback = null); } -#endif } diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index b0da6108c..e4797a187 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -42,6 +42,7 @@ + diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs index aab903785..9d1d1dbab 100644 --- a/LLama/Native/LLamaContextParams.cs +++ b/LLama/Native/LLamaContextParams.cs @@ -1,6 +1,10 @@ -using System; +using System; using System.Runtime.InteropServices; +#if NETSTANDARD +using NativeLibraryNetStandard; +#endif + namespace LLama.Native { /// @@ -180,10 +184,19 @@ public bool flash_attention public static LLamaContextParams Default() { return llama_context_default_params(); - - [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] - static extern LLamaContextParams llama_context_default_params(); } + +#if NETSTANDARD + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + static extern LLamaContextParams llama_context_default_params_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + delegate LLamaContextParams llama_context_default_params_t(); + static LLamaContextParams llama_context_default_params() => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_context_default_params_r() : NativeApi.GetLLamaExport("llama_context_default_params")(); +#else + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + static extern LLamaContextParams llama_context_default_params(); +#endif } } diff --git a/LLama/Native/LLamaKvCacheView.cs b/LLama/Native/LLamaKvCacheView.cs index 36379bfdd..5f0857571 100644 --- a/LLama/Native/LLamaKvCacheView.cs +++ b/LLama/Native/LLamaKvCacheView.cs @@ -1,6 +1,10 @@ using System; using System.Runtime.InteropServices; +#if NETSTANDARD +using NativeLibraryNetStandard; +#endif + namespace LLama.Native; /// @@ -151,6 +155,48 @@ public Span GetCellSequences(int index) } #region native API + +#if NETSTANDARD + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern NativeLLamaKvCacheView llama_kv_cache_view_init_r(SafeLLamaContextHandle ctx, int n_seq_max); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate NativeLLamaKvCacheView llama_kv_cache_view_init_t(SafeLLamaContextHandle ctx, int n_seq_max); + private static NativeLLamaKvCacheView llama_kv_cache_view_init(SafeLLamaContextHandle ctx, int n_seq_max) => + NativeLibraryConfig.DynamicLoadingDisabled ? 
llama_kv_cache_view_init_r(ctx, n_seq_max) : NativeApi.GetLLamaExport("llama_kv_cache_view_init")(ctx, n_seq_max); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_view_free_r(ref NativeLLamaKvCacheView view); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_view_free_t(ref NativeLLamaKvCacheView view); + private static void llama_kv_cache_view_free(ref NativeLLamaKvCacheView view) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_view_free_r(ref view); + } + else + { + NativeApi.GetLLamaExport("llama_kv_cache_view_free")(ref view); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_view_update_r(SafeLLamaContextHandle ctx, ref NativeLLamaKvCacheView view); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_view_update_t(SafeLLamaContextHandle ctx, ref NativeLLamaKvCacheView view); + private static void llama_kv_cache_view_update(SafeLLamaContextHandle ctx, ref NativeLLamaKvCacheView view) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_view_update_r(ctx, ref view); + } + else + { + NativeApi.GetLLamaExport("llama_kv_cache_view_update")(ctx, ref view); + } + } + +#else /// /// Create an empty KV cache view. 
(use only for debugging purposes) /// @@ -173,6 +219,7 @@ public Span GetCellSequences(int index) /// [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] private static extern void llama_kv_cache_view_update(SafeLLamaContextHandle ctx, ref NativeLLamaKvCacheView view); +#endif /// /// Information associated with an individual cell in the KV cache view (llama_kv_cache_view_cell) diff --git a/LLama/Native/LLamaModelQuantizeParams.cs b/LLama/Native/LLamaModelQuantizeParams.cs index 4a6a4e218..09da1b99b 100644 --- a/LLama/Native/LLamaModelQuantizeParams.cs +++ b/LLama/Native/LLamaModelQuantizeParams.cs @@ -1,6 +1,10 @@ -using System; +using System; using System.Runtime.InteropServices; +#if NETSTANDARD +using NativeLibraryNetStandard; +#endif + namespace LLama.Native { /// @@ -97,9 +101,18 @@ public bool keep_split public static LLamaModelQuantizeParams Default() { return llama_model_quantize_default_params(); - - [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] - static extern LLamaModelQuantizeParams llama_model_quantize_default_params(); } + +#if NETSTANDARD + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + static extern LLamaModelQuantizeParams llama_model_quantize_default_params_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + delegate LLamaModelQuantizeParams llama_model_quantize_default_params_t(); + static LLamaModelQuantizeParams llama_model_quantize_default_params() => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_model_quantize_default_params_r() : NativeApi.GetLLamaExport("llama_model_quantize_default_params")(); +#else + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + static extern LLamaModelQuantizeParams llama_model_quantize_default_params(); +#endif } } diff --git a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs index 5cb3b0c5a..4ce13a63b 100644 --- a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs +++ b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs @@ -1,10 +1,9 @@ -using LLama.Abstractions; +using LLama.Abstractions; using System.Collections.Generic; using System.Runtime.InteropServices; namespace LLama.Native { -#if NET6_0_OR_GREATER /// public class DefaultNativeLibrarySelectingPolicy: INativeLibrarySelectingPolicy { @@ -65,5 +64,4 @@ private void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCa logCallback?.Invoke(level, message); } } -#endif } diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 26d05909a..f6a6079d5 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -1,15 +1,15 @@ -using System; +using System; using System.Collections.Generic; +using System.IO; using System.Linq; using LLama.Abstractions; using Microsoft.Extensions.Logging; namespace LLama.Native { -#if NET6_0_OR_GREATER /// /// Allows configuration of the native llama.cpp libraries to load and use. - /// All configuration must be done before using **any** other LLamaSharp methods! + /// All configuration must be done before using **any** other LLamaSharp methods! 
/// public sealed partial class NativeLibraryConfig { @@ -25,6 +25,25 @@ public sealed partial class NativeLibraryConfig /// private readonly List _searchDirectories = new List(); +#if NETSTANDARD + internal static bool LLavaDisabled { get; private set; } = false; + internal static bool DynamicLoadingDisabled { get; private set; } = false; + + /// + /// Disable the llava library. If this method is called, the llava library will not be loaded. + /// If the API related with LLava is called, An exception will be thrown. + /// This method is only available with .NET standard 2.0. + /// + public void DisableLLava() => LLavaDisabled = true; + + /// + /// Disable the dynamic loading. It might fix some weird behaviors of native API calling and might slightly improve the performance. + /// However, if the dynamic loading is disabled, the native library can only be loaded from the default path, with no flexibility. + /// This method is only available with .NET standard 2.0. + /// + public void DisableDynamicLoading() => DynamicLoadingDisabled = true; +#endif + internal INativeLibrarySelectingPolicy SelectingPolicy { get; private set; } = new DefaultNativeLibrarySelectingPolicy(); #region configurators @@ -153,7 +172,7 @@ internal Description CheckAndGatherDescription() throw new ArgumentException("Cannot skip the check when fallback is allowed."); var path = _libraryPath; - + var assemblyDirectoryName = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location); return new Description( path, @@ -162,7 +181,11 @@ internal Description CheckAndGatherDescription() _avxLevel, _allowFallback, _skipCheck, - _searchDirectories.Concat(new[] { "./" }).ToArray() + _searchDirectories.Concat( + assemblyDirectoryName is null || assemblyDirectoryName == AppDomain.CurrentDomain.BaseDirectory ? 
+ new[] { AppDomain.CurrentDomain.BaseDirectory } + : new[] { AppDomain.CurrentDomain.BaseDirectory, assemblyDirectoryName } + ).ToArray() ); } @@ -185,6 +208,10 @@ private NativeLibraryConfig(NativeLibraryName nativeLibraryName) { NativeLibraryName = nativeLibraryName; +#if NETSTANDARD + // In .NET standard2.0 we don't have a way to get the system avx level so we set it as avx2 by default. + _avxLevel = AvxLevel.Avx2; +#else // Automatically detect the highest supported AVX level if (System.Runtime.Intrinsics.X86.Avx.IsSupported) _avxLevel = AvxLevel.Avx; @@ -193,8 +220,10 @@ private NativeLibraryConfig(NativeLibraryName nativeLibraryName) if (CheckAVX512()) _avxLevel = AvxLevel.Avx512; +#endif } +#if !NETSTANDARD private static bool CheckAVX512() { if (!System.Runtime.Intrinsics.X86.X86Base.IsSupported) @@ -218,6 +247,7 @@ private static bool CheckAVX512() return vnni && vbmi && bw && f; } +#endif /// /// The description of the native library configurations that's already specified. @@ -257,7 +287,6 @@ public override string ToString() } } } -#endif public sealed partial class NativeLibraryConfig { @@ -304,13 +333,6 @@ static NativeLibraryConfig() All = new(LLama, LLava); } -#if NETSTANDARD2_0 - private NativeLibraryConfig(NativeLibraryName nativeLibraryName) - { - NativeLibraryName = nativeLibraryName; - } -#endif - /// /// Check if the native library has already been loaded. Configuration cannot be modified if this is true. /// @@ -356,7 +378,7 @@ public NativeLibraryConfig WithLogCallback(ILogger? logger) return this; } - +#if !NETSTANDARD /// /// Try to load the native library with the current configurations, /// but do not actually set it to . @@ -371,8 +393,9 @@ public NativeLibraryConfig WithLogCallback(ILogger? logger) public bool DryRun(out INativeLibrary? 
loadedLibrary) { LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); - return NativeLibraryUtils.TryLoadLibrary(this, out loadedLibrary) != IntPtr.Zero; + return NativeLibraryUtils.TryLoadLibrary(this, out loadedLibrary, out var _) != IntPtr.Zero; } +#endif } /// @@ -389,7 +412,6 @@ internal NativeLibraryConfigContainer(params NativeLibraryConfig[] configs) #region configurators -#if NET6_0_OR_GREATER /// /// Load a specified native library as backend for LLamaSharp. /// When this method is called, all the other configurations will be ignored. @@ -522,7 +544,6 @@ public NativeLibraryConfigContainer WithSelectingPolicy(INativeLibrarySelectingP } return this; } -#endif /// /// Set the log callback that will be used for all llama.cpp log messages @@ -554,6 +575,7 @@ public NativeLibraryConfigContainer WithLogCallback(ILogger? logger) #endregion +#if !NETSTANDARD /// /// Try to load the native library with the current configurations, /// but do not actually set it to . @@ -583,6 +605,7 @@ public bool DryRun(out INativeLibrary? 
loadedLLamaNativeLibrary, out INativeLibr loadedLLamaNativeLibrary = loadedLLavaNativeLibrary = null; return success; } +#endif } /// diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index 9dd7c8af1..478796f01 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -1,10 +1,14 @@ -using LLama.Abstractions; +using LLama.Abstractions; using LLama.Exceptions; using System; using System.Collections.Generic; using System.IO; using System.Runtime.InteropServices; +#if NETSTANDARD +using NativeLibraryNetStandard; +#endif + namespace LLama.Native { internal static class NativeLibraryUtils @@ -12,10 +16,12 @@ internal static class NativeLibraryUtils /// /// Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible /// + /// + /// The loaded library, if successful. + /// The file path of the loaded library, if successful. /// The library handle to unload later, or IntPtr.Zero if no library was loaded - internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLibrary? loadedLibrary) + internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLibrary? loadedLibrary, out string? libraryPath) { -#if NET6_0_OR_GREATER var description = config.CheckAndGatherDescription(); var systemInfo = SystemInfo.Get(); Log($"Loading library: '{config.NativeLibraryName.GetLibraryName()}'", LLamaLogLevel.Debug, config.LogCallback); @@ -48,6 +54,7 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib if (result != IntPtr.Zero) { loadedLibrary = library; + libraryPath = path; return result; } } @@ -60,58 +67,42 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib throw new RuntimeError("Failed to load the native library. 
Please check the log for more information."); } loadedLibrary = null; -#else - loadedLibrary = new UnknownNativeLibrary(); -#endif + libraryPath = null; - Log($"No library was loaded before calling native apis. " + - $"This is not an error under netstandard2.0 but needs attention with net6 or higher.", LLamaLogLevel.Warning, config.LogCallback); + Log($"No library was loaded before calling native apis. ", LLamaLogLevel.Warning, config.LogCallback); return IntPtr.Zero; -#if NET6_0_OR_GREATER // Try to load a DLL from the path. // Returns null if nothing is loaded. static IntPtr TryLoad(string path, IEnumerable searchDirectories, NativeLogConfig.LLamaLogCallback? logCallback) { - var fullPath = TryFindPath(path, searchDirectories); - Log($"Found full path file '{fullPath}' for relative path '{path}'", LLamaLogLevel.Debug, logCallback); - if (NativeLibrary.TryLoad(fullPath, out var handle)) + var fullPaths = TryFindPaths(path, searchDirectories); + Log($"Found full path files <{string.Join(",", fullPaths)}> for relative path '{path}'", LLamaLogLevel.Debug, logCallback); + foreach(var fullPath in fullPaths) { - Log($"Successfully loaded '{fullPath}'", LLamaLogLevel.Info, logCallback); - return handle; + if (NativeLibrary.TryLoad(fullPath, out var handle)) + { + Log($"Successfully loaded '{fullPath}'", LLamaLogLevel.Info, logCallback); + return handle; + } + Log($"Failed Loading '{fullPath}'", LLamaLogLevel.Info, logCallback); } - - Log($"Failed Loading '{fullPath}'", LLamaLogLevel.Info, logCallback); + return IntPtr.Zero; } -#endif } // Try to find the given file in any of the possible search paths - private static string TryFindPath(string filename, IEnumerable searchDirectories) + private static IEnumerable TryFindPaths(string filename, IEnumerable searchDirectories) { + yield return filename; // Try the configured search directories in the configuration foreach (var path in searchDirectories) { var candidate = Path.Combine(path, filename); if 
(File.Exists(candidate)) - return candidate; - } - - // Try a few other possible paths - var possiblePathPrefix = new[] { - AppDomain.CurrentDomain.BaseDirectory, - Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? "" - }; - - foreach (var path in possiblePathPrefix) - { - var candidate = Path.Combine(path, filename); - if (File.Exists(candidate)) - return candidate; + yield return candidate; } - - return filename; } private static void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCallback? logCallback) @@ -122,7 +113,6 @@ private static void Log(string message, LLamaLogLevel level, NativeLogConfig.LLa logCallback?.Invoke(level, message); } -#if NET6_0_OR_GREATER public static void GetPlatformPathParts(OSPlatform platform, out string os, out string fileExtension, out string libPrefix) { if (platform == OSPlatform.Windows) @@ -145,9 +135,16 @@ public static void GetPlatformPathParts(OSPlatform platform, out string os, out { fileExtension = ".dylib"; +#if NETSTANDARD + var arch = RuntimeInformation.OSArchitecture; + os = arch == Architecture.Arm64 || arch == Architecture.Arm + ? "osx-arm64" + : "osx-x64"; +#else os = System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported ? 
"osx-arm64" : "osx-x64"; +#endif libPrefix = "lib"; } else @@ -155,6 +152,5 @@ public static void GetPlatformPathParts(OSPlatform platform, out string os, out throw new RuntimeError("Your operating system is not supported, please open an issue in LLamaSharp."); } } -#endif } } diff --git a/LLama/Native/Load/NativeLibraryWithAvx.cs b/LLama/Native/Load/NativeLibraryWithAvx.cs index 7b5421b4d..9463a9a5a 100644 --- a/LLama/Native/Load/NativeLibraryWithAvx.cs +++ b/LLama/Native/Load/NativeLibraryWithAvx.cs @@ -1,10 +1,9 @@ -using LLama.Abstractions; +using LLama.Abstractions; using System.Collections.Generic; using System.Runtime.InteropServices; namespace LLama.Native { -#if NET6_0_OR_GREATER /// /// A native library compiled with avx support but without cuda/cublas. /// @@ -44,19 +43,20 @@ public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaL // Not supported on systems other than Windows and Linux. return []; } - var path = GetAvxPath(systemInfo, _avxLevel, logCallback); - return path is null ? [] : [path]; + return GetAvxPaths(systemInfo, _avxLevel, logCallback); } - private string? GetAvxPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback) + private IEnumerable GetAvxPaths(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel); if (!string.IsNullOrEmpty(avxStr)) avxStr += "/"; - var relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; - return relativePath; + yield return $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#if NETSTANDARD + // For .NET framework, the path might exclude `runtimes`. 
+ yield return $"{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#endif } } -#endif } diff --git a/LLama/Native/Load/NativeLibraryWithCuda.cs b/LLama/Native/Load/NativeLibraryWithCuda.cs index d3b06b864..3392aab10 100644 --- a/LLama/Native/Load/NativeLibraryWithCuda.cs +++ b/LLama/Native/Load/NativeLibraryWithCuda.cs @@ -1,10 +1,9 @@ -using LLama.Abstractions; +using LLama.Abstractions; using System.Collections.Generic; using System.Runtime.InteropServices; namespace LLama.Native { -#if NET6_0_OR_GREATER /// /// A native library compiled with cublas/cuda. /// @@ -46,34 +45,42 @@ public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaL if (_majorCudaVersion == -1 && _skipCheck) { // Currently only 11 and 12 are supported. - var cuda12LibraryPath = GetCudaPath(systemInfo, 12, logCallback); - if (cuda12LibraryPath is not null) + foreach(var cuda12LibraryPath in GetCudaPaths(systemInfo, 12, logCallback)) { - yield return cuda12LibraryPath; + if (cuda12LibraryPath is not null) + { + yield return cuda12LibraryPath; + } } - var cuda11LibraryPath = GetCudaPath(systemInfo, 11, logCallback); - if (cuda11LibraryPath is not null) + foreach (var cuda11LibraryPath in GetCudaPaths(systemInfo, 11, logCallback)) { - yield return cuda11LibraryPath; + if (cuda11LibraryPath is not null) + { + yield return cuda11LibraryPath; + } } } else if (_majorCudaVersion != -1) { - var cudaLibraryPath = GetCudaPath(systemInfo, _majorCudaVersion, logCallback); - if (cudaLibraryPath is not null) + foreach (var cudaLibraryPath in GetCudaPaths(systemInfo, _majorCudaVersion, logCallback)) { - yield return cudaLibraryPath; + if (cudaLibraryPath is not null) + { + yield return cudaLibraryPath; + } } } } } - private string? GetCudaPath(SystemInfo systemInfo, int cudaVersion, NativeLogConfig.LLamaLogCallback? logCallback) + private IEnumerable GetCudaPaths(SystemInfo systemInfo, int cudaVersion, NativeLogConfig.LLamaLogCallback? 
logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); - var relativePath = $"runtimes/{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; - return relativePath; + yield return $"runtimes/{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#if NETSTANDARD + // For .NET framework, the path might exclude `runtimes`. + yield return $"{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#endif } } -#endif } diff --git a/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs index 5df339307..cadbe3b58 100644 --- a/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs +++ b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs @@ -1,10 +1,9 @@ -using LLama.Abstractions; +using LLama.Abstractions; using System.Collections.Generic; using System.Runtime.InteropServices; namespace LLama.Native { -#if NET6_0_OR_GREATER /// /// A native library compiled on Mac, or fallbacks from all other libraries in the selection. /// @@ -36,17 +35,20 @@ public NativeLibraryWithMacOrFallback(NativeLibraryName libraryName, bool skipCh /// public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { - var path = GetPath(systemInfo, AvxLevel.None, logCallback); - return path is null ?[] : [path]; + return GetPaths(systemInfo, AvxLevel.None, logCallback); } - private string? GetPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback) + private IEnumerable GetPaths(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? 
logCallback) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); string relativePath; if (systemInfo.OSPlatform == OSPlatform.OSX) { - relativePath = $"runtimes/{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + yield return $"runtimes/{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#if NETSTANDARD + // For .NET framework, the path might exclude `runtimes`. + yield return $"{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#endif } else { @@ -54,11 +56,12 @@ public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaL if (!string.IsNullOrEmpty(avxStr)) avxStr += "/"; - relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + yield return $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#if NETSTANDARD + // For .NET framework, the path might exclude `runtimes`. + yield return $"{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; +#endif } - - return relativePath; } } -#endif } diff --git a/LLama/Native/NativeApi.BeamSearch.cs b/LLama/Native/NativeApi.BeamSearch.cs index 142b997bb..04fc12b04 100644 --- a/LLama/Native/NativeApi.BeamSearch.cs +++ b/LLama/Native/NativeApi.BeamSearch.cs @@ -5,6 +5,7 @@ namespace LLama.Native; public static partial class NativeApi { +#if !NETSTANDARD /// /// Type of pointer to the beam_search_callback function. /// @@ -22,4 +23,5 @@ public static partial class NativeApi /// Number of threads. 
[DllImport(libraryName, EntryPoint = "llama_beam_search", CallingConvention = CallingConvention.Cdecl)] public static extern void llama_beam_search(SafeLLamaContextHandle ctx, LLamaBeamSearchCallback callback, IntPtr callback_data, ulong n_beams, int n_past, int n_predict, int n_threads); +#endif } \ No newline at end of file diff --git a/LLama/Native/NativeApi.Grammar.cs b/LLama/Native/NativeApi.Grammar.cs index 6279d425a..0de0144f2 100644 --- a/LLama/Native/NativeApi.Grammar.cs +++ b/LLama/Native/NativeApi.Grammar.cs @@ -1,17 +1,18 @@ -using System; +using System; using System.Runtime.InteropServices; namespace LLama.Native { public static partial class NativeApi { - /// - /// Create a new grammar from the given set of grammar rules - /// - /// - /// - /// - /// +#if !NETSTANDARD + /// + /// Create a new grammar from the given set of grammar rules + /// + /// + /// + /// + /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern unsafe SafeLLamaGrammarHandle llama_grammar_init(LLamaGrammarElement** rules, ulong n_rules, ulong start_rule_index); @@ -47,5 +48,6 @@ public static partial class NativeApi /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern void llama_grammar_accept_token(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token); +#endif } } diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs index 183f183a7..2b1e7f013 100644 --- a/LLama/Native/NativeApi.LLava.cs +++ b/LLama/Native/NativeApi.LLava.cs @@ -6,6 +6,7 @@ namespace LLama.Native; using clip_ctx = IntPtr; public static unsafe partial class NativeApi { +#if !NETSTANDARD /// /// Sanity check for clip <-> llava embed size match /// @@ -59,5 +60,5 @@ SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHan [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)] [return: 
MarshalAs(UnmanagedType.U1)] public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctx_llama, SafeLlavaImageEmbedHandle embed, int n_batch, ref int n_past); - +#endif } \ No newline at end of file diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index f1bd765e4..6fbb8997c 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -1,10 +1,12 @@ -using LLama.Exceptions; +using LLama.Exceptions; using System; -using System.IO; using System.Runtime.InteropServices; -using System.Text.Json; -using System.Collections.Generic; using LLama.Abstractions; +using System.Diagnostics; + +#if NETSTANDARD +using NativeLibraryNetStandard; +#endif namespace LLama.Native { @@ -16,7 +18,7 @@ static NativeApi() // called by the runtime every time that a call into a DLL is required. The // resolver returns the loaded DLL handle. This allows us to take control of // which llama.dll is used. - SetDllImportResolver(); + ResolveDllImport(); // Set flag to indicate that this point has been passed. No native library config can be done after this point. NativeLibraryConfig.LLama.LibraryHasLoaded = true; @@ -47,16 +49,11 @@ static NativeApi() llama_backend_init(); } -#if NET5_0_OR_GREATER - private static IntPtr _loadedLlamaHandle; - private static IntPtr _loadedLlavaSharedHandle; -#endif - - private static void SetDllImportResolver() + private static void ResolveDllImport() { // NativeLibrary is not available on older runtimes. We'll have to depend on // the normal runtime dll resolution there. 
-#if NET5_0_OR_GREATER +#if !NETSTANDARD NativeLibrary.SetDllImportResolver(typeof(NativeApi).Assembly, (name, _, _) => { if (name == "llama") @@ -66,7 +63,7 @@ private static void SetDllImportResolver() return _loadedLlamaHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out _loadedLLamaLibrary); + _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out _loadedLLamaLibrary, out var _); return _loadedLlamaHandle; } @@ -77,13 +74,45 @@ private static void SetDllImportResolver() return _loadedLlavaSharedHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out _loadedLLavaLibrary); + _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out _loadedLLavaLibrary, out var _); return _loadedLlavaSharedHandle; } // Return null pointer to indicate that nothing was loaded. 
return IntPtr.Zero; }); +#else + if(NativeLibraryConfig.DynamicLoadingDisabled) + { + NativeLibraryConfig.LLama.LogCallback?.Invoke(LLamaLogLevel.Info, "Dynamic loading is disabled, using the default loading instead."); + return; + } + // Resolve LLama native library + var llamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama, out _loadedLLamaLibrary, out var _); + if(llamaHandle == IntPtr.Zero) + { + throw new RuntimeError("Failed to resolve the llama native library with dynamic loading."); + } + else + { + _llamaNativeLibraryHolder = new NativeLibraryHolder(llamaHandle, autoFree:true); + } + + if(!NativeLibraryConfig.LLavaDisabled) + { + var llavaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLava, out _loadedLLavaLibrary, out var _); + if(llavaHandle == IntPtr.Zero) + { + if(NativeLibraryConfig.LLama.LogCallback is not null) + { + throw new RuntimeError("Failed to resolve the llava native library with dynamic loading."); + } + } + else + { + _llavaNativeLibraryHolder = new NativeLibraryHolder(llavaHandle, autoFree:true); + } + } #endif } @@ -108,5 +137,23 @@ private static void SetDllImportResolver() private static INativeLibrary? _loadedLLamaLibrary = null; private static INativeLibrary? _loadedLLavaLibrary = null; + +#if NETSTANDARD + private static NativeLibraryHolder? _llamaNativeLibraryHolder = null; + private static NativeLibraryHolder? 
_llavaNativeLibraryHolder = null; + + internal static T GetLLamaExport(string name) where T: Delegate + { + return _llamaNativeLibraryHolder!.LoadFunction(name); + } + + internal static T GetLLavaExport(string name) where T: Delegate + { + return _llavaNativeLibraryHolder!.LoadFunction(name); + } +#else + private static IntPtr _loadedLlamaHandle; + private static IntPtr _loadedLlavaSharedHandle; +#endif } } diff --git a/LLama/Native/NativeApi.NetStandard.cs b/LLama/Native/NativeApi.NetStandard.cs new file mode 100644 index 000000000..788023452 --- /dev/null +++ b/LLama/Native/NativeApi.NetStandard.cs @@ -0,0 +1,799 @@ + +using System.Runtime.InteropServices; +using System; + +namespace LLama.Native; + +public unsafe partial class NativeApi +{ +#if NETSTANDARD +#region NativeApi.cs + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern long llama_max_devices_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate long llama_max_device_t(); + public static long llama_max_devices() => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_max_devices_r() : _llamaNativeLibraryHolder.LoadFunction("llama_max_devices")(); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + public static extern bool llama_supports_mmap_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_supports_mmap_t(); + public static bool llama_supports_mmap() => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_supports_mmap_r() : _llamaNativeLibraryHolder.LoadFunction("llama_supports_mmap")(); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + public static extern bool llama_supports_mlock_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_supports_mlock_t(); + public static bool llama_supports_mlock() => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_supports_mlock_r() : _llamaNativeLibraryHolder.LoadFunction("llama_supports_mlock")(); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + public static extern bool llama_supports_gpu_offload_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_supports_gpu_offload_t(); + public static bool llama_supports_gpu_offload() => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_supports_gpu_offload_r() : _llamaNativeLibraryHolder.LoadFunction("llama_supports_gpu_offload")(); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_backend_init_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_backend_init_t(); + public static void llama_backend_init() + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_backend_init_r(); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_backend_init")(); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private static extern bool llama_state_load_file_r(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens_out, ulong n_token_capacity, out ulong n_token_count_out); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_state_load_file_t(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens_out, ulong n_token_capacity, out ulong n_token_count_out); + public static bool llama_state_load_file(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens_out, ulong n_token_capacity, out ulong n_token_count_out) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_state_load_file_r(ctx, path_session, tokens_out, n_token_capacity, out n_token_count_out) : _llamaNativeLibraryHolder.LoadFunction("llama_state_load_file")(ctx, path_session, tokens_out, n_token_capacity, out n_token_count_out); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private static extern bool llama_state_save_file_r(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens, ulong n_token_count); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_state_save_file_t(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens, ulong n_token_count); + public static bool llama_state_save_file(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens, ulong n_token_count) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_save_file_r(ctx, path_session, tokens, n_token_count) : _llamaNativeLibraryHolder.LoadFunction("llama_state_save_file")(ctx, path_session, tokens, n_token_count); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe nuint llama_state_seq_save_file_r(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId seq_id, LLamaToken* tokens, nuint n_token_count); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate nuint llama_state_seq_save_file_t(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId seq_id, LLamaToken* tokens, nuint n_token_count); + public static unsafe nuint llama_state_seq_save_file(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId seq_id, LLamaToken* tokens, nuint n_token_count) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_state_seq_save_file_r(ctx, filepath, seq_id, tokens, n_token_count) : _llamaNativeLibraryHolder.LoadFunction("llama_state_seq_save_file")(ctx, filepath, seq_id, tokens, n_token_count); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe nuint llama_state_seq_load_file_r(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId dest_seq_id, LLamaToken* tokens_out, nuint n_token_capacity, out nuint n_token_count_out); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate nuint llama_state_seq_load_file_t(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId dest_seq_id, LLamaToken* tokens_out, nuint n_token_capacity, out nuint n_token_count_out); + public static unsafe nuint llama_state_seq_load_file(SafeLLamaContextHandle ctx, string filepath, LLamaSeqId dest_seq_id, LLamaToken* tokens_out, nuint n_token_capacity, out nuint n_token_count_out) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_seq_load_file_r(ctx, filepath, dest_seq_id, tokens_out, n_token_capacity, out n_token_count_out) : _llamaNativeLibraryHolder.LoadFunction("llama_state_seq_load_file")(ctx, filepath, dest_seq_id, tokens_out, n_token_capacity, out n_token_count_out); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe byte* llama_token_get_text_r(SafeLlamaModelHandle model, LLamaToken token); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate byte* llama_token_get_text_t(SafeLlamaModelHandle model, LLamaToken token); + public static unsafe byte* llama_token_get_text(SafeLlamaModelHandle model, LLamaToken token) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_token_get_text_r(model, token) : _llamaNativeLibraryHolder.LoadFunction("llama_token_get_text")(model, token); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_set_causal_attn_r(SafeLlamaModelHandle ctx, bool causal_attn); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_set_causal_attn_t(SafeLlamaModelHandle ctx, bool causal_attn); + public static void llama_set_causal_attn(SafeLlamaModelHandle ctx, bool causal_attn) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_set_causal_attn_r(ctx, causal_attn); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_set_causal_attn")(ctx, causal_attn); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_set_abort_callback_r(SafeLlamaModelHandle ctx, IntPtr /* ggml_abort_callback */ abort_callback, IntPtr abort_callback_data); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_set_abort_callback_t(SafeLlamaModelHandle ctx, IntPtr /* ggml_abort_callback */ abort_callback, IntPtr abort_callback_data); + public static void llama_set_abort_callback(SafeLlamaModelHandle ctx, IntPtr /* ggml_abort_callback */ abort_callback, IntPtr abort_callback_data) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_set_abort_callback_r(ctx, abort_callback, abort_callback_data); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_set_abort_callback")(ctx, abort_callback, abort_callback_data); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_synchronize_r(SafeLlamaModelHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_synchronize_t(SafeLlamaModelHandle ctx); + public static void llama_synchronize(SafeLlamaModelHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + 
llama_synchronize_r(ctx); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_synchronize")(ctx); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern float llama_token_get_score_r(SafeLlamaModelHandle model, LLamaToken token); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float llama_token_get_score_t(SafeLlamaModelHandle model, LLamaToken token); + public static float llama_token_get_score(SafeLlamaModelHandle model, LLamaToken token) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_get_score_r(model, token) : _llamaNativeLibraryHolder.LoadFunction("llama_token_get_score")(model, token); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaTokenType llama_token_get_type_r(SafeLlamaModelHandle model, LLamaToken token); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaTokenType llama_token_get_type_t(SafeLlamaModelHandle model, LLamaToken token); + public static LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, LLamaToken token) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_get_type_r(model, token) : _llamaNativeLibraryHolder.LoadFunction("llama_token_get_type")(model, token); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern uint llama_n_seq_max_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate uint llama_n_seq_max_t(SafeLLamaContextHandle ctx); + public static uint llama_n_seq_max(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_n_seq_max_r(ctx) : _llamaNativeLibraryHolder.LoadFunction("llama_n_seq_max")(ctx); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaPoolingType llama_pooling_type_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaPoolingType llama_pooling_type_t(SafeLLamaContextHandle ctx); + public static LLamaPoolingType llama_pooling_type(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_pooling_type_r(ctx) : _llamaNativeLibraryHolder.LoadFunction("llama_pooling_type")(ctx); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe float* llama_get_embeddings_seq_r(SafeLLamaContextHandle ctx, LLamaSeqId id); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float* llama_get_embeddings_seq_t(SafeLLamaContextHandle ctx, LLamaSeqId id); + public static unsafe float* llama_get_embeddings_seq(SafeLLamaContextHandle ctx, LLamaSeqId id) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_get_embeddings_seq_r(ctx, id) : _llamaNativeLibraryHolder.LoadFunction("llama_get_embeddings_seq")(ctx, id); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe float* llama_get_embeddings_ith_r(SafeLLamaContextHandle ctx, int i); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float* llama_get_embeddings_ith_t(SafeLLamaContextHandle ctx, int i); + public static unsafe float* llama_get_embeddings_ith(SafeLLamaContextHandle ctx, int i) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_get_embeddings_ith_r(ctx, i) : _llamaNativeLibraryHolder.LoadFunction("llama_get_embeddings_ith")(ctx, i); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe float* llama_get_embeddings_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float* llama_get_embeddings_t(SafeLLamaContextHandle ctx); + public static unsafe float* llama_get_embeddings(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_get_embeddings_r(ctx) : _llamaNativeLibraryHolder.LoadFunction("llama_get_embeddings")(ctx); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_add_bos_token_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_add_bos_token_t(SafeLlamaModelHandle model); + public static int llama_add_bos_token(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_add_bos_token_r(model) : _llamaNativeLibraryHolder.LoadFunction("llama_add_bos_token")(model); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_add_eos_token_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_add_eos_token_t(SafeLlamaModelHandle model); + public static int llama_add_eos_token(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_add_eos_token_r(model) : _llamaNativeLibraryHolder.LoadFunction("llama_add_eos_token")(model); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_print_timings_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_print_timings_t(SafeLLamaContextHandle ctx); + public static void llama_print_timings(SafeLLamaContextHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_print_timings_r(ctx); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_print_timings")(ctx); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_reset_timings_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_reset_timings_t(SafeLLamaContextHandle ctx); + public static void llama_reset_timings(SafeLLamaContextHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_reset_timings_r(ctx); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_reset_timings")(ctx); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern IntPtr llama_print_system_info_r(); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate IntPtr llama_print_system_info_t(); + public static IntPtr llama_print_system_info() => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_print_system_info_r() : _llamaNativeLibraryHolder.LoadFunction("llama_print_system_info")(); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe int llama_tokenize_r(SafeLlamaModelHandle model, byte* text, int text_len, LLamaToken* tokens, int n_max_tokens, bool add_special, bool parse_special); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_tokenize_t(SafeLlamaModelHandle model, byte* text, int text_len, LLamaToken* tokens, int n_max_tokens, bool add_special, bool parse_special); + public static unsafe int llama_tokenize(SafeLlamaModelHandle model, byte* text, int text_len, LLamaToken* tokens, int n_max_tokens, bool add_special, bool parse_special) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_tokenize_r(model, text, text_len, tokens, n_max_tokens, add_special, parse_special) : _llamaNativeLibraryHolder.LoadFunction("llama_tokenize")(model, text, text_len, tokens, n_max_tokens, add_special, parse_special); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_get_kv_cache_token_count_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_get_kv_cache_token_count_t(SafeLLamaContextHandle ctx); + public static int llama_get_kv_cache_token_count(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_get_kv_cache_token_count_r(ctx) : _llamaNativeLibraryHolder.LoadFunction("llama_get_kv_cache_token_count")(ctx); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_get_kv_cache_used_cells_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_get_kv_cache_used_cells_t(SafeLLamaContextHandle ctx); + public static int llama_get_kv_cache_used_cells(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_get_kv_cache_used_cells_r(ctx) : _llamaNativeLibraryHolder.LoadFunction("llama_get_kv_cache_used_cells")(ctx); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_clear_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_clear_t(SafeLLamaContextHandle ctx); + public static void llama_kv_cache_clear(SafeLLamaContextHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_clear_r(ctx); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_clear")(ctx); + + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern byte llama_kv_cache_seq_rm_r(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate byte llama_kv_cache_seq_rm_t(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1); + public static byte llama_kv_cache_seq_rm(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_kv_cache_seq_rm_r(ctx, seq, p0, p1) : _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_rm")(ctx, seq, p0, p1); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_seq_cp_r(SafeLLamaContextHandle ctx, LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LLamaPos p1); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_seq_cp_t(SafeLLamaContextHandle ctx, LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LLamaPos p1); + public static void llama_kv_cache_seq_cp(SafeLLamaContextHandle ctx, LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LLamaPos p1) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_seq_cp_r(ctx, src, dest, p0, p1); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_cp")(ctx, src, dest, p0, p1); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_seq_keep_r(SafeLLamaContextHandle ctx, LLamaSeqId seq); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_seq_keep_t(SafeLLamaContextHandle ctx, LLamaSeqId seq); + public static void llama_kv_cache_seq_keep(SafeLLamaContextHandle ctx, LLamaSeqId seq) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_seq_keep_r(ctx, seq); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_keep")(ctx, seq); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_seq_add_r(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int delta); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_seq_add_t(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int delta); + public static void llama_kv_cache_seq_add(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, 
LLamaPos p1, int delta) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_seq_add_r(ctx, seq, p0, p1, delta); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_add")(ctx, seq, p0, p1, delta); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_seq_div_r(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int d); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_seq_div_t(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int d); + public static void llama_kv_cache_seq_div(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int d) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_seq_div_r(ctx, seq, p0, p1, d); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_div")(ctx, seq, p0, p1, d); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaPos llama_kv_cache_seq_pos_max_r(SafeLLamaContextHandle ctx, LLamaSeqId seq); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaPos llama_kv_cache_seq_pos_max_t(SafeLLamaContextHandle ctx, LLamaSeqId seq); + public static LLamaPos llama_kv_cache_seq_pos_max(SafeLLamaContextHandle ctx, LLamaSeqId seq) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_kv_cache_seq_pos_max_r(ctx, seq) : _llamaNativeLibraryHolder.LoadFunction("llama_kv_cache_seq_pos_max")(ctx, seq); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaNativeBatch llama_batch_init_r(int n_tokens, int embd, int n_seq_max); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaNativeBatch llama_batch_init_t(int n_tokens, int embd, int n_seq_max); + public static LLamaNativeBatch llama_batch_init(int n_tokens, int embd, int n_seq_max) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_batch_init_r(n_tokens, embd, n_seq_max) : _llamaNativeLibraryHolder.LoadFunction("llama_batch_init")(n_tokens, embd, n_seq_max); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_batch_free_r(LLamaNativeBatch batch); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_batch_free_t(LLamaNativeBatch batch); + public static void llama_batch_free(LLamaNativeBatch batch) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_batch_free_r(batch); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_batch_free")(batch); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe int llama_control_vector_apply_r(SafeLLamaContextHandle ctx, float* data, nuint len, int n_embd, int il_start, int il_end); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_control_vector_apply_t(SafeLLamaContextHandle ctx, float* data, nuint len, int n_embd, int il_start, int il_end); + public static unsafe int llama_control_vector_apply(SafeLLamaContextHandle ctx, float* data, nuint len, int n_embd, int il_start, int il_end) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_control_vector_apply_r(ctx, data, len, n_embd, il_start, il_end) : _llamaNativeLibraryHolder.LoadFunction("llama_control_vector_apply")(ctx, data, len, n_embd, il_start, il_end); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_split_path_r(string split_path, nuint maxlen, string path_prefix, int split_no, int split_count); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_split_path_t(string split_path, nuint maxlen, string path_prefix, int split_no, int split_count); + public static int llama_split_path(string split_path, nuint maxlen, string path_prefix, int split_no, int split_count) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_split_path_r(split_path, maxlen, path_prefix, split_no, split_count) : _llamaNativeLibraryHolder.LoadFunction("llama_split_path")(split_path, maxlen, path_prefix, split_no, split_count); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_split_prefix_r(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_split_prefix_t(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count); + public static int llama_split_prefix(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_split_prefix_r(split_prefix, maxlen, split_path, split_no, split_count) : _llamaNativeLibraryHolder.LoadFunction("llama_split_prefix")(split_prefix, maxlen, split_path, split_no, split_count); + +#endregion + +#region NativeApi.BeamSearch.cs + public delegate void LLamaBeamSearchCallback(IntPtr callback_data, LLamaBeamsState state); + + [DllImport(libraryName, EntryPoint = "llama_beam_search", CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_beam_search_r(SafeLLamaContextHandle ctx, LLamaBeamSearchCallback callback, IntPtr callback_data, ulong n_beams, int n_past, int n_predict, int n_threads); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_beam_search_t(SafeLLamaContextHandle ctx, LLamaBeamSearchCallback callback, IntPtr callback_data, ulong n_beams, int n_past, int n_predict, int n_threads); + public static void llama_beam_search(SafeLLamaContextHandle ctx, LLamaBeamSearchCallback callback, IntPtr callback_data, ulong n_beams, int n_past, int n_predict, int n_threads) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_beam_search_r(ctx, callback, callback_data, n_beams, n_past, n_predict, n_threads); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_beam_search")(ctx, callback, callback_data, n_beams, n_past, n_predict, n_threads); + } + } +#endregion + +#region NativeApi.Grammar.cs + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe SafeLLamaGrammarHandle llama_grammar_init_r(LLamaGrammarElement** rules, ulong n_rules, ulong start_rule_index); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLLamaGrammarHandle llama_grammar_init_t(LLamaGrammarElement** rules, ulong n_rules, ulong start_rule_index); + public static unsafe SafeLLamaGrammarHandle llama_grammar_init(LLamaGrammarElement** rules, ulong n_rules, ulong start_rule_index) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_grammar_init_r(rules, n_rules, start_rule_index) : _llamaNativeLibraryHolder.LoadFunction("llama_grammar_init")(rules, n_rules, start_rule_index); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_grammar_free_r(IntPtr grammar); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_grammar_free_t(IntPtr grammar); + public static void llama_grammar_free(IntPtr grammar) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_grammar_free_r(grammar); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_grammar_free")(grammar); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLLamaGrammarHandle llama_grammar_copy_r(SafeLLamaGrammarHandle grammar); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLLamaGrammarHandle llama_grammar_copy_t(SafeLLamaGrammarHandle grammar); + public static SafeLLamaGrammarHandle llama_grammar_copy(SafeLLamaGrammarHandle grammar) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_grammar_copy_r(grammar) : _llamaNativeLibraryHolder.LoadFunction("llama_grammar_copy")(grammar); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_grammar_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_grammar_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar); + public static void llama_sample_grammar(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_grammar_r(ctx, ref candidates, grammar); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_grammar")(ctx, ref candidates, grammar); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_grammar_accept_token_r(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_grammar_accept_token_t(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token); + public static void llama_grammar_accept_token(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_grammar_accept_token_r(ctx, grammar, token); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_grammar_accept_token")(ctx, grammar, token); + } + } + +#endregion + +#region NativeApi.LLava.cs + + [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private static extern bool llava_validate_embed_size_r(SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip); + 
[UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llava_validate_embed_size_t(SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip); + public static bool llava_validate_embed_size(SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip) => NativeLibraryConfig.DynamicLoadingDisabled ? + llava_validate_embed_size_r(ctxLlama, ctxClip) : _llavaNativeLibraryHolder.LoadFunction("llava_validate_embed_size")(ctxLlama, ctxClip); + + [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes", CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes_r(SafeLlavaModelHandle ctxClip, int nThreads, byte[] imageBytes, int imageBytesLength); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes_t(SafeLlavaModelHandle ctxClip, int nThreads, byte[] imageBytes, int imageBytesLength); + public static SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes(SafeLlavaModelHandle ctxClip, int nThreads, byte[] imageBytes, int imageBytesLength) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llava_image_embed_make_with_bytes_r(ctxClip, nThreads, imageBytes, imageBytesLength) : _llavaNativeLibraryHolder.LoadFunction("llava_image_embed_make_with_bytes")(ctxClip, nThreads, imageBytes, imageBytesLength); + + [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename_r(SafeLlavaModelHandle ctxClip, int nThreads, [MarshalAs(UnmanagedType.LPStr)] string imagePath); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename_t(SafeLlavaModelHandle ctxClip, int nThreads, [MarshalAs(UnmanagedType.LPStr)] string imagePath); + public static SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctxClip, int nThreads, string imagePath) => NativeLibraryConfig.DynamicLoadingDisabled ? + llava_image_embed_make_with_filename_r(ctxClip, nThreads, imagePath) : _llavaNativeLibraryHolder.LoadFunction("llava_image_embed_make_with_filename")(ctxClip, nThreads, imagePath); + + [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)] + private static extern void llava_image_embed_free_r(IntPtr embed); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llava_image_embed_free_t(IntPtr embed); + public static void llava_image_embed_free(IntPtr embed) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llava_image_embed_free_r(embed); + } + else + { + _llavaNativeLibraryHolder.LoadFunction("llava_image_embed_free")(embed); + } + } + + [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private static extern bool llava_eval_image_embed_r(SafeLLamaContextHandle ctxLlama, SafeLlavaImageEmbedHandle embed, int nBatch, ref int nPast); + 
[UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llava_eval_image_embed_t(SafeLLamaContextHandle ctxLlama, SafeLlavaImageEmbedHandle embed, int nBatch, ref int nPast); + public static bool llava_eval_image_embed(SafeLLamaContextHandle ctxLlama, SafeLlavaImageEmbedHandle embed, int nBatch, ref int nPast) => NativeLibraryConfig.DynamicLoadingDisabled ? + llava_eval_image_embed_r(ctxLlama, embed, nBatch, ref nPast) : _llavaNativeLibraryHolder.LoadFunction("llava_eval_image_embed")(ctxLlama, embed, nBatch, ref nPast); + +#endregion + +#region NativeApi.Quantize.cs + +[DllImport(libraryName, EntryPoint = "llama_model_quantize", CallingConvention = CallingConvention.Cdecl)] +private static extern uint llama_model_quantize_r(string fname_inp, string fname_out, ref LLamaModelQuantizeParams param); +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +private delegate uint llama_model_quantize_t(string fname_inp, string fname_out, ref LLamaModelQuantizeParams param); +public static uint llama_model_quantize(string fname_inp, string fname_out, ref LLamaModelQuantizeParams param) => NativeLibraryConfig.DynamicLoadingDisabled ?
+ llama_model_quantize_r(fname_inp, fname_out, ref param) : _llamaNativeLibraryHolder.LoadFunction("llama_model_quantize")(fname_inp, fname_out, ref param); + +#endregion + +#region NativeApi.Sampling.cs + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe void llama_sample_repetition_penalties_r(SafeLLamaContextHandle ctx, + ref LLamaTokenDataArrayNative candidates, + LLamaToken* last_tokens, ulong last_tokens_size, + float penalty_repeat, + float penalty_freq, + float penalty_present); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_repetition_penalties_t(SafeLLamaContextHandle ctx, + ref LLamaTokenDataArrayNative candidates, + LLamaToken* last_tokens, ulong last_tokens_size, + float penalty_repeat, + float penalty_freq, + float penalty_present); + public static unsafe void llama_sample_repetition_penalties(SafeLLamaContextHandle ctx, + ref LLamaTokenDataArrayNative candidates, + LLamaToken* last_tokens, ulong last_tokens_size, + float penalty_repeat, + float penalty_freq, + float penalty_present) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_repetition_penalties_r(ctx, ref candidates, last_tokens, last_tokens_size, penalty_repeat, penalty_freq, penalty_present); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_repetition_penalties")(ctx, ref candidates, last_tokens, last_tokens_size, penalty_repeat, penalty_freq, penalty_present); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe void llama_sample_apply_guidance_r(SafeLLamaContextHandle ctx, float* logits, float* logits_guidance, float scale); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_apply_guidance_t(SafeLLamaContextHandle ctx, float* logits, float* logits_guidance, float scale); + public static void llama_sample_apply_guidance(SafeLLamaContextHandle ctx, 
float* logits, float* logits_guidance, float scale) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_apply_guidance_r(ctx, logits, logits_guidance, scale); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_apply_guidance")(ctx, logits, logits_guidance, scale); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_softmax_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_softmax_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + public static void llama_sample_softmax(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_softmax_r(ctx, ref candidates); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_softmax")(ctx, ref candidates); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_top_k_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, int k, ulong min_keep); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_top_k_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, int k, ulong min_keep); + public static void llama_sample_top_k(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, int k, ulong min_keep) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_top_k_r(ctx, ref candidates, k, min_keep); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_top_k")(ctx, ref candidates, k, min_keep); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_top_p_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong 
min_keep); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_top_p_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep); + public static void llama_sample_top_p(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_top_p_r(ctx, ref candidates, p, min_keep); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_top_p")(ctx, ref candidates, p, min_keep); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_min_p_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_min_p_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep); + public static void llama_sample_min_p(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_min_p_r(ctx, ref candidates, p, min_keep); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_min_p")(ctx, ref candidates, p, min_keep); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_tail_free_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float z, ulong min_keep); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_tail_free_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float z, ulong min_keep); + public static void llama_sample_tail_free(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float z, ulong min_keep) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + 
llama_sample_tail_free_r(ctx, ref candidates, z, min_keep); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_tail_free")(ctx, ref candidates, z, min_keep); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_typical_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_typical_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep); + public static void llama_sample_typical(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float p, ulong min_keep) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_typical_r(ctx, ref candidates, p, min_keep); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_typical")(ctx, ref candidates, p, min_keep); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_sample_typical_v2_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float min_temp, float max_temp, float exponent_val); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_typical_v2_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float min_temp, float max_temp, float exponent_val); + public static void llama_sample_typical(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float min_temp, float max_temp, float exponent_val) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_typical_v2_r(ctx, ref candidates, min_temp, max_temp, exponent_val); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_typical")(ctx, ref candidates, min_temp, max_temp, exponent_val); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private 
static extern void llama_sample_temp_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float temp); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_sample_temp_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float temp); + public static void llama_sample_temp(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float temp) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_sample_temp_r(ctx, ref candidates, temp); + } + else + { + _llamaNativeLibraryHolder.LoadFunction("llama_sample_temp")(ctx, ref candidates, temp); + } + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_sample_token_mirostat_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, int m, ref float mu); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_sample_token_mirostat_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, int m, ref float mu); + public static LLamaToken llama_sample_token_mirostat(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, int m, ref float mu) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_sample_token_mirostat_r(ctx, ref candidates, tau, eta, m, ref mu) : _llamaNativeLibraryHolder.LoadFunction("llama_sample_token_mirostat")(ctx, ref candidates, tau, eta, m, ref mu); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_sample_token_mirostat_v2_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, ref float mu); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_sample_token_mirostat_v2_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, ref float mu); + public static LLamaToken llama_sample_token_mirostat_v2(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, float tau, float eta, ref float mu) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_sample_token_mirostat_v2_r(ctx, ref candidates, tau, eta, ref mu) : _llamaNativeLibraryHolder.LoadFunction("llama_sample_token_mirostat_v2")(ctx, ref candidates, tau, eta, ref mu); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_sample_token_greedy_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_sample_token_greedy_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + public static LLamaToken llama_sample_token_greedy(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_sample_token_greedy_r(ctx, ref candidates) : _llamaNativeLibraryHolder.LoadFunction("llama_sample_token_greedy")(ctx, ref candidates); + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_sample_token_r(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_sample_token_t(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); + public static LLamaToken llama_sample_token(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_sample_token_r(ctx, ref candidates) : _llamaNativeLibraryHolder.LoadFunction("llama_sample_token")(ctx, ref candidates); + +#endregion + + public static unsafe int llama_chat_apply_template(SafeLlamaModelHandle? model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, bool add_ass, byte* buf, int length) + { + return internal_llama_chat_apply_template(model?.DangerousGetHandle() ?? IntPtr.Zero, tmpl, chat, n_msg, add_ass, buf, length); + } + + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_chat_apply_template")] + static unsafe extern int internal_llama_chat_apply_template_r(IntPtr model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, bool add_ass, byte* buf, int length); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + unsafe delegate int internal_llama_chat_apply_template_t(IntPtr model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, bool add_ass, byte* buf, int length); + unsafe static int internal_llama_chat_apply_template(IntPtr model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, bool add_ass, byte* buf, int length) => + NativeLibraryConfig.DynamicLoadingDisabled ? 
internal_llama_chat_apply_template_r(model, tmpl, chat, n_msg, add_ass, buf, length) + : _llamaNativeLibraryHolder.LoadFunction("llama_chat_apply_template")(model, tmpl, chat, n_msg, add_ass, buf, length); + + public static int llama_token_to_piece(SafeLlamaModelHandle model, LLamaToken llamaToken, Span buffer, bool special) + { + unsafe + { + fixed (byte* bufferPtr = buffer) + { + return llama_token_to_piece_native(model, llamaToken, bufferPtr, buffer.Length, special); + } + } + } + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_token_to_piece")] + static extern unsafe int llama_token_to_piece_native_r(SafeLlamaModelHandle model, LLamaToken llamaToken, byte* buffer, int length, bool special); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + delegate int llama_token_to_piece_native_t(SafeLlamaModelHandle model, LLamaToken llamaToken, byte* buffer, int length, bool special); + static int llama_token_to_piece_native(SafeLlamaModelHandle model, LLamaToken llamaToken, byte* buffer, int length, bool special) => + NativeLibraryConfig.DynamicLoadingDisabled ? 
llama_token_to_piece_native_r(model, llamaToken, buffer, length, special) + : _llamaNativeLibraryHolder.LoadFunction("llama_token_to_piece")(model, llamaToken, buffer, length, special); +#endif +} \ No newline at end of file diff --git a/LLama/Native/NativeApi.Quantize.cs b/LLama/Native/NativeApi.Quantize.cs index a2a372bc8..76731db08 100644 --- a/LLama/Native/NativeApi.Quantize.cs +++ b/LLama/Native/NativeApi.Quantize.cs @@ -1,9 +1,10 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; namespace LLama.Native { public static partial class NativeApi { +#if !NETSTANDARD /// /// Returns 0 on success /// @@ -13,5 +14,6 @@ public static partial class NativeApi /// Returns 0 on success [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern uint llama_model_quantize(string fname_inp, string fname_out, ref LLamaModelQuantizeParams param); +#endif } } diff --git a/LLama/Native/NativeApi.Sampling.cs b/LLama/Native/NativeApi.Sampling.cs index 1b30a1cf7..fae382359 100644 --- a/LLama/Native/NativeApi.Sampling.cs +++ b/LLama/Native/NativeApi.Sampling.cs @@ -1,29 +1,10 @@ -using System; +using System; using System.Runtime.InteropServices; namespace LLama.Native { public static partial class NativeApi { - /// - /// Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. - /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. - /// - /// - /// Pointer to LLamaTokenDataArray - /// - /// - /// Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. - /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. - /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. 
- [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] - public static extern unsafe void llama_sample_repetition_penalties(SafeLLamaContextHandle ctx, - ref LLamaTokenDataArrayNative candidates, - LLamaToken* last_tokens, ulong last_tokens_size, - float penalty_repeat, - float penalty_freq, - float penalty_present); - /// /// Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806 /// @@ -51,6 +32,26 @@ public static void llama_sample_apply_guidance(SafeLLamaContextHandle ctx, Span< } } +#if !NETSTANDARD + /// + /// Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. + /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. + /// + /// + /// Pointer to LLamaTokenDataArray + /// + /// + /// Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. + /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. + /// Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. 
+ [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern unsafe void llama_sample_repetition_penalties(SafeLLamaContextHandle ctx, + ref LLamaTokenDataArrayNative candidates, + LLamaToken* last_tokens, ulong last_tokens_size, + float penalty_repeat, + float penalty_freq, + float penalty_present); + /// /// Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806 /// @@ -183,5 +184,6 @@ public static void llama_sample_apply_guidance(SafeLLamaContextHandle ctx, Span< /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern LLamaToken llama_sample_token(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates); +#endif } } diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index 8a6491776..958aafd68 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -8,7 +8,7 @@ namespace LLama.Native /// /// Direct translation of the llama.cpp API /// - public static partial class NativeApi + public unsafe static partial class NativeApi { /// /// A method that does nothing. This is a native method, calling it will force the llama native dependencies to be loaded. 
@@ -16,9 +16,21 @@ public static partial class NativeApi /// public static void llama_empty_call() { - llama_max_devices(); + // Calling any native method forces the native library to load. + llama_max_devices(); } + /// + /// Register a callback to receive llama log messages + /// + /// + [Obsolete("Use `NativeLogConfig.llama_log_set` instead")] + public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback) + { + NativeLogConfig.llama_log_set(logCallback); + } + +#if !NETSTANDARD /// /// Get the maximum number of devices supported by llama.cpp /// @@ -264,16 +276,6 @@ public static int llama_token_to_piece(SafeLlamaModelHandle model, LLamaToken ll /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern unsafe int llama_tokenize(SafeLlamaModelHandle model, byte* text, int text_len, LLamaToken* tokens, int n_max_tokens, bool add_special, bool parse_special); - - /// - /// Register a callback to receive llama log messages - /// - /// - [Obsolete("Use `NativeLogConfig.llama_log_set` instead")] - public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback) - { - NativeLogConfig.llama_log_set(logCallback); - } /// /// Returns the number of tokens in the KV cache (slow, use only for debug) @@ -437,5 +439,6 @@ public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback) /// Returns the split_prefix length. 
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern int llama_split_prefix(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count); +#endif } } diff --git a/LLama/Native/NativeLogConfig.cs b/LLama/Native/NativeLogConfig.cs index 82b097fb3..74a77c86a 100644 --- a/LLama/Native/NativeLogConfig.cs +++ b/LLama/Native/NativeLogConfig.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; using System.Text; using System.Threading; using Microsoft.Extensions.Logging; @@ -17,12 +17,30 @@ public static class NativeLogConfig /// public delegate void LLamaLogCallback(LLamaLogLevel level, string message); +#if NETSTANDARD + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_log_set")] + private static extern void native_llama_log_set_r(LLamaLogCallback? logCallback); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void native_llama_log_set_t(LLamaLogCallback? logCallback); + private static void native_llama_log_set(LLamaLogCallback? logCallback) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + native_llama_log_set_r(logCallback); + } + else + { + NativeApi.GetLLamaExport("llama_log_set")(logCallback); + } + } +#else /// /// Register a callback to receive llama log messages /// /// [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_log_set")] private static extern void native_llama_log_set(LLamaLogCallback? logCallback); +#endif /// /// A GC handle for the current log callback to ensure the callback is not collected @@ -52,7 +70,7 @@ public static void llama_log_set(LLamaLogCallback? logCallback) { // We can't set the log method yet since that would cause the llama.dll to load. 
// Instead configure it to be set when the native library loading is done - NativeLibraryConfig.Instance.WithLogCallback(logCallback); + NativeLibraryConfig.LLama.WithLogCallback(logCallback); } } diff --git a/LLama/Native/SafeLLamaContextHandle.NetStandard.cs b/LLama/Native/SafeLLamaContextHandle.NetStandard.cs new file mode 100644 index 000000000..77046bf72 --- /dev/null +++ b/LLama/Native/SafeLLamaContextHandle.NetStandard.cs @@ -0,0 +1,200 @@ +using System; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ + public unsafe partial class SafeLLamaContextHandle + { +#if NETSTANDARD + private unsafe delegate bool GgmlAbortCallback(void* data); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLLamaContextHandle llama_new_context_with_model_r(SafeLlamaModelHandle model, LLamaContextParams @params); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLLamaContextHandle llama_new_context_with_model_t(SafeLlamaModelHandle model, LLamaContextParams @params); + private static SafeLLamaContextHandle llama_new_context_with_model(SafeLlamaModelHandle model, LLamaContextParams @params) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_new_context_with_model_r(model, @params) : NativeApi.GetLLamaExport("llama_new_context_with_model")(model, @params); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_free_r(IntPtr ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_free_t(IntPtr ctx); + private static void llama_free(IntPtr ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_free_r(ctx); + } + else + { + NativeApi.GetLLamaExport("llama_free")(ctx); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_set_abort_callback_r(SafeLLamaContextHandle ctx, GgmlAbortCallback abort_callback, void* abort_callback_data); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_set_abort_callback_t(SafeLLamaContextHandle ctx, GgmlAbortCallback abort_callback, void* abort_callback_data); + private static void llama_set_abort_callback(SafeLLamaContextHandle ctx, GgmlAbortCallback abort_callback, void* abort_callback_data) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_set_abort_callback_r(ctx, abort_callback, abort_callback_data); + } + else + { + NativeApi.GetLLamaExport("llama_set_abort_callback")(ctx, abort_callback, abort_callback_data); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_decode_r(SafeLLamaContextHandle ctx, LLamaNativeBatch batch); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_decode_t(SafeLLamaContextHandle ctx, LLamaNativeBatch batch); + private static int llama_decode(SafeLLamaContextHandle ctx, LLamaNativeBatch batch) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_decode_r(ctx, batch) : NativeApi.GetLLamaExport("llama_decode")(ctx, batch); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_set_n_threads_r(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_set_n_threads_t(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch); + private static void llama_set_n_threads(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_set_n_threads_r(ctx, n_threads, n_threads_batch); + } + else + { + NativeApi.GetLLamaExport("llama_set_n_threads")(ctx, n_threads, n_threads_batch); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern float* llama_get_logits_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float* llama_get_logits_t(SafeLLamaContextHandle ctx); + private static float* llama_get_logits(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_get_logits_r(ctx) : NativeApi.GetLLamaExport("llama_get_logits")(ctx); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern float* llama_get_logits_ith_r(SafeLLamaContextHandle ctx, int i); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float* llama_get_logits_ith_t(SafeLLamaContextHandle ctx, int i); + private static float* llama_get_logits_ith(SafeLLamaContextHandle ctx, int i) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_get_logits_ith_r(ctx, i) : NativeApi.GetLLamaExport("llama_get_logits_ith")(ctx, i); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern uint llama_n_ctx_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate uint llama_n_ctx_t(SafeLLamaContextHandle ctx); + private static uint llama_n_ctx(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_n_ctx_r(ctx) : NativeApi.GetLLamaExport("llama_n_ctx")(ctx); + + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern uint llama_n_batch_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate uint llama_n_batch_t(SafeLLamaContextHandle ctx); + private static uint llama_n_batch(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_n_batch_r(ctx) : NativeApi.GetLLamaExport("llama_n_batch")(ctx); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern uint llama_n_ubatch_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate uint llama_n_ubatch_t(SafeLLamaContextHandle ctx); + private static uint llama_n_ubatch(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_n_ubatch_r(ctx) : NativeApi.GetLLamaExport("llama_n_ubatch")(ctx); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_set_rng_seed_r(SafeLLamaContextHandle ctx, uint seed); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_set_rng_seed_t(SafeLLamaContextHandle ctx, uint seed); + private static void llama_set_rng_seed(SafeLLamaContextHandle ctx, uint seed) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_set_rng_seed_r(ctx, seed); + } + else + { + NativeApi.GetLLamaExport("llama_set_rng_seed")(ctx, seed); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern ulong llama_state_get_size_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate ulong llama_state_get_size_t(SafeLLamaContextHandle ctx); + private static ulong llama_state_get_size(SafeLLamaContextHandle ctx) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_get_size_r(ctx) : NativeApi.GetLLamaExport("llama_state_get_size")(ctx); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern ulong llama_state_get_data_r(SafeLLamaContextHandle ctx, byte* dest); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate ulong llama_state_get_data_t(SafeLLamaContextHandle ctx, byte* dest); + private static ulong llama_state_get_data(SafeLLamaContextHandle ctx, byte* dest) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_state_get_data_r(ctx, dest) : NativeApi.GetLLamaExport("llama_state_get_data")(ctx, dest); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern ulong llama_state_set_data_r(SafeLLamaContextHandle ctx, byte* src); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate ulong llama_state_set_data_t(SafeLLamaContextHandle ctx, byte* src); + private static ulong llama_state_set_data(SafeLLamaContextHandle ctx, byte* src) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_set_data_r(ctx, src) : NativeApi.GetLLamaExport("llama_state_set_data")(ctx, src); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern nuint llama_state_seq_get_size_r(SafeLLamaContextHandle ctx, LLamaSeqId seq_id); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate nuint llama_state_seq_get_size_t(SafeLLamaContextHandle ctx, LLamaSeqId seq_id); + private static nuint llama_state_seq_get_size(SafeLLamaContextHandle ctx, LLamaSeqId seq_id) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_seq_get_size_r(ctx, seq_id) : NativeApi.GetLLamaExport("llama_state_seq_get_size")(ctx, seq_id); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe nuint llama_state_seq_get_data_r(SafeLLamaContextHandle ctx, byte* dst, LLamaSeqId seq_id); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate nuint llama_state_seq_get_data_t(SafeLLamaContextHandle ctx, byte* dst, LLamaSeqId seq_id); + private static nuint llama_state_seq_get_data(SafeLLamaContextHandle ctx, byte* dst, LLamaSeqId seq_id) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_state_seq_get_data_r(ctx, dst, seq_id) : NativeApi.GetLLamaExport("llama_state_seq_get_data")(ctx, dst, seq_id); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern nuint llama_state_seq_set_data_r(SafeLLamaContextHandle ctx, byte* src, LLamaSeqId dest_seq_id); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate nuint llama_state_seq_set_data_t(SafeLLamaContextHandle ctx, byte* src, LLamaSeqId dest_seq_id); + private static nuint llama_state_seq_set_data(SafeLLamaContextHandle ctx, byte* src, LLamaSeqId dest_seq_id) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_state_seq_set_data_r(ctx, src, dest_seq_id) : NativeApi.GetLLamaExport("llama_state_seq_set_data")(ctx, src, dest_seq_id); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_defrag_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_defrag_t(SafeLLamaContextHandle ctx); + private static void llama_kv_cache_defrag(SafeLLamaContextHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_defrag_r(ctx); + } + else + { + NativeApi.GetLLamaExport("llama_kv_cache_defrag")(ctx); + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_kv_cache_update_r(SafeLLamaContextHandle ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_kv_cache_update_t(SafeLLamaContextHandle ctx); + public static void llama_kv_cache_update(SafeLLamaContextHandle ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_kv_cache_update_r(ctx); + } + else + { + NativeApi.GetLLamaExport("llama_kv_cache_update")(ctx); + } + } +#endif + } +} \ No newline at end of file diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs 
index 13a7aa1b2..6c5632a7c 100644 --- a/LLama/Native/SafeLLamaContextHandle.cs +++ b/LLama/Native/SafeLLamaContextHandle.cs @@ -10,7 +10,7 @@ namespace LLama.Native /// A safe wrapper around a llama_context /// // ReSharper disable once ClassNeverInstantiated.Global (used implicitly in native API) - public sealed class SafeLLamaContextHandle + public sealed partial class SafeLLamaContextHandle : SafeLLamaHandleBase { #region properties and fields @@ -103,6 +103,7 @@ static SafeLLamaContextHandle() NativeApi.llama_empty_call(); } +#if !NETSTANDARD /// /// Create a new llama_context with the given model. **This should never be called directly! Always use SafeLLamaContextHandle.Create**! /// @@ -283,6 +284,8 @@ static SafeLLamaContextHandle() /// [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx); +#endif + #endregion /// diff --git a/LLama/Native/SafeLlamaModelHandle.NetStandard.cs b/LLama/Native/SafeLlamaModelHandle.NetStandard.cs new file mode 100644 index 000000000..7d8a68d84 --- /dev/null +++ b/LLama/Native/SafeLlamaModelHandle.NetStandard.cs @@ -0,0 +1,360 @@ +using System; +using System.Runtime.InteropServices; + +namespace LLama.Native +{ + public partial class SafeLlamaModelHandle + { +#if NETSTANDARD + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLlamaModelHandle llama_load_model_from_file_r(string path_model, LLamaModelParams @params); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + delegate SafeLlamaModelHandle llama_load_model_from_file_t(string path_model, LLamaModelParams @params); + private static SafeLlamaModelHandle llama_load_model_from_file(string path_model, LLamaModelParams @params) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_load_model_from_file_r(path_model, @params) : NativeApi.GetLLamaExport("llama_load_model_from_file")(path_model, @params); + + /// + /// Apply a LoRA adapter to a loaded model + /// path_base_model is the path to a higher quality model to use as a base for + /// the layers modified by the adapter. Can be NULL to use the current loaded model. + /// The model needs to be reloaded before applying a new adapter, otherwise the adapter + /// will be applied on top of the previous one + /// + /// + /// + /// + /// + /// + /// Returns 0 on success + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_model_apply_lora_from_file_r(SafeLlamaModelHandle model_ptr, string path_lora, float scale, string? path_base_model, int n_threads); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_model_apply_lora_from_file_t(SafeLlamaModelHandle model_ptr, string path_lora, float scale, string? path_base_model, int n_threads); + private static int llama_model_apply_lora_from_file(SafeLlamaModelHandle model_ptr, string path_lora, float scale, string? path_base_model, int n_threads) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_model_apply_lora_from_file_r(model_ptr, path_lora, scale, path_base_model, n_threads) : NativeApi.GetLLamaExport("llama_model_apply_lora_from_file")(model_ptr, path_lora, scale, path_base_model, n_threads); + + /// + /// Frees all allocated memory associated with a model + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern void llama_free_model_r(IntPtr model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void llama_free_model_t(IntPtr model); + private static void llama_free_model(IntPtr model) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + llama_free_model_r(model); + } + else + { + NativeApi.GetLLamaExport("llama_free_model")(model); + } + } + + /// + /// Get the number of metadata key/value pairs + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_model_meta_count_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_model_meta_count_t(SafeLlamaModelHandle model); + private static int llama_model_meta_count(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_model_meta_count_r(model) : NativeApi.GetLLamaExport("llama_model_meta_count")(model); + + /// + /// Get metadata key name by index + /// + /// Model to fetch from + /// Index of key to fetch + /// buffer to write result into + /// The length of the string on success (even if the buffer is too small). -1 is the key does not exist. 
+ private static int llama_model_meta_key_by_index(SafeLlamaModelHandle model, int index, Span dest) + { + unsafe + { + fixed (byte* destPtr = dest) + { + return llama_model_meta_key_by_index_native(model, index, destPtr, dest.Length); + } + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_model_meta_key_by_index")] + static extern unsafe int llama_model_meta_key_by_index_native_r(SafeLlamaModelHandle model, int index, byte* buf, long buf_size); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + unsafe delegate int llama_model_meta_key_by_index_native_t(SafeLlamaModelHandle model, int index, byte* buf, long buf_size); + static unsafe int llama_model_meta_key_by_index_native(SafeLlamaModelHandle model, int index, byte* buf, long buf_size) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_model_meta_key_by_index_native_r(model, index, buf, buf_size) : NativeApi.GetLLamaExport("llama_model_meta_key_by_index")(model, index, buf, buf_size); + + /// + /// Get metadata value as a string by index + /// + /// Model to fetch from + /// Index of val to fetch + /// Buffer to write result into + /// The length of the string on success (even if the buffer is too small). -1 is the key does not exist. 
+ private static int llama_model_meta_val_str_by_index(SafeLlamaModelHandle model, int index, Span dest) + { + unsafe + { + fixed (byte* destPtr = dest) + { + return llama_model_meta_val_str_by_index_native(model, index, destPtr, dest.Length); + } + } + } + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_model_meta_val_str_by_index")] + static extern unsafe int llama_model_meta_val_str_by_index_native_r(SafeLlamaModelHandle model, int index, byte* buf, long buf_size); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + unsafe delegate int llama_model_meta_val_str_by_index_native_t(SafeLlamaModelHandle model, int index, byte* buf, long buf_size); + static unsafe int llama_model_meta_val_str_by_index_native(SafeLlamaModelHandle model, int index, byte* buf, long buf_size) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_model_meta_val_str_by_index_native_r(model, index, buf, buf_size) : NativeApi.GetLLamaExport("llama_model_meta_val_str_by_index")(model, index, buf, buf_size); + + /// + /// Get metadata value as a string by key name + /// + /// + /// + /// + /// + /// The length of the string on success, or -1 on failure + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe int llama_model_meta_val_str_r(SafeLlamaModelHandle model, byte* key, byte* buf, long buf_size); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private unsafe delegate int llama_model_meta_val_str_t(SafeLlamaModelHandle model, byte* key, byte* buf, long buf_size); + public unsafe static int llama_model_meta_val_str(SafeLlamaModelHandle model, byte* key, byte* buf, long buf_size) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_model_meta_val_str_r(model, key, buf, buf_size) : NativeApi.GetLLamaExport("llama_model_meta_val_str")(model, key, buf, buf_size); + + /// + /// Get the number of tokens in the model vocabulary + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_n_vocab_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_n_vocab_t(SafeLlamaModelHandle model); + private static int llama_n_vocab(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_n_vocab_r(model) : NativeApi.GetLLamaExport("llama_n_vocab")(model); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaVocabType llama_vocab_type_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaVocabType llama_vocab_type_t(SafeLlamaModelHandle model); + private static LLamaVocabType llama_vocab_type(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_vocab_type_r(model) : NativeApi.GetLLamaExport("llama_vocab_type")(model); + + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaRopeType llama_rope_type_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaRopeType llama_rope_type_t(SafeLlamaModelHandle model); + private static LLamaRopeType llama_rope_type(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_rope_type_r(model) : NativeApi.GetLLamaExport("llama_rope_type")(model); + + /// + /// Get the size of the context window for the model + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_n_ctx_train_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_n_ctx_train_t(SafeLlamaModelHandle model); + private static int llama_n_ctx_train(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_n_ctx_train_r(model) : NativeApi.GetLLamaExport("llama_n_ctx_train")(model); + + /// + /// Get the dimension of embedding vectors from this model + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_n_embd_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_n_embd_t(SafeLlamaModelHandle model); + private static int llama_n_embd(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_n_embd_r(model) : NativeApi.GetLLamaExport("llama_n_embd")(model); + + /// + /// Get the number of layers in this model + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_n_layers_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_n_layers_t(SafeLlamaModelHandle model); + private static int llama_n_layers(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_n_layers_r(model) : NativeApi.GetLLamaExport("llama_n_layers")(model); + + /// + /// Get a string describing the model type + /// + /// + /// + /// + /// The length of the string on success (even if the buffer is too small)., or -1 on failure + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern unsafe int llama_model_desc_r(SafeLlamaModelHandle model, byte* buf, long buf_size); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private unsafe delegate int llama_model_desc_t(SafeLlamaModelHandle model, byte* buf, long buf_size); + private static unsafe int llama_model_desc(SafeLlamaModelHandle model, byte* buf, long buf_size) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_model_desc_r(model, buf, buf_size) : NativeApi.GetLLamaExport("llama_model_desc")(model, buf, buf_size); + + /// + /// Get the size of the model in bytes + /// + /// + /// The size of the model + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern ulong llama_model_size_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate ulong llama_model_size_t(SafeLlamaModelHandle model); + private static ulong llama_model_size(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_model_size_r(model) : NativeApi.GetLLamaExport("llama_model_size")(model); + + /// + /// Get the number of parameters in this model + /// + /// + /// The functions return the length of the string on success, or -1 on failure + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern ulong llama_model_n_params_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate ulong llama_model_n_params_t(SafeLlamaModelHandle model); + private static ulong llama_model_n_params(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_model_n_params_r(model) : NativeApi.GetLLamaExport("llama_model_n_params")(model); + + /// + /// Get the model's RoPE frequency scaling factor + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern float llama_rope_freq_scale_train_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate float llama_rope_freq_scale_train_t(SafeLlamaModelHandle model); + private static float llama_rope_freq_scale_train(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_rope_freq_scale_train_r(model) : NativeApi.GetLLamaExport("llama_rope_freq_scale_train")(model); + + /// + /// Get the "Beginning of sentence" token + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_token_bos_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_token_bos_t(SafeLlamaModelHandle model); + private static LLamaToken llama_token_bos(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_bos_r(model) : NativeApi.GetLLamaExport("llama_token_bos")(model); + + /// + /// Get the "End of sentence" token + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_token_eos_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_token_eos_t(SafeLlamaModelHandle model); + private static LLamaToken llama_token_eos(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_token_eos_r(model) : NativeApi.GetLLamaExport("llama_token_eos")(model); + + /// + /// Get the "classification" token + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_token_cls_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_token_cls_t(SafeLlamaModelHandle model); + private static LLamaToken llama_token_cls(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_cls_r(model) : NativeApi.GetLLamaExport("llama_token_cls")(model); + + /// + /// Get the "sentence separator" token + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_token_sep_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_token_sep_t(SafeLlamaModelHandle model); + private static LLamaToken llama_token_sep(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_sep_r(model) : NativeApi.GetLLamaExport("llama_token_sep")(model); + + /// + /// Get the "new line" token + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern LLamaToken llama_token_nl_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate LLamaToken llama_token_nl_t(SafeLlamaModelHandle model); + private static LLamaToken llama_token_nl(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_token_nl_r(model) : NativeApi.GetLLamaExport("llama_token_nl")(model); + + /// + /// codellama infill tokens, Beginning of infill prefix + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_token_prefix_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_token_prefix_t(SafeLlamaModelHandle model); + private static int llama_token_prefix(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_prefix_r(model) : NativeApi.GetLLamaExport("llama_token_prefix")(model); + + /// + /// codellama infill tokens, Beginning of infill middle + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_token_middle_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_token_middle_t(SafeLlamaModelHandle model); + private static int llama_token_middle(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_middle_r(model) : NativeApi.GetLLamaExport("llama_token_middle")(model); + + /// + /// codellama infill tokens, Beginning of infill suffix + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_token_suffix_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_token_suffix_t(SafeLlamaModelHandle model); + private static int llama_token_suffix(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_token_suffix_r(model) : NativeApi.GetLLamaExport("llama_token_suffix")(model); + + /// + /// codellama infill tokens, End of infill middle + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern int llama_token_eot_r(SafeLlamaModelHandle model); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate int llama_token_eot_t(SafeLlamaModelHandle model); + private static int llama_token_eot(SafeLlamaModelHandle model) => NativeLibraryConfig.DynamicLoadingDisabled ? + llama_token_eot_r(model) : NativeApi.GetLLamaExport("llama_token_eot")(model); + + /// + /// Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.) + /// + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private static extern bool llama_token_is_eog_r(SafeLlamaModelHandle model, LLamaToken token); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.U1)] + private delegate bool llama_token_is_eog_t(SafeLlamaModelHandle model, LLamaToken token); + private static bool llama_token_is_eog(SafeLlamaModelHandle model, LLamaToken token) => NativeLibraryConfig.DynamicLoadingDisabled ? 
+ llama_token_is_eog_r(model, token) : NativeApi.GetLLamaExport("llama_token_is_eog")(model, token); +#endif + } +} \ No newline at end of file diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs index f24cfe5fd..91594444d 100644 --- a/LLama/Native/SafeLlamaModelHandle.cs +++ b/LLama/Native/SafeLlamaModelHandle.cs @@ -14,7 +14,7 @@ namespace LLama.Native /// A reference to a set of llama model weights /// // ReSharper disable once ClassNeverInstantiated.Global (used implicitly in native API) - public sealed class SafeLlamaModelHandle + public sealed partial class SafeLlamaModelHandle : SafeLLamaHandleBase { /// @@ -134,6 +134,8 @@ static SafeLlamaModelHandle() NativeApi.llama_empty_call(); } +#if !NETSTANDARD + /// /// Load all of the weights of a model into memory. /// @@ -371,6 +373,8 @@ private static int llama_model_meta_val_str_by_index(SafeLlamaModelHandle model, [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] [return: MarshalAs(UnmanagedType.U1)] private static extern bool llama_token_is_eog(SafeLlamaModelHandle model, LLamaToken token); +#endif + #endregion #region LoRA diff --git a/LLama/Native/SafeLlavaModelHandle.cs b/LLama/Native/SafeLlavaModelHandle.cs index fd898b536..fa0ce12a7 100644 --- a/LLama/Native/SafeLlavaModelHandle.cs +++ b/LLama/Native/SafeLlavaModelHandle.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.IO; using System.Runtime.InteropServices; using LLama.Exceptions; @@ -80,6 +80,40 @@ public bool EvalImageEmbed(LLamaContext ctxLlama, SafeLlavaImageEmbedHandle imag return NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, imageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past ); } +#if NETSTANDARD + /// + /// Load MULTI MODAL PROJECTIONS model / Clip Model + /// + /// Model path/file + /// Verbosity level + /// SafeLlavaModelHandle + [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)] + 
private static extern SafeLlavaModelHandle clip_model_load_r(string mmProj, int verbosity); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate SafeLlavaModelHandle clip_model_load_t(string mmProj, int verbosity); + private static SafeLlavaModelHandle clip_model_load(string mmProj, int verbosity) => NativeLibraryConfig.DynamicLoadingDisabled ? + clip_model_load_r(mmProj, verbosity) : NativeApi.GetLLavaExport("clip_model_load")(mmProj, verbosity); + + /// + /// Frees MULTI MODAL PROJECTIONS model / Clip Model + /// + /// Internal Pointer to the model + [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)] + private static extern void clip_free_r(IntPtr ctx); + [UnmanagedFunctionPointer(CallingConvention.Cdecl)] + private delegate void clip_free_t(IntPtr ctx); + private static void clip_free(IntPtr ctx) + { + if (NativeLibraryConfig.DynamicLoadingDisabled) + { + clip_free_r(ctx); + } + else + { + NativeApi.GetLLavaExport("clip_free")(ctx); + } + } +#else /// /// Load MULTI MODAL PROJECTIONS model / Clip Model /// @@ -95,6 +129,7 @@ public bool EvalImageEmbed(LLamaContext ctxLlama, SafeLlavaImageEmbedHandle imag /// Internal Pointer to the model [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)] private static extern void clip_free(IntPtr ctx); +#endif } diff --git a/LLamaSharp.sln b/LLamaSharp.sln index 8039982e5..9ee80bd92 100644 --- a/LLamaSharp.sln +++ b/LLamaSharp.sln @@ -17,7 +17,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.KernelMemory", "LLama.KernelMemory\LLamaSharp.KernelMemory.csproj", "{E5589AE7-B86F-4343-A1CC-8E5D34596E52}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Benchmark", "LLama.Benchmark\LLama.Benchmark.csproj", "{90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}" 
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Benchmark", "LLama.Benchmark\LLama.Benchmark.csproj", "{90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NetStandardTest", "NetStandardTest\NetStandardTest.csproj", "{39274239-7BBA-4589-821F-649E2E9F1954}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -125,6 +127,18 @@ Global {90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}.Release|Any CPU.Build.0 = Release|Any CPU {90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}.Release|x64.ActiveCfg = Release|Any CPU {90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}.Release|x64.Build.0 = Release|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Debug|Any CPU.Build.0 = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Debug|x64.ActiveCfg = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Debug|x64.Build.0 = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.GPU|Any CPU.Build.0 = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.GPU|x64.ActiveCfg = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.GPU|x64.Build.0 = Debug|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Release|Any CPU.ActiveCfg = Release|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Release|Any CPU.Build.0 = Release|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Release|x64.ActiveCfg = Release|Any CPU + {39274239-7BBA-4589-821F-649E2E9F1954}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/NetStandardTest/App.config b/NetStandardTest/App.config new file mode 100644 index 000000000..64dfa54bd --- /dev/null +++ b/NetStandardTest/App.config @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/NetStandardTest/NetStandardTest.csproj 
b/NetStandardTest/NetStandardTest.csproj new file mode 100644 index 000000000..0e76ef5f7 --- /dev/null +++ b/NetStandardTest/NetStandardTest.csproj @@ -0,0 +1,109 @@ + + + + + + + Debug + AnyCPU + {39274239-7BBA-4589-821F-649E2E9F1954} + Exe + NetStandardTest + NetStandardTest + v4.8.1 + 512 + true + true + + + 9 + + + x64 + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + ..\packages\Microsoft.Bcl.AsyncInterfaces.8.0.0\lib\net462\Microsoft.Bcl.AsyncInterfaces.dll + + + ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.8.0.1\lib\net462\Microsoft.Extensions.DependencyInjection.Abstractions.dll + + + ..\packages\Microsoft.Extensions.Logging.Abstractions.8.0.1\lib\net462\Microsoft.Extensions.Logging.Abstractions.dll + + + ..\packages\Spectre.Console.0.49.2-preview.0.4\lib\netstandard2.0\Spectre.Console.dll + + + + ..\packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll + + + + ..\packages\System.Memory.4.5.5\lib\net461\System.Memory.dll + + + + ..\packages\System.Numerics.Vectors.4.5.0\lib\net46\System.Numerics.Vectors.dll + + + ..\packages\System.Runtime.CompilerServices.Unsafe.6.0.0\lib\net461\System.Runtime.CompilerServices.Unsafe.dll + + + ..\packages\System.Text.Encodings.Web.8.0.0\lib\net462\System.Text.Encodings.Web.dll + + + ..\packages\System.Text.Json.8.0.3\lib\net462\System.Text.Json.dll + + + ..\packages\System.Threading.Tasks.Extensions.4.5.4\lib\net461\System.Threading.Tasks.Extensions.dll + + + ..\packages\System.ValueTuple.4.5.0\lib\net47\System.ValueTuple.dll + + + + + + + + + + + + + + + + + + + {01a12d68-de95-425e-aeee-2d099305036d} + LLamaSharp + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + \ No newline at end of file diff --git a/NetStandardTest/Program.cs b/NetStandardTest/Program.cs new file mode 100644 index 000000000..e6f2980ba --- /dev/null +++ b/NetStandardTest/Program.cs @@ -0,0 +1,184 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using LLama.Native; +using LLama.Common; +using LLama; +using LLama.Abstractions; +using System.Text.RegularExpressions; +using Spectre.Console; + +namespace NetStandardTest +{ + public class Program + { + public static void Main(string[] args) + { + //Console.WriteLine($"AppContext.BaseDirectory: {AppContext.BaseDirectory}"); + //var showLLamaCppLogs = true; + //NativeLibraryConfig + // .All + // .WithLogCallback((level, message) => + // { + // if (showLLamaCppLogs) + // Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); + // }); + + //// Configure native library to use. This must be done before any other llama.cpp methods are called! + //NativeLibraryConfig + // .All + // .WithCuda(); + // //.WithAutoDownload() // An experimental feature + // //.DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); + + //// Calling this method forces loading to occur now. + //NativeApi.llama_empty_call(); + + //string modelPath = @"D:\development\llama\weights\Wizard-Vicuna-7B-Uncensored.Q4_K_M.gguf"; + + //var prompt = File.ReadAllText(@"D:\development\llama\native\LLamaSharp\LLama.Examples\Assets\chat-with-bob.txt").Trim(); + + //var parameters = new ModelParams(modelPath) + //{ + // Seed = 1337, + // GpuLayerCount = 5 + //}; + //var model = LLamaWeights.LoadFromFile(parameters); + //var context = model.CreateContext(parameters); + //var ex = new InteractiveExecutor(context); + + //Console.ForegroundColor = ConsoleColor.Yellow; + //Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 128 and the context size is 256. 
(an example for small scale usage)"); + //Console.ForegroundColor = ConsoleColor.White; + + //Console.Write(prompt); + + //var inferenceParams = new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List { "User:" }, MaxTokens = 128 }; + + Run().Wait(); + } + + private static async Task Run() + { + string multiModalProj = @"D:\development\llama\weights\llava-v1.5\llava-v1.5-7b-mmproj-Q4_0.gguf"; + string modelPath = @"D:\development\llama\weights\llava-v1.5\llava-v1.5-7b-Q4_K.gguf"; + string modelImage = @"C:\Users\liu_y\Pictures\avatar\df84c46bddf845a0c12f56a64409b184310669994.jpg"; + const int maxTokens = 1024; + + var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; + + var parameters = new ModelParams(modelPath); + + using var model = await LLamaWeights.LoadFromFileAsync(parameters); + using var context = model.CreateContext(parameters); + + // Llava Init + using var clipModel = await LLavaWeights.LoadFromFileAsync(multiModalProj); + + var ex = new InteractiveExecutor(context, clipModel); + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("The executor has been enabled. 
In this example, the prompt is printed, the maximum tokens is set to {0} and the context size is {1}.", maxTokens, parameters.ContextSize); + Console.WriteLine("To send an image, enter its filename in curly braces, like this {c:/image.jpg}."); + + var inferenceParams = new InferenceParams() { Temperature = 0.1f, AntiPrompts = new List { "\nUSER:" }, MaxTokens = maxTokens }; + + do + { + // Evaluate if we have images + // + //var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); + var imageCount = 1; + var hasImages = imageCount > 0; + + if (hasImages) + { + List imagePathsWithCurlyBraces = new(); + foreach(var match in Regex.Matches(prompt, "{([^}]*)}")) + { + imagePathsWithCurlyBraces.Add(((Match)match).Value); + } + List imagePaths = new(); + foreach (var match in Regex.Matches(prompt, "{([^}]*)}")) + { + imagePaths.Add(((Match)match).Groups[1].Value); + } + + List imageBytes; + try + { + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); + } + catch (IOException exception) + { + Console.ForegroundColor = ConsoleColor.Red; + Console.Write( + $"Could not load your {(imageCount == 1 ? 
"image" : "images")}:"); + Console.Write($"{exception.Message}"); + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Please try again."); + break; + } + + // Each prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove(LLamaSeqId.Zero, -1, -1); + + int index = 0; + foreach (var path in imagePathsWithCurlyBraces) + { + // First image replace to tag " : ""); + } + + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine($"Here are the images, that are sent to the chat model in addition to your message."); + Console.WriteLine(); + + //foreach (var consoleImage in imageBytes?.Select(bytes => new CanvasImage(bytes))) + //{ + // consoleImage.MaxWidth = 50; + // AnsiConsole.Write(consoleImage); + //} + + Console.WriteLine(); + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine($"The images were scaled down for the console only, the model gets full versions."); + Console.WriteLine($"Write /exit or press Ctrl+c to return to main menu."); + Console.WriteLine(); + + + // Initialize Images in executor + // + foreach (var image in imagePaths) + { + ex.Images.Add(File.ReadAllBytes(image)); + } + } + + Console.ForegroundColor = Color.White; + await foreach (var text in ex.InferAsync(prompt, inferenceParams)) + { + Console.Write(text); + } + Console.Write(" "); + Console.ForegroundColor = ConsoleColor.Green; + prompt = Console.ReadLine(); + Console.WriteLine(); + + // let the user finish with exit + // + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + break; + + } + while (true); + } + } +} diff --git a/NetStandardTest/Properties/AssemblyInfo.cs b/NetStandardTest/Properties/AssemblyInfo.cs new file mode 100644 index 000000000..831246fe8 --- /dev/null +++ b/NetStandardTest/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using 
System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("NetStandardTest")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("NetStandardTest")] +[assembly: AssemblyCopyright("Copyright © 2024")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("39274239-7bba-4589-821f-649e2e9f1954")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/NetStandardTest/Properties/Settings.Designer.cs b/NetStandardTest/Properties/Settings.Designer.cs new file mode 100644 index 000000000..2689d7b68 --- /dev/null +++ b/NetStandardTest/Properties/Settings.Designer.cs @@ -0,0 +1,26 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +//------------------------------------------------------------------------------ + +namespace NetStandardTest.Properties { + + + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "17.8.0.0")] + internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { + + private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); + + public static Settings Default { + get { + return defaultInstance; + } + } + } +} diff --git a/NetStandardTest/Properties/Settings.settings b/NetStandardTest/Properties/Settings.settings new file mode 100644 index 000000000..049245f40 --- /dev/null +++ b/NetStandardTest/Properties/Settings.settings @@ -0,0 +1,6 @@ + + + + + + diff --git a/NetStandardTest/packages.config b/NetStandardTest/packages.config new file mode 100644 index 000000000..c48694eaf --- /dev/null +++ b/NetStandardTest/packages.config @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file From 13c5ac052f06e4e29707d161986597793f166e6d Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 14 May 2024 07:00:44 +0800 Subject: [PATCH 09/11] fix: linux error. 
--- LLama/Native/Load/NativeLibraryUtils.cs | 7 +- LLamaSharp.sln | 14 ++- NetAppTest/NetAppTest.csproj | 22 ++++ {NetStandardTest => NetAppTest}/Program.cs | 46 ++++---- NetStandardTest/App.config | 14 --- NetStandardTest/NetStandardTest.csproj | 109 ------------------ NetStandardTest/Properties/AssemblyInfo.cs | 36 ------ .../Properties/Settings.Designer.cs | 26 ----- NetStandardTest/Properties/Settings.settings | 6 - NetStandardTest/packages.config | 17 --- 10 files changed, 63 insertions(+), 234 deletions(-) create mode 100644 NetAppTest/NetAppTest.csproj rename {NetStandardTest => NetAppTest}/Program.cs (84%) delete mode 100644 NetStandardTest/App.config delete mode 100644 NetStandardTest/NetStandardTest.csproj delete mode 100644 NetStandardTest/Properties/AssemblyInfo.cs delete mode 100644 NetStandardTest/Properties/Settings.Designer.cs delete mode 100644 NetStandardTest/Properties/Settings.settings delete mode 100644 NetStandardTest/packages.config diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index 478796f01..c27ed23f1 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -94,12 +94,15 @@ static IntPtr TryLoad(string path, IEnumerable searchDirectories, Native // Try to find the given file in any of the possible search paths private static IEnumerable TryFindPaths(string filename, IEnumerable searchDirectories) - { +{ +#if !NETSTANDARD + // avoid return relative path under .NET standard2.0. 
yield return filename; +#endif // Try the configured search directories in the configuration foreach (var path in searchDirectories) { - var candidate = Path.Combine(path, filename); + var candidate = Path.GetFullPath(Path.Combine(path, filename)); if (File.Exists(candidate)) yield return candidate; } diff --git a/LLamaSharp.sln b/LLamaSharp.sln index 9ee80bd92..0bf144ac1 100644 --- a/LLamaSharp.sln +++ b/LLamaSharp.sln @@ -19,7 +19,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.KernelMemory", " EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Benchmark", "LLama.Benchmark\LLama.Benchmark.csproj", "{90D38FEE-68EA-459E-A4EE-268B9DFA1CD5}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NetStandardTest", "NetStandardTest\NetStandardTest.csproj", "{39274239-7BBA-4589-821F-649E2E9F1954}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NetAppTest", "NetAppTest\NetAppTest.csproj", "{39FC0DF3-CA3A-4448-AAEA-672752BA8B44}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -139,6 +139,18 @@ Global {39274239-7BBA-4589-821F-649E2E9F1954}.Release|Any CPU.Build.0 = Release|Any CPU {39274239-7BBA-4589-821F-649E2E9F1954}.Release|x64.ActiveCfg = Release|Any CPU {39274239-7BBA-4589-821F-649E2E9F1954}.Release|x64.Build.0 = Release|Any CPU + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Debug|Any CPU.ActiveCfg = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Debug|Any CPU.Build.0 = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Debug|x64.ActiveCfg = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Debug|x64.Build.0 = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.GPU|Any CPU.ActiveCfg = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.GPU|Any CPU.Build.0 = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.GPU|x64.ActiveCfg = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.GPU|x64.Build.0 = Debug|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Release|Any CPU.ActiveCfg = 
Release|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Release|Any CPU.Build.0 = Release|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Release|x64.ActiveCfg = Release|x64 + {39FC0DF3-CA3A-4448-AAEA-672752BA8B44}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/NetAppTest/NetAppTest.csproj b/NetAppTest/NetAppTest.csproj new file mode 100644 index 000000000..d485087f7 --- /dev/null +++ b/NetAppTest/NetAppTest.csproj @@ -0,0 +1,22 @@ + + + + Exe + net8.0 + enable + enable + x64 + + + + + + + + + + + + + + diff --git a/NetStandardTest/Program.cs b/NetAppTest/Program.cs similarity index 84% rename from NetStandardTest/Program.cs rename to NetAppTest/Program.cs index e6f2980ba..314a63b6a 100644 --- a/NetStandardTest/Program.cs +++ b/NetAppTest/Program.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -17,25 +17,25 @@ public class Program { public static void Main(string[] args) { - //Console.WriteLine($"AppContext.BaseDirectory: {AppContext.BaseDirectory}"); - //var showLLamaCppLogs = true; - //NativeLibraryConfig - // .All - // .WithLogCallback((level, message) => - // { - // if (showLLamaCppLogs) - // Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); - // }); - - //// Configure native library to use. This must be done before any other llama.cpp methods are called! - //NativeLibraryConfig - // .All - // .WithCuda(); - // //.WithAutoDownload() // An experimental feature - // //.DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); - - //// Calling this method forces loading to occur now. 
- //NativeApi.llama_empty_call(); + Console.WriteLine($"AppContext.BaseDirectory: {AppContext.BaseDirectory}"); + var showLLamaCppLogs = true; + NativeLibraryConfig + .All + .WithLogCallback((level, message) => + { + if (showLLamaCppLogs) + Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); + }); + + // Configure native library to use. This must be done before any other llama.cpp methods are called! + NativeLibraryConfig + .All + .WithCuda(); + //.WithAutoDownload() // An experimental feature + //.DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); + + // Calling this method forces loading to occur now. + NativeApi.llama_empty_call(); //string modelPath = @"D:\development\llama\weights\Wizard-Vicuna-7B-Uncensored.Q4_K_M.gguf"; @@ -63,9 +63,9 @@ public static void Main(string[] args) private static async Task Run() { - string multiModalProj = @"D:\development\llama\weights\llava-v1.5\llava-v1.5-7b-mmproj-Q4_0.gguf"; - string modelPath = @"D:\development\llama\weights\llava-v1.5\llava-v1.5-7b-Q4_K.gguf"; - string modelImage = @"C:\Users\liu_y\Pictures\avatar\df84c46bddf845a0c12f56a64409b184310669994.jpg"; + string multiModalProj = @"/home/rinne/models/llava-v1.5-7b-mmproj-Q4_0.gguf"; + string modelPath = @"/home/rinne/models/llava-v1.5-7b-Q4_K.gguf"; + string modelImage = @"/home/rinne/code/forks/LLamaSharp/Assets/LLamaSharp-Integrations.png"; const int maxTokens = 1024; var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; diff --git a/NetStandardTest/App.config b/NetStandardTest/App.config deleted file mode 100644 index 64dfa54bd..000000000 --- a/NetStandardTest/App.config +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/NetStandardTest/NetStandardTest.csproj b/NetStandardTest/NetStandardTest.csproj deleted file mode 100644 index 0e76ef5f7..000000000 --- a/NetStandardTest/NetStandardTest.csproj +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - Debug - 
AnyCPU - {39274239-7BBA-4589-821F-649E2E9F1954} - Exe - NetStandardTest - NetStandardTest - v4.8.1 - 512 - true - true - - - 9 - - - x64 - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - AnyCPU - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - ..\packages\Microsoft.Bcl.AsyncInterfaces.8.0.0\lib\net462\Microsoft.Bcl.AsyncInterfaces.dll - - - ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.8.0.1\lib\net462\Microsoft.Extensions.DependencyInjection.Abstractions.dll - - - ..\packages\Microsoft.Extensions.Logging.Abstractions.8.0.1\lib\net462\Microsoft.Extensions.Logging.Abstractions.dll - - - ..\packages\Spectre.Console.0.49.2-preview.0.4\lib\netstandard2.0\Spectre.Console.dll - - - - ..\packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll - - - - ..\packages\System.Memory.4.5.5\lib\net461\System.Memory.dll - - - - ..\packages\System.Numerics.Vectors.4.5.0\lib\net46\System.Numerics.Vectors.dll - - - ..\packages\System.Runtime.CompilerServices.Unsafe.6.0.0\lib\net461\System.Runtime.CompilerServices.Unsafe.dll - - - ..\packages\System.Text.Encodings.Web.8.0.0\lib\net462\System.Text.Encodings.Web.dll - - - ..\packages\System.Text.Json.8.0.3\lib\net462\System.Text.Json.dll - - - ..\packages\System.Threading.Tasks.Extensions.4.5.4\lib\net461\System.Threading.Tasks.Extensions.dll - - - ..\packages\System.ValueTuple.4.5.0\lib\net47\System.ValueTuple.dll - - - - - - - - - - - - - - - - - - - {01a12d68-de95-425e-aeee-2d099305036d} - LLamaSharp - - - - - - This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - - - - \ No newline at end of file diff --git a/NetStandardTest/Properties/AssemblyInfo.cs b/NetStandardTest/Properties/AssemblyInfo.cs deleted file mode 100644 index 831246fe8..000000000 --- a/NetStandardTest/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("NetStandardTest")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("NetStandardTest")] -[assembly: AssemblyCopyright("Copyright © 2024")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("39274239-7bba-4589-821f-649e2e9f1954")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/NetStandardTest/Properties/Settings.Designer.cs b/NetStandardTest/Properties/Settings.Designer.cs deleted file mode 100644 index 2689d7b68..000000000 --- a/NetStandardTest/Properties/Settings.Designer.cs +++ /dev/null @@ -1,26 +0,0 @@ -//------------------------------------------------------------------------------ -// -// This code was generated by a tool. -// Runtime Version:4.0.30319.42000 -// -// Changes to this file may cause incorrect behavior and will be lost if -// the code is regenerated. 
-// -//------------------------------------------------------------------------------ - -namespace NetStandardTest.Properties { - - - [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] - [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "17.8.0.0")] - internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { - - private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); - - public static Settings Default { - get { - return defaultInstance; - } - } - } -} diff --git a/NetStandardTest/Properties/Settings.settings b/NetStandardTest/Properties/Settings.settings deleted file mode 100644 index 049245f40..000000000 --- a/NetStandardTest/Properties/Settings.settings +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/NetStandardTest/packages.config b/NetStandardTest/packages.config deleted file mode 100644 index c48694eaf..000000000 --- a/NetStandardTest/packages.config +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - \ No newline at end of file From 6e48df12dc6b9994c03bb2a07dcdfe924806d1bf Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 14 May 2024 07:01:18 +0800 Subject: [PATCH 10/11] upgrade dependency version. --- LLama/LLamaSharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index e4797a187..c1dbe9f5c 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -42,7 +42,7 @@ - + From 510cd2737c2960f58f4c46fc7cbf90d6cf750c3b Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 14 May 2024 07:22:09 +0800 Subject: [PATCH 11/11] feat: optimize some imlementations. 
--- LLama.Examples/Program.cs | 6 +++--- LLama/Native/Load/NativeLibraryConfig.cs | 12 +++++++----- NetAppTest/NetAppTest.csproj | 3 ++- NetAppTest/Program.cs | 9 ++++----- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index 63114120d..00de7d2ae 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -1,4 +1,4 @@ -using LLama.Native; +using LLama.Native; using Spectre.Console; using System.Runtime.InteropServices; @@ -30,9 +30,9 @@ __ __ ____ __ // Configure native library to use. This must be done before any other llama.cpp methods are called! NativeLibraryConfig .All - .WithCuda() + .WithCuda(); //.WithAutoDownload() // An experimental feature - .DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); + //.DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); // Calling this method forces loading to occur now. NativeApi.llama_empty_call(); diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index f6a6079d5..bfb5736a8 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -386,14 +386,16 @@ public NativeLibraryConfig WithLogCallback(ILogger? logger) /// You can still modify the configuration after this calling but only before any call from . /// /// - /// The loaded livrary. When the loading failed, this will be null. - /// However if you are using .NET standard2.0, this will never return null. + /// The loaded library. When the loading failed, it will be null. + /// + /// + /// The path of the loaded library. When the loading failed, it will be null. /// /// Whether the running is successful. - public bool DryRun(out INativeLibrary? loadedLibrary) + public bool DryRun(out INativeLibrary? loadedLibrary, out string? 
libraryPath) { LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); - return NativeLibraryUtils.TryLoadLibrary(this, out loadedLibrary, out var _) != IntPtr.Zero; + return NativeLibraryUtils.TryLoadLibrary(this, out loadedLibrary, out libraryPath) != IntPtr.Zero; } #endif } @@ -588,7 +590,7 @@ public bool DryRun(out INativeLibrary? loadedLLamaNativeLibrary, out INativeLibr bool success = true; foreach(var config in _configs) { - success &= config.DryRun(out var loadedLibrary); + success &= config.DryRun(out var loadedLibrary, out var _); if(config.NativeLibraryName == NativeLibraryName.LLama) { loadedLLamaNativeLibrary = loadedLibrary; diff --git a/NetAppTest/NetAppTest.csproj b/NetAppTest/NetAppTest.csproj index d485087f7..c826261f9 100644 --- a/NetAppTest/NetAppTest.csproj +++ b/NetAppTest/NetAppTest.csproj @@ -1,4 +1,4 @@ - + Exe @@ -10,6 +10,7 @@ + diff --git a/NetAppTest/Program.cs b/NetAppTest/Program.cs index 314a63b6a..f773aa49e 100644 --- a/NetAppTest/Program.cs +++ b/NetAppTest/Program.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -17,7 +17,6 @@ public class Program { public static void Main(string[] args) { - Console.WriteLine($"AppContext.BaseDirectory: {AppContext.BaseDirectory}"); var showLLamaCppLogs = true; NativeLibraryConfig .All @@ -63,9 +62,9 @@ public static void Main(string[] args) private static async Task Run() { - string multiModalProj = @"/home/rinne/models/llava-v1.5-7b-mmproj-Q4_0.gguf"; - string modelPath = @"/home/rinne/models/llava-v1.5-7b-Q4_K.gguf"; - string modelImage = @"/home/rinne/code/forks/LLamaSharp/Assets/LLamaSharp-Integrations.png"; + string multiModalProj = @"/llava-v1.5-7b-mmproj-Q4_0.gguf"; + string modelPath = @"/llava-v1.5-7b-Q4_K.gguf"; + string modelImage = @".jpg"; const int maxTokens = 1024; var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the 
image.\nASSISTANT:\n";