From 8a60985363b537abdab9df8d32b59bb3d4c9ae8f Mon Sep 17 00:00:00 2001 From: Michael Lamothe Date: Sat, 4 Jan 2025 19:39:06 +1100 Subject: [PATCH] Upgraded to .NET 8 and made code style a little more internally consistent. (#1680) --- dotnet-examples/Common/Common.csproj | 2 +- dotnet-examples/Common/WaveHeader.cs | 281 +++++++++--------- .../keyword-spotting-from-files/Program.cs | 12 +- .../keyword-spotting-from-files.csproj | 2 +- .../Program.cs | 27 +- .../keyword-spotting-from-microphone.csproj | 2 +- .../offline-decode-files/Program.cs | 71 +++-- .../offline-decode-files.csproj | 2 +- .../offline-punctuation/Program.cs | 6 +- .../offline-punctuation.csproj | 2 +- .../offline-speaker-diarization/Program.cs | 13 +- .../offline-speaker-diarization.csproj | 2 +- dotnet-examples/offline-tts-play/Program.cs | 47 ++- .../offline-tts-play/offline-tts-play.csproj | 2 +- dotnet-examples/offline-tts/Program.cs | 37 ++- .../offline-tts/offline-tts.csproj | 2 +- .../online-decode-files/Program.cs | 44 ++- .../online-decode-files.csproj | 2 +- dotnet-examples/sherpa-onnx.sln | 8 +- .../speaker-identification/Program.cs | 26 +- .../speaker-identification.csproj | 2 +- .../Program.cs | 44 ++- .../speech-recognition-from-microphone.csproj | 2 +- .../spoken-language-identification/Program.cs | 5 +- .../spoken-language-identification.csproj | 2 +- .../streaming-hlg-decoding/Program.cs | 17 +- .../streaming-hlg-decoding.csproj | 2 +- .../Program.cs | 54 ++-- .../vad-non-streaming-asr-paraformer.csproj | 2 +- 29 files changed, 335 insertions(+), 385 deletions(-) diff --git a/dotnet-examples/Common/Common.csproj b/dotnet-examples/Common/Common.csproj index a9630614f..57c0ff743 100644 --- a/dotnet-examples/Common/Common.csproj +++ b/dotnet-examples/Common/Common.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 true diff --git a/dotnet-examples/Common/WaveHeader.cs b/dotnet-examples/Common/WaveHeader.cs index 7d13b3553..0a6ca5284 100644 --- a/dotnet-examples/Common/WaveHeader.cs +++ b/dotnet-examples/Common/WaveHeader.cs @@ -4,171 +4,166 @@ using System.Runtime.InteropServices; -namespace SherpaOnnx -{ +namespace SherpaOnnx; - [StructLayout(LayoutKind.Sequential)] - public struct WaveHeader +[StructLayout(LayoutKind.Sequential)] +public struct WaveHeader +{ + public int ChunkID; + public int ChunkSize; + public int Format; + public int SubChunk1ID; + public int SubChunk1Size; + public short AudioFormat; + public short NumChannels; + public int SampleRate; + public int ByteRate; + public short BlockAlign; + public short BitsPerSample; + public int SubChunk2ID; + public int SubChunk2Size; + + public bool Validate() { - public Int32 ChunkID; - public Int32 ChunkSize; - public Int32 Format; - public Int32 SubChunk1ID; - public Int32 SubChunk1Size; - public Int16 AudioFormat; - public Int16 NumChannels; - public Int32 SampleRate; - public Int32 ByteRate; - public Int16 BlockAlign; - public Int16 BitsPerSample; - public Int32 SubChunk2ID; - public Int32 SubChunk2Size; - - public bool Validate() + if (ChunkID != 0x46464952) + { + Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"); + return false; + } + + // E V A W + if (Format != 0x45564157) + { + Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157"); + return false; + } + + // t m f + if (SubChunk1ID != 0x20746d66) + { + Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"); + return false; + } + + if (SubChunk1Size != 16) + { + Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"); + return false; + } + + if (AudioFormat != 1) + { + Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1"); + return false; + } + + if (NumChannels != 1) + { + Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1"); + return false; + } + + if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8)) + { + Console.WriteLine($"Invalid byte rate: {ByteRate}."); + return false; + } + + if (BlockAlign != (NumChannels * BitsPerSample / 8)) { - if (ChunkID != 0x46464952) - { - Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"); - return false; - } - - // E V A W - if (Format != 0x45564157) - { - Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157"); - return false; - } - - // t m f - if (SubChunk1ID != 0x20746d66) - { - Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"); - return false; - } - - if (SubChunk1Size != 16) - { - Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"); - return false; - } - - if (AudioFormat != 1) - { - Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1"); - return false; - } - - if (NumChannels != 1) - { - Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1"); - return false; - } - - if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8)) - { - Console.WriteLine($"Invalid byte rate: {ByteRate}."); - return false; - } - - if (BlockAlign != (NumChannels * BitsPerSample / 8)) - { - Console.WriteLine($"Invalid block align: {ByteRate}."); - return false; - } - - if (BitsPerSample != 16) - { // we support only 16 bits per sample - Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16"); - return false; - } - - return true; + Console.WriteLine($"Invalid block align: {ByteRate}."); + return false; } + + if (BitsPerSample != 16) + { // we support only 16 bits per sample + Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16"); + return false; + } + + return true; } +} - // It supports only 16-bit, single channel WAVE format. - // The sample rate can be any value. - public class WaveReader +// It supports only 16-bit, single channel WAVE format. +// The sample rate can be any value. +public class WaveReader +{ + public WaveReader(string fileName) { - public WaveReader(String fileName) + if (!File.Exists(fileName)) { - if (!File.Exists(fileName)) - { - throw new ApplicationException($"{fileName} does not exist!"); - } - - using (var stream = File.Open(fileName, FileMode.Open)) - { - using (var reader = new BinaryReader(stream)) - { - _header = ReadHeader(reader); - - if (!_header.Validate()) - { - throw new ApplicationException($"Invalid wave file ${fileName}"); - } - - SkipMetaData(reader); - - // now read samples - // _header.SubChunk2Size contains number of bytes in total. - // we assume each sample is of type int16 - byte[] buffer = reader.ReadBytes(_header.SubChunk2Size); - short[] samples_int16 = new short[_header.SubChunk2Size / 2]; - Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length); - - _samples = new float[samples_int16.Length]; - - for (var i = 0; i < samples_int16.Length; ++i) - { - _samples[i] = samples_int16[i] / 32768.0F; - } - } - } + throw new ApplicationException($"{fileName} does not exist!"); } - private static WaveHeader ReadHeader(BinaryReader reader) - { - byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); + using var stream = File.Open(fileName, FileMode.Open); + using var reader = new BinaryReader(stream); - GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); - WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; - handle.Free(); + _header = ReadHeader(reader); - return header; + if (!_header.Validate()) + { + throw new ApplicationException($"Invalid wave file ${fileName}"); } - private void SkipMetaData(BinaryReader reader) + SkipMetaData(reader); + + // now read samples + // _header.SubChunk2Size contains number of bytes in total. + // we assume each sample is of type int16 + var buffer = reader.ReadBytes(_header.SubChunk2Size); + var samples_int16 = new short[_header.SubChunk2Size / 2]; + Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length); + + _samples = new float[samples_int16.Length]; + + for (var i = 0; i < samples_int16.Length; ++i) { - var bs = reader.BaseStream; - - Int32 subChunk2ID = _header.SubChunk2ID; - Int32 subChunk2Size = _header.SubChunk2Size; - - while (bs.Position != bs.Length && subChunk2ID != 0x61746164) - { - bs.Seek(subChunk2Size, SeekOrigin.Current); - subChunk2ID = reader.ReadInt32(); - subChunk2Size = reader.ReadInt32(); - } - _header.SubChunk2ID = subChunk2ID; - _header.SubChunk2Size = subChunk2Size; + _samples[i] = samples_int16[i] / 32768.0F; } + } - private WaveHeader _header; + private static WaveHeader ReadHeader(BinaryReader reader) + { + var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); + + GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); + WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; + handle.Free(); + + return header; + } - // Samples are normalized to the range [-1, 1] - private float[] _samples; + private void SkipMetaData(BinaryReader reader) + { + var bs = reader.BaseStream; - public int SampleRate => _header.SampleRate; - public float[] Samples => _samples; + var subChunk2ID = _header.SubChunk2ID; + var subChunk2Size = _header.SubChunk2Size; - public static void Test(String fileName) + while (bs.Position != bs.Length && subChunk2ID != 0x61746164) { - WaveReader reader = new WaveReader(fileName); - Console.WriteLine($"samples length: {reader.Samples.Length}"); - Console.WriteLine($"samples rate: {reader.SampleRate}"); + bs.Seek(subChunk2Size, SeekOrigin.Current); + subChunk2ID = reader.ReadInt32(); + subChunk2Size = reader.ReadInt32(); } + _header.SubChunk2ID = subChunk2ID; + _header.SubChunk2Size = subChunk2Size; } + private WaveHeader _header; + + // Samples are normalized to the range [-1, 1] + private float[] _samples; + + public int SampleRate => _header.SampleRate; + + public float[] Samples => _samples; + + public static void Test(string fileName) + { + WaveReader reader = new WaveReader(fileName); + Console.WriteLine($"samples length: {reader.Samples.Length}"); + Console.WriteLine($"samples rate: {reader.SampleRate}"); + } } diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs index 2fea260d1..00ba3777a 100644 --- a/dotnet-examples/keyword-spotting-from-files/Program.cs +++ b/dotnet-examples/keyword-spotting-from-files/Program.cs @@ -13,8 +13,6 @@ // dotnet run using SherpaOnnx; -using System.Collections.Generic; -using System; class KeywordSpotterDemo { @@ -38,11 +36,11 @@ static void Main(string[] args) var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; - WaveReader waveReader = new WaveReader(filename); + var waveReader = new WaveReader(filename); Console.WriteLine("----------Use pre-defined keywords----------"); - OnlineStream s = kws.CreateStream(); + var s = kws.CreateStream(); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; @@ -53,7 +51,7 @@ static void Main(string[] args) { kws.Decode(s); var result = kws.GetResult(s); - if (result.Keyword != "") + if (result.Keyword != string.Empty) { Console.WriteLine("Detected: {0}", result.Keyword); } @@ -70,7 +68,7 @@ static void Main(string[] args) { kws.Decode(s); var result = kws.GetResult(s); - if (result.Keyword != "") + if (result.Keyword != string.Empty) { Console.WriteLine("Detected: {0}", result.Keyword); } @@ -89,7 +87,7 @@ static void Main(string[] args) { kws.Decode(s); var result = kws.GetResult(s); - if (result.Keyword != "") + if (result.Keyword != string.Empty) { Console.WriteLine("Detected: {0}", result.Keyword); } diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj index 992f8e0e3..21b9d3ea5 100644 --- a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj +++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 keyword_spotting_from_files enable enable diff --git a/dotnet-examples/keyword-spotting-from-microphone/Program.cs b/dotnet-examples/keyword-spotting-from-microphone/Program.cs index cb0c922f4..05d22aee0 100644 --- a/dotnet-examples/keyword-spotting-from-microphone/Program.cs +++ b/dotnet-examples/keyword-spotting-from-microphone/Program.cs @@ -12,12 +12,9 @@ // // dotnet run +using PortAudioSharp; using SherpaOnnx; -using System.Collections.Generic; using System.Runtime.InteropServices; -using System; - -using PortAudioSharp; class KeywordSpotterDemo { @@ -41,11 +38,11 @@ static void Main(string[] args) var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; - WaveReader waveReader = new WaveReader(filename); + var waveReader = new WaveReader(filename); Console.WriteLine("----------Use pre-defined keywords----------"); - OnlineStream s = kws.CreateStream(); + var s = kws.CreateStream(); Console.WriteLine(PortAudio.VersionInfo.versionText); PortAudio.Initialize(); @@ -54,7 +51,7 @@ static void Main(string[] args) for (int i = 0; i != PortAudio.DeviceCount; ++i) { Console.WriteLine($" Device {i}"); - DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i); + var deviceInfo = PortAudio.GetDeviceInfo(i); Console.WriteLine($" Name: {deviceInfo.name}"); Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}"); Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}"); @@ -66,12 +63,12 @@ static void Main(string[] args) Environment.Exit(1); } - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); + var info = PortAudio.GetDeviceInfo(deviceIndex); Console.WriteLine(); Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); - StreamParameters param = new StreamParameters(); + var param = new StreamParameters(); param.device = deviceIndex; param.channelCount = 1; param.sampleFormat = SampleFormat.Float32; @@ -79,21 +76,21 @@ static void Main(string[] args) param.hostApiSpecificStreamInfo = IntPtr.Zero; PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, - UInt32 frameCount, + uint frameCount, ref StreamCallbackTimeInfo timeInfo, StreamCallbackFlags statusFlags, IntPtr userData ) => { - float[] samples = new float[frameCount]; - Marshal.Copy(input, samples, 0, (Int32)frameCount); + var samples = new float[frameCount]; + Marshal.Copy(input, samples, 0, (int)frameCount); s.AcceptWaveform(config.FeatConfig.SampleRate, samples); return StreamCallbackResult.Continue; }; - PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate, + var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate, framesPerBuffer: 0, streamFlags: StreamFlags.ClipOff, callback: callback, @@ -113,15 +110,13 @@ IntPtr userData } var result = kws.GetResult(s); - if (result.Keyword != "") + if (result.Keyword != string.Empty) { Console.WriteLine("Detected: {0}", result.Keyword); } Thread.Sleep(200); // ms } - - PortAudio.Terminate(); } } diff --git a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj index b3afae784..12415b81b 100644 --- a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj +++ b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 keyword_spotting_from_microphone enable enable diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs index d855da6f8..0d944e5a3 100644 --- a/dotnet-examples/offline-decode-files/Program.cs +++ b/dotnet-examples/offline-decode-files/Program.cs @@ -5,17 +5,14 @@ // Please refer to // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // to download non-streaming models -using CommandLine.Text; using CommandLine; +using CommandLine.Text; using SherpaOnnx; -using System.Collections.Generic; -using System; class OfflineDecodeFiles { class Options { - [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] public int SampleRate { get; set; } = 16000; @@ -23,58 +20,58 @@ class Options public int FeatureDim { get; set; } = 80; [Option(Required = false, HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } = ""; + public string Tokens { get; set; } = string.Empty; [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] - public string Encoder { get; set; } = ""; + public string Encoder { get; set; } = string.Empty; [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] - public string Decoder { get; set; } = ""; + public string Decoder { get; set; } = string.Empty; [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] - public string Joiner { get; set; } = ""; + public string Joiner { get; set; } = string.Empty; [Option("model-type", Required = false, Default = "", HelpText = "model type")] - public string ModelType { get; set; } = ""; + public string ModelType { get; set; } = string.Empty; [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] - public string WhisperEncoder { get; set; } = ""; + public string WhisperEncoder { get; set; } = string.Empty; [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] - public string WhisperDecoder { get; set; } = ""; + public string WhisperDecoder { get; set; } = string.Empty; [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")] - public string WhisperLanguage { get; set; } = ""; + public string WhisperLanguage { get; set; } = string.Empty; [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")] public string WhisperTask { get; set; } = "transcribe"; [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")] - public string MoonshinePreprocessor { get; set; } = ""; + public string MoonshinePreprocessor { get; set; } = string.Empty; [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")] - public string MoonshineEncoder { get; set; } = ""; + public string MoonshineEncoder { get; set; } = string.Empty; [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")] - public string MoonshineUncachedDecoder { get; set; } = ""; + public string MoonshineUncachedDecoder { get; set; } = string.Empty; [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")] - public string MoonshineCachedDecoder { get; set; } = ""; + public string MoonshineCachedDecoder { get; set; } = string.Empty; [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] - public string TdnnModel { get; set; } = ""; + public string TdnnModel { get; set; } = string.Empty; [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] - public string Paraformer { get; set; } = ""; + public string Paraformer { get; set; } = string.Empty; [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] - public string NeMoCtc { get; set; } = ""; + public string NeMoCtc { get; set; } = string.Empty; [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] - public string TeleSpeechCtc { get; set; } = ""; + public string TeleSpeechCtc { get; set; } = string.Empty; [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] - public string SenseVoiceModel { get; set; } = ""; + public string SenseVoiceModel { get; set; } = string.Empty; [Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")] public int SenseVoiceUseItn { get; set; } = 1; @@ -88,7 +85,7 @@ class Options [Option("rule-fsts", Required = false, Default = "", HelpText = "If not empty, path to rule fst for inverse text normalization")] - public string RuleFsts { get; set; } = ""; + public string RuleFsts { get; set; } = string.Empty; [Option("max-active-paths", Required = false, Default = 4, HelpText = @"Used only when --decoding--method is modified_beam_search. @@ -96,7 +93,7 @@ class Options public int MaxActivePaths { get; set; } = 4; [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] - public string HotwordsFile { get; set; } = ""; + public string HotwordsFile { get; set; } = string.Empty; [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] public float HotwordsScore { get; set; } = 1.5F; @@ -117,7 +114,7 @@ static void Main(string[] args) private static void DisplayHelp(ParserResult result, IEnumerable errs) { - string usage = @" + var usage = @" # Zipformer dotnet run \ @@ -213,42 +210,42 @@ private static void Run(Options options) config.ModelConfig.Tokens = options.Tokens; - if (!String.IsNullOrEmpty(options.Encoder)) + if (!string.IsNullOrEmpty(options.Encoder)) { // this is a transducer model config.ModelConfig.Transducer.Encoder = options.Encoder; config.ModelConfig.Transducer.Decoder = options.Decoder; config.ModelConfig.Transducer.Joiner = options.Joiner; } - else if (!String.IsNullOrEmpty(options.Paraformer)) + else if (!string.IsNullOrEmpty(options.Paraformer)) { config.ModelConfig.Paraformer.Model = options.Paraformer; } - else if (!String.IsNullOrEmpty(options.NeMoCtc)) + else if (!string.IsNullOrEmpty(options.NeMoCtc)) { config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; } - else if (!String.IsNullOrEmpty(options.TeleSpeechCtc)) + else if (!string.IsNullOrEmpty(options.TeleSpeechCtc)) { config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; } - else if (!String.IsNullOrEmpty(options.WhisperEncoder)) + else if (!string.IsNullOrEmpty(options.WhisperEncoder)) { config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; config.ModelConfig.Whisper.Language = options.WhisperLanguage; config.ModelConfig.Whisper.Task = options.WhisperTask; } - else if (!String.IsNullOrEmpty(options.TdnnModel)) + else if (!string.IsNullOrEmpty(options.TdnnModel)) { config.ModelConfig.Tdnn.Model = options.TdnnModel; } - else if (!String.IsNullOrEmpty(options.SenseVoiceModel)) + else if (!string.IsNullOrEmpty(options.SenseVoiceModel)) { config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel; config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn; } - else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor)) + else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor)) { config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor; config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder; @@ -270,17 +267,17 @@ private static void Run(Options options) config.ModelConfig.Debug = 0; - OfflineRecognizer recognizer = new OfflineRecognizer(config); + var recognizer = new OfflineRecognizer(config); - string[] files = options.Files.ToArray(); + var files = options.Files.ToArray(); // We create a separate stream for each file - List streams = new List(); + var streams = new List(); streams.EnsureCapacity(files.Length); for (int i = 0; i != files.Length; ++i) { - OfflineStream s = recognizer.CreateStream(); + var s = recognizer.CreateStream(); WaveReader waveReader = new WaveReader(files[i]); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); @@ -299,7 +296,7 @@ private static void Run(Options options) Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens)); if (r.Timestamps != null && r.Timestamps.Length > 0) { Console.Write("Timestamps: ["); - var sep = ""; + var sep = string.Empty; for (int k = 0; k != r.Timestamps.Length; ++k) { Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00")); diff --git a/dotnet-examples/offline-decode-files/offline-decode-files.csproj b/dotnet-examples/offline-decode-files/offline-decode-files.csproj index ffdfb6ace..5b28d48b7 100644 --- a/dotnet-examples/offline-decode-files/offline-decode-files.csproj +++ b/dotnet-examples/offline-decode-files/offline-decode-files.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 offline_decode_files enable enable diff --git a/dotnet-examples/offline-punctuation/Program.cs b/dotnet-examples/offline-punctuation/Program.cs index d299f8abc..6f85237b6 100644 --- a/dotnet-examples/offline-punctuation/Program.cs +++ b/dotnet-examples/offline-punctuation/Program.cs @@ -12,8 +12,6 @@ // dotnet run using SherpaOnnx; -using System.Collections.Generic; -using System; class OfflinePunctuationDemo { @@ -25,14 +23,14 @@ static void Main(string[] args) config.Model.NumThreads = 1; var punct = new OfflinePunctuation(config); - string[] textList = new string[] { + var textList = new string[] { "这是一个测试你好吗How are you我很好thank you are you ok谢谢你", "我们都是木头人不会说话不会动", "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", }; Console.WriteLine("---------"); - foreach (string text in textList) + foreach (var text in textList) { string textWithPunct = punct.AddPunct(text); Console.WriteLine("Input text: {0}", text); diff --git a/dotnet-examples/offline-punctuation/offline-punctuation.csproj b/dotnet-examples/offline-punctuation/offline-punctuation.csproj index 2d94fcb38..0e3ee42a9 100644 --- a/dotnet-examples/offline-punctuation/offline-punctuation.csproj +++ b/dotnet-examples/offline-punctuation/offline-punctuation.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 offline_punctuation enable enable diff --git a/dotnet-examples/offline-speaker-diarization/Program.cs b/dotnet-examples/offline-speaker-diarization/Program.cs index 45316fe75..4d8d91b0e 100644 --- a/dotnet-examples/offline-speaker-diarization/Program.cs +++ b/dotnet-examples/offline-speaker-diarization/Program.cs @@ -34,7 +34,6 @@ dotnet run */ using SherpaOnnx; -using System; class OfflineSpeakerDiarizationDemo { @@ -54,7 +53,7 @@ static void Main(string[] args) var sd = new OfflineSpeakerDiarization(config); var testWaveFile = "./0-four-speakers-zh.wav"; - WaveReader waveReader = new WaveReader(testWaveFile); + var waveReader = new WaveReader(testWaveFile); if (sd.SampleRate != waveReader.SampleRate) { Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"); @@ -65,19 +64,19 @@ static void Main(string[] args) // var segments = sd.Process(waveReader.Samples); // this one is also ok - var MyProgressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => + var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => { - float progress = 100.0F * numProcessedChunks / numTotalChunks; - Console.WriteLine("Progress {0}%", String.Format("{0:0.00}", progress)); + var progress = 100.0F * numProcessedChunks / numTotalChunks; + Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress)); return 0; }; - var callback = new OfflineSpeakerDiarizationProgressCallback(MyProgressCallback); + var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback); var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero); foreach (var s in segments) { - Console.WriteLine("{0} -- {1} speaker_{2}", String.Format("{0:0.00}", s.Start), String.Format("{0:0.00}", s.End), s.Speaker); + Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker); } } } diff --git a/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj index 3374dbca1..c7b15faa5 100644 --- a/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj +++ b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 offline_speaker_diarization enable enable diff --git a/dotnet-examples/offline-tts-play/Program.cs b/dotnet-examples/offline-tts-play/Program.cs index a142c127e..65eb22bf4 100644 --- a/dotnet-examples/offline-tts-play/Program.cs +++ b/dotnet-examples/offline-tts-play/Program.cs @@ -10,15 +10,12 @@ // Note that you need a speaker to run this file since it will play // the generated audio as it is generating. -using CommandLine.Text; using CommandLine; +using CommandLine.Text; using PortAudioSharp; using SherpaOnnx; using System.Collections.Concurrent; -using System.Collections.Generic; using System.Runtime.InteropServices; -using System.Threading; -using System; class OfflineTtsPlayDemo { @@ -26,13 +23,13 @@ class Options { [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] - public string RuleFsts { get; set; } + public string? RuleFsts { get; set; } [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] - public string DictDir { get; set; } + public string? DictDir { get; set; } [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] - public string DataDir { get; set; } + public string? DataDir { get; set; } [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] public float LengthScale { get; set; } @@ -44,10 +41,10 @@ class Options public float NoiseScaleW { get; set; } [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] - public string Lexicon { get; set; } + public string? Lexicon { get; set; } [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } + public string? Tokens { get; set; } [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] public int MaxNumSentences { get; set; } @@ -56,16 +53,16 @@ class Options public int Debug { get; set; } [Option("vits-model", Required = true, HelpText = "Path to VITS model")] - public string Model { get; set; } + public string? Model { get; set; } [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] public int SpeakerId { get; set; } [Option("text", Required = true, HelpText = "Text to synthesize")] - public string Text { get; set; } + public string? Text { get; set; } [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] - public string OutputFilename { get; set; } + public string? OutputFilename { get; set; } } static void Main(string[] args) @@ -124,10 +121,9 @@ to download more models. Console.WriteLine(helpText); } - private static void Run(Options options) { - OfflineTtsConfig config = new OfflineTtsConfig(); + var config = new OfflineTtsConfig(); config.Model.Vits.Model = options.Model; config.Model.Vits.Lexicon = options.Lexicon; config.Model.Vits.Tokens = options.Tokens; @@ -142,10 +138,9 @@ private static void Run(Options options) config.RuleFsts = options.RuleFsts; config.MaxNumSentences = options.MaxNumSentences; - OfflineTts tts = new OfflineTts(config); - float speed = 1.0f / options.LengthScale; - int sid = options.SpeakerId; - + var tts = new OfflineTts(config); + var speed = 1.0f / options.LengthScale; + var sid = options.SpeakerId; Console.WriteLine(PortAudio.VersionInfo.versionText); PortAudio.Initialize(); @@ -166,11 +161,11 @@ private static void Run(Options options) Environment.Exit(1); } - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); + var info = PortAudio.GetDeviceInfo(deviceIndex); Console.WriteLine(); Console.WriteLine($"Use output default device {deviceIndex} ({info.name})"); - StreamParameters param = new StreamParameters(); + var param = new StreamParameters(); param.device = deviceIndex; param.channelCount = 1; param.sampleFormat = SampleFormat.Float32; @@ -178,7 +173,7 @@ private static void Run(Options options) param.hostApiSpecificStreamInfo = IntPtr.Zero; // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview - BlockingCollection dataItems = new BlockingCollection(); + var dataItems = new BlockingCollection(); var MyCallback = (IntPtr samples, int n) => { @@ -193,9 +188,9 @@ private static void Run(Options options) return 1; }; - bool playFinished = false; + var playFinished = false; - float[] lastSampleArray = null; + float[]? lastSampleArray = null; int lastIndex = 0; // not played PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output, @@ -270,10 +265,10 @@ IntPtr userData stream.Start(); - OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback); + var callback = new OfflineTtsCallback(MyCallback); - OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback); - bool ok = audio.SaveToWaveFile(options.OutputFilename); + var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback); + var ok = audio.SaveToWaveFile(options.OutputFilename); if (ok) { diff --git a/dotnet-examples/offline-tts-play/offline-tts-play.csproj b/dotnet-examples/offline-tts-play/offline-tts-play.csproj index d28ae62c8..b777bcafe 100644 --- a/dotnet-examples/offline-tts-play/offline-tts-play.csproj +++ b/dotnet-examples/offline-tts-play/offline-tts-play.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 offline_tts_play enable enable diff --git a/dotnet-examples/offline-tts/Program.cs b/dotnet-examples/offline-tts/Program.cs index 6216095f4..f434ebf19 100644 --- a/dotnet-examples/offline-tts/Program.cs +++ b/dotnet-examples/offline-tts/Program.cs @@ -6,28 +6,25 @@ // and // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models // to download pre-trained models -using CommandLine.Text; using CommandLine; +using CommandLine.Text; using SherpaOnnx; -using System.Collections.Generic; -using System; class OfflineTtsDemo { class Options { - [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] - public string RuleFsts { get; set; } = ""; + public string RuleFsts { get; set; } = string.Empty; [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] - public string RuleFars { get; set; } = ""; + public string RuleFars { get; set; } = string.Empty; [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] - public string DictDir { get; set; } = ""; + public string DictDir { get; set; } = string.Empty; [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] - public string DataDir { get; set; } = ""; + public string DataDir { get; set; } = string.Empty; [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] public float LengthScale { get; set; } = 1; @@ -39,10 +36,10 @@ class Options public float NoiseScaleW { get; set; } = 0.8F; [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] - public string Lexicon { get; set; } = ""; + public string Lexicon { get; set; } = string.Empty; [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } = ""; + public string Tokens { get; set; } = string.Empty; [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] public int MaxNumSentences { get; set; } = 1; @@ -51,13 +48,13 @@ class Options public int Debug { get; set; } = 0; [Option("vits-model", Required = true, HelpText = "Path to VITS model")] - public string Model { get; set; } = ""; + public string Model { get; set; } = string.Empty; [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] public int SpeakerId { get; set; } = 0; [Option("text", Required = true, HelpText = "Text to synthesize")] - public string Text { get; set; } = ""; + public string Text { get; set; } = string.Empty; [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] public string OutputFilename { get; set; } = "./generated.wav"; @@ -65,7 +62,7 @@ class Options static void Main(string[] args) { - var parser = new CommandLine.Parser(with => with.HelpWriter = null); + var parser = new Parser(with => with.HelpWriter = null); var parserResult = parser.ParseArguments(args); parserResult @@ -75,7 +72,7 @@ static void Main(string[] args) private static void DisplayHelp(ParserResult result, IEnumerable errs) { - string usage = @" + var usage = @" # vits-aishell3 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 @@ -122,7 +119,7 @@ to download more models. private static void Run(Options options) { - OfflineTtsConfig config = new OfflineTtsConfig(); + var config = new OfflineTtsConfig(); config.Model.Vits.Model = options.Model; config.Model.Vits.Lexicon = options.Lexicon; config.Model.Vits.Tokens = options.Tokens; @@ -138,11 +135,11 @@ private static void Run(Options options) config.RuleFars = options.RuleFars; config.MaxNumSentences = options.MaxNumSentences; - OfflineTts tts = new OfflineTts(config); - float speed = 1.0f / options.LengthScale; - int sid = options.SpeakerId; - OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid); - bool ok = audio.SaveToWaveFile(options.OutputFilename); + var tts = new OfflineTts(config); + var speed = 1.0f / options.LengthScale; + var sid = options.SpeakerId; + var audio = tts.Generate(options.Text, speed, sid); + var ok = audio.SaveToWaveFile(options.OutputFilename); if (ok) { diff --git a/dotnet-examples/offline-tts/offline-tts.csproj b/dotnet-examples/offline-tts/offline-tts.csproj index 48548fc4c..20b048f19 100644 --- a/dotnet-examples/offline-tts/offline-tts.csproj +++ b/dotnet-examples/offline-tts/offline-tts.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 offline_tts enable enable diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs index ad53624de..a1f01be57 100644 --- a/dotnet-examples/online-decode-files/Program.cs +++ b/dotnet-examples/online-decode-files/Program.cs @@ -6,40 +6,37 @@ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html // to download streaming models -using CommandLine.Text; using CommandLine; +using CommandLine.Text; using SherpaOnnx; -using System.Collections.Generic; -using System.Linq; -using System; class OnlineDecodeFiles { class Options { [Option(Required = true, HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } = ""; + public string Tokens { get; set; } = string.Empty; [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] - public string Provider { get; set; } = ""; + public string Provider { get; set; } = string.Empty; [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] - public string Encoder { get; set; } = ""; + public string Encoder { get; set; } = string.Empty; [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] - public string Decoder { get; set; } = ""; + public string Decoder { get; set; } = string.Empty; [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] - public string Joiner { get; set; } = ""; + public string Joiner { get; set; } = string.Empty; [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] - public string ParaformerEncoder { get; set; } = ""; + public string ParaformerEncoder { get; set; } = string.Empty; [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] - public string ParaformerDecoder { get; set; } = ""; + public string ParaformerDecoder { get; set; } = string.Empty; [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] - public string Zipformer2Ctc { get; set; } = ""; + public string Zipformer2Ctc { get; set; } = string.Empty; [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] public int NumThreads { get; set; } = 1; @@ -80,15 +77,14 @@ larger than this value after something that is not blank has been decoded. Used public float Rule3MinUtteranceLength { get; set; } = 20.0F; [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] - public string HotwordsFile { get; set; } = ""; + public string HotwordsFile { get; set; } = string.Empty; [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] public float HotwordsScore { get; set; } = 1.5F; [Option("rule-fsts", Required = false, Default = "", HelpText = "If not empty, path to rule fst for inverse text normalization")] - public string RuleFsts { get; set; } = ""; - + public string RuleFsts { get; set; } = string.Empty; [Option("files", Required = true, HelpText = "Audio files for decoding")] public IEnumerable Files { get; set; } = new string[] {}; @@ -162,7 +158,7 @@ to download pre-trained streaming models. private static void Run(Options options) { - OnlineRecognizerConfig config = new OnlineRecognizerConfig(); + var config = new OnlineRecognizerConfig(); config.FeatConfig.SampleRate = options.SampleRate; // All models from icefall using feature dim 80. @@ -194,22 +190,22 @@ private static void Run(Options options) config.HotwordsScore = options.HotwordsScore; config.RuleFsts = options.RuleFsts; - OnlineRecognizer recognizer = new OnlineRecognizer(config); + var recognizer = new OnlineRecognizer(config); - string[] files = options.Files.ToArray(); + var files = options.Files.ToArray(); // We create a separate stream for each file - List streams = new List(); + var streams = new List(); streams.EnsureCapacity(files.Length); for (int i = 0; i != files.Length; ++i) { - OnlineStream s = recognizer.CreateStream(); + var s = recognizer.CreateStream(); - WaveReader waveReader = new WaveReader(files[i]); + var waveReader = new WaveReader(files[i]); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); - float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; + var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; s.AcceptWaveform(waveReader.SampleRate, tailPadding); s.InputFinished(); @@ -230,7 +226,7 @@ private static void Run(Options options) // display results for (int i = 0; i != files.Length; ++i) { - OnlineRecognizerResult r = recognizer.GetResult(streams[i]); + var r = recognizer.GetResult(streams[i]); var text = r.Text; var tokens = r.Tokens; Console.WriteLine("--------------------"); @@ -238,7 +234,7 @@ private static void Run(Options options) Console.WriteLine("text: {0}", text); Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); Console.Write("timestamps: ["); - r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); + r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", ")); Console.WriteLine("]"); } Console.WriteLine("--------------------"); diff --git a/dotnet-examples/online-decode-files/online-decode-files.csproj b/dotnet-examples/online-decode-files/online-decode-files.csproj index 0ff581102..f1cc3baa7 100644 --- a/dotnet-examples/online-decode-files/online-decode-files.csproj +++ b/dotnet-examples/online-decode-files/online-decode-files.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 online_decode_files enable enable diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln index 0bff03f5c..1ebcdf464 100644 --- a/dotnet-examples/sherpa-onnx.sln +++ b/dotnet-examples/sherpa-onnx.sln @@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -91,10 +89,6 @@ Global {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU diff --git a/dotnet-examples/speaker-identification/Program.cs b/dotnet-examples/speaker-identification/Program.cs index aef53e851..20ac70390 100644 --- a/dotnet-examples/speaker-identification/Program.cs +++ b/dotnet-examples/speaker-identification/Program.cs @@ -16,20 +16,18 @@ // dotnet run using SherpaOnnx; -using System.Collections.Generic; -using System; class SpeakerIdentificationDemo { - public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename) + public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename) { - WaveReader reader = new WaveReader(filename); + var reader = new WaveReader(filename); - OnlineStream stream = extractor.CreateStream(); + var stream = extractor.CreateStream(); stream.AcceptWaveform(reader.SampleRate, reader.Samples); stream.InputFinished(); - float[] embedding = extractor.Compute(stream); + var embedding = extractor.Compute(stream); return embedding; } @@ -43,25 +41,25 @@ static void Main(string[] args) var manager = new SpeakerEmbeddingManager(extractor.Dim); - string[] spk1Files = + var spk1Files = new string[] { "./sr-data/enroll/fangjun-sr-1.wav", "./sr-data/enroll/fangjun-sr-2.wav", "./sr-data/enroll/fangjun-sr-3.wav", }; - float[][] spk1Vec = new float[spk1Files.Length][]; + var spk1Vec = new float[spk1Files.Length][]; for (int i = 0; i < spk1Files.Length; ++i) { spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]); } - string[] spk2Files = + var spk2Files = new string[] { "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav", }; - float[][] spk2Vec = new float[spk2Files.Length][]; + var spk2Vec = new float[spk2Files.Length][]; for (int i = 0; i < spk2Files.Length; ++i) { @@ -100,14 +98,14 @@ static void Main(string[] args) Console.WriteLine("---All speakers---"); - string[] allSpeakers = manager.GetAllSpeakers(); + var allSpeakers = manager.GetAllSpeakers(); foreach (var s in allSpeakers) { Console.WriteLine(s); } Console.WriteLine("------------"); - string[] testFiles = + var testFiles = new string[] { "./sr-data/test/fangjun-test-sr-1.wav", "./sr-data/test/leijun-test-sr-1.wav", @@ -117,9 +115,9 @@ static void Main(string[] args) float threshold = 0.6f; foreach (var file in testFiles) { - float[] embedding = ComputeEmbedding(extractor, file); + var embedding = ComputeEmbedding(extractor, file); - String name = manager.Search(embedding, threshold); + var name = manager.Search(embedding, threshold); if (name == "") { name = ""; diff --git a/dotnet-examples/speaker-identification/speaker-identification.csproj b/dotnet-examples/speaker-identification/speaker-identification.csproj index 7c857fa54..45a42f49e 100644 --- a/dotnet-examples/speaker-identification/speaker-identification.csproj +++ b/dotnet-examples/speaker-identification/speaker-identification.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 speaker_identification enable enable diff --git a/dotnet-examples/speech-recognition-from-microphone/Program.cs b/dotnet-examples/speech-recognition-from-microphone/Program.cs index 586e3b162..aa0e7803f 100644 --- a/dotnet-examples/speech-recognition-from-microphone/Program.cs +++ b/dotnet-examples/speech-recognition-from-microphone/Program.cs @@ -6,47 +6,43 @@ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html // to download streaming models -using CommandLine.Text; using CommandLine; +using CommandLine.Text; using PortAudioSharp; -using System.Threading; using SherpaOnnx; -using System.Collections.Generic; using System.Runtime.InteropServices; -using System; - class SpeechRecognitionFromMicrophone { class Options { [Option(Required = true, HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } + public string? Tokens { get; set; } [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] - public string Provider { get; set; } + public string? Provider { get; set; } [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] - public string Encoder { get; set; } + public string? Encoder { get; set; } [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] - public string Decoder { get; set; } + public string? Decoder { get; set; } [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] - public string Joiner { get; set; } + public string? Joiner { get; set; } [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] - public string ParaformerEncoder { get; set; } + public string? ParaformerEncoder { get; set; } [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] - public string ParaformerDecoder { get; set; } + public string? ParaformerDecoder { get; set; } [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] public int NumThreads { get; set; } [Option("decoding-method", Required = false, Default = "greedy_search", HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] - public string DecodingMethod { get; set; } + public string? DecodingMethod { get; set; } [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] public bool Debug { get; set; } @@ -126,7 +122,7 @@ to download pre-trained streaming models. private static void Run(Options options) { - OnlineRecognizerConfig config = new OnlineRecognizerConfig(); + var config = new OnlineRecognizerConfig(); config.FeatConfig.SampleRate = options.SampleRate; // All models from icefall using feature dim 80. @@ -153,9 +149,9 @@ private static void Run(Options options) config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; - OnlineRecognizer recognizer = new OnlineRecognizer(config); + var recognizer = new OnlineRecognizer(config); - OnlineStream s = recognizer.CreateStream(); + var s = recognizer.CreateStream(); Console.WriteLine(PortAudio.VersionInfo.versionText); PortAudio.Initialize(); @@ -176,12 +172,12 @@ private static void Run(Options options) Environment.Exit(1); } - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); + var info = PortAudio.GetDeviceInfo(deviceIndex); Console.WriteLine(); Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); - StreamParameters param = new StreamParameters(); + var param = new StreamParameters(); param.device = deviceIndex; param.channelCount = 1; param.sampleFormat = SampleFormat.Float32; @@ -189,14 +185,14 @@ private static void Run(Options options) param.hostApiSpecificStreamInfo = IntPtr.Zero; PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, - UInt32 frameCount, + uint frameCount, ref StreamCallbackTimeInfo timeInfo, StreamCallbackFlags statusFlags, IntPtr userData ) => { - float[] samples = new float[frameCount]; - Marshal.Copy(input, samples, 0, (Int32)frameCount); + var samples = new float[frameCount]; + Marshal.Copy(input, samples, 0, (int)frameCount); s.AcceptWaveform(options.SampleRate, samples); @@ -215,7 +211,7 @@ IntPtr userData stream.Start(); - String lastText = ""; + var lastText = string.Empty; int segmentIndex = 0; while (true) @@ -245,9 +241,5 @@ IntPtr userData Thread.Sleep(200); // ms } - - PortAudio.Terminate(); - - } } diff --git a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj index 901c8a158..72b7b6c91 100644 --- a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj +++ b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 speech_recognition_from_microphone enable enable diff --git a/dotnet-examples/spoken-language-identification/Program.cs b/dotnet-examples/spoken-language-identification/Program.cs index 05a785d7c..d2f210e85 100644 --- a/dotnet-examples/spoken-language-identification/Program.cs +++ b/dotnet-examples/spoken-language-identification/Program.cs @@ -15,12 +15,9 @@ // dotnet run using SherpaOnnx; -using System.Collections.Generic; -using System; class SpokenLanguageIdentificationDemo { - static void Main(string[] args) { var config = new SpokenLanguageIdentificationConfig(); @@ -30,7 +27,7 @@ static void Main(string[] args) var slid = new SpokenLanguageIdentification(config); var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; - WaveReader waveReader = new WaveReader(filename); + var waveReader = new WaveReader(filename); var s = slid.CreateStream(); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); diff --git a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj index b8b431a48..e424b2d57 100644 --- a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj +++ b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 spoken_language_identification enable enable diff --git a/dotnet-examples/streaming-hlg-decoding/Program.cs b/dotnet-examples/streaming-hlg-decoding/Program.cs index 6ac7c8c94..e522b8164 100644 --- a/dotnet-examples/streaming-hlg-decoding/Program.cs +++ b/dotnet-examples/streaming-hlg-decoding/Program.cs @@ -13,12 +13,9 @@ // dotnet run using SherpaOnnx; -using System.Collections.Generic; -using System; class StreamingHlgDecodingDemo { - static void Main(string[] args) { var config = new OnlineRecognizerConfig(); @@ -32,15 +29,15 @@ static void Main(string[] args) config.ModelConfig.Debug = 0; config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; - OnlineRecognizer recognizer = new OnlineRecognizer(config); + var recognizer = new OnlineRecognizer(config); var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; - WaveReader waveReader = new WaveReader(filename); - OnlineStream s = recognizer.CreateStream(); + var waveReader = new WaveReader(filename); + var s = recognizer.CreateStream(); s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); - float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; + var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; s.AcceptWaveform(waveReader.SampleRate, tailPadding); s.InputFinished(); @@ -49,7 +46,7 @@ static void Main(string[] args) recognizer.Decode(s); } - OnlineRecognizerResult r = recognizer.GetResult(s); + var r = recognizer.GetResult(s); var text = r.Text; var tokens = r.Tokens; Console.WriteLine("--------------------"); @@ -57,10 +54,8 @@ static void Main(string[] args) Console.WriteLine("text: {0}", text); Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); Console.Write("timestamps: ["); - r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); + r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", ")); Console.WriteLine("]"); Console.WriteLine("--------------------"); } } - - diff --git a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj index 66e0401f1..6ed8fc699 100644 --- a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj +++ b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 streaming_hlg_decoding enable enable diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs index abc080b88..e8dfbe6fa 100644 --- a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs +++ b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs @@ -3,8 +3,6 @@ // This file shows how to use a silero_vad model with a non-streaming Paraformer // for speech recognition. using SherpaOnnx; -using System.Collections.Generic; -using System; class VadNonStreamingAsrParaformer { @@ -12,45 +10,49 @@ static void Main(string[] args) { // please download model files from // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models - OfflineRecognizerConfig config = new OfflineRecognizerConfig(); + var config = new OfflineRecognizerConfig(); config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"; config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"; config.ModelConfig.Debug = 0; - OfflineRecognizer recognizer = new OfflineRecognizer(config); + var recognizer = new OfflineRecognizer(config); - VadModelConfig vadModelConfig = new VadModelConfig(); + var vadModelConfig = new VadModelConfig(); vadModelConfig.SileroVad.Model = "./silero_vad.onnx"; vadModelConfig.Debug = 0; - VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60); + var vad = new VoiceActivityDetector(vadModelConfig, 60); - string testWaveFilename = "./lei-jun-test.wav"; - WaveReader reader = new WaveReader(testWaveFilename); + var testWaveFilename = "./lei-jun-test.wav"; + var reader = new WaveReader(testWaveFilename); int numSamples = reader.Samples.Length; int windowSize = vadModelConfig.SileroVad.WindowSize; int sampleRate = vadModelConfig.SampleRate; int numIter = numSamples / windowSize; - for (int i = 0; i != numIter; ++i) { + for (int i = 0; i != numIter; ++i) + { int start = i * windowSize; - float[] samples = new float[windowSize]; + var samples = new float[windowSize]; Array.Copy(reader.Samples, start, samples, 0, windowSize); vad.AcceptWaveform(samples); - if (vad.IsSpeechDetected()) { - while (!vad.IsEmpty()) { + if (vad.IsSpeechDetected()) + { + while (!vad.IsEmpty()) + { SpeechSegment segment = vad.Front(); - float startTime = segment.Start / (float)sampleRate; - float duration = segment.Samples.Length / (float)sampleRate; + var startTime = segment.Start / (float)sampleRate; + var duration = segment.Samples.Length / (float)sampleRate; OfflineStream stream = recognizer.CreateStream(); stream.AcceptWaveform(sampleRate, segment.Samples); recognizer.Decode(stream); - String text = stream.Result.Text; + var text = stream.Result.Text; - if (!String.IsNullOrEmpty(text)) { - Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), - String.Format("{0:0.00}", startTime+duration), text); + if (!string.IsNullOrEmpty(text)) + { + Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime), + string.Format("{0:0.00}", startTime + duration), text); } vad.Pop(); @@ -60,19 +62,21 @@ static void Main(string[] args) vad.Flush(); - while (!vad.IsEmpty()) { - SpeechSegment segment = vad.Front(); + while (!vad.IsEmpty()) + { + var segment = vad.Front(); float startTime = segment.Start / (float)sampleRate; float duration = segment.Samples.Length / (float)sampleRate; - OfflineStream stream = recognizer.CreateStream(); + var stream = recognizer.CreateStream(); stream.AcceptWaveform(sampleRate, segment.Samples); recognizer.Decode(stream); - String text = stream.Result.Text; + var text = stream.Result.Text; - if (!String.IsNullOrEmpty(text)) { - Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), - String.Format("{0:0.00}", startTime+duration), text); + if (!string.IsNullOrEmpty(text)) + { + Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime), + string.Format("{0:0.00}", startTime + duration), text); } vad.Pop(); diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj index a5c5f1022..1736869a8 100644 --- a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj +++ b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 vad_non_streaming_asr_paraformer enable enable