Skip to content

Commit 3622104

Browse files
authored
Add C# API for Moonshine models. (#1483)
* Also, return timestamps for non-streaming ASR.
1 parent cdd8e1b commit 3622104

File tree

6 files changed: 143 additions and 8 deletions.

.github/scripts/test-dot-net.sh

+3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@ rm -fv *.wav
99
rm -rfv sherpa-onnx-pyannote-*
1010

1111
cd ../offline-decode-files
12+
./run-moonshine.sh
13+
rm -rf sherpa-onnx-*
14+
1215
./run-sense-voice-ctc.sh
1316
rm -rf sherpa-onnx-*
1417

dotnet-examples/offline-decode-files/Program.cs

+37-7
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ class Options
1717
{
1818

1919
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
20-
public int SampleRate { get; set; } = 16000;
20+
public int SampleRate { get; set; } = 16000;
2121

2222
[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
2323
public int FeatureDim { get; set; } = 80;
@@ -31,7 +31,7 @@ class Options
3131
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
3232
public string Decoder { get; set; } = "";
3333

34-
[Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
34+
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
3535
public string Joiner { get; set; } = "";
3636

3737
[Option("model-type", Required = false, Default = "", HelpText = "model type")]
@@ -44,10 +44,22 @@ class Options
4444
public string WhisperDecoder { get; set; } = "";
4545

4646
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
47-
public string WhisperLanguage{ get; set; } = "";
47+
public string WhisperLanguage { get; set; } = "";
4848

4949
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
50-
public string WhisperTask{ get; set; } = "transcribe";
50+
public string WhisperTask { get; set; } = "transcribe";
51+
52+
[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
53+
public string MoonshinePreprocessor { get; set; } = "";
54+
55+
[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
56+
public string MoonshineEncoder { get; set; } = "";
57+
58+
[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
59+
public string MoonshineUncachedDecoder { get; set; } = "";
60+
61+
[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
62+
public string MoonshineCachedDecoder { get; set; } = "";
5163

5264
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
5365
public string TdnnModel { get; set; } = "";
@@ -90,7 +102,7 @@ class Options
90102
public float HotwordsScore { get; set; } = 1.5F;
91103

92104
[Option("files", Required = true, HelpText = "Audio files for decoding")]
93-
public IEnumerable<string> Files { get; set; } = new string[] {};
105+
public IEnumerable<string> Files { get; set; } = new string[] { };
94106
}
95107

96108
static void Main(string[] args)
@@ -236,6 +248,13 @@ private static void Run(Options options)
236248
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
237249
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
238250
}
251+
else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor))
252+
{
253+
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
254+
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
255+
config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
256+
config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
257+
}
239258
else
240259
{
241260
Console.WriteLine("Please provide a model");
@@ -273,10 +292,21 @@ private static void Run(Options options)
273292
// display results
274293
for (int i = 0; i != files.Length; ++i)
275294
{
276-
var text = streams[i].Result.Text;
295+
var r = streams[i].Result;
277296
Console.WriteLine("--------------------");
278297
Console.WriteLine(files[i]);
279-
Console.WriteLine(text);
298+
Console.WriteLine("Text: {0}", r.Text);
299+
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
300+
if (r.Timestamps != null && r.Timestamps.Length > 0) {
301+
Console.Write("Timestamps: [");
302+
var sep = "";
303+
for (int k = 0; k != r.Timestamps.Length; ++k)
304+
{
305+
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
306+
sep = ", ";
307+
}
308+
Console.WriteLine("]");
309+
}
280310
}
281311
Console.WriteLine("--------------------");
282312
}
dotnet-examples/offline-decode-files/run-moonshine.sh

+18
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
3+
# Trace every command (-x) and stop at the first one that fails (-e).
set -ex
4+
5+
# Download and unpack the Moonshine tiny English int8 model only when its
# tokens.txt is not already present; the archive is removed after extraction.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
6+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
7+
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
8+
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
9+
fi
10+
11+
# Decode the test wave file shipped inside the model directory, passing the
# four Moonshine ONNX files and the token table from that same directory.
dotnet run \
12+
--num-threads=2 \
13+
--moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
14+
--moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
15+
--moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
16+
--moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
17+
--tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
18+
--files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav

scripts/dotnet/OfflineModelConfig.cs

+2
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ public OfflineModelConfig()
2424
BpeVocab = "";
2525
TeleSpeechCtc = "";
2626
SenseVoice = new OfflineSenseVoiceModelConfig();
27+
Moonshine = new OfflineMoonshineModelConfig();
2728
}
2829
public OfflineTransducerModelConfig Transducer;
2930
public OfflineParaformerModelConfig Paraformer;
@@ -54,5 +55,6 @@ public OfflineModelConfig()
5455
public string TeleSpeechCtc;
5556

5657
public OfflineSenseVoiceModelConfig SenseVoice;
58+
public OfflineMoonshineModelConfig Moonshine;
5759
}
5860
}
scripts/dotnet/OfflineMoonshineModelConfig.cs

+29
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Runtime.InteropServices;
4+
5+
namespace SherpaOnnx
6+
{
7+
/// Holds the four model file paths needed to configure a Moonshine offline
/// recognizer: preprocessor, encoder, uncached decoder and cached decoder.
///
/// The struct uses sequential layout and every string is marshalled as LPStr,
/// so the declaration order of the fields is part of its contract.
/// NOTE(review): presumably mirrors a native config struct with the same
/// field order -- confirm against the native header before reordering.
[StructLayout(LayoutKind.Sequential)]
8+
public struct OfflineMoonshineModelConfig
9+
{
10+
/// Initializes every path to the empty string so that no field is left null.
public OfflineMoonshineModelConfig()
11+
{
12+
Preprocessor = "";
13+
Encoder = "";
14+
UncachedDecoder = "";
15+
CachedDecoder = "";
16+
}
17+
/// Path to the Moonshine preprocessor model (preprocess.onnx).
[MarshalAs(UnmanagedType.LPStr)]
18+
public string Preprocessor;
19+
20+
/// Path to the Moonshine encoder model (encode.onnx).
[MarshalAs(UnmanagedType.LPStr)]
21+
public string Encoder;
22+
23+
/// Path to the Moonshine uncached decoder model (uncached_decode.onnx).
[MarshalAs(UnmanagedType.LPStr)]
24+
public string UncachedDecoder;
25+
26+
/// Path to the Moonshine cached decoder model (cached_decode.onnx).
[MarshalAs(UnmanagedType.LPStr)]
27+
public string CachedDecoder;
28+
}
29+
}

scripts/dotnet/OfflineRecognizerResult.cs

+54-1
Original file line number | Diff line number | Diff line change
@@ -31,17 +31,70 @@ public OfflineRecognizerResult(IntPtr handle)
3131
byte[] stringBuffer = new byte[length];
3232
Marshal.Copy(impl.Text, stringBuffer, 0, length);
3333
_text = Encoding.UTF8.GetString(stringBuffer);
34+
35+
_tokens = new String[impl.Count];
36+
37+
unsafe
38+
{
39+
byte* buf = (byte*)impl.Tokens;
40+
for (int i = 0; i < impl.Count; i++)
41+
{
42+
length = 0;
43+
byte* start = buf;
44+
while (*buf != 0)
45+
{
46+
++buf;
47+
length += 1;
48+
}
49+
++buf;
50+
51+
stringBuffer = new byte[length];
52+
fixed (byte* pTarget = stringBuffer)
53+
{
54+
for (int k = 0; k < length; k++)
55+
{
56+
pTarget[k] = start[k];
57+
}
58+
}
59+
60+
_tokens[i] = Encoding.UTF8.GetString(stringBuffer);
61+
}
62+
}
63+
64+
unsafe
65+
{
66+
if (impl.Timestamps != IntPtr.Zero)
67+
{
68+
float *t = (float*)impl.Timestamps;
69+
_timestamps = new float[impl.Count];
70+
fixed (float* f = _timestamps)
71+
{
72+
for (int k = 0; k < impl.Count; k++)
73+
{
74+
f[k] = t[k];
75+
}
76+
}
77+
}
78+
}
79+
3480
}
3581

3682
/// Sequential-layout view of the recognition result that the constructor
/// reads its data from; the field order is therefore significant.
/// NOTE(review): presumably filled by marshalling the native result handle --
/// the code that populates it lies outside this hunk; confirm there.
[StructLayout(LayoutKind.Sequential)]
3783
struct Impl
3884
{
3985
/// Pointer to the recognized text; the constructor copies its bytes and decodes them as UTF-8.
public IntPtr Text;
86+
/// Pointer read as an array of Count floats; may be IntPtr.Zero, in which
/// case the constructor leaves the managed timestamps unset.
public IntPtr Timestamps;
87+
/// Number of tokens, and also the number of timestamps read when Timestamps is non-zero.
public int Count;
88+
/// Pointer to Count consecutive NUL-terminated byte strings, one per token,
/// each decoded as UTF-8 by the constructor.
public IntPtr Tokens;
4089
}
4190

4291
private String _text;
4392
public String Text => _text;
44-
}
4593

94+
private String[] _tokens;
95+
public String[] Tokens => _tokens;
4696

97+
private float[] _timestamps;
98+
public float[] Timestamps => _timestamps;
99+
}
47100
}

0 commit comments

Comments
 (0)