diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc index 2cda40e76..a6b1f302e 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc @@ -282,6 +282,170 @@ static Napi::Array OfflineSpeakerDiarizationProcessWrapper( return ans; } +struct SpeakerDiarizationCallbackData { + int32_t num_processed_chunks; + int32_t num_total_chunks; +}; + +// see +// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc +static void InvokeJsCallback(Napi::Env env, Napi::Function callback, + Napi::Reference *context, + SpeakerDiarizationCallbackData *data) { + if (env != nullptr) { + if (callback != nullptr) { + Napi::Number num_processed_chunks = + Napi::Number::New(env, data->num_processed_chunks); + Napi::Number num_total_chunks = + Napi::Number::New(env, data->num_total_chunks); + + callback.Call(context->Value(), {num_processed_chunks, num_total_chunks}); + } + } + delete data; +} + +using TSFN = Napi::TypedThreadSafeFunction, + SpeakerDiarizationCallbackData, + InvokeJsCallback>; + +class SpeakerDiarizationProcessWorker : public Napi::AsyncWorker { + public: + SpeakerDiarizationProcessWorker(const Napi::Env &env, TSFN tsfn, + const SherpaOnnxOfflineSpeakerDiarization *sd, + std::vector samples) + : tsfn_(tsfn), + Napi::AsyncWorker{env, "SpeakerDiarizationProcessAsyncWorker"}, + deferred_(env), + sd_(sd), + samples_(std::move(samples)) {} + + Napi::Promise Promise() { return deferred_.Promise(); } + + protected: + void Execute() override { + auto callback = [](int32_t num_processed_chunks, int32_t num_total_chunks, + void *arg) -> int32_t { + auto _this = reinterpret_cast(arg); + + auto data = new SpeakerDiarizationCallbackData; + data->num_processed_chunks = num_processed_chunks; + data->num_total_chunks = num_total_chunks; + + _this->tsfn_.NonBlockingCall(data); + + return 0; + }; + + r_ = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( + sd_, samples_.data(), samples_.size(), callback, this); + + tsfn_.Release(); + } + + void OnOK() override { + Napi::Env env = deferred_.Env(); + + int32_t num_segments = + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r_); + + const SherpaOnnxOfflineSpeakerDiarizationSegment *segments = + SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r_); + + Napi::Array ans = Napi::Array::New(env, num_segments); + + for (int32_t i = 0; i != num_segments; ++i) { + Napi::Object obj = Napi::Object::New(env); + + obj.Set(Napi::String::New(env, "start"), segments[i].start); + obj.Set(Napi::String::New(env, "end"), segments[i].end); + obj.Set(Napi::String::New(env, "speaker"), segments[i].speaker); + + ans.Set(i, obj); + } + + SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments); + SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r_); + + deferred_.Resolve(ans); + } + + private: + TSFN tsfn_; + Napi::Promise::Deferred deferred_; + const SherpaOnnxOfflineSpeakerDiarization *sd_; + std::vector samples_; + const SherpaOnnxOfflineSpeakerDiarizationResult *r_; +}; + +static Napi::Object OfflineSpeakerDiarizationProcessAsyncWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 3) { + std::ostringstream os; + os << "Expect only 3 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be an offline speaker diarization pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + const SherpaOnnxOfflineSpeakerDiarization *sd = + info[0].As>().Data(); + + if (!info[1].IsTypedArray()) { + Napi::TypeError::New(env, "Argument 1 should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[2].IsFunction()) { + Napi::TypeError::New(env, "Argument 2 should be a function") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Function cb = info[2].As(); + + auto context = + new Napi::Reference(Napi::Persistent(info.This())); + + TSFN tsfn = TSFN::New( + env, + cb, // JavaScript function called asynchronously + "SpeakerDiarizationProcessAsyncFunc", // Name + 0, // Unlimited queue + 1, // Only one thread will use this initially + context, + [](Napi::Env, void *, Napi::Reference *ctx) { delete ctx; }); + + Napi::Float32Array samples = info[1].As(); + +#if __OHOS__ + int32_t num_samples = samples.ElementLength() / sizeof(float); +#else + int32_t num_samples = samples.ElementLength(); +#endif + std::vector v(num_samples); + std::copy(samples.Data(), samples.Data() + num_samples, v.begin()); + + SpeakerDiarizationProcessWorker *worker = + new SpeakerDiarizationProcessWorker(env, tsfn, sd, v); + worker->Queue(); + return worker->Promise(); +} + static void OfflineSpeakerDiarizationSetConfigWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); @@ -313,7 +477,7 @@ static void OfflineSpeakerDiarizationSetConfigWrapper( return; } - Napi::Object o = info[0].As(); + Napi::Object o = info[1].As(); SherpaOnnxOfflineSpeakerDiarizationConfig c; memset(&c, 0, sizeof(c)); @@ -334,6 +498,10 @@ void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports) { Napi::String::New(env, "offlineSpeakerDiarizationProcess"), Napi::Function::New(env, OfflineSpeakerDiarizationProcessWrapper)); + exports.Set( + Napi::String::New(env, "offlineSpeakerDiarizationProcessAsync"), + Napi::Function::New(env, OfflineSpeakerDiarizationProcessAsyncWrapper)); + exports.Set( Napi::String::New(env, "offlineSpeakerDiarizationSetConfig"), Napi::Function::New(env, OfflineSpeakerDiarizationSetConfigWrapper)); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc index ed1f3afb3..05e27846d 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc @@ -344,9 +344,9 @@ struct TtsCallbackData { // see // https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc -void InvokeJsCallback(Napi::Env env, Napi::Function callback, - Napi::Reference *context, - TtsCallbackData *data) { +static void InvokeJsCallback(Napi::Env env, Napi::Function callback, + Napi::Reference *context, + TtsCallbackData *data) { if (env != nullptr) { if (callback != nullptr) { Napi::ArrayBuffer arrayBuffer = @@ -580,7 +580,6 @@ static Napi::Object OfflineTtsGenerateAsyncWrapper( context, [](Napi::Env, void *, Napi::Reference *ctx) { delete ctx; }); - const SherpaOnnxGeneratedAudio *audio; TtsGenerateWorker *worker = new TtsGenerateWorker( env, tsfn, tts, text, speed, sid, enable_external_buffer); worker->Queue(); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts index f71e2f6ee..7db410a4c 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts @@ -65,5 +65,6 @@ export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array object; export const getOfflineSpeakerDiarizationSampleRate: (handle: object) => number; -export const offlineSpeakerDiarizationProcess: (handle: object, samples: Float32Array) => object; +export const offlineSpeakerDiarizationProcess: (handle: object, input: object) => object; +export const offlineSpeakerDiarizationProcessAsync: (handle: object, input: object, callback: object) => object; export const offlineSpeakerDiarizationSetConfig: (handle: object, config: object) => void; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingSpeakerDiarization.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingSpeakerDiarization.ets index 327b63713..176da87a5 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingSpeakerDiarization.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingSpeakerDiarization.ets @@ -2,6 +2,7 @@ import { createOfflineSpeakerDiarization, getOfflineSpeakerDiarizationSampleRate, offlineSpeakerDiarizationProcess, + offlineSpeakerDiarizationProcessAsync, offlineSpeakerDiarizationSetConfig, } from 'libsherpa_onnx.so'; @@ -32,9 +33,12 @@ export class OfflineSpeakerDiarizationConfig { } export class OfflineSpeakerDiarizationSegment { - public start: number = 0; // in seconds - public end: number = 0; // in seconds - public speaker: number = 0; // ID of the speaker; count from 0 + // in seconds + public start: number = 0; + // in seconds + public end: number = 0; + // ID of the speaker; count from 0 + public speaker: number = 0; } export class OfflineSpeakerDiarization { @@ -67,6 +71,12 @@ export class OfflineSpeakerDiarization { return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment[]; } + processAsync(samples: Float32Array, callback: (numProcessedChunks: number, + numTotalChunks: number) => void): Promise { + return offlineSpeakerDiarizationProcessAsync(this.handle, samples, + callback) as Promise; + } + setConfig(config: OfflineSpeakerDiarizationConfig) { offlineSpeakerDiarizationSetConfig(this.handle, config); this.config.clustering = config.clustering; diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/oh-package.json5 b/harmony-os/SherpaOnnxSpeakerDiarization/entry/oh-package.json5 index 248c3b754..97448bbf3 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/oh-package.json5 +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/oh-package.json5 @@ -5,6 +5,8 @@ "main": "", "author": "", "license": "", - "dependencies": {} + "dependencies": { + "sherpa_onnx": "1.10.33" + } } diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/pages/Index.ets index 52c065152..f6fc537b4 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/pages/Index.ets +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/pages/Index.ets @@ -1,6 +1,6 @@ -import { LengthUnit } from '@kit.ArkUI'; +import { LengthUnit, promptAction } from '@kit.ArkUI'; import worker, { MessageEvents } from '@ohos.worker'; -import { systemTime, BusinessError } from '@kit.BasicServicesKit'; +import { BusinessError, pasteboard } from '@kit.BasicServicesKit'; import { picker } from '@kit.CoreFileKit'; @@ -10,19 +10,15 @@ struct Index { @State title: string = 'Next-gen Kaldi: Speaker Diarization'; @State titleFontSize: number = 15; @State currentIndex: number = 0; - private controller: TabsController = new TabsController(); - - private workerInstance?: worker.ThreadWorker - private readonly scriptURL: string = 'entry/ets/workers/SpeakerDiarizationWorker.ets' - @State resultForFile: string = ''; @State resultForMic: string = ''; - - @State micBtnCaption: string = 'Start recording'; - @State micSaveBtnCaption: string = 'Save audio'; - - @State micBtnEnabled: boolean = false; + @State progressForFile: number = 0; @State selectFileBtnEnabled: boolean = false; + @State copyBtnForFileEnabled: boolean = false; + private controller: TabsController = new TabsController(); + private workerInstance?: worker.ThreadWorker + private readonly scriptURL: string = 'entry/ets/workers/SpeakerDiarizationWorker.ets' + private numSpeakers: string = '-1'; @Builder TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) { @@ -42,7 +38,10 @@ struct Index { this.workerInstance.onmessage = (e: MessageEvents) => { const msgType = e.data['msgType'] as string; - console.log(`received msg from worker: ${msgType}`); + + if (msgType != 'speaker-diarization-file-progress') { + console.log(`received msg from worker: ${msgType}`); + } if (msgType == 'init-speaker-diarization-done') { console.log('Speaker diarization initialized successfully'); @@ -51,7 +50,10 @@ struct Index { this.resultForMic = 'Initialization finished.\nPlease click the button Start recording.'; this.selectFileBtnEnabled = true; - this.micBtnEnabled = true; + } + + if (msgType == 'speaker-diarization-file-progress') { + this.progressForFile = e.data['progress'] as number; } if (msgType == 'speaker-diarization-file-done') { @@ -59,6 +61,7 @@ struct Index { this.resultForFile = result; this.selectFileBtnEnabled = true; + this.copyBtnForFileEnabled = true; } }; @@ -73,12 +76,35 @@ struct Index { Column() { Tabs({ barPosition: BarPosition.End, controller: this.controller }) { TabContent() { - Column({space: 10}) { + Column({ space: 10 }) { Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold); - Button('Select .wav file (16kHz) ') - .enabled(this.selectFileBtnEnabled) - .onClick(()=>{ + Row({ space: 10 }) { + Text(`Number of speakers`).width('60%') + + TextInput({ text: this.numSpeakers }).onChange((text) => { + this.numSpeakers = text.trim(); + }).width('20%') + }.justifyContent(FlexAlign.Center) + + Row({ space: 10 }) { + Button('Select .wav file (16kHz) ').enabled(this.selectFileBtnEnabled).onClick(() => { this.resultForFile = ''; + this.progressForFile = 0; + this.copyBtnForFileEnabled = false; + + let numSpeakers = parseInt(this.numSpeakers); + if (numSpeakers.toString() != this.numSpeakers) { + this.resultForFile = + 'Please input a valid value for the number of speakers in the .wav file you are going to select'; + return; + } + + if (numSpeakers < 1) { + this.resultForFile = + 'Please input a positive value for the number of speakers in the .wav file you are going to select'; + return; + } + this.selectFileBtnEnabled = false; const documentSelectOptions = new picker.DocumentSelectOptions(); @@ -97,7 +123,7 @@ struct Index { if (this.workerInstance) { this.workerInstance.postMessage({ - msgType: 'speaker-diarization-file', filename: result[0], + msgType: 'speaker-diarization-file', filename: result[0], numSpeakers, }); this.resultForFile = `Decoding ${result[0]} ... ...`; } else { @@ -108,31 +134,42 @@ struct Index { this.selectFileBtnEnabled = true; }) }) + Button('Copy results') + .enabled(this.copyBtnForFileEnabled) + .onClick(() => { // See https://developer.huawei.com/consumer/cn/doc/harmonyos-faqs/faqs-arkui-308-V5 + const pasteboardData = pasteboard.createData(pasteboard.MIMETYPE_TEXT_PLAIN, this.resultForFile); + const systemPasteboard = pasteboard.getSystemPasteboard(); + systemPasteboard.setData(pasteboardData); + systemPasteboard.getData().then((data) => { + if (data) { + promptAction.showToast({ message: 'Result copied.' }); + } else { + promptAction.showToast({ message: 'Failed to copy' }); + } + }) + }) + } - TextArea({ text: this.resultForFile }) - .lineSpacing({ value: 10, unit: LengthUnit.VP }) - .width('100%') - .height('100%') - } - }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc'))) - - TabContent() { - Column({space: 10}) { - Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold); - Row() { - Button(this.micBtnCaption) + if (this.progressForFile > 0) { + Row() { + Progress({ value: 0, total: 100, type: ProgressType.Capsule }) + .width('80%') + .height(20) + .value(this.progressForFile); - Button(this.micSaveBtnCaption) + Text(`${this.progressForFile.toFixed(2)}%`).width('15%') + }.width('100%').justifyContent(FlexAlign.Center) } - TextArea({ text: this.resultForMic }) + + TextArea({ text: this.resultForFile }) .lineSpacing({ value: 10, unit: LengthUnit.VP }) .width('100%') .height('100%') } - }.tabBar(this.TabBuilder('From mic', 1, $r('app.media.icon_mic'), $r('app.media.icon_mic'))) + }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc'))) TabContent() { - Column({space: 10}) { + Column({ space: 10 }) { Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold); TextArea({ text: ` @@ -150,7 +187,7 @@ https://k2-fsa.github.io/sherpa/social-groups.html ` }).width('100%').height('100%').focusable(false) }.justifyContent(FlexAlign.Start) - }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info'), $r('app.media.info'))) + }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info'))) }.scrollable(false) } } diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/workers/SpeakerDiarizationWorker.ets b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/workers/SpeakerDiarizationWorker.ets index 569b564a9..4a297ec87 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/workers/SpeakerDiarizationWorker.ets +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/workers/SpeakerDiarizationWorker.ets @@ -1,12 +1,17 @@ -import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker'; -import { OfflineSpeakerDiarization, OfflineSpeakerDiarizationConfig, +import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker'; +import { + OfflineSpeakerDiarization, + OfflineSpeakerDiarizationConfig, OfflineSpeakerDiarizationSegment, - readWaveFromBinary, Samples } from 'sherpa_onnx'; + readWaveFromBinary, + Samples +} from 'sherpa_onnx'; import { fileIo } from '@kit.CoreFileKit'; const workerPort: ThreadWorkerGlobalScope = worker.workerPort; let sd: OfflineSpeakerDiarization; +let useAsync: boolean = true; function readWave(filename: string): Samples { const fp = fileIo.openSync(filename); @@ -20,10 +25,20 @@ function readWave(filename: string): Samples { function initOfflineSpeakerDiarization(context: Context): OfflineSpeakerDiarization { const config: OfflineSpeakerDiarizationConfig = new OfflineSpeakerDiarizationConfig(); + // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models + // to download models. + // Make sure you have placed it inside the directory + // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile + // + // Also, please delete unused files to reduce the size of the app config.segmentation.pyannote.model = 'sherpa-onnx-pyannote-segmentation-3-0/model.int8.onnx'; config.segmentation.numThreads = 2; config.segmentation.debug = true; + // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models + // to download models. + // Make sure you have placed it inside the directory + // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile config.embedding.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx'; config.embedding.numThreads = 2; config.embedding.debug = true; @@ -31,6 +46,27 @@ function initOfflineSpeakerDiarization(context: Context): OfflineSpeakerDiarizat config.minDurationOn = 0.2; config.minDurationOff = 0.5; return new OfflineSpeakerDiarization(config, context.resourceManager); + + // For the above two models files, you should have the following directory structure + /* + (py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd + /Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile + (py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh + total 77336 + -rw-r--r-- 1 fangjun staff 38M Dec 10 16:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + drwxr-xr-x 3 fangjun staff 96B Dec 10 19:36 sherpa-onnx-pyannote-segmentation-3-0 + (py38) fangjuns-MacBook-Pro:rawfile fangjun$ tree . + . + ├── 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + └── sherpa-onnx-pyannote-segmentation-3-0 + └── model.int8.onnx + + 1 directory, 2 files + + (Note that we have kept only model.int8.onnx and removed all other files + from sherpa-onnx-pyannote-segmentation-3-0 + ) + */ } /** @@ -46,24 +82,76 @@ workerPort.onmessage = (e: MessageEvents) => { if (msgType == 'init-speaker-diarization' && !sd) { const context: Context = e.data['context'] as Context; sd = initOfflineSpeakerDiarization(context); - workerPort.postMessage({msgType: 'init-speaker-diarization-done'}); + workerPort.postMessage({ msgType: 'init-speaker-diarization-done' }); console.log('Init sd done'); } if (msgType == 'speaker-diarization-file') { const filename = e.data['filename'] as string; + const numSpeakers = e.data['numSpeakers'] as number; const wave = readWave(filename); let result = ''; if (wave == undefined || wave == null) { result = `Failed to read ${filename}`; - } else if (wave.sampleRate != sd.sampleRate) { + + workerPort.postMessage({ + msgType: 'speaker-diarization-file-done', result + }); + return; + } + + if (wave.sampleRate != sd.sampleRate) { result = `Expected sample rate: ${sd.sampleRate}`; result += '\n'; result += `Sample rate in file ${filename} is ${wave.sampleRate}`; - } else { - const duration = wave.samples.length / wave.sampleRate; - console.log(`Processing ${filename} of ${duration} seconds`); + workerPort.postMessage({ + msgType: 'speaker-diarization-file-done', result + }); + + return; + } + + const duration = wave.samples.length / wave.sampleRate; + console.log(`Processing ${filename} of ${duration} seconds`); + + // You can remove this if statement if you want + if (duration < 0.3) { + result = `${filename} has only ${duration} seconds. Please use a longer file`; + + workerPort.postMessage({ + msgType: 'speaker-diarization-file-done', result + }); + return; + } + sd.config.clustering.numClusters = numSpeakers; + sd.setConfig(sd.config); + + if (useAsync) { + sd.processAsync(wave.samples, (numProcessedChunks: number, numTotalChunks: number) => { + const progress = numProcessedChunks / numTotalChunks * 100; + workerPort.postMessage({ + msgType: 'speaker-diarization-file-progress', progress + }); + }).then((r: OfflineSpeakerDiarizationSegment[]) => { + console.log(`r is ${r.length}, ${r}`); + + for (const s of r) { + const start: string = s.start.toFixed(3); + const end: string = s.end.toFixed(3); + result += `${start}\t--\t${end}\tspeaker_${s.speaker}\n`; + console.log(`result: ${result}`); + } + + if (r.length == 0) { + result = 'The result is empty'; + } + + workerPort.postMessage({ + msgType: 'speaker-diarization-file-done', result + }); + }); + } else { const r: OfflineSpeakerDiarizationSegment[] = sd.process(wave.samples) console.log(`r is ${r.length}, ${r}`); for (const s of r) { @@ -76,15 +164,13 @@ workerPort.onmessage = (e: MessageEvents) => { if (r.length == 0) { result = 'The result is empty'; } - } - workerPort.postMessage({ - msgType: 'speaker-diarization-file-done', - result - }); + workerPort.postMessage({ + msgType: 'speaker-diarization-file-done', result + }); + } } -} -/** +} /** * Defines the event handler to be called when the worker receives a message that cannot be deserialized. * The event handler is executed in the worker thread. * diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/string.json index 08e810c12..55c8939f3 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/string.json +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/string.json @@ -11,10 +11,6 @@ { "name": "EntryAbility_label", "value": "Speaker diarization" - }, - { - "name": "mic_reason", - "value": "access the microphone for on-device speaker diarizatiaon with Next-gen Kaldi" } ] } \ No newline at end of file diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/en_US/element/string.json b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/en_US/element/string.json index 08e810c12..55c8939f3 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/en_US/element/string.json +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/en_US/element/string.json @@ -11,10 +11,6 @@ { "name": "EntryAbility_label", "value": "Speaker diarization" - }, - { - "name": "mic_reason", - "value": "access the microphone for on-device speaker diarizatiaon with Next-gen Kaldi" } ] } \ No newline at end of file diff --git a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/zh_CN/element/string.json b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/zh_CN/element/string.json index d6ca428c5..d9180dfd1 100644 --- a/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/zh_CN/element/string.json +++ b/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/zh_CN/element/string.json @@ -11,10 +11,6 @@ { "name": "EntryAbility_label", "value": "说话人日志" - }, - { - "name": "mic_reason", - "value": "使用新一代Kaldi, 访问麦克风进行本地说话人日志 (不需要联网)" } ] } \ No newline at end of file diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index a781520ff..4d4a2c4fc 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -1512,10 +1512,10 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment( const SherpaOnnxOfflineSpeakerDiarizationSegment *s); typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)( - int32_t num_processed_chunk, int32_t num_total_chunks, void *arg); + int32_t num_processed_chunks, int32_t num_total_chunks, void *arg); typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)( - int32_t num_processed_chunk, int32_t num_total_chunks); + int32_t num_processed_chunks, int32_t num_total_chunks); // The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult() // to free the returned pointer to avoid memory leak.