From 9d4659fd29d3e8f5bb41471abcd477c22e8cc0f3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 11 Dec 2024 12:01:13 +0800 Subject: [PATCH] Add missing changes about speaker identification demo for HarmonyOS (#1612) --- .../entry/oh-package-lock.json5 | 28 +++ .../entry/src/main/ets/pages/Index.ets | 177 +++++++++++++++--- .../workers/SpeakerIdentificationWorker.ets | 90 ++++++--- 3 files changed, 243 insertions(+), 52 deletions(-) create mode 100644 harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5 diff --git a/harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5 b/harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5 new file mode 100644 index 000000000..5a9a42508 --- /dev/null +++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5 @@ -0,0 +1,28 @@ +{ + "meta": { + "stableOrder": true + }, + "lockfileVersion": 3, + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.", + "specifiers": { + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx", + "sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har" + }, + "packages": { + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": { + "name": "libsherpa_onnx.so", + "version": "1.0.0", + "resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx", + "registryType": "local" + }, + "sherpa_onnx@sherpa_onnx_2.har": { + "name": "sherpa_onnx", + "version": "1.10.33", + "resolved": "sherpa_onnx_2.har", + "registryType": "local", + "dependencies": { + "libsherpa_onnx.so": 
"file:./src/main/cpp/types/libsherpa_onnx" + } + } + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets index ec12f51a6..c17b8b0f6 100644 --- a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets +++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets @@ -72,7 +72,7 @@ struct Index { @State currentIndex: number = 0; - @State message: string = 'Hello World'; + private threshold: string = '0.5'; private workerInstance?: worker.ThreadWorker private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets' @@ -83,15 +83,21 @@ struct Index { @State btnSaveAudioEnabled: boolean = false; @State btnAddEnabled: boolean = false; - private sampleRate: number = 16000; - private sampleList: Float32Array[] = [] + private sampleRate: number = 48000; + private sampleListForAdding: Float32Array[] = [] + private sampleListForTesting: Float32Array[] = [] private mic?: audio.AudioCapturer; @State infoHome: string = ''; @State infoAdd: string = ''; - @State micBtnCaption: string = 'Start recording'; - @State micStarted: boolean = false; + @State micBtnCaptionForAdding: string = 'Start recording'; + @State micStartedForAdding: boolean = false; + @State micBtnEnabledForAdding: boolean = true; + + @State micBtnCaptionForTesting: string = 'Start recording'; + @State micStartedForTesting: boolean = false; + @State micBtnEnabledForTesting: boolean = true; async initMic() { const permissions: Permissions[] = ["ohos.permission.MICROPHONE"]; @@ -158,6 +164,23 @@ struct Index { if (msgType == 'manager-all-speaker-names') { this.allSpeakerNames = e.data['allSpeakers'] as string[]; } + + if (msgType == 'manager-add-speaker-done') { + const ok: boolean = e.data['ok'] as boolean; + const status: string = e.data['status'] as string; + this.infoAdd += '\n' + status; + + if 
(ok) { + this.sampleListForAdding = []; + this.btnSaveAudioEnabled = false; + this.btnAddEnabled = false; + } + } + + if (msgType == 'manager-search-speaker-done') { + const name = e.data['name'] as string; + this.infoHome = name; + } }; this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()}); @@ -181,7 +204,97 @@ struct Index { Tabs({ barPosition: BarPosition.End, controller: this.controller }) { TabContent() { Column({ space: 10 }) { - Button('Home') + Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold); + Row() { + Text('Similary threshold').width('60%'); + + TextInput({ text: this.threshold }).onChange((text) => { + this.threshold = text.trim(); + }).width('20%') + } + Row() { + Button(this.micBtnCaptionForTesting) + .enabled(this.micBtnEnabledForTesting) + .onClick(()=>{ + if (this.allSpeakerNames.length == 0) { + this.infoHome = 'There are no speakers registered. Please add them first'; + return; + } + + let threshold = parseFloat(this.threshold); + if (isNaN(threshold)) { + this.infoHome = 'Please enter a valid threshold'; + return; + } + + if (threshold <= 0) { + this.infoHome = 'Please enter a positive threshold'; + return; + } + console.log(`threshold: ${threshold}`); + + if (this.micStartedForTesting) { + this.micStartedForTesting = false; + this.micBtnCaptionForTesting = 'Start'; + this.micBtnEnabledForAdding = true; + this.mic?.stop(); + + const samples = flatten(this.sampleListForTesting); + const duration = samples.length / this.sampleRate; + if (duration < 0.5) { + this.infoHome = `Please speak for a longer time! 
Current duration: ${duration}`; + return; + } + if (this.workerInstance) { + this.workerInstance.postMessage({ + msgType: 'manager-search-speaker', + samples: samples, + sampleRate: this.sampleRate, + threshold, + }); + } + } else { + this.sampleListForTesting = []; + this.micStartedForTesting = true; + this.micBtnCaptionForTesting = 'Stop'; + this.micBtnEnabledForAdding = false; + this.mic?.start(); + this.infoHome = `Use threshold: ${threshold}`; + this.infoHome += '\nPlease speak and then click Stop'; + } + }) + + Button('Save audio') + .enabled(!this.micStartedForTesting) + .onClick(()=>{ + if (this.sampleListForTesting.length == 0) { + this.infoHome = 'No audio samples recorded'; + return; + } + const samples = flatten(this.sampleListForTesting); + + if (samples.length == 0) { + this.infoHome = 'Empty samples'; + return; + } + + let uri: string = ''; + + const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav']; + + const audioViewPicker = new picker.AudioViewPicker(); + + audioViewPicker.save(audioOptions).then((audioSelectResult: Array) => { + uri = audioSelectResult[0]; + savePcmToWav(uri, toInt16Samples(samples), this.sampleRate); + console.log(`Saved to ${uri}`); + this.infoHome+= `\nSaved to ${uri}`; + }); + }) + } + TextArea({text: this.infoHome}) + .height('100%') + .focusable(false) } }.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home'))) @@ -244,22 +357,25 @@ struct Index { }.width('100%') Row({space: 10}) { - Button(this.micBtnCaption) + Button(this.micBtnCaptionForAdding) + .enabled(this.micBtnEnabledForAdding) .onClick(()=> { if (this.mic) { - if (this.micStarted) { - this.micStarted = false; - this.micBtnCaption = 'Start recording'; + if (this.micStartedForAdding) { + this.micStartedForAdding = false; + this.micBtnEnabledForTesting = true; + this.micBtnCaptionForAdding = 'Start recording'; this.mic.stop(); this.infoAdd = ''; - if (this.sampleList.length > 0) { + if 
(this.sampleListForAdding.length > 0) { this.btnAddEnabled = true; this.btnSaveAudioEnabled = true; } } else { - this.micStarted = true; - this.micBtnCaption = 'Stop recording'; - this.sampleList = []; + this.micStartedForAdding = true; + this.micBtnEnabledForTesting = false; + this.micBtnCaptionForAdding = 'Stop recording'; + this.sampleListForAdding = []; this.mic.start(); this.infoAdd = ''; @@ -267,30 +383,41 @@ struct Index { this.btnSaveAudioEnabled = false; } } - }) Button('Add') .enabled(this.btnAddEnabled) .onClick(()=>{ if (this.inputSpeakerName.trim() == '') { - this.infoAdd += 'Please input a speaker name first'; + this.infoAdd += '\nPlease input a speaker name first'; return; } - const samples = flatten(this.sampleList); - console.log(`number of samples: ${samples.length}, ${samples.length / this.sampleRate}`); + const samples = flatten(this.sampleListForAdding); + const duration = samples.length / this.sampleRate; + if (duration < 0.5) { + this.infoAdd = `Please speak for a longer time. 
Current duration: ${duration}`; + return; + } + if (this.workerInstance) { + this.workerInstance.postMessage({ + msgType: 'manager-add-speaker', + name: this.inputSpeakerName, + samples: samples, + sampleRate: this.sampleRate, + }) + } }) Button('Save audio') .enabled(this.btnSaveAudioEnabled) .onClick(()=>{ - if (this.sampleList.length == 0) { + if (this.sampleListForAdding.length == 0) { this.btnSaveAudioEnabled = false; return; } - const samples = flatten(this.sampleList); + const samples = flatten(this.sampleListForAdding); if (samples.length == 0) { this.btnSaveAudioEnabled = false; @@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html samplesFloat[i] = view[i] / 32768.0; } - this.sampleList.push(samplesFloat); + if (this.micStartedForAdding) { + this.sampleListForAdding.push(samplesFloat); + } + + if (this.micStartedForTesting) { + this.sampleListForTesting.push(samplesFloat); + } } -} \ No newline at end of file +} diff --git a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets index 9dd97d108..5b0679742 100644 --- a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets +++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets @@ -1,12 +1,12 @@ -import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker'; +import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker'; import { + OnlineStream, readWaveFromBinary, Samples, SpeakerEmbeddingExtractor, SpeakerEmbeddingExtractorConfig, SpeakerEmbeddingManager } from 'sherpa_onnx'; -import { fileIo } from '@kit.CoreFileKit'; const workerPort: ThreadWorkerGlobalScope = worker.workerPort; @@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples { } function 
initExtractor(context: Context): SpeakerEmbeddingExtractor { - const config = new SpeakerEmbeddingExtractorConfig(); + const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig(); + + // Please put the model file inside the directory + // harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile +/* +(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd +/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile +(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh +total 77336 +-rw-r--r-- 1 fangjun staff 38M Dec 9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + */ + // You can find more models at + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx'; config.numThreads = 2; config.debug = true; @@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor { } function extractEmbedding(samples: Samples): Float32Array { - const stream = extractor.createStream(); + const stream: OnlineStream = extractor.createStream(); stream.acceptWaveform(samples); return extractor.compute(stream); } @@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => { extractor = initExtractor(context); manager = new SpeakerEmbeddingManager(extractor.dim); - const filename1 = 'sr-data/enroll/fangjun-sr-1.wav'; - const samples1 = readWaveFromRawfile(filename1, context); - console.log(`sample rate: ${samples1.sampleRate}`); - let ok = manager.add({ name: 'fangjun0', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun1', v: extractEmbedding(samples1) }); - /* - ok = manager.add({ name: 'fangjun2', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun3', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun4', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun5', v: 
extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun6', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun7', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun8', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun9', v: extractEmbedding(samples1) }); - ok = manager.add({ name: 'fangjun10', v: extractEmbedding(samples1) }); - */ - - if (ok) { - console.log(`Added fangjun`); - let n = manager.getNumSpeakers(); - console.log(`number of speakers: ${n}`); - console.log(`speaker names: ${manager.getAllSpeakerNames().join('\n')}`); - } - workerPort.postMessage({ msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(), }); @@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => { if (msgType == 'manager-delete-speaker') { const name = e.data['name'] as string; - const ok = manager.remove(name); + const ok: boolean = manager.remove(name); if (ok) { console.log(`Removed ${name}.`); @@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => { }); } } + + if (msgType == 'manager-add-speaker') { + const name = e.data['name'] as string; + const samples = e.data['samples'] as Float32Array; + const sampleRate = e.data['sampleRate'] as number; + + const v = extractEmbedding({ samples, sampleRate }); + const ok: boolean = manager.add({ name, v }); + if (ok) { + workerPort.postMessage({ + msgType: 'manager-add-speaker-done', + status: `Added ${name}`, + ok, + }); + workerPort.postMessage({ + msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(), + } + ); + } else { + workerPort.postMessage({ + msgType: 'manager-add-speaker-done', + status: `Failed to add ${name}. Possibly due to exsiting speaker name. 
Please recheck`, + ok, + }); + } + } + + if (msgType == 'manager-search-speaker') { + const threshold = e.data['threshold'] as number; + const samples = e.data['samples'] as Float32Array; + const sampleRate = e.data['sampleRate'] as number; + + const v = extractEmbedding({ samples, sampleRate }); + let name: string = manager.search({ threshold, v }); + if (name == '' || name == undefined) { + name = "======"; + } + workerPort.postMessage({ + msgType: 'manager-search-speaker-done', + name + }); + } } /** @@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => { * @param e error message */ workerPort.onerror = (e: ErrorEvent) => { -} \ No newline at end of file +}