Skip to content

Commit

Permalink
First working version for decoding a file
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Dec 10, 2024
1 parent 2cc6f10 commit baac5df
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ export class OfflineSpeakerDiarizationConfig {
}

/**
 * A time span of the input audio labelled with the ID of the speaker
 * it is attributed to.
 */
export class OfflineSpeakerDiarizationSegment {
  /** Start time of the segment, in seconds. */
  public start: number = 0;

  /** End time of the segment, in seconds. */
  public end: number = 0;

  /** ID of the speaker; IDs count from 0. */
  public speaker: number = 0;
}

export class OfflineSpeakerDiarization {
Expand Down Expand Up @@ -62,8 +63,8 @@ export class OfflineSpeakerDiarization {
* "speaker": an_integer,
* }
*/
process(samples: Float32Array): OfflineSpeakerDiarizationSegment[] {
  // Hands the samples and this object's handle to
  // offlineSpeakerDiarizationProcess and returns everything it reports,
  // hence the array type for both the signature and the cast.
  return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment[];
}

setConfig(config: OfflineSpeakerDiarizationConfig) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
{
"apiType": "stageMode",
"buildOption": {
"sourceOption": {
"workers": [
'./src/main/ets/workers/SpeakerDiarizationWorker.ets'
]
}
},
"buildOptionSet": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import { LengthUnit } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { systemTime, BusinessError } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';


@Entry
@Component
Expand All @@ -8,11 +12,17 @@ struct Index {
@State currentIndex: number = 0;
private controller: TabsController = new TabsController();

// Worker that runs speaker diarization; it is created in aboutToAppear().
private workerInstance?: worker.ThreadWorker;
private readonly scriptURL: string = 'entry/ets/workers/SpeakerDiarizationWorker.ets';

// Status/result text; resultForFile is what the file tab's TextArea displays.
@State resultForFile: string = '';
@State resultForMic: string = '';

@State micBtnCaption: string = 'Start recording';
@State micSaveBtnCaption: string = 'Save audio';

// Both flags start out false and are switched on once the worker reports
// that initialization has finished.
@State micBtnEnabled: boolean = false;
@State selectFileBtnEnabled: boolean = false;

@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
Expand All @@ -25,13 +35,79 @@ struct Index {
})
}

/**
 * Creates the diarization worker, wires up its reply handler and asks it
 * to initialize.
 *
 * UI state changes driven from here:
 *  - right after the init request, both result strings show a "please wait" text;
 *  - on 'init-speaker-diarization-done', both buttons are enabled;
 *  - on 'speaker-diarization-file-done', the reported result is shown and
 *    the file-selection button is enabled again.
 */
aboutToAppear(): void {
  // A non-optional local avoids repeated undefined checks on the optional field.
  const workerInstance = new worker.ThreadWorker(this.scriptURL, {
    // The name is only a label for the thread; keep it descriptive of what
    // this worker actually does.
    name: 'Speaker diarization worker'
  });
  this.workerInstance = workerInstance;

  workerInstance.onmessage = (e: MessageEvents) => {
    const msgType = e.data['msgType'] as string;
    console.log(`received msg from worker: ${msgType}`);

    if (msgType === 'init-speaker-diarization-done') {
      console.log('Speaker diarization initialized successfully');

      this.resultForFile = 'Initialization finished.\nPlease select a .wav file.';
      this.resultForMic = 'Initialization finished.\nPlease click the button Start recording.';

      this.selectFileBtnEnabled = true;
      this.micBtnEnabled = true;
    } else if (msgType === 'speaker-diarization-file-done') {
      const result = e.data['result'] as string;
      this.resultForFile = result;

      // The button was disabled when the file was picked; allow another run.
      this.selectFileBtnEnabled = true;
    }
  };

  // The worker receives the context so it can reach the resource manager.
  const context = getContext();
  workerInstance.postMessage({ msgType: 'init-speaker-diarization', context });
  console.log('initializing');
  this.resultForFile = 'Initializing models. Please wait';
  this.resultForMic = this.resultForFile;
}

build() {
Column() {
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
TabContent() {
Column({space: 10}) {
Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
Button('Select .wav file (16kHz) ')
.enabled(this.selectFileBtnEnabled)
.onClick(()=>{
this.resultForFile = '';
this.selectFileBtnEnabled = false;

const documentSelectOptions = new picker.DocumentSelectOptions();
documentSelectOptions.maxSelectNumber = 1;
documentSelectOptions.fileSuffixFilters = ['.wav'];
const documentViewPicker = new picker.DocumentViewPicker();

documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
console.log(`select file result: ${result}`);

if (!result[0]) {
this.resultForFile = 'Please select a file to decode';
this.selectFileBtnEnabled = true;
return;
}

if (this.workerInstance) {
this.workerInstance.postMessage({
msgType: 'speaker-diarization-file', filename: result[0],
});
this.resultForFile = `Decoding ${result[0]} ... ...`;
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
}
}).catch((err: BusinessError) => {
console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
this.selectFileBtnEnabled = true;
})
})

TextArea({ text: this.resultForFile })
.lineSpacing({ value: 10, unit: LengthUnit.VP })
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
import { OfflineSpeakerDiarization, OfflineSpeakerDiarizationConfig,
OfflineSpeakerDiarizationSegment,
readWaveFromBinary, Samples } from 'sherpa_onnx';
import { fileIo } from '@kit.CoreFileKit';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

let sd: OfflineSpeakerDiarization;

/**
 * Reads a whole file into memory and decodes it with readWaveFromBinary().
 *
 * @param filename Path passed verbatim to fileIo.openSync().
 * @returns Whatever readWaveFromBinary() produces for the file's bytes,
 *          cast to Samples.
 */
function readWave(filename: string): Samples {
  const fp = fileIo.openSync(filename);
  try {
    // Size the buffer from the file's stat so the whole file is read in one call.
    const stat = fileIo.statSync(fp.fd);
    const arrayBuffer = new ArrayBuffer(stat.size);
    fileIo.readSync(fp.fd, arrayBuffer);
    const data: Uint8Array = new Uint8Array(arrayBuffer);
    return readWaveFromBinary(data) as Samples;
  } finally {
    // Release the descriptor on every path; otherwise each decoded file
    // leaks one open file handle in the long-lived worker.
    fileIo.closeSync(fp);
  }
}

/**
 * Builds the speaker-diarization object used by this worker.
 *
 * The configuration points at a pyannote segmentation model and a
 * speaker-embedding model, gives each of them two threads, turns on their
 * debug flags and sets the on/off duration thresholds.
 *
 * @param context Context whose resourceManager is handed to the
 *                OfflineSpeakerDiarization constructor.
 * @returns The newly constructed OfflineSpeakerDiarization.
 */
function initOfflineSpeakerDiarization(context: Context): OfflineSpeakerDiarization {
  const threadsPerModel = 2;
  const cfg: OfflineSpeakerDiarizationConfig = new OfflineSpeakerDiarizationConfig();

  // Duration thresholds (presumably in seconds; confirm against the library docs).
  cfg.minDurationOn = 0.2;
  cfg.minDurationOff = 0.5;

  // Segmentation model.
  cfg.segmentation.pyannote.model = 'sherpa-onnx-pyannote-segmentation-3-0/model.int8.onnx';
  cfg.segmentation.numThreads = threadsPerModel;
  cfg.segmentation.debug = true;

  // Speaker-embedding model.
  cfg.embedding.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
  cfg.embedding.numThreads = threadsPerModel;
  cfg.embedding.debug = true;

  const diarizer = new OfflineSpeakerDiarization(cfg, context.resourceManager);
  return diarizer;
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * Handled values of the payload's `msgType` field:
 *  - 'init-speaker-diarization': creates the diarizer (only once) from the
 *    payload's `context` and replies with 'init-speaker-diarization-done'.
 *  - 'speaker-diarization-file': processes the payload's `filename` and
 *    always replies with 'speaker-diarization-file-done' carrying a
 *    `result` string (the segments, or a description of what went wrong).
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  console.log(`from the main thread, msg-type: ${msgType}`);
  if (msgType == 'init-speaker-diarization' && !sd) {
    const context: Context = e.data['context'] as Context;
    sd = initOfflineSpeakerDiarization(context);
    workerPort.postMessage({msgType: 'init-speaker-diarization-done'});
    console.log('Init sd done');
  }

  if (msgType == 'speaker-diarization-file') {
    const filename = e.data['filename'] as string;
    let result = '';

    // Everything that can throw lives inside the try block so that the
    // host thread always gets a reply; without one its file button stays
    // disabled forever.
    try {
      if (!sd) {
        // A file request arrived before initialization completed.
        result = 'Speaker diarization is not initialized yet';
      } else {
        const wave = readWave(filename);
        if (wave == undefined || wave == null) {
          result = `Failed to read ${filename}`;
        } else if (wave.sampleRate != sd.sampleRate) {
          result = `Expected sample rate: ${sd.sampleRate}`;
          result += '\n';
          result += `Sample rate in file ${filename} is ${wave.sampleRate}`;
        } else {
          const duration = wave.samples.length / wave.sampleRate;
          console.log(`Processing ${filename} of ${duration} seconds`);

          const r: OfflineSpeakerDiarizationSegment[] = sd.process(wave.samples);
          console.log(`number of segments: ${r.length}`);

          // One line per segment: start, end and speaker label, tab separated.
          for (const s of r) {
            const start: string = s.start.toFixed(3);
            const end: string = s.end.toFixed(3);
            result += `${start}\t--\t${end}\tspeaker_${s.speaker}\n`;
          }

          if (r.length == 0) {
            result = 'The result is empty';
          }

          // Log once, after the loop, instead of re-printing the growing
          // string on every iteration.
          console.log(`result: ${result}`);
        }
      }
    } catch (err) {
      result = `Failed to process ${filename}: ${(err as Error).message}`;
      console.error(result);
    }

    workerPort.postMessage({
      msgType: 'speaker-diarization-file-done',
      result
    });
  }
};
/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
  // Leave a trace in the log instead of dropping the failure silently.
  console.error('worker: received a message that cannot be deserialized');
};

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
  // Report where the exception came from instead of dropping it silently.
  console.error(`worker error: ${e.message} (${e.filename}:${e.lineno}:${e.colno})`);
};
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "module description"
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "Speaker diarization"
},
{
"name": "mic_reason",
"value": "access the microphone for on-device speaker diarization with Next-gen Kaldi"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "module description"
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "Speaker diarization"
},
{
"name": "mic_reason",
"value": "access the microphone for on-device speaker diarization with Next-gen Kaldi"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "模块描述"
"value": "新一代Kaldi: 本地说话人日志"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "新一代Kaldi: 本地说话人日志"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "说话人日志"
},
{
"name": "mic_reason",
"value": "使用新一代Kaldi, 访问麦克风进行本地说话人日志 (不需要联网)"
}
]
}
5 changes: 0 additions & 5 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2053,11 +2053,6 @@ SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(

auto sd_config = GetOfflineSpeakerDiarizationConfig(config);

if (!sd_config.Validate()) {
SHERPA_ONNX_LOGE("Errors in config");
return nullptr;
}

SherpaOnnxOfflineSpeakerDiarization *sd =
new SherpaOnnxOfflineSpeakerDiarization;

Expand Down

0 comments on commit baac5df

Please sign in to comment.