diff --git a/harmony-os/.gitignore b/harmony-os/.gitignore index ddb010f66..dd2f4066e 100644 --- a/harmony-os/.gitignore +++ b/harmony-os/.gitignore @@ -1 +1,2 @@ !build-profile.json5 +*.har diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets index 959b6ba02..14dff071e 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets @@ -1,4 +1,8 @@ -export { readWave, readWaveFromBinary } from "libsherpa_onnx.so"; +export { + listRawfileDir, + readWave, + readWaveFromBinary, +} from "libsherpa_onnx.so"; export { CircularBuffer, diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 index 8f789fb2a..905c57127 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 @@ -4,7 +4,7 @@ "externalNativeOptions": { "path": "./src/main/cpp/CMakeLists.txt", "arguments": "", - "cppFlags": "", + "cppFlags": "-std=c++17", "abiFilters": [ "arm64-v8a", "x86_64", diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt index e131b21da..26dda1789 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt @@ -2,6 +2,10 @@ cmake_minimum_required(VERSION 3.13.0) project(myNpmLib) +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use") +endif() + # Disable warning about # # "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is @@ -46,6 +50,7 @@ add_library(sherpa_onnx SHARED speaker-identification.cc spoken-language-identification.cc streaming-asr.cc + utils.cc vad.cc wave-reader.cc wave-writer.cc diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc 
b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc index da70e662c..67f348e9b 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc @@ -213,12 +213,13 @@ static Napi::Number OfflineTtsNumSpeakersWrapper( return Napi::Number::New(env, num_speakers); } +// synchronous version static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); if (info.Length() != 2) { std::ostringstream os; - os << "Expect only 1 argument. Given: " << info.Length(); + os << "Expect only 2 arguments. Given: " << info.Length(); Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); @@ -298,8 +299,8 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { int32_t sid = obj.Get("sid").As().Int32Value(); float speed = obj.Get("speed").As().FloatValue(); - const SherpaOnnxGeneratedAudio *audio = - SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed); + const SherpaOnnxGeneratedAudio *audio; + audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed); if (enable_external_buffer) { Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( @@ -334,6 +335,256 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { } } +struct TtsCallbackData { + std::vector samples; + float progress; + bool processed = false; + bool cancelled = false; +}; + +// see +// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc +void InvokeJsCallback(Napi::Env env, Napi::Function callback, + Napi::Reference *context, + TtsCallbackData *data) { + if (env != nullptr) { + if (callback != nullptr) { + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * data->samples.size()); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, data->samples.size(), 
arrayBuffer, 0); + + std::copy(data->samples.begin(), data->samples.end(), + float32Array.Data()); + + Napi::Object arg = Napi::Object::New(env); + arg.Set(Napi::String::New(env, "samples"), float32Array); + arg.Set(Napi::String::New(env, "progress"), data->progress); + + auto v = callback.Call(context->Value(), {arg}); + data->processed = true; + if (v.IsNumber() && v.As().Int32Value()) { + data->cancelled = false; + } else { + data->cancelled = true; + } + } + } +} + +using TSFN = Napi::TypedThreadSafeFunction, + TtsCallbackData, InvokeJsCallback>; + +class TtsGenerateWorker : public Napi::AsyncWorker { + public: + TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts, + const std::string &text, float speed, int32_t sid, + bool use_external_buffer) + : tsfn_(tsfn), + Napi::AsyncWorker{env, "TtsGenerateWorker"}, + deferred_(env), + tts_(tts), + text_(text), + speed_(speed), + sid_(sid), + use_external_buffer_(use_external_buffer) {} + + Napi::Promise Promise() { return deferred_.Promise(); } + + ~TtsGenerateWorker() { + for (auto d : data_list_) { + delete d; + } + } + + protected: + void Execute() override { + auto callback = [](const float *samples, int32_t n, float progress, + void *arg) -> int32_t { + TtsGenerateWorker *_this = reinterpret_cast(arg); + + for (auto d : _this->data_list_) { + if (d->cancelled) { + OH_LOG_INFO(LOG_APP, "TtsGenerate is cancelled"); + return 0; + } + } + + auto data = new TtsCallbackData; + data->samples = std::vector{samples, samples + n}; + data->progress = progress; + _this->data_list_.push_back(data); + + _this->tsfn_.NonBlockingCall(data); + + return 1; + }; + audio_ = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg( + tts_, text_.c_str(), sid_, speed_, callback, this); + + tsfn_.Release(); + } + + void OnOK() override { + Napi::Env env = deferred_.Env(); + Napi::Object ans = Napi::Object::New(env); + if (use_external_buffer_) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, 
const_cast(audio_->samples), sizeof(float) * audio_->n, + [](Napi::Env /*env*/, void * /*data*/, + const SherpaOnnxGeneratedAudio *hint) { + SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint); + }, + audio_); + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0); + + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate); + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0); + + std::copy(audio_->samples, audio_->samples + audio_->n, + float32Array.Data()); + + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate); + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_); + } + + deferred_.Resolve(ans); + } + + private: + TSFN tsfn_; + Napi::Promise::Deferred deferred_; + SherpaOnnxOfflineTts *tts_; + std::string text_; + float speed_; + int32_t sid_; + bool use_external_buffer_; + + const SherpaOnnxGeneratedAudio *audio_; + + std::vector data_list_; +}; + +static Napi::Object OfflineTtsGenerateAsyncWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxOfflineTts *tts = + info[0].As>().Data(); + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object obj = info[1].As(); + + if (!obj.Has("text")) { + Napi::TypeError::New(env, "The argument object should have a field text") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("text").IsString()) { + Napi::TypeError::New(env, "The object['text'] should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("sid")) { + Napi::TypeError::New(env, "The argument object should have a field sid") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("sid").IsNumber()) { + Napi::TypeError::New(env, "The object['sid'] should be a number") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("speed")) { + Napi::TypeError::New(env, "The argument object should have a field speed") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("speed").IsNumber()) { + Napi::TypeError::New(env, "The object['speed'] should be a number") + .ThrowAsJavaScriptException(); + + return {}; + } + + bool enable_external_buffer = true; + if (obj.Has("enableExternalBuffer") && + obj.Get("enableExternalBuffer").IsBoolean()) { + enable_external_buffer = + obj.Get("enableExternalBuffer").As().Value(); + } + + Napi::String _text = obj.Get("text").As(); + std::string text = _text.Utf8Value(); + int32_t sid = obj.Get("sid").As().Int32Value(); + float speed = obj.Get("speed").As().FloatValue(); + + Napi::Function cb; + if (obj.Has("callback") && obj.Get("callback").IsFunction()) { + cb = obj.Get("callback").As(); + } + + auto 
context = + new Napi::Reference(Napi::Persistent(info.This())); + + TSFN tsfn = TSFN::New( + env, + cb, // JavaScript function called asynchronously + "TtsGenerateFunc", // Name + 0, // Unlimited queue + 1, // Only one thread will use this initially + context, + [](Napi::Env, void *, Napi::Reference *ctx) { delete ctx; }); + + const SherpaOnnxGeneratedAudio *audio; + TtsGenerateWorker *worker = new TtsGenerateWorker( + env, tsfn, tts, text, speed, sid, enable_external_buffer); + worker->Queue(); + return worker->Promise(); +} + void InitNonStreamingTts(Napi::Env env, Napi::Object exports) { exports.Set(Napi::String::New(env, "createOfflineTts"), Napi::Function::New(env, CreateOfflineTtsWrapper)); @@ -346,4 +597,7 @@ void InitNonStreamingTts(Napi::Env env, Napi::Object exports) { exports.Set(Napi::String::New(env, "offlineTtsGenerate"), Napi::Function::New(env, OfflineTtsGenerateWrapper)); + + exports.Set(Napi::String::New(env, "offlineTtsGenerateAsync"), + Napi::Function::New(env, OfflineTtsGenerateAsyncWrapper)); } diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc index 3f0affd79..54f0350fe 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc @@ -27,6 +27,10 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports); void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports); +#if __OHOS__ +void InitUtils(Napi::Env env, Napi::Object exports); +#endif + Napi::Object Init(Napi::Env env, Napi::Object exports) { InitStreamingAsr(env, exports); InitNonStreamingAsr(env, exports); @@ -41,7 +45,15 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { InitKeywordSpotting(env, exports); InitNonStreamingSpeakerDiarization(env, exports); +#if __OHOS__ + InitUtils(env, exports); +#endif + return 
exports; } +#if __OHOS__ +NODE_API_MODULE(sherpa_onnx, Init) +#else NODE_API_MODULE(addon, Init) +#endif diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts index f44ade356..057d5af25 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts @@ -1,3 +1,5 @@ +export const listRawfileDir: (mgr: object, dir: string) => Array; + export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number}; export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number}; export const createCircularBuffer: (capacity: number) => object; @@ -37,4 +39,11 @@ export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) export const createOfflineTts: (config: object, mgr?: object) => object; export const getOfflineTtsNumSpeakers: (handle: object) => number; export const getOfflineTtsSampleRate: (handle: object) => number; -export const offlineTtsGenerate: (handle: object, input: object) => object; + +export type TtsOutput = { + samples: Float32Array; + sampleRate: number; +}; + +export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput; +export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc new file mode 100644 index 000000000..33b8f2a29 --- /dev/null +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2024 Xiaomi Corporation + +#include +#include +#include +#include + +#include "macros.h" // NOLINT +#include "napi.h" // NOLINT + +static std::vector 
GetFilenames(NativeResourceManager *mgr, + const std::string &d) { + std::unique_ptr raw_dir( + OH_ResourceManager_OpenRawDir(mgr, d.c_str()), + &OH_ResourceManager_CloseRawDir); + int count = OH_ResourceManager_GetRawFileCount(raw_dir.get()); + std::vector ans; + ans.reserve(count); + for (int32_t i = 0; i < count; ++i) { + std::string filename = OH_ResourceManager_GetRawFileName(raw_dir.get(), i); + bool is_dir = OH_ResourceManager_IsRawDir( + mgr, d.empty() ? filename.c_str() : (d + "/" + filename).c_str()); + if (is_dir) { + auto files = GetFilenames(mgr, d.empty() ? filename : d + "/" + filename); + for (auto &f : files) { + ans.push_back(std::move(f)); + } + } else { + if (d.empty()) { + ans.push_back(std::move(filename)); + } else { + ans.push_back(d + "/" + filename); + } + } + } + + return ans; +} + +static Napi::Array ListRawFileDir(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[0]), + &OH_ResourceManager_ReleaseNativeResourceManager); + + if (!info[1].IsString()) { + Napi::TypeError::New(env, "Argument 1 should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + std::string dir = info[1].As().Utf8Value(); + + auto files = GetFilenames(mgr.get(), dir); + Napi::Array ans = Napi::Array::New(env, files.size()); + for (int32_t i = 0; i != files.size(); ++i) { + ans[i] = Napi::String::New(env, files[i]); + } + return ans; +} +void InitUtils(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "listRawfileDir"), + Napi::Function::New(env, ListRawFileDir)); +} diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets index c568b9990..a60a0e748 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets @@ -3,6 +3,7 @@ import { getOfflineTtsNumSpeakers, getOfflineTtsSampleRate, offlineTtsGenerate, + offlineTtsGenerateAsync, } from "libsherpa_onnx.so"; export class OfflineTtsVitsModelConfig { @@ -16,14 +17,14 @@ export class OfflineTtsVitsModelConfig { public lengthScale: number = 1.0; } -export class OfflineTtsModelConfig{ +export class OfflineTtsModelConfig { public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig(); public numThreads: number = 1; public debug: boolean = false; public provider: string = 'cpu'; } -export class OfflineTtsConfig{ +export class OfflineTtsConfig { public model: OfflineTtsModelConfig = new OfflineTtsModelConfig(); public ruleFsts: string = ''; public ruleFars: string = ''; @@ -35,17 +36,24 @@ export class TtsOutput { public sampleRate: 
number = 0; } +interface TtsCallbackData { + samples: Float32Array; + progress: number; +} + export class TtsInput { public text: string = ''; public sid: number = 0; public speed: number = 1.0; + public callback?: (data: TtsCallbackData) => number; } export class OfflineTts { - private handle: object; public config: OfflineTtsConfig; public numSpeakers: number; public sampleRate: number; + private handle: object; + constructor(config: OfflineTtsConfig, mgr?: object) { this.handle = createOfflineTts(config, mgr); this.config = config; @@ -63,4 +71,8 @@ export class OfflineTts { generate(input: TtsInput): TtsOutput { return offlineTtsGenerate(this.handle, input) as TtsOutput; } + + generateAsync(input: TtsInput): Promise { + return offlineTtsGenerateAsync(this.handle, input); + } } \ No newline at end of file diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets index 155eac680..8f1bf18d6 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets @@ -57,7 +57,6 @@ export class CircularBuffer { // samples is a float32 array push(samples: Float32Array) { - console.log(`here samples: ${samples}`); circularBufferPush(this.handle, samples); } diff --git a/harmony-os/SherpaOnnxTts/.gitignore b/harmony-os/SherpaOnnxTts/.gitignore new file mode 100644 index 000000000..d2ff20141 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/.gitignore @@ -0,0 +1,12 @@ +/node_modules +/oh_modules +/local.properties +/.idea +**/build +/.hvigor +.cxx +/.clangd +/.clang-format +/.clang-tidy +**/.test +/.appanalyzer \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/AppScope/app.json5 b/harmony-os/SherpaOnnxTts/AppScope/app.json5 new file mode 100644 index 000000000..e5d0228ac --- /dev/null +++ b/harmony-os/SherpaOnnxTts/AppScope/app.json5 @@ -0,0 +1,10 @@ +{ + "app": { + "bundleName": 
"com.k2fsa.sherpa.onnx.tts", + "vendor": "next-gen Kaldi", + "versionCode": 1000000, + "versionName": "1.0.0", + "icon": "$media:app_icon", + "label": "$string:app_name" + } +} diff --git a/harmony-os/SherpaOnnxTts/AppScope/resources/base/element/string.json b/harmony-os/SherpaOnnxTts/AppScope/resources/base/element/string.json new file mode 100644 index 000000000..2db317614 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/AppScope/resources/base/element/string.json @@ -0,0 +1,8 @@ +{ + "string": [ + { + "name": "app_name", + "value": "SherpaOnnxTts" + } + ] +} diff --git a/harmony-os/SherpaOnnxTts/AppScope/resources/base/media/app_icon.png b/harmony-os/SherpaOnnxTts/AppScope/resources/base/media/app_icon.png new file mode 100644 index 000000000..a39445dc8 Binary files /dev/null and b/harmony-os/SherpaOnnxTts/AppScope/resources/base/media/app_icon.png differ diff --git a/harmony-os/SherpaOnnxTts/build-profile.json5 b/harmony-os/SherpaOnnxTts/build-profile.json5 new file mode 100644 index 000000000..8e63d9768 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/build-profile.json5 @@ -0,0 +1,40 @@ +{ + "app": { + "signingConfigs": [], + "products": [ + { + "name": "default", + "signingConfig": "default", + "compatibleSdkVersion": "4.0.0(10)", + "runtimeOS": "HarmonyOS", + "buildOption": { + "strictMode": { + "caseSensitiveCheck": true, + } + } + } + ], + "buildModeSet": [ + { + "name": "debug", + }, + { + "name": "release" + } + ] + }, + "modules": [ + { + "name": "entry", + "srcPath": "./entry", + "targets": [ + { + "name": "default", + "applyToProducts": [ + "default" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/code-linter.json5 b/harmony-os/SherpaOnnxTts/code-linter.json5 new file mode 100644 index 000000000..77b31b517 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/code-linter.json5 @@ -0,0 +1,20 @@ +{ + "files": [ + "**/*.ets" + ], + "ignore": [ + "**/src/ohosTest/**/*", + "**/src/test/**/*", + "**/src/mock/**/*", + 
"**/node_modules/**/*", + "**/oh_modules/**/*", + "**/build/**/*", + "**/.preview/**/*" + ], + "ruleSet": [ + "plugin:@performance/recommended", + "plugin:@typescript-eslint/recommended" + ], + "rules": { + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/.gitignore b/harmony-os/SherpaOnnxTts/entry/.gitignore new file mode 100644 index 000000000..e2713a277 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/.gitignore @@ -0,0 +1,6 @@ +/node_modules +/oh_modules +/.preview +/build +/.cxx +/.test \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/build-profile.json5 b/harmony-os/SherpaOnnxTts/entry/build-profile.json5 new file mode 100644 index 000000000..554d19f3b --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/build-profile.json5 @@ -0,0 +1,33 @@ +{ + "apiType": "stageMode", + "buildOption": { + "sourceOption": { + "workers": [ + "./src/main/ets/workers/NonStreamingTtsWorker.ets" + ] + } + }, + "buildOptionSet": [ + { + "name": "release", + "arkOptions": { + "obfuscation": { + "ruleOptions": { + "enable": false, + "files": [ + "./obfuscation-rules.txt" + ] + } + } + } + }, + ], + "targets": [ + { + "name": "default" + }, + { + "name": "ohosTest", + } + ] +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/hvigorfile.ts b/harmony-os/SherpaOnnxTts/entry/hvigorfile.ts new file mode 100644 index 000000000..c6edcd904 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/hvigorfile.ts @@ -0,0 +1,6 @@ +import { hapTasks } from '@ohos/hvigor-ohos-plugin'; + +export default { + system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */ + plugins:[] /* Custom plugin to extend the functionality of Hvigor. 
*/ +} diff --git a/harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt b/harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt new file mode 100644 index 000000000..272efb6ca --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt @@ -0,0 +1,23 @@ +# Define project specific obfuscation rules here. +# You can include the obfuscation configuration files in the current module's build-profile.json5. +# +# For more details, see +# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5 + +# Obfuscation options: +# -disable-obfuscation: disable all obfuscations +# -enable-property-obfuscation: obfuscate the property names +# -enable-toplevel-obfuscation: obfuscate the names in the global scope +# -compact: remove unnecessary blank spaces and all line feeds +# -remove-log: remove all console.* statements +# -print-namecache: print the name cache that contains the mapping from the old names to new names +# -apply-namecache: reuse the given cache file + +# Keep options: +# -keep-property-name: specifies property names that you want to keep +# -keep-global-name: specifies names that you want to keep in the global scope + +-enable-property-obfuscation +-enable-toplevel-obfuscation +-enable-filename-obfuscation +-enable-export-obfuscation \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5 b/harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5 new file mode 100644 index 000000000..debb8e01e --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5 @@ -0,0 +1,29 @@ +{ + "meta": { + "stableOrder": true + }, + "lockfileVersion": 3, + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. 
DO NOT EDIT THIS FILE DIRECTLY.", + "specifiers": { + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx", + "sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32" + }, + "packages": { + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": { + "name": "libsherpa_onnx.so", + "version": "1.0.0", + "resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx", + "registryType": "local" + }, + "sherpa_onnx@1.10.32": { + "name": "sherpa_onnx", + "version": "1.10.32", + "integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==", + "resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har", + "registryType": "ohpm", + "dependencies": { + "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx" + } + } + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/oh-package.json5 b/harmony-os/SherpaOnnxTts/entry/oh-package.json5 new file mode 100644 index 000000000..daff21b30 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/oh-package.json5 @@ -0,0 +1,12 @@ +{ + "name": "entry", + "version": "1.0.0", + "description": "Please describe the basic information.", + "main": "", + "author": "", + "license": "", + "dependencies": { + "sherpa_onnx": "1.10.32", + } +} + diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/ets/entryability/EntryAbility.ets b/harmony-os/SherpaOnnxTts/entry/src/main/ets/entryability/EntryAbility.ets new file mode 100644 index 000000000..679d91453 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/entryability/EntryAbility.ets @@ -0,0 +1,43 @@ +import AbilityConstant from '@ohos.app.ability.AbilityConstant'; +import hilog from '@ohos.hilog'; +import 
UIAbility from '@ohos.app.ability.UIAbility'; +import Want from '@ohos.app.ability.Want'; +import window from '@ohos.window'; + +export default class EntryAbility extends UIAbility { + onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void { + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate'); + } + + onDestroy(): void { + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy'); + } + + onWindowStageCreate(windowStage: window.WindowStage): void { + // Main window is created, set main page for this ability + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate'); + + windowStage.loadContent('pages/Index', (err) => { + if (err.code) { + hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? ''); + return; + } + hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.'); + }); + } + + onWindowStageDestroy(): void { + // Main window is destroyed, release UI related resources + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy'); + } + + onForeground(): void { + // Ability has brought to foreground + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground'); + } + + onBackground(): void { + // Ability has back to background + hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground'); + } +} diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets b/harmony-os/SherpaOnnxTts/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets new file mode 100644 index 000000000..d2c48b421 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets @@ -0,0 +1,12 @@ +import hilog from '@ohos.hilog'; +import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility'; + +export default class EntryBackupAbility extends BackupExtensionAbility { + async onBackup() { + hilog.info(0x0000, 'testTag', 'onBackup ok'); + 
} + + async onRestore(bundleVersion: BundleVersion) { + hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion)); + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets new file mode 100644 index 000000000..45927b772 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets @@ -0,0 +1,409 @@ +import { CircularBuffer } from 'sherpa_onnx'; +import worker, { MessageEvents } from '@ohos.worker'; +import { audio } from '@kit.AudioKit'; +import picker from '@ohos.file.picker'; +import fs from '@ohos.file.fs'; +import systemTime from '@ohos.systemTime'; + + +function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) { + const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE); + + const header = new ArrayBuffer(44); + const view = new DataView(header); + + // http://soundfile.sapp.org/doc/WaveFormat/ + // F F I R + view.setUint32(0, 0x46464952, true); // chunkID + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize // E V A W + view.setUint32(8, 0x45564157, true); // format // // t m f + view.setUint32(12, 0x20746d66, true); // subchunk1ID + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM + view.setUint32(20, 1, true); // audioFormat, 1 for PCM + view.setUint16(22, 1, true); // numChannels: 1 channel + view.setUint32(24, sampleRate, true); // sampleRate + view.setUint32(28, sampleRate * 2, true); // byteRate + view.setUint16(32, 2, true); // blockAlign + view.setUint16(34, 16, true); // bitsPerSample + view.setUint32(36, 0x61746164, true); // Subchunk2ID + view.setUint32(40, samples.length * 2, true); // subchunk2Size + + fs.writeSync(fp.fd, new Uint8Array(header).buffer, { length: header.byteLength }); + fs.writeSync(fp.fd, samples.buffer, { length: samples.buffer.byteLength }); + + fs.closeSync(fp.fd); +} + +function toInt16Samples(samples: 
Float32Array): Int16Array { + const int16Samples = new Int16Array(samples.length); + for (let i = 0; i < samples.length; ++i) { + let s = samples[i] * 32767; + s = s > 32767 ? 32767 : s; + s = s < -32768 ? -32768 : s; + int16Samples[i] = s; + } + + return int16Samples; +} + + +@Entry +@Component +struct Index { + @State currentIndex: number = 0; + @State title: string = 'Next-gen Kaldi: Text-to-speech'; + @State info: string = ''; + @State btnStartCaption: string = 'Start'; + @State btnStartEnabled: boolean = false; + @State btnStopCaption: string = 'Stop'; + @State btnStopEnabled: boolean = false; + @State btnSaveCaption: string = 'Save'; + @State btnSaveEnabled: boolean = false; + @State progress: number = 0; + @State sid: string = '0'; + @State speechSpeed: string = '1.0'; + @State isGenerating: boolean = false; + @State initTtsDone: boolean = false; + @State ttsGeneratedDone: boolean = true; + @State numSpeakers: number = 1; + @State initAudioDone: boolean = false; + private controller: TabsController = new TabsController(); + private cancelled: boolean = false; + private sampleRate: number = 0; + private startTime: number = 0; + private stopTime: number = 0; + private inputText: string = ''; + // it specifies only the initial capacity. + private workerInstance?: worker.ThreadWorker + private readonly scriptURL: string = 'entry/ets/workers/NonStreamingTtsWorker.ets' + // note that circular buffer can automatically resize. + private sampleBuffer: CircularBuffer = new CircularBuffer(16000 * 5); + private finalSamples: Float32Array | null = null; + private audioRenderer: audio.AudioRenderer | null = null; + + initAudioRenderer() { + if (this.audioRenderer) { + console.log(`Audio renderer has already been created. 
Skip creating`); + return; + } // see // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/using-audiorenderer-for-playback-V5 + console.log('Initializing audio renderer'); + const audioStreamInfo: audio.AudioStreamInfo = { + samplingRate: this.sampleRate, + channels: audio.AudioChannel.CHANNEL_1, // 通道 + sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, + encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW + }; + + const audioRendererInfo: audio.AudioRendererInfo = { + usage: audio.StreamUsage.STREAM_USAGE_MUSIC, rendererFlags: 0 + }; + + const audioRendererOptions: audio.AudioRendererOptions = { + streamInfo: audioStreamInfo, rendererInfo: audioRendererInfo + }; + + audio.createAudioRenderer(audioRendererOptions, (err, renderer) => { + if (!err) { + console.log('audio renderer initialized successfully'); + this.initAudioDone = true; + if (renderer) { + this.audioRenderer = renderer; + this.audioRenderer.on("writeData", this.audioPlayCallback); + if (this.sampleBuffer.size()) { + this.audioRenderer.start(); + } + } else { + console.log(`returned audio renderer is ${renderer}`); + } + } else { + console.log(`Failed to initialize audio renderer. 
error message: ${err.message}, error code: ${err.code}`);
+      }
+    });
+  }
+
+  /**
+   * Lifecycle hook. Creates the TTS worker, installs the handler for the
+   * messages it sends back ('init-tts-done', 'tts-generate-partial',
+   * 'tts-generate-done') and asks the worker to load the model.
+   */
+  async aboutToAppear() {
+    // Note: the audio renderer is NOT created here. this.sampleRate is still 0
+    // at this point; it is created once the worker has reported the real
+    // sample rate in 'init-tts-done'.
+    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
+      name: 'NonStreaming TTS worker'
+    });
+    this.workerInstance.onmessage = (e: MessageEvents) => {
+      const msgType = e.data['msgType'] as string;
+      console.log(`received msg from worker: ${msgType}`);
+
+      if (msgType == 'init-tts-done') {
+        this.info = 'Model initialized!\nPlease enter text and press start.';
+        this.sampleRate = e.data['sampleRate'] as number;
+        this.numSpeakers = e.data['numSpeakers'] as number;
+
+        this.initTtsDone = true;
+
+        // The sample rate is known now, so the renderer can be configured.
+        this.initAudioRenderer();
+      }
+
+      if (msgType == 'tts-generate-partial') {
+        // Drop chunks that arrive after the user pressed Stop.
+        if (this.cancelled) {
+          return;
+        }
+
+        const samples: Float32Array = e.data['samples'] as Float32Array;
+        const progress: number = e.data['progress'] as number;
+        this.progress = progress;
+
+        this.sampleBuffer.push(samples);
+
+        // Retry in case the earlier attempt to create the renderer failed.
+        if (!this.initAudioDone) {
+          this.initAudioRenderer();
+        }
+
+        if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING) {
+          this.audioRenderer.start();
+        }
+      }
+
+      if (msgType == 'tts-generate-done') {
+        this.isGenerating = false;
+        const samples: Float32Array = e.data['samples'] as Float32Array;
+
+        systemTime.getRealTime((err, data) => {
+
+          if (err) {
+            console.log(`Failed to get stop time`)
+          } else {
+            this.stopTime = data;
+
+            const audioDuration = samples.length / this.sampleRate;
+            const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
+            const RTF = elapsedSeconds / audioDuration;
+
+            this.info = `Audio duration: ${audioDuration} s
+Elapsed: ${elapsedSeconds} s
+RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
+`;
+            if (this.cancelled) {
+              this.info += '\nCancelled.';
+            }
+          }
+        });
+
+        this.finalSamples = samples;
+        this.ttsGeneratedDone = true;
+        this.btnSaveEnabled = true;
+
+        // Nothing has been queued or played yet: play the whole result now.
+        // Skip this after a cancel; otherwise pressing Stop (which stops the
+        // renderer and empties the buffer) would be followed by the audio
+        // starting to play again.
+        if (!this.cancelled && this.audioRenderer &&
+          this.audioRenderer?.state != audio.AudioState.STATE_RUNNING &&
+          this.sampleBuffer.size() == 0) {
+          this.sampleBuffer.push(samples);
+          this.progress = 1;
+          this.audioRenderer.start();
+        }
+
+        if (!this.initAudioDone) {
+          this.btnStartEnabled = true;
+          this.btnStopEnabled = false;
+          this.info += '\nAudio renderer is not initialized. Disable playing audio.';
+        }
+      }
+    }
+
+    this.info = 'Initializing TTS model ...';
+    this.workerInstance.postMessage({ msgType: 'init-tts', context: getContext() });
+  }
+
+  /**
+   * Builds one entry of the tab bar: an icon above a caption. Clicking it
+   * switches to the tab with index `targetIndex`.
+   */
+  @Builder
+  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
+    Column() {
+      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
+      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
+    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
+      this.currentIndex = targetIndex;
+      this.controller.changeIndex(this.currentIndex);
+    })
+  }
+
+  build() {
+    Column() {
+      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
+        TabContent() {
+          Column({ space: 10 }) {
+            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
+            if (this.numSpeakers > 1) {
+              Row({ space: 10 }) {
+                Text(`Speaker ID (0-${this.numSpeakers - 1})`).width('60%')
+
+                TextInput({ text: this.sid }).onChange((text) => {
+                  this.sid = text.trim();
+                }).width('20%')
+              }.justifyContent(FlexAlign.Center)
+            }
+
+            Row() {
+              Text('Speech speed').width('60%');
+
+              TextInput({ text: this.speechSpeed }).onChange((text) => {
+                this.speechSpeed = text.trim();
+              }).width('20%')
+            }
+
+            Row({ space: 10 }) {
+              Button(this.btnStartCaption).enabled(this.btnStartEnabled).onClick(async () => {
+                // Round-tripping through toString() rejects inputs such as
+                // '1.5', '2abc' or '' that parseInt() would silently accept.
+                let sid = parseInt(this.sid);
+                if (sid.toString() != this.sid) {
+                  this.info = 'Please input a valid speaker ID';
+                  return;
+                }
+
+                // Math.max() keeps 0 a valid ID should numSpeakers ever be
+                // reported as less than 1.
+                const maxSid = Math.max(this.numSpeakers, 1) - 1;
+                if (sid < 0 || sid > maxSid) {
+                  this.info = `Please input a speaker ID in the range 0-${maxSid}`;
+                  return;
+                }
+
+                let speed = parseFloat(this.speechSpeed);
+                if (isNaN(speed)) {
+                  this.info = 'Please enter a valid speech speed';
+                  return;
+                }
+
+                if (speed <= 0) {
+                  this.info = 'Please enter a
positive speech speed'; + return; + } + + if (this.workerInstance && this.initTtsDone) { + this.info = 'Generating...'; + this.cancelled = false; + this.finalSamples = null; + this.sampleBuffer.reset(); + this.ttsGeneratedDone = false; + this.progress = 0; + + this.btnStartEnabled = false; + this.btnStopEnabled = true; + this.btnSaveEnabled = false; + console.log(`sending ${this.inputText}`) + this.ttsGeneratedDone = false; + this.startTime = await systemTime.getRealTime(); + this.workerInstance?.postMessage({ + msgType: 'tts-generate', + text: this.inputText, + sid: sid, + speed: speed, + }); + this.isGenerating = true; + this.info = ''; + } else { + this.info = 'Failed to initialize tts model'; + this.btnStartEnabled = false; + } + }); + + Button(this.btnStopCaption).enabled(this.btnStopEnabled).onClick(() => { + this.ttsGeneratedDone = true; + this.btnStartEnabled = true; + this.btnStopEnabled = false; + this.sampleBuffer.reset(); + this.cancelled = true; + this.isGenerating = false; + + if (this.workerInstance && this.initTtsDone) { + this.workerInstance.postMessage({ msgType: 'tts-generate-cancel' }); + } + this.audioRenderer?.stop(); + }) + + Button(this.btnSaveCaption).enabled(this.btnSaveEnabled).onClick(() => { + if (!this.finalSamples || this.finalSamples.length == 0) { + + this.btnSaveEnabled = false; + return; + } + + let uri: string = ''; + + const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav']; + + const audioViewPicker = new picker.AudioViewPicker(); + + audioViewPicker.save(audioOptions).then((audioSelectResult: Array) => { + uri = audioSelectResult[0]; + if (this.finalSamples) { + savePcmToWav(uri, toInt16Samples(this.finalSamples), this.sampleRate); + console.log(`Saved to ${uri}`); + this.info += `\nSaved to ${uri}`; + } + }); + }); + } + + if (this.info != '') { + TextArea({ text: this.info }).focusable(false); + } + if (this.progress > 0) { + Row() { + Progress({ value: 0, total: 100, type: 
ProgressType.Capsule })
+                  .width('80%')
+                  .height(20)
+                  .value(this.progress * 100);
+
+                Text(`${(this.progress * 100).toFixed(2)}%`).width('15%')
+              }.width('100%').justifyContent(FlexAlign.Center)
+            }
+
+            TextArea({ placeholder: 'Input text for TTS and click the start button' })
+              .width('100%')
+              .height('100%')
+              .focusable(this.isGenerating == false && this.initTtsDone)
+              .onChange((text) => {
+                this.inputText = text;
+                if (text.trim() == '') {
+                  this.btnStartEnabled = false;
+                  return;
+                }
+                this.btnStartEnabled = true;
+              })
+          }.width('100%')
+
+          // see https://composeicons.com/
+        }.tabBar(this.TabBuilder('TTS', 0, $r('app.media.home'), $r('app.media.home')))
+
+        TabContent() {
+          Column({space: 10}) {
+            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
+            TextArea({text: `
+Everything is open-sourced.
+
+It runs locally, without accessing the network
+
+See also https://github.com/k2-fsa/sherpa-onnx
+
+新一代 Kaldi QQ 和微信交流群: 请看
+
+https://k2-fsa.github.io/sherpa/social-groups.html
+
+微信公众号: 新一代 Kaldi
+ `}).width('100%')
+              .height('100%')
+              .focusable(false)
+          }.justifyContent(FlexAlign.Start)
+        }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
+      }.scrollable(false)
+    }
+  }
+
+  /**
+   * 'writeData' callback registered on the audio renderer. Fills `buffer`
+   * with 16-bit PCM converted from the float samples queued in sampleBuffer.
+   *
+   * While generation is still running, data is only written when a whole
+   * buffer is available; otherwise silence is written. Once generation has
+   * finished, the remaining shorter tail is written as well (padded with
+   * silence), and the renderer is stopped on the next call, when the queue
+   * is empty.
+   */
+  private audioPlayCallback = (buffer: ArrayBuffer) => {
+    const numSamples = buffer.byteLength / 2;
+    const int16Samples = new Int16Array(buffer);
+    const available = this.sampleBuffer.size();
+
+    // Number of queued samples to write in this call. After generation is
+    // done no more samples will arrive, so flush what is left instead of
+    // dropping the end of the utterance.
+    let n = 0;
+    if (available >= numSamples) {
+      n = numSamples;
+    } else if (this.ttsGeneratedDone) {
+      n = available;
+    }
+
+    if (n > 0) {
+      const samples: Float32Array = this.sampleBuffer.get(this.sampleBuffer.head(), n);
+      for (let i = 0; i < n; ++i) {
+        // scale to the int16 range and clamp
+        let s = samples[i] * 32767;
+        s = s > 32767 ? 32767 : s;
+        s = s < -32768 ? -32768 : s;
+        int16Samples[i] = s;
+      }
+      // pad a partially filled buffer with silence
+      int16Samples.fill(0, n);
+      this.sampleBuffer.pop(n);
+      return;
+    }
+
+    // Nothing to play right now: output silence.
+    int16Samples.fill(0);
+    if (this.ttsGeneratedDone) {
+      this.audioRenderer?.stop();
+      this.btnStartEnabled = true;
+      this.btnStopEnabled = false;
+    }
+  };
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets b/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
new file mode 100644
index 000000000..bd5c7a5b8
--- /dev/null
+++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
@@ -0,0 +1,284 @@
+import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
+
+import { fileIo as fs } from '@kit.CoreFileKit';
+
+import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
+import { buffer } from '@kit.ArkTS';
+
+const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
+
+let tts: OfflineTts;
+let cancelled = false;
+
+/**
+ * Creates the directory parts[0]/parts[1]/... inside the application's
+ * sandbox files directory, creating every missing level on the way.
+ *
+ * @param context used to look up the sandbox files directory
+ * @param parts path components of the directory, relative to the sandbox
+ */
+function mkdir(context: Context, parts: string[]) {
+  const sandboxPath: string = context.getApplicationContext().filesDir;
+
+  // Fast path: the whole directory already exists. The check has to be made
+  // on the sandbox path, since that is where the directories are created.
+  if (fs.accessSync(sandboxPath + '/' + parts.join('/'))) {
+    return;
+  }
+
+  let d = sandboxPath
+  for (const p of parts) {
+    d = d + '/' + p;
+
+    if (fs.accessSync(d)) {
+      continue;
+    }
+
+    fs.mkdirSync(d);
+  }
+}
+
+/**
+ * Copies every file that listRawfileDir() reports for the rawfile directory
+ * `srcDir` into the sandbox, keeping the relative paths.
+ *
+ * @param context provides the resource manager and the sandbox location
+ * @param srcDir directory inside rawfile to copy
+ */
+function copyRawFileDirToSandbox(context: Context, srcDir: string) {
+  let mgr = context.resourceManager;
+  const allFiles: string[] = listRawfileDir(mgr, srcDir);
+  for (const src of allFiles) {
+    const parts: string[] = src.split('/');
+    if (parts.length != 1) {
+      // the file lives in a sub directory; make sure it exists first
+      mkdir(context, parts.slice(0, -1));
+    }
+
+    copyRawFileToSandbox(context, src, src);
+  }
+}
+
+/**
+ * Copies the rawfile `src` to `dst` inside the sandbox files directory.
+ * The copy is skipped if `dst` already exists with the same size.
+ *
+ * @param context provides the resource manager and the sandbox location
+ * @param src path of the file inside rawfile
+ * @param dst destination path, relative to the sandbox files directory
+ */
+function copyRawFileToSandbox(context: Context, src: string, dst: string) {
+  // see https://blog.csdn.net/weixin_44640245/article/details/142634846
+  // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
+  let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
+
+  // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
+  let sandboxPath: string = context.getApplicationContext().filesDir;
+  let filepath = sandboxPath + '/' + dst;
+
+  if (fs.accessSync(filepath)) {
+    // if the destination exists and has the expected file size,
+    // then we skip copying it
+    let stat = fs.statSync(filepath);
+    if (stat.size == uint8Array.length) {
+      return;
+    }
+  }
+
+  const fp = fs.openSync(filepath, fs.OpenMode.WRITE_ONLY | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
+  try {
+    fs.writeSync(fp.fd, buffer.from(uint8Array).buffer)
+  } finally {
+    // Close synchronously, and also when the write throws. fs.close() only
+    // returns a promise, which was neither awaited nor checked before.
+    fs.closeSync(fp);
+  }
+}
+
+function initTts(context: Context): OfflineTts {
+  // Such a design is to make it easier to build flutter APPs with
+  // github actions for a variety of tts models
+  //
+  // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
+  // for details
+
+  let modelDir = '';
+  let modelName = '';
+  let ruleFsts = '';
+  let ruleFars = '';
+  let lexicon = '';
+  let dataDir = '';
+  let dictDir = '';
+  // You can select an example below and change it to match your
+  // selected tts model
+
+  // ============================================================
+  // Your change starts here
+  // ============================================================
+
+  // Example 1:
+  // modelDir = 'vits-vctk';
+  // modelName = 'vits-vctk.onnx';
+  // lexicon = 'lexicon.txt';
+
+  // Example 2:
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
+  // modelDir = 'vits-piper-en_US-amy-low';
+  // modelName = 'en_US-amy-low.onnx';
+  // dataDir = 'espeak-ng-data';
+
+  // Example 3:
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
+  // modelDir = 'vits-icefall-zh-aishell3';
+  // modelName = 'model.onnx';
+  // ruleFsts =
'phone.fst,date.fst,number.fst,new_heteronym.fst'; + // ruleFars = 'rule.far'; + // lexicon = 'lexicon.txt'; + + // Example 4: + // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers + // modelDir = 'vits-zh-hf-fanchen-C'; + // modelName = 'vits-zh-hf-fanchen-C.onnx'; + // lexicon = 'lexicon.txt'; + // dictDir = 'dict'; + + // Example 5: + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 + // modelDir = 'vits-coqui-de-css10'; + // modelName = 'model.onnx'; + + // Example 6 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 + // modelDir = 'vits-piper-en_US-libritts_r-medium'; + // modelName = 'en_US-libritts_r-medium.onnx'; + // dataDir = 'espeak-ng-data'; + + // Example 7 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2 + // modelDir = 'vits-melo-tts-zh_en'; + // modelName = 'model.onnx'; + // lexicon = 'lexicon.txt'; + // dictDir = 'dict'; + // ruleFsts = `date.fst,phone.fst,number.fst`; + + // ============================================================ + // Please don't change the remaining part of this function + // ============================================================ + + if (modelName == '') { + throw new Error('You are supposed to select a model by changing the code before you run the app'); + } + + modelName = modelDir + '/' + modelName; + + if (ruleFsts != '') { + let fsts = ruleFsts.split(',') + let tmp: string[] = []; + for (const f of fsts) { + tmp.push(modelDir + '/' + f); + } + ruleFsts = tmp.join(','); + } + + if (ruleFars != '') { + let fars = ruleFars.split(',') + let tmp: string[] = []; + for (const f of fars) { + tmp.push(modelDir + '/' + f); + } + ruleFars = tmp.join(','); + 
} + + if (lexicon != '') { + lexicon = modelDir + '/' + lexicon; + } + + if (dataDir != '') { + copyRawFileDirToSandbox(context, modelDir + '/' + dataDir) + let sandboxPath: string = context.getApplicationContext().filesDir; + dataDir = sandboxPath + '/' + modelDir + '/' + dataDir; + } + + if (dictDir != '') { + copyRawFileDirToSandbox(context, modelDir + '/' + dictDir) + let sandboxPath: string = context.getApplicationContext().filesDir; + dictDir = sandboxPath + '/' + modelDir + '/' + dictDir; + } + + const tokens = modelDir + '/tokens.txt'; + + const config: OfflineTtsConfig = new OfflineTtsConfig(); + config.model.vits.model = modelName; + config.model.vits.lexicon = lexicon; + config.model.vits.tokens = tokens; + config.model.vits.dataDir = dataDir; + config.model.vits.dictDir = dictDir; + config.model.numThreads = 2; + config.model.debug = true; + config.ruleFsts = ruleFsts; + config.ruleFars = ruleFars; + + return new OfflineTts(config, context.resourceManager); +} + +interface TtsCallbackData { + samples: Float32Array; + progress: number; +} + +function callback(data: TtsCallbackData): number { + workerPort.postMessage({ + 'msgType': 'tts-generate-partial', + samples: Float32Array.from(data.samples), + progress: data.progress, + }); + + // 0 means to stop generating in C++ + // 1 means to continue generating in C++ + return cancelled? 0 : 1; +} + +/** + * Defines the event handler to be called when the worker thread receives a message sent by the host thread. + * The event handler is executed in the worker thread. 
+ *
+ * Handled message types:
+ *  - 'init-tts': create the TTS engine (only once) and reply with
+ *    'init-tts-done' carrying sampleRate and numSpeakers
+ *  - 'tts-generate-cancel': make the next progress callback ask the
+ *    generator to stop
+ *  - 'tts-generate': synthesize e.data.text and reply with
+ *    'tts-generate-done' carrying all generated samples
+ *
+ * @param e message data
+ */
+workerPort.onmessage = (e: MessageEvents) => {
+  const msgType = e.data['msgType'] as string;
+  console.log(`msg-type: ${msgType}`);
+  if (msgType == 'init-tts' && !tts) {
+    const context = e.data['context'] as Context;
+    tts = initTts(context);
+    workerPort.postMessage({ 'msgType': 'init-tts-done',
+      sampleRate: tts.sampleRate,
+      numSpeakers: tts.numSpeakers,
+    });
+  }
+
+  if (msgType == 'tts-generate-cancel') {
+    cancelled = true;
+  }
+
+  if (msgType == 'tts-generate') {
+    if (!tts) {
+      // 'tts-generate' arrived before a successful 'init-tts'
+      console.error('TTS is not initialized. Ignore the tts-generate request');
+      return;
+    }
+
+    const text = e.data['text'] as string;
+    console.log(`received text ${text}`);
+    const input: TtsInput = new TtsInput();
+    input.text = text;
+    input.sid = e.data['sid'] as number;
+    input.speed = e.data['speed'] as number;
+    input.callback = callback;
+
+    cancelled = false;
+
+    // tts.generate(input) is the synchronous alternative; the asynchronous
+    // version is the one in use.
+    tts.generateAsync(input).then((ttsOutput: TtsOutput) => {
+      console.log(`sampleRate: ${ttsOutput.sampleRate}`);
+
+      workerPort.postMessage({
+        'msgType': 'tts-generate-done',
+        samples: Float32Array.from(ttsOutput.samples),
+      });
+    }).catch((err: Error) => {
+      // do not leave the rejection unhandled
+      console.error(`Failed to generate speech: ${err.message}`);
+    });
+  }
+}
+
+/**
+ * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
+ * The event handler is executed in the worker thread.
+ *
+ * @param e message data
+ */
+workerPort.onmessageerror = (e: MessageEvents) => {
+}
+
+/**
+ * Defines the event handler to be called when an exception occurs during worker execution.
+ * The event handler is executed in the worker thread.
+ * + * @param e error message + */ +workerPort.onerror = (e: ErrorEvent) => { +} diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/module.json5 b/harmony-os/SherpaOnnxTts/entry/src/main/module.json5 new file mode 100644 index 000000000..a1cea8b6a --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/module.json5 @@ -0,0 +1,52 @@ +{ + "module": { + "name": "entry", + "type": "entry", + "description": "$string:module_desc", + "mainElement": "EntryAbility", + "deviceTypes": [ + "phone", + "tablet", + "2in1" + ], + "deliveryWithInstall": true, + "installationFree": false, + "pages": "$profile:main_pages", + "abilities": [ + { + "name": "EntryAbility", + "srcEntry": "./ets/entryability/EntryAbility.ets", + "description": "$string:EntryAbility_desc", + "icon": "$media:layered_image", + "label": "$string:EntryAbility_label", + "startWindowIcon": "$media:startIcon", + "startWindowBackground": "$color:start_window_background", + "exported": true, + "skills": [ + { + "entities": [ + "entity.system.home" + ], + "actions": [ + "action.system.home" + ] + } + ] + } + ], + "extensionAbilities": [ + { + "name": "EntryBackupAbility", + "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets", + "type": "backup", + "exported": false, + "metadata": [ + { + "name": "ohos.extension.backup", + "resource": "$profile:backup_config" + } + ], + } + ] + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/color.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/color.json new file mode 100644 index 000000000..3c712962d --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/color.json @@ -0,0 +1,8 @@ +{ + "color": [ + { + "name": "start_window_background", + "value": "#FFFFFF" + } + ] +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/string.json 
new file mode 100644 index 000000000..29b5d21cd --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/string.json @@ -0,0 +1,16 @@ +{ + "string": [ + { + "name": "module_desc", + "value": "On-device text-to-speech with Next-gen Kaldi" + }, + { + "name": "EntryAbility_desc", + "value": "On-device text-to-speech with Next-gen Kaldi" + }, + { + "name": "EntryAbility_label", + "value": "TTS" + } + ] +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/background.png b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/background.png new file mode 100644 index 000000000..f939c9fa8 Binary files /dev/null and b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/background.png differ diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/foreground.png b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/foreground.png new file mode 100644 index 000000000..4483ddad1 Binary files /dev/null and b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/foreground.png differ diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/home.svg b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/home.svg new file mode 100644 index 000000000..504af3400 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/home.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/info.svg b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/info.svg new file mode 100644 index 000000000..2210223f4 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/info.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/layered_image.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/layered_image.json new file mode 100644 index 
000000000..fb4992044 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/layered_image.json @@ -0,0 +1,7 @@ +{ + "layered-image": + { + "background" : "$media:background", + "foreground" : "$media:foreground" + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/startIcon.png b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/startIcon.png new file mode 100644 index 000000000..205ad8b5a Binary files /dev/null and b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/startIcon.png differ diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/backup_config.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/backup_config.json new file mode 100644 index 000000000..78f40ae7c --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/backup_config.json @@ -0,0 +1,3 @@ +{ + "allowToBackupRestore": true +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/main_pages.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/main_pages.json new file mode 100644 index 000000000..1898d94f5 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/main_pages.json @@ -0,0 +1,5 @@ +{ + "src": [ + "pages/Index" + ] +} diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/en_US/element/string.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/en_US/element/string.json new file mode 100644 index 000000000..29b5d21cd --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/en_US/element/string.json @@ -0,0 +1,16 @@ +{ + "string": [ + { + "name": "module_desc", + "value": "On-device text-to-speech with Next-gen Kaldi" + }, + { + "name": "EntryAbility_desc", + "value": "On-device text-to-speech with Next-gen Kaldi" + }, + { + "name": "EntryAbility_label", + "value": "TTS" + } + ] +} \ No newline at end 
of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/rawfile/.gitkeep b/harmony-os/SherpaOnnxTts/entry/src/main/resources/rawfile/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/resources/zh_CN/element/string.json b/harmony-os/SherpaOnnxTts/entry/src/main/resources/zh_CN/element/string.json new file mode 100644 index 000000000..c545b1b46 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/main/resources/zh_CN/element/string.json @@ -0,0 +1,16 @@ +{ + "string": [ + { + "name": "module_desc", + "value": "使用新一代Kaldi进行本地离线语音合成" + }, + { + "name": "EntryAbility_desc", + "value": "使用新一代Kaldi进行本地离线语音合成" + }, + { + "name": "EntryAbility_label", + "value": "本地语音合成" + } + ] +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/Ability.test.ets b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/Ability.test.ets new file mode 100644 index 000000000..8aa374977 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/Ability.test.ets @@ -0,0 +1,35 @@ +import hilog from '@ohos.hilog'; +import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium'; + +export default function abilityTest() { + describe('ActsAbilityTest', () => { + // Defines a test suite. Two parameters are supported: test suite name and test suite function. + beforeAll(() => { + // Presets an action, which is performed only once before all test cases of the test suite start. + // This API supports only one parameter: preset action function. + }) + beforeEach(() => { + // Presets an action, which is performed before each unit test case starts. + // The number of execution times is the same as the number of test cases defined by **it**. + // This API supports only one parameter: preset action function. + }) + afterEach(() => { + // Presets a clear action, which is performed after each unit test case ends. 
+ // The number of execution times is the same as the number of test cases defined by **it**. + // This API supports only one parameter: clear action function. + }) + afterAll(() => { + // Presets a clear action, which is performed after all test cases of the test suite end. + // This API supports only one parameter: clear action function. + }) + it('assertContain', 0, () => { + // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function. + hilog.info(0x0000, 'testTag', '%{public}s', 'it begin'); + let a = 'abc'; + let b = 'b'; + // Defines a variety of assertion methods, which are used to declare expected boolean conditions. + expect(a).assertContain(b); + expect(a).assertEqual(a); + }) + }) +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/List.test.ets b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/List.test.ets new file mode 100644 index 000000000..794c7dc4e --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/List.test.ets @@ -0,0 +1,5 @@ +import abilityTest from './Ability.test'; + +export default function testsuite() { + abilityTest(); +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5 b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5 new file mode 100644 index 000000000..55725a929 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5 @@ -0,0 +1,13 @@ +{ + "module": { + "name": "entry_test", + "type": "feature", + "deviceTypes": [ + "phone", + "tablet", + "2in1" + ], + "deliveryWithInstall": true, + "installationFree": false + } +} diff --git a/harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets b/harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets new file mode 100644 index 000000000..bb5b5c373 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets @@ -0,0 +1,5 @@ +import localUnitTest from './LocalUnit.test'; + +export 
default function testsuite() { + localUnitTest(); +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets b/harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets new file mode 100644 index 000000000..165fc1615 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets @@ -0,0 +1,33 @@ +import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium'; + +export default function localUnitTest() { + describe('localUnitTest', () => { + // Defines a test suite. Two parameters are supported: test suite name and test suite function. + beforeAll(() => { + // Presets an action, which is performed only once before all test cases of the test suite start. + // This API supports only one parameter: preset action function. + }); + beforeEach(() => { + // Presets an action, which is performed before each unit test case starts. + // The number of execution times is the same as the number of test cases defined by **it**. + // This API supports only one parameter: preset action function. + }); + afterEach(() => { + // Presets a clear action, which is performed after each unit test case ends. + // The number of execution times is the same as the number of test cases defined by **it**. + // This API supports only one parameter: clear action function. + }); + afterAll(() => { + // Presets a clear action, which is performed after all test cases of the test suite end. + // This API supports only one parameter: clear action function. + }); + it('assertContain', 0, () => { + // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function. + let a = 'abc'; + let b = 'b'; + // Defines a variety of assertion methods, which are used to declare expected boolean conditions. 
+ expect(a).assertContain(b); + expect(a).assertEqual(a); + }); + }); +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5 b/harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5 new file mode 100644 index 000000000..06b278367 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5 @@ -0,0 +1,22 @@ +{ + "modelVersion": "5.0.0", + "dependencies": { + }, + "execution": { + // "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */ + // "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */ + // "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */ + // "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */ + // "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */ + }, + "logging": { + // "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */ + }, + "debugging": { + // "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */ + }, + "nodeOptions": { + // "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/ + // "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/ + } +} diff --git a/harmony-os/SherpaOnnxTts/hvigorfile.ts b/harmony-os/SherpaOnnxTts/hvigorfile.ts new file mode 100644 index 000000000..f3cb9f1a8 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/hvigorfile.ts @@ -0,0 +1,6 @@ +import { appTasks } from '@ohos/hvigor-ohos-plugin'; + +export default { + system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */ + plugins:[] /* Custom plugin to extend the functionality of Hvigor. 
*/ +} diff --git a/harmony-os/SherpaOnnxTts/oh-package-lock.json5 b/harmony-os/SherpaOnnxTts/oh-package-lock.json5 new file mode 100644 index 000000000..f538ae290 --- /dev/null +++ b/harmony-os/SherpaOnnxTts/oh-package-lock.json5 @@ -0,0 +1,19 @@ +{ + "meta": { + "stableOrder": true + }, + "lockfileVersion": 3, + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.", + "specifiers": { + "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19" + }, + "packages": { + "@ohos/hypium@1.0.19": { + "name": "@ohos/hypium", + "version": "1.0.19", + "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==", + "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har", + "registryType": "ohpm" + } + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxTts/oh-package.json5 b/harmony-os/SherpaOnnxTts/oh-package.json5 new file mode 100644 index 000000000..a79d5300e --- /dev/null +++ b/harmony-os/SherpaOnnxTts/oh-package.json5 @@ -0,0 +1,9 @@ +{ + "modelVersion": "5.0.0", + "description": "Please describe the basic information.", + "dependencies": { + }, + "devDependencies": { + "@ohos/hypium": "1.0.19" + } +} diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets index b0695f3a0..2675c7b77 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets @@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit'; @Entry @Component struct Index { + @State title: string = 'Next-gen Kaldi: VAD + ASR'; @State currentIndex: number = 0; @State resultForFile: string = ''; @State progressForFile: number = 0; @@ -73,13 +74,11 @@ struct Index { }; const audioCapturerInfo: audio.AudioCapturerInfo = { - source: audio.SourceType.SOURCE_TYPE_MIC, - capturerFlags: 0 + source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0 }; const 
audioCapturerOptions: audio.AudioCapturerOptions = { - streamInfo: audioStreamInfo, - capturerInfo: audioCapturerInfo + streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo }; audio.createAudioCapturer(audioCapturerOptions, (err, data) => { @@ -162,15 +161,9 @@ struct Index { @Builder TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) { Column() { - Image(this.currentIndex == targetIndex ? selectedImg : normalImg) - .size({ width: 25, height: 25 }) - Text(title) - .fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a') - } - .width('100%') - .height(50) - .justifyContent(FlexAlign.Center) - .onClick(() => { + Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 }) + Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a') + }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => { this.currentIndex = targetIndex; this.controller.changeIndex(this.currentIndex); }) @@ -181,11 +174,7 @@ struct Index { Tabs({ barPosition: BarPosition.End, controller: this.controller }) { TabContent() { Column({ space: 10 }) { - Text('Next-gen Kaldi: VAD + ASR') - .fontColor('#182431') - .fontSize(25) - .lineHeight(41) - .fontWeight(500) + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold); Button('Select .wav file (16kHz) ') .enabled(this.selectFileBtnEnabled) @@ -211,8 +200,7 @@ struct Index { if (this.workerInstance) { this.workerInstance.postMessage({ - msgType: 'non-streaming-asr-vad-decode', - filename: result[0], + msgType: 'non-streaming-asr-vad-decode', filename: result[0], }); } else { console.log(`this worker instance is undefined ${this.workerInstance}`); @@ -236,80 +224,86 @@ struct Index { }.width('100%').justifyContent(FlexAlign.Center) } - TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP }); - - } - .alignItems(HorizontalAlign.Center) - .justifyContent(FlexAlign.Start) + 
TextArea({ text: this.resultForFile }) + .width('100%') + .lineSpacing({ value: 10, unit: LengthUnit.VP }) + .height('100%'); + }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start) }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default'))) TabContent() { - Column() { - Button(this.message) - .enabled(this.micInitDone) - .onClick(() => { - console.log('clicked mic button'); - this.resultForMic = ''; - if (this.mic) { - if (this.micStarted) { - this.mic.stop(); - this.message = "Start recording"; - this.micStarted = false; - console.log('mic stopped'); - - const samples = this.flatten(this.sampleList); - let s = 0; - for (let i = 0; i < samples.length; ++i) { - s += samples[i]; - } - console.log(`samples ${samples.length}, sum: ${s}`); - - if (this.workerInstance) { - console.log('decode mic'); - this.workerInstance.postMessage({ - msgType: 'non-streaming-asr-vad-mic', - samples, - }); - } else { - console.log(`this worker instance is undefined ${this.workerInstance}`); - } + Column({ space: 10 }) { + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold); + Button(this.message).enabled(this.micInitDone).onClick(() => { + console.log('clicked mic button'); + this.resultForMic = ''; + if (this.mic) { + if (this.micStarted) { + this.mic.stop(); + this.message = "Start recording"; + this.micStarted = false; + console.log('mic stopped'); + + const samples = this.flatten(this.sampleList); + let s = 0; + for (let i = 0; i < samples.length; ++i) { + s += samples[i]; + } + console.log(`samples ${samples.length}, sum: ${s}`); + + if (this.workerInstance) { + console.log('decode mic'); + this.workerInstance.postMessage({ + msgType: 'non-streaming-asr-vad-mic', samples, + }); } else { - this.sampleList = []; - this.mic.start(); - this.message = "Stop recording"; - this.micStarted = true; - console.log('mic started'); + console.log(`this worker instance is undefined ${this.workerInstance}`); } + } else { + 
 this.sampleList = []; + this.mic.start(); + this.message = "Stop recording"; + this.micStarted = true; + console.log('mic started'); } - }); + } + }); Text(`Supported languages: ${this.lang}`) - TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP }); - } - .alignItems(HorizontalAlign.Center) - .justifyContent(FlexAlign.Start) + TextArea({ text: this.resultForMic }) + .width('100%') + .lineSpacing({ value: 10, unit: LengthUnit.VP }) + .width('100%') + .height('100%'); + }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start) } .tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'), $r('app.media.ic_public_input_voice_default'))) TabContent() { - Column() { - Text("Everything is open-sourced"); - Divider(); - Text("It runs locally, without accessing the network"); - Divider(); - Text("See also https://github.com/k2-fsa/sherpa-onnx"); - Divider(); - Text("and https://k2-fsa.github.io/sherpa/social-groups.html"); + Column({ space: 10 }) { + Text(this.title).fontSize(20).fontWeight(FontWeight.Bold); + TextArea({ + text: ` +Everything is open-sourced.
+ +It runs locally, without accessing the network + +See also https://github.com/k2-fsa/sherpa-onnx + +新一代 Kaldi QQ 和微信交流群: 请看 + +https://k2-fsa.github.io/sherpa/social-groups.html + +微信公众号: 新一代 Kaldi + ` + }).width('100%').height('100%').focusable(false) }.justifyContent(FlexAlign.Start) - }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), - $r('app.media.info_circle_default'))) + }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), $r('app.media.info_circle_default'))) }.scrollable(false) - } - .width('100%') - .justifyContent(FlexAlign.Start) + }.width('100%').justifyContent(FlexAlign.Start) } private micCallback = (buffer: ArrayBuffer) => { diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json index 09e201b54..652fac4cc 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json @@ -2,19 +2,19 @@ "string": [ { "name": "module_desc", - "value": "VAD+ASR with Next-gen Kaldi" + "value": "On-device VAD+ASR with Next-gen Kaldi" }, { "name": "EntryAbility_desc", - "value": "VAD+ASR" + "value": "On-device VAD+ASR with Next-gen Kaldi" }, { "name": "EntryAbility_label", - "value": "VAD_ASR" + "value": "On-device speech recognition" }, { "name": "mic_reason", - "value": "access the microhone for speech recognition" + "value": "access the microphone for on-device speech recognition with Next-gen Kaldi" } ] } \ No newline at end of file diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/en_US/element/string.json b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/en_US/element/string.json index f94595515..652fac4cc 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/en_US/element/string.json +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/en_US/element/string.json @@ -2,15 +2,19
@@ "string": [ { "name": "module_desc", - "value": "module description" + "value": "On-device VAD+ASR with Next-gen Kaldi" }, { "name": "EntryAbility_desc", - "value": "description" + "value": "On-device VAD+ASR with Next-gen Kaldi" }, { "name": "EntryAbility_label", - "value": "label" + "value": "On-device speech recognition" + }, + { + "name": "mic_reason", + "value": "access the microphone for on-device speech recognition with Next-gen Kaldi" } ] } \ No newline at end of file diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/zh_CN/element/string.json b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/zh_CN/element/string.json index 597ecf95e..00384ae7f 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/zh_CN/element/string.json +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/zh_CN/element/string.json @@ -2,15 +2,19 @@ "string": [ { "name": "module_desc", - "value": "模块描述" + "value": "基于新一代Kaldi的本地语音识别" }, { "name": "EntryAbility_desc", - "value": "description" + "value": "基于新一代Kaldi的本地语音识别" }, { "name": "EntryAbility_label", - "value": "label" + "value": "本地语音识别" + }, + { + "name": "mic_reason", + "value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)" } ] } \ No newline at end of file diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 166430da4..e25097809 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -1169,6 +1169,17 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback( return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper); } +const SherpaOnnxGeneratedAudio * +SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg( + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, + SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) { + auto wrapper = [callback, arg](const float *samples, int32_t n, + float progress) { + return callback(samples, n, progress, arg); + }; + return SherpaOnnxOfflineTtsGenerateInternal(tts,
text, sid, speed, wrapper); +} + const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg( const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) { diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index e9cd5be0a..fde626e99 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -930,6 +930,9 @@ typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples, typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)( const float *samples, int32_t n, float p); +typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallbackWithArg)( + const float *samples, int32_t n, float p, void *arg); + SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts; // Create an instance of offline TTS. The user has to use DestroyOfflineTts() @@ -964,11 +967,19 @@ SherpaOnnxOfflineTtsGenerateWithCallback( const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallback callback); +SHERPA_ONNX_API const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithProgressCallback( const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, + SherpaOnnxGeneratedAudioProgressCallback callback); +SHERPA_ONNX_API +const SherpaOnnxGeneratedAudio * +SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg( + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, + SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg); + // Same as SherpaOnnxGeneratedAudioCallback but you can pass an additional // `void* arg` to the callback. 
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio * diff --git a/sherpa-onnx/csrc/circular-buffer.cc b/sherpa-onnx/csrc/circular-buffer.cc index 2fd19cdfa..2ba81807b 100644 --- a/sherpa-onnx/csrc/circular-buffer.cc +++ b/sherpa-onnx/csrc/circular-buffer.cc @@ -22,8 +22,14 @@ CircularBuffer::CircularBuffer(int32_t capacity) { void CircularBuffer::Resize(int32_t new_capacity) { int32_t capacity = static_cast(buffer_.size()); if (new_capacity <= capacity) { +#if __OHOS__ + SHERPA_ONNX_LOGE( + "new_capacity (%{public}d) <= original capacity (%{public}d). Skip it.", + new_capacity, capacity); +#else SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.", new_capacity, capacity); +#endif return; } @@ -90,10 +96,18 @@ void CircularBuffer::Push(const float *p, int32_t n) { int32_t size = Size(); if (n + size > capacity) { int32_t new_capacity = std::max(capacity * 2, n + size); +#if __OHOS__ + SHERPA_ONNX_LOGE( + "Overflow! n: %{public}d, size: %{public}d, n+size: %{public}d, " + "capacity: %{public}d. Increase " + "capacity to: %{public}d. (Original data is copied. No data loss!)", + n, size, n + size, capacity, new_capacity); +#else SHERPA_ONNX_LOGE( "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase " - "capacity to: %d", + "capacity to: %d. (Original data is copied. 
No data loss!)", n, size, n + size, capacity, new_capacity); +#endif Resize(new_capacity); capacity = new_capacity; diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc index fe5e595b9..505ea37a8 100644 --- a/sherpa-onnx/csrc/lexicon.cc +++ b/sherpa-onnx/csrc/lexicon.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -159,17 +160,26 @@ std::vector Lexicon::ConvertTextToTokenIdsChinese( words = ProcessHeteronyms(words); if (debug_) { - fprintf(stderr, "Input text in string: %s\n", text.c_str()); - fprintf(stderr, "Input text in bytes:"); + std::ostringstream os; + + os << "Input text in string: " << text << "\n"; + os << "Input text in bytes:"; for (uint8_t c : text) { - fprintf(stderr, " %02x", c); + os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex + << c; } - fprintf(stderr, "\n"); - fprintf(stderr, "After splitting to words:"); + os << "\n"; + os << "After splitting to words:"; for (const auto &w : words) { - fprintf(stderr, " %s", w.c_str()); + os << " " << w; } - fprintf(stderr, "\n"); + os << "\n"; + +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); +#else + SHERPA_ONNX_LOGE("%s", os.str().c_str()); +#endif } std::vector ans; @@ -259,17 +269,26 @@ std::vector Lexicon::ConvertTextToTokenIdsNotChinese( std::vector words = SplitUtf8(text); if (debug_) { - fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str()); - fprintf(stderr, "Input text in bytes:"); + std::ostringstream os; + + os << "Input text (lowercase) in string: " << text << "\n"; + os << "Input text in bytes:"; for (uint8_t c : text) { - fprintf(stderr, " %02x", c); + os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex + << c; } - fprintf(stderr, "\n"); - fprintf(stderr, "After splitting to words:"); + os << "\n"; + os << "After splitting to words:"; for (const auto &w : words) { - fprintf(stderr, " %s", w.c_str()); + os << " " << w; } - fprintf(stderr, "\n"); + os << 
"\n"; + +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); +#else + SHERPA_ONNX_LOGE("%s", os.str().c_str()); +#endif } int32_t blank = token2id_.at(" "); diff --git a/sherpa-onnx/csrc/melo-tts-lexicon.cc b/sherpa-onnx/csrc/melo-tts-lexicon.cc index 29857824f..ec729cdb5 100644 --- a/sherpa-onnx/csrc/melo-tts-lexicon.cc +++ b/sherpa-onnx/csrc/melo-tts-lexicon.cc @@ -6,11 +6,21 @@ #include #include // NOLINT +#include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif #include "cppjieba/Jieba.hpp" #include "sherpa-onnx/csrc/file-utils.h" #include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/symbol-table.h" #include "sherpa-onnx/csrc/text-utils.h" @@ -62,6 +72,60 @@ class MeloTtsLexicon::Impl { } } + template + Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens, + const std::string &dict_dir, + const OfflineTtsVitsModelMetaData &meta_data, bool debug) + : meta_data_(meta_data), debug_(debug) { + std::string dict = dict_dir + "/jieba.dict.utf8"; + std::string hmm = dict_dir + "/hmm_model.utf8"; + std::string user_dict = dict_dir + "/user.dict.utf8"; + std::string idf = dict_dir + "/idf.utf8"; + std::string stop_word = dict_dir + "/stop_words.utf8"; + + AssertFileExists(dict); + AssertFileExists(hmm); + AssertFileExists(user_dict); + AssertFileExists(idf); + AssertFileExists(stop_word); + + jieba_ = + std::make_unique(dict, hmm, user_dict, idf, stop_word); + + { + auto buf = ReadFile(mgr, tokens); + + std::istrstream is(buf.data(), buf.size()); + InitTokens(is); + } + + { + auto buf = ReadFile(mgr, lexicon); + + std::istrstream is(buf.data(), buf.size()); + InitLexicon(is); + } + } + + template + Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens, + const OfflineTtsVitsModelMetaData &meta_data, bool debug) + : meta_data_(meta_data), 
debug_(debug) { + { + auto buf = ReadFile(mgr, tokens); + + std::istrstream is(buf.data(), buf.size()); + InitTokens(is); + } + + { + auto buf = ReadFile(mgr, lexicon); + + std::istrstream is(buf.data(), buf.size()); + InitLexicon(is); + } + } + std::vector ConvertTextToTokenIds(const std::string &_text) const { std::string text = ToLowerCase(_text); // see @@ -84,17 +148,24 @@ class MeloTtsLexicon::Impl { jieba_->Cut(text, words, is_hmm); if (debug_) { - SHERPA_ONNX_LOGE("input text: %s", text.c_str()); - SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str()); - std::ostringstream os; std::string sep = ""; for (const auto &w : words) { os << sep << w; sep = "_"; } +#if __OHOS__ + SHERPA_ONNX_LOGE("input text: %{public}s", text.c_str()); + SHERPA_ONNX_LOGE("after replacing punctuations: %{public}s", s.c_str()); + + SHERPA_ONNX_LOGE("after jieba processing: %{public}s", + os.str().c_str()); +#else + SHERPA_ONNX_LOGE("input text: %s", text.c_str()); + SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str()); SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str()); +#endif } } else { words = SplitUtf8(text); @@ -102,7 +173,7 @@ class MeloTtsLexicon::Impl { if (debug_) { fprintf(stderr, "Input text in string (lowercase): %s\n", text.c_str()); fprintf(stderr, "Input text in bytes (lowercase):"); - for (uint8_t c : text) { + for (int8_t c : text) { fprintf(stderr, " %02x", c); } fprintf(stderr, "\n"); @@ -307,9 +378,48 @@ MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon, bool debug) : impl_(std::make_unique(lexicon, tokens, meta_data, debug)) {} +template +MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon, + const std::string &tokens, + const std::string &dict_dir, + const OfflineTtsVitsModelMetaData &meta_data, + bool debug) + : impl_(std::make_unique(mgr, lexicon, tokens, dict_dir, meta_data, + debug)) {} + +template +MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon, + const std::string 
&tokens, + const OfflineTtsVitsModelMetaData &meta_data, + bool debug) + : impl_(std::make_unique(mgr, lexicon, tokens, meta_data, debug)) {} + std::vector MeloTtsLexicon::ConvertTextToTokenIds( const std::string &text, const std::string & /*unused_voice = ""*/) const { return impl_->ConvertTextToTokenIds(text); } +#if __ANDROID_API__ >= 9 +template MeloTtsLexicon::MeloTtsLexicon( + AAssetManager *mgr, const std::string &lexicon, const std::string &tokens, + const std::string &dict_dir, const OfflineTtsVitsModelMetaData &meta_data, + bool debug); + +template MeloTtsLexicon::MeloTtsLexicon( + AAssetManager *mgr, const std::string &lexicon, const std::string &tokens, + const OfflineTtsVitsModelMetaData &meta_data, bool debug); +#endif + +#if __OHOS__ +template MeloTtsLexicon::MeloTtsLexicon( + NativeResourceManager *mgr, const std::string &lexicon, + const std::string &tokens, const std::string &dict_dir, + const OfflineTtsVitsModelMetaData &meta_data, bool debug); + +template MeloTtsLexicon::MeloTtsLexicon( + NativeResourceManager *mgr, const std::string &lexicon, + const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data, + bool debug); +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/melo-tts-lexicon.h b/sherpa-onnx/csrc/melo-tts-lexicon.h index da0644be2..e91cf33f2 100644 --- a/sherpa-onnx/csrc/melo-tts-lexicon.h +++ b/sherpa-onnx/csrc/melo-tts-lexicon.h @@ -25,6 +25,16 @@ class MeloTtsLexicon : public OfflineTtsFrontend { MeloTtsLexicon(const std::string &lexicon, const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data, bool debug); + template + MeloTtsLexicon(Manager *mgr, const std::string &lexicon, + const std::string &tokens, const std::string &dict_dir, + const OfflineTtsVitsModelMetaData &meta_data, bool debug); + + template + MeloTtsLexicon(Manager *mgr, const std::string &lexicon, + const std::string &tokens, + const OfflineTtsVitsModelMetaData &meta_data, bool debug); + std::vector ConvertTextToTokenIds( 
const std::string &text, const std::string &unused_voice = "") const override; diff --git a/sherpa-onnx/csrc/offline-tts-vits-impl.h b/sherpa-onnx/csrc/offline-tts-vits-impl.h index 972303dd4..5ef79f69b 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-impl.h +++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h @@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { tn_list_.reserve(files.size()); for (const auto &f : files) { if (config.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str()); +#else SHERPA_ONNX_LOGE("rule fst: %s", f.c_str()); +#endif } tn_list_.push_back(std::make_unique(f)); } @@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { for (const auto &f : files) { if (config.model.debug) { +#if __OHOS__ SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); +#else + SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str()); +#endif } std::unique_ptr> reader( fst::FarReader::Open(f)); @@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { tn_list_.reserve(files.size()); for (const auto &f : files) { if (config.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str()); +#else SHERPA_ONNX_LOGE("rule fst: %s", f.c_str()); +#endif } auto buf = ReadFile(mgr, f); std::istrstream is(buf.data(), buf.size()); @@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { for (const auto &f : files) { if (config.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str()); +#else SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); +#endif } auto buf = ReadFile(mgr, f); @@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { std::string text = _text; if (config_.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str()); +#else SHERPA_ONNX_LOGE("Raw text: %s", text.c_str()); +#endif } if (!tn_list_.empty()) { for (const auto &tn : tn_list_) { text = tn->Normalize(text); if (config_.model.debug) { +#if __OHOS__ + 
SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str()); +#else SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str()); +#endif } } } @@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { int32_t num_batches = x_size / batch_size; if (config_.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE( + "Text is too long. Split it into %{public}d batches. batch size: " + "%{public}d. Number of sentences: %{public}d", + num_batches, batch_size, x_size); +#else SHERPA_ONNX_LOGE( "Text is too long. Split it into %d batches. batch size: %d. Number " "of sentences: %d", num_batches, batch_size, x_size); +#endif } GeneratedAudio ans; @@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { audio.samples.end()); if (callback) { should_continue = callback(audio.samples.data(), audio.samples.size(), - b * 1.0 / num_batches); + (b + 1) * 1.0 / num_batches); // Caution(fangjun): audio is freed when the callback returns, so users // should copy the data if they want to access the data after // the callback returns to avoid segmentation fault. 
@@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { if (meta_data.frontend == "characters") { frontend_ = std::make_unique( mgr, config_.model.vits.tokens, meta_data); + } else if (meta_data.jieba && !config_.model.vits.dict_dir.empty() && + meta_data.is_melo_tts) { + frontend_ = std::make_unique( + mgr, config_.model.vits.lexicon, config_.model.vits.tokens, + config_.model.vits.dict_dir, model_->GetMetaData(), + config_.model.debug); + } else if (meta_data.is_melo_tts && meta_data.language == "English") { + frontend_ = std::make_unique( + mgr, config_.model.vits.lexicon, config_.model.vits.tokens, + model_->GetMetaData(), config_.model.debug); } else if ((meta_data.is_piper || meta_data.is_coqui || meta_data.is_icefall) && !config_.model.vits.data_dir.empty()) { diff --git a/sherpa-onnx/csrc/offline-tts-vits-model.cc b/sherpa-onnx/csrc/offline-tts-vits-model.cc index 38efc6204..eb605a7bd 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-model.cc +++ b/sherpa-onnx/csrc/offline-tts-vits-model.cc @@ -144,7 +144,11 @@ class OfflineTtsVitsModel::Impl { ++i; } +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str()); +#else SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); +#endif } Ort::AllocatorWithDefaultOptions allocator; // used in the macro below