diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 03106a88d..46da81ca5 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -531,6 +531,20 @@ const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult( c_lang[lang.size()] = '\0'; r->lang = c_lang; + // emotion + const auto &emotion = result.emotion; + char *c_emotion = new char[emotion.size() + 1]; + std::copy(emotion.begin(), emotion.end(), c_emotion); + c_emotion[emotion.size()] = '\0'; + r->emotion = c_emotion; + + // event + const auto &event = result.event; + char *c_event = new char[event.size() + 1]; + std::copy(event.begin(), event.end(), c_event); + c_event[event.size()] = '\0'; + r->event = c_event; + // copy json std::string json = result.AsJsonString(); char *pJson = new char[json.size() + 1]; @@ -588,6 +602,8 @@ void SherpaOnnxDestroyOfflineRecognizerResult( delete[] r->tokens_arr; delete[] r->json; delete[] r->lang; + delete[] r->emotion; + delete[] r->event; delete r; } } diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 33d1ad37c..9ddabba00 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -544,6 +544,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { // return recognized language const char *lang; + + // return emotion. + const char *emotion; + + // return event. + const char *event; } SherpaOnnxOfflineRecognizerResult; /// Get the result of the offline stream. diff --git a/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h b/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h index 6d7397dea..6cebf23c7 100644 --- a/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h +++ b/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h @@ -52,6 +52,13 @@ static OfflineRecognitionResult ConvertSenseVoiceResult( r.words = std::move(src.words); + // parse lang, emotion and event from tokens. + if (src.tokens.size() >= 3) { + r.lang = sym_table[src.tokens[0]]; + r.emotion = sym_table[src.tokens[1]]; + r.event = sym_table[src.tokens[2]]; + } + return r; } diff --git a/sherpa-onnx/csrc/offline-stream.cc b/sherpa-onnx/csrc/offline-stream.cc index 31f4a5748..a9b42dec8 100644 --- a/sherpa-onnx/csrc/offline-stream.cc +++ b/sherpa-onnx/csrc/offline-stream.cc @@ -304,6 +304,19 @@ const OfflineRecognitionResult &OfflineStream::GetResult() const { std::string OfflineRecognitionResult::AsJsonString() const { std::ostringstream os; os << "{"; + + os << "\"lang\"" + << ": "; + os << std::quoted(lang) << ", "; + + os << "\"emotion\"" + << ": "; + os << std::quoted(emotion) << ", "; + + os << "\"event\"" + << ": "; + os << std::quoted(event) << ", "; + os << "\"text\"" << ": "; os << std::quoted(text) << ", "; diff --git a/sherpa-onnx/csrc/offline-stream.h b/sherpa-onnx/csrc/offline-stream.h index 0bc7b4a9b..95bc80e83 100644 --- a/sherpa-onnx/csrc/offline-stream.h +++ b/sherpa-onnx/csrc/offline-stream.h @@ -28,6 +28,12 @@ struct OfflineRecognitionResult { std::string lang; + // emotion target of the audio. + std::string emotion; + + // event target of the audio. + std::string event; + /// timestamps.size() == tokens.size() /// timestamps[i] records the time in seconds when tokens[i] is decoded. std::vector timestamps; diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java index aa865fe3f..1133ed326 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java @@ -41,7 +41,10 @@ public OfflineRecognizerResult getResult(OfflineStream s) { String text = (String) arr[0]; String[] tokens = (String[]) arr[1]; float[] timestamps = (float[]) arr[2]; - return new OfflineRecognizerResult(text, tokens, timestamps); + String lang = (String) arr[3]; + String emotion = (String) arr[4]; + String event = (String) arr[5]; + return new OfflineRecognizerResult(text, tokens, timestamps, lang, emotion, event); } private native void delete(long ptr); diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java index 826305392..b6f6595a5 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java @@ -6,11 +6,17 @@ public class OfflineRecognizerResult { private final String text; private final String[] tokens; private final float[] timestamps; + private final String lang; + private final String emotion; + private final String event; - public OfflineRecognizerResult(String text, String[] tokens, float[] timestamps) { + public OfflineRecognizerResult(String text, String[] tokens, float[] timestamps, String lang, String emotion, String event) { this.text = text; this.tokens = tokens; this.timestamps = timestamps; + this.lang = lang; + this.emotion = emotion; + this.event = event; } public String getText() { @@ -24,4 +30,16 @@ public String[] getTokens() { public float[] getTimestamps() { return timestamps; } + + public String getLang() { + return lang; + } + + public String getEmotion() { + return emotion; + } + + public String getEvent() { + return event; + } } diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index c122ad6ab..3a7602dbe 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -320,8 +320,11 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_getResult(JNIEnv *env, // [0]: text, jstring // [1]: tokens, array of jstring // [2]: timestamps, array of float + // [3]: lang, jstring + // [4]: emotion, jstring + // [5]: event, jstring jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( - 3, env->FindClass("java/lang/Object"), nullptr); + 6, env->FindClass("java/lang/Object"), nullptr); jstring text = env->NewStringUTF(result.text.c_str()); env->SetObjectArrayElement(obj_arr, 0, text); @@ -344,5 +347,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_getResult(JNIEnv *env, env->SetObjectArrayElement(obj_arr, 2, timestamps_arr); + // [3]: lang, jstring + // [4]: emotion, jstring + // [5]: event, jstring + env->SetObjectArrayElement(obj_arr, 3, env->NewStringUTF(result.lang.c_str())); + env->SetObjectArrayElement(obj_arr, 4, env->NewStringUTF(result.emotion.c_str())); + env->SetObjectArrayElement(obj_arr, 5, env->NewStringUTF(result.event.c_str())); + return obj_arr; } diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 91fdbc77e..0003eb424 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -6,6 +6,9 @@ data class OfflineRecognizerResult( val text: String, val tokens: Array, val timestamps: FloatArray, + val lang: String, + val emotion: String, + val event: String, ) data class OfflineTransducerModelConfig( @@ -96,7 +99,10 @@ class OfflineRecognizer( val text = objArray[0] as String val tokens = objArray[1] as Array val timestamps = objArray[2] as FloatArray - return OfflineRecognizerResult(text = text, tokens = tokens, timestamps = timestamps) + val lang = objArray[3] as String + val emotion = objArray[4] as String + val event = objArray[5] as String + return OfflineRecognizerResult(text = text, tokens = tokens, timestamps = timestamps, lang = lang, emotion = emotion, event = event) } fun decode(stream: OfflineStream) = decode(ptr, stream.ptr) diff --git a/sherpa-onnx/python/csrc/offline-stream.cc b/sherpa-onnx/python/csrc/offline-stream.cc index 3c1cf3486..b330c712d 100644 --- a/sherpa-onnx/python/csrc/offline-stream.cc +++ b/sherpa-onnx/python/csrc/offline-stream.cc @@ -32,6 +32,12 @@ static void PybindOfflineRecognitionResult(py::module *m) { // NOLINT return py::str(PyUnicode_DecodeUTF8(self.text.c_str(), self.text.size(), "ignore")); }) + .def_property_readonly("lang", + [](const PyClass &self) { return self.lang; }) + .def_property_readonly("emotion", + [](const PyClass &self) { return self.emotion; }) + .def_property_readonly("event", + [](const PyClass &self) { return self.event; }) .def_property_readonly("tokens", [](const PyClass &self) { return self.tokens; }) .def_property_readonly("words",