Skip to content

Commit

Permalink
Add C API for audio tagging (#754)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Apr 11, 2024
1 parent 34d70a2 commit f204e62
Show file tree
Hide file tree
Showing 9 changed files with 285 additions and 32 deletions.
13 changes: 13 additions & 0 deletions .github/scripts/test-c-api.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,21 @@ log() {

echo "SLID_EXE is $SLID_EXE"
echo "SID_EXE is $SID_EXE"
echo "AT_EXE is $AT_EXE"
echo "PATH: $PATH"

# Audio tagging test for the C API example binary named by $AT_EXE.
log "------------------------------------------------------------"
log "Test audio tagging "
log "------------------------------------------------------------"

# Download and unpack the zipformer audio-tagging model archive into the
# current directory, then delete the archive itself.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2

# Run the example with no arguments; the C example hard-codes paths relative
# to ./sherpa-onnx-zipformer-audio-tagging-2024-04-09.
$AT_EXE

# Clean up the unpacked model directory.
rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09


log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,16 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: build/bin/*

- name: Test C API
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export AT_EXE=audio-tagging-c-api
.github/scripts/test-c-api.sh
- name: Test Audio tagging
shell: bash
run: |
Expand All @@ -142,14 +152,6 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test C API
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
.github/scripts/test-c-api.sh
- name: Test spoken language identification (C++ API)
shell: bash
Expand Down
15 changes: 8 additions & 7 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,22 +105,23 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test Audio tagging
- name: Test C API
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-audio-tagging
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export AT_EXE=audio-tagging-c-api
.github/scripts/test-audio-tagging.sh
.github/scripts/test-c-api.sh
- name: Test C API
- name: Test Audio tagging
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
export EXE=sherpa-onnx-offline-audio-tagging
.github/scripts/test-c-api.sh
.github/scripts/test-audio-tagging.sh
- name: Test spoken language identification (C++ API)
shell: bash
Expand Down
16 changes: 9 additions & 7 deletions .github/workflows/windows-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,24 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test Audio tagging
- name: Test C API
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-audio-tagging.exe
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export AT_EXE=audio-tagging-c-api.exe
.github/scripts/test-audio-tagging.sh
.github/scripts/test-c-api.sh
- name: Test C API
- name: Test Audio tagging
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export EXE=sherpa-onnx-offline-audio-tagging.exe
.github/scripts/test-c-api.sh
.github/scripts/test-audio-tagging.sh
- name: Test spoken language identification (C++ API)
shell: bash
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/windows-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ jobs:
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
export AT_EXE=audio-tagging-c-api.exe
.github/scripts/test-c-api.sh
Expand Down
3 changes: 3 additions & 0 deletions c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)

# Audio tagging example built from audio-tagging-c-api.c and linked against
# the sherpa-onnx C API library target.
add_executable(audio-tagging-c-api audio-tagging-c-api.c)
target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api)

if(SHERPA_ONNX_HAS_ALSA)
add_subdirectory(./asr-microphone-example)
elseif((UNIX AND NOT APPLE) OR LINUX)
Expand Down
79 changes: 79 additions & 0 deletions c-api-examples/audio-tagging-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// c-api-examples/audio-tagging-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation

// We assume you have pre-downloaded the model files for testing
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
//
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Runs the zipformer audio tagging model on one bundled test wave and prints
// the top events to stderr.
//
// Returns 0 on success and -1 if the tagger cannot be created or the wave
// file cannot be read. Every object created along the way is released before
// returning, including on the error paths.
int32_t main() {
  SherpaOnnxAudioTaggingConfig config;
  // Zero the whole struct so that fields not set below are 0/NULL.
  memset(&config, 0, sizeof(config));

  config.model.zipformer.model =
      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx";
  config.model.num_threads = 1;
  config.model.debug = 1;
  config.model.provider = "cpu";
  // clang-format off
  config.labels = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv";
  // clang-format on

  const SherpaOnnxAudioTagging *tagger = SherpaOnnxCreateAudioTagging(&config);
  if (!tagger) {
    fprintf(stderr,
            "Failed to create audio tagger. Please check your config\n");
    return -1;
  }

  // You can find more test waves from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
  const char *wav_filename =
      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    // The tagger was already created; release it before bailing out.
    SherpaOnnxDestroyAudioTagging(tagger);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxAudioTaggingCreateOfflineStream(tagger);

  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                        wave->num_samples);

  int32_t top_k = 5;
  const SherpaOnnxAudioEvent *const *results =
      SherpaOnnxAudioTaggingCompute(tagger, stream, top_k);

  fprintf(stderr, "--------------------------------------------------\n");
  fprintf(stderr, "Index\t\tProbability\t\tEvent name\n");
  fprintf(stderr, "--------------------------------------------------\n");
  // The result array is NULL-terminated and may hold fewer than top_k
  // entries, so stop at the terminator instead of assuming top_k items.
  for (int32_t i = 0; i != top_k && results[i] != NULL; ++i) {
    fprintf(stderr, "%d\t\t%.3f\t\t\t%s\n", i, results[i]->prob,
            results[i]->name);
  }
  fprintf(stderr, "--------------------------------------------------\n");

  SherpaOnnxAudioTaggingFreeResults(results);
  DestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyAudioTagging(tagger);

  return 0;
}
100 changes: 95 additions & 5 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
Expand Down Expand Up @@ -400,15 +401,18 @@ SherpaOnnxOfflineStream *CreateOfflineStream(
return stream;
}

void DestroyOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; }
// Releases an offline stream by deleting the wrapper object.
// @param stream Stream to delete.
void DestroyOfflineStream(const SherpaOnnxOfflineStream *stream) {
delete stream;
}

void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate,
const float *samples, int32_t n) {
// Forwards audio samples to the AcceptWaveform method of the stream's
// underlying implementation object.
// @param stream      Stream wrapper whose impl receives the samples.
// @param sample_rate Sample rate passed through unchanged.
// @param samples     Pointer to the sample buffer, passed through unchanged.
// @param n           Passed through unchanged; presumably the number of
//                    entries in `samples` -- confirm against the C header.
void AcceptWaveformOffline(const SherpaOnnxOfflineStream *stream,
int32_t sample_rate, const float *samples,
int32_t n) {
stream->impl->AcceptWaveform(sample_rate, samples, n);
}

void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer,
SherpaOnnxOfflineStream *stream) {
// Calls DecodeStream on the recognizer's implementation, handing it the raw
// pointer to the stream's implementation object.
// @param recognizer Recognizer wrapper whose impl performs the decoding.
// @param stream     Stream wrapper whose impl is decoded.
void DecodeOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer,
const SherpaOnnxOfflineStream *stream) {
recognizer->impl->DecodeStream(stream->impl.get());
}

Expand Down Expand Up @@ -1209,3 +1213,89 @@ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(

delete[] names;
}

// Definition of the handle type returned by SherpaOnnxCreateAudioTagging().
// It holds the C++ sherpa_onnx::AudioTagging object through a unique_ptr, so
// deleting the handle also destroys the wrapped object.
struct SherpaOnnxAudioTagging {
std::unique_ptr<sherpa_onnx::AudioTagging> impl;
};

// Creates an audio tagger from a C configuration struct.
//
// The C config is copied field by field into a sherpa_onnx::AudioTaggingConfig
// (SHERPA_ONNX_OR supplies the fallback shown as its second argument;
// presumably when the field is 0/NULL -- see the macro definition). When the
// debug flag is set the resulting config is logged. If validation fails an
// error is logged and nullptr is returned.
//
// @param config C-side configuration; it is dereferenced unconditionally.
// @return A newly allocated handle, or nullptr if the config is invalid.
//         Release it with SherpaOnnxDestroyAudioTagging().
const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
    const SherpaOnnxAudioTaggingConfig *config) {
  sherpa_onnx::AudioTaggingConfig tagging_config;

  // Model-related settings.
  auto &model_config = tagging_config.model;
  model_config.zipformer.model =
      SHERPA_ONNX_OR(config->model.zipformer.model, "");
  model_config.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  model_config.debug = config->model.debug;
  model_config.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");

  // Tagger-level settings.
  tagging_config.labels = SHERPA_ONNX_OR(config->labels, "");
  tagging_config.top_k = SHERPA_ONNX_OR(config->top_k, 5);

  if (model_config.debug) {
    SHERPA_ONNX_LOGE("%s\n", tagging_config.ToString().c_str());
  }

  const bool config_ok = tagging_config.Validate();
  if (!config_ok) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxAudioTagging;
  handle->impl = std::make_unique<sherpa_onnx::AudioTagging>(tagging_config);
  return handle;
}

// Destroys a tagger handle created by SherpaOnnxCreateAudioTagging().
// Deleting the handle also destroys the AudioTagging object held by its
// unique_ptr member.
// @param tagger Handle to delete.
void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger) {
delete tagger;
}

// Creates an offline stream for the given tagger.
//
// The stream produced by the tagger implementation's CreateStream() is
// wrapped in a heap-allocated SherpaOnnxOfflineStream.
//
// @param tagger Tagger handle; it is dereferenced unconditionally.
// @return A newly allocated stream wrapper; free it with
//         DestroyOfflineStream().
const SherpaOnnxOfflineStream *SherpaOnnxAudioTaggingCreateOfflineStream(
    const SherpaOnnxAudioTagging *tagger) {
  return new SherpaOnnxOfflineStream(tagger->impl->CreateStream());
}

// Runs audio tagging on a stream and returns the detected events as a
// C-compatible array.
//
// Each event returned by the tagger implementation's Compute() is copied into
// a heap-allocated SherpaOnnxAudioEvent with its own NUL-terminated copy of
// the name. The returned array has one extra slot holding nullptr as an end
// marker, so its length is the number of events actually produced.
//
// @param tagger Tagger handle.
// @param s      Stream holding the audio to analyse.
// @param top_k  Forwarded unchanged to Compute().
// @return nullptr-terminated array of events; release it with
//         SherpaOnnxAudioTaggingFreeResults().
const SherpaOnnxAudioEvent *const *SherpaOnnxAudioTaggingCompute(
    const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s,
    int32_t top_k) {
  const std::vector<sherpa_onnx::AudioEvent> detected =
      tagger->impl->Compute(s->impl.get(), top_k);
  const int32_t count = static_cast<int32_t>(detected.size());

  // One extra slot for the terminating nullptr.
  SherpaOnnxAudioEvent **result = new SherpaOnnxAudioEvent *[count + 1];

  for (int32_t k = 0; k != count; ++k) {
    const auto &src = detected[k];
    SherpaOnnxAudioEvent *dst = new SherpaOnnxAudioEvent;

    // Deep-copy the event name into a NUL-terminated buffer owned by dst.
    const auto name_len = src.name.size();
    char *name_copy = new char[name_len + 1];
    std::copy(src.name.begin(), src.name.end(), name_copy);
    name_copy[name_len] = 0;

    dst->name = name_copy;
    dst->index = src.index;
    dst->prob = src.prob;

    result[k] = dst;
  }

  result[count] = nullptr;
  return result;
}

void SherpaOnnxAudioTaggingFreeResults(
const SherpaOnnxAudioEvent *const *events) {
auto p = events;

while (p && *p) {
auto e = *p;

delete[] e->name;
delete e;

++p;
}

delete[] events;
}
Loading

0 comments on commit f204e62

Please sign in to comment.