MTG · dbogdanov · Jan 9, 2025 · Aug 30, 2024 · Sep 2, 2024 · Sep 2, 2024
diff --git a/src/algorithms/tonal/audio2midi.cpp b/src/algorithms/tonal/audio2midi.cpp
@@ -0,0 +1,97 @@
+#include "audio2midi.h"
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+// Definitions of the static name / category / description strings declared in
+// the Audio2Midi class (audio2midi.h).
+const char* Audio2Midi::name = "Audio2Midi";
+const char* Audio2Midi::category = "Pitch";
+const char* Audio2Midi::description = DOC(
+  "Wrapper around Audio2Pitch and Pitch2Midi for real time application. "
+  "This algorithm has a state that is used to estimate note on/off events based on consequent compute() calls.");
+
+// Caches every declared parameter in its member variable, derives the analysis
+// frame size from the sample rate, and configures the four inner algorithms
+// (LowPass, FrameBuffer, Audio2Pitch, Pitch2Midi) with those values.
+void Audio2Midi::configure() {
+  // audio format and block size
+  _sampleRate = parameter("sampleRate").toReal();
+  _hopSize = parameter("hopSize").toInt();
+
+  // pitch detection settings
+  _minFrequency = parameter("minFrequency").toReal();
+  _maxFrequency = parameter("maxFrequency").toReal();
+  _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
+  _loudnessThreshold = parameter("loudnessThreshold").toReal();
+
+  // pitch-to-MIDI conversion settings
+  _tuningFrequency = parameter("tuningFrequency").toInt();
+  _transposition = parameter("transpositionAmount").toInt();
+  _minOccurrenceRate = parameter("minOccurrenceRate").toReal();
+  _midiBufferDuration = parameter("midiBufferDuration").toReal();
+  _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
+  _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
+  _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
+  _applyTimeCompensation = parameter("applyTimeCompensation").toBool();
+
+  // The analysis frame grows with the sample rate:
+  //   <= 16000 -> 2048, <= 24000 -> 4096, anything higher -> 8192 samples.
+  const int sampleRateHz = static_cast<int>(_sampleRate);
+  if (sampleRateHz > 24000) {
+    _frameSize = 8192;
+  }
+  else if (sampleRateHz > 16000) {
+    _frameSize = 4096;
+  }
+  else {
+    _frameSize = 2048;
+  }
+
+  // pre-filter with a hard-coded cutoffFrequency of 1000
+  _lowpass->configure(
+    INHERIT("sampleRate"),
+    "cutoffFrequency", 1000);
+
+  // accumulates incoming frames into a buffer of _frameSize samples
+  _framebuffer->configure(
+    "bufferSize", _frameSize);
+
+  // pitch / loudness / voicing estimation on the buffered frame
+  _audio2pitch->configure(
+    INHERIT("sampleRate"),
+    "frameSize", _frameSize,
+    "pitchAlgorithm", _pitchAlgorithm,
+    "minFrequency", _minFrequency,
+    "maxFrequency", _maxFrequency,
+    INHERIT("pitchConfidenceThreshold"),
+    INHERIT("loudnessThreshold"));
+
+  // note on/off estimation from the pitch track
+  _pitch2midi->configure(
+    INHERIT("sampleRate"),
+    INHERIT("hopSize"),
+    INHERIT("minOccurrenceRate"),
+    INHERIT("applyTimeCompensation"),
+    "minOnsetCheckPeriod", _minOnsetCheckPeriod,
+    "minOffsetCheckPeriod", _minOffsetCheckPeriod,
+    "minNoteChangePeriod", _minNoteChangePeriod,
+    "midiBufferDuration", _midiBufferDuration,
+    "minFrequency", _minFrequency,
+    "tuningFrequency", _tuningFrequency,
+    "transpositionAmount", _transposition);
+}
+
+// Processes one input frame through the chain
+//   LowPass -> FrameBuffer -> Audio2Pitch -> Pitch2Midi.
+// Pitch and loudness from Audio2Pitch and the three Pitch2Midi outputs are
+// written directly into this algorithm's declared outputs; the intermediate
+// signals (lpFrame, analysisFrame, pitchConfidence, voiced) live in members.
+void Audio2Midi::compute() {
+  // references to this algorithm's own input and outputs
+  const vector<Real>& inputFrame = _frame.get();
+  Real& pitchOut = _pitch.get();
+  Real& loudnessOut = _loudness.get();
+  vector<string>& messageTypeOut = _messageType.get();
+  vector<Real>& midiNoteNumberOut = _midiNoteNumber.get();
+  vector<Real>& timeCompensationOut = _timeCompensation.get();
+
+  // stage 1: low-pass filter the incoming frame
+  _lowpass->input("signal").set(inputFrame);
+  _lowpass->output("signal").set(lpFrame);
+
+  // stage 2: push the filtered frame into the frame buffer
+  _framebuffer->input("frame").set(lpFrame);
+  _framebuffer->output("frame").set(analysisFrame);
+
+  // stage 3: pitch, confidence, loudness and voicing of the buffered frame
+  _audio2pitch->input("frame").set(analysisFrame);
+  _audio2pitch->output("pitch").set(pitchOut);
+  _audio2pitch->output("pitchConfidence").set(pitchConfidence);
+  _audio2pitch->output("loudness").set(loudnessOut);
+  _audio2pitch->output("voiced").set(voiced);
+
+  // stage 4: turn pitch + voicing into MIDI messages
+  _pitch2midi->input("pitch").set(pitchOut);
+  _pitch2midi->input("voiced").set(voiced);
+  _pitch2midi->output("midiNoteNumber").set(midiNoteNumberOut);
+  _pitch2midi->output("timeCompensation").set(timeCompensationOut);
+  _pitch2midi->output("messageType").set(messageTypeOut);
+
+  // Run the stages in order: each one reads what the previous one wrote.
+  Algorithm* const chain[] = { _lowpass, _framebuffer, _audio2pitch, _pitch2midi };
+  for (Algorithm* stage : chain) {
+    stage->compute();
+  }
+}
diff --git a/src/algorithms/tonal/audio2midi.h b/src/algorithms/tonal/audio2midi.h
@@ -0,0 +1,100 @@
+#ifndef ESSENTIA_AUDIO2MIDI_H
+#define ESSENTIA_AUDIO2MIDI_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+  // Wrapper that chains four inner algorithms to turn audio frames into MIDI
+  // note messages: LowPass -> FrameBuffer -> Audio2Pitch -> Pitch2Midi.
+  // The inner algorithms are created in the constructor and deleted in the
+  // destructor; they are configured in configure() and run in compute().
+  class Audio2Midi : public Algorithm {
+    protected:
+      // Declared input and outputs (registered in the constructor).
+      Input<std::vector<Real>> _frame;
+      Output<Real> _pitch;
+      Output<Real> _loudness;
+      Output<std::vector<std::string> > _messageType;
+      Output<std::vector<Real> > _midiNoteNumber;
+      Output<std::vector<Real> > _timeCompensation;
+
+      // Inner algorithms, owned by this object.
+      Algorithm* _lowpass;
+      Algorithm* _framebuffer;
+      Algorithm* _audio2pitch;
+      Algorithm* _pitch2midi;
+
+      // Cached parameter values, filled in by configure().
+      Real _sampleRate;
+      int _frameSize;  // not a parameter: derived from _sampleRate in configure()
+      int _hopSize;
+      std::string _pitchAlgorithm = "pitchyinfft";  // forwarded to Audio2Pitch
+      std::string _loudnessAlgorithm = "rms";       // not used by the visible configure()
+      Real _minFrequency;
+      Real _maxFrequency;
+      int _tuningFrequency;
+      Real _pitchConfidenceThreshold, _loudnessThreshold, _minOccurrenceRate;
+      Real _midiBufferDuration;
+      Real _minNoteChangePeriod;
+      Real _minOnsetCheckPeriod;
+      Real _minOffsetCheckPeriod;
+
+      bool _applyTimeCompensation;
+      int _transposition;
+
+      // Containers
+      // lpFrame, analysisFrame, pitchConfidence and voiced carry intermediate
+      // results between the inner algorithms in compute(). The remaining
+      // containers are not read by compute(), which writes straight into the
+      // declared outputs.
+      std::vector<Real> lpFrame, analysisFrame;
+      Real pitch, pitchConfidence, loudness;
+      std::vector<Real> midiNoteNumber, timeCompensation;
+      std::vector<std::string> messageType;
+      Real onsetTimeCompensation, offsetTimeCompensation;
+
+      int voiced;
+
+    public:
+      // Registers the input/outputs and instantiates the inner algorithms.
+      Audio2Midi() {
+        declareInput(_frame, "frame", "the input frame to analyse");
+        declareOutput(_pitch, "pitch", "pitch given in Hz");
+        declareOutput(_loudness, "loudness", "detected loudness in decibels");
+        declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
+        declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
+        declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");
+
+        _lowpass = AlgorithmFactory::create("LowPass");
+        _framebuffer = AlgorithmFactory::create("FrameBuffer");
+        _audio2pitch = AlgorithmFactory::create("Audio2Pitch");
+        _pitch2midi = AlgorithmFactory::create("Pitch2Midi");
+      }
+
+      // Releases the inner algorithms created in the constructor.
+      ~Audio2Midi() {
+        delete _lowpass;
+        delete _framebuffer;
+        delete _audio2pitch;
+        delete _pitch2midi;
+      }
+
+      // Declares the parameters read by configure(): name, description,
+      // accepted range and default value.
+      void declareParameters() {
+        declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
+        declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
+        declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
+        declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
+        declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);
+        declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
+        declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
+        declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
+        declareParameter("minOccurrenceRate", "rate of predominant pitch occurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
+        declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // 50 ms
+        declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
+        declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
+        declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
+        declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
+      }
+
+      void configure();
+      void compute();
+
+      // Forwards reset() to every inner algorithm. This wrapper keeps its
+      // cross-call state inside them, so without this a reset of the wrapper
+      // would never reach them. Each inner algorithm is responsible for
+      // clearing its own state.
+      void reset() {
+        _lowpass->reset();
+        _framebuffer->reset();
+        _audio2pitch->reset();
+        _pitch2midi->reset();
+      }
+
+      static const char* name;
+      static const char* category;
+      static const char* description;
+  };
+
+
+} // namespace standard
+} // namespace essentia
+
+#endif
diff --git a/src/algorithms/tonal/pitch2midi.cpp b/src/algorithms/tonal/pitch2midi.cpp
@@ -14,7 +14,7 @@ void Pitch2Midi::configure()
   _sampleRate = parameter("sampleRate").toReal();
   _hopSize = parameter("hopSize").toInt();
   _minFrequency = parameter("minFrequency").toReal();
-  _minOcurrenceRate = parameter("minOcurrenceRate").toReal();
+  _minOccurrenceRate = parameter("minOccurrenceRate").toReal();
   _bufferDuration = parameter("midiBufferDuration").toReal();
   _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
   _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
@@ -33,8 +33,8 @@ void Pitch2Midi::configure()
   _offsetCheckCounter = 0;
   _onsetCheckCounter = 0;
 
-  _minOcurrenceRatePeriod = _minOcurrenceRate * _bufferDuration;
-  _minOcurrenceRateThreshold = _minOcurrenceRatePeriod / _frameTime;
+  _minOccurrenceRatePeriod = _minOccurrenceRate * _bufferDuration;
+  _minOccurrenceRateThreshold = _minOccurrenceRatePeriod / _frameTime;
 
   // estimate buffer capacity
   int c = static_cast<int>( round( _sampleRate / float(_hopSize) * _bufferDuration ) );
@@ -151,7 +151,6 @@ void Pitch2Midi::compute()
         _noteOff = true;
         updateDnote();
         setOutputs(dnote, 0.0, _minNoteChangePeriod);
-        //E_INFO("offset(unvoiced frame)");
         _unvoicedFrameCounter = 0;
         _offsetCheckCounter = 0;
         _onsetCheckCounter = 0;
@@ -220,27 +219,27 @@ void Pitch2Midi::compute()
   if (!hasCoherence() && _NOTED_ON) {
     if (_maxVoted[0] != 0.0) {
       _onsetCheckCounter++;
-      // combines checker with minOcurrenceRate
-      if ((_onsetCheckCounter > _minOcurrenceRateThreshold)){
+      // combines checker with minOccurrenceRate
+      if ((_onsetCheckCounter > _minOccurrenceRateThreshold)){
         _NOTED_ON = true;
         if (note != _maxVoted[0]){  // avoid note slicing effect
             _noteOff = true;
             _noteOn = true;
             updateDnote();
             note = _maxVoted[0];
         }
-        //E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOcurrenceRateThreshold);
+        //E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOccurrenceRateThreshold);
         _offsetCheckCounter = 0;
         _onsetCheckCounter = 0;
       }
     }
     // output the max-voted midi note to avoid unestable midi note numbers
-    setOutputs(_maxVoted[0], _minOcurrenceRatePeriod, _minOcurrenceRatePeriod);
+    setOutputs(_maxVoted[0], _minOccurrenceRatePeriod, _minOccurrenceRatePeriod);
     return;
   }
 
   if (!hasCoherence() && !_NOTED_ON) {
-    if (_maxVoted[1] > _minOcurrenceRate) {
+    if (_maxVoted[1] > _minOccurrenceRate) {
       _onsetCheckCounter++;
 
       if (_onsetCheckCounter > _minOnsetCheckThreshold) {

diff --git a/src/algorithms/tonal/pitch2midi.h b/src/algorithms/tonal/pitch2midi.h
@@ -26,7 +26,7 @@ namespace standard {
       Real _sampleRate;
       int _hopSize;
       Real _minFrequency;
-      Real _minOcurrenceRate;
+      Real _minOccurrenceRate;
       Real _minOnsetCheckPeriod;
       Real _minOffsetCheckPeriod;
       Real _minNoteChangePeriod;
@@ -66,8 +66,8 @@ namespace standard {
       int _onsetCheckCounter;
 
       Real _frameTime;
-      Real _minOcurrenceRateThreshold;
-      Real _minOcurrenceRatePeriod;
+      Real _minOccurrenceRateThreshold;
+      Real _minOccurrenceRatePeriod;
 
       // former Pitch2Midi outputs, now interal vars
       Real _midiNoteNumberTransposed;
@@ -89,7 +89,7 @@ namespace standard {
         declareParameter("sampleRate", "Audio sample rate", "[8000,inf)", 44100);
         declareParameter("hopSize", "Pitch Detection analysis hop size in samples, equivalent to I/O buffer size", "[1,inf)", 128);
         declareParameter("minFrequency", "minimum detectable frequency", "[20,20000]", 60.0);
-        declareParameter("minOcurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
+        declareParameter("minOccurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
         declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in the note toggle detection algorithm", "[0.005,0.5]", 0.015); // 15ms
         declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (s)", "(0,1]", 0.030);
         declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (s)", "(0,1]", 0.075);

diff --git a/test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy b/test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy
diff --git a/test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy b/test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy
diff --git a/test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy b/test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy