-
Notifications
You must be signed in to change notification settings - Fork 597
Add new algo audio2midi #1437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Add new algo audio2midi #1437
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
1a6063a
Add first implementation and unit tests of Audio2Midi algo
xaviliz b631343
Fix unit test with separated notes.
xaviliz 77e1594
Remove output values for unvoiced frame.
xaviliz 63396b4
Small clean
xaviliz f55d2cb
Merge branch 'master' into add-new-algo-audio2midi
xaviliz 6050e18
Typo
xaviliz c0e8ad1
Fix testSeparatedNotes()
xaviliz 0003016
Hardcode _fixedFrameSize values here instead of using the variable
xaviliz d373570
Use sample rate frequency ranges for each frameSize to use.
xaviliz 2c17e35
Remove legacy text
xaviliz f6c0af7
Remove unclear required condition
xaviliz da4661f
Remove redundant code
xaviliz c6a04bb
Clarify algorithm documentation.
xaviliz 49dc967
Specified explicitly in the calls to runARealCase() all parameter values
xaviliz c6be702
Remove unnecessary prints
xaviliz dd2f3c5
Assess global mses for onsets, offsets and midi notes
xaviliz 3a582e0
Small clean
xaviliz afa0a07
Small clean
xaviliz 5b9499d
Remove soundfile module's dependency
xaviliz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| #include "audio2midi.h" | ||
|
|
||
| using namespace std; | ||
| using namespace essentia; | ||
| using namespace standard; | ||
|
|
||
| // Static metadata strings of Audio2Midi: algorithm name, category and description. | ||
| const char* Audio2Midi::name = "Audio2Midi"; | ||
| const char* Audio2Midi::category = "Pitch"; | ||
| const char* Audio2Midi::description = DOC( | ||
|   "Wrapper around Audio2Pitch and Pitch2Midi for real time application. " | ||
|   "This algorithm has a state that is used to estimate note on/off events based on consequent compute() calls."); | ||
|
|
||
| // Copies the declared parameters into member fields, derives the analysis | ||
| // frame size from the sample rate, and configures the four inner algorithms | ||
| // (low-pass filter, frame buffer, Audio2Pitch, Pitch2Midi). | ||
| void Audio2Midi::configure() | ||
| { | ||
|   // Values related to the pitch-tracking stage. | ||
|   _sampleRate = parameter("sampleRate").toReal(); | ||
|   _minFrequency = parameter("minFrequency").toReal(); | ||
|   _maxFrequency = parameter("maxFrequency").toReal(); | ||
|   _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal(); | ||
|   _loudnessThreshold = parameter("loudnessThreshold").toReal(); | ||
|
|
||
|   // Values related to the note-event stage. | ||
|   _hopSize = parameter("hopSize").toInt(); | ||
|   _tuningFrequency = parameter("tuningFrequency").toInt(); | ||
|   _transposition = parameter("transpositionAmount").toInt(); | ||
|   _minOccurrenceRate = parameter("minOccurrenceRate").toReal(); | ||
|   _midiBufferDuration = parameter("midiBufferDuration").toReal(); | ||
|   _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal(); | ||
|   _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal(); | ||
|   _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal(); | ||
|   _applyTimeCompensation = parameter("applyTimeCompensation").toBool(); | ||
|
|
||
|   // Frame size is chosen from the integer-truncated sample rate: | ||
|   // <= 16000 gives 2048, <= 24000 gives 4096, anything higher gives 8192. | ||
|   const int truncatedRate = static_cast<int>(_sampleRate); | ||
|   _frameSize = (truncatedRate <= 16000) ? 2048 | ||
|              : (truncatedRate <= 24000) ? 4096 | ||
|              : 8192; | ||
|
|
||
|   // The low-pass cutoff is fixed at 1000 here; it is not exposed as a parameter. | ||
|   _lowpass->configure(INHERIT("sampleRate"), "cutoffFrequency", 1000); | ||
|
|
||
|   // The frame buffer holds exactly one analysis frame. | ||
|   _framebuffer->configure("bufferSize", _frameSize); | ||
|
|
||
|   _audio2pitch->configure( | ||
|     INHERIT("sampleRate"), | ||
|     "frameSize", _frameSize, | ||
|     "pitchAlgorithm", _pitchAlgorithm, | ||
|     "minFrequency", _minFrequency, | ||
|     "maxFrequency", _maxFrequency, | ||
|     INHERIT("pitchConfidenceThreshold"), | ||
|     INHERIT("loudnessThreshold")); | ||
|
|
||
|   _pitch2midi->configure( | ||
|     INHERIT("sampleRate"), | ||
|     INHERIT("hopSize"), | ||
|     INHERIT("minOccurrenceRate"), | ||
|     INHERIT("applyTimeCompensation"), | ||
|     "minOnsetCheckPeriod", _minOnsetCheckPeriod, | ||
|     "minOffsetCheckPeriod", _minOffsetCheckPeriod, | ||
|     "minNoteChangePeriod", _minNoteChangePeriod, | ||
|     "midiBufferDuration", _midiBufferDuration, | ||
|     "minFrequency", _minFrequency, | ||
|     "tuningFrequency", _tuningFrequency, | ||
|     "transpositionAmount", _transposition); | ||
| } | ||
|
|
||
| // Processes one input frame through the chain | ||
| // low-pass -> frame buffer -> Audio2Pitch -> Pitch2Midi | ||
| // and writes pitch, loudness and the MIDI message vectors to the outputs. | ||
| void Audio2Midi::compute() | ||
| { | ||
|   // Resolve the input frame and the storage behind each declared output. | ||
|   // The local names carry an In/Out suffix so they do not shadow class members. | ||
|   const std::vector<Real>& frameIn = _frame.get(); | ||
|   Real& pitchOut = _pitch.get(); | ||
|   Real& loudnessOut = _loudness.get(); | ||
|   vector<string>& messageTypeOut = _messageType.get(); | ||
|   vector<Real>& midiNoteNumberOut = _midiNoteNumber.get(); | ||
|   vector<Real>& timeCompensationOut = _timeCompensation.get(); | ||
|
|
||
|   // Stage 1: low-pass the incoming frame into the lpFrame member. | ||
|   _lowpass->input("signal").set(frameIn); | ||
|   _lowpass->output("signal").set(lpFrame); | ||
|
|
||
|   // Stage 2: pass the filtered frame through the frame buffer into analysisFrame. | ||
|   _framebuffer->input("frame").set(lpFrame); | ||
|   _framebuffer->output("frame").set(analysisFrame); | ||
|
|
||
|   // Stage 3: pitch and loudness go straight to this algorithm's outputs; | ||
|   // confidence and the voiced flag stay in members (voiced feeds stage 4). | ||
|   _audio2pitch->input("frame").set(analysisFrame); | ||
|   _audio2pitch->output("pitch").set(pitchOut); | ||
|   _audio2pitch->output("pitchConfidence").set(pitchConfidence); | ||
|   _audio2pitch->output("loudness").set(loudnessOut); | ||
|   _audio2pitch->output("voiced").set(voiced); | ||
|
|
||
|   // Stage 4: turn pitch + voiced flag into MIDI messages. | ||
|   _pitch2midi->input("pitch").set(pitchOut); | ||
|   _pitch2midi->input("voiced").set(voiced); | ||
|   _pitch2midi->output("midiNoteNumber").set(midiNoteNumberOut); | ||
|   _pitch2midi->output("timeCompensation").set(timeCompensationOut); | ||
|   _pitch2midi->output("messageType").set(messageTypeOut); | ||
|
|
||
|   // Run the stages in order, after everything has been wired. | ||
|   _lowpass->compute(); | ||
|   _framebuffer->compute(); | ||
|   _audio2pitch->compute(); | ||
|   _pitch2midi->compute(); | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| #ifndef ESSENTIA_AUDIO2MIDI_H | ||
| #define ESSENTIA_AUDIO2MIDI_H | ||
|
|
||
| #include "algorithmfactory.h" | ||
|
|
||
| namespace essentia { | ||
| namespace standard { | ||
|
|
||
| // Audio2Midi chains four inner algorithms (LowPass, FrameBuffer, Audio2Pitch, | ||
| // Pitch2Midi) behind a single frame input and exposes pitch, loudness and | ||
| // MIDI message outputs. | ||
| class Audio2Midi : public Algorithm { | ||
|  protected: | ||
|   // Declared input and outputs (registered by name in the constructor). | ||
|   Input<std::vector<Real> > _frame; | ||
|   Output<Real> _pitch; | ||
|   Output<Real> _loudness; | ||
|   Output<std::vector<std::string> > _messageType; | ||
|   Output<std::vector<Real> > _midiNoteNumber; | ||
|   Output<std::vector<Real> > _timeCompensation; | ||
|
|
||
|   // Inner algorithms: created in the constructor, deleted in the destructor. | ||
|   Algorithm* _lowpass; | ||
|   Algorithm* _framebuffer; | ||
|   Algorithm* _audio2pitch; | ||
|   Algorithm* _pitch2midi; | ||
|
|
||
|   // Parameter values cached by configure(). | ||
|   Real _sampleRate; | ||
|   int _frameSize;  // derived from _sampleRate in configure(), not a declared parameter | ||
|   int _hopSize; | ||
|   std::string _pitchAlgorithm = "pitchyinfft";  // fixed value passed to Audio2Pitch | ||
|   std::string _loudnessAlgorithm = "rms";  // NOTE(review): not referenced by the visible configure()/compute() | ||
|   Real _minFrequency; | ||
|   Real _maxFrequency; | ||
|   int _tuningFrequency; | ||
|   Real _pitchConfidenceThreshold; | ||
|   Real _loudnessThreshold; | ||
|   Real _minOccurrenceRate; | ||
|   Real _midiBufferDuration; | ||
|   Real _minNoteChangePeriod; | ||
|   Real _minOnsetCheckPeriod; | ||
|   Real _minOffsetCheckPeriod; | ||
|
|
||
|   bool _applyTimeCompensation; | ||
|   int _transposition; | ||
|
|
||
|   // Intermediate containers. compute() uses lpFrame, analysisFrame, | ||
|   // pitchConfidence and voiced; the remaining ones are shadowed by locals or | ||
|   // unused in the visible implementation. | ||
|   std::vector<Real> lpFrame; | ||
|   std::vector<Real> analysisFrame; | ||
|   Real pitch; | ||
|   Real pitchConfidence; | ||
|   Real loudness; | ||
|   std::vector<Real> midiNoteNumber; | ||
|   std::vector<Real> timeCompensation; | ||
|   std::vector<std::string> messageType; | ||
|   Real onsetTimeCompensation; | ||
|   Real offsetTimeCompensation; | ||
|
|
||
|   int voiced; | ||
|
|
||
|  public: | ||
|   // Registers the input/outputs and instantiates the four inner algorithms. | ||
|   Audio2Midi() { | ||
|     declareInput(_frame, "frame", "the input frame to analyse"); | ||
|     declareOutput(_pitch, "pitch", "pitch given in Hz"); | ||
|     declareOutput(_loudness, "loudness", "detected loudness in decibels"); | ||
|     declareOutput(_messageType, "messageType", | ||
|                   "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}"); | ||
|     declareOutput(_midiNoteNumber, "midiNoteNumber", | ||
|                   "the output of detected MIDI note number, as integer, in range [0,127]"); | ||
|     declareOutput(_timeCompensation, "timeCompensation", | ||
|                   "time to be compensated in the messages"); | ||
|
|
||
|     _lowpass = AlgorithmFactory::create("LowPass"); | ||
|     _framebuffer = AlgorithmFactory::create("FrameBuffer"); | ||
|     _audio2pitch = AlgorithmFactory::create("Audio2Pitch"); | ||
|     _pitch2midi = AlgorithmFactory::create("Pitch2Midi"); | ||
|   } | ||
|
|
||
|   // Releases the inner algorithms owned by this instance. | ||
|   ~Audio2Midi() { | ||
|     delete _lowpass; | ||
|     delete _framebuffer; | ||
|     delete _audio2pitch; | ||
|     delete _pitch2midi; | ||
|   } | ||
|
|
||
|   // Declares every configurable parameter: name, description, accepted range, default. | ||
|   void declareParameters() { | ||
|     declareParameter("sampleRate", "sample rate of incoming audio frames", | ||
|                      "[8000,inf)", 44100); | ||
|     declareParameter("hopSize", "equivalent to I/O buffer size", | ||
|                      "[1,inf)", 32); | ||
|     declareParameter("minFrequency", "minimum frequency to detect in Hz", | ||
|                      "[10,20000]", 60.0); | ||
|     declareParameter("maxFrequency", "maximum frequency to detect in Hz", | ||
|                      "[10,20000]", 2300.0); | ||
|     declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", | ||
|                      "{432,440}", 440); | ||
|     declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", | ||
|                      "[0,1]", 0.25); | ||
|     declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", | ||
|                      "[-inf,0]", -51.0); | ||
|     declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", | ||
|                      "(-69,50)", 0); | ||
|     declareParameter("minOccurrenceRate", "rate of predominant pitch occurrence in MidiPool buffer to consider note ON event", | ||
|                      "[0,1]", 0.5); | ||
|     // The default buffer duration is 0.05 s. | ||
|     declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", | ||
|                      "[0.005,0.5]", 0.05); | ||
|     declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", | ||
|                      "(0,1]", 0.030); | ||
|     declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", | ||
|                      "(0,1]", 0.075); | ||
|     declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", | ||
|                      "(0,1]", 0.2); | ||
|     declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", | ||
|                      "{true,false}", true); | ||
|   } | ||
|
|
||
|   // Defined in the implementation file. | ||
|   void configure(); | ||
|   void compute(); | ||
|
|
||
|   // Static metadata strings, defined in the implementation file. | ||
|   static const char* name; | ||
|   static const char* category; | ||
|   static const char* description; | ||
| }; | ||
|
|
||
|
|
||
| } // namespace standard | ||
| } // namespace essentia | ||
|
|
||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+608 Bytes
test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy
Binary file not shown.
Binary file added
BIN
+672 Bytes
test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy
Binary file not shown.
Binary file added
BIN
+448 Bytes
test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy
Binary file not shown.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.