diff --git a/packaging/build_config.sh b/packaging/build_config.sh index 5ce5fffc3..98d292c00 100755 --- a/packaging/build_config.sh +++ b/packaging/build_config.sh @@ -2,22 +2,17 @@ HOST=i686-w64-mingw32 if [ -z "${PREFIX}" ]; then - PREFIX=`pwd` + PREFIX=$(pwd) fi echo Installing to: $PREFIX -#SHARED_OR_STATIC=" -#--enable-shared \ -#--disable-static -#" - SHARED_OR_STATIC=" ---disable-shared \ +--disable-shared --enable-static " EIGEN_VERSION=3.3.7 -FFMPEG_VERSION=ffmpeg-2.8.12 +FFMPEG_VERSION=ffmpeg-7.1.1 LAME_VERSION=3.100 TAGLIB_VERSION=taglib-1.11.1 ZLIB_VERSION=zlib-1.2.12 @@ -29,14 +24,13 @@ QT_SOURCE_URL=https://download.qt.io/archive/qt/4.8/4.8.4/qt-everywhere-opensour GAIA_VERSION=2.4.6-86-ged433ed TENSORFLOW_VERSION=2.5.0 - FFMPEG_AUDIO_FLAGS=" --disable-programs --disable-doc --disable-debug --disable-avdevice - --disable-avresample + --enable-swresample --disable-swscale --disable-postproc --disable-avfilter @@ -58,7 +52,7 @@ FFMPEG_AUDIO_FLAGS=" --enable-protocol=file --enable-protocol=pipe - --disable-sdl + --disable-sdl2 --disable-lzma --disable-zlib --disable-xlib @@ -178,7 +172,6 @@ FFMPEG_AUDIO_FLAGS=" --enable-decoder=pcm_u24le --enable-decoder=pcm_u32be --enable-decoder=pcm_u32le - --enable-decoder=pcm_zork --enable-parser=aac --enable-parser=aac_latm @@ -211,14 +204,14 @@ FFMPEG_AUDIO_FLAGS_MUXERS=" # see http://www.fftw.org/install/windows.html FFTW_FLAGS=" - --enable-float \ - --enable-sse2 \ - --with-incoming-stack-boundary=2 \ + --enable-float + --enable-sse2 + --with-incoming-stack-boundary=2 --with-our-malloc16 " LIBSAMPLERATE_FLAGS=" - --disable-fftw \ + --disable-fftw --disable-sndfile " diff --git a/packaging/debian_3rdparty/build_ffmpeg.sh b/packaging/debian_3rdparty/build_ffmpeg.sh index c193ee38e..8261f9bf2 100755 --- a/packaging/debian_3rdparty/build_ffmpeg.sh +++ b/packaging/debian_3rdparty/build_ffmpeg.sh @@ -2,12 +2,12 @@ set -e . 
../build_config.sh -echo "Building FFmpeg $FFMPEG_VERSION" +echo "Building $FFMPEG_VERSION" mux=$1 if test "$1" = "--no-muxers"; then - echo Building FFmpeg without muxers - FFMPEG_AUDIO_FLAGS_MUXERS="" + echo Building FFmpeg without muxers + FFMPEG_AUDIO_FLAGS_MUXERS="" fi rm -rf tmp @@ -19,13 +19,13 @@ tar xf $FFMPEG_VERSION.tar.gz cd $FFMPEG_VERSION ./configure \ - --enable-pic \ - $FFMPEG_AUDIO_FLAGS \ - $FFMPEG_AUDIO_FLAGS_MUXERS \ - --prefix=$PREFIX \ - --extra-ldflags=-L$PREFIX/lib \ - --extra-cflags=-I$PREFIX/include \ - $SHARED_OR_STATIC + --enable-pic \ + $FFMPEG_AUDIO_FLAGS \ + $FFMPEG_AUDIO_FLAGS_MUXERS \ + --prefix=$PREFIX \ + --extra-ldflags="-L$PREFIX/lib" \ + --extra-cflags="-I$PREFIX/include" \ + $SHARED_OR_STATIC make make install diff --git a/src/algorithms/io/audioloader.cpp b/src/algorithms/io/audioloader.cpp index 9f8d147ea..89bd331b3 100644 --- a/src/algorithms/io/audioloader.cpp +++ b/src/algorithms/io/audioloader.cpp @@ -42,8 +42,7 @@ AudioLoader::~AudioLoader() { void AudioLoader::configure() { // set ffmpeg to be silent by default, so we don't have these annoying // "invalid new backstep" messages anymore, when everything is actually fine - av_log_set_level(AV_LOG_QUIET); - //av_log_set_level(AV_LOG_VERBOSE); + av_log_set_level(AV_LOG_QUIET); // choices: {AV_LOG_VERBOSE, AV_LOG_QUIET} _computeMD5 = parameter("computeMD5").toBool(); _selectedStream = parameter("audioStream").toInt(); reset(); @@ -72,13 +71,12 @@ void AudioLoader::openAudioFile(const string& filename) { throw EssentiaException("AudioLoader: Could not find stream information, error = ", error); } - // Dump information about file onto standard error - //dump_format(_demuxCtx, 0, filename.c_str(), 0); - // Check that we have only 1 audio stream in the file _streams.clear(); for (int i=0; i<(int)_demuxCtx->nb_streams; i++) { - if (_demuxCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) { + // Use modern API to get codec parameters + const AVCodecParameters* codecParams = _demuxCtx->streams[i]->codecpar; + if (codecParams->codec_type == AVMEDIA_TYPE_AUDIO) { _streams.push_back(i); } } @@ -98,32 +96,46 @@ void AudioLoader::openAudioFile(const string& filename) { _streamIdx = _streams[_selectedStream]; - // Load corresponding audio codec - _audioCtx = _demuxCtx->streams[_streamIdx]->codec; - _audioCodec = avcodec_find_decoder(_audioCtx->codec_id); + // Create codec context from stream parameters (modern approach) + const AVCodecParameters* codecParams = _demuxCtx->streams[_streamIdx]->codecpar; + _audioCodec = avcodec_find_decoder(codecParams->codec_id); if (!_audioCodec) { throw EssentiaException("AudioLoader: Unsupported codec!"); } + _audioCtx = avcodec_alloc_context3(_audioCodec); + if (!_audioCtx) { + throw EssentiaException("AudioLoader: Could not allocate codec context"); + } + + // Copy parameters from stream to codec context + if (avcodec_parameters_to_context(_audioCtx, codecParams) < 0) { + avcodec_free_context(&_audioCtx); + throw EssentiaException("AudioLoader: Could not copy codec parameters"); + } + if (avcodec_open2(_audioCtx, _audioCodec, NULL) < 0) { + avcodec_free_context(&_audioCtx); throw EssentiaException("AudioLoader: Unable to instantiate codec..."); } - // Configure format convertion (no samplerate conversion yet) - int64_t layout = av_get_default_channel_layout(_audioCtx->channels); - - /* - const char* fmt = 0; - get_format_from_sample_fmt(&fmt, _audioCtx->sample_fmt); - E_DEBUG(EAlgorithm, "AudioLoader: converting from " << (fmt ? 
fmt : "unknown") << " to FLT"); - */ + // Configure format conversion (no samplerate conversion yet) + // Use modern channel layout API + AVChannelLayout layout; + if (_audioCtx->ch_layout.order != AV_CHANNEL_ORDER_UNSPEC) { + av_channel_layout_copy(&layout, &_audioCtx->ch_layout); + } else { + // Fallback for streams that only report a channel count: derive a default layout + av_channel_layout_default(&layout, _audioCtx->ch_layout.nb_channels); + } E_DEBUG(EAlgorithm, "AudioLoader: using sample format conversion from libswresample"); _convertCtxAv = swr_alloc(); - av_opt_set_int(_convertCtxAv, "in_channel_layout", layout, 0); - av_opt_set_int(_convertCtxAv, "out_channel_layout", layout, 0); + // Use modern channel layout API for swresample configuration + av_opt_set_chlayout(_convertCtxAv, "in_chlayout", &layout, 0); + av_opt_set_chlayout(_convertCtxAv, "out_chlayout", &layout, 0); av_opt_set_int(_convertCtxAv, "in_sample_rate", _audioCtx->sample_rate, 0); av_opt_set_int(_convertCtxAv, "out_sample_rate", _audioCtx->sample_rate, 0); av_opt_set_int(_convertCtxAv, "in_sample_fmt", _audioCtx->sample_fmt, 0); @@ -154,14 +166,16 @@ void AudioLoader::closeAudioFile() { swr_free(&_convertCtxAv); } - // Close the codec - if (_audioCtx) avcodec_close(_audioCtx); + // Close the codec using modern API + if (_audioCtx) { + avcodec_free_context(&_audioCtx); + } // Close the audio file if (_demuxCtx) avformat_close_input(&_demuxCtx); - // free AVPacket + // free AVPacket using modern API // TODO: use a variable for whether _packet is initialized or not - av_free_packet(&_packet); + av_packet_unref(&_packet); _demuxCtx = 0; _audioCtx = 0; _streams.clear(); @@ -216,8 +230,6 @@ AlgorithmStatus AudioLoader::process() { msg << "AudioLoader: Error reading frame: " << errstring; E_WARNING(msg.str()); } - // TODO: should try reading again on EAGAIN error? - // https://github.com/FFmpeg/FFmpeg/blob/master/ffmpeg.c shouldStop(true); flushPacket(); closeAudioFile(); @@ -237,14 +249,21 @@ AlgorithmStatus AudioLoader::process() { if (_computeMD5) { av_md5_update(_md5Encoded, _packet.data, _packet.size); } + + // decode ONE frame from this packet (if any). decodePacket() will + // *not* mutate _packet.data/_packet.size. It will set _dataSize to number of bytes written. + int consumed = decodePacket(); - // decode frames in packet - while(_packet.size > 0) { - if (!decodePacket()) break; + // After decodePacket we may have produced audio in _buffer (bytes in _dataSize). + if (_dataSize > 0) { + // copyFFmpegOutput will acquire once and release once copyFFmpegOutput(); + // reset _dataSize so we don't accidentally reuse it + _dataSize = 0; } - // neds to be freed !! - av_free_packet(&_packet); + + // needs to be freed using modern API !! 
+ av_packet_unref(&_packet); return OK; } @@ -259,11 +278,23 @@ int AudioLoader::decode_audio_frame(AVCodecContext* audioCtx, // output = number of bytes actually written (actual: FLT data) //E_DEBUG(EAlgorithm, "decode_audio_frame, available bytes in buffer = " << _dataSize); int gotFrame = 0; - av_frame_unref(_decodedFrame); //avcodec_get_frame_defaults(_decodedFrame); - - int len = avcodec_decode_audio4(audioCtx, _decodedFrame, &gotFrame, packet); - - if (len < 0) return len; // error handling should be done outside + av_frame_unref(_decodedFrame); + + // Use modern decoding API: send packet to decoder + int send_result = avcodec_send_packet(audioCtx, packet); + if (send_result < 0) return send_result; // error handling should be done outside + + // Receive decoded frame from decoder + int receive_result = avcodec_receive_frame(audioCtx, _decodedFrame); + if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) { + gotFrame = 0; + // Return the number of bytes that would have been consumed + // For flush packets (empty packets), return 0 + return (packet->size > 0) ? packet->size : 0; + } else if (receive_result < 0) { + return receive_result; // error handling should be done outside + } + gotFrame = 1; if (gotFrame) { int inputSamples = _decodedFrame->nb_samples; @@ -310,29 +341,59 @@ int AudioLoader::decode_audio_frame(AVCodecContext* audioCtx, *outputSize = 0; } - return len; + // Return the number of bytes consumed from the packet + // For the modern API, we consume the entire packet when we send it + return packet->size; } - void AudioLoader::flushPacket() { + // Sending a NULL packet tells the decoder to flush internal buffers + av_packet_unref(&_packet); AVPacket empty; av_init_packet(&empty); - do { - _dataSize = FFMPEG_BUFFER_SIZE; - empty.data = NULL; - empty.size = 0; - - int len = decode_audio_frame(_audioCtx, _buffer, &_dataSize, &empty); - if (len < 0) { - char errstring[1204]; - av_strerror(len, errstring, sizeof(errstring)); - ostringstream msg; - msg << "AudioLoader: decoding error while flushing a packet:" << errstring; - E_WARNING(msg.str()); + empty.data = NULL; + empty.size = 0; + + // keep draining until decoder stops returning frames + while (true) { + _dataSize = 0; + int send_result = avcodec_send_packet(_audioCtx, &empty); + if (send_result < 0 && send_result != AVERROR(EAGAIN)) { + break; + } + int receive_result = avcodec_receive_frame(_audioCtx, _decodedFrame); + if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) { + break; + } else if (receive_result < 0) { + break; } - copyFFmpegOutput(); - } while (_dataSize > 0); + // got a frame -> convert to floats as in decodePacket() + int inputSamples = _decodedFrame->nb_samples; + int outPlaneSize = av_samples_get_buffer_size(NULL, _nChannels, inputSamples, AV_SAMPLE_FMT_FLT, 1); + if (outPlaneSize > 0) { + if (_audioCtx->sample_fmt == AV_SAMPLE_FMT_FLT) { + memcpy(_buffer, _decodedFrame->data[0], std::min(outPlaneSize, FFMPEG_BUFFER_SIZE)); + _dataSize = std::min(outPlaneSize, FFMPEG_BUFFER_SIZE); + } else { + float* outBuff = (float*)_buffer; + int samplesWritten = swr_convert(_convertCtxAv, + (uint8_t**)&outBuff, + inputSamples, + (const uint8_t**)_decodedFrame->data, + inputSamples); + if (samplesWritten > 0) { + _dataSize = std::min(samplesWritten * _nChannels * av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT), + FFMPEG_BUFFER_SIZE); + } + } + } + + if (_dataSize > 0) { + copyFFmpegOutput(); + _dataSize = 0; + } + } } @@ -340,93 +401,88 @@ void AudioLoader::flushPacket() { * Gets 
the AVPacket stored in _packet, and decodes all the samples it can from it, * putting them in _buffer, the total number of bytes written begin stored in _dataSize. */ + int AudioLoader::decodePacket() { - /* - E_DEBUG(EAlgorithm, "-----------------------------------------------------"); - E_DEBUG(EAlgorithm, "decoding packet of " << _packet.size << " bytes"); - E_DEBUG(EAlgorithm, "pts: " << _packet.pts << " - dts: " << _packet.dts); //" - pos: " << pkt->pos); - E_DEBUG(EAlgorithm, "flags: " << _packet.flags); - E_DEBUG(EAlgorithm, "duration: " << _packet.duration); - */ - int len = 0; - - // buff is an offset in our output buffer, it points to where we should start - // writing the next decoded samples - float* buff = _buffer; - - // _dataSize gets the size of the buffer, in bytes - _dataSize = FFMPEG_BUFFER_SIZE; - - // Note: md5 should be computed before decoding frame, as the decoding may - // change the content of a packet. Still, not sure if it is correct to - // compute md5 over packet which contains incorrect frames, potentially - // belonging to id3 metadata (TODO: or is it just a missing header issue?), - // but computing md5 hash using ffmpeg will also treat it as audio: - // ffmpeg -i file.mp3 -acodec copy -f md5 - - - len = decode_audio_frame(_audioCtx, buff, &_dataSize, &_packet); - - if (len < 0) { + // Prepare float-view of the output buffer + float* outBuff = (float*)_buffer; + // Default: no bytes produced yet + _dataSize = 0; + + // Modern API: send the full packet to the decoder once + int send_result = avcodec_send_packet(_audioCtx, &_packet); + if (send_result == AVERROR(EAGAIN)) { + // decoder not ready to accept packet; try receiving frames first + // but for streaming we simply try to receive a frame below + } else if (send_result < 0) { + // fatal decoding error for this packet char errstring[1204]; - av_strerror(len, errstring, sizeof(errstring)); - ostringstream msg; - - if (_audioCtx->codec_id == AV_CODEC_ID_MP3) { - msg << "AudioLoader: invalid frame, skipping it: " << errstring; - // mp3 streams can have tag frames (id3v2?) which libavcodec tries to - // read as audio anyway, and we probably don't want print an error - // message for that... - // TODO: Are these frames really id3 tags? - - //E_DEBUG(EAlgorithm, msg); - E_WARNING(msg.str()); - } - else { - msg << "AudioLoader: error while decoding, skipping frame: " << errstring; - E_WARNING(msg.str()); - } + av_strerror(send_result, errstring, sizeof(errstring)); + E_WARNING("AudioLoader: avcodec_send_packet error: " << errstring); return 0; } - if (len != _packet.size) { - // https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga834bb1b062fbcc2de4cf7fb93f154a3e - - // Some decoders may support multiple frames in a single AVPacket. Such - // decoders would then just decode the first frame and the return value - // would be less than the packet size. In this case, avcodec_decode_audio4 - // has to be called again with an AVPacket containing the remaining data - // in order to decode the second frame, etc... Even if no frames are - // returned, the packet needs to be fed to the decoder with remaining - // data until it is completely consumed or an error occurs. 
- - E_WARNING("AudioLoader: more than 1 frame in packet, decoding remaining bytes..."); - E_WARNING("at sample index: " << output("audio").totalProduced()); - E_WARNING("decoded samples: " << len); - E_WARNING("packet size: " << _packet.size); + // Try to receive ONE frame + int receive_result = avcodec_receive_frame(_audioCtx, _decodedFrame); + if (receive_result == AVERROR(EAGAIN) || receive_result == AVERROR_EOF) { + // No frame ready from this packet + return 0; + } else if (receive_result < 0) { + char errstring[1204]; + av_strerror(receive_result, errstring, sizeof(errstring)); + E_WARNING("AudioLoader: avcodec_receive_frame error: " << errstring); + return 0; } - // update packet data pointer to data left undecoded (if any) - _packet.size -= len; - _packet.data += len; + // We got a frame -> convert it to float interleaved + int inputSamples = _decodedFrame->nb_samples; + // compute expected number of output bytes for these samples + int outPlaneSize = av_samples_get_buffer_size(NULL, _nChannels, inputSamples, AV_SAMPLE_FMT_FLT, 1); + if (outPlaneSize <= 0) { + E_WARNING("AudioLoader: computed non-positive outPlaneSize"); + return 0; + } + // Ensure output buffer is large enough + if (outPlaneSize > FFMPEG_BUFFER_SIZE) { + // this shouldn't normally happen; guard and shrink to prevent overflow + ostringstream msg; + msg << "AudioLoader: required buffer " << outPlaneSize << " exceeds allocated " << FFMPEG_BUFFER_SIZE; + E_WARNING(msg.str()); + // clamp to buffer size (will avoid overflow but may drop data) + } - if (_dataSize <= 0) { - // No data yet, get more frames - // cout << "no data yet, get more frames" << endl; - _dataSize = 0; + // Perform conversion if needed + if (_audioCtx->sample_fmt == AV_SAMPLE_FMT_FLT) { + // direct copy - frame data is interleaved in data[0] for packed formats + memcpy(outBuff, _decodedFrame->data[0], std::min(outPlaneSize, FFMPEG_BUFFER_SIZE)); + } else { + // Use swr_convert; use pointer to uint8_t* for API compatibility + int samplesWritten = swr_convert(_convertCtxAv, + (uint8_t**)&outBuff, + inputSamples, + (const uint8_t**)_decodedFrame->data, + inputSamples); + if (samplesWritten <= 0) { + E_WARNING("AudioLoader: swr_convert returned no samples"); + return 0; + } + // recompute bytes produced + outPlaneSize = samplesWritten * _nChannels * av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT); } - return len; -} + // commit produced bytes + _dataSize = std::min(outPlaneSize, FFMPEG_BUFFER_SIZE); -/* -inline Real scale(int16_t value) { - return value / (Real)32767; + // Return number of bytes logically consumed from the packet. + // With modern API we don't need to tell caller how many bytes consumed; + // we return the original packet size as a hint (caller will unref packet). 
+ return _packet.size; } -*/ + void AudioLoader::copyFFmpegOutput() { - int nsamples = _dataSize / (av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * _nChannels); + int bytesPerSample = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT); + int nsamples = _dataSize / (bytesPerSample * _nChannels); if (nsamples == 0) return; // acquire necessary data @@ -437,33 +493,24 @@ void AudioLoader::copyFFmpegOutput() { vector& audio = *((vector*)_audio.getTokens()); + float* fbuf = (float*)_buffer; // interpret buffer as floats for copying + if (_nChannels == 1) { for (int i=0; ichannels, _audioCtx->sample_rate); + pushChannelsSampleRateInfo(_audioCtx->ch_layout.nb_channels, _audioCtx->sample_rate); pushCodecInfo(_audioCodec->name, _audioCtx->bit_rate); } diff --git a/src/algorithms/io/audioloader.h b/src/algorithms/io/audioloader.h index 4b1b15c29..f1d6b0b27 100644 --- a/src/algorithms/io/audioloader.h +++ b/src/algorithms/io/audioloader.h @@ -48,12 +48,13 @@ class AudioLoader : public Algorithm { // each time we decode a frame we need to have at least a full buffer of free space. const static int FFMPEG_BUFFER_SIZE = MAX_AUDIO_FRAME_SIZE * 2; - float* _buffer; + // float* _buffer; + uint8_t* _buffer; // byte-oriented buffer, clearer semantics int _dataSize; AVFormatContext* _demuxCtx; AVCodecContext* _audioCtx; - AVCodec* _audioCodec; + const AVCodec* _audioCodec; AVPacket _packet; AVMD5 *_md5Encoded; uint8_t _checksum[16]; @@ -94,11 +95,14 @@ class AudioLoader : public Algorithm { _audio.setBufferType(BufferUsage::forLargeAudioStream); - // Register all formats and codecs - av_register_all(); + // Note: av_register_all() was deprecated and removed in FFmpeg 4.0 + // Modern FFmpeg automatically registers formats and codecs - // use av_malloc, because we _need_ the buffer to be 16-byte aligned - _buffer = (float*)av_malloc(FFMPEG_BUFFER_SIZE); + // use av_malloc, allocate bytes; interpret as float* when needed + _buffer = (uint8_t*)av_malloc(FFMPEG_BUFFER_SIZE); + if (!_buffer) { + throw EssentiaException("Error allocating FFmpeg buffer"); + } _md5Encoded = av_md5_alloc(); if (!_md5Encoded) { diff --git a/src/essentia/utils/audiocontext.cpp b/src/essentia/utils/audiocontext.cpp index 8907887d8..69f1282b0 100644 --- a/src/essentia/utils/audiocontext.cpp +++ b/src/essentia/utils/audiocontext.cpp @@ -26,18 +26,16 @@ using namespace essentia; AudioContext::AudioContext() : _isOpen(false), _avStream(0), _muxCtx(0), _codecCtx(0), _inputBufSize(0), _buffer(0), _convertCtxAv(0) { - av_log_set_level(AV_LOG_VERBOSE); - //av_log_set_level(AV_LOG_QUIET); + av_log_set_level(AV_LOG_QUIET); // choices: {AV_LOG_VERBOSE, AV_LOG_QUIET} - // Register all formats and codecs - av_register_all(); // this should be done once only.. 
+ // Note: av_register_all() was deprecated and removed in FFmpeg 4.0 + // Modern FFmpeg automatically registers formats and codecs if (sizeof(float) != av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT)) { throw EssentiaException("Unsupported float size"); } } - int AudioContext::create(const std::string& filename, const std::string& format, int nChannels, int sampleRate, int bitrate) { @@ -45,7 +43,7 @@ int AudioContext::create(const std::string& filename, _filename = filename; - AVOutputFormat* av_output_format = av_guess_format(format.c_str(), 0, 0); + const AVOutputFormat* av_output_format = av_guess_format(format.c_str(), 0, 0); if (!av_output_format) { throw EssentiaException("Could not find a suitable output format for \"", filename, "\""); } @@ -56,107 +54,132 @@ int AudioContext::create(const std::string& filename, _muxCtx = avformat_alloc_context(); if (!_muxCtx) throw EssentiaException("Could not allocate the format context"); - _muxCtx->oformat = av_output_format; + _muxCtx->oformat = const_cast<AVOutputFormat*>(av_output_format); + + // Find encoder first + const AVCodec* audioCodec = avcodec_find_encoder(av_output_format->audio_codec); + if (!audioCodec) { + // Try fallback: use codec id reported by format (older FFmpeg may set this) + audioCodec = avcodec_find_encoder(_muxCtx->oformat->audio_codec); + } + if (!audioCodec) { + throw EssentiaException("Codec for ", format, " files not found or not supported"); + } - // Create audio stream - _avStream = avformat_new_stream(_muxCtx, NULL); + // Create audio stream and pass the codec to help FFmpeg initialize defaults + _avStream = avformat_new_stream(_muxCtx, audioCodec); if (!_avStream) throw EssentiaException("Could not allocate stream"); - //_avStream->id = 1; // necessary? found here: http://sgros.blogspot.com.es/2013/01/deprecated-functions-in-ffmpeg-library.html - - // Load corresponding codec and set it up: - _codecCtx = _avStream->codec; - _codecCtx->codec_id = _muxCtx->oformat->audio_codec; - _codecCtx->codec_type = AVMEDIA_TYPE_AUDIO; - _codecCtx->bit_rate = bitrate; - _codecCtx->sample_rate = sampleRate; - _codecCtx->channels = nChannels; - _codecCtx->channel_layout = av_get_default_channel_layout(nChannels); - - // Find encoder - av_log_set_level(AV_LOG_VERBOSE); - AVCodec* audioCodec = avcodec_find_encoder(_codecCtx->codec_id); - if (!audioCodec) throw EssentiaException("Codec for ", format, " files not found or not supported"); - switch (_codecCtx->codec_id) { - case AV_CODEC_ID_VORBIS: - _codecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP; - break; - case AV_CODEC_ID_MP3: - _codecCtx->sample_fmt = AV_SAMPLE_FMT_S16P; - break; - default: - _codecCtx->sample_fmt = AV_SAMPLE_FMT_S16; - } + // Create codec context + _codecCtx = avcodec_alloc_context3(audioCodec); + if (!_codecCtx) throw EssentiaException("Could not allocate codec context"); + + // Set codec context fields + _codecCtx->codec_id = audioCodec->id; + _codecCtx->codec_type = AVMEDIA_TYPE_AUDIO; + _codecCtx->bit_rate = bitrate; + _codecCtx->sample_rate = sampleRate; + + // channel layout + av_channel_layout_default(&_codecCtx->ch_layout, nChannels); + // set time_base for codec (1/sample_rate) + _codecCtx->time_base = AVRational{1, sampleRate}; - // Check if the hardcoded sample format is supported by the codec + // Choose a sample format: prefer common defaults but check codec supports it + enum AVSampleFormat desired_fmt = AV_SAMPLE_FMT_S16; + if (audioCodec->id == AV_CODEC_ID_VORBIS) desired_fmt = AV_SAMPLE_FMT_FLTP; + if (audioCodec->id == AV_CODEC_ID_MP3) desired_fmt = 
AV_SAMPLE_FMT_S16P; // keep MP3 as planar s16 if desired + + // If codec provides supported list, pick one from it (prefer desired_fmt) if (audioCodec->sample_fmts) { const enum AVSampleFormat* p = audioCodec->sample_fmts; + bool found = false; while (*p != AV_SAMPLE_FMT_NONE) { - if (*p == _codecCtx->sample_fmt) break; - p++; + if (*p == desired_fmt) { found = true; break; } + ++p; } - if (*p == AV_SAMPLE_FMT_NONE) { - // Not supported --> use the first one in the list as default? - // _codecCtx->sample_fmt = audioCodec->sample_fmts[0]; - ostringstream msg; - msg << "AudioWriter: Could not open codec \"" << audioCodec->long_name << "\" for " - << format << " files: sample format " << av_get_sample_fmt_name(_codecCtx->sample_fmt) << " is not supported"; - throw EssentiaException(msg); + if (!found) { + // fallback to first supported format + desired_fmt = audioCodec->sample_fmts[0]; } } + _codecCtx->sample_fmt = desired_fmt; - // Open codec and store it in _codecCtx. + // Open codec int result = avcodec_open2(_codecCtx, audioCodec, NULL); if (result < 0) { char errstring[1204]; av_strerror(result, errstring, sizeof(errstring)); - - ostringstream msg; + ostringstream msg; msg << "AudioWriter: Could not open codec \"" << audioCodec->long_name << "\" for " << format << " files: " << errstring; throw EssentiaException(msg); } + // Copy codec parameters to muxer stream (modern API) + result = avcodec_parameters_from_context(_avStream->codecpar, _codecCtx); + if (result < 0) { + char errstring[1204]; + av_strerror(result, errstring, sizeof(errstring)); + ostringstream msg; + msg << "Failed to copy codec parameters: " << errstring; + throw EssentiaException(msg); + } + + // Ensure stream is marked as audio and set a sensible time_base for muxer + _avStream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + // Set stream time_base to match codec time base (or 1/sample_rate) + _avStream->time_base = _codecCtx->time_base; + + // Determine frame_size fallback for PCM codecs (some PCM codecs do not set frame_size) switch (_codecCtx->codec_id) { case AV_CODEC_ID_PCM_S16LE: case AV_CODEC_ID_PCM_S16BE: case AV_CODEC_ID_PCM_U16LE: case AV_CODEC_ID_PCM_U16BE: - // PCM codecs do not provide frame size in samples, use 4096 bytes on input - _codecCtx->frame_size = 4096 / _codecCtx->channels / av_get_bytes_per_sample(AV_SAMPLE_FMT_S16); + // use a default input frame size in samples + _codecCtx->frame_size = 4096 / (_codecCtx->ch_layout.nb_channels * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)); break; - - //case AV_CODEC_ID_FLAC: - //case AV_CODEC_ID_VORBIS: - // break; - default: + // for encoders that set frame_size, keep it if (_codecCtx->frame_size <= 1) { - throw EssentiaException("Do not know how to encode given format: ", format); + // Some codecs (e.g. 
vorbis) have variable frame sizes — in that case use a safe default + if (_codecCtx->codec_id == AV_CODEC_ID_VORBIS || _codecCtx->codec_id == AV_CODEC_ID_FLAC) { + // vorbis and flac can accept arbitrary nb_samples; choose a reasonable default + _codecCtx->frame_size = 1024; + } else { + throw EssentiaException("Do not know how to encode given format: ", format); + } } } - // Allocate input audio FLT buffer - _inputBufSize = av_samples_get_buffer_size(NULL, - _codecCtx->channels, - _codecCtx->frame_size, + // Allocate input audio FLT buffer sized for codecCtx->frame_size samples + _inputBufSize = av_samples_get_buffer_size(NULL, + _codecCtx->ch_layout.nb_channels, + _codecCtx->frame_size, AV_SAMPLE_FMT_FLT, 0); _buffer = (float*)av_malloc(_inputBufSize); + if (!_buffer) { + throw EssentiaException("Could not allocate input float buffer"); + } + + _pts = 0; // reset PTS counter for new file - strncpy(_muxCtx->filename, _filename.c_str(), sizeof(_muxCtx->filename)); - - // Configure sample format convertion + // Configure sample format conversion E_DEBUG(EAlgorithm, "AudioContext: using sample format conversion from libswresample"); _convertCtxAv = swr_alloc(); - - av_opt_set_int(_convertCtxAv, "in_channel_layout", _codecCtx->channel_layout, 0); - av_opt_set_int(_convertCtxAv, "out_channel_layout", _codecCtx->channel_layout, 0); + if (!_convertCtxAv) { + throw EssentiaException("Could not allocate SwrContext"); + } + + av_opt_set_chlayout(_convertCtxAv, "in_chlayout", &_codecCtx->ch_layout, 0); + av_opt_set_chlayout(_convertCtxAv, "out_chlayout", &_codecCtx->ch_layout, 0); av_opt_set_int(_convertCtxAv, "in_sample_rate", _codecCtx->sample_rate, 0); av_opt_set_int(_convertCtxAv, "out_sample_rate", _codecCtx->sample_rate, 0); av_opt_set_int(_convertCtxAv, "in_sample_fmt", AV_SAMPLE_FMT_FLT, 0); av_opt_set_int(_convertCtxAv, "out_sample_fmt", _codecCtx->sample_fmt, 0); if (swr_init(_convertCtxAv) < 0) { - throw EssentiaException("AudioLoader: Could not initialize swresample context"); + throw EssentiaException("AudioLoader: Could not initialize swresample context"); } return _codecCtx->frame_size; @@ -167,13 +190,23 @@ void AudioContext::open() { if (_isOpen) return; if (!_muxCtx) throw EssentiaException("Trying to open an audio file that has not been created yet or has been closed"); - - // Open output file - if (avio_open(&_muxCtx->pb, _filename.c_str(), AVIO_FLAG_WRITE) < 0) { - throw EssentiaException("Could not open \"", _filename, "\""); + + // Open the output IO + int err = avio_open(&_muxCtx->pb, _filename.c_str(), AVIO_FLAG_WRITE); + if (err < 0) { + char errstring[1204]; + av_strerror(err, errstring, sizeof(errstring)); + throw EssentiaException("Could not open \"", _filename, "\": ", errstring); } - avformat_write_header(_muxCtx, /* AVDictionary **options */ NULL); + // Write header + err = avformat_write_header(_muxCtx, NULL); + if (err < 0) { + char errstring[1204]; + av_strerror(err, errstring, sizeof(errstring)); + throw EssentiaException("Could not write header for \"", _filename, "\": ", errstring); + } + _isOpen = true; } @@ -183,40 +216,50 @@ void AudioContext::close() { // Close output file if (_isOpen) { + // Flush encoder via writeEOF() writeEOF(); // Write trailer to the end of the file av_write_trailer(_muxCtx); - avio_close(_muxCtx->pb); - } - - avcodec_close(_avStream->codec); + // close output IO + if (_muxCtx->pb) { + avio_closep(&_muxCtx->pb); // modern safe API + } - av_freep(&_buffer); + _isOpen = false; + } - av_freep(&_avStream->codec); - av_freep(&_avStream); - 
av_freep(&_muxCtx); // TODO also must be av_free, not av_freep + // Use modern API for codec context cleanup + if (_codecCtx) { + avcodec_free_context(&_codecCtx); + _codecCtx = nullptr; + } - // TODO: need those assignments? - _muxCtx = 0; - _avStream = 0; - _codecCtx = 0; - _buffer = 0; + // free input buffer + if (_buffer) { + av_freep(&_buffer); + _buffer = nullptr; + } + // free swresample context if (_convertCtxAv) { swr_close(_convertCtxAv); swr_free(&_convertCtxAv); + _convertCtxAv = nullptr; } - _isOpen = false; + if (_muxCtx) { + avformat_free_context(_muxCtx); + _muxCtx = nullptr; + _avStream = nullptr; + } } void AudioContext::write(const vector& stereoData) { - if (_codecCtx->channels != 2) { - throw EssentiaException("Trying to write stereo audio data to an audio file with ", _codecCtx->channels, " channels"); + if (_codecCtx->ch_layout.nb_channels != 2) { + throw EssentiaException("Trying to write stereo audio data to an audio file with ", _codecCtx->ch_layout.nb_channels, " channels"); } int dsize = (int)stereoData.size(); @@ -240,8 +283,8 @@ void AudioContext::write(const vector& stereoData) { void AudioContext::write(const vector& monoData) { - if (_codecCtx->channels != 1) { - throw EssentiaException("Trying to write mono audio data to an audio file with ", _codecCtx->channels, " channels"); + if (_codecCtx->ch_layout.nb_channels != 1) { + throw EssentiaException("Trying to write mono audio data to an audio file with ", _codecCtx->ch_layout.nb_channels, " channels"); } int dsize = (int)monoData.size(); @@ -266,107 +309,173 @@ void AudioContext::encodePacket(int size) { _codecCtx->frame_size = size; } else if (size > _codecCtx->frame_size) { - // input audio vector does not fit into the codec's buffer throw EssentiaException("AudioLoader: Input audio segment is larger than the codec's frame size"); } - // convert sample format to the one required by codec - int inputPlaneSize = av_samples_get_buffer_size(NULL, - _codecCtx->channels, - size, - AV_SAMPLE_FMT_FLT, 0); - int outputPlaneSize; - uint8_t* bufferFmt; - - if (av_samples_alloc(&bufferFmt, &outputPlaneSize, - _codecCtx->channels, size, - _codecCtx->sample_fmt, 0) < 0) { - throw EssentiaException("Could not allocate output buffer for sample format conversion"); - } - - int written = swr_convert(_convertCtxAv, - &bufferFmt, - size, - (const uint8_t**) &_buffer, - size); - - if (written < size) { - // The same as in AudioLoader. There may be data remaining in the internal - // FIFO buffer to get this data: call swr_convert() with NULL input - // But we just throw exception instead. 
- ostringstream msg; - msg << "AudioLoader: Incomplete format conversion (some samples missing)" - << " from " << av_get_sample_fmt_name(AV_SAMPLE_FMT_FLT) - << " to " << av_get_sample_fmt_name(_codecCtx->sample_fmt); - throw EssentiaException(msg); - } + // prepare conversion buffers (bufferFmt[ch]) and linesize[ch] + uint8_t* bufferFmt[AV_NUM_DATA_POINTERS] = { nullptr }; + int linesize[AV_NUM_DATA_POINTERS] = { 0 }; + AVFrame* frame = nullptr; + + try { + if (av_samples_alloc(bufferFmt, + linesize, + _codecCtx->ch_layout.nb_channels, + size, + _codecCtx->sample_fmt, + 0) < 0) { + throw EssentiaException("Could not allocate output buffer for sample format conversion"); + } + + // perform sample format conversion + int written = swr_convert(_convertCtxAv, + bufferFmt, + size, + (const uint8_t**)&_buffer, + size); + + if (written < size) { + ostringstream msg; + msg << "AudioLoader: Incomplete format conversion (some samples missing)" + << " from " << av_get_sample_fmt_name(AV_SAMPLE_FMT_FLT) + << " to " << av_get_sample_fmt_name(_codecCtx->sample_fmt); + av_freep(&bufferFmt[0]); + throw EssentiaException(msg); + } + + // allocate frame + frame = av_frame_alloc(); + if (!frame) { + av_freep(&bufferFmt[0]); + throw EssentiaException("Error allocating audio frame"); + } + + frame->nb_samples = _codecCtx->frame_size; + frame->format = _codecCtx->sample_fmt; + frame->ch_layout = _codecCtx->ch_layout; + + if (av_frame_get_buffer(frame, 0) < 0) { + av_frame_free(&frame); + av_freep(&bufferFmt[0]); + throw EssentiaException("Could not allocate audio frame buffer"); + } + + // Copy converted audio into AVFrame + int bytesPerSample = av_get_bytes_per_sample(_codecCtx->sample_fmt); + if (av_sample_fmt_is_planar(_codecCtx->sample_fmt)) { + for (int ch = 0; ch < _codecCtx->ch_layout.nb_channels; ++ch) { + memcpy(frame->data[ch], bufferFmt[ch], size * bytesPerSample); + } + } else { + memcpy(frame->data[0], bufferFmt[0], size * _codecCtx->ch_layout.nb_channels * bytesPerSample); + } + + // send frame to encoder + int result = avcodec_send_frame(_codecCtx, frame); + if (result < 0) { + av_frame_free(&frame); + av_freep(&bufferFmt[0]); + char errstring[1024]; + av_strerror(result, errstring, sizeof(errstring)); + ostringstream msg; + msg << "Error sending frame to encoder: " << errstring; + throw EssentiaException(msg); + } + + // receive packets and write them (may be 0..N packets) + AVPacket packet; + av_init_packet(&packet); + packet.data = NULL; + packet.size = 0; + + while (result >= 0) { + result = avcodec_receive_packet(_codecCtx, &packet); + if (result == AVERROR(EAGAIN) || result == AVERROR_EOF) break; + else if (result < 0) { + char errstring[1024]; + av_strerror(result, errstring, sizeof(errstring)); + ostringstream msg; + msg << "Error receiving packet from encoder: " << errstring; + throw EssentiaException(msg); + } + + // ensure stream index set + packet.stream_index = _avStream->index; + + // assign PTS/DTS + packet.pts = _pts; + packet.dts = _pts; + _pts += frame->nb_samples; - AVFrame *frame; - frame = av_frame_alloc(); - if (!frame) { - throw EssentiaException("Error allocating audio frame"); + // write packet (interleaved) + if (av_write_frame(_muxCtx, &packet) != 0) { + av_packet_unref(&packet); + av_frame_free(&frame); + av_freep(&bufferFmt[0]); + throw EssentiaException("Error while writing audio frame"); + } + + av_packet_unref(&packet); + } + + // cleanup + av_frame_free(&frame); + av_freep(&bufferFmt[0]); // frees all planes + internal table + _codecCtx->frame_size = tmp_fs; + } 
catch (...) { + av_frame_free(&frame); + av_freep(&bufferFmt[0]); + throw; + } +} - frame->nb_samples = _codecCtx->frame_size; - frame->format = _codecCtx->sample_fmt; - frame->channel_layout = _codecCtx->channel_layout; - frame->channels = _codecCtx->channels; - int result = avcodec_fill_audio_frame(frame, _codecCtx->channels, _codecCtx->sample_fmt, - bufferFmt, outputPlaneSize * _codecCtx->channels, 0); +void AudioContext::writeEOF() { + if (!_codecCtx) return; + // Send NULL frame to flush the encoder + int result = avcodec_send_frame(_codecCtx, NULL); if (result < 0) { - char errstring[1204]; + char errstring[1024]; av_strerror(result, errstring, sizeof(errstring)); ostringstream msg; - msg << "Could not setup audio frame: " << errstring; + msg << "Error flushing encoder: " << errstring; throw EssentiaException(msg); } + // Receive all remaining packets from encoder AVPacket packet; av_init_packet(&packet); - // Set the packet data and size so that it is recognized as being empty. packet.data = NULL; packet.size = 0; - int got_output; - result = avcodec_encode_audio2(_codecCtx, &packet, frame, &got_output); - if (result < 0) { - char errstring[1204]; - av_strerror(result, errstring, sizeof(errstring)); - ostringstream msg; - msg << "Error while encoding audio frame: " << errstring; - throw EssentiaException(msg); - } - - if (got_output) { // packet is not empty, write the frame in the media file - if (av_write_frame(_muxCtx, &packet) != 0 ) { - throw EssentiaException("Error while writing audio frame"); - } - av_free_packet(&packet); - } + try { + while (true) { + result = avcodec_receive_packet(_codecCtx, &packet); + if (result == AVERROR(EAGAIN) || result == AVERROR_EOF) { + break; // No more packets available + } else if (result < 0) { + char errstring[1024]; + av_strerror(result, errstring, sizeof(errstring)); + throw EssentiaException("Error receiving packet from encoder during EOF flush: ", errstring); + } - av_frame_free(&frame); - av_freep(&bufferFmt); - _codecCtx->frame_size = tmp_fs; -} + // Assign stream index + packet.stream_index = _avStream->index; -void AudioContext::writeEOF() { - AVPacket packet; - av_init_packet(&packet); - // Set the packet data and size so that it is recognized as being empty. - packet.data = NULL; - packet.size = 0; + // Update the packet's pts and dts + packet.pts = _pts; + packet.dts = _pts; + _pts += _codecCtx->frame_size; - for (int got_output = 1; got_output;) { - if (avcodec_encode_audio2(_codecCtx, &packet, NULL, &got_output) < 0) { - throw EssentiaException("Error while encoding audio frame"); - } - if (got_output) { - if (av_write_frame(_muxCtx, &packet) != 0 ) { + // Write the packet + if (av_write_frame(_muxCtx, &packet) < 0) { + av_packet_unref(&packet); throw EssentiaException("Error while writing delayed audio frame"); } - av_free_packet(&packet); + av_packet_unref(&packet); } - else break; + } catch (...) 
{ + av_packet_unref(&packet); // always free packet + throw; } } diff --git a/src/essentia/utils/audiocontext.h b/src/essentia/utils/audiocontext.h index 2f266ac27..de7e32e48 100644 --- a/src/essentia/utils/audiocontext.h +++ b/src/essentia/utils/audiocontext.h @@ -44,6 +44,7 @@ class AudioContext { int _inputBufSize; // input buffer size float* _buffer; // input FLT buffer interleaved uint8_t* _buffer_test; // input buffer in converted to codec sample format + int64_t _pts = 0; // initialize to 0 by default struct SwrContext* _convertCtxAv; diff --git a/test/src/unittests/essentia_test.py b/test/src/unittests/essentia_test.py index 0f3814598..7707377ef 100644 --- a/test/src/unittests/essentia_test.py +++ b/test/src/unittests/essentia_test.py @@ -86,6 +86,11 @@ def allTests(testClass): class TestCase(BaseTestCase): + def assert_(self, expr, msg=None): + """Compatibility shim for Python < 3.12 style assert_.""" + if not expr: + raise self.failureException(msg or f"Expression is not true: {expr}") + def assertValidNumber(self, x): self.assert_(not numpy.isnan(x)) self.assert_(not numpy.isinf(x)) diff --git a/test/src/unittests/io/test_audioloader_streaming.py b/test/src/unittests/io/test_audioloader_streaming.py index 4a636cb60..bfcba0deb 100644 --- a/test/src/unittests/io/test_audioloader_streaming.py +++ b/test/src/unittests/io/test_audioloader_streaming.py @@ -176,9 +176,9 @@ def testLoadMultiple(self): audio1, _, _, _, _, _ = algo() audio2, _, _, _, _, _ = algo() audio3, _, _, _, _, _ = algo() - self.assertEquals(len(audio1), 441000); - self.assertEquals(len(audio2), 441000); - self.assertEquals(len(audio3), 441000); + self.assertEqual(len(audio1), 441000) + self.assertEqual(len(audio2), 441000) + self.assertEqual(len(audio3), 441000) self.assertEqualMatrix(audio2, audio1) self.assertEqualMatrix(audio2, audio3) @@ -220,12 +220,12 @@ def testBitrate(self): def testMD5(self): - dir = join(testdata.audio_dir,'recorded') - _, _, _, md5_wav, _, _ = AudioLoader(filename=join(dir,"dubstep.wav"), computeMD5=True)() - _, _, _, md5_flac, _, _ = AudioLoader(filename=join(dir,"dubstep.flac"), computeMD5=True)() - _, _, _, md5_mp3, _, _ = AudioLoader(filename=join(dir,"dubstep.mp3"), computeMD5=True)() - _, _, _, md5_ogg, _, _ = AudioLoader(filename=join(dir,"dubstep.ogg"), computeMD5=True)() - _, _, _, md5_aac, _, _ = AudioLoader(filename=join(dir,"dubstep.aac"), computeMD5=True)() + directory = join(testdata.audio_dir,'recorded') + _, _, _, md5_wav, _, _ = AudioLoader(filename=join(directory,"dubstep.wav"), computeMD5=True)() + _, _, _, md5_flac, _, _ = AudioLoader(filename=join(directory,"dubstep.flac"), computeMD5=True)() + _, _, _, md5_mp3, _, _ = AudioLoader(filename=join(directory,"dubstep.mp3"), computeMD5=True)() + _, _, _, md5_ogg, _, _ = AudioLoader(filename=join(directory,"dubstep.ogg"), computeMD5=True)() + _, _, _, md5_aac, _, _ = AudioLoader(filename=join(directory,"dubstep.aac"), computeMD5=True)() # results should correspond to ffmpeg output (computed on debian wheezy) # ffmpeg -i dubstep.wav -acodec copy -f md5 - diff --git a/test/src/unittests/io/test_easyloader_streaming.py b/test/src/unittests/io/test_easyloader_streaming.py index 1489f48e9..d36d536e3 100644 --- a/test/src/unittests/io/test_easyloader_streaming.py +++ b/test/src/unittests/io/test_easyloader_streaming.py @@ -62,9 +62,9 @@ def testNoResample(self): def testResample(self): filename = join(testdata.audio_dir, 'generated','synthesised','impulse','resample', 'impulses_1samp_44100.wav') - self.load(44100, 22050, 
filename, "left" , 0., 0., 10.); - self.load(44100, 48000, filename, "right", -15., 3.34, 5.68); - self.load(44100, 11025, filename, "mix" , 30., 0.168, 8.32); + self.load(44100, 22050, filename, "left" , 0., 0., 10.) + self.load(44100, 48000, filename, "right", -15., 3.34, 5.68) + self.load(44100, 11025, filename, "mix" , 30., 0.168, 8.32) def testInvalidParam(self): filename = join(testdata.audio_dir, 'generated','synthesised','impulse','resample', @@ -80,10 +80,10 @@ def testResetStandard(self): from essentia.standard import EasyLoader as stdEasyLoader audiofile = join(testdata.audio_dir,'recorded','musicbox.wav') loader = stdEasyLoader(filename=audiofile, startTime=0, endTime=70) - audio1 = loader(); - audio2 = loader(); - loader.reset(); - audio3 = loader(); + audio1 = loader() + audio2 = loader() + loader.reset() + audio3 = loader() self.assertAlmostEqualVector(audio3, audio1) self.assertEqualVector(audio2, audio3) @@ -95,9 +95,9 @@ def testLoadMultiple(self): audio1 = algo() audio2 = algo() audio3 = algo() - self.assertEquals(len(audio1), 441000); - self.assertEquals(len(audio2), 441000); - self.assertEquals(len(audio3), 441000); + self.assertEqual(len(audio1), 441000) + self.assertEqual(len(audio2), 441000) + self.assertEqual(len(audio3), 441000) self.assertEqualVector(audio2, audio1) self.assertEqualVector(audio2, audio3) diff --git a/test/src/unittests/io/test_eqloudloader_streaming.py b/test/src/unittests/io/test_eqloudloader_streaming.py index 1a5c6b5e5..8cc5c1ab9 100644 --- a/test/src/unittests/io/test_eqloudloader_streaming.py +++ b/test/src/unittests/io/test_eqloudloader_streaming.py @@ -57,15 +57,15 @@ def load(self, inputSampleRate, outputSampleRate, def testNoResample(self): eqloud=join(testdata.audio_dir,'generated','doublesize','sin_30_seconds_eqloud.wav') normal=join(testdata.audio_dir,'generated','doublesize','sin_30_seconds.wav') - self.load(44100, 44100, eqloud, normal, "left" , -6.0, 0., 30.); - self.load(44100, 44100, eqloud, normal, "left", -6.0, 3.35, 5.68); - self.load(44100, 44100, eqloud, normal, "left" , -6.0, 0.169, 8.333); + self.load(44100, 44100, eqloud, normal, "left" , -6.0, 0., 30.) 
+ self.load(44100, 44100, eqloud, normal, "left", -6.0, 3.35, 5.68) + self.load(44100, 44100, eqloud, normal, "left" , -6.0, 0.169, 8.333) def testResample(self): eqloud=join(testdata.audio_dir,'generated','doublesize','sin_30_seconds_eqloud.wav') normal=join(testdata.audio_dir,'generated','doublesize','sin_30_seconds.wav') - self.load(44100, 48000, eqloud, normal, "left", -6.0, 3.35, 5.68); - self.load(44100, 32000, eqloud, normal, "left", -6.0, 3.35, 5.68); + self.load(44100, 48000, eqloud, normal, "left", -6.0, 3.35, 5.68) + self.load(44100, 32000, eqloud, normal, "left", -6.0, 3.35, 5.68) @@ -83,10 +83,10 @@ def testResetStandard(self): from essentia.standard import EqloudLoader as stdEqloudLoader audiofile = join(testdata.audio_dir,'recorded','musicbox.wav') loader = stdEqloudLoader(filename=audiofile, endTime=31) - audio1 = loader(); - audio2 = loader(); - loader.reset(); - audio3 = loader(); + audio1 = loader() + audio2 = loader() + loader.reset() + audio3 = loader() self.assertAlmostEqualVector(audio3, audio1) self.assertEqualVector(audio2, audio1) @@ -98,9 +98,9 @@ def testLoadMultiple(self): audio1 = algo() audio2 = algo() audio3 = algo() - self.assertEquals(len(audio1), 441000); - self.assertEquals(len(audio2), 441000); - self.assertEquals(len(audio3), 441000); + self.assertEqual(len(audio1), 441000) + self.assertEqual(len(audio2), 441000) + self.assertEqual(len(audio3), 441000) self.assertEqualVector(audio2, audio1) self.assertEqualVector(audio2, audio3) diff --git a/test/src/unittests/io/test_monoloader.py b/test/src/unittests/io/test_monoloader.py index e8296a552..ea60080b9 100644 --- a/test/src/unittests/io/test_monoloader.py +++ b/test/src/unittests/io/test_monoloader.py @@ -39,25 +39,23 @@ def load(self, filename, downmix, sampleRate): def testInvalidParam(self): filename = join(wav_dir, 'impulses_1second_44100_st.wav') - self.assertConfigureFails(MonoLoader(sampleRate=44100), { 'filename' : filename, - 'downmix' : 'stereo', - 'sampleRate' : 44100}) + cfg = {'filename': filename, 'downmix': 'stereo', 'sampleRate': 44100} + self.assertConfigureFails(MonoLoader(sampleRate=44100), cfg) + + cfg = {'filename': filename, 'downmix': 'left', 'sampleRate': 0} + self.assertConfigureFails(MonoLoader(sampleRate=44100), cfg) - self.assertConfigureFails(MonoLoader(sampleRate=44100), { 'filename' : filename, - 'downmix' : 'left', - 'sampleRate' : 0}) filename = 'unknown.wav' - self.assertConfigureFails(MonoLoader(), { 'filename' : filename, - 'downmix' : 'left', - 'sampleRate' : 44100}) + cfg = {'filename': filename, 'downmix': 'left', 'sampleRate': 44100} + self.assertConfigureFails(MonoLoader(), cfg) def testWav44100(self): # files with 9 impulses in each channel filename = join(wav_dir, 'impulses_1second_44100_st.wav') - left = self.load(filename, 'left', 44100); - right = self.load(filename, 'right', 44100); - mix = self.load(filename, 'mix', 44100); + left = self.load(filename, 'left', 44100) + right = self.load(filename, 'right', 44100) + mix = self.load(filename, 'mix', 44100) self.assertEqual(self.round(sum(left)), 9) self.assertEqual(self.round(sum(right)), 9) self.assertEqual(self.round(sum(mix)), 9) @@ -65,9 +63,9 @@ def testWav44100(self): def testWav22050(self): # files with 9 impulses in each channel filename = join(wav_dir, 'impulses_1second_22050_st.wav') - left = self.load(filename, 'left', 22050); - right = self.load(filename, 'right', 22050); - mix = self.load(filename, 'mix', 22050); + left = self.load(filename, 'left', 22050) + right = self.load(filename, 'right', 
22050) + mix = self.load(filename, 'mix', 22050) self.assertEqual(self.round(sum(left)), 9) self.assertEqual(self.round(sum(right)), 9) self.assertEqual(self.round(sum(mix)), 9) @@ -75,9 +73,9 @@ def testWav22050(self): def testWav48000(self): # files with 9 impulses in each channel filename = join(wav_dir, 'impulses_1second_48000_st.wav') - left = self.load(filename, 'left', 48000); - right = self.load(filename, 'right', 48000); - mix = self.load(filename, 'mix', 48000); + left = self.load(filename, 'left', 48000) + right = self.load(filename, 'right', 48000) + mix = self.load(filename, 'mix', 48000) self.assertEqual(self.round(sum(left)), 9) self.assertEqual(self.round(sum(right)), 9) self.assertEqual(self.round(sum(mix)), 9) @@ -90,9 +88,9 @@ def testWavLeftRightOffset(self): # file with 9 impulses in right channel and 10 in left channel dir = join(testdata.audio_dir, 'generated', 'synthesised', 'impulse', 'left_right_offset') filename = join(dir, 'impulses_1second_44100.wav') - left = self.load(filename, 'left', 44100); - right = self.load(filename, 'right', 44100); - mix = self.load(filename, 'mix', 44100); + left = self.load(filename, 'left', 44100) + right = self.load(filename, 'right', 44100) + mix = self.load(filename, 'mix', 44100) self.assertEqual(self.round(sum(left)), 10) self.assertEqual(self.round(sum(right)), 9) self.assertAlmostEqualFixedPrecision(sum(mix), 9.5, 3) # 0.5*left + 0.5*right @@ -106,15 +104,15 @@ def sum(self, l): noisefloor = 0.003 for i in range(len(l)): if fabs(l[i]) > noisefloor: - result+= l[i] + result+= l[i] return self.round(result) def testMp344100(self): # files with 9 impulses in each channel filename = join(mp3_dir, 'impulses_1second_44100_st.mp3') - left = self.load(filename, 'left', 44100); - right = self.load(filename, 'right', 44100); - mix = self.load(filename, 'mix', 44100); + left = self.load(filename, 'left', 44100) + right = self.load(filename, 'right', 44100) + mix = self.load(filename, 'mix', 44100) self.assertEqual(self.sum(left), 9) self.assertEqual(self.sum(right), 9) @@ -123,9 +121,9 @@ def testMp344100(self): def testMp322050(self): # files with 9 impulses in each channel filename = join(mp3_dir, 'impulses_1second_22050_st.mp3') - left = self.load(filename, 'left', 22050); - right = self.load(filename, 'right', 22050); - mix = self.load(filename, 'mix', 22050); + left = self.load(filename, 'left', 22050) + right = self.load(filename, 'right', 22050) + mix = self.load(filename, 'mix', 22050) self.assertEqual(self.sum(left), 9) self.assertEqual(self.sum(right), 9) @@ -134,20 +132,20 @@ def testMp322050(self): def testMp348000(self): # files with 9 impulses in each channel filename = join(mp3_dir, 'impulses_1second_48000_st.mp3') - left = self.load(filename, 'left', 48000); - right = self.load(filename, 'right', 48000); - mix = self.load(filename, 'mix', 48000); + left = self.load(filename, 'left', 48000) + right = self.load(filename, 'right', 48000) + mix = self.load(filename, 'mix', 48000) self.assertEqual(self.sum(left), 9) self.assertEqual(self.sum(right), 9) self.assertEqual(self.sum(mix), 9) def testMp3TimeShift(self): - # test mp3s are loaded with no time shift (lost frames) + # test mp3s are loaded with no time shift (lost frames) filename_mp3 = join(mp3_dir, 'impulses_1second_44100.mp3') filename_wav = join(wav_dir, 'impulses_1second_44100.wav') mp3 = self.load(filename_mp3, 'mix', 44100) wav = self.load(filename_wav, 'mix', 44100) - + # find time shift between impulse positions impulses_mp3 = [x for x in range(len(mp3)) if 
mp3[x]>0.9] impulses_wav = [x for x in range(len(wav)) if wav[x]>0.9] @@ -168,9 +166,9 @@ def testMp3TimeShift(self): def testOgg44100(self): filename = join(ogg_dir, 'impulses_1second_44100_st.ogg') - left = self.load(filename, 'left', 44100); - right = self.load(filename, 'right', 44100); - mix = self.load(filename, 'mix', 44100); + left = self.load(filename, 'left', 44100) + right = self.load(filename, 'right', 44100) + mix = self.load(filename, 'mix', 44100) self.assertEqual(abs(self.sum(left)), 9) self.assertEqual(abs(self.sum(right)), 9) self.assertEqual(abs(self.sum(mix)), 9) @@ -181,9 +179,9 @@ def testOgg44100(self): def testOgg22050(self): # files with 9 impulses in each channel filename = join(ogg_dir, 'impulses_1second_22050_st.ogg') - left = self.load(filename, 'left', 22050); - right = self.load(filename, 'right', 22050); - mix = self.load(filename, 'mix', 22050); + left = self.load(filename, 'left', 22050) + right = self.load(filename, 'right', 22050) + mix = self.load(filename, 'mix', 22050) self.assertEqual(abs(self.sum(left)), 9) self.assertEqual(abs(self.sum(right)), 9) self.assertEqual(abs(self.sum(mix)), 9) @@ -194,9 +192,9 @@ def testOgg22050(self): def testOgg48000(self): # files with 9 impulses in each channel filename = join(ogg_dir, 'impulses_1second_48000_st.ogg') - left = self.load(filename, 'left', 48000); - right = self.load(filename, 'right', 48000); - mix = self.load(filename, 'mix', 48000); + left = self.load(filename, 'left', 48000) + right = self.load(filename, 'right', 48000) + mix = self.load(filename, 'mix', 48000) self.assertEqual(abs(self.sum(left)), 9) self.assertEqual(abs(self.sum(right)), 9) self.assertEqual(abs(self.sum(mix)), 9) @@ -208,27 +206,27 @@ def testDownSampling(self): # files of 30s with impulses at every sample # from 44100 to 22050 filename = join(resamp_dir, 'impulses_1samp_44100.wav') - left = self.load(filename, 'left', 22050); + left = self.load(filename, 'left', 22050) self.assertAlmostEqual(sum(left), 30.*22050, 1e-4) # from 48000 to 44100 filename = join(resamp_dir, 'impulses_1samp_48000.wav') - left = self.load(filename, 'left', 44100); + left = self.load(filename, 'left', 44100) self.assertAlmostEqual(sum(left), 30.*44100, 1e-4) # from 48000 to 22050 - left = self.load(filename, 'left', 22050); + left = self.load(filename, 'left', 22050) self.assertAlmostEqual(sum(left), 30.*22050, 1e-4) def testUpSampling(self): # from 44100 to 48000 filename = join(resamp_dir, 'impulses_1samp_44100.wav') - left = self.load(filename, 'right', 48000); + left = self.load(filename, 'right', 48000) self.assertAlmostEqual(sum(left), 30.*48000, 1e-4) # from 22050 to 44100 filename = join(resamp_dir, 'impulses_1samp_22050.wav') - left = self.load(filename, 'right', 44100); + left = self.load(filename, 'right', 44100) self.assertAlmostEqual(sum(left), 30.*44100, 1e-4) # from 22050 to 48000 - left = self.load(filename, 'right', 48000); + left = self.load(filename, 'right', 48000) self.assertAlmostEqual(sum(left), 30.*48000, 1e-4) def testInvalidFilename(self): @@ -237,10 +235,10 @@ def testInvalidFilename(self): def testResetStandard(self): audiofile = join(testdata.audio_dir,'recorded','musicbox.wav') loader = MonoLoader(filename=audiofile) - audio1 = loader(); - audio2 = loader(); - loader.reset(); - audio3 = loader(); + audio1 = loader() + audio2 = loader() + loader.reset() + audio3 = loader() self.assertAlmostEqualVector(audio3, audio1) self.assertEqualVector(audio2, audio1) @@ -251,15 +249,12 @@ def testLoadMultiple(self): audio1 = algo() 
audio2 = algo() audio3 = algo() - self.assertEquals(len(audio1), 441000); - self.assertEquals(len(audio2), 441000); - self.assertEquals(len(audio3), 441000); + self.assertEqual(len(audio1), 441000) + self.assertEqual(len(audio2), 441000) + self.assertEqual(len(audio3), 441000) self.assertEqualVector(audio2, audio1) self.assertEqualVector(audio2, audio3) - - - suite = allTests(TestMonoLoader) if __name__ == '__main__':
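
Note on the decode path adopted above: decodePacket() sends the whole packet to the decoder but receives at most one frame per call, and flushPacket() re-sends the empty packet on every loop iteration. For reference, a minimal sketch of the canonical send/receive drain loop is shown below; the helper name decode_all and the callback parameter are illustrative only (not part of this patch), and it assumes an already-opened AVCodecContext:

    extern "C" {
    #include <libavcodec/avcodec.h>
    }
    #include <functional>

    // Drain every frame the decoder can produce for one packet, or start a
    // full flush when pkt == NULL. Returns 0 on success, an AVERROR code otherwise.
    static int decode_all(AVCodecContext* ctx, const AVPacket* pkt,
                          const std::function<void(const AVFrame*)>& onFrame) {
      int ret = avcodec_send_packet(ctx, pkt);        // pkt == NULL enters draining mode
      if (ret < 0 && ret != AVERROR_EOF) return ret;  // EOF: already draining, keep receiving

      AVFrame* frame = av_frame_alloc();
      if (!frame) return AVERROR(ENOMEM);
      while (true) {
        ret = avcodec_receive_frame(ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { ret = 0; break; }
        if (ret < 0) break;                           // real decoding error
        onFrame(frame);                               // convert/copy samples here
        av_frame_unref(frame);
      }
      av_frame_free(&frame);
      return ret;
    }

Called once per demuxed packet and once more with pkt == NULL at end of stream, a loop of this shape guarantees that no frames are left buffered inside the decoder, which is the property flushPacket() is trying to achieve.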
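
On the encoder side, audiocontext.cpp now uses the matching avcodec_send_frame()/avcodec_receive_packet() pair and flushes with a NULL frame in writeEOF(). The sketch below shows the same pattern with timestamp rescaling; the helper name encode_and_write, the use of av_interleaved_write_frame(), and driving timestamps from frame->pts (instead of the patch's manual _pts counter and av_write_frame()) are illustrative alternatives, not what the patch implements:

    extern "C" {
    #include <libavcodec/avcodec.h>
    #include <libavformat/avformat.h>
    }

    // Encode one frame (or flush with frame == NULL) and write every packet
    // the encoder returns. Assumes frame->pts is set in enc->time_base units.
    static int encode_and_write(AVFormatContext* mux, AVStream* st,
                                AVCodecContext* enc, const AVFrame* frame) {
      int ret = avcodec_send_frame(enc, frame);
      if (ret < 0) return ret;

      AVPacket* pkt = av_packet_alloc();
      if (!pkt) return AVERROR(ENOMEM);
      while ((ret = avcodec_receive_packet(enc, pkt)) >= 0) {
        pkt->stream_index = st->index;
        // encoder timestamps are in enc->time_base; the muxer expects st->time_base
        av_packet_rescale_ts(pkt, enc->time_base, st->time_base);
        ret = av_interleaved_write_frame(mux, pkt);   // takes ownership of pkt's payload
        if (ret < 0) break;
      }
      av_packet_free(&pkt);
      return (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) ? 0 : ret;
    }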
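
Finally, on the channel-layout plumbing: the patch configures SwrContext through av_opt_set_chlayout(); FFmpeg 5.1+ also offers swr_alloc_set_opts2(), which sets layouts, formats and rates in a single call. A minimal sketch for the decoder-to-float conversion used by AudioLoader (the function name make_float_converter is made up for illustration):

    extern "C" {
    #include <libavcodec/avcodec.h>
    #include <libswresample/swresample.h>
    #include <libavutil/channel_layout.h>
    }

    // Build an SwrContext converting a decoder's native sample format to
    // packed float at the same sample rate and channel count.
    static SwrContext* make_float_converter(const AVCodecContext* dec) {
      AVChannelLayout layout;
      if (dec->ch_layout.order != AV_CHANNEL_ORDER_UNSPEC)
        av_channel_layout_copy(&layout, &dec->ch_layout);   // preserves custom channel maps
      else
        av_channel_layout_default(&layout, dec->ch_layout.nb_channels);

      SwrContext* swr = NULL;
      int ret = swr_alloc_set_opts2(&swr,
                                    &layout, AV_SAMPLE_FMT_FLT, dec->sample_rate,  // output
                                    &layout, dec->sample_fmt,   dec->sample_rate,  // input
                                    0, NULL);
      av_channel_layout_uninit(&layout);
      if (ret < 0 || swr_init(swr) < 0) {
        swr_free(&swr);
        return NULL;
      }
      return swr;
    }

av_channel_layout_copy() is preferred over plain struct assignment because custom layouts carry a heap-allocated channel map that assignment would alias.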