Merge pull request #8 from mbsantiago/feature/libsndfile

Enhancement: Expanded Audio Format Support with Soundfile
mbsantiago · May 11, 2024 · c8fa490 · c8fa490
2 parents f9a6d1a + fb4943d
commit c8fa490
Show file tree

Hide file tree

Showing 192 changed files with 1,329 additions and 1,470 deletions.
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -17,7 +17,7 @@ jobs:
         run: |
           sudo apt-get update && sudo apt-get install libsndfile1
           python -m pip install --upgrade pip
-          python -m pip install pytest hypothesis coverage pytest-coverage html5lib
+          python -m pip install pytest pytest-xdist hypothesis coverage pytest-coverage html5lib
           python -m pip install ".[all]"
       - name: Run Tests
         run: |

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
         run: |
           sudo apt-get update && sudo apt-get install libsndfile1
           python -m pip install --upgrade pip
-          python -m pip install pytest hypothesis ruff pyright html5lib
+          python -m pip install pytest pytest-xdist hypothesis ruff pyright html5lib
           python -m pip install ".[all]"
       - name: Make sure types are consistent
         run: pyright src

diff --git a/docs/introduction.md b/docs/introduction.md
@@ -31,7 +31,7 @@ Now, let's discuss why these data schemas matter to us bioacousticians:
    data? Schemas make it happen. They enable you to check an object against the
    schema, validating if the data has everything it needs and is correct.
 
-3. **Enhanced develper experience**: Python is entering the era of Type hints.
+3. **Enhanced developer experience**: Python is entering the era of Type hints.
    Using these hints makes your code more robust, acting like guardrails to
    ensure that your data follows the rules.
 

diff --git a/docs/javascripts/katex.js b/docs/javascripts/katex.js
@@ -0,0 +1,10 @@
+document$.subscribe(({ body }) => {
+  renderMathInElement(body, {
+    delimiters: [
+      { left: "$$", right: "$$", display: true },
+      { left: "$", right: "$", display: false },
+      { left: "\\(", right: "\\)", display: false },
+      { left: "\\[", right: "\\]", display: true },
+    ],
+  });
+});
diff --git a/docs/user_guide/2_loading_audio.py b/docs/user_guide/2_loading_audio.py
@@ -18,6 +18,16 @@
     `xarray.DataArray` objects, see the
     [xarray documentation](https://docs.xarray.dev/en/stable/getting-started-guide/why-xarray.html).
 
+
+!!! note "Supported audio formats"
+
+    `soundevent` supports most of the audio file formats supported by the
+    [`soundfile`](https://python-soundfile.readthedocs.io/) library. Some
+    formats were excluded because they do not support seeking and are
+    therefore not suitable for random access. The supported set still
+    includes most common audio file formats, such as WAV, FLAC, AIFF, and
+    MP3. For a full list of supported formats, see the
+    [audio.is_audio_file][soundevent.audio.is_audio_file] documentation.
 """
 
 # %%

diff --git a/docs/user_guide/3_computing_spectrograms.py b/docs/user_guide/3_computing_spectrograms.py
@@ -19,7 +19,8 @@
 # First, we will load a recording. We will use the
 # example recording from the [audio loading tutorial](audio_loading).
 
-from soundevent import audio, data
+import numpy as np
+from soundevent import audio, data, arrays
 
 recording = data.Recording.from_file("sample_audio.wav")
 wave = audio.load_recording(recording)
@@ -44,3 +45,35 @@
 # *frequency*, the second is *time*, and the third is *channel*. The
 # spectrogram is computed separately for each channel of the audio
 # signal.
+#
+# One of the nice things about using xarray is that it allows us to
+# easily plot the spectrogram using the built-in plotting functions.
+
+spectrogram.plot()
+
+# %%
+# The initial plot is hard to interpret due to the linear scale. Decibels (dB) 
+# are more perceptually relevant for sound.
+# Let's convert it to decibels using the
+# [`arrays.to_db`][soundevent.arrays.to_db] function.
+
+spectrogram_db = arrays.to_db(spectrogram)
+spectrogram_db.plot()
+
+# %%
+# This is much better! We can now clearly see the frequency
+# content evolving over time.
+#
+# To make subtle details even more apparent, we can apply a
+# de-noising technique like PCEN (Per-Channel Energy
+# Normalization). PCEN helps reduce background noise and
+# enhance the target sounds. We can apply PCEN using the
+# [`audio.pcen`][soundevent.audio.pcen] function.
+
+pcen = audio.pcen(spectrogram)
+pcen_db = arrays.to_db(pcen)
+pcen_db.plot()
+
+# %%
+# In this case the PCEN transformation has not made a huge
+# difference, but it can be very useful in other cases.
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -120,13 +120,19 @@ markdown_extensions:
       toc_depth: 4
       permalink: "#"
       separator: "_"
+  - pymdownx.arithmatex:
+      generic: true
 extra_javascript:
   - javascripts/jquery-3.3.1.min.js
   - javascripts/jquery.json-viewer.js
   - javascripts/json-viewer.js
   - optionalConfig.js
   - https://unpkg.com/[email protected]/dist/mermaid.min.js
   - extra-loader.js
+  - javascripts/katex.js
+  - https://unpkg.com/katex@0/dist/katex.min.js
+  - https://unpkg.com/katex@0/dist/contrib/auto-render.min.js
 extra_css:
   - stylesheets/jquery.json-viewer.css
   - css/mkdocstrings.css
+  - https://unpkg.com/katex@0/dist/katex.min.css
diff --git a/pyproject.toml b/pyproject.toml
@@ -56,10 +56,11 @@ rye = { dev-dependencies = [
     "pytest-testmon>=2.0.12",
     "html5lib>=1.1",
     "pyright>=1.1.362",
+    "pytest-xdist>=3.6.1",
 ] }
 
 [tool.pytest.ini_options]
-addopts = "-vv"
+addopts = "-vv -n auto"
 
 [tool.ruff]
 line-length = 79
@@ -76,10 +77,13 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "numpy"
 
+[tool.ruff.lint.isort]
+known-local-folder = ["src", "soundevent"]
+
 [tool.pyright]
 venvPath = "."
 venv = ".venv"
-include = ["src", "tests"]
+include = ["src"]
 verboseOutput = true
 
 [tool.coverage.run]

diff --git a/requirements-dev.lock b/requirements-dev.lock
@@ -66,6 +66,8 @@ evfuncs==0.3.5.post1
 exceptiongroup==1.2.1
     # via hypothesis
     # via pytest
+execnet==2.1.1
+    # via pytest-xdist
 executing==2.0.1
     # via icecream
 filelock==3.13.4
@@ -210,13 +212,15 @@ pytest==8.1.2
     # via pytest-cov
     # via pytest-testmon
     # via pytest-watch
+    # via pytest-xdist
 pytest-cov==5.0.0
     # via pytest-cover
 pytest-cover==3.0.0
     # via pytest-coverage
 pytest-coverage==0.0
 pytest-testmon==2.1.1
 pytest-watch==4.2.0
+pytest-xdist==3.6.1
 python-dateutil==2.9.0.post0
     # via ghp-import
     # via matplotlib

diff --git a/src/soundevent/audio/__init__.py b/src/soundevent/audio/__init__.py
@@ -1,25 +1,22 @@
 """Soundevent functions for handling audio files and arrays."""
 
-from .files import is_audio_file
-from .filter import filter_audio
+from .files import get_audio_files, is_audio_file
 from .io import load_audio, load_clip, load_recording
 from .media_info import MediaInfo, compute_md5_checksum, get_media_info
-from .resample import resample_audio
-from .scaling import clamp_amplitude, pcen, scale_amplitude
+from .operations import filter, pcen, resample
 from .spectrograms import compute_spectrogram
 
 __all__ = [
     "MediaInfo",
     "compute_spectrogram",
     "compute_md5_checksum",
     "get_media_info",
+    "get_audio_files",
     "load_audio",
     "load_clip",
     "load_recording",
     "is_audio_file",
-    "resample_audio",
-    "filter_audio",
-    "scale_amplitude",
-    "clamp_amplitude",
+    "resample",
+    "filter",
     "pcen",
 ]
diff --git a/src/soundevent/audio/attributes.py b/src/soundevent/audio/attributes.py
@@ -0,0 +1,17 @@
+"""Definition of common attributes for audio objects."""
+
+from enum import Enum
+
+__all__ = [
+    "AudioAttrs",
+]
+
+
+class AudioAttrs(str, Enum):
+    samplerate = "samplerate"
+
+    recording_id = "recording_id"
+
+    clip_id = "clip_id"
+
+    path = "path"
diff --git a/src/soundevent/audio/chunks.py b/src/soundevent/audio/chunks.py