Merge pull request #2 from libAudioFlux/dev1
update python and README
LiweiDong authored Jan 19, 2023
2 parents 1092866 + a576530 commit 9e2a8a7
Showing 72 changed files with 5,346 additions and 402 deletions.
178 changes: 167 additions & 11 deletions README.md
@@ -34,7 +34,29 @@

A library for audio and music analysis, feature extraction.

Can be used for deep learning, pattern recognition, signal processing, bioinformatics, statistics, finance, etc.

# Table of Contents

- [Overview](#overview)
- [Description](#description)
- [Functionality](#functionality)
- [transform](#1-transform)
- [feature](#2-feature)
- [mir](#3-mir)
- [Quickstart](#quickstart)
- [Mel & MFCC](#mel--mfcc)
- [CWT & Synchrosqueezing](#cwt--synchrosqueezing)
- [Other examples](#other-examples)
- [Installation](#installation)
  - [Python Package Install](#python-package-install)
- [iOS build](#ios-build)
- [Android build](#android-build)
- [Compiling from source](#compiling-from-source)
- [Documentation](#documentation)
- [Contributing](#contributing)
- [Citing](#citing)
- [License](#license)


## Overview

@@ -46,6 +68,8 @@ In the above tasks, **mel spectrogram** and **mfcc** features are commonly used

**`audioFlux`** provides systematic, comprehensive and multi-dimensional feature extraction and combination, and combines various deep learning network models to conduct research and development learning in different fields.

Can be used for deep learning, pattern recognition, signal processing, bioinformatics, statistics, finance, etc.

### Functionality

**`audioFlux`** is based on a data-flow design. It structurally decouples each algorithm module, making it convenient, fast and efficient to extract features from large batches. The following are the main feature architecture diagrams; for a specific and detailed description, view the documentation.
@@ -109,10 +133,130 @@ The mir module contains the following algorithms:
- `onset` - Spectral flux, novelty, etc. algorithms.
- `hpss` - Median filtering, NMF algorithms.
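As an illustration of the spectral-flux idea behind the `onset` module, here is a minimal numpy sketch of half-wave rectified spectral flux (a toy novelty curve, not the library's implementation):

```python
import numpy as np

def spectral_flux(spec):
    """Half-wave rectified spectral flux: sum of positive
    magnitude increases between consecutive frames.

    spec: (num_bins, num_frames) magnitude spectrogram.
    Returns a novelty curve of length num_frames - 1.
    """
    diff = np.diff(spec, axis=1)      # frame-to-frame change per bin
    diff = np.maximum(diff, 0.0)      # keep only energy increases
    return diff.sum(axis=0)           # novelty value per frame transition

# A toy spectrogram with a sudden energy burst entering frame 2
spec = np.array([[0.1, 0.1, 1.0, 1.0],
                 [0.1, 0.1, 0.9, 0.9]])
novelty = spectral_flux(spec)
print(novelty)  # peaks at the transition into frame 2
```

Onset candidates are then the peaks of this novelty curve; the library's `onset` algorithms refine this basic idea.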


## Quickstart

### Mel & MFCC

Mel spectrogram and Mel-frequency cepstral coefficients

```python
# Feature extraction example
import numpy as np
import audioflux as af
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
from audioflux.type import SpectralFilterBankScaleType

# Get the path of a 220Hz audio file
sample_path = af.utils.sample_path('220')

# Read audio data and sample rate
audio_arr, sr = af.read(sample_path)

# Extract mel spectrogram
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.MEL)
spec_arr = bft_obj.bft(audio_arr)
spec_arr = np.abs(spec_arr)

# Create XXCC object and extract mfcc
xxcc_obj = af.XXCC(bft_obj.num)
xxcc_obj.set_time_length(time_length=spec_arr.shape[1])
mfcc_arr = xxcc_obj.xxcc(spec_arr)

audio_len = audio_arr.shape[0]
fig, ax = plt.subplots()
img = fill_spec(spec_arr, axes=ax,
x_coords=bft_obj.x_coords(audio_len),
y_coords=bft_obj.y_coords(),
x_axis='time', y_axis='log',
title='Mel Spectrogram')
fig.colorbar(img, ax=ax)

fig, ax = plt.subplots()
img = fill_spec(mfcc_arr, axes=ax,
x_coords=bft_obj.x_coords(audio_len), x_axis='time',
title='MFCC')
fig.colorbar(img, ax=ax)

plt.show()
```
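Before plotting, the magnitude spectrogram is often converted to decibels; the library's own `power_to_db` (used in the BFT docstring later in this commit) does this. An equivalent numpy sketch, assuming a peak reference and an -80 dB floor:

```python
import numpy as np

def power_to_db(power, top_db=80.0):
    """Convert a power spectrogram to dB relative to its peak,
    clipped at -top_db (a sketch of the standard conversion,
    not audioflux's exact implementation)."""
    power = np.asarray(power, dtype=np.float64)
    ref = max(power.max(), 1e-10)
    db = 10.0 * np.log10(np.maximum(power, 1e-10) / ref)
    return np.maximum(db, -top_db)

spec = np.array([[1.0, 0.1],
                 [0.01, 1e-9]])
db = power_to_db(spec)
print(db)  # [[0, -10], [-20, -80]]
```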

<img src='image/demo_mel.png' width="415" /><img src='image/demo_mfcc.png' width="415" />

### CWT & Synchrosqueezing

Continuous Wavelet Transform spectrogram and its corresponding synchrosqueezing reassignment spectrogram

```python
# Feature extraction example
import numpy as np
import audioflux as af
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
from audioflux.type import SpectralFilterBankScaleType, WaveletContinueType
from audioflux.utils import note_to_hz

# Get the path of a 220Hz audio file
sample_path = af.utils.sample_path('220')

# Read audio data and sample rate
audio_arr, sr = af.read(sample_path)
audio_arr = audio_arr[:4096]

cwt_obj = af.CWT(num=84, radix2_exp=12, samplate=sr, low_fre=note_to_hz('C1'),
bin_per_octave=12, wavelet_type=WaveletContinueType.MORSE,
scale_type=SpectralFilterBankScaleType.OCTAVE)

cwt_spec_arr = cwt_obj.cwt(audio_arr)

synsq_obj = af.Synsq(num=cwt_obj.num,
radix2_exp=cwt_obj.radix2_exp,
samplate=cwt_obj.samplate)

synsq_arr = synsq_obj.synsq(cwt_spec_arr,
filter_bank_type=cwt_obj.scale_type,
fre_arr=cwt_obj.get_fre_band_arr())

# Show CWT
fig, ax = plt.subplots(figsize=(7,4))
img = fill_spec(np.abs(cwt_spec_arr), axes=ax,
x_coords=cwt_obj.x_coords(),
y_coords=cwt_obj.y_coords(),
x_axis='time', y_axis='log',
title='CWT')
fig.colorbar(img, ax=ax)
# Show Synsq
fig, ax = plt.subplots(figsize=(7,4))
img = fill_spec(np.abs(synsq_arr), axes=ax,
x_coords=cwt_obj.x_coords(),
y_coords=cwt_obj.y_coords(),
x_axis='time', y_axis='log',
title='Synsq')
fig.colorbar(img, ax=ax)

plt.show()
```
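Synchrosqueezing sharpens a time-frequency image by reassigning each bin's energy toward its instantaneous frequency, which is estimated from the phase derivative of the transform. A toy numpy sketch of that phase-derivative estimate on a pure tone (illustrative of the principle only, not the library's algorithm):

```python
import numpy as np

sr = 1000                      # sample rate in Hz
t = np.arange(sr) / sr         # 1 second of time samples
f0 = 50.0                      # tone frequency

# Analytic signal of a pure tone: phase advances at 2*pi*f0 rad/s
z = np.exp(2j * np.pi * f0 * t)

# Instantaneous frequency = phase derivative / (2*pi)
phase = np.unwrap(np.angle(z))
inst_freq = np.diff(phase) * sr / (2 * np.pi)
print(inst_freq.mean())  # ~50.0 Hz
```

For a real signal, the same estimate is computed per CWT coefficient, and energy smeared across neighboring scales is moved onto the estimated frequency, which is why the Synsq image is much sharper than the raw CWT.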

<img src='image/demo_cwt.png' width="415" /><img src='image/demo_synsq.png' width="415" />


### Other examples

- [CQT & Chroma](docs/examples.md#cqt--chroma)
- [Different Wavelet Type](docs/examples.md#different-wavelet-type)
- [Spectral Features](docs/examples.md#spectral-features)
- [Pitch Estimate](docs/examples.md#pitch-estimate)
- [Onset Detection](docs/examples.md#onset-detection)
- [Harmonic Percussive Source Separation](docs/examples.md#harmonic-percussive-source-separation)

More example scripts are provided in the [Documentation](https://audioflux.top/) section.

## Installation
![language](https://img.shields.io/badge/platform-iOS%20|%20android%20|%20macOS%20|%20linux%20|%20windows%20-lyellow.svg)

The library is cross-platform and currently supports Linux, macOS, Windows, iOS and Android systems.

### Python Package Install

@@ -122,23 +266,19 @@ Using PyPI:
$ pip install audioflux
```

<!--Using Anaconda:
```
$ conda install -c conda-forge audioflux
```-->

Building from source:

```
$ python setup.py build
$ python setup.py install
```

<!--Read installation instructions:
https://audioflux.top/install-->


### iOS build

To compile for iOS on a Mac, the Xcode Command Line Tools must be installed:

- Install the full Xcode package
@@ -156,6 +296,7 @@ $ ./build_iOS.sh
After a successful build, the compiled results are in the **`build`** folder.

### Android build

The [**Android NDK**](https://developer.android.com/ndk) (version >= 16) must be installed in the current development environment; after installation, set the NDK path environment variable.

For example, ndk installation path is `~/Android/android-ndk-r16b`:
@@ -175,6 +316,13 @@ $ ./build_android.sh

After a successful build, the compiled results are in the **`build`** folder.


### Compiling from source

For Linux, macOS, and Windows systems, read the installation instructions:

* [docs/installing.md](docs/installing.md)

## Documentation

Documentation of the package can be found online:
@@ -186,7 +334,15 @@ We are more than happy to collaborate and receive your contributions to **`audioFlux`**.

You are also more than welcome to suggest any improvements: report a bug, request a feature, ask a general question, or propose new algorithms. <a href="https://github.com/libAudioFlux/audioFlux/issues/new">Open an issue</a>


## Citing

If you want to cite **`audioFlux`** in a scholarly work, there are two ways to do it.

- If you are using the library for your work, for the sake of reproducibility, please cite
the version you used as indexed at Zenodo:

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7548289.svg)](https://doi.org/10.5281/zenodo.7548289)

## License
The audioFlux project is available under the MIT License.
2 changes: 1 addition & 1 deletion audioflux/__version__.py
@@ -1,3 +1,3 @@
__title__ = 'audioflux'
__description__ = 'A library for audio and music analysis, feature extraction.'
__version__ = '0.0.1'
__version__ = '0.1.1'
21 changes: 6 additions & 15 deletions audioflux/bft.py
@@ -104,8 +104,6 @@ class BFT(Base):
>>> import audioflux as af
>>> audio_path = af.utils.sample_path('220')
>>> audio_arr, sr = af.read(audio_path)
array([-5.5879354e-09, -9.3132257e-09, 0.0000000e+00, ...,
3.2826858e-03, 3.2447521e-03, 3.0795704e-03], dtype=float32)
Create BFT object of Linear (STFT)
@@ -124,19 +122,6 @@
>>> spec_arr = obj.bft(audio_arr)
>>> spec_arr = np.abs(spec_arr)
>>> spec_dB_arr = power_to_db(spec_arr)
array([[-41.382824, -37.95072 , -50.98091 , ..., -48.275932, -66.01512 ,
-53.229565],
[-29.873356, -33.225224, -32.94691 , ..., -49.855965, -49.439796,
-53.827766],
[-27.326801, -36.17459 , -32.978054, ..., -56.360283, -51.485504,
-51.036415],
...,
[-80. , -80. , -80. , ..., -80. , -80. ,
-80. ],
[-80. , -80. , -80. , ..., -80. , -80. ,
-80. ],
[-80. , -80. , -80. , ..., -80. , -80. ,
-80. ]], dtype=float32)
Show spectrogram plot
@@ -164,6 +149,12 @@ def __init__(self, num, radix2_exp=12, samplate=32000,
is_reassign=False, is_temporal=False):
super(BFT, self).__init__(pointer(OpaqueBFT()))

self.fft_length = fft_length = 1 << radix2_exp

# check num
if num > (fft_length // 2 + 1):
raise ValueError(f'num={num} is too large')

# check BPO
if scale_type == SpectralFilterBankScaleType.OCTAVE and bin_per_octave < 1:
raise ValueError(f'bin_per_octave={bin_per_octave} must be a positive integer')
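The new `num` check follows from the size of a real FFT: `radix2_exp=12` gives `fft_length = 4096`, and a real FFT of that length yields `4096 // 2 + 1 = 2049` unique non-negative-frequency bins, so `num` cannot exceed that. A quick numpy confirmation:

```python
import numpy as np

radix2_exp = 12
fft_length = 1 << radix2_exp            # 4096
x = np.random.randn(fft_length)
bins = np.fft.rfft(x).shape[0]          # unique non-negative-frequency bins
print(bins)                             # 2049 == fft_length // 2 + 1
```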
