3030 http://zafarrafii.com
3131 https://github.com/zafarrafii
3232 https://www.linkedin.com/in/zafarrafii/
33- 01/23 /21
33+ 01/26 /21
3434"""
3535
3636import numpy as np
@@ -154,12 +154,11 @@ def original(audio_signal, sampling_frequency):
154154 # Compute the STFT of the current channel
155155 audio_stft [:, :, i ] = _stft (audio_signal [:, i ], window_function , step_length )
156156
157- # Derive the magnitude spectrogram
158- # (with the DC component and without the mirrored frequencies)
157+ # Derive the magnitude spectrogram (with the DC component and without the mirrored frequencies)
159158 audio_spectrogram = abs (audio_stft [0 : int (window_length / 2 ) + 1 , :, :])
160159
161160 # Compute the beat spectrum of the spectrograms averaged over the channels
162- # (take the square to emphasize periodicitiy peaks)
161+ # (take the square to emphasize peaks of periodicity )
163162 beat_spectrum = _beatspectrum (np .power (np .mean (audio_spectrogram , axis = 2 ), 2 ))
164163
165164 # Get the period range in time frames for the beat spectrum
@@ -171,9 +170,7 @@ def original(audio_signal, sampling_frequency):
171170 repeating_period = _periods (beat_spectrum , period_range2 )
172171
173172 # Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
174- cutoff_frequency2 = int (
175- np .ceil (cutoff_frequency * window_length / sampling_frequency )
176- )
173+ cutoff_frequency2 = round (cutoff_frequency * window_length / sampling_frequency )
177174
178175 # Initialize the background signal
179176 background_signal = np .zeros ((number_samples , number_channels ))
@@ -185,7 +182,7 @@ def original(audio_signal, sampling_frequency):
185182 repeating_mask = _mask (audio_spectrogram [:, :, i ], repeating_period )
186183
187184 # Perform a high-pass filtering of the dual foreground
188- repeating_mask [1 : cutoff_frequency2 + 2 , :] = 1
185+ repeating_mask [1 : cutoff_frequency2 + 1 , :] = 1
189186
190187 # Recover the mirrored frequencies
191188 repeating_mask = np .concatenate (
@@ -299,11 +296,9 @@ def extended(audio_signal, sampling_frequency):
299296 ).astype (int )
300297
301298 # Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
302- cutoff_frequency2 = int (
303- np .ceil (cutoff_frequency * window_length / sampling_frequency )
304- )
299+ cutoff_frequency2 = round (cutoff_frequency * window_length / sampling_frequency )
305300
306- # Initialize background signal
301+ # Initialize the background signal
307302 background_signal = np .zeros ((number_samples , number_channels ))
308303
309304 # Loop over the segments
@@ -364,7 +359,7 @@ def extended(audio_signal, sampling_frequency):
364359 repeating_mask = _mask (audio_spectrogram [:, :, i ], repeating_period )
365360
366361 # Perform a high-pass filtering of the dual foreground
367- repeating_mask [1 : cutoff_frequency2 + 2 , :] = 1
362+ repeating_mask [1 : cutoff_frequency2 + 1 , :] = 1
368363
369364 # Recover the mirrored frequencies
370365 repeating_mask = np .concatenate (
@@ -389,7 +384,7 @@ def extended(audio_signal, sampling_frequency):
389384
390385 else :
391386
392- # Check if it is one of the first segments or the last one
387+ # Check if it is the first segment or the following ones
393388 if j == 0 :
394389
395390 # Add the segment to the signal
@@ -539,9 +534,7 @@ def adaptive(audio_signal, sampling_frequency):
539534 repeating_periods = _periods (beat_spectrogram , period_range2 )
540535
541536 # Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
542- cutoff_frequency2 = (
543- int (np .ceil (cutoff_frequency * (window_length - 1 ) / sampling_frequency )) - 1
544- )
537+ cutoff_frequency2 = round (cutoff_frequency * window_length / sampling_frequency )
545538
546539 # Initialize the background signal
547540 background_signal = np .zeros ((number_samples , number_channels ))
@@ -555,7 +548,7 @@ def adaptive(audio_signal, sampling_frequency):
555548 )
556549
557550 # Perform a high-pass filtering of the dual foreground
558- repeating_mask [1 : cutoff_frequency2 + 2 , :] = 1
551+ repeating_mask [1 : cutoff_frequency2 + 1 , :] = 1
559552
560553 # Recover the mirrored frequencies
561554 repeating_mask = np .concatenate (
@@ -664,7 +657,7 @@ def sim(audio_signal, sampling_frequency):
664657 # Loop over the channels
665658 for i in range (number_channels ):
666659
667- # STFT of the current channel
660+ # Compute the STFT of the current channel
668661 audio_stft [:, :, i ] = _stft (audio_signal [:, i ], window_function , step_length )
669662
670663 # Derive the magnitude spectrogram (with the DC component and without the mirrored frequencies)
@@ -684,9 +677,7 @@ def sim(audio_signal, sampling_frequency):
684677 )
685678
686679 # Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
687- cutoff_frequency2 = (
688- int (np .ceil (cutoff_frequency * (window_length - 1 ) / sampling_frequency )) - 1
689- )
680+ cutoff_frequency2 = round (cutoff_frequency * window_length / sampling_frequency )
690681
691682 # Initialize the background signal
692683 background_signal = np .zeros ((number_samples , number_channels ))
@@ -698,7 +689,7 @@ def sim(audio_signal, sampling_frequency):
698689 repeating_mask = _simmask (audio_spectrogram [:, :, i ], similarity_indices )
699690
700691 # Perform a high-pass filtering of the dual foreground
701- repeating_mask [1 : cutoff_frequency2 + 2 , :] = 1
692+ repeating_mask [1 : cutoff_frequency2 + 1 , :] = 1
702693
703694 # Recover the mirrored frequencies
704695 repeating_mask = np .concatenate (
@@ -832,9 +823,7 @@ def simonline(audio_signal, sampling_frequency):
832823 )
833824
834825 # Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
835- cutoff_frequency2 = (
836- int (np .ceil (cutoff_frequency * (window_length - 1 ) / sampling_frequency )) - 1
837- )
826+ cutoff_frequency2 = round (cutoff_frequency * window_length / sampling_frequency )
838827
839828 # Initialize the background signal
840829 background_signal = np .zeros (
@@ -895,7 +884,7 @@ def simonline(audio_signal, sampling_frequency):
895884 )
896885
897886 # Perform a high-pass filtering of the dual foreground
898- repeating_mask [1 : cutoff_frequency2 + 2 ] = 1
887+ repeating_mask [1 : cutoff_frequency2 + 1 ] = 1
899888
900889 # Recover the mirrored frequencies
901890 repeating_mask = np .concatenate ((repeating_mask , repeating_mask [- 2 :0 :- 1 ]))
@@ -1173,7 +1162,7 @@ def _beatspectrogram(audio_spectrogram, segment_length, segment_step):
11731162 """
11741163 Compute the beat spectrogram using the beat spectrum.
11751164
1176- Input :
1165+ Inputs :
11771166 audio_spectrogram: audio spectrogram (number_frequencies, number_times)
11781167 segment_length: segment length in seconds for the segmentation
11791168 segment_step: step length in seconds for the segmentation
@@ -1209,7 +1198,7 @@ def _beatspectrogram(audio_spectrogram, segment_length, segment_step):
12091198 audio_spectrogram [:, i : i + segment_length ]
12101199 )
12111200
1212- # Simply duplicate the values between segment steps
1201+ # Replicate the values between segment steps
12131202 beat_spectrogram [
12141203 :, i : min (i + segment_step - 1 , number_times )
12151204 ] = beat_spectrogram [:, i : i + 1 ]
@@ -1345,12 +1334,13 @@ def _localmaxima(data_vector, minimum_value, minimum_distance, number_values):
13451334 # Get the corresponding indices sorted in descending order
13461335 sort_indices = np .argsort (maximum_values )[::- 1 ]
13471336
1348- # Keep only the top values for the local maxima
1337+ # Keep only the sorted indices for the top values
13491338 number_values = min (number_values , len (maximum_values ))
1350- maximum_values = maximum_values [0 :number_values ]
1339+ sort_indices = sort_indices [0 :number_values ]
13511340
1352- # Get the indices of the top local maxima
1353- maximum_indices = maximum_indices [sort_indices [0 :number_values ]]
1341+ # Get the values and indices of the top local maxima
1342+ maximum_values = maximum_values [sort_indices ]
1343+ maximum_indices = maximum_indices [sort_indices ]
13541344
13551345 return maximum_values , maximum_indices
13561346
@@ -1397,10 +1387,11 @@ def _mask(audio_spectrogram, repeating_period):
13971387 """
13981388 Compute the repeating mask for REPET.
13991389
1400- Input :
1390+ Inputs :
14011391 audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1402- Output:
14031392 repeating_period: repeating period in lag
1393+ Output:
1394+ repeating_mask: repeating mask (number_frequencies, number_times)
14041395 """
14051396
14061397 # Get the number of frequency channels and time frames in the spectrogram
@@ -1471,11 +1462,12 @@ def _adaptivemask(audio_spectrogram, repeating_periods, filter_order):
14711462 """
14721463 Compute the repeating mask for the adaptive REPET.
14731464
1474- Input :
1465+ Inputs :
14751466 audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1476- Output:
14771467 repeating_periods: repeating periods in lag
14781468 filter_order: filter order for the median filter in number of time frames
1469+ Output:
1470+ repeating_mask: repeating mask (number_frequencies, number_times)
14791471 """
14801472
14811473 # Get the number of frequency channels and time frames in the spectrogram
@@ -1520,10 +1512,11 @@ def _simmask(audio_spectrogram, similarity_indices):
15201512 """
15211513 Compute the repeating mask for REPET-SIM.
15221514
1523- Input :
1515+ Inputs :
15241516 audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1525- Output:
15261517 similarity_indices: list of indices of the similar frames for every frame (number_frames, )
1518+ Output:
1519+ repeating_mask: repeating mask (number_frequencies, number_times)
15271520 """
15281521
15291522 # Get the number of frequency channels and time frames in the spectrogram
0 commit comments