Skip to content

Commit 25faa95

Browse files
committed
Little comment updates.
1 parent 592e5e8 commit 25faa95

File tree

1 file changed

+32
-39
lines changed

1 file changed

+32
-39
lines changed

repet.py

Lines changed: 32 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
http://zafarrafii.com
3131
https://github.com/zafarrafii
3232
https://www.linkedin.com/in/zafarrafii/
33-
01/23/21
33+
01/26/21
3434
"""
3535

3636
import numpy as np
@@ -154,12 +154,11 @@ def original(audio_signal, sampling_frequency):
154154
# Compute the STFT of the current channel
155155
audio_stft[:, :, i] = _stft(audio_signal[:, i], window_function, step_length)
156156

157-
# Derive the magnitude spectrogram
158-
# (with the DC component and without the mirrored frequencies)
157+
# Derive the magnitude spectrogram (with the DC component and without the mirrored frequencies)
159158
audio_spectrogram = abs(audio_stft[0 : int(window_length / 2) + 1, :, :])
160159

161160
# Compute the beat spectrum of the spectrograms averaged over the channels
162-
# (take the square to emphasize periodicity peaks)
161+
# (take the square to emphasize peaks of periodicity)
163162
beat_spectrum = _beatspectrum(np.power(np.mean(audio_spectrogram, axis=2), 2))
164163

165164
# Get the period range in time frames for the beat spectrum
@@ -171,9 +170,7 @@ def original(audio_signal, sampling_frequency):
171170
repeating_period = _periods(beat_spectrum, period_range2)
172171

173172
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
174-
cutoff_frequency2 = int(
175-
np.ceil(cutoff_frequency * window_length / sampling_frequency)
176-
)
173+
cutoff_frequency2 = round(cutoff_frequency * window_length / sampling_frequency)
177174

178175
# Initialize the background signal
179176
background_signal = np.zeros((number_samples, number_channels))
@@ -185,7 +182,7 @@ def original(audio_signal, sampling_frequency):
185182
repeating_mask = _mask(audio_spectrogram[:, :, i], repeating_period)
186183

187184
# Perform a high-pass filtering of the dual foreground
188-
repeating_mask[1 : cutoff_frequency2 + 2, :] = 1
185+
repeating_mask[1 : cutoff_frequency2 + 1, :] = 1
189186

190187
# Recover the mirrored frequencies
191188
repeating_mask = np.concatenate(
@@ -299,11 +296,9 @@ def extended(audio_signal, sampling_frequency):
299296
).astype(int)
300297

301298
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
302-
cutoff_frequency2 = int(
303-
np.ceil(cutoff_frequency * window_length / sampling_frequency)
304-
)
299+
cutoff_frequency2 = round(cutoff_frequency * window_length / sampling_frequency)
305300

306-
# Initialize background signal
301+
# Initialize the background signal
307302
background_signal = np.zeros((number_samples, number_channels))
308303

309304
# Loop over the segments
@@ -364,7 +359,7 @@ def extended(audio_signal, sampling_frequency):
364359
repeating_mask = _mask(audio_spectrogram[:, :, i], repeating_period)
365360

366361
# Perform a high-pass filtering of the dual foreground
367-
repeating_mask[1 : cutoff_frequency2 + 2, :] = 1
362+
repeating_mask[1 : cutoff_frequency2 + 1, :] = 1
368363

369364
# Recover the mirrored frequencies
370365
repeating_mask = np.concatenate(
@@ -389,7 +384,7 @@ def extended(audio_signal, sampling_frequency):
389384

390385
else:
391386

392-
# Check if it is one of the first segments or the last one
387+
# Check if it is the first segment or the following ones
393388
if j == 0:
394389

395390
# Add the segment to the signal
@@ -539,9 +534,7 @@ def adaptive(audio_signal, sampling_frequency):
539534
repeating_periods = _periods(beat_spectrogram, period_range2)
540535

541536
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
542-
cutoff_frequency2 = (
543-
int(np.ceil(cutoff_frequency * (window_length - 1) / sampling_frequency)) - 1
544-
)
537+
cutoff_frequency2 = round(cutoff_frequency * window_length / sampling_frequency)
545538

546539
# Initialize the background signal
547540
background_signal = np.zeros((number_samples, number_channels))
@@ -555,7 +548,7 @@ def adaptive(audio_signal, sampling_frequency):
555548
)
556549

557550
# Perform a high-pass filtering of the dual foreground
558-
repeating_mask[1 : cutoff_frequency2 + 2, :] = 1
551+
repeating_mask[1 : cutoff_frequency2 + 1, :] = 1
559552

560553
# Recover the mirrored frequencies
561554
repeating_mask = np.concatenate(
@@ -664,7 +657,7 @@ def sim(audio_signal, sampling_frequency):
664657
# Loop over the channels
665658
for i in range(number_channels):
666659

667-
# STFT of the current channel
660+
# Compute the STFT of the current channel
668661
audio_stft[:, :, i] = _stft(audio_signal[:, i], window_function, step_length)
669662

670663
# Derive the magnitude spectrogram (with the DC component and without the mirrored frequencies)
@@ -684,9 +677,7 @@ def sim(audio_signal, sampling_frequency):
684677
)
685678

686679
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
687-
cutoff_frequency2 = (
688-
int(np.ceil(cutoff_frequency * (window_length - 1) / sampling_frequency)) - 1
689-
)
680+
cutoff_frequency2 = round(cutoff_frequency * window_length / sampling_frequency)
690681

691682
# Initialize the background signal
692683
background_signal = np.zeros((number_samples, number_channels))
@@ -698,7 +689,7 @@ def sim(audio_signal, sampling_frequency):
698689
repeating_mask = _simmask(audio_spectrogram[:, :, i], similarity_indices)
699690

700691
# Perform a high-pass filtering of the dual foreground
701-
repeating_mask[1 : cutoff_frequency2 + 2, :] = 1
692+
repeating_mask[1 : cutoff_frequency2 + 1, :] = 1
702693

703694
# Recover the mirrored frequencies
704695
repeating_mask = np.concatenate(
@@ -832,9 +823,7 @@ def simonline(audio_signal, sampling_frequency):
832823
)
833824

834825
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
835-
cutoff_frequency2 = (
836-
int(np.ceil(cutoff_frequency * (window_length - 1) / sampling_frequency)) - 1
837-
)
826+
cutoff_frequency2 = round(cutoff_frequency * window_length / sampling_frequency)
838827

839828
# Initialize the background signal
840829
background_signal = np.zeros(
@@ -895,7 +884,7 @@ def simonline(audio_signal, sampling_frequency):
895884
)
896885

897886
# Perform a high-pass filtering of the dual foreground
898-
repeating_mask[1 : cutoff_frequency2 + 2] = 1
887+
repeating_mask[1 : cutoff_frequency2 + 1] = 1
899888

900889
# Recover the mirrored frequencies
901890
repeating_mask = np.concatenate((repeating_mask, repeating_mask[-2:0:-1]))
@@ -1173,7 +1162,7 @@ def _beatspectrogram(audio_spectrogram, segment_length, segment_step):
11731162
"""
11741163
Compute the beat spectrogram using the beat spectrum.
11751164
1176-
Input:
1165+
Inputs:
11771166
audio_spectrogram: audio spectrogram (number_frequencies, number_times)
11781167
segment_length: segment length in seconds for the segmentation
11791168
segment_step: step length in seconds for the segmentation
@@ -1209,7 +1198,7 @@ def _beatspectrogram(audio_spectrogram, segment_length, segment_step):
12091198
audio_spectrogram[:, i : i + segment_length]
12101199
)
12111200

1212-
# Simply duplicate the values between segment steps
1201+
# Replicate the values between segment steps
12131202
beat_spectrogram[
12141203
:, i : min(i + segment_step - 1, number_times)
12151204
] = beat_spectrogram[:, i : i + 1]
@@ -1345,12 +1334,13 @@ def _localmaxima(data_vector, minimum_value, minimum_distance, number_values):
13451334
# Get the corresponding indices sorted in descending order
13461335
sort_indices = np.argsort(maximum_values)[::-1]
13471336

1348-
# Keep only the top values for the local maxima
1337+
# Keep only the sorted indices for the top values
13491338
number_values = min(number_values, len(maximum_values))
1350-
maximum_values = maximum_values[0:number_values]
1339+
sort_indices = sort_indices[0:number_values]
13511340

1352-
# Get the indices of the top local maxima
1353-
maximum_indices = maximum_indices[sort_indices[0:number_values]]
1341+
# Get the values and indices of the top local maxima
1342+
maximum_values = maximum_values[sort_indices]
1343+
maximum_indices = maximum_indices[sort_indices]
13541344

13551345
return maximum_values, maximum_indices
13561346

@@ -1397,10 +1387,11 @@ def _mask(audio_spectrogram, repeating_period):
13971387
"""
13981388
Compute the repeating mask for REPET.
13991389
1400-
Input:
1390+
Inputs:
14011391
audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1402-
Output:
14031392
repeating_period: repeating period in lag
1393+
Output:
1394+
repeating_mask: repeating mask (number_frequencies, number_times)
14041395
"""
14051396

14061397
# Get the number of frequency channels and time frames in the spectrogram
@@ -1471,11 +1462,12 @@ def _adaptivemask(audio_spectrogram, repeating_periods, filter_order):
14711462
"""
14721463
Compute the repeating mask for the adaptive REPET.
14731464
1474-
Input:
1465+
Inputs:
14751466
audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1476-
Output:
14771467
repeating_periods: repeating periods in lag
14781468
filter_order: filter order for the median filter in number of time frames
1469+
Output:
1470+
repeating_mask: repeating mask (number_frequencies, number_times)
14791471
"""
14801472

14811473
# Get the number of frequency channels and time frames in the spectrogram
@@ -1520,10 +1512,11 @@ def _simmask(audio_spectrogram, similarity_indices):
15201512
"""
15211513
Compute the repeating mask for REPET-SIM.
15221514
1523-
Input:
1515+
Inputs:
15241516
audio_spectrogram: audio spectrogram (number_frequencies, number_times)
1525-
Output:
15261517
similarity_indices: list of indices of the similar frames for every frame (number_frames, )
1518+
Output:
1519+
repeating_mask: repeating mask (number_frequencies, number_times)
15271520
"""
15281521

15291522
# Get the number of frequency channels and time frames in the spectrogram

0 commit comments

Comments
 (0)