forked from erogol/WaveRNN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess_data.py
118 lines (99 loc) · 3.76 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import librosa
import shutil
import argparse
import matplotlib.pyplot as plt
import math, pickle, os, glob
import numpy as np
from tqdm import tqdm
from utils import *
from utils.display import *
from utils.generic_utils import load_config
from utils.audio import AudioProcessor
from multiprocessing import Pool
def get_files(path, extension=".wav"):
    """Recursively collect files under ``path`` whose names end with ``extension``.

    Args:
        path: Root directory to search; nested sub-directories are included.
        extension: Filename suffix to match (defaults to ``".wav"``).

    Returns:
        A list of matching path strings, in the order ``glob`` yields them.
    """
    pattern = f"{path}/**/*{extension}"
    return list(glob.iglob(pattern, recursive=True))
def process_file(path):
    """Load one audio file and compute its mel spectrogram and quantized signal.

    Relies on the module-level ``ap`` (audio processor) and ``CONFIG`` objects
    that the ``__main__`` section creates before this function is called.

    Args:
        path: Path of the audio file to load.

    Returns:
        A ``(mel, quant, wav)`` tuple:

        * ``mel`` -- result of ``ap.melspectrogram`` cast to ``float32``.
        * ``quant`` -- quantized signal cast to ``int32``, or ``None`` when
          ``CONFIG.mode`` is ``'mold'``/``'gauss'`` or is not an integer.
        * ``wav`` -- the waveform exactly as returned by ``ap.load_wav``.
    """
    wav = ap.load_wav(path)
    mel = ap.melspectrogram(wav)

    # 'mold' / 'gauss' modes work on the raw waveform, so nothing is quantized.
    # Defaulting to None also covers an unrecognized mode, which previously
    # left ``quant`` unassigned and crashed at the return statement.
    quant = None
    if isinstance(CONFIG.mode, int):
        if CONFIG.mulaw:
            # The original passed ``self.mode`` here, but there is no ``self``
            # in a module-level function; ``CONFIG.mode`` is the value this
            # branch is guarded on.
            quant = ap.mulaw_encode(wav, CONFIG.mode)
        else:
            quant = ap.quantize(wav)
            # Keep every value inside the representable range for the bit depth.
            quant = quant.clip(0, 2 ** CONFIG.audio['bits'] - 1)
        quant = quant.astype(np.int32)

    return mel.astype(np.float32), quant, wav
def extract_feats(wav_path):
    """Compute and save the features of a single audio file.

    Used as the worker function of the process pool. Relies on the
    module-level ``args``, ``CONFIG``, ``QUANT_PATH`` and ``MEL_PATH`` values
    assigned in the ``__main__`` section.

    Args:
        wav_path: Path of the audio file to process.

    Returns:
        The file id (basename without its extension), or ``None`` when the
        file could not be processed and ``args.ignore_errors`` is set.

    Raises:
        RuntimeError: If the file cannot be processed and errors are not
            ignored, or if no quantized signal was produced for a mode other
            than ``'mold'``/``'gauss'``.
    """
    # basename/splitext instead of split("/")[-1][:-4]: works with any path
    # separator and any extension length.
    idx = os.path.splitext(os.path.basename(wav_path))[0]

    try:
        m, quant, _ = process_file(wav_path)
    except Exception as err:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being counted as a bad file.
        if args.ignore_errors:
            return None
        raise RuntimeError(" [!] Cannot process {}".format(wav_path)) from err

    if quant is None and CONFIG.mode not in ['mold', 'gauss']:
        raise RuntimeError(" [!] Audio file cannot be quantized!")

    # ``if quant:`` is ambiguous for NumPy arrays (raises ValueError for more
    # than one element), so test explicitly for "no quantized signal".
    if quant is not None:
        assert quant.max() < 2 ** CONFIG.audio['bits'], wav_path
        assert quant.min() >= 0
        np.save(f"{QUANT_PATH}{idx}.npy", quant, allow_pickle=False)

    np.save(f"{MEL_PATH}{idx}.npy", m, allow_pickle=False)
    return idx
if __name__ == "__main__":

    def _str2bool(value):
        """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``."""
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y"):
            return True
        if lowered in ("0", "false", "f", "no", "n", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value)
        )

    # ---- command line -----------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path", type=str, help="path to config file for feature extraction."
    )
    parser.add_argument(
        "--num_procs", type=int, default=4, help="number of parallel processes."
    )
    parser.add_argument(
        "--data_path", type=str, default='', help="data path to overwrite config.json."
    )
    parser.add_argument(
        "--out_path", type=str, default='', help="destination to write files."
    )
    # ``type=bool`` would turn every non-empty string (including "False") into
    # True. Parse the text explicitly; a bare ``--ignore_errors`` means True.
    parser.add_argument(
        "--ignore_errors",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        help="ignore bad files.",
    )
    args = parser.parse_args()

    # ---- configuration ----------------------------------------------------
    config_path = args.config_path
    CONFIG = load_config(config_path)
    if args.data_path != '':
        CONFIG.data_path = args.data_path
    ap = AudioProcessor(**CONFIG.audio)

    # ---- output layout ----------------------------------------------------
    SEG_PATH = CONFIG.data_path
    # An empty --out_path (the default) means the current directory;
    # os.makedirs('') would otherwise fail.
    OUT_PATH = args.out_path or os.curdir
    # The trailing separators matter: extract_feats builds file names by
    # plain string concatenation onto QUANT_PATH / MEL_PATH.
    QUANT_PATH = os.path.join(OUT_PATH, "quant/")
    MEL_PATH = os.path.join(OUT_PATH, "mel/")
    os.makedirs(OUT_PATH, exist_ok=True)
    os.makedirs(QUANT_PATH, exist_ok=True)
    os.makedirs(MEL_PATH, exist_ok=True)

    wav_files = get_files(SEG_PATH)
    print(" > Number of audio files : {}".format(len(wav_files)))
    if not wav_files:
        raise RuntimeError(" [!] No audio files found in {}".format(SEG_PATH))

    # ---- sanity-check example ---------------------------------------------
    # Historically the second file is used as the example; fall back to the
    # first one when the dataset holds a single file.
    wav_file = wav_files[1] if len(wav_files) > 1 else wav_files[0]
    m, quant, wav = process_file(wav_file)
    # save an example for sanity check
    if isinstance(CONFIG.mode, int):
        wav_hat = ap.dequantize(quant)
        # NOTE(review): ``librosa.output`` was removed in librosa 0.8.0, so
        # this call requires an older librosa -- confirm the pinned version.
        librosa.output.write_wav(
            os.path.join(OUT_PATH, "test_converted_audio.wav"),
            wav_hat,
            sr=CONFIG.audio['sample_rate'],
        )
        shutil.copyfile(wav_file, os.path.join(OUT_PATH, "test_target_audio.wav"))

    # ---- feature extraction -----------------------------------------------
    # This will take a while depending on size of dataset.
    # NOTE(review): the workers read the globals assigned above (args, CONFIG,
    # ap, QUANT_PATH, MEL_PATH); they are only visible in child processes that
    # are forked from this one, not with the "spawn" start method.
    with Pool(args.num_procs) as p:
        dataset_ids = list(tqdm(p.imap(extract_feats, wav_files), total=len(wav_files)))

    # remove None items (files skipped because of errors)
    if args.ignore_errors:
        dataset_ids = [idx for idx in dataset_ids if idx is not None]

    # save metadata
    with open(os.path.join(OUT_PATH, "dataset_ids.pkl"), "wb") as f:
        pickle.dump(dataset_ids, f)