forked from openvpi/DiffSinger
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvocode.py
More file actions
90 lines (73 loc) · 2.88 KB
/
vocode.py
File metadata and controls
90 lines (73 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# coding=utf8
import argparse
import os
import pathlib
import sys
root_dir = pathlib.Path(__file__).parent.parent.resolve()
os.environ['PYTHONPATH'] = str(root_dir)
sys.path.insert(0, str(root_dir))
import numpy as np
import torch
import tqdm
from inference.ds_acoustic import DiffSingerAcousticInfer
from utils.infer_utils import cross_fade, save_wav
from utils.hparams import set_hparams, hparams
# Command-line interface: one positional mel file plus optional string flags.
parser = argparse.ArgumentParser(description='Run DiffSinger vocoder')
parser.add_argument('mel', type=str, help='Path to the input file')
# Every remaining option is an optional string flag, so they are registered
# from a (flag, help text) table, in the order they are listed in --help.
_optional_flags = (
    ('--exp', 'Read vocoder class and path from chosen experiment'),
    ('--config', 'Read vocoder class and path from config file'),
    ('--class', 'Specify vocoder class'),
    ('--ckpt', 'Specify vocoder checkpoint path'),
    ('--out', 'Path of the output folder'),
    ('--title', 'Title of output file'),
)
for _flag, _help_text in _optional_flags:
    parser.add_argument(_flag, type=str, required=False, help=_help_text)
args = parser.parse_args()
# Input mel file and the base name of the output wav (defaults to the mel
# file's stem unless --title is given).
mel = pathlib.Path(args.mel)
name = args.title if args.title else mel.stem

# Locate the config file: --exp takes precedence over --config.
if args.exp:
    config = root_dir / 'checkpoints' / args.exp / 'config.yaml'
elif args.config:
    config = pathlib.Path(args.config)
else:
    # Report through argparse instead of `assert False`: assertions are
    # stripped under `python -O`, which would let the script continue with no
    # config at all. parser.error() prints usage and exits with status 2.
    parser.error("Either argument '--exp' or '--config' should be specified.")

# Replace this script's own argv with just the config option before calling
# set_hparams().
# NOTE(review): presumably set_hparams() parses sys.argv itself to find
# '--config' -- confirm in utils.hparams.
sys.argv = [sys.argv[0], '--config', str(config)]
set_hparams(print_hparams=False)

# 'class' is a Python keyword, so the parsed value can only be read with
# getattr() rather than attribute syntax.
cls = getattr(args, 'class')
if cls:
    hparams['vocoder'] = cls
if args.ckpt:
    hparams['vocoder_ckpt'] = args.ckpt

# The output folder defaults to the folder that contains the mel file.
out = pathlib.Path(args.out) if args.out else mel.parent

# SECURITY: torch.load() unpickles the file, so only load mel files that come
# from a trusted source.
mel_seq = torch.load(mel)
# Explicit exceptions instead of asserts so the checks survive `python -O`.
if not isinstance(mel_seq, list):
    raise TypeError('Not a valid mel sequence.')
if not mel_seq:
    raise ValueError('Mel sequence is empty.')

sample_rate = hparams['audio_sample_rate']
# load_model=False is passed through unchanged from the original script.
# NOTE(review): presumably this skips loading the acoustic model because only
# the vocoder is used here -- confirm in inference.ds_acoustic.
infer_ins = DiffSingerAcousticInfer(load_model=False)
def run_vocoder(path: pathlib.Path):
    """Vocode every segment of the module-level ``mel_seq`` into one wav file.

    Each segment is placed on a shared timeline at sample index
    ``round(offset * sample_rate)``. A gap before the segment is filled with
    zeros; a segment that starts before the end of the audio written so far
    is merged into it with ``cross_fade``.

    Args:
        path: Destination of the rendered wav file.
    """
    waveform = np.zeros(0)
    written = 0  # sample index at which the previously placed segment ended
    for segment in tqdm.tqdm(mel_seq, desc='mel segment', total=len(mel_seq)):
        mel_input = segment['mel'].to(infer_ins.device)
        f0_input = segment['f0'].to(infer_ins.device)
        audio = infer_ins.run_vocoder(mel_input, f0=f0_input)
        audio = audio.squeeze(0).cpu().numpy()

        # Sample index where this segment begins, and its distance from the
        # end of what has been written (negative means the segments overlap).
        start = round(segment['offset'] * sample_rate)
        gap = start - written
        if gap < 0:
            waveform = cross_fade(waveform, audio, start)
        else:
            waveform = np.append(np.append(waveform, np.zeros(gap)), audio)
        written = start + audio.shape[0]

    print(f'| save audio: {path}')
    save_wav(waveform, path, sample_rate)
# Make sure the output folder exists before rendering into it.
os.makedirs(out, exist_ok=True)
try:
    run_vocoder(out / (name + '.wav'))
except KeyboardInterrupt:
    # Exit quietly on Ctrl+C, without a traceback. sys.exit() is used instead
    # of the bare exit(): the latter is an interactive helper injected by the
    # `site` module and is missing under `python -S` or in frozen builds.
    sys.exit(-1)