Skip to content

Commit

Permalink
Merge pull request #35 from oxygen-dioxide/diffsinger
Browse files Browse the repository at this point in the history
Diffsinger
  • Loading branch information
oxygen-dioxide authored Apr 9, 2023
2 parents e737ca2 + 5089e12 commit 6224ab8
Show file tree
Hide file tree
Showing 24 changed files with 3,072 additions and 300 deletions.
45 changes: 31 additions & 14 deletions OpenUtau.Core/Api/PhonemizerRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ internal class PhonemizerRunner : IDisposable {
private readonly TaskScheduler mainScheduler;
private readonly CancellationTokenSource shutdown = new CancellationTokenSource();
private readonly BlockingCollection<PhonemizerRequest> requests = new BlockingCollection<PhonemizerRequest>();
private readonly object busyLock = new object();
private Thread thread;

public PhonemizerRunner(TaskScheduler mainScheduler) {
Expand All @@ -49,22 +50,24 @@ void PhonemizerLoop() {
var parts = new HashSet<UVoicePart>();
var toRun = new List<PhonemizerRequest>();
while (!shutdown.IsCancellationRequested) {
while (requests.TryTake(out var request)) {
toRun.Add(request);
}
foreach (var request in toRun) {
parts.Add(request.part);
}
for (int i = toRun.Count - 1; i >= 0; i--) {
if (parts.Remove(toRun[i].part)) {
SendResponse(Phonemize(toRun[i]));
lock (busyLock) {
while (requests.TryTake(out var request)) {
toRun.Add(request);
}
foreach (var request in toRun) {
parts.Add(request.part);
}
for (int i = toRun.Count - 1; i >= 0; i--) {
if (parts.Remove(toRun[i].part)) {
SendResponse(Phonemize(toRun[i]));
}
}
parts.Clear();
toRun.Clear();
try {
toRun.Add(requests.Take(shutdown.Token));
} catch (OperationCanceledException) { }
}
parts.Clear();
toRun.Clear();
try {
toRun.Add(requests.Take(shutdown.Token));
} catch (OperationCanceledException) { }
}
}

Expand Down Expand Up @@ -170,6 +173,20 @@ static PhonemizerResponse Phonemize(PhonemizerRequest request) {
};
}

/// <summary>
/// Blocks the calling thread until the request queue is observed empty while
/// <c>busyLock</c> is held, i.e. while the phonemizer loop is not in the middle
/// of a batch. Also returns once shutdown has been requested, because the
/// phonemizer loop stops taking requests at that point.
/// Should only be used in command line mode.
/// </summary>
public void WaitFinish() {
    while (true) {
        lock (busyLock) {
            if (requests.Count == 0) {
                return;
            }
        }
        // The phonemizer loop exits when shutdown is cancelled, so requests that
        // are still queued would keep this method waiting forever.
        if (shutdown.IsCancellationRequested) {
            return;
        }
        // Back off between polls instead of spinning: a tight loop burns a core
        // and keeps re-acquiring busyLock, which the worker needs to make progress.
        Thread.Sleep(1);
    }
}

public void Dispose() {
if (shutdown.IsCancellationRequested) {
return;
Expand Down
4 changes: 2 additions & 2 deletions OpenUtau.Core/Classic/ExeWavtool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,15 @@ void WriteSetUp(StreamWriter writer, List<ResamplerItem> resamplerItems, string

void WriteItem(StreamWriter writer, ResamplerItem item, int index, int total) {
writer.WriteLine($"@set resamp={item.resampler.FilePath}");
writer.WriteLine($"@set params={item.volume} {item.modulation} !{item.tempo} {Base64.Base64EncodeInt12(item.pitches)}");
writer.WriteLine($"@set params={item.volume} {item.modulation} !{item.tempo.ToString("G999")} {Base64.Base64EncodeInt12(item.pitches)}");
writer.WriteLine($"@set flag=\"{item.GetFlagsString()}\"");
writer.WriteLine($"@set env={GetEnvelope(item)}");
writer.WriteLine($"@set stp={item.skipOver}");
writer.WriteLine($"@set vel={item.velocity}");
string relOutputFile = Path.GetRelativePath(PathManager.Inst.CachePath, item.outputFile);
writer.WriteLine($"@set temp=\"%cachedir%\\{relOutputFile}\"");
string toneName = MusicMath.GetToneName(item.tone);
string dur = $"{item.phone.duration}@{item.phone.tempo}{(item.durCorrection >= 0 ? "+" : "")}{item.durCorrection}";
string dur = $"{item.phone.duration.ToString("G999")}@{item.phone.adjustedTempo.ToString("G999")}{(item.durCorrection >= 0 ? "+" : "")}{item.durCorrection}";
string relInputTemp = Path.GetRelativePath(PathManager.Inst.CachePath, item.inputTemp);
writer.WriteLine($"@echo {MakeProgressBar(index + 1, total)}");
writer.WriteLine($"@call %helper% \"%oto%\\{relInputTemp}\" {toneName} {dur} {item.preutter} {item.offset} {item.durRequired} {item.consonant} {item.cutoff} {index}");
Expand Down
45 changes: 28 additions & 17 deletions OpenUtau.Core/Classic/ResamplerItem.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using K4os.Hash.xxHash;
using NAudio.Wave;
using OpenUtau.Core;
using OpenUtau.Core.Render;
using OpenUtau.Core.Ustx;
using static OpenUtau.Api.Phonemizer;

namespace OpenUtau.Classic {
public class ResamplerItem {
Expand Down Expand Up @@ -64,29 +67,37 @@ public ResamplerItem(RenderPhrase phrase, RenderPhone phone) {
consonant = phone.oto.Consonant;
cutoff = phone.oto.Cutoff;

int pitchLeading = phrase.timeAxis.TicksBetweenMsPos(phone.positionMs - pitchLeadingMs, phone.positionMs);
int pitchSkip = (phrase.leading + phone.position - pitchLeading) / 5;
int pitchCount = (int)Math.Ceiling(
(double)phrase.timeAxis.TicksBetweenMsPos(
phone.positionMs - pitchLeadingMs,
phone.positionMs + phone.envelope[4].X) / 5);
tempo = phone.tempo;
pitches = phrase.pitches
.Skip(pitchSkip)
.Take(pitchCount)
.Select(pitch => (int)Math.Round(pitch - phone.tone * 100))
.ToArray();
if (pitchSkip < 0) {
pitches = Enumerable.Repeat(pitches[0], -pitchSkip)
.Concat(pitches)
.ToArray();
tempo = phone.adjustedTempo;

double pitchCountMs = (phone.positionMs + phone.envelope[4].X) - (phone.positionMs - pitchLeadingMs);
int pitchCount = (int)Math.Ceiling(MusicMath.TempoMsToTick(tempo, pitchCountMs) / 5.0);
pitchCount = Math.Max(pitchCount, 0);
pitches = new int[pitchCount];

double phoneStartMs = phone.positionMs - pitchLeadingMs;
double phraseStartMs = phrase.positionMs - phrase.leadingMs;
for (int i = 0; i < phone.tempos.Length; i++) {
double startMs = Math.Max(phrase.timeAxis.TickPosToMsPos(phone.tempos[i].position), phoneStartMs);
double endMs = i + 1 < phone.tempos.Length ? phrase.timeAxis.TickPosToMsPos(phone.tempos[i + 1].position) : phone.positionMs + phone.envelope[4].X;
double durationMs = endMs - startMs;
int tempoPitchCount = (int)Math.Floor(MusicMath.TempoMsToTick(tempo, durationMs) / 5.0);
int tempoPitchSkip = (int)Math.Floor(MusicMath.TempoMsToTick(tempo, startMs - phoneStartMs) / 5.0);
tempoPitchCount = Math.Min(tempoPitchCount, pitches.Length - tempoPitchSkip);
int phrasePitchSkip = (int)Math.Floor(phrase.timeAxis.TicksBetweenMsPos(phraseStartMs, startMs) / 5.0);
double tempoRatio = phone.tempos[i].bpm / tempo;
for (int j = 0; j < tempoPitchCount; j++) {
int index = tempoPitchSkip + j;
int scaled = phrasePitchSkip + (int)Math.Ceiling(j * tempoRatio);
scaled = Math.Clamp(scaled, 0, phrase.pitches.Length - 1);
index = Math.Clamp(index, 0, pitchCount - 1);
pitches[index] = (int)Math.Round(phrase.pitches[scaled] - phone.tone * 100);
}
}

hash = Hash();
outputFile = Path.Join(PathManager.Inst.CachePath,
$"res-{XXH32.DigestOf(Encoding.UTF8.GetBytes(phrase.singer.Id)):x8}-{hash:x16}.wav");
}

public string GetFlagsString() {
var builder = new StringBuilder();
foreach (var flag in flags) {
Expand Down
4 changes: 3 additions & 1 deletion OpenUtau.Core/Classic/VoicebankInstaller.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ public VoicebankInstaller(string basePath, Action<double, string> progress, Enco
public void LoadArchive(string path) {
progress.Invoke(0, "Analyzing archive...");
var readerOptions = new ReaderOptions {
ArchiveEncoding = new ArchiveEncoding(archiveEncoding, archiveEncoding)
ArchiveEncoding = new ArchiveEncoding {
Forced = archiveEncoding,
}
};
var extractionOptions = new ExtractionOptions {
Overwrite = true,
Expand Down
14 changes: 14 additions & 0 deletions OpenUtau.Core/Editing/LyricBatchEdits.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using OpenUtau.Core.Ustx;
using TinyPinyin;
using WanaKanaNet;
Expand Down Expand Up @@ -87,6 +88,19 @@ private bool ShouldRemove(char c) {
}
}

/// <summary>
/// Lyric edit that removes every square-bracketed span (phonetic hint) from a
/// lyric. If removing the brackets would leave nothing, the lyric is returned
/// unchanged.
/// </summary>
public class RemovePhoneticHint : SingleNoteLyricEdit {
    // Matches one "[...]" span at a time. The negated class stops at the first
    // closing bracket, so "a[x]b[y]" becomes "ab" rather than "a" as it would
    // with a greedy ".*" that runs to the last ']'.
    static readonly Regex phoneticHintPattern = new Regex(@"\[[^\]]*\]");
    public override string Name => "pianoroll.menu.lyrics.removephonetichint";

    /// <summary>
    /// Strips all bracketed spans from <paramref name="lyric"/>.
    /// </summary>
    /// <param name="lyric">The lyric text to clean.</param>
    /// <returns>
    /// The lyric without bracketed spans, or the original lyric when the result
    /// would be null or empty.
    /// </returns>
    protected override string Transform(string lyric) {
        var stripped = phoneticHintPattern.Replace(lyric, string.Empty);
        // Keep the original text rather than producing an empty lyric.
        return string.IsNullOrEmpty(stripped) ? lyric : stripped;
    }
}

public class DashToPlus : SingleNoteLyricEdit {
public override string Name => "pianoroll.menu.lyrics.dashtoplus";
protected override string Transform(string lyric) {
Expand Down
22 changes: 12 additions & 10 deletions OpenUtau.Core/OpenUtau.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,34 @@
<ItemGroup>
<PackageReference Include="BunLabs.NAudio.Flac" Version="2.0.1" />
<PackageReference Include="Concentus.OggFile" Version="1.0.4" />
<PackageReference Include="K4os.Hash.xxHash" Version="1.0.7" />
<PackageReference Include="Melanchall.DryWetMidi" Version="6.1.2" />
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.12.1" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.13.1" />
<PackageReference Include="K4os.Hash.xxHash" Version="1.0.8" />
<PackageReference Include="Melanchall.DryWetMidi" Version="6.1.4" />
<PackageReference Include="NAudio.Core" Version="2.0.0" />
<PackageReference Include="NAudio.Midi" Version="2.0.1" />
<PackageReference Include="NAudio.Vorbis" Version="1.5.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="NLayer.NAudioSupport" Version="1.3.0" />
<PackageReference Include="NumSharp" Version="0.30.0" />
<PackageReference Include="NWaves" Version="0.9.6" />
<PackageReference Include="Serilog" Version="2.11.0" />
<PackageReference Include="SharpCompress" Version="0.32.2" />
<PackageReference Include="Serilog" Version="2.12.0" />
<PackageReference Include="SharpCompress" Version="0.33.0" />
<PackageReference Include="System.Buffers" Version="4.5.1" />
<PackageReference Include="System.ComponentModel.Annotations" Version="5.0.0" />
<PackageReference Include="System.IO.Packaging" Version="6.0.0" />
<PackageReference Include="TinyPinyin.Net" Version="1.0.2" />
<PackageReference Include="ToolGood.Words.Pinyin" Version="3.1.0" />
<PackageReference Include="UTF.Unknown" Version="2.5.1" />
<PackageReference Include="Vortice.DXGI" Version="2.2.0" />
<PackageReference Include="Vortice.DXGI" Version="2.4.2" />
<PackageReference Include="WanaKana-net" Version="1.0.0" />
<PackageReference Include="YamlDotNet" Version="12.3.1" />
<PackageReference Include="NetMQ" Version="4.0.1.9" />
<PackageReference Include="YamlDotNet" Version="13.0.2" />
<PackageReference Include="NetMQ" Version="4.0.1.11" />
</ItemGroup>
<ItemGroup Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'true'">
<PackageReference Include="NAudio" Version="2.0.1" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.14.1" />
</ItemGroup>
<ItemGroup Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'false'">
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.14.1" />
</ItemGroup>
<ItemGroup>
<Compile Update="Analysis\Crepe\Resources.Designer.cs">
Expand Down
20 changes: 18 additions & 2 deletions OpenUtau.Core/Render/RenderPhrase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ public class RenderPhone {
public readonly int tone;
public readonly int noteIndex;
public readonly double tempo;
public readonly UTempo[] tempos;

// classic args
public readonly double preutterMs;
public readonly double overlapMs;
public readonly double durCorrectionMs;
public readonly string resampler;
public readonly double adjustedTempo;
public readonly Tuple<string, int?>[] flags;
public readonly string suffix;
public readonly float volume;
Expand All @@ -78,11 +80,25 @@ internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note

this.phoneme = phoneme.phoneme;
tone = note.tone;
tempo = project.timeAxis.GetBpmAtTick(part.position + phoneme.position);
tempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position - leading, part.position + phoneme.End);
UTempo[] noteTempos = project.timeAxis.TemposBetweenTicks(part.position + phoneme.position, part.position + phoneme.End);
tempo = noteTempos[0].bpm;

double actualTickDuration = 0;
for (int i = 0; i < noteTempos.Length; i++) {
int tempoStart = Math.Max(part.position + phoneme.position, noteTempos[i].position);
int tempoEnd = i + 1 < noteTempos.Length ? noteTempos[i + 1].position : part.position + phoneme.End;
int tempoLength = tempoEnd - tempoStart;
actualTickDuration += (double)(tempoLength * (tempo / noteTempos[i].bpm));
}

adjustedTempo = duration / actualTickDuration * tempo;

preutterMs = phoneme.preutter;
overlapMs = phoneme.overlap;
durCorrectionMs = phoneme.preutter - phoneme.tailIntrude + phoneme.tailOverlap;


resampler = track.RendererSettings.resampler;
int eng = (int)phoneme.GetExpression(project, track, Format.Ustx.ENG).Item1;
if (project.expressions.TryGetValue(Format.Ustx.ENG, out var descriptor)
Expand All @@ -103,10 +119,10 @@ internal RenderPhone(UProject project, UTrack track, UVoicePart part, UNote note
oto = phoneme.oto;
hash = Hash();
}

private ulong Hash() {
using (var stream = new MemoryStream()) {
using (var writer = new BinaryWriter(stream)) {
writer.Write(adjustedTempo);
writer.Write(duration);
writer.Write(phoneme ?? string.Empty);
writer.Write(tone);
Expand Down
6 changes: 5 additions & 1 deletion OpenUtau.Core/SingerManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,20 @@ namespace OpenUtau.Core {
public class SingerManager : SingletonBase<SingerManager> {
public Dictionary<string, USinger> Singers { get; private set; } = new Dictionary<string, USinger>();
public Dictionary<USingerType, List<USinger>> SingerGroups { get; private set; } = new Dictionary<USingerType, List<USinger>>();
public Task? InitializationTask = null;

private readonly ConcurrentQueue<USinger> reloadQueue = new ConcurrentQueue<USinger>();
private CancellationTokenSource reloadCancellation;

/// <summary>
/// Starts singer discovery by running <c>SearchAllSingers</c> and stores the
/// task created by <c>Task.Run</c> in <c>InitializationTask</c> so callers can
/// wait for the background search to complete.
/// </summary>
public void Initialize() {
// NOTE(review): as rendered here SearchAllSingers() runs once synchronously and
// once more inside the task. This listing looks like a diff view, so the direct
// call is presumably the pre-change line - confirm against the actual file.
SearchAllSingers();
InitializationTask = Task.Run(() => {
SearchAllSingers();
});
}

public void SearchAllSingers() {
try {
Log.Information("Searching singers.");
Directory.CreateDirectory(PathManager.Inst.SingersPath);
var stopWatch = Stopwatch.StartNew();
var singers = ClassicSingerLoader.FindAllSingers()
Expand Down
8 changes: 8 additions & 0 deletions OpenUtau.Core/Util/MusicMath.cs
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,13 @@ public static void GetSnapUnit(
div *= 2;
}
}

/// <summary>
/// Converts a duration in milliseconds to a (fractional) tick count at a fixed tempo.
/// </summary>
/// <param name="tempo">Tempo in beats per minute.</param>
/// <param name="ms">Duration in milliseconds.</param>
/// <returns>The number of ticks, using 480 ticks per beat.</returns>
public static double TempoMsToTick(double tempo, double ms) {
    const int ticksPerBeat = 480;
    const double msPerMinute = 60.0 * 1000.0;
    double tickMs = tempo * ticksPerBeat * ms;
    return tickMs / msPerMinute;
}

/// <summary>
/// Converts a tick count to a duration in milliseconds at a fixed tempo.
/// </summary>
/// <param name="tempo">Tempo in beats per minute.</param>
/// <param name="tick">Number of ticks.</param>
/// <returns>The duration in milliseconds, using 480 ticks per beat.</returns>
public static double TempoTickToMs(double tempo, int tick) {
    const int ticksPerBeat = 480;
    const double msPerMinute = 60.0 * 1000.0;
    double tickMs = msPerMinute * tick;
    double ticksPerMinute = tempo * ticksPerBeat;
    return tickMs / ticksPerMinute;
}
}
}
8 changes: 8 additions & 0 deletions OpenUtau.Core/Util/TimeAxis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,14 @@ public void NextBarBeat(int bar, int beat, out int nextBar, out int nextBeat) {
}
}

/// <summary>
/// Collects the tempo segments that overlap a tick range and returns them as
/// <c>UTempo</c> values.
/// </summary>
/// <param name="start">Range start tick; a segment qualifies only if its <c>tickEnd</c> is greater than this.</param>
/// <param name="end">Range end tick; a segment qualifies only if its <c>tickPos</c> is less than this.</param>
/// <returns>
/// One <c>UTempo</c> per qualifying segment, in <c>tempoSegments</c> order,
/// carrying the segment's <c>tickPos</c> as position and its <c>bpm</c>.
/// </returns>
public UTempo[] TemposBetweenTicks(int start, int end) {
    var overlapping =
        from segment in tempoSegments
        where segment.tickEnd > start && end > segment.tickPos
        select new UTempo { position = segment.tickPos, bpm = segment.bpm };
    return overlapping.ToArray();
}

public UTimeSignature TimeSignatureAtTick(int tick) {
var segment = timeSigSegments.First(seg => seg.tickPos == tick || seg.tickEnd > tick); // TODO: optimize
return new UTimeSignature {
Expand Down
4 changes: 2 additions & 2 deletions OpenUtau.Core/Vogen/VogenRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ float[] InvokeVogen(RenderPhrase phrase) {
new DenseTensor<string>(phonemes.ToArray(), new int[] { phonemes.Count })));
inputs.Add(NamedOnnxValue.CreateFromTensor("phDurs",
new DenseTensor<long>(phDurs.ToArray(), new int[] { phonemes.Count })));
using (var session = new InferenceSession(Data.VogenRes.f0_man)) {
using (var session = Onnx.getInferenceSession(Data.VogenRes.f0_man)) {
using var outputs = session.Run(inputs);
var f0Out = outputs.First().AsTensor<float>();
var f0Path = Path.Join(PathManager.Inst.CachePath, $"vog-{phrase.hash:x16}-f0.npy");
Expand All @@ -169,7 +169,7 @@ float[] InvokeVogen(RenderPhrase phrase) {
new DenseTensor<float>(breAmp, new int[] { 1, f0.Length })));
double[,] sp;
double[,] ap;
using (var session = new InferenceSession(singer.model)) {
using (var session = Onnx.getInferenceSession(singer.model)) {
using var outputs = session.Run(inputs);
var mgc = outputs.First().AsTensor<float>().Select(f => (double)f).ToArray();
var bap = outputs.Last().AsTensor<float>().Select(f => (double)f).ToArray();
Expand Down
2 changes: 1 addition & 1 deletion OpenUtau.Plugin.Builtin/JapaneseCVVCPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class JapaneseCVVCPhonemizer : Phonemizer {
"s","sh","z","j","t","ch","ty","ts",
"d","dy","n","ny","h","hy","f","b",
"by","p","py","m","my","y","r","4",
"ry","w","v","ng","l","・",
"ry","w","v","ng","l","・","B", "H",
};

static readonly string[] vowels = new string[] {
Expand Down
Loading

0 comments on commit 6224ab8

Please sign in to comment.