diff --git a/Assets/uLipSync/Editor/ProfileEditor.cs b/Assets/uLipSync/Editor/ProfileEditor.cs
index 04a09d9..949e7f9 100644
--- a/Assets/uLipSync/Editor/ProfileEditor.cs
+++ b/Assets/uLipSync/Editor/ProfileEditor.cs
@@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Text;
using uLipSync.Debugging;
+using System;
namespace uLipSync
{
@@ -36,11 +37,11 @@ public void Draw(bool showCalibration)
if (EditorUtil.SimpleFoldout("MFCC", true, "-uLipSync-Profile"))
{
EditorGUI.BeginChangeCheck();
-
+
++EditorGUI.indentLevel;
DrawMfccReorderableList(showCalibration);
--EditorGUI.indentLevel;
-
+
if (EditorGUI.EndChangeCheck())
{
EditorUtility.SetDirty(target);
@@ -62,6 +63,20 @@ public void Draw(bool showCalibration)
profile.UpdateMeansAndStandardization();
EditorUtility.SetDirty(target);
}
+ GUI.enabled = profile.mfccs.Count == 0;
+ if (profile.mfccs.Count != 0)
+ {
+ EditorGUILayout.HelpBox("Can't change delta setting when mfcc data exist." +
+ Environment.NewLine + "Create a new profile or delete the mfcc data", MessageType.Warning);
+ }
+ bool useDelta = EditorGUILayout.Toggle("Use Delta", profile.useDelta);
+ if (useDelta != profile.useDelta)
+ {
+ Undo.RecordObject(target, "Change Use Delta");
+ profile.useDelta = useDelta;
+ EditorUtility.SetDirty(target);
+ }
+ GUI.enabled = true;
EditorUtil.DrawProperty(serializedObject, nameof(profile.compareMethod));
profile.mfccDataCount = Mathf.Clamp(profile.mfccDataCount, 1, 256);
profile.melFilterBankChannels = Mathf.Clamp(profile.melFilterBankChannels, 12, 256);
@@ -118,7 +133,7 @@ void DrawMfccReorderableList(bool showCalibration)
if (_reorderableList == null)
{
_reorderableList = new ReorderableList(profile.mfccs, typeof(MfccData));
- _reorderableList.drawHeaderCallback = rect =>
+ _reorderableList.drawHeaderCallback = rect =>
{
rect.xMin -= EditorGUI.indentLevel * 12f;
EditorGUI.LabelField(rect, "MFCCs");
@@ -182,7 +197,7 @@ void DrawMFCC(Rect position, int index, bool showCalibration)
if (!_texturePool.TryGetValue(data, out Texture2D tex)) tex = null;
tex = TextureCreator.CreateMfccTexture(tex, data, Common.MfccMinValue, Common.MfccMaxValue);
_texturePool[data] = tex;
-
+
var area = EditorGUI.IndentedRect(mfccPos);
area.height = data.mfccCalibrationDataList.Count * 3f;
GUI.DrawTexture(area, tex, ScaleMode.StretchToFill);
diff --git a/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs b/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs
index 23b5e4d..7a34376 100644
--- a/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs
+++ b/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs
@@ -44,13 +44,13 @@ public override void OnInspectorGUI()
if (EditorUtil.Foldout("Animator Controller Parameters", true))
{
++EditorGUI.indentLevel;
- if (anim.animator != null)
- {
- DrawAnimatorReorderableList();
+ if (anim.animator != null && anim.animator.isActiveAndEnabled)
+ {
+ DrawAnimatorReorderableList();
}
else
- {
- EditorGUILayout.HelpBox("Animator is not available.", MessageType.Warning);
+ {
+ EditorGUILayout.HelpBox("Animator is not available! To edit parameters open the prefab or have game object in scene.", MessageType.Warning);
}
--EditorGUI.indentLevel;
EditorGUILayout.Separator();
@@ -246,6 +246,7 @@ protected void DrawParameters()
EditorGUILayout.EndHorizontal();
EditorUtil.DrawProperty(serializedObject, nameof(anim.smoothness));
+ EditorUtil.DrawProperty(serializedObject, nameof(anim.minimalValueThreshold));
}
}
diff --git a/Assets/uLipSync/Runtime/Core/Algorithm.cs b/Assets/uLipSync/Runtime/Core/Algorithm.cs
index 487051c..2e094b6 100644
--- a/Assets/uLipSync/Runtime/Core/Algorithm.cs
+++ b/Assets/uLipSync/Runtime/Core/Algorithm.cs
@@ -2,18 +2,24 @@
using Unity.Mathematics;
using Unity.Burst;
using Unity.Collections.LowLevel.Unsafe;
+using System.Runtime.CompilerServices;
namespace uLipSync
{
-[BurstCompile]
+[BurstCompile(FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)]
public static unsafe class Algorithm
{
+    /// <summary>
+    /// Get the maximum value of the array.
+    /// </summary>
+    /// <param name="array">Array to get max from.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float GetMaxValue(in NativeArray array)
{
return GetMaxValue((float*)array.GetUnsafeReadOnlyPtr(), array.Length);
}
-
+
[BurstCompile]
static float GetMaxValue(float* array, int len)
{
@@ -25,6 +31,10 @@ static float GetMaxValue(float* array, int len)
return max;
}
+    /// <summary>
+    /// Get RMS volume.
+    /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float GetRMSVolume(in NativeArray array)
{
return GetRMSVolume((float*)array.GetUnsafeReadOnlyPtr(), array.Length);
@@ -41,13 +51,20 @@ static float GetRMSVolume(float *array, int len)
return math.sqrt(average / len);
}
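+    /// <summary>
+    /// Copy ring buffer, startSrcIndex is the index of the oldest data.
+    /// </summary>
+    /// <param name="input">Source buffer.</param>
+    /// <param name="output">Destination buffer. This is a temporary buffer and needs to be disposed.</param>
+    /// <param name="startSrcIndex">Index of the oldest data.</param>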
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void CopyRingBuffer(in NativeArray input, out NativeArray output, int startSrcIndex)
{
output = new NativeArray(input.Length, Allocator.Temp);
CopyRingBuffer(
- (float*)input.GetUnsafeReadOnlyPtr(),
- (float*)output.GetUnsafePtr(),
- input.Length,
+ (float*)input.GetUnsafeReadOnlyPtr(),
+ (float*)output.GetUnsafePtr(),
+ input.Length,
startSrcIndex);
}
@@ -60,6 +77,12 @@ static void CopyRingBuffer(float* input, float* output, int len, int startSrcInd
}
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    /// <summary>
+    /// Normalize array to the specified value.
+    /// </summary>
+    /// <param name="array">Array to normalize.</param>
+    /// <param name="value">Value to use for normalization.</param>
public static void Normalize(ref NativeArray array, float value = 1f)
{
Normalize((float*)array.GetUnsafePtr(), array.Length, value);
@@ -77,6 +100,14 @@ static void Normalize(float* array, int len, float value = 1f)
}
}
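+    /// <summary>
+    /// Low-pass filter, cutoff is normalized by sample rate.
+    /// </summary>
+    /// <param name="data">Data.</param>
+    /// <param name="sampleRate">Sample rate.</param>
+    /// <param name="cutoff">Cutoff frequency.</param>
+    /// <param name="range">Range of cutoff frequency.</param>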
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void LowPassFilter(ref NativeArray data, float sampleRate, float cutoff, float range)
{
cutoff = (cutoff - range) / sampleRate;
@@ -122,6 +153,14 @@ static void LowPassFilter(float* data, int len, float cutoff, float* tmp, float*
}
}
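+    /// <summary>
+    /// Down sample the specified input.
+    /// </summary>
+    /// <param name="input">Input.</param>
+    /// <param name="output">Output. This is a temporary buffer and needs to be disposed.</param>
+    /// <param name="sampleRate">Sample rate.</param>
+    /// <param name="targetSampleRate">Target sample rate.</param>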
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void DownSample(in NativeArray input, out NativeArray output, int sampleRate, int targetSampleRate)
{
if (sampleRate <= targetSampleRate)
@@ -133,8 +172,8 @@ public static void DownSample(in NativeArray input, out NativeArray(input.Length / skip, Allocator.Temp);
DownSample1(
- (float*)input.GetUnsafeReadOnlyPtr(),
- (float*)output.GetUnsafePtr(),
+ (float*)input.GetUnsafeReadOnlyPtr(),
+ (float*)output.GetUnsafePtr(),
output.Length,
skip);
}
@@ -144,9 +183,9 @@ public static void DownSample(in NativeArray input, out NativeArray(n, Allocator.Temp);
DownSample2(
- (float*)input.GetUnsafeReadOnlyPtr(),
+ (float*)input.GetUnsafeReadOnlyPtr(),
input.Length,
- (float*)output.GetUnsafePtr(),
+ (float*)output.GetUnsafePtr(),
output.Length,
df);
}
@@ -176,6 +215,10 @@ static void DownSample2(float* input, int inputLen, float* output, int outputLen
}
}
+    /// <summary>
+    /// Pre-emphasis, which is a high-pass filter.
+    /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void PreEmphasis(ref NativeArray data, float p)
{
var tmp = new NativeArray(data, Allocator.Temp);
@@ -196,6 +239,7 @@ static void PreEmphasis(float* data, float* tmp, int len, float p)
}
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void HammingWindow(ref NativeArray array)
{
HammingWindow((float*)array.GetUnsafePtr(), array.Length);
@@ -211,7 +255,12 @@ static void HammingWindow(float* array, int len)
}
}
- public static void ZeroPadding(ref NativeArray data, out NativeArray dataWithPadding)
+    /// <summary>
+    /// Add zero padding to the beginning and end of the data.
+    /// </summary>
+    /// <param name="data">Data.</param>
+    /// <param name="dataWithPadding">Data with padding. This is a temporary buffer and needs to be disposed.</param>
+ public static void ZeroPadding(ref NativeArray data, out NativeArray dataWithPadding)
{
int N = data.Length;
dataWithPadding = new NativeArray(N * 2, Allocator.Temp);
@@ -226,6 +275,12 @@ public static void ZeroPadding(ref NativeArray data, out NativeArray(), 0, sizeof(float) * slice1.Length);
}
+    /// <summary>
+    /// Fast Fourier transform.
+    /// </summary>
+    /// <param name="data">Data.</param>
+    /// <param name="spectrum">Spectrum. This is a temporary buffer and needs to be disposed.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void FFT(in NativeArray data, out NativeArray spectrum)
{
int N = data.Length;
@@ -298,8 +353,14 @@ static void _FFT(float* spectrumRe, float* spectrumIm, int N)
oddIm.Dispose();
}
+    /// <summary>
+    /// Convert frequency to mel frequency by subdividing the mel scale into melDiv parts.
+    /// </summary>
+    /// <param name="spectrum">Spectrum.</param>
+    /// <param name="melSpectrum">Mel spectrum. This is a temporary buffer and needs to be disposed.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void MelFilterBank(
- in NativeArray spectrum,
+ in NativeArray spectrum,
out NativeArray melSpectrum,
float sampleRate,
int melDiv)
@@ -315,7 +376,7 @@ public static void MelFilterBank(
[BurstCompile]
static void MelFilterBank(
- float* spectrum,
+ float* spectrum,
float* melSpectrum,
int len,
float sampleRate,
@@ -345,8 +406,8 @@ static void MelFilterBank(
for (int i = iBegin + 1; i <= iEnd; ++i)
{
float f = df * i;
- float a = (i < iCenter) ?
- (f - fBegin) / (fCenter - fBegin) :
+ float a = (i < iCenter) ?
+ (f - fBegin) / (fCenter - fBegin) :
(fEnd - f) / (fEnd - fCenter);
a /= (fEnd - fBegin) * 0.5f;
sum += a * spectrum[i];
@@ -355,6 +416,10 @@ static void MelFilterBank(
}
}
+    /// <summary>
+    /// Convert power spectrum to decibel.
+    /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void PowerToDb(ref NativeArray array)
{
PowerToDb((float*)array.GetUnsafePtr(), array.Length);
@@ -383,13 +448,19 @@ static float ToHz(float mel, bool slaney = false)
return 700f * (math.exp(mel / a) - 1f);
}
+    /// <summary>
+    /// Discrete Cosine Transform.
+    /// </summary>
+    /// <param name="spectrum">Spectrum.</param>
+    /// <param name="cepstrum">Cepstrum. This is a temporary buffer and needs to be disposed.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void DCT(
in NativeArray spectrum,
out NativeArray cepstrum)
{
cepstrum = new NativeArray(spectrum.Length, Allocator.Temp);
DCT(
- (float*)spectrum.GetUnsafeReadOnlyPtr(),
+ (float*)spectrum.GetUnsafeReadOnlyPtr(),
(float*)cepstrum.GetUnsafePtr(),
spectrum.Length);
}
@@ -413,11 +484,62 @@ static void DCT(
}
}
+    /// <summary>
+    /// Calculate delta coefficients.
+    /// </summary>
+    /// <param name="buffer">MFCC buffer; frame j starts at index j * numCoefficients. Its length is not checked here.</param>
+    /// <param name="delta">Delta coefficients. This is a temporary buffer and needs to be disposed.</param>
+    /// <param name="bufferSize">Frame count passed to the kernel, which reads the first bufferSize - 1 frames.</param>
+    /// <param name="numCoefficients">Number of coefficients per frame; also the length of delta.</param>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static void CalculateDelta(
+        in NativeArray<float> buffer,
+        out NativeArray<float> delta,
+        int bufferSize = 3,
+        int numCoefficients = 12)
+    {
+        delta = new NativeArray<float>(numCoefficients, Allocator.Temp);
+        CalculateDelta(
+            (float*)buffer.GetUnsafeReadOnlyPtr(),
+            (float*)delta.GetUnsafePtr(),
+            bufferSize, numCoefficients);
+    }
+
+    /// <summary>
+    /// Kernel: weights each of the first bufferSize - 1 frames by its squared index; delta[i] is the weighted sum over twice the total weight.
+    /// </summary>
+    [BurstCompile]
+    private static unsafe void CalculateDelta(
+        float* buffer,
+        float* delta,
+        int bufferSize,
+        int numCoefficients)
+    {
+        float denominator = 0f;
+        for (int j = 0; j < bufferSize - 1; j++)
+        {
+            denominator += j * j;
+        }
+        for (int i = 0; i < numCoefficients; i++)
+        {
+            float numerator = 0f;
+            for (int j = 0; j < bufferSize - 1; j++)
+            {
+                numerator += (j * j) * buffer[j * numCoefficients + i];
+            }
+            // bufferSize < 3 leaves every weight at zero; write 0 rather than 0 / 0.
+            delta[i] = denominator > 0f ? numerator / (2 * denominator) : 0f;
+        }
+    }
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Norm(in NativeArray array)
{
return Norm((float*)array.GetUnsafeReadOnlyPtr(), array.Length);
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Norm(in NativeSlice slice)
{
return Norm((float*)slice.GetUnsafeReadOnlyPtr(), slice.Length);
diff --git a/Assets/uLipSync/Runtime/Core/LipSyncJob.cs b/Assets/uLipSync/Runtime/Core/LipSyncJob.cs
index 91e8897..adc4640 100644
--- a/Assets/uLipSync/Runtime/Core/LipSyncJob.cs
+++ b/Assets/uLipSync/Runtime/Core/LipSyncJob.cs
@@ -26,24 +26,29 @@ public struct Info
[ReadOnly] public NativeArray means;
[ReadOnly] public NativeArray standardDeviations;
[ReadOnly] public NativeArray phonemes;
+ [ReadOnly] public bool useDelta;
+ [ReadOnly] public NativeArray bufferMelCepOffset;
public NativeArray mfcc;
public NativeArray scores;
- public NativeArray info;
-
+ [WriteOnly] public NativeArray info;
+ public NativeArray bufferMelCep;
+
#if ULIPSYNC_DEBUG
- public NativeArray debugData;
- public NativeArray debugSpectrum;
- public NativeArray debugMelSpectrum;
- public NativeArray debugMelCepstrum;
+ [WriteOnly] public NativeArray debugData;
+ [WriteOnly] public NativeArray debugSpectrum;
+ [WriteOnly] public NativeArray debugMelSpectrum;
+ [WriteOnly] public NativeArray debugMelCepstrum;
#endif
int cutoff => targetSampleRate / 2;
int range => 500;
+ int bufferSize => 3;
+ int calcLength => (int)mfcc.Length/2; //Todo: calculate the total length mfcc divide by the number of delta's
+ // delta should be an enum instead of a bool for next version.
public void Execute()
{
float volume = Algorithm.GetRMSVolume(input);
-
Algorithm.CopyRingBuffer(input, out var buffer, startIndex);
Algorithm.LowPassFilter(ref buffer, outputSampleRate, cutoff, range);
Algorithm.DownSample(buffer, out var data, outputSampleRate, targetSampleRate);
@@ -55,10 +60,35 @@ public void Execute()
Algorithm.PowerToDb(ref melSpectrum);
Algorithm.DCT(melSpectrum, out var melCepstrum);
- for (int i = 1; i <= mfcc.Length; ++i)
- {
- mfcc[i - 1] = melCepstrum[i];
- }
+ if (useDelta)
+ {
+ // Fill the first slot of buffer with current melCepstrum
+ bufferMelCep.Slice(bufferMelCepOffset[0], calcLength).CopyFrom(melCepstrum.Slice(0, calcLength));
+
+ // Calculate delta
+ Algorithm.CalculateDelta(bufferMelCep, out var deltaMelCepstrum);
+
+ // Move the buffer values up one slot
+ NativeArray tempBuffer = new NativeArray(calcLength, Allocator.Temp);
+ for (int j = bufferSize - 1; j > 0; j--)
+ {
+ int srcOffset = bufferMelCepOffset[j - 1];
+ int dstOffset = bufferMelCepOffset[j];
+ NativeArray.Copy(bufferMelCep, srcOffset, tempBuffer, 0, calcLength);
+ NativeArray.Copy(tempBuffer, 0, bufferMelCep, dstOffset, calcLength);
+ }
+
+ // Copy the cepstrum and delta to mfcc
+ NativeArray.Copy(melCepstrum, 1, mfcc, 0, calcLength);
+ NativeArray.Copy(deltaMelCepstrum, 0, mfcc, calcLength, calcLength);
+
+ deltaMelCepstrum.Dispose();
+ tempBuffer.Dispose();
+ }
+ else
+ {
+ NativeArray.Copy(melCepstrum, 1, mfcc, 0, mfcc.Length);
+ }
CalcScores();
@@ -67,7 +97,7 @@ public void Execute()
volume = volume,
mainPhonemeIndex = GetVowel(),
};
-
+
#if ULIPSYNC_DEBUG
data.CopyTo(debugData);
spectrum.CopyTo(debugSpectrum);
@@ -82,17 +112,23 @@ public void Execute()
melCepstrum.Dispose();
}
+    /// <summary>
+    /// Calculates the scores of each phoneme. The scores can be calculated by the following methods.
+    /// - L1 Norm (Manhattan Distance)
+    /// - L2 Norm (Euclidean Distance)
+    /// - Cosine Similarity (Cosine Distance)
+    /// </summary>
void CalcScores()
{
float sum = 0f;
-
+
for (int i = 0; i < scores.Length; ++i)
{
float score = CalcScore(i);
scores[i] = score;
sum += score;
}
-
+
for (int i = 0; i < scores.Length; ++i)
{
scores[i] = sum > 0 ? scores[i] / sum : 0f;
@@ -117,7 +153,7 @@ float CalcL1NormScore(int index)
{
int n = mfcc.Length;
var phoneme = new NativeSlice(phonemes, index * n, n);
-
+
var distance = 0f;
for (int i = 0; i < n; ++i)
{
@@ -134,7 +170,7 @@ float CalcL2NormScore(int index)
{
int n = mfcc.Length;
var phoneme = new NativeSlice(phonemes, index * n, n);
-
+
var distance = 0f;
for (int i = 0; i < n; ++i)
{
@@ -153,7 +189,7 @@ float CalcCosineSimilarityScore(int index)
var phoneme = new NativeSlice(phonemes, index * n, n);
float mfccNorm = 0f;
float phonemeNorm = 0f;
-
+
float prod = 0f;
for (int i = 0; i < n; ++i)
{
@@ -171,6 +207,9 @@ float CalcCosineSimilarityScore(int index)
return math.pow(similarity, 100f);
}
+    /// <summary>
+    /// Gets the index of the phoneme with the highest score.
+    /// </summary>
int GetVowel()
{
int index = -1;
diff --git a/Assets/uLipSync/Runtime/Core/Profile.cs b/Assets/uLipSync/Runtime/Core/Profile.cs
index c415467..ff03882 100644
--- a/Assets/uLipSync/Runtime/Core/Profile.cs
+++ b/Assets/uLipSync/Runtime/Core/Profile.cs
@@ -36,11 +36,11 @@ public MfccData(string name)
Deallocate();
}
- public void Allocate()
+ public void Allocate(int arraySize)
{
if (IsAllocated()) return;
- mfccNativeArray = new NativeArray(12, Allocator.Persistent);
+ mfccNativeArray = new NativeArray(arraySize, Allocator.Persistent);
}
public void Deallocate()
@@ -57,10 +57,10 @@ bool IsAllocated()
public void AddCalibrationData(float[] mfcc)
{
- if (mfcc.Length != 12)
+ if (mfcc.Length != 12 && mfcc.Length != 24)
{
- Debug.LogError("The length of MFCC array should be 12.");
- return;
+ Debug.LogError("The length of MFCC array should be 12. When using delta it should be 24.");
+ return;
}
mfccCalibrationDataList.Add(new MfccCalibrationData() { array = mfcc });
}
@@ -69,12 +69,12 @@ public void RemoveOldCalibrationData(int dataCount)
{
while (mfccCalibrationDataList.Count > dataCount) mfccCalibrationDataList.RemoveAt(0);
}
-
+
public void UpdateNativeArray()
{
if (mfccCalibrationDataList.Count == 0) return;
- for (int i = 0; i < 12; ++i)
+ for (int i = 0; i < mfccNativeArray.Length; ++i)
{
mfccNativeArray[i] = 0f;
foreach (var mfcc in mfccCalibrationDataList)
@@ -97,7 +97,7 @@ public class Profile : ScriptableObject
[HideInInspector] public string jsonPath = "";
-    [Tooltip("The number of MFCC")]
-    public int mfccNum = 12;
+    // Number of MFCC values (24 with useDelta, otherwise 12); [Tooltip] is field-only, so it is dropped here.
+    public int mfccNum => useDelta ? 24 : 12;
[Tooltip("The number of MFCC data to calculate the average MFCC values")]
public int mfccDataCount = 16;
[Tooltip("The number of Mel Filter Bank channels")]
@@ -106,25 +106,30 @@ public class Profile : ScriptableObject
public int targetSampleRate = 16000;
[Tooltip("Number of audio samples after downsampling is applied")]
public int sampleCount = 1024;
- [Tooltip("Whether to perform standardization of each coefficient of MFCC")]
+ [Tooltip("Whether to perform standardization of each coefficient of MFCC. This normalization ensures that the coefficient values are centered around zero (zero mean) and have a spread of one (unit variance).")]
public bool useStandardization = false;
+ [Tooltip("Add delta of MFCC's for even better accuracy")]
+ public bool useDelta = false;
[Tooltip("The comparison method for MFCC")]
public CompareMethod compareMethod = CompareMethod.L2Norm;
+ // hide from inspector
+ [HideInInspector] public int arraySize = 12;
public List mfccs = new List();
-
+
float[] _means = new float[12];
float[] _stdDevs = new float[12];
- public float[] means => _means;
- public float[] standardDeviation => _stdDevs;
-
+ public float[] means => _means;
+ public float[] standardDeviation => _stdDevs;
+
void OnEnable()
{
+ arraySize = GetArraySizeMfcc();
UpdateMeansAndStandardization();
foreach (var data in mfccs)
{
- data.Allocate();
+ data.Allocate(mfccNum);
data.RemoveOldCalibrationData(mfccDataCount);
data.UpdateNativeArray();
}
@@ -138,20 +143,37 @@ void OnDisable()
}
}
+    /// <summary>
+    /// Sets arraySize from useDelta (24 with delta, otherwise 12) and replaces the
+    /// mean / standard deviation arrays with new zero-filled arrays of that length.
+    /// </summary>
+    /// <returns>The updated array size.</returns>
+    public int GetArraySizeMfcc()
+    {
+        int size = useDelta ? 24 : 12;
+
+        // Not a pure getter: the previous statistics arrays are
+        // dropped and replaced here.
+        arraySize = size;
+        _means = new float[size];
+        _stdDevs = new float[size];
+        return size;
+    }
+
public string GetPhoneme(int index)
{
if (index < 0 || index >= mfccs.Count) return "";
-
+
return mfccs[index].name;
}
public void AddMfcc(string name)
{
var data = new MfccData(name);
- data.Allocate();
+ data.Allocate(mfccNum);
for (int i = 0; i < mfccDataCount; ++i)
{
- data.AddCalibrationData(new float[12]);
+ data.AddCalibrationData(new float[mfccNum]);
}
mfccs.Add(data);
}
@@ -159,11 +181,11 @@ public void AddMfcc(string name)
public void RemoveMfcc(int index)
{
if (index < 0 || index >= mfccs.Count) return;
-
+
var data = mfccs[index];
data.Deallocate();
mfccs.RemoveAt(index);
-
+
UpdateMeansAndStandardization();
}
@@ -237,7 +259,7 @@ public void UpdateMeansAndStandardization()
UpdateMeans();
UpdateStandardizations();
}
-
+
void UpdateMeans()
{
for (int i = 0; i < _means.Length; ++i)
@@ -282,7 +304,7 @@ void UpdateStandardizations()
{
_stdDevs[i] = 0f;
}
-
+
int n = 0;
foreach (var mfccData in mfccs)
{
@@ -296,7 +318,7 @@ void UpdateStandardizations()
++n;
}
}
-
+
for (int i = 0; i < _stdDevs.Length; ++i)
{
_stdDevs[i] = math.sqrt(_stdDevs[i] / n);
diff --git a/Assets/uLipSync/Runtime/uLipSync.cs b/Assets/uLipSync/Runtime/uLipSync.cs
index 94e8d28..5518752 100644
--- a/Assets/uLipSync/Runtime/uLipSync.cs
+++ b/Assets/uLipSync/Runtime/uLipSync.cs
@@ -21,6 +21,9 @@ public class uLipSync : MonoBehaviour
bool _allocated = false;
int _index = 0;
bool _isDataReceived = false;
+ int bufferSize = 3;
+
+ int halfMfcc => mfccNum / 2;
NativeArray _rawInputData;
NativeArray _inputData;
@@ -31,12 +34,14 @@ public class uLipSync : MonoBehaviour
NativeArray _phonemes;
NativeArray _scores;
NativeArray _info;
+ NativeArray _bufferMelCep;
+ NativeArray _bufferMelCepOffset;
List _requestedCalibrationVowels = new List();
Dictionary _ratios = new Dictionary();
public NativeArray mfcc => _mfccForOther;
public LipSyncInfo result { get; private set; } = new LipSyncInfo();
-
+
#if ULIPSYNC_DEBUG
NativeArray _debugData;
NativeArray _debugSpectrum;
@@ -54,14 +59,14 @@ public class uLipSync : MonoBehaviour
int inputSampleCount
{
- get
- {
+ get
+ {
if (!profile) return AudioSettings.outputSampleRate;
float r = (float)AudioSettings.outputSampleRate / profile.targetSampleRate;
return Mathf.CeilToInt(profile.sampleCount * r);
}
}
-
+
int mfccNum => profile ? profile.mfccNum : 12;
void Awake()
@@ -99,25 +104,32 @@ void AllocateBuffers()
{
if (_allocated)
{
+ _jobHandle.Complete();
DisposeBuffers();
}
_allocated = true;
- _jobHandle.Complete();
lock (_lockObject)
{
int n = inputSampleCount;
int phonemeCount = profile ? profile.mfccs.Count : 1;
_rawInputData = new NativeArray(n, Allocator.Persistent);
- _inputData = new NativeArray(n, Allocator.Persistent);
- _mfcc = new NativeArray(mfccNum, Allocator.Persistent);
- _mfccForOther = new NativeArray(mfccNum, Allocator.Persistent);
- _means = new NativeArray(mfccNum, Allocator.Persistent);
- _standardDeviations = new NativeArray(mfccNum, Allocator.Persistent);
+ _inputData = new NativeArray(n, Allocator.Persistent);
+ _mfcc = new NativeArray(mfccNum, Allocator.Persistent);
+ _mfccForOther = new NativeArray(mfccNum, Allocator.Persistent);
+ _means = new NativeArray(mfccNum, Allocator.Persistent);
+ _standardDeviations = new NativeArray(mfccNum, Allocator.Persistent);
_scores = new NativeArray(phonemeCount, Allocator.Persistent);
_phonemes = new NativeArray(mfccNum * phonemeCount, Allocator.Persistent);
_info = new NativeArray(1, Allocator.Persistent);
+ _bufferMelCep = new NativeArray(bufferSize * halfMfcc, Allocator.Persistent);
+ _bufferMelCepOffset = new NativeArray(bufferSize, Allocator.Persistent)
+ {
+ [0] = 0,
+ [1] = halfMfcc,
+ [2] = mfccNum
+ };
#if ULIPSYNC_DEBUG
_debugData = new NativeArray(profile.sampleCount, Allocator.Persistent);
_debugDataForOther = new NativeArray(profile.sampleCount, Allocator.Persistent);
@@ -149,6 +161,8 @@ void DisposeBuffers()
_scores.Dispose();
_phonemes.Dispose();
_info.Dispose();
+ _bufferMelCep.Dispose();
+ _bufferMelCepOffset.Dispose();
#if ULIPSYNC_DEBUG
_debugData.Dispose();
_debugDataForOther.Dispose();
@@ -189,7 +203,7 @@ void UpdateResult()
_debugMelSpectrumForOther.CopyFrom(_debugMelSpectrum);
_debugMelCepstrumForOther.CopyFrom(_debugMelCepstrum);
#endif
-
+
int index = _info[0].mainPhonemeIndex;
string mainPhoneme = profile.GetPhoneme(index);
@@ -241,7 +255,7 @@ void UpdatePhonemes()
{
foreach (var value in data.mfccNativeArray)
{
- if (index >= _phonemes.Length) break;
+ if (index >= _phonemes.Length) return;
_phonemes[index++] = value;
}
}
@@ -270,11 +284,14 @@ void ScheduleJob()
melFilterBankChannels = profile.melFilterBankChannels,
means = _means,
standardDeviations = _standardDeviations,
+ useDelta = profile.useDelta,
mfcc = _mfcc,
phonemes = _phonemes,
compareMethod = profile.compareMethod,
scores = _scores,
info = _info,
+ bufferMelCep = _bufferMelCep,
+ bufferMelCepOffset = _bufferMelCepOffset,
#if ULIPSYNC_DEBUG
debugData = _debugData,
debugSpectrum = _debugSpectrum,
@@ -328,7 +345,7 @@ public void OnDataReceived(float[] input, int channels)
{
int n = _rawInputData.Length;
_index = _index % n;
- for (int i = 0; i < input.Length; i += channels)
+ for (int i = 0; i < input.Length; i += channels)
{
_rawInputData[_index++ % n] = input[i];
}
@@ -337,7 +354,7 @@ public void OnDataReceived(float[] input, int channels)
if (math.abs(outputSoundGain - 1f) > math.EPSILON)
{
int n = input.Length;
- for (int i = 0; i < n; ++i)
+ for (int i = 0; i < n; ++i)
{
input[i] *= outputSoundGain;
}
diff --git a/Assets/uLipSync/Runtime/uLipSyncAnimator.cs b/Assets/uLipSync/Runtime/uLipSyncAnimator.cs
index 95c919b..2330524 100644
--- a/Assets/uLipSync/Runtime/uLipSyncAnimator.cs
+++ b/Assets/uLipSync/Runtime/uLipSyncAnimator.cs
@@ -26,6 +26,7 @@ public class AnimatorInfo
public float minVolume = -2.5f;
public float maxVolume = -1.5f;
[Range(0f, 0.3f)] public float smoothness = 0.05f;
+ [Range(0.00001f, 0.01f)] public float minimalValueThreshold = 0.001f;
LipSyncInfo _info = new LipSyncInfo();
bool _lipSyncUpdated = false;
@@ -51,6 +52,14 @@ public void OnLipSyncUpdate(LipSyncInfo info)
}
}
+    void Awake()
+    {
+        foreach (var entry in parameters) // store each name's Animator hash on its entry
+        {
+            entry.nameHash = Animator.StringToHash(entry.name);
+        }
+    }
+
void Update()
{
if (updateMethod != UpdateMethod.LipSyncUpdateEvent)
@@ -80,9 +89,17 @@ void FixedUpdate()
}
}
-    float SmoothDamp(float value, float target, ref float velocity)
+    /// <summary>
+    /// Smooth-damps value toward target, returning exactly zero when the result's magnitude is below threshold.
+    /// </summary>
+    float SmoothDamp(float value, float target, ref float velocity, float threshold)
     {
-        return Mathf.SmoothDamp(value, target, ref velocity, smoothness);
+        float damped = Mathf.SmoothDamp(value, target, ref velocity, smoothness);
+
+        // Only the returned value is zeroed; velocity keeps whatever
+        // Mathf.SmoothDamp wrote into it.
+        bool isNegligible = Mathf.Abs(damped) < threshold;
+        return isNegligible ? 0f : damped;
     }
void UpdateVolume()
@@ -94,7 +111,7 @@ void UpdateVolume()
normVol = (normVol - minVolume) / Mathf.Max(maxVolume - minVolume, 1e-4f);
normVol = Mathf.Clamp(normVol, 0f, 1f);
}
- _volume = SmoothDamp(_volume, normVol, ref _openCloseVelocity);
+ _volume = SmoothDamp(_volume, normVol, ref _openCloseVelocity, minimalValueThreshold);
}
void UpdateVowels()
@@ -110,7 +127,7 @@ void UpdateVowels()
ratios.TryGetValue(param.phoneme, out targetWeight);
}
float weightVel = param.weightVelocity;
- param.weight = SmoothDamp(param.weight, targetWeight, ref weightVel);
+ param.weight = SmoothDamp(param.weight, targetWeight, ref weightVel, minimalValueThreshold);
param.weightVelocity = weightVel;
sum += param.weight;
}
@@ -149,4 +166,4 @@ void OnApplyAnimator()
}
}
-}
\ No newline at end of file
+}