diff --git a/Assets/uLipSync/Editor/ProfileEditor.cs b/Assets/uLipSync/Editor/ProfileEditor.cs index 04a09d9..949e7f9 100644 --- a/Assets/uLipSync/Editor/ProfileEditor.cs +++ b/Assets/uLipSync/Editor/ProfileEditor.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Text; using uLipSync.Debugging; +using System; namespace uLipSync { @@ -36,11 +37,11 @@ public void Draw(bool showCalibration) if (EditorUtil.SimpleFoldout("MFCC", true, "-uLipSync-Profile")) { EditorGUI.BeginChangeCheck(); - + ++EditorGUI.indentLevel; DrawMfccReorderableList(showCalibration); --EditorGUI.indentLevel; - + if (EditorGUI.EndChangeCheck()) { EditorUtility.SetDirty(target); @@ -62,6 +63,20 @@ public void Draw(bool showCalibration) profile.UpdateMeansAndStandardization(); EditorUtility.SetDirty(target); } + GUI.enabled = profile.mfccs.Count == 0; + if (profile.mfccs.Count != 0) + { + EditorGUILayout.HelpBox("Can't change delta setting when mfcc data exist." + + Environment.NewLine + "Create a new profile or delete the mfcc data", MessageType.Warning); + } + bool useDelta = EditorGUILayout.Toggle("Use Delta", profile.useDelta); + if (useDelta != profile.useDelta) + { + Undo.RecordObject(target, "Change Use Delta"); + profile.useDelta = useDelta; + EditorUtility.SetDirty(target); + } + GUI.enabled = true; EditorUtil.DrawProperty(serializedObject, nameof(profile.compareMethod)); profile.mfccDataCount = Mathf.Clamp(profile.mfccDataCount, 1, 256); profile.melFilterBankChannels = Mathf.Clamp(profile.melFilterBankChannels, 12, 256); @@ -118,7 +133,7 @@ void DrawMfccReorderableList(bool showCalibration) if (_reorderableList == null) { _reorderableList = new ReorderableList(profile.mfccs, typeof(MfccData)); - _reorderableList.drawHeaderCallback = rect => + _reorderableList.drawHeaderCallback = rect => { rect.xMin -= EditorGUI.indentLevel * 12f; EditorGUI.LabelField(rect, "MFCCs"); @@ -182,7 +197,7 @@ void DrawMFCC(Rect position, int index, bool showCalibration) if 
(!_texturePool.TryGetValue(data, out Texture2D tex)) tex = null; tex = TextureCreator.CreateMfccTexture(tex, data, Common.MfccMinValue, Common.MfccMaxValue); _texturePool[data] = tex; - + var area = EditorGUI.IndentedRect(mfccPos); area.height = data.mfccCalibrationDataList.Count * 3f; GUI.DrawTexture(area, tex, ScaleMode.StretchToFill); diff --git a/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs b/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs index 23b5e4d..7a34376 100644 --- a/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs +++ b/Assets/uLipSync/Editor/uLipSyncAnimatorEditor.cs @@ -44,13 +44,13 @@ public override void OnInspectorGUI() if (EditorUtil.Foldout("Animator Controller Parameters", true)) { ++EditorGUI.indentLevel; - if (anim.animator != null) - { - DrawAnimatorReorderableList(); + if (anim.animator != null && anim.animator.isActiveAndEnabled) + { + DrawAnimatorReorderableList(); } else - { - EditorGUILayout.HelpBox("Animator is not available.", MessageType.Warning); + { + EditorGUILayout.HelpBox("Animator is not available! 
To edit parameters open the prefab or have game object in scene.", MessageType.Warning); } --EditorGUI.indentLevel; EditorGUILayout.Separator(); @@ -246,6 +246,7 @@ protected void DrawParameters() EditorGUILayout.EndHorizontal(); EditorUtil.DrawProperty(serializedObject, nameof(anim.smoothness)); + EditorUtil.DrawProperty(serializedObject, nameof(anim.minimalValueThreshold)); } } diff --git a/Assets/uLipSync/Runtime/Core/Algorithm.cs b/Assets/uLipSync/Runtime/Core/Algorithm.cs index 487051c..2e094b6 100644 --- a/Assets/uLipSync/Runtime/Core/Algorithm.cs +++ b/Assets/uLipSync/Runtime/Core/Algorithm.cs @@ -2,18 +2,24 @@ using Unity.Mathematics; using Unity.Burst; using Unity.Collections.LowLevel.Unsafe; +using System.Runtime.CompilerServices; namespace uLipSync { -[BurstCompile] +[BurstCompile(FloatMode = FloatMode.Fast, FloatPrecision = FloatPrecision.Low)] public static unsafe class Algorithm { + /// + /// Get the maximum value of the array. + /// + /// Array to get max from. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float GetMaxValue(in NativeArray array) { return GetMaxValue((float*)array.GetUnsafeReadOnlyPtr(), array.Length); } - + [BurstCompile] static float GetMaxValue(float* array, int len) { @@ -25,6 +31,10 @@ static float GetMaxValue(float* array, int len) return max; } + /// + /// Get RMS volume. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float GetRMSVolume(in NativeArray array) { return GetRMSVolume((float*)array.GetUnsafeReadOnlyPtr(), array.Length); @@ -41,13 +51,20 @@ static float GetRMSVolume(float *array, int len) return math.sqrt(average / len); } + /// + /// Copy ring buffer, startSrcIndex is the index of the oldest data. + /// + /// Source buffer. + /// Destination buffer. This is a temporary buffer and needs to be disposed. + /// Index of the oldest data. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void CopyRingBuffer(in NativeArray input, out NativeArray output, int startSrcIndex) { output = new NativeArray(input.Length, Allocator.Temp); CopyRingBuffer( - (float*)input.GetUnsafeReadOnlyPtr(), - (float*)output.GetUnsafePtr(), - input.Length, + (float*)input.GetUnsafeReadOnlyPtr(), + (float*)output.GetUnsafePtr(), + input.Length, startSrcIndex); } @@ -60,6 +77,12 @@ static void CopyRingBuffer(float* input, float* output, int len, int startSrcInd } } + /// <summary> + /// Normalize array to the specified value. + /// </summary> + /// <param name="array">Array to normalize.</param> + /// <param name="value">Value to use for normalization.</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Normalize(ref NativeArray<float> array, float value = 1f) { Normalize((float*)array.GetUnsafePtr(), array.Length, value); @@ -77,6 +100,14 @@ static void Normalize(float* array, int len, float value = 1f) } } + /// + /// Low-pass filter, cutoff is normalized by sample rate. + /// + /// Data. + /// Sample rate. + /// Cutoff frequency. + /// Range of cutoff frequency. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void LowPassFilter(ref NativeArray data, float sampleRate, float cutoff, float range) { cutoff = (cutoff - range) / sampleRate; @@ -122,6 +153,14 @@ static void LowPassFilter(float* data, int len, float cutoff, float* tmp, float* } } + /// + /// Down sample the specified input. + /// + /// Input. + /// Output. This is a temporary buffer and needs to be disposed. + /// Sample rate. + /// Target sample rate. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void DownSample(in NativeArray input, out NativeArray output, int sampleRate, int targetSampleRate) { if (sampleRate <= targetSampleRate) @@ -133,8 +172,8 @@ public static void DownSample(in NativeArray input, out NativeArray(input.Length / skip, Allocator.Temp); DownSample1( - (float*)input.GetUnsafeReadOnlyPtr(), - (float*)output.GetUnsafePtr(), + (float*)input.GetUnsafeReadOnlyPtr(), + (float*)output.GetUnsafePtr(), output.Length, skip); } @@ -144,9 +183,9 @@ public static void DownSample(in NativeArray input, out NativeArray(n, Allocator.Temp); DownSample2( - (float*)input.GetUnsafeReadOnlyPtr(), + (float*)input.GetUnsafeReadOnlyPtr(), input.Length, - (float*)output.GetUnsafePtr(), + (float*)output.GetUnsafePtr(), output.Length, df); } @@ -176,6 +215,10 @@ static void DownSample2(float* input, int inputLen, float* output, int outputLen } } + /// + /// Pre-emphasis, which is a high-pass filter + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void PreEmphasis(ref NativeArray data, float p) { var tmp = new NativeArray(data, Allocator.Temp); @@ -196,6 +239,7 @@ static void PreEmphasis(float* data, float* tmp, int len, float p) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void HammingWindow(ref NativeArray array) { HammingWindow((float*)array.GetUnsafePtr(), array.Length); @@ -211,7 +255,12 @@ static void HammingWindow(float* array, int len) } } - public static void ZeroPadding(ref NativeArray data, out NativeArray dataWithPadding) + /// + /// Add zero padding to the begin and end of the data. + /// + /// Data. + /// Data with padding. This is a temporary buffer and needs to be disposed. 
+ public static void ZeroPadding(ref NativeArray data, out NativeArray dataWithPadding) { int N = data.Length; dataWithPadding = new NativeArray(N * 2, Allocator.Temp); @@ -226,6 +275,12 @@ public static void ZeroPadding(ref NativeArray data, out NativeArray(), 0, sizeof(float) * slice1.Length); } + /// + /// Fast Fourier transform. + /// + /// Data. + /// Spectrum. This is a temporary buffer and needs to be disposed. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void FFT(in NativeArray data, out NativeArray spectrum) { int N = data.Length; @@ -298,8 +353,14 @@ static void _FFT(float* spectrumRe, float* spectrumIm, int N) oddIm.Dispose(); } + /// + /// Convert frequency to mel frequency by subdividing the mel scale into melDiv parts. + /// + /// Spectrum. + /// Mel spectrum. This is a temporary buffer and needs to be disposed. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void MelFilterBank( - in NativeArray spectrum, + in NativeArray spectrum, out NativeArray melSpectrum, float sampleRate, int melDiv) @@ -315,7 +376,7 @@ public static void MelFilterBank( [BurstCompile] static void MelFilterBank( - float* spectrum, + float* spectrum, float* melSpectrum, int len, float sampleRate, @@ -345,8 +406,8 @@ static void MelFilterBank( for (int i = iBegin + 1; i <= iEnd; ++i) { float f = df * i; - float a = (i < iCenter) ? - (f - fBegin) / (fCenter - fBegin) : + float a = (i < iCenter) ? + (f - fBegin) / (fCenter - fBegin) : (fEnd - f) / (fEnd - fCenter); a /= (fEnd - fBegin) * 0.5f; sum += a * spectrum[i]; @@ -355,6 +416,10 @@ static void MelFilterBank( } } + /// + /// Convert power spectrum to decibel. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void PowerToDb(ref NativeArray array) { PowerToDb((float*)array.GetUnsafePtr(), array.Length); @@ -383,13 +448,19 @@ static float ToHz(float mel, bool slaney = false) return 700f * (math.exp(mel / a) - 1f); } + /// + /// Discrete Cosine Transform. 
+ /// + /// Spectrum. + /// Cepstrum. This is a temporary buffer and needs to be disposed. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void DCT( in NativeArray spectrum, out NativeArray cepstrum) { cepstrum = new NativeArray(spectrum.Length, Allocator.Temp); DCT( - (float*)spectrum.GetUnsafeReadOnlyPtr(), + (float*)spectrum.GetUnsafeReadOnlyPtr(), (float*)cepstrum.GetUnsafePtr(), spectrum.Length); } @@ -413,11 +484,72 @@ static void DCT( } } + /// <summary> + /// Calculate delta coefficients. + /// </summary> + /// <param name="buffer">MFCC buffer: bufferSize frames of numCoefficients values stored back to back, newest frame first.</param> + /// <param name="delta">Delta coefficients. This is a temporary buffer and needs to be disposed.</param> + /// <param name="bufferSize">Number of frames held in the buffer.</param> + /// <param name="numCoefficients">Number of coefficients per frame.</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void CalculateDelta( + in NativeArray<float> buffer, + out NativeArray<float> delta, + int bufferSize = 3, + int numCoefficients = 12) + { + delta = new NativeArray<float>(numCoefficients, Allocator.Temp); + + CalculateDelta( + (float*)buffer.GetUnsafeReadOnlyPtr(), + (float*)delta.GetUnsafePtr(), + bufferSize, + numCoefficients); + } + + /// <summary> + /// Regression delta over a symmetric window centred on the middle frame: + /// sum(n * (newer - older)) / (2 * sum(n * n)). Frame 0 is treated as the newest frame. + /// </summary> + [BurstCompile] + private static unsafe void CalculateDelta( + float* buffer, + float* delta, + int bufferSize, + int numCoefficients) + { + // Frames available on each side of the centre frame. + int halfWindow = (bufferSize - 1) / 2; + + // The divisor 2 * sum(n * n) is identical for every coefficient, so build it once. + float denominator = 0f; + for (int n = 1; n <= halfWindow; n++) + { + denominator += 2f * n * n; + } + + for (int i = 0; i < numCoefficients; i++) + { + float numerator = 0f; + for (int n = 1; n <= halfWindow; n++) + { + float newer = buffer[(halfWindow - n) * numCoefficients + i]; + float older = buffer[(halfWindow + n) * numCoefficients + i]; + numerator += n * (newer - older); + } + + // Fewer than three frames leave the window empty; report 0 instead of 0/0 (NaN). + delta[i] = denominator > 0f ? numerator / denominator : 0f; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Norm(in NativeArray array) { return Norm((float*)array.GetUnsafeReadOnlyPtr(), array.Length); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Norm(in NativeSlice slice) { return Norm((float*)slice.GetUnsafeReadOnlyPtr(), slice.Length); diff --git a/Assets/uLipSync/Runtime/Core/LipSyncJob.cs b/Assets/uLipSync/Runtime/Core/LipSyncJob.cs index 
91e8897..adc4640 100644 --- a/Assets/uLipSync/Runtime/Core/LipSyncJob.cs +++ b/Assets/uLipSync/Runtime/Core/LipSyncJob.cs @@ -26,24 +26,29 @@ public struct Info [ReadOnly] public NativeArray means; [ReadOnly] public NativeArray standardDeviations; [ReadOnly] public NativeArray phonemes; + [ReadOnly] public bool useDelta; + [ReadOnly] public NativeArray bufferMelCepOffset; public NativeArray mfcc; public NativeArray scores; - public NativeArray info; - + [WriteOnly] public NativeArray info; + public NativeArray bufferMelCep; + #if ULIPSYNC_DEBUG - public NativeArray debugData; - public NativeArray debugSpectrum; - public NativeArray debugMelSpectrum; - public NativeArray debugMelCepstrum; + [WriteOnly] public NativeArray debugData; + [WriteOnly] public NativeArray debugSpectrum; + [WriteOnly] public NativeArray debugMelSpectrum; + [WriteOnly] public NativeArray debugMelCepstrum; #endif int cutoff => targetSampleRate / 2; int range => 500; + int bufferSize => 3; + int calcLength => (int)mfcc.Length/2; //Todo: calculate the total length mfcc divide by the number of delta's + // delta should be an enum instead of a bool for next version. 
public void Execute() { float volume = Algorithm.GetRMSVolume(input); - Algorithm.CopyRingBuffer(input, out var buffer, startIndex); Algorithm.LowPassFilter(ref buffer, outputSampleRate, cutoff, range); Algorithm.DownSample(buffer, out var data, outputSampleRate, targetSampleRate); @@ -55,10 +60,35 @@ public void Execute() Algorithm.PowerToDb(ref melSpectrum); Algorithm.DCT(melSpectrum, out var melCepstrum); - for (int i = 1; i <= mfcc.Length; ++i) - { - mfcc[i - 1] = melCepstrum[i]; - } + if (useDelta) + { + // Fill the first slot of buffer with current melCepstrum + bufferMelCep.Slice(bufferMelCepOffset[0], calcLength).CopyFrom(melCepstrum.Slice(0, calcLength)); + + // Calculate delta + Algorithm.CalculateDelta(bufferMelCep, out var deltaMelCepstrum); + + // Move the buffer values up one slot + NativeArray tempBuffer = new NativeArray(calcLength, Allocator.Temp); + for (int j = bufferSize - 1; j > 0; j--) + { + int srcOffset = bufferMelCepOffset[j - 1]; + int dstOffset = bufferMelCepOffset[j]; + NativeArray.Copy(bufferMelCep, srcOffset, tempBuffer, 0, calcLength); + NativeArray.Copy(tempBuffer, 0, bufferMelCep, dstOffset, calcLength); + } + + // Copy the cepstrum and delta to mfcc + NativeArray.Copy(melCepstrum, 1, mfcc, 0, calcLength); + NativeArray.Copy(deltaMelCepstrum, 0, mfcc, calcLength, calcLength); + + deltaMelCepstrum.Dispose(); + tempBuffer.Dispose(); + } + else + { + NativeArray.Copy(melCepstrum, 1, mfcc, 0, mfcc.Length); + } CalcScores(); @@ -67,7 +97,7 @@ public void Execute() volume = volume, mainPhonemeIndex = GetVowel(), }; - + #if ULIPSYNC_DEBUG data.CopyTo(debugData); spectrum.CopyTo(debugSpectrum); @@ -82,17 +112,23 @@ public void Execute() melCepstrum.Dispose(); } + /// + /// Calculates the scores of each phoneme. The scores can be calculated by the following methods. 
+ /// - L1 Norm (Manhattan Distance) + /// - L2 Norm (Euclidean Distance) + /// - Cosine Similarity (Cosine Distance) + /// void CalcScores() { float sum = 0f; - + for (int i = 0; i < scores.Length; ++i) { float score = CalcScore(i); scores[i] = score; sum += score; } - + for (int i = 0; i < scores.Length; ++i) { scores[i] = sum > 0 ? scores[i] / sum : 0f; @@ -117,7 +153,7 @@ float CalcL1NormScore(int index) { int n = mfcc.Length; var phoneme = new NativeSlice(phonemes, index * n, n); - + var distance = 0f; for (int i = 0; i < n; ++i) { @@ -134,7 +170,7 @@ float CalcL2NormScore(int index) { int n = mfcc.Length; var phoneme = new NativeSlice(phonemes, index * n, n); - + var distance = 0f; for (int i = 0; i < n; ++i) { @@ -153,7 +189,7 @@ float CalcCosineSimilarityScore(int index) var phoneme = new NativeSlice(phonemes, index * n, n); float mfccNorm = 0f; float phonemeNorm = 0f; - + float prod = 0f; for (int i = 0; i < n; ++i) { @@ -171,6 +207,9 @@ float CalcCosineSimilarityScore(int index) return math.pow(similarity, 100f); } + /// + /// Gets the index of the phoneme with the highest score. + /// int GetVowel() { int index = -1; diff --git a/Assets/uLipSync/Runtime/Core/Profile.cs b/Assets/uLipSync/Runtime/Core/Profile.cs index c415467..ff03882 100644 --- a/Assets/uLipSync/Runtime/Core/Profile.cs +++ b/Assets/uLipSync/Runtime/Core/Profile.cs @@ -36,11 +36,11 @@ public MfccData(string name) Deallocate(); } - public void Allocate() + public void Allocate(int arraySize) { if (IsAllocated()) return; - mfccNativeArray = new NativeArray(12, Allocator.Persistent); + mfccNativeArray = new NativeArray(arraySize, Allocator.Persistent); } public void Deallocate() @@ -57,10 +57,10 @@ bool IsAllocated() public void AddCalibrationData(float[] mfcc) { - if (mfcc.Length != 12) + if (mfcc.Length != 12 && mfcc.Length != 24) { - Debug.LogError("The length of MFCC array should be 12."); - return; + Debug.LogError("The length of MFCC array should be 12. 
When using delta it should be 24."); + return; } mfccCalibrationDataList.Add(new MfccCalibrationData() { array = mfcc }); } @@ -69,12 +69,12 @@ public void RemoveOldCalibrationData(int dataCount) { while (mfccCalibrationDataList.Count > dataCount) mfccCalibrationDataList.RemoveAt(0); } - + public void UpdateNativeArray() { if (mfccCalibrationDataList.Count == 0) return; - for (int i = 0; i < 12; ++i) + for (int i = 0; i < mfccNativeArray.Length; ++i) { mfccNativeArray[i] = 0f; foreach (var mfcc in mfccCalibrationDataList) @@ -97,7 +97,7 @@ public class Profile : ScriptableObject [HideInInspector] public string jsonPath = ""; [Tooltip("The number of MFCC")] - public int mfccNum = 12; + public int mfccNum => useDelta ? 24 : 12; [Tooltip("The number of MFCC data to calculate the average MFCC values")] public int mfccDataCount = 16; [Tooltip("The number of Mel Filter Bank channels")] @@ -106,25 +106,30 @@ public class Profile : ScriptableObject public int targetSampleRate = 16000; [Tooltip("Number of audio samples after downsampling is applied")] public int sampleCount = 1024; - [Tooltip("Whether to perform standardization of each coefficient of MFCC")] + [Tooltip("Whether to perform standardization of each coefficient of MFCC. 
This normalization ensures that the coefficient values are centered around zero (zero mean) and have a spread of one (unit variance).")] public bool useStandardization = false; + [Tooltip("Add delta of MFCC's for even better accuracy")] + public bool useDelta = false; [Tooltip("The comparison method for MFCC")] public CompareMethod compareMethod = CompareMethod.L2Norm; + // hide from inspector + [HideInInspector] public int arraySize = 12; public List mfccs = new List(); - + float[] _means = new float[12]; float[] _stdDevs = new float[12]; - public float[] means => _means; - public float[] standardDeviation => _stdDevs; - + public float[] means => _means; + public float[] standardDeviation => _stdDevs; + void OnEnable() { + arraySize = GetArraySizeMfcc(); UpdateMeansAndStandardization(); foreach (var data in mfccs) { - data.Allocate(); + data.Allocate(mfccNum); data.RemoveOldCalibrationData(mfccDataCount); data.UpdateNativeArray(); } @@ -138,20 +143,37 @@ void OnDisable() } } + public int GetArraySizeMfcc() + { + if (useDelta) + { + arraySize = 24; + _means = new float[24]; + _stdDevs = new float[24]; + } + else + { + arraySize = 12; + _means = new float[12]; + _stdDevs = new float[12]; + } + return arraySize; + } + public string GetPhoneme(int index) { if (index < 0 || index >= mfccs.Count) return ""; - + return mfccs[index].name; } public void AddMfcc(string name) { var data = new MfccData(name); - data.Allocate(); + data.Allocate(mfccNum); for (int i = 0; i < mfccDataCount; ++i) { - data.AddCalibrationData(new float[12]); + data.AddCalibrationData(new float[mfccNum]); } mfccs.Add(data); } @@ -159,11 +181,11 @@ public void AddMfcc(string name) public void RemoveMfcc(int index) { if (index < 0 || index >= mfccs.Count) return; - + var data = mfccs[index]; data.Deallocate(); mfccs.RemoveAt(index); - + UpdateMeansAndStandardization(); } @@ -237,7 +259,7 @@ public void UpdateMeansAndStandardization() UpdateMeans(); UpdateStandardizations(); } - + void UpdateMeans() { 
for (int i = 0; i < _means.Length; ++i) @@ -282,7 +304,7 @@ void UpdateStandardizations() { _stdDevs[i] = 0f; } - + int n = 0; foreach (var mfccData in mfccs) { @@ -296,7 +318,7 @@ void UpdateStandardizations() ++n; } } - + for (int i = 0; i < _stdDevs.Length; ++i) { _stdDevs[i] = math.sqrt(_stdDevs[i] / n); diff --git a/Assets/uLipSync/Runtime/uLipSync.cs b/Assets/uLipSync/Runtime/uLipSync.cs index 94e8d28..5518752 100644 --- a/Assets/uLipSync/Runtime/uLipSync.cs +++ b/Assets/uLipSync/Runtime/uLipSync.cs @@ -21,6 +21,9 @@ public class uLipSync : MonoBehaviour bool _allocated = false; int _index = 0; bool _isDataReceived = false; + int bufferSize = 3; + + int halfMfcc => mfccNum / 2; NativeArray _rawInputData; NativeArray _inputData; @@ -31,12 +34,14 @@ public class uLipSync : MonoBehaviour NativeArray _phonemes; NativeArray _scores; NativeArray _info; + NativeArray _bufferMelCep; + NativeArray _bufferMelCepOffset; List _requestedCalibrationVowels = new List(); Dictionary _ratios = new Dictionary(); public NativeArray mfcc => _mfccForOther; public LipSyncInfo result { get; private set; } = new LipSyncInfo(); - + #if ULIPSYNC_DEBUG NativeArray _debugData; NativeArray _debugSpectrum; @@ -54,14 +59,14 @@ public class uLipSync : MonoBehaviour int inputSampleCount { - get - { + get + { if (!profile) return AudioSettings.outputSampleRate; float r = (float)AudioSettings.outputSampleRate / profile.targetSampleRate; return Mathf.CeilToInt(profile.sampleCount * r); } } - + int mfccNum => profile ? profile.mfccNum : 12; void Awake() @@ -99,25 +104,32 @@ void AllocateBuffers() { if (_allocated) { + _jobHandle.Complete(); DisposeBuffers(); } _allocated = true; - _jobHandle.Complete(); lock (_lockObject) { int n = inputSampleCount; int phonemeCount = profile ? 
profile.mfccs.Count : 1; _rawInputData = new NativeArray(n, Allocator.Persistent); - _inputData = new NativeArray(n, Allocator.Persistent); - _mfcc = new NativeArray(mfccNum, Allocator.Persistent); - _mfccForOther = new NativeArray(mfccNum, Allocator.Persistent); - _means = new NativeArray(mfccNum, Allocator.Persistent); - _standardDeviations = new NativeArray(mfccNum, Allocator.Persistent); + _inputData = new NativeArray(n, Allocator.Persistent); + _mfcc = new NativeArray(mfccNum, Allocator.Persistent); + _mfccForOther = new NativeArray(mfccNum, Allocator.Persistent); + _means = new NativeArray(mfccNum, Allocator.Persistent); + _standardDeviations = new NativeArray(mfccNum, Allocator.Persistent); _scores = new NativeArray(phonemeCount, Allocator.Persistent); _phonemes = new NativeArray(mfccNum * phonemeCount, Allocator.Persistent); _info = new NativeArray(1, Allocator.Persistent); + _bufferMelCep = new NativeArray(bufferSize * halfMfcc, Allocator.Persistent); + _bufferMelCepOffset = new NativeArray(bufferSize, Allocator.Persistent) + { + [0] = 0, + [1] = halfMfcc, + [2] = mfccNum + }; #if ULIPSYNC_DEBUG _debugData = new NativeArray(profile.sampleCount, Allocator.Persistent); _debugDataForOther = new NativeArray(profile.sampleCount, Allocator.Persistent); @@ -149,6 +161,8 @@ void DisposeBuffers() _scores.Dispose(); _phonemes.Dispose(); _info.Dispose(); + _bufferMelCep.Dispose(); + _bufferMelCepOffset.Dispose(); #if ULIPSYNC_DEBUG _debugData.Dispose(); _debugDataForOther.Dispose(); @@ -189,7 +203,7 @@ void UpdateResult() _debugMelSpectrumForOther.CopyFrom(_debugMelSpectrum); _debugMelCepstrumForOther.CopyFrom(_debugMelCepstrum); #endif - + int index = _info[0].mainPhonemeIndex; string mainPhoneme = profile.GetPhoneme(index); @@ -241,7 +255,7 @@ void UpdatePhonemes() { foreach (var value in data.mfccNativeArray) { - if (index >= _phonemes.Length) break; + if (index >= _phonemes.Length) return; _phonemes[index++] = value; } } @@ -270,11 +284,14 @@ void ScheduleJob() 
melFilterBankChannels = profile.melFilterBankChannels, means = _means, standardDeviations = _standardDeviations, + useDelta = profile.useDelta, mfcc = _mfcc, phonemes = _phonemes, compareMethod = profile.compareMethod, scores = _scores, info = _info, + bufferMelCep = _bufferMelCep, + bufferMelCepOffset = _bufferMelCepOffset, #if ULIPSYNC_DEBUG debugData = _debugData, debugSpectrum = _debugSpectrum, @@ -328,7 +345,7 @@ public void OnDataReceived(float[] input, int channels) { int n = _rawInputData.Length; _index = _index % n; - for (int i = 0; i < input.Length; i += channels) + for (int i = 0; i < input.Length; i += channels) { _rawInputData[_index++ % n] = input[i]; } @@ -337,7 +354,7 @@ public void OnDataReceived(float[] input, int channels) if (math.abs(outputSoundGain - 1f) > math.EPSILON) { int n = input.Length; - for (int i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) { input[i] *= outputSoundGain; } diff --git a/Assets/uLipSync/Runtime/uLipSyncAnimator.cs b/Assets/uLipSync/Runtime/uLipSyncAnimator.cs index 95c919b..2330524 100644 --- a/Assets/uLipSync/Runtime/uLipSyncAnimator.cs +++ b/Assets/uLipSync/Runtime/uLipSyncAnimator.cs @@ -26,6 +26,7 @@ public class AnimatorInfo public float minVolume = -2.5f; public float maxVolume = -1.5f; [Range(0f, 0.3f)] public float smoothness = 0.05f; + [Range(0.00001f, 0.01f)] public float minimalValueThreshold = 0.001f; LipSyncInfo _info = new LipSyncInfo(); bool _lipSyncUpdated = false; @@ -51,6 +52,14 @@ public void OnLipSyncUpdate(LipSyncInfo info) } } + void Awake() + { + foreach (AnimatorInfo par in parameters) + { + par.nameHash = Animator.StringToHash(par.name); + } + } + void Update() { if (updateMethod != UpdateMethod.LipSyncUpdateEvent) @@ -80,9 +89,17 @@ void FixedUpdate() } } - float SmoothDamp(float value, float target, ref float velocity) + float SmoothDamp(float value, float target, ref float velocity, float threshold) { - return Mathf.SmoothDamp(value, target, ref velocity, smoothness); + float 
smoothedValue = Mathf.SmoothDamp(value, target, ref velocity, smoothness); + + // Check if the absolute value of the smoothedValue is below a threshold + if (Mathf.Abs(smoothedValue) < threshold) + { + smoothedValue = 0f; // Set it to zero + } + + return smoothedValue; } void UpdateVolume() @@ -94,7 +111,7 @@ void UpdateVolume() normVol = (normVol - minVolume) / Mathf.Max(maxVolume - minVolume, 1e-4f); normVol = Mathf.Clamp(normVol, 0f, 1f); } - _volume = SmoothDamp(_volume, normVol, ref _openCloseVelocity); + _volume = SmoothDamp(_volume, normVol, ref _openCloseVelocity, minimalValueThreshold); } void UpdateVowels() @@ -110,7 +127,7 @@ void UpdateVowels() ratios.TryGetValue(param.phoneme, out targetWeight); } float weightVel = param.weightVelocity; - param.weight = SmoothDamp(param.weight, targetWeight, ref weightVel); + param.weight = SmoothDamp(param.weight, targetWeight, ref weightVel, minimalValueThreshold); param.weightVelocity = weightVel; sum += param.weight; } @@ -149,4 +166,4 @@ void OnApplyAnimator() } } -} \ No newline at end of file +}