diff --git a/app/xcode/Sources/VCamTracking/AvatarWebCamera.swift b/app/xcode/Sources/VCamTracking/AvatarWebCamera.swift
index 5bdb13d..973844e 100644
--- a/app/xcode/Sources/VCamTracking/AvatarWebCamera.swift
+++ b/app/xcode/Sources/VCamTracking/AvatarWebCamera.swift
@@ -95,6 +95,7 @@ public final class AvatarWebCamera {
 
     public func resetCalibration() {
         UserDefaults.standard.set(CGFloat(-facialEstimator.prevRawEyeballY()), for: .eyeTrackingOffsetY)
+        poseEstimator.calibrate()
     }
 
     private func didOutput(sampleBuffer: CMSampleBuffer) {
@@ -113,16 +114,16 @@ public final class AvatarWebCamera {
         }
     }
 
-    private func onLandmarkUpdate(observation: VNFaceObservation, landmarks: VNFaceLandmarks2D) {
+    private func onLandmarkUpdate(observation: VNFaceObservation, vnLandmarks: VNFaceLandmarks2D) {
         guard Tracking.shared.faceTrackingMethod == .default else { return }
 
-        let pointsInImage = landmarks.allPoints!.pointsInImage(imageSize: cameraManager.captureDeviceResolution)
-        let (headPosition, headRotation) = poseEstimator.estimate(pointsInImage: pointsInImage, observation: observation)
-        let facial = facialEstimator.estimate(pointsInImage)
+        let landmarks = VisionLandmarks(landmarks: vnLandmarks, imageSize: cameraManager.captureDeviceResolution)
+        let (headPosition, headRotation) = poseEstimator.estimate(landmarks, observation: observation)
+        let facial = facialEstimator.estimate(landmarks)
 
         if isEmotionEnabled {
             if facialExpressionCounter > 4 {
-                let facialExp = facialExpressionEstimator.estimate(landmarks)
+                let facialExp = facialExpressionEstimator.estimate(vnLandmarks)
                 DispatchQueue.main.async {
                     UniBridge.shared.facialExpression(facialExp.rawValue)
                 }
@@ -138,8 +139,8 @@ public final class AvatarWebCamera {
             facial.distanceOfRightEyeHeight,
             facial.distanceOfNoseHeight,
             facial.distanceOfMouthHeight,
-            facial.eyeballX,
-            facial.eyeballY,
+            facial.eyeball.x,
+            facial.eyeball.y,
             Float(facial.vowel.rawValue)
         )
         Tracking.shared.avatar.onFacialDataReceived(values)
diff --git a/app/xcode/Sources/VCamTracking/FacialEsitimator.swift b/app/xcode/Sources/VCamTracking/FacialEsitimator.swift
index 6d5c3e4..2afecd3 100644
--- a/app/xcode/Sources/VCamTracking/FacialEsitimator.swift
+++ b/app/xcode/Sources/VCamTracking/FacialEsitimator.swift
@@ -14,17 +14,15 @@ public struct Facial {
     public let distanceOfNoseHeight: Float
     public let distanceOfMouthHeight: Float
     public let vowel: Vowel
-    public let eyeballX: Float
-    public let eyeballY: Float
+    public let eyeball: SIMD2<Float>
 
-    public init(distanceOfLeftEyeHeight: Float, distanceOfRightEyeHeight: Float, distanceOfNoseHeight: Float, distanceOfMouthHeight: Float, vowel: Vowel, eyeballX: Float, eyeballY: Float) {
+    public init(distanceOfLeftEyeHeight: Float, distanceOfRightEyeHeight: Float, distanceOfNoseHeight: Float, distanceOfMouthHeight: Float, vowel: Vowel, eyeball: SIMD2<Float>) {
         self.distanceOfLeftEyeHeight = distanceOfLeftEyeHeight
         self.distanceOfRightEyeHeight = distanceOfRightEyeHeight
         self.distanceOfNoseHeight = distanceOfNoseHeight
         self.distanceOfMouthHeight = distanceOfMouthHeight
         self.vowel = vowel
-        self.eyeballX = eyeballX
-        self.eyeballY = eyeballY
+        self.eyeball = eyeball
     }
 }
 
@@ -35,16 +33,16 @@ public struct FacialEstimator {
         .init(
             prevRawEyeballY: { 0 },
             estimate: { _ in
-                Facial(distanceOfLeftEyeHeight: 0, distanceOfRightEyeHeight: 0, distanceOfNoseHeight: 0, distanceOfMouthHeight: 0, vowel: .a, eyeballX: 0, eyeballY: 0)
+                Facial(distanceOfLeftEyeHeight: 0, distanceOfRightEyeHeight: 0, distanceOfNoseHeight: 0, distanceOfMouthHeight: 0, vowel: .a, eyeball: .zero)
             }
         )
     }
 
-    public init(prevRawEyeballY: @escaping () -> Float, estimate: @escaping ([CGPoint]) -> Facial) {
+    public init(prevRawEyeballY: @escaping () -> Float, estimate: @escaping (VisionLandmarks) -> Facial) {
         self.prevRawEyeballY = prevRawEyeballY
         self.estimate = estimate
     }
 
     public private(set) var prevRawEyeballY: () -> Float
-    public private(set) var estimate: ([CGPoint]) -> Facial
+    public private(set) var estimate: (VisionLandmarks) -> Facial
 }
diff --git a/app/xcode/Sources/VCamTracking/HeadPoseEstimator.swift b/app/xcode/Sources/VCamTracking/HeadPoseEstimator.swift
index de9f266..ee4a378 100644
--- a/app/xcode/Sources/VCamTracking/HeadPoseEstimator.swift
+++ b/app/xcode/Sources/VCamTracking/HeadPoseEstimator.swift
@@ -10,7 +10,8 @@ import Accelerate
 
 public protocol HeadPoseEstimator {
     func configure(size: CGSize)
-    func estimate(pointsInImage p: [CGPoint], observation: VNFaceObservation) -> (position: SIMD3<Float>, rotation: SIMD3<Float>)
+    func calibrate()
+    func estimate(_ landmarks: VisionLandmarks, observation: VNFaceObservation) -> (position: SIMD3<Float>, rotation: SIMD3<Float>)
 }
 
@@ -19,14 +20,26 @@ public final class VisionHeadPoseEstimator: HeadPoseEstimator {
     private var prevPos = SIMD3<Float>(repeating: 0)
     private var prevPitchYawRoll = RevisedMovingAverage<SIMD3<Float>>(weight: .custom(count: 12, weight: 60))
 
+    private var baseNoseHeight: Float = 50
+    private var prevNoseHeight: Float = 50
+    private var prevZ = RevisedMovingAverage<Float>(weight: .six)
+
     public init() {}
 
     public func configure(size: CGSize) {
         self.size = size
     }
 
-    public func estimate(pointsInImage p: [CGPoint], observation: VNFaceObservation) -> (position: SIMD3<Float>, rotation: SIMD3<Float>) {
-        guard let pitch = observation.pitch?.floatValue,
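+    /// Resets the neutral nose-height baseline used by the (currently
+    /// disabled) z-position estimate in `estimate`, which maps, e.g., a 55pt
+    /// nose span against a 50pt baseline to 55 / 50 - 1.0 = 0.1 and clamps
+    /// the result to [-0.2, 0.2] before smoothing it through `prevZ`.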
+    public func calibrate() {
+        baseNoseHeight = prevNoseHeight
+    }
+
+    public func estimate(_ landmarks: VisionLandmarks, observation: VNFaceObservation) -> (position: SIMD3<Float>, rotation: SIMD3<Float>) {
+        guard var pitch = observation.pitch?.floatValue,
               let yaw = observation.yaw?.floatValue,
               let roll = observation.roll?.floatValue else {
             return (prevPos, prevPitchYawRoll.latestValue)
@@ -37,6 +50,10 @@ public final class VisionHeadPoseEstimator: HeadPoseEstimator {
         let posX = (Float(p.x) - 0.5) * 2 * xRange
         prevPos.x = simd_mix(prevPos.x, posX, 0.2)
 
+//        prevPos.z = prevZ.appending(simd_clamp(landmarks.noseHeight / baseNoseHeight - 1.0, -0.2, 0.2))
+
+        // Adjust as Vision tends to look up when facing left or right
+        pitch += abs(yaw) * 0.2
         let newRotation = prevPitchYawRoll.appending(.init(pitch, yaw, roll)) * 180 / .pi
         return (prevPos, newRotation)
     }
diff --git a/app/xcode/Sources/VCamTracking/VisionLandmarks.swift b/app/xcode/Sources/VCamTracking/VisionLandmarks.swift
new file mode 100644
index 0000000..bb9d125
--- /dev/null
+++ b/app/xcode/Sources/VCamTracking/VisionLandmarks.swift
@@ -0,0 +1,72 @@
+//
+//  VisionLandmarks.swift
+//
+//
+//  Created by Tatsuya Tanaka on 2023/10/28.
+//
+
+import Foundation
+import simd
+import Vision
+
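+/// Named, image-space views into `VNFaceLandmarks2D.allPoints`. The fixed
+/// indices in the initializer assume the point ordering of Vision's 76-point
+/// face constellation; Apple does not document this order, so it was
+/// determined empirically.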
+public struct VisionLandmarks {
+    let p: [CGPoint]
+
+    public let leftEyeBall: SIMD2<Float>
+    public let leftEyeInner: SIMD2<Float>
+    public let leftEyeOuter: SIMD2<Float>
+    public let leftEyeTop: SIMD2<Float>
+    public let leftEyeBottom: SIMD2<Float>
+    public let rightEyeBall: SIMD2<Float>
+    public let rightEyeInner: SIMD2<Float>
+    public let rightEyeOuter: SIMD2<Float>
+    public let rightEyeTop: SIMD2<Float>
+    public let rightEyeBottom: SIMD2<Float>
+    public let noseCenter: SIMD2<Float>
+    public let leftCheek: SIMD2<Float>
+    public let rightCheek: SIMD2<Float>
+    public let noseTop: SIMD2<Float>
+    public let noseBottom: SIMD2<Float>
+    public let lipInnerTop: SIMD2<Float>
+    public let lipInnerBottom: SIMD2<Float>
+    public let rightMouth: SIMD2<Float>
+    public let leftMouth: SIMD2<Float>
+    public let rightJaw: SIMD2<Float>
+    public let leftJaw: SIMD2<Float>
+
+    public let noseHeight: Float
+
+    init(landmarks: VNFaceLandmarks2D, imageSize: CGSize) {
+        p = landmarks.allPoints!.pointsInImage(imageSize: imageSize)
+
+        leftEyeBall = SIMD2(p[13])
+        leftEyeInner = SIMD2(p[8])
+        leftEyeOuter = SIMD2(p[7])
+        leftEyeTop = SIMD2(p[12])
+        leftEyeBottom = SIMD2(p[10])
+        rightEyeBall = SIMD2(p[6])
+        rightEyeInner = SIMD2(p[1])
+        rightEyeOuter = SIMD2(p[0])
+        rightEyeTop = SIMD2(p[5])
+        rightEyeBottom = SIMD2(p[3])
+        noseCenter = SIMD2(p[49])
+        leftCheek = SIMD2(p[61])
+        rightCheek = SIMD2(p[73])
+        noseTop = SIMD2(p[46])
+        noseBottom = SIMD2(p[52])
+        lipInnerTop = SIMD2(p[40])
+        lipInnerBottom = SIMD2(p[41])
+        rightMouth = SIMD2(p[34])
+        leftMouth = SIMD2(p[26])
+        rightJaw = SIMD2(p[65])
+        leftJaw = SIMD2(p[69])
+
+        noseHeight = simd_fast_distance(noseTop, noseBottom)
+
+        print(landmarks.allPoints!.precisionEstimatesPerPoint![46], landmarks.allPoints!.precisionEstimatesPerPoint![52])
+    }
+}
diff --git a/app/xcode/Sources/VCamTracking/VowelEstimator.swift b/app/xcode/Sources/VCamTracking/VowelEstimator.swift
index 048aa0e..25574bf 100644
--- a/app/xcode/Sources/VCamTracking/VowelEstimator.swift
+++ b/app/xcode/Sources/VCamTracking/VowelEstimator.swift
@@ -6,11 +6,12 @@
 //
 
 import Foundation
+import simd
 import VCamEntity
 
 public enum VowelEstimator {
-    public static func estimate(visionFeatures p: [CGPoint]) -> Vowel {
-        let mouthWide = (p[34].x - p[26].x) / (p[65].x - p[69].x)
+    public static func estimate(_ landmarks: VisionLandmarks) -> Vowel {
+        let mouthWide = simd_fast_distance(landmarks.rightMouth, landmarks.leftMouth) / simd_fast_distance(landmarks.rightJaw, landmarks.leftJaw)
         if mouthWide < 0.6 { // Judge 'u' based on the ratio of jaw width to mouth width
             return .u
         } else if mouthWide >= 0.8 {