diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33dd251e5561..b4201583816b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,11 @@
 ### Payment Method Messaging Element
 * [Added] `PaymentMethodMessagingElement` is now in public preview
 
+## XX.XX.XX
+
+### Identity
+* [Added] Best frame detection for document capture.
+
 ## 25.1.1 2025-11-24
 
 ### Financial Connections
diff --git a/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/BestFramePicker.swift b/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/BestFramePicker.swift
new file mode 100644
index 000000000000..813b316b8ed4
--- /dev/null
+++ b/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/BestFramePicker.swift
@@ -0,0 +1,91 @@
+//
+//  BestFramePicker.swift
+//  StripeIdentity
+//
+//  Created by Kenneth Ackerson on 8/21/24.
+//
+
+import CoreGraphics
+import Foundation
+@_spi(STP) import StripeCameraCore
+
+/// Collects candidate frames over a fixed time window and returns the
+/// highest-scoring one once the window has elapsed.
+///
+/// The window opens on the first `consider` call after creation or `reset()`.
+/// Expiry is only checked inside `consider`, so the pick is delivered by the
+/// first call made at or after the deadline, and the frame passed to that call
+/// still competes for best.
+final class BestFramePicker {
+    /// A frame, the data needed to upload it, and the score it is ranked by.
+    struct Candidate {
+        let cgImage: CGImage
+        let output: DocumentScannerOutput
+        let exif: CameraExifMetadata?
+        let score: Float
+    }
+
+    enum State {
+        /// Nothing is held. `consider` always opens a window, so it does not
+        /// return this; the case remains for callers that switch over `State`.
+        case idle
+        /// The window is still open; `bestScore` is the highest score seen so far.
+        case holding(remaining: TimeInterval, bestScore: Float)
+        /// The window has elapsed; the payload is the highest-scoring candidate.
+        case picked(Candidate)
+    }
+
+    private let window: TimeInterval
+    private let now: () -> Date
+    private var deadline: Date?
+    private var best: Candidate?
+
+    /// - Parameters:
+    ///   - window: Seconds to keep collecting frames after the first one arrives.
+    ///   - now: Clock used to time the window. Defaults to the wall clock; tests
+    ///     can inject their own to expire the window without sleeping.
+    init(window: TimeInterval = 1.0, now: @escaping () -> Date = { Date() }) {
+        self.window = window
+        self.now = now
+    }
+
+    /// Drops the held candidate and closes the current window.
+    func reset() {
+        deadline = nil
+        best = nil
+    }
+
+    /// Offers a frame to the picker.
+    ///
+    /// - Returns: `.holding` while the window is open, or `.picked` with the
+    ///   highest-scoring candidate once it has elapsed. The picker resets itself
+    ///   before returning `.picked`, so the next call opens a fresh window.
+    func consider(cgImage: CGImage,
+                  output: DocumentScannerOutput,
+                  exif: CameraExifMetadata?,
+                  score: Float) -> State {
+        let currentTime = now()
+        let candidate = Candidate(cgImage: cgImage, output: output, exif: exif, score: score)
+
+        let expiry: Date
+        let leader: Candidate
+        if let openDeadline = deadline, let held = best {
+            expiry = openDeadline
+            // Strictly greater: on a tie the earlier frame is kept.
+            leader = score > held.score ? candidate : held
+        } else {
+            // First frame since creation or `reset()`: open a new window.
+            expiry = currentTime.addingTimeInterval(window)
+            leader = candidate
+        }
+        deadline = expiry
+        best = leader
+
+        let remaining = expiry.timeIntervalSince(currentTime)
+        if remaining <= 0 {
+            reset()
+            return .picked(leader)
+        }
+        return .holding(remaining: remaining, bestScore: leader.score)
+    }
+}
diff --git a/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/DocumentScannerOutput.swift b/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/DocumentScannerOutput.swift
index 00e02f1e230e..f03049f80edc 100644
--- a/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/DocumentScannerOutput.swift
+++ b/StripeIdentity/StripeIdentity/Source/NativeComponents/Coordinators/ImageScanner/DocumentScanner/DocumentScannerOutput.swift
@@ -87,4 +87,28 @@ enum DocumentScannerOutput: Equatable {
     func getAllClassificationScores() -> [IDDetectorOutput.Classification: Float] {
         return idDetectorOutput.allClassificationScores
     }
+
+    /// Scores this output so frames of the same document side can be ranked.
+    ///
+    /// The result is the classification score for `side`, averaged with the
+    /// motion-blur IoU when one is available.
+    func qualityScore(side: DocumentSide) -> Float {
+        switch self {
+        case let .legacy(idDetectorOutput, _, motionBlur, _, _):
+            let scores = idDetectorOutput.allClassificationScores
+            let classificationScore: Float
+            switch side {
+            case .front:
+                // Either an ID card front or a passport counts; use the higher score.
+                classificationScore = max(scores[.idCardFront] ?? 0, scores[.passport] ?? 0)
+            case .back:
+                classificationScore = scores[.idCardBack] ?? 0
+            }
+
+            guard let iou = motionBlur.iou else {
+                return classificationScore
+            }
+            return (classificationScore + iou) / 2
+        }
+    }
 }
diff --git a/StripeIdentity/StripeIdentity/Source/NativeComponents/ViewControllers/DocumentCaptureViewController.swift b/StripeIdentity/StripeIdentity/Source/NativeComponents/ViewControllers/DocumentCaptureViewController.swift
index d71ddf64bdf6..148fa016ffad 100644
--- a/StripeIdentity/StripeIdentity/Source/NativeComponents/ViewControllers/DocumentCaptureViewController.swift
+++ b/StripeIdentity/StripeIdentity/Source/NativeComponents/ViewControllers/DocumentCaptureViewController.swift
@@ -271,6 +271,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
 
     private let availableIDTypes: [String]
 
+    private let bestFramePicker: BestFramePicker
+
     // MARK: Coordinators
     let documentUploader: DocumentUploaderProtocol
     let imageScanningSession: DocumentImageScanningSession
@@ -282,12 +284,14 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
         documentUploader: DocumentUploaderProtocol,
         imageScanningSession: DocumentImageScanningSession,
         sheetController: VerificationSheetControllerProtocol,
-        avaialableIDTypes: [String]
+        avaialableIDTypes: [String],
+        bestFramePicker: BestFramePicker = .init(window: 1.0)
     ) {
         self.apiConfig = apiConfig
         self.documentUploader = documentUploader
         self.imageScanningSession = imageScanningSession
         self.availableIDTypes = avaialableIDTypes
+        self.bestFramePicker = bestFramePicker
         super.init(sheetController: sheetController, analyticsScreenName: .documentCapture)
         imageScanningSession.setDelegate(delegate: self)
     }
@@ -303,7 +307,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
         anyDocumentScanner: AnyDocumentScanner,
         concurrencyManager: ImageScanningConcurrencyManagerProtocol? = nil,
         appSettingsHelper: AppSettingsHelperProtocol = AppSettingsHelper.shared,
-        avaialableIDTypes: [String]
+        avaialableIDTypes: [String],
+        bestFramePicker: BestFramePicker = .init(window: 1.0)
     ) {
         self.init(
             apiConfig: apiConfig,
@@ -322,7 +327,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
                 appSettingsHelper: appSettingsHelper
             ),
             sheetController: sheetController,
-            avaialableIDTypes: avaialableIDTypes
+            avaialableIDTypes: avaialableIDTypes,
+            bestFramePicker: bestFramePicker
         )
         updateUI()
     }
@@ -519,6 +525,8 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
         _ scanningSession: DocumentImageScanningSession,
         willStartScanningForClassification documentSide: DocumentSide
     ) {
+        // Reset best-frame window when a new side starts
+        bestFramePicker.reset()
         // Focus the accessibility VoiceOver back onto the capture view
         UIAccessibility.post(notification: .layoutChanged, argument: self.documentCaptureView)
 
@@ -579,21 +587,33 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
             return
         }
 
-        switch scannerOutput {
-        case .legacy(_, _, _, _, let blurResult):
-            documentUploader.uploadImages(
-                for: documentSide,
-                originalImage: image,
-                documentScannerOutput: scannerOutput,
-                exifMetadata: exifMetadata,
-                method: .autoCapture
-            )
-            sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)
+        // Collapse the scanner output into a single score used to rank candidate frames
+        let frameScore = scannerOutput.qualityScore(side: documentSide)
 
-            imageScanningSession.setStateScanned(
-                expectedClassification: documentSide,
-                capturedData: UIImage(cgImage: image)
-            )
+        switch bestFramePicker.consider(cgImage: image,
+                                        output: scannerOutput,
+                                        exif: exifMetadata,
+                                        score: frameScore) {
+        case .idle, .holding:
+            imageScanningSession.updateScanningState(scannerOutputOptional)
+            return
+        case .picked(let best):
+            switch best.output {
+            case .legacy(_, _, _, _, let blurResult):
+                documentUploader.uploadImages(
+                    for: documentSide,
+                    originalImage: best.cgImage,
+                    documentScannerOutput: best.output,
+                    exifMetadata: best.exif,
+                    method: .autoCapture
+                )
+                sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)
+
+                imageScanningSession.setStateScanned(
+                    expectedClassification: documentSide,
+                    capturedData: UIImage(cgImage: best.cgImage)
+                )
+            }
         }
     }
 }
diff --git a/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/Coordinators/BestFramePickerTest.swift b/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/Coordinators/BestFramePickerTest.swift
new file mode 100644
index 000000000000..dc9720dfe3c1
--- /dev/null
+++ b/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/Coordinators/BestFramePickerTest.swift
@@ -0,0 +1,195 @@
+//
+//  BestFramePickerTest.swift
+//  StripeIdentityTests
+//
+//  Created by Kenneth Ackerson on 1/6/26.
+//
+
+import CoreGraphics
+import XCTest
+
+// swift-format-ignore
+@testable @_spi(STP) import StripeCameraCore
+
+@testable import StripeIdentity
+
+final class BestFramePickerTest: XCTestCase {
+
+    func testBestFramePickerTracksBestScoreAcrossFiveFrames() throws {
+        let picker = BestFramePicker(window: 10_000)
+
+        let cgImage = try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
+        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)
+
+        let scores: [Float] = [0.2, 0.4, 0.35, 0.9, 0.7]
+        var expectedBest: Float = 0
+
+        for score in scores {
+            expectedBest = max(expectedBest, score)
+
+            let state = picker.consider(
+                cgImage: cgImage,
+                output: output,
+                exif: nil,
+                score: score
+            )
+
+            guard case .holding(_, let bestScore) = state else {
+                XCTFail("Expected holding state for score \(score), got \(state)")
+                return
+            }
+
+            XCTAssertEqual(bestScore, expectedBest, accuracy: 0.0001)
+        }
+    }
+
+    func testBestFramePickerDoesNotDecreaseBestScoreAndUpdatesOnNewHigh() throws {
+        let picker = BestFramePicker(window: 10_000)
+
+        let cgImage = try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
+        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)
+
+        // Starts high, drops, then exceeds the previous high again.
+        let inputsAndExpectedBest: [(score: Float, expectedBest: Float)] = [
+            (score: 0.9, expectedBest: 0.9),
+            (score: 0.6, expectedBest: 0.9),
+            (score: 0.4, expectedBest: 0.9),
+            (score: 0.95, expectedBest: 0.95),
+            (score: 0.5, expectedBest: 0.95),
+        ]
+
+        for (score, expectedBest) in inputsAndExpectedBest {
+            let state = picker.consider(
+                cgImage: cgImage,
+                output: output,
+                exif: nil,
+                score: score
+            )
+
+            guard case .holding(_, let bestScore) = state else {
+                XCTFail("Expected holding state for score \(score), got \(state)")
+                return
+            }
+
+            XCTAssertEqual(bestScore, expectedBest, accuracy: 0.0001)
+        }
+    }
+
+    func testResetClearsBestScore() throws {
+        let picker = BestFramePicker(window: 10_000)
+
+        let cgImage = try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
+        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)
+
+        let state1 = picker.consider(
+            cgImage: cgImage,
+            output: output,
+            exif: nil,
+            score: 0.9
+        )
+        guard case .holding(_, let bestScore1) = state1 else {
+            XCTFail("Expected holding state, got \(state1)")
+            return
+        }
+        XCTAssertEqual(bestScore1, 0.9, accuracy: 0.0001)
+
+        picker.reset()
+
+        let state2 = picker.consider(
+            cgImage: cgImage,
+            output: output,
+            exif: nil,
+            score: 0.2
+        )
+        guard case .holding(_, let bestScore2) = state2 else {
+            XCTFail("Expected holding state, got \(state2)")
+            return
+        }
+        XCTAssertEqual(bestScore2, 0.2, accuracy: 0.0001)
+    }
+
+    func testZeroWindowPicksImmediatelyAndResets() throws {
+        let picker = BestFramePicker(window: 0)
+
+        let cgImage = try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
+        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)
+
+        let state1 = picker.consider(
+            cgImage: cgImage,
+            output: output,
+            exif: nil,
+            score: 0.2
+        )
+        guard case .picked(let candidate1) = state1 else {
+            XCTFail("Expected picked state, got \(state1)")
+            return
+        }
+        XCTAssertEqual(candidate1.score, 0.2, accuracy: 0.0001)
+        XCTAssertTrue(candidate1.cgImage === cgImage)
+        XCTAssertEqual(candidate1.output, output)
+
+        // Picker auto-resets after returning .picked, so the next frame should be considered fresh.
+        let state2 = picker.consider(
+            cgImage: cgImage,
+            output: output,
+            exif: nil,
+            score: 0.9
+        )
+        guard case .picked(let candidate2) = state2 else {
+            XCTFail("Expected picked state, got \(state2)")
+            return
+        }
+        XCTAssertEqual(candidate2.score, 0.9, accuracy: 0.0001)
+    }
+
+    func testPicksHighestScoringFrameOnceWindowElapses() throws {
+        var now = Date()
+        let picker = BestFramePicker(window: 1.0, now: { now })
+
+        let cgImage = try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
+        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)
+
+        for score: Float in [0.3, 0.9, 0.5] {
+            guard case .holding = picker.consider(cgImage: cgImage, output: output, exif: nil, score: score) else {
+                XCTFail("Expected holding state for score \(score)")
+                return
+            }
+            now += 0.25
+        }
+
+        // The first frame at or after the deadline triggers the pick, and the
+        // earlier, higher-scoring frame wins over it.
+        now += 1.0
+        let state = picker.consider(cgImage: cgImage, output: output, exif: nil, score: 0.1)
+        guard case .picked(let candidate) = state else {
+            XCTFail("Expected picked state, got \(state)")
+            return
+        }
+        XCTAssertEqual(candidate.score, 0.9, accuracy: 0.0001)
+    }
+}
+
+extension BestFramePickerTest {
+    fileprivate func makeDocumentScannerOutputLegacy(
+        with classification: IDDetectorOutput.Classification
+    ) -> DocumentScannerOutput {
+        return .legacy(
+            .init(
+                classification: classification,
+                documentBounds: CGRect(x: 0.1, y: 0.33, width: 0.8, height: 0.33),
+                allClassificationScores: [
+                    classification: 0.9
+                ]
+            ),
+            nil,
+            .init(
+                hasMotionBlur: false,
+                iou: nil,
+                frameCount: 0,
+                duration: 0
+            ),
+            nil,
+            .init(isBlurry: false, variance: 0.1)
+        )
+    }
+}
diff --git a/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/ViewControllers/DocumentCaptureViewControllerTest.swift b/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/ViewControllers/DocumentCaptureViewControllerTest.swift
index 338121a6ea0a..69ba6e74b041 100644
--- a/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/ViewControllers/DocumentCaptureViewControllerTest.swift
+++ b/StripeIdentity/StripeIdentityTests/Unit/NativeComponents/ViewControllers/DocumentCaptureViewControllerTest.swift
@@ -117,6 +117,69 @@ final class DocumentCaptureViewControllerTest: XCTestCase {
         )
     }
 
+    func testTransitionFromScanningCardFrontLegacyPicksBestFrameWithinBestFrameWindow() {
+        // Fake clock so the best-frame window can expire without real waiting.
+        var now = Date()
+        let vc = DocumentCaptureViewController(
+            apiConfig: DocumentCaptureViewControllerTest.mockVerificationPage.documentCapture,
+            initialState: .scanning(.front, nil),
+            sheetController: mockSheetController,
+            cameraSession: mockCameraSession,
+            cameraPermissionsManager: mockCameraPermissionsManager,
+            documentUploader: mockDocumentUploader,
+            anyDocumentScanner: .init(mockDocumentScanner),
+            concurrencyManager: mockConcurrencyManager,
+            appSettingsHelper: mockAppSettingsHelper,
+            avaialableIDTypes: DocumentCaptureViewControllerTest.mockVerificationPage.documentSelect.idDocumentTypeAllowlistKeys,
+            bestFramePicker: .init(window: 1.0, now: { now })
+        )
+
+        func makeOutput(frontScore: Float) -> DocumentScannerOutput {
+            return makeDocumentScannerOutputLegacyWithIddetectorOutput(
+                .init(
+                    classification: .idCardFront,
+                    documentBounds: CGRect(x: 0.1, y: 0.33, width: 0.8, height: 0.33),
+                    allClassificationScores: [
+                        .idCardFront: frontScore
+                    ]
+                )
+            )
+        }
+
+        let outputs = [
+            makeOutput(frontScore: 0.3),
+            makeOutput(frontScore: 0.6),
+            makeOutput(frontScore: 0.4),
+            makeOutput(frontScore: 0.9),
+            makeOutput(frontScore: 0.5),
+        ]
+        let expectedBestOutput = outputs[3]
+
+        for output in outputs {
+            mockCameraFrameCaptured(vc)
+            mockConcurrencyManager.respondToScan(output: output)
+        }
+
+        // Move the clock past the end of the window.
+        now += 1.1
+
+        // Next frame triggers the pick + upload. Use a lower score so it doesn't replace the best.
+        mockCameraFrameCaptured(vc)
+        mockConcurrencyManager.respondToScan(output: makeOutput(frontScore: 0.1))
+
+        verify(
+            vc,
+            expectedState: .scanned(.front, UIImage()),
+            expectedButtonState: .enabled
+        )
+        waitForCameraSessionToStop()
+        XCTAssertTrue(mockDocumentScanner.didReset)
+        XCTAssertTrue(mockConcurrencyManager.didReset)
+        XCTAssertEqual(mockDocumentUploader.uploadedSide, .front)
+        XCTAssertEqual(mockDocumentUploader.uploadMethod, .autoCapture)
+        XCTAssertEqual(mockDocumentUploader.uploadedDocumentScannerOutput, expectedBestOutput)
+    }
+
     func testTransitionFromScannedCardFront() {
         let vc = makeViewController(
             state: .scanned(.front, UIImage())
@@ -802,7 +865,8 @@ extension DocumentCaptureViewControllerTest {
             anyDocumentScanner: .init(mockDocumentScanner),
             concurrencyManager: mockConcurrencyManager,
             appSettingsHelper: mockAppSettingsHelper,
-            avaialableIDTypes: DocumentCaptureViewControllerTest.mockVerificationPage.documentSelect.idDocumentTypeAllowlistKeys
+            avaialableIDTypes: DocumentCaptureViewControllerTest.mockVerificationPage.documentSelect.idDocumentTypeAllowlistKeys,
+            bestFramePicker: .init(window: 0)
         )
     }
 