Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
### Payment Method Messaging Element
* [Added] `PaymentMethodMessagingElement` is now in public preview

## XX.XX.XX

### Identity
* [Added] Added a best-frame detector to document capture.

## 25.1.1 2025-11-24

### Financial Connections
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//
// BestFramePicker.swift
// StripeIdentity
//
// Created by Kenneth Ackerson on 8/21/24.
//

import CoreGraphics
import Foundation
@_spi(STP) import StripeCameraCore

/// Picks the highest-scoring frame seen during a fixed time window.
///
/// The window opens on the first call to `consider(cgImage:output:exif:score:)` after
/// initialization or `reset()`. While the window is open the picker remembers the
/// best-scoring candidate. The first call made at or after the deadline returns that
/// candidate as `.picked` and clears the picker, so the following frame opens a new window.
///
/// Expiry is only evaluated inside `consider`; nothing is picked unless frames keep arriving.
final class BestFramePicker {
    /// A frame and the data that was captured alongside it.
    struct Candidate {
        let cgImage: CGImage
        let output: DocumentScannerOutput
        let exif: CameraExifMetadata?
        let score: Float
    }

    /// Result of offering a frame to the picker.
    enum State {
        /// No window is open and nothing was picked.
        case idle
        /// The window is still open; `remaining` is the time left until the deadline and
        /// `bestScore` is the highest score seen so far in this window.
        case holding(remaining: TimeInterval, bestScore: Float)
        /// The window has elapsed; the associated value is the best candidate it contained.
        case picked(Candidate)
    }

    private let window: TimeInterval
    /// Source of the current time. Injectable so window expiry can be exercised without sleeping.
    private let now: () -> Date
    private var deadline: Date?
    private var best: Candidate?

    /// - Parameters:
    ///   - window: How long, in seconds, frames are collected after the first one arrives.
    ///   - now: Returns the current time. Defaults to the system clock via `Date()`.
    init(window: TimeInterval = 1.0, now: @escaping () -> Date = { Date() }) {
        self.window = window
        self.now = now
    }

    /// Discards the current window and its best candidate.
    func reset() {
        deadline = nil
        best = nil
    }

    /// Offers a frame to the picker.
    ///
    /// - Parameters:
    ///   - cgImage: The frame's image.
    ///   - output: The scanner output produced for the frame.
    ///   - exif: Optional EXIF metadata captured with the frame.
    ///   - score: Ranking score for the frame; higher is better.
    /// - Returns: `.holding` while the window is open, or `.picked` with the best candidate
    ///   (which may be this frame) once the deadline has been reached.
    func consider(cgImage: CGImage,
                  output: DocumentScannerOutput,
                  exif: CameraExifMetadata?,
                  score: Float) -> State {
        let currentTime = now()
        let candidate = Candidate(cgImage: cgImage, output: output, exif: exif, score: score)

        let activeDeadline: Date
        if let existingDeadline = deadline {
            activeDeadline = existingDeadline
            // A NaN score never compares greater or smaller than anything, so a NaN-scored
            // first frame would otherwise stay "best" for the whole window. Let any later
            // frame displace it.
            if let current = best, current.score.isNaN || score > current.score {
                best = candidate
            }
        } else {
            // First frame of a new window: it is the best by definition and starts the timer.
            activeDeadline = currentTime.addingTimeInterval(window)
            deadline = activeDeadline
            best = candidate
        }

        let remaining = activeDeadline.timeIntervalSince(currentTime)
        guard remaining <= 0 else {
            return .holding(remaining: remaining, bestScore: best?.score ?? 0)
        }

        // Deadline reached: hand back the winner and clear state for the next window.
        let picked = best
        reset()
        if let picked {
            return .picked(picked)
        }
        return .idle
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,33 @@ enum DocumentScannerOutput: Equatable {
func getAllClassificationScores() -> [IDDetectorOutput.Classification: Float] {
return idDetectorOutput.allClassificationScores
}

/// Combines this output's detector signals into one score used to rank frames.
///
/// The score is the arithmetic mean of:
/// - the classification score for the requested side (for `.front`, the larger of the
///   `idCardFront` and `passport` scores; for `.back`, the `idCardBack` score), with a
///   missing score counted as 0, and
/// - the motion-blur `iou`, when one is present.
///
/// - Parameter side: The document side the frame is expected to show.
/// - Returns: The mean of the available signals.
func qualityScore(side: DocumentSide) -> Float {
    switch self {
    case .legacy(let detector, _, let motion, _, _):
        let byClassification = detector.allClassificationScores

        let sideScore: Float
        switch side {
        case .front:
            sideScore = max(byClassification[.idCardFront] ?? 0, byClassification[.passport] ?? 0)
        case .back:
            sideScore = byClassification[.idCardBack] ?? 0
        }

        // The side score is always present, so `signals` is never empty and the
        // division below is always by 1 or 2.
        let candidates: [Float?] = [sideScore, motion.iou]
        let signals = candidates.compactMap { $0 }

        return signals.reduce(0, +) / Float(signals.count)
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {

private let availableIDTypes: [String]

private let bestFramePicker: BestFramePicker

// MARK: Coordinators
let documentUploader: DocumentUploaderProtocol
let imageScanningSession: DocumentImageScanningSession
Expand All @@ -282,12 +284,14 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
documentUploader: DocumentUploaderProtocol,
imageScanningSession: DocumentImageScanningSession,
sheetController: VerificationSheetControllerProtocol,
avaialableIDTypes: [String]
avaialableIDTypes: [String],
bestFramePicker: BestFramePicker = .init(window: 1.0)
) {
self.apiConfig = apiConfig
self.documentUploader = documentUploader
self.imageScanningSession = imageScanningSession
self.availableIDTypes = avaialableIDTypes
self.bestFramePicker = bestFramePicker
super.init(sheetController: sheetController, analyticsScreenName: .documentCapture)
imageScanningSession.setDelegate(delegate: self)
}
Expand All @@ -303,7 +307,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
anyDocumentScanner: AnyDocumentScanner,
concurrencyManager: ImageScanningConcurrencyManagerProtocol? = nil,
appSettingsHelper: AppSettingsHelperProtocol = AppSettingsHelper.shared,
avaialableIDTypes: [String]
avaialableIDTypes: [String],
bestFramePicker: BestFramePicker = .init(window: 1.0)
) {
self.init(
apiConfig: apiConfig,
Expand All @@ -322,7 +327,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
appSettingsHelper: appSettingsHelper
),
sheetController: sheetController,
avaialableIDTypes: avaialableIDTypes
avaialableIDTypes: avaialableIDTypes,
bestFramePicker: bestFramePicker
)
updateUI()
}
Expand Down Expand Up @@ -519,6 +525,8 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
_ scanningSession: DocumentImageScanningSession,
willStartScanningForClassification documentSide: DocumentSide
) {
// Reset best-frame window when a new side starts
bestFramePicker.reset()
// Focus the accessibility VoiceOver back onto the capture view
UIAccessibility.post(notification: .layoutChanged, argument: self.documentCaptureView)

Expand Down Expand Up @@ -579,21 +587,33 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
return
}

switch scannerOutput {
case .legacy(_, _, _, _, let blurResult):
documentUploader.uploadImages(
for: documentSide,
originalImage: image,
documentScannerOutput: scannerOutput,
exifMetadata: exifMetadata,
method: .autoCapture
)
sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)
// Combine all detector outputs into a single quality score for ranking
let frameScore = scannerOutput.qualityScore(side: documentSide)

imageScanningSession.setStateScanned(
expectedClassification: documentSide,
capturedData: UIImage(cgImage: image)
)
switch bestFramePicker.consider(cgImage: image,
output: scannerOutput,
exif: exifMetadata,
score: frameScore) {
case .idle, .holding:
imageScanningSession.updateScanningState(scannerOutputOptional)
return
case .picked(let best):
switch best.output {
case .legacy(_, _, _, _, let blurResult):
documentUploader.uploadImages(
for: documentSide,
originalImage: best.cgImage,
documentScannerOutput: best.output,
exifMetadata: best.exif,
method: .autoCapture
)
sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be in a later PR but it might also be useful to add:

  • Number of frames we looked at
  • Individual scores
  • Final overall best score


imageScanningSession.setStateScanned(
expectedClassification: documentSide,
capturedData: UIImage(cgImage: best.cgImage)
)
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//
// BestFramePickerTest.swift
// StripeIdentityTests
//
// Created by Kenneth Ackerson on 1/6/26.
//

import CoreGraphics
import XCTest

// swift-format-ignore
@testable @_spi(STP) import StripeCameraCore

@testable import StripeIdentity

/// Tests for `BestFramePicker`: best-score tracking, `reset()`, and window expiry.
///
/// None of these tests sleep. A very large window keeps the picker in `.holding`, and a
/// zero window makes the very first frame reach the deadline.
final class BestFramePickerTest: XCTestCase {

    /// The best score reported while holding is the running maximum of the scores offered.
    func testBestFramePickerTracksBestScoreAcrossFiveFrames() throws {
        let picker = BestFramePicker(window: 10_000)
        let cgImage = try makeFrontImage()
        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)

        let scores: [Float] = [0.2, 0.4, 0.35, 0.9, 0.7]
        var expectedBest: Float = 0

        for score in scores {
            expectedBest = max(expectedBest, score)

            let state = picker.consider(
                cgImage: cgImage,
                output: output,
                exif: nil,
                score: score
            )

            assertHolding(state, bestScore: expectedBest, afterScore: score)
        }
    }

    /// Lower scores never replace the best; a new high does.
    func testBestFramePickerDoesNotDecreaseBestScoreAndUpdatesOnNewHigh() throws {
        let picker = BestFramePicker(window: 10_000)
        let cgImage = try makeFrontImage()
        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)

        // Starts high, drops, then exceeds the previous high again.
        let inputsAndExpectedBest: [(score: Float, expectedBest: Float)] = [
            (score: 0.9, expectedBest: 0.9),
            (score: 0.6, expectedBest: 0.9),
            (score: 0.4, expectedBest: 0.9),
            (score: 0.95, expectedBest: 0.95),
            (score: 0.5, expectedBest: 0.95),
        ]

        for (score, expectedBest) in inputsAndExpectedBest {
            let state = picker.consider(
                cgImage: cgImage,
                output: output,
                exif: nil,
                score: score
            )

            assertHolding(state, bestScore: expectedBest, afterScore: score)
        }
    }

    /// After `reset()`, a lower-scoring frame becomes the best of the new window.
    func testResetClearsBestScore() throws {
        let picker = BestFramePicker(window: 10_000)
        let cgImage = try makeFrontImage()
        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)

        let state1 = picker.consider(
            cgImage: cgImage,
            output: output,
            exif: nil,
            score: 0.9
        )
        assertHolding(state1, bestScore: 0.9, afterScore: 0.9)

        picker.reset()

        let state2 = picker.consider(
            cgImage: cgImage,
            output: output,
            exif: nil,
            score: 0.2
        )
        assertHolding(state2, bestScore: 0.2, afterScore: 0.2)
    }

    /// With a zero-length window every frame is picked immediately.
    func testZeroWindowPicksImmediatelyAndResets() throws {
        let picker = BestFramePicker(window: 0)
        let cgImage = try makeFrontImage()
        let output = makeDocumentScannerOutputLegacy(with: .idCardFront)

        let state1 = picker.consider(
            cgImage: cgImage,
            output: output,
            exif: nil,
            score: 0.2
        )
        guard case .picked(let candidate1) = state1 else {
            XCTFail("Expected picked state, got \(state1)")
            return
        }
        XCTAssertEqual(candidate1.score, 0.2, accuracy: 0.0001)
        XCTAssertTrue(candidate1.cgImage === cgImage)
        XCTAssertEqual(candidate1.output, output)

        // Picker auto-resets after returning .picked, so the next frame should be considered fresh.
        let state2 = picker.consider(
            cgImage: cgImage,
            output: output,
            exif: nil,
            score: 0.9
        )
        guard case .picked(let candidate2) = state2 else {
            XCTFail("Expected picked state, got \(state2)")
            return
        }
        XCTAssertEqual(candidate2.score, 0.9, accuracy: 0.0001)
    }

    // MARK: - Helpers

    /// Returns the mock front-of-license image, failing the test (rather than crashing the
    /// test process) if the fixture has no backing `CGImage`.
    private func makeFrontImage() throws -> CGImage {
        return try XCTUnwrap(CapturedImageMock.frontDriversLicense.image.cgImage)
    }

    /// Asserts that `state` is `.holding` and reports `expected` as its best score.
    private func assertHolding(
        _ state: BestFramePicker.State,
        bestScore expected: Float,
        afterScore score: Float,
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        guard case .holding(_, let bestScore) = state else {
            XCTFail("Expected holding state for score \(score), got \(state)", file: file, line: line)
            return
        }
        XCTAssertEqual(bestScore, expected, accuracy: 0.0001, file: file, line: line)
    }
}

extension BestFramePickerTest {
    /// Builds a `.legacy` scanner output fixture for the given classification.
    ///
    /// The detector output scores only `classification` (at 0.9). The motion-blur value has
    /// no `iou`, the blur result is not blurry, and the second and fourth associated values
    /// are `nil`.
    fileprivate func makeDocumentScannerOutputLegacy(
        with classification: IDDetectorOutput.Classification
    ) -> DocumentScannerOutput {
        let bounds = CGRect(x: 0.1, y: 0.33, width: 0.8, height: 0.33)
        let detectorOutput = IDDetectorOutput(
            classification: classification,
            documentBounds: bounds,
            allClassificationScores: [classification: 0.9]
        )

        return DocumentScannerOutput.legacy(
            detectorOutput,
            nil,
            .init(hasMotionBlur: false, iou: nil, frameCount: 0, duration: 0),
            nil,
            .init(isBlurry: false, variance: 0.1)
        )
    }
}
Loading