Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
## X.Y.Z - changes pending release

## XX.XX.XX

### Identity
* [Added] Added a best frame detector to document capture.

## 25.1.1 2025-11-24

### Financial Connections
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//
// BestFramePicker.swift
// StripeIdentity
//
// Created by Kenneth Ackerson on 8/21/24.
//

import CoreGraphics
import Foundation
@_spi(STP) import StripeCameraCore

/// Collects scored frames over a fixed time window and hands back the
/// highest-scoring one once the window has elapsed.
///
/// The window opens on the first call to `consider(...)` after creation or
/// `reset()`. The deadline is only evaluated inside `consider(...)`, so a
/// result is produced by the first frame that arrives at or after the deadline.
final class BestFramePicker {
    /// A frame together with the data needed to upload it and its ranking score.
    struct Candidate {
        let cgImage: CGImage
        let output: DocumentScannerOutput
        let exif: CameraExifMetadata?
        /// Ranking score; a larger value wins.
        let score: Float
    }

    /// Result of offering a frame to the picker.
    enum State {
        /// No window is open and nothing was picked.
        case idle
        /// A window is open; `remaining` is the time left until the deadline
        /// and `bestScore` is the score of the current best candidate.
        case holding(remaining: TimeInterval, bestScore: Float)
        /// The window elapsed; the associated value is the winning candidate.
        case picked(Candidate)
    }

    /// Length of the collection window.
    private let window: TimeInterval
    /// Time source; injectable so tests can drive the window deterministically.
    private let now: () -> Date
    /// End of the currently open window, or `nil` when no window is open.
    private var deadline: Date?
    /// Best candidate seen in the currently open window.
    private var best: Candidate?

    /// - Parameters:
    ///   - window: How long to keep collecting frames after the first one.
    ///   - now: Returns the current time. Defaults to the system clock.
    init(window: TimeInterval = 1.0, now: @escaping () -> Date = { Date() }) {
        self.window = window
        self.now = now
    }

    /// Closes any open window and forgets the current best candidate.
    func reset() {
        deadline = nil
        best = nil
    }

    /// Offers a frame to the picker.
    ///
    /// - Parameters:
    ///   - cgImage: The captured frame.
    ///   - output: Scanner output associated with the frame.
    ///   - exif: Optional EXIF metadata associated with the frame.
    ///   - score: Ranking score for the frame; a larger value wins.
    /// - Returns: `.picked` with the best candidate once the window has
    ///   elapsed (the picker is reset as a side effect), otherwise `.holding`.
    func consider(cgImage: CGImage,
                  output: DocumentScannerOutput,
                  exif: CameraExifMetadata?,
                  score: Float) -> State {
        let timestamp = now()
        let candidate = Candidate(cgImage: cgImage, output: output, exif: exif, score: score)

        guard let openDeadline = deadline else {
            // First frame since creation/reset: it opens the window and is
            // the best candidate by definition.
            let newDeadline = timestamp.addingTimeInterval(window)
            best = candidate
            deadline = newDeadline
            return resolve(deadline: newDeadline, at: timestamp)
        }

        // A NaN incumbent never loses a `>` comparison, so replace it
        // explicitly; otherwise one NaN first frame would win the whole window.
        if let current = best, score > current.score || current.score.isNaN {
            best = candidate
        }

        return resolve(deadline: openDeadline, at: timestamp)
    }

    /// Reports `.holding` while `deadline` is in the future; otherwise resets
    /// the picker and returns the winning candidate.
    private func resolve(deadline: Date, at timestamp: Date) -> State {
        let remaining = deadline.timeIntervalSince(timestamp)
        guard remaining <= 0 else {
            return .holding(remaining: remaining, bestScore: best?.score ?? 0)
        }

        let picked = best
        reset()
        if let picked {
            return .picked(picked)
        }
        return .idle
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,33 @@ enum DocumentScannerOutput: Equatable {
func getAllClassificationScores() -> [IDDetectorOutput.Classification: Float] {
return idDetectorOutput.allClassificationScores
}

/// Collapses the detector outputs for this frame into one ranking score.
///
/// The score is the arithmetic mean of the available signals: the
/// classification score for the requested `side` and, when present, the
/// motion-blur IoU.
///
/// - Parameter side: The document side being captured. For `.front` the
///   larger of the ID-card-front and passport scores is used; for `.back`
///   the ID-card-back score is used. Missing scores count as 0.
/// - Returns: The mean of the collected signals.
func qualityScore(side: DocumentSide) -> Float {
    switch self {
    case let .legacy(idDetectorOutput, _, motionBlur, _, _):
        let allScores = idDetectorOutput.allClassificationScores

        let sideConfidence: Float
        switch side {
        case .front:
            sideConfidence = max(allScores[.idCardFront] ?? 0, allScores[.passport] ?? 0)
        case .back:
            sideConfidence = allScores[.idCardBack] ?? 0
        }

        // The classification signal is always present; IoU is optional.
        var signals: [Float] = [sideConfidence]
        if let overlap = motionBlur.iou {
            signals.append(overlap)
        }

        guard !signals.isEmpty else {
            return 0
        }
        return signals.reduce(0, +) / Float(signals.count)
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {

private let availableIDTypes: [String]

private let bestFramePicker: BestFramePicker

// MARK: Coordinators
let documentUploader: DocumentUploaderProtocol
let imageScanningSession: DocumentImageScanningSession
Expand All @@ -282,12 +284,14 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
documentUploader: DocumentUploaderProtocol,
imageScanningSession: DocumentImageScanningSession,
sheetController: VerificationSheetControllerProtocol,
avaialableIDTypes: [String]
avaialableIDTypes: [String],
bestFramePicker: BestFramePicker = .init(window: 1.0)
) {
self.apiConfig = apiConfig
self.documentUploader = documentUploader
self.imageScanningSession = imageScanningSession
self.availableIDTypes = avaialableIDTypes
self.bestFramePicker = bestFramePicker
super.init(sheetController: sheetController, analyticsScreenName: .documentCapture)
imageScanningSession.setDelegate(delegate: self)
}
Expand All @@ -303,7 +307,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
anyDocumentScanner: AnyDocumentScanner,
concurrencyManager: ImageScanningConcurrencyManagerProtocol? = nil,
appSettingsHelper: AppSettingsHelperProtocol = AppSettingsHelper.shared,
avaialableIDTypes: [String]
avaialableIDTypes: [String],
bestFramePicker: BestFramePicker = .init(window: 1.0)
) {
self.init(
apiConfig: apiConfig,
Expand All @@ -322,7 +327,8 @@ final class DocumentCaptureViewController: IdentityFlowViewController {
appSettingsHelper: appSettingsHelper
),
sheetController: sheetController,
avaialableIDTypes: avaialableIDTypes
avaialableIDTypes: avaialableIDTypes,
bestFramePicker: bestFramePicker
)
updateUI()
}
Expand Down Expand Up @@ -519,6 +525,8 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
_ scanningSession: DocumentImageScanningSession,
willStartScanningForClassification documentSide: DocumentSide
) {
// Reset best-frame window when a new side starts
bestFramePicker.reset()
// Focus the accessibility VoiceOver back onto the capture view
UIAccessibility.post(notification: .layoutChanged, argument: self.documentCaptureView)

Expand Down Expand Up @@ -579,21 +587,33 @@ extension DocumentCaptureViewController: ImageScanningSessionDelegate {
return
}

switch scannerOutput {
case .legacy(_, _, _, _, let blurResult):
documentUploader.uploadImages(
for: documentSide,
originalImage: image,
documentScannerOutput: scannerOutput,
exifMetadata: exifMetadata,
method: .autoCapture
)
sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)
// Combine all detector outputs into a single quality score for ranking
let frameScore = scannerOutput.qualityScore(side: documentSide)

imageScanningSession.setStateScanned(
expectedClassification: documentSide,
capturedData: UIImage(cgImage: image)
)
switch bestFramePicker.consider(cgImage: image,
output: scannerOutput,
exif: exifMetadata,
score: frameScore) {
case .idle, .holding:
imageScanningSession.updateScanningState(scannerOutputOptional)
return
case .picked(let best):
switch best.output {
case .legacy(_, _, _, _, let blurResult):
documentUploader.uploadImages(
for: documentSide,
originalImage: best.cgImage,
documentScannerOutput: best.output,
exifMetadata: best.exif,
method: .autoCapture
)
sheetController?.analyticsClient.updateBlurScore(blurResult.variance, for: documentSide)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be in a later PR but it might also be useful to add:

  • Number of frames we looked at
  • Individual scores
  • Final overall best score


imageScanningSession.setStateScanned(
expectedClassification: documentSide,
capturedData: UIImage(cgImage: best.cgImage)
)
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,8 @@ extension DocumentCaptureViewControllerTest {
anyDocumentScanner: .init(mockDocumentScanner),
concurrencyManager: mockConcurrencyManager,
appSettingsHelper: mockAppSettingsHelper,
avaialableIDTypes: DocumentCaptureViewControllerTest.mockVerificationPage.documentSelect.idDocumentTypeAllowlistKeys
avaialableIDTypes: DocumentCaptureViewControllerTest.mockVerificationPage.documentSelect.idDocumentTypeAllowlistKeys,
bestFramePicker: .init(window: 0)
)
}

Expand Down