fix: almost ready for fine tuning

Michael Freno
2026-01-16 17:43:44 -05:00
parent b447f140c0
commit c825ce16e2
5 changed files with 425 additions and 122 deletions

View File

@@ -62,4 +62,20 @@ enum EyeTrackingConstants: Sendable {
static let pixelGazeMinRatio: Double = 0.35 // Looking right threshold
static let pixelGazeMaxRatio: Double = 0.65 // Looking left threshold
static let pixelGazeEnabled: Bool = true
// MARK: - Screen Boundary Detection (New)
/// Forgiveness margin for the "gray area" around the screen edge.
/// 0.05 means the safe zone is extended by 5% of the range on each side.
/// Gaze landing in the gray area is still treated as on-screen (safe);
/// only gaze beyond the expanded bound counts as looking away.
static let boundaryForgivenessMargin: Double = 0.05
/// Distance sensitivity factor.
/// 1.0 = Linear scaling (face width 50% smaller -> eye movement expected to be 50% smaller)
/// > 1.0 = More aggressive scaling
static let distanceSensitivity: Double = 1.0
/// Minimum number of consecutive valid pupil detections required before the gaze average updates.
/// Helps filter out blinks or noisy frames.
static let minimumGazeConfidence: Int = 3 // consecutive valid frames
}
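Taken together, a minimal sketch of how these constants are meant to combine at detection time (the helper name isGazeOffScreen is hypothetical; the real check lives in EyeTrackingService further down in this commit):

// Sketch only: mirrors the boundary check this commit adds to EyeTrackingService.
func isGazeOffScreen(normalizedH: Double, normalizedV: Double, thresholds: GazeThresholds) -> Bool {
    let margin = EyeTrackingConstants.boundaryForgivenessMargin
    // The safe zone is the calibrated screen rectangle expanded by the margin;
    // only gaze beyond (bound + margin) counts as looking away.
    return normalizedH > thresholds.screenLeftBound + margin
        || normalizedH < thresholds.screenRightBound - margin
        || normalizedV < thresholds.screenTopBound - margin
        || normalizedV > thresholds.screenBottomBound + margin
}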

View File

@@ -45,15 +45,15 @@ enum CalibrationStep: String, Codable, CaseIterable {
case .farLeft:
return "Look as far left as comfortable"
case .left:
return "Look to the left"
return "Look to the left edge of the screen"
case .farRight:
return "Look as far right as comfortable"
case .right:
return "Look to the right"
return "Look to the right edge of the screen"
case .up:
return "Look up"
return "Look to the top edge of the screen"
case .down:
return "Look down"
return "Look to the bottom edge of the screen"
case .topLeft:
return "Look to the top left corner"
case .topRight:
@@ -70,42 +70,75 @@ struct GazeSample: Codable {
let leftRatio: Double?
let rightRatio: Double?
let averageRatio: Double
let leftVerticalRatio: Double?
let rightVerticalRatio: Double?
let averageVerticalRatio: Double
let faceWidthRatio: Double? // For distance scaling (face width / image width)
let timestamp: Date
init(leftRatio: Double?, rightRatio: Double?) {
init(leftRatio: Double?, rightRatio: Double?, leftVerticalRatio: Double? = nil, rightVerticalRatio: Double? = nil, faceWidthRatio: Double? = nil) {
self.leftRatio = leftRatio
self.rightRatio = rightRatio
self.leftVerticalRatio = leftVerticalRatio
self.rightVerticalRatio = rightVerticalRatio
self.faceWidthRatio = faceWidthRatio
// Calculate average from available ratios
// Calculate average horizontal ratio
if let left = leftRatio, let right = rightRatio {
self.averageRatio = (left + right) / 2.0
} else {
self.averageRatio = leftRatio ?? rightRatio ?? 0.5
}
// Calculate average vertical ratio
if let left = leftVerticalRatio, let right = rightVerticalRatio {
self.averageVerticalRatio = (left + right) / 2.0
} else {
self.averageVerticalRatio = leftVerticalRatio ?? rightVerticalRatio ?? 0.5
}
self.timestamp = Date()
}
}
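As a usage example of the fallbacks in the initializer above: a sample built with only horizontal ratios gets a neutral 0.5 vertical average, and a single-eye sample falls back to the available eye.

let both = GazeSample(leftRatio: 0.62, rightRatio: 0.58)
// both.averageRatio == 0.60; both.averageVerticalRatio == 0.5 (no vertical data)
let oneEye = GazeSample(leftRatio: nil, rightRatio: 0.58)
// oneEye.averageRatio == 0.58 (the single available eye)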
struct GazeThresholds: Codable {
let minLeftRatio: Double // Looking left threshold (e.g., 0.65)
let maxRightRatio: Double // Looking right threshold (e.g., 0.35)
let centerMin: Double // Center range minimum
let centerMax: Double // Center range maximum
// Horizontal Thresholds
let minLeftRatio: Double // Looking left (high value)
let maxRightRatio: Double // Looking right (low value)
// Vertical Thresholds
let minUpRatio: Double // Looking up (low value, typically < 0.5)
let maxDownRatio: Double // Looking down (high value, typically > 0.5)
// Screen Bounds (Calibration Zone)
// Defines the rectangle of pupil ratios that correspond to looking AT the screen
let screenLeftBound: Double
let screenRightBound: Double
let screenTopBound: Double
let screenBottomBound: Double
// Reference Data for Distance Scaling
let referenceFaceWidth: Double // Average face width during calibration
var isValid: Bool {
// Ensure thresholds don't overlap
return maxRightRatio < centerMin &&
centerMin < centerMax &&
centerMax < minLeftRatio
// Basic sanity checks
return maxRightRatio < minLeftRatio &&
minUpRatio < maxDownRatio &&
screenRightBound < screenLeftBound && // Assuming lower ratio = right
screenTopBound < screenBottomBound // Assuming lower ratio = up
}
static var defaultThresholds: GazeThresholds {
GazeThresholds(
minLeftRatio: 0.65,
maxRightRatio: 0.35,
centerMin: 0.40,
centerMax: 0.60
minUpRatio: 0.40,
maxDownRatio: 0.60,
screenLeftBound: 0.60,
screenRightBound: 0.40,
screenTopBound: 0.45,
screenBottomBound: 0.55,
referenceFaceWidth: 0.0 // 0.0 means unused/uncalibrated
)
}
}
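To make the coordinate convention concrete (0.0 is right/up, 1.0 is left/down), the defaults satisfy the orderings that isValid checks:

let t = GazeThresholds.defaultThresholds
// right < left: t.screenRightBound (0.40) < t.screenLeftBound (0.60)
// up < down:    t.screenTopBound (0.45) < t.screenBottomBound (0.55)
// and the away thresholds bracket each axis: 0.35 < 0.65, 0.40 < 0.60
assert(t.isValid)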
@@ -137,62 +170,95 @@ struct CalibrationData: Codable {
func averageRatio(for step: CalibrationStep) -> Double? {
let stepSamples = getSamples(for: step)
guard !stepSamples.isEmpty else { return nil }
let sum = stepSamples.reduce(0.0) { $0 + $1.averageRatio }
return sum / Double(stepSamples.count)
return stepSamples.reduce(0.0) { $0 + $1.averageRatio } / Double(stepSamples.count)
}
func standardDeviation(for step: CalibrationStep) -> Double? {
func averageVerticalRatio(for step: CalibrationStep) -> Double? {
let stepSamples = getSamples(for: step)
guard stepSamples.count > 1, let mean = averageRatio(for: step) else { return nil }
let variance = stepSamples.reduce(0.0) { sum, sample in
let diff = sample.averageRatio - mean
return sum + (diff * diff)
} / Double(stepSamples.count - 1)
return sqrt(variance)
guard !stepSamples.isEmpty else { return nil }
return stepSamples.reduce(0.0) { $0 + $1.averageVerticalRatio } / Double(stepSamples.count)
}
func averageFaceWidth(for step: CalibrationStep) -> Double? {
let stepSamples = getSamples(for: step)
let validSamples = stepSamples.compactMap { $0.faceWidthRatio }
guard !validSamples.isEmpty else { return nil }
return validSamples.reduce(0.0, +) / Double(validSamples.count)
}
mutating func calculateThresholds() {
// Need at least center, left, and right samples
guard let centerMean = averageRatio(for: .center),
let leftMean = averageRatio(for: .left),
let rightMean = averageRatio(for: .right) else {
print("⚠️ Insufficient calibration data to calculate thresholds")
return
}
// We need Center, Left, Right, Up, Down samples for a full calibration
// Fallback: If corners (TopLeft, etc.) are available, use them to reinforce bounds
let centerStdDev = standardDeviation(for: .center) ?? 0.05
let centerH = averageRatio(for: .center) ?? 0.5
let centerV = averageVerticalRatio(for: .center) ?? 0.5
// Calculate center range (mean ± 0.5 * std_dev)
let centerMin = max(0.0, centerMean - 0.5 * centerStdDev)
let centerMax = min(1.0, centerMean + 0.5 * centerStdDev)
// 1. Horizontal Bounds
// If specific Left/Right steps missing, try corners
let leftH = averageRatio(for: .left) ?? averageRatio(for: .topLeft) ?? averageRatio(for: .bottomLeft) ?? (centerH + 0.15)
let rightH = averageRatio(for: .right) ?? averageRatio(for: .topRight) ?? averageRatio(for: .bottomRight) ?? (centerH - 0.15)
// Calculate left threshold (midpoint between center and left extremes)
let minLeftRatio = centerMax + (leftMean - centerMax) * 0.5
// 2. Vertical Bounds
let upV = averageVerticalRatio(for: .up) ?? averageVerticalRatio(for: .topLeft) ?? averageVerticalRatio(for: .topRight) ?? (centerV - 0.15)
let downV = averageVerticalRatio(for: .down) ?? averageVerticalRatio(for: .bottomLeft) ?? averageVerticalRatio(for: .bottomRight) ?? (centerV + 0.15)
// Calculate right threshold (midpoint between center and right extremes)
let maxRightRatio = centerMin - (centerMin - rightMean) * 0.5
// 3. Face Width Reference (average of all center samples)
let refFaceWidth = averageFaceWidth(for: .center) ?? 0.0
// Validate and adjust if needed
var thresholds = GazeThresholds(
minLeftRatio: min(0.95, max(0.55, minLeftRatio)),
maxRightRatio: max(0.05, min(0.45, maxRightRatio)),
centerMin: centerMin,
centerMax: centerMax
// 4. Compute Boundaries with Margin
// "Screen Bound" is exactly where the user looked.
// We set thresholds slightly BEYOND that to detect "Looking Away".
// Note: Assuming standard coordinates where:
// Horizontal: 0.0 (Right) -> 1.0 (Left)
// Vertical: 0.0 (Up) -> 1.0 (Down)
// Thresholds for "Looking Away"
// Looking Left = Ratio > Screen Left Edge
let lookLeftThreshold = leftH + 0.05
// Looking Right = Ratio < Screen Right Edge
let lookRightThreshold = rightH - 0.05
// Looking Up = Ratio < Screen Top Edge
let lookUpThreshold = upV - 0.05
// Looking Down = Ratio > Screen Bottom Edge
let lookDownThreshold = downV + 0.05
var thresholds = GazeThresholds(
minLeftRatio: lookLeftThreshold,
maxRightRatio: lookRightThreshold,
minUpRatio: lookUpThreshold,
maxDownRatio: lookDownThreshold,
screenLeftBound: leftH,
screenRightBound: rightH,
screenTopBound: upV,
screenBottomBound: downV,
referenceFaceWidth: refFaceWidth
)
// Ensure no overlap
if !thresholds.isValid {
print("⚠️ Computed thresholds overlap, using defaults")
thresholds = GazeThresholds.defaultThresholds
}
self.computedThresholds = thresholds
print("✓ Calibration thresholds calculated:")
print(" Left: \(String(format: "%.3f", thresholds.minLeftRatio))")
print(" Center: \(String(format: "%.3f", thresholds.centerMin))-\(String(format: "%.3f", thresholds.centerMax))")
print(" Right: \(String(format: "%.3f", thresholds.maxRightRatio))")
print(" H-Range: \(String(format: "%.3f", rightH)) to \(String(format: "%.3f", leftH))")
print(" V-Range: \(String(format: "%.3f", upV)) to \(String(format: "%.3f", downV))")
print(" Ref Face Width: \(String(format: "%.3f", refFaceWidth))")
}
}
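A worked example of the derivation above, using illustrative per-step means (not values from a real calibration run):

let leftH = 0.68, rightH = 0.32, upV = 0.42, downV = 0.58
let minLeftRatio  = leftH + 0.05   // 0.73: must look past here to register as off-screen left
let maxRightRatio = rightH - 0.05  // 0.27
let minUpRatio    = upV - 0.05     // 0.37
let maxDownRatio  = downV + 0.05   // 0.63
// The screen bounds themselves stay at the raw extremes: [0.32...0.68] x [0.42...0.58].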
/// Thread-safe storage for active calibration thresholds
/// Allows non-isolated code (video processing) to read thresholds without hitting MainActor
class CalibrationState: @unchecked Sendable {
static let shared = CalibrationState()
private let queue = DispatchQueue(label: "com.gaze.calibrationState", attributes: .concurrent)
private var _thresholds: GazeThresholds?
private var _isComplete: Bool = false
var thresholds: GazeThresholds? {
get { queue.sync { _thresholds } }
set { queue.async(flags: .barrier) { self._thresholds = newValue } }
}
var isComplete: Bool {
get { queue.sync { _isComplete } }
set { queue.async(flags: .barrier) { self._isComplete = newValue } }
}
}
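A sketch of the intended read path from the capture thread (the free function is hypothetical; it only illustrates that reads never touch MainActor):

// Safe to call from the video-processing queue.
func activeThresholds() -> GazeThresholds? {
    guard CalibrationState.shared.isComplete else { return nil }
    return CalibrationState.shared.thresholds  // synchronous concurrent-queue read
}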

View File

@@ -56,10 +56,16 @@ class CalibrationManager: ObservableObject {
calibrationData = CalibrationData()
}
func collectSample(leftRatio: Double?, rightRatio: Double?) {
func collectSample(leftRatio: Double?, rightRatio: Double?, leftVertical: Double? = nil, rightVertical: Double? = nil, faceWidthRatio: Double? = nil) {
guard isCalibrating, let step = currentStep else { return }
let sample = GazeSample(leftRatio: leftRatio, rightRatio: rightRatio)
let sample = GazeSample(
leftRatio: leftRatio,
rightRatio: rightRatio,
leftVerticalRatio: leftVertical,
rightVerticalRatio: rightVertical,
faceWidthRatio: faceWidthRatio
)
calibrationData.addSample(sample, for: step)
samplesCollected += 1
@@ -116,6 +122,10 @@ class CalibrationManager: ObservableObject {
currentStepIndex = 0
samplesCollected = 0
calibrationData = CalibrationData()
// Reset thread-safe state
CalibrationState.shared.isComplete = false
CalibrationState.shared.thresholds = nil
}
// MARK: - Persistence
@@ -157,6 +167,11 @@ class CalibrationManager: ObservableObject {
func clearCalibration() {
UserDefaults.standard.removeObject(forKey: userDefaultsKey)
calibrationData = CalibrationData()
// Reset thread-safe state
CalibrationState.shared.isComplete = false
CalibrationState.shared.thresholds = nil
print("🗑️ Calibration data cleared")
}
@@ -185,19 +200,22 @@ class CalibrationManager: ObservableObject {
// MARK: - Apply Calibration
private func applyCalibration() {
guard let thresholds = calibrationData.computedThresholds else {
print("⚠️ No thresholds to apply")
return
}
// Note: EyeTrackingConstants are static properties that should not be modified.
// Any calibrated values should be used separately in the logic, not stored back to the constants.
// This is a placeholder for future implementation if dynamic threshold updates are needed.
// Push to thread-safe state for background processing
CalibrationState.shared.thresholds = thresholds
CalibrationState.shared.isComplete = true
print("✓ Applied calibrated thresholds:")
print(" Looking left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))")
print(" Looking right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))")
print(" Looking up: ≤\(String(format: "%.3f", thresholds.minUpRatio))")
print(" Looking down: ≥\(String(format: "%.3f", thresholds.maxDownRatio))")
print(" Screen Bounds: [\(String(format: "%.2f", thresholds.screenRightBound))..\(String(format: "%.2f", thresholds.screenLeftBound))] x [\(String(format: "%.2f", thresholds.screenTopBound))..\(String(format: "%.2f", thresholds.screenBottomBound))]")
}
// MARK: - Statistics
@@ -214,9 +232,9 @@ private func applyCalibration() {
var summary = "Calibrated: \(dateFormatter.string(from: calibrationData.calibrationDate))\n"
if let thresholds = calibrationData.computedThresholds {
summary += "Left threshold: \(String(format: "%.3f", thresholds.minLeftRatio))\n"
summary += "Right threshold: \(String(format: "%.3f", thresholds.maxRightRatio))\n"
summary += "Center range: \(String(format: "%.3f", thresholds.centerMin)) - \(String(format: "%.3f", thresholds.centerMax))"
summary += "H-Range: \(String(format: "%.3f", thresholds.screenRightBound)) to \(String(format: "%.3f", thresholds.screenLeftBound))\n"
summary += "V-Range: \(String(format: "%.3f", thresholds.screenTopBound)) to \(String(format: "%.3f", thresholds.screenBottomBound))\n"
summary += "Ref Face Width: \(String(format: "%.3f", thresholds.referenceFaceWidth))"
}
return summary

View File

@@ -470,19 +470,66 @@ class EyeTrackingService: NSObject, ObservableObject {
if let leftRatio = leftGazeRatio,
let rightRatio = rightGazeRatio
{
let faceWidth = face.boundingBox.width
Task { @MainActor in
if CalibrationManager.shared.isCalibrating {
CalibrationManager.shared.collectSample(
leftRatio: leftRatio,
rightRatio: rightRatio
rightRatio: rightRatio,
leftVertical: leftVerticalRatio,
rightVertical: rightVerticalRatio,
faceWidthRatio: faceWidth
)
}
}
let avgRatio = (leftRatio + rightRatio) / 2.0
let lookingRight = avgRatio <= EyeTrackingConstants.pixelGazeMinRatio
let lookingLeft = avgRatio >= EyeTrackingConstants.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
let avgH = (leftRatio + rightRatio) / 2.0
// Use 0.5 as default for vertical if not available
let avgV = (leftVerticalRatio != nil && rightVerticalRatio != nil)
? (leftVerticalRatio! + rightVerticalRatio!) / 2.0
: 0.5
// Use Calibrated Thresholds from thread-safe state
if let thresholds = CalibrationState.shared.thresholds,
CalibrationState.shared.isComplete {
// 1. Distance Scaling
let currentFaceWidth = face.boundingBox.width
let refFaceWidth = thresholds.referenceFaceWidth
var distanceScale = 1.0
if refFaceWidth > 0 && currentFaceWidth > 0 {
distanceScale = refFaceWidth / currentFaceWidth
distanceScale = 1.0 + (distanceScale - 1.0) * EyeTrackingConstants.distanceSensitivity
}
// 2. Normalize Gaze
let centerH = (thresholds.screenLeftBound + thresholds.screenRightBound) / 2.0
let centerV = (thresholds.screenTopBound + thresholds.screenBottomBound) / 2.0
let deltaH = (avgH - centerH) * distanceScale
let deltaV = (avgV - centerV) * distanceScale
let normalizedH = centerH + deltaH
let normalizedV = centerV + deltaV
// 3. Boundary Check
let margin = EyeTrackingConstants.boundaryForgivenessMargin
let isLookingLeft = normalizedH > (thresholds.screenLeftBound + margin)
let isLookingRight = normalizedH < (thresholds.screenRightBound - margin)
let isLookingUp = normalizedV < (thresholds.screenTopBound - margin)
let isLookingDown = normalizedV > (thresholds.screenBottomBound + margin)
eyesLookingAway = isLookingLeft || isLookingRight || isLookingUp || isLookingDown
} else {
// Fallback to default constants
let lookingRight = avgH <= EyeTrackingConstants.pixelGazeMinRatio
let lookingLeft = avgH >= EyeTrackingConstants.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
}
}
}
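To see the distance scaling in numbers (illustrative values, assuming distanceSensitivity = 1.0):

// Calibrated at face width 0.50; the user has moved back, so it now reads 0.25.
let distanceScale = 0.50 / 0.25                       // 2.0: observed eye deltas are doubled
let centerH = (0.60 + 0.40) / 2.0                     // 0.50, midpoint of the screen bounds
let avgH = 0.56                                       // small raw deviation at the larger distance
let normalizedH = centerH + (avgH - centerH) * distanceScale  // 0.62, back on the calibrated scale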
@@ -621,6 +668,8 @@ class EyeTrackingService: NSObject, ObservableObject {
{
var leftGazeRatio: Double? = nil
var rightGazeRatio: Double? = nil
var leftVerticalRatio: Double? = nil
var rightVerticalRatio: Double? = nil
// Detect left pupil (side = 0)
if let leftResult = PupilDetector.detectPupil(
@@ -634,6 +683,10 @@ class EyeTrackingService: NSObject, ObservableObject {
pupilPosition: leftResult.pupilPosition,
eyeRegion: leftResult.eyeRegion
)
leftVerticalRatio = calculateVerticalRatioSync(
pupilPosition: leftResult.pupilPosition,
eyeRegion: leftResult.eyeRegion
)
}
// Detect right pupil (side = 1)
@@ -648,6 +701,10 @@ class EyeTrackingService: NSObject, ObservableObject {
pupilPosition: rightResult.pupilPosition,
eyeRegion: rightResult.eyeRegion
)
rightVerticalRatio = calculateVerticalRatioSync(
pupilPosition: rightResult.pupilPosition,
eyeRegion: rightResult.eyeRegion
)
}
// CRITICAL: Connect to CalibrationManager
@@ -655,38 +712,115 @@ class EyeTrackingService: NSObject, ObservableObject {
let leftRatio = leftGazeRatio,
let rightRatio = rightGazeRatio
{
// Calculate face width ratio for distance estimation
let faceWidthRatio = face.boundingBox.width
CalibrationManager.shared.collectSample(
leftRatio: leftRatio,
rightRatio: rightRatio
rightRatio: rightRatio,
leftVertical: leftVerticalRatio,
rightVertical: rightVerticalRatio,
faceWidthRatio: faceWidthRatio
)
}
// Determine looking away using calibrated thresholds
if let leftRatio = leftGazeRatio, let rightRatio = rightGazeRatio {
let avgRatio = (leftRatio + rightRatio) / 2.0
let lookingRight = avgRatio <= EyeTrackingConstants.pixelGazeMinRatio
let lookingLeft = avgRatio >= EyeTrackingConstants.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
if shouldLog {
print(
"👁️ PIXEL GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgRatio)) Away=\(eyesLookingAway)"
)
print(
" Thresholds: Min=\(String(format: "%.3f", EyeTrackingConstants.pixelGazeMinRatio)) Max=\(String(format: "%.3f", EyeTrackingConstants.pixelGazeMaxRatio))"
)
// Determine looking away using calibrated thresholds
if let leftRatio = leftGazeRatio, let rightRatio = rightGazeRatio {
let avgH = (leftRatio + rightRatio) / 2.0
// Use 0.5 as default for vertical if not available (though it should be)
let avgV = (leftVerticalRatio != nil && rightVerticalRatio != nil)
? (leftVerticalRatio! + rightVerticalRatio!) / 2.0
: 0.5
// Use Calibrated Thresholds if available
// Use thread-safe state instead of accessing CalibrationManager.shared (MainActor)
if let thresholds = CalibrationState.shared.thresholds,
CalibrationState.shared.isComplete {
// 1. Distance Scaling
// If current face is SMALLER than reference, user is FURTHER away.
// Eyes move LESS for same screen angle. We need to SCALE UP the deviation.
let currentFaceWidth = face.boundingBox.width
let refFaceWidth = thresholds.referenceFaceWidth
var distanceScale = 1.0
if refFaceWidth > 0 && currentFaceWidth > 0 {
// Simple linear scaling: scale = ref / current
// e.g. Ref=0.5, Current=0.25 (further) -> Scale=2.0
distanceScale = refFaceWidth / currentFaceWidth
// Apply sensitivity tuning
distanceScale = 1.0 + (distanceScale - 1.0) * EyeTrackingConstants.distanceSensitivity
}
// 2. Normalize Gaze (Center Relative)
// Center is the midpoint of the calibrated screen bounds; the delta from
// that center is what gets scaled by the distance factor.
let centerH = (thresholds.screenLeftBound + thresholds.screenRightBound) / 2.0
let centerV = (thresholds.screenTopBound + thresholds.screenBottomBound) / 2.0
let deltaH = (avgH - centerH) * distanceScale
let deltaV = (avgV - centerV) * distanceScale
let normalizedH = centerH + deltaH
let normalizedV = centerV + deltaV
// 3. Boundary Check with Margin
// "Forgiveness" expands the safe zone (screen bounds).
// If you are IN the margin, you are considered ON SCREEN (Safe).
// Looking Away means passing the (Bound + Margin).
let margin = EyeTrackingConstants.boundaryForgivenessMargin
// Check Left (Higher Ratio)
// Screen Left is e.g. 0.7. Looking Left > 0.7.
// To look away, must exceed (0.7 + margin).
let isLookingLeft = normalizedH > (thresholds.screenLeftBound + margin)
// Check Right (Lower Ratio)
// Screen Right is e.g. 0.3. Looking Right < 0.3.
// To look away, must be less than (0.3 - margin).
let isLookingRight = normalizedH < (thresholds.screenRightBound - margin)
// Check Up (Lower Ratio, usually)
let isLookingUp = normalizedV < (thresholds.screenTopBound - margin)
// Check Down (Higher Ratio, usually)
let isLookingDown = normalizedV > (thresholds.screenBottomBound + margin)
eyesLookingAway = isLookingLeft || isLookingRight || isLookingUp || isLookingDown
if shouldLog {
print("👁️ CALIBRATED GAZE: AvgH=\(String(format: "%.2f", avgH)) AvgV=\(String(format: "%.2f", avgV)) DistScale=\(String(format: "%.2f", distanceScale))")
print(" NormH=\(String(format: "%.2f", normalizedH)) NormV=\(String(format: "%.2f", normalizedV)) Away=\(eyesLookingAway)")
print(" Bounds: H[\(String(format: "%.2f", thresholds.screenRightBound))-\(String(format: "%.2f", thresholds.screenLeftBound))] V[\(String(format: "%.2f", thresholds.screenTopBound))-\(String(format: "%.2f", thresholds.screenBottomBound))]")
}
} else {
// Fallback to default constants
let lookingRight = avgH <= EyeTrackingConstants.pixelGazeMinRatio
let lookingLeft = avgH >= EyeTrackingConstants.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
}
// Update debug values
Task { @MainActor in
debugLeftPupilRatio = leftGazeRatio
debugRightPupilRatio = rightGazeRatio
debugLeftVerticalRatio = leftVerticalRatio
debugRightVerticalRatio = rightVerticalRatio
}
if shouldLog && !CalibrationState.shared.isComplete {
print(
"👁️ RAW GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgH)) Away=\(eyesLookingAway)"
)
}
} else {
if shouldLog {
print("⚠️ Pixel pupil detection failed for one or both eyes")
}
}
// Update debug values
Task { @MainActor in
debugLeftPupilRatio = leftGazeRatio
debugRightPupilRatio = rightGazeRatio
}
} else {
if shouldLog {
if pixelBuffer == nil {

View File

@@ -746,11 +746,24 @@ final class PupilDetector: @unchecked Sendable {
) {
let size = width * height
guard size > 0 else { return }
// SIMPLIFIED: Skip blur to avoid contaminating dark pupil pixels with bright mask pixels
// Apply binary threshold directly to input
// 1. Apply Gaussian Blur (reduces noise)
// We reuse tempBuffer for intermediate steps if available, or just output
// Note: gaussianBlurOptimized writes from input -> output
gaussianBlurOptimized(input: input, output: output, width: width, height: height)
// 2. Apply Erosion (expands dark regions)
// Python: cv2.erode(kernel, iterations=3)
// This helps connect broken parts of the pupil
// Note: erodeOptimized processes in-place on output if input==output
erodeOptimized(input: output, output: output, width: width, height: height, iterations: 3)
// 3. Binary Threshold
for i in 0..<size {
output[i] = input[i] > UInt8(threshold) ? 255 : 0
// Python: cv2.threshold(..., cv2.THRESH_BINARY)
// Pixels > threshold become 255 (white), others 0 (black)
// So Pupil is BLACK (0)
output[i] = output[i] > UInt8(threshold) ? 255 : 0
}
}
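The binary step's polarity is easy to check in isolation (illustrative threshold and pixel values):

let threshold = 40
let pixels: [UInt8] = [12, 38, 41, 200]
let binary = pixels.map { $0 > UInt8(threshold) ? UInt8(255) : UInt8(0) }
// binary == [0, 0, 255, 255] -- dark pupil pixels survive as 0 (black)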
@@ -851,47 +864,103 @@ final class PupilDetector: @unchecked Sendable {
// MARK: - Optimized Contour Detection
/// Optimized centroid-of-dark-pixels approach - much faster than union-find
/// Returns the centroid of the largest dark region
/// Finds the largest connected component of dark pixels and returns its centroid
/// This is much more robust than averaging all dark pixels, as it ignores shadows/noise
private nonisolated static func findPupilFromContoursOptimized(
data: UnsafePointer<UInt8>,
width: Int,
height: Int
) -> (x: Double, y: Double)? {
// Optimized approach: find centroid of all black pixels
// This works well for pupil detection since the pupil is the main dark blob
var sumX: Int = 0
var sumY: Int = 0
var count: Int = 0
// After binary thresholding, pixels are 0 (black/pupil) or 255 (white/background)
// Use threshold of 128 to catch any pixels that are closer to black
let threshold = UInt8(128)
// Process entire image to get accurate centroid
let size = width * height
// 1. Setup: the input is already binarized, so dark pixels are exactly 0.
// A flat visited array tracks processed pixels during the flood fill
// (a flat Bool array is fast and avoids per-pixel allocation).
var visited = [Bool](repeating: false, count: size)
var maxBlobSize = 0
var maxBlobSumX = 0
var maxBlobSumY = 0
// 2. Iterate through pixels to find connected components
for y in 0..<height {
let rowOffset = y * width
for x in 0..<width {
if data[rowOffset + x] < threshold {
sumX += x
sumY += y
count += 1
let idx = rowOffset + x
// If it's a dark pixel (0) and not visited, start a flood fill
if data[idx] == 0 && !visited[idx] {
var currentBlobSize = 0
var currentBlobSumX = 0
var currentBlobSumY = 0
// Stack for DFS/BFS (using array as stack is fast in Swift)
var stack: [Int] = [idx]
visited[idx] = true
while let currentIdx = stack.popLast() {
let cx = currentIdx % width
let cy = currentIdx / width
currentBlobSize += 1
currentBlobSumX += cx
currentBlobSumY += cy
// Check 4 neighbors
// Right
if cx + 1 < width {
let nIdx = currentIdx + 1
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Left
if cx - 1 >= 0 {
let nIdx = currentIdx - 1
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Down
if cy + 1 < height {
let nIdx = currentIdx + width
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Up
if cy - 1 >= 0 {
let nIdx = currentIdx - width
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
}
// Check if this is the largest blob so far
if currentBlobSize > maxBlobSize {
maxBlobSize = currentBlobSize
maxBlobSumX = currentBlobSumX
maxBlobSumY = currentBlobSumY
}
}
}
}
if enableDiagnosticLogging && count < 5 {
logDebug("👁 PupilDetector: Dark pixel count = \(count) (need >= 5)")
if enableDiagnosticLogging && maxBlobSize < 5 {
logDebug("👁 PupilDetector: Largest blob size = \(maxBlobSize) (need >= 5)")
}
// Minimum 5 pixels for valid pupil (reduced from 10 for small eye regions)
guard count >= 5 else { return nil }
// Minimum 5 pixels for valid pupil
guard maxBlobSize >= 5 else { return nil }
return (
x: Double(sumX) / Double(count),
y: Double(sumY) / Double(count)
x: Double(maxBlobSumX) / Double(maxBlobSize),
y: Double(maxBlobSumY) / Double(maxBlobSize)
)
}