general: implementation of what will be needed - perf tuning needed

This commit is contained in:
Michael Freno
2026-01-15 01:22:18 -05:00
parent f7eb6dfe34
commit 67d52cc333
7 changed files with 1328 additions and 413 deletions

View File

@@ -54,6 +54,16 @@ class EyeTrackingConstants: ObservableObject {
@Published var maxPupilRatio: Double = 0.45
@Published var maxPupilEnabled: Bool = true
// MARK: - Pixel-Based Gaze Detection Thresholds
/// Python GazeTracking thresholds for pixel-based pupil detection
/// Formula: pupilX / (eyeCenterX * 2 - 10)
/// Looking right: ratio <= 0.35
/// Looking center: 0.35 < ratio < 0.65
/// Looking left: ratio >= 0.65
@Published var pixelGazeMinRatio: Double = 0.35 // Looking right threshold
@Published var pixelGazeMaxRatio: Double = 0.65 // Looking left threshold
@Published var pixelGazeEnabled: Bool = true
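For reference, a minimal sketch (not part of this commit) of how these thresholds classify an averaged ratio; it mirrors the avgRatio comparisons used later in EyeTrackingService, and the classifyGaze helper is hypothetical:
// Hypothetical helper, for illustration only.
func classifyGaze(averageRatio: Double, constants: EyeTrackingConstants) -> String {
    if averageRatio <= constants.pixelGazeMinRatio { return "right" }  // e.g. 0.30 -> "right"
    if averageRatio >= constants.pixelGazeMaxRatio { return "left" }   // e.g. 0.70 -> "left"
    return "center"                                                    // e.g. 0.50 -> "center"
}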
private init() {}
// MARK: - Reset to Defaults

View File

@@ -0,0 +1,198 @@
//
// CalibrationData.swift
// Gaze
//
// Created by Mike Freno on 1/15/26.
//
import Foundation
// MARK: - Calibration Models
enum CalibrationStep: String, Codable, CaseIterable {
case center
case farLeft
case left
case farRight
case right
case up
case down
case topLeft
case topRight
case bottomLeft
case bottomRight
var displayName: String {
switch self {
case .center: return "Center"
case .farLeft: return "Far Left"
case .left: return "Left"
case .farRight: return "Far Right"
case .right: return "Right"
case .up: return "Up"
case .down: return "Down"
case .topLeft: return "Top Left"
case .topRight: return "Top Right"
case .bottomLeft: return "Bottom Left"
case .bottomRight: return "Bottom Right"
}
}
var instructionText: String {
switch self {
case .center:
return "Look at the center of the screen"
case .farLeft:
return "Look as far left as comfortable"
case .left:
return "Look to the left"
case .farRight:
return "Look as far right as comfortable"
case .right:
return "Look to the right"
case .up:
return "Look up"
case .down:
return "Look down"
case .topLeft:
return "Look to the top left corner"
case .topRight:
return "Look to the top right corner"
case .bottomLeft:
return "Look to the bottom left corner"
case .bottomRight:
return "Look to the bottom right corner"
}
}
}
struct GazeSample: Codable {
let leftRatio: Double?
let rightRatio: Double?
let averageRatio: Double
let timestamp: Date
init(leftRatio: Double?, rightRatio: Double?) {
self.leftRatio = leftRatio
self.rightRatio = rightRatio
// Calculate average from available ratios
if let left = leftRatio, let right = rightRatio {
self.averageRatio = (left + right) / 2.0
} else {
self.averageRatio = leftRatio ?? rightRatio ?? 0.5
}
self.timestamp = Date()
}
}
struct GazeThresholds: Codable {
let minLeftRatio: Double // Looking left threshold (e.g., 0.65)
let maxRightRatio: Double // Looking right threshold (e.g., 0.35)
let centerMin: Double // Center range minimum
let centerMax: Double // Center range maximum
var isValid: Bool {
// Ensure thresholds don't overlap
return maxRightRatio < centerMin &&
centerMin < centerMax &&
centerMax < minLeftRatio
}
static var defaultThresholds: GazeThresholds {
GazeThresholds(
minLeftRatio: 0.65,
maxRightRatio: 0.35,
centerMin: 0.40,
centerMax: 0.60
)
}
}
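For orientation, a tiny sketch (not in the commit) of how the default thresholds partition the ratio axis:
// Illustration with the default values:
let t = GazeThresholds.defaultThresholds
// ratio <= 0.35    -> looking right  (t.maxRightRatio)
// 0.40 ... 0.60    -> looking center (t.centerMin ... t.centerMax)
// ratio >= 0.65    -> looking left   (t.minLeftRatio)
print(t.isValid) // true, since 0.35 < 0.40 < 0.60 < 0.65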
struct CalibrationData: Codable {
var samples: [CalibrationStep: [GazeSample]]
var computedThresholds: GazeThresholds?
var calibrationDate: Date
var isComplete: Bool
init() {
self.samples = [:]
self.computedThresholds = nil
self.calibrationDate = Date()
self.isComplete = false
}
mutating func addSample(_ sample: GazeSample, for step: CalibrationStep) {
if samples[step] == nil {
samples[step] = []
}
samples[step]?.append(sample)
}
func getSamples(for step: CalibrationStep) -> [GazeSample] {
return samples[step] ?? []
}
func averageRatio(for step: CalibrationStep) -> Double? {
let stepSamples = getSamples(for: step)
guard !stepSamples.isEmpty else { return nil }
let sum = stepSamples.reduce(0.0) { $0 + $1.averageRatio }
return sum / Double(stepSamples.count)
}
func standardDeviation(for step: CalibrationStep) -> Double? {
let stepSamples = getSamples(for: step)
guard stepSamples.count > 1, let mean = averageRatio(for: step) else { return nil }
let variance = stepSamples.reduce(0.0) { sum, sample in
let diff = sample.averageRatio - mean
return sum + (diff * diff)
} / Double(stepSamples.count - 1)
return sqrt(variance)
}
mutating func calculateThresholds() {
// Need at least center, left, and right samples
guard let centerMean = averageRatio(for: .center),
let leftMean = averageRatio(for: .left),
let rightMean = averageRatio(for: .right) else {
print("⚠️ Insufficient calibration data to calculate thresholds")
return
}
let centerStdDev = standardDeviation(for: .center) ?? 0.05
// Calculate center range (mean ± 0.5 * std_dev)
let centerMin = max(0.0, centerMean - 0.5 * centerStdDev)
let centerMax = min(1.0, centerMean + 0.5 * centerStdDev)
// Calculate left threshold (midpoint between center and left extremes)
let minLeftRatio = centerMax + (leftMean - centerMax) * 0.5
// Calculate right threshold (midpoint between center and right extremes)
let maxRightRatio = centerMin - (centerMin - rightMean) * 0.5
// Validate and adjust if needed
var thresholds = GazeThresholds(
minLeftRatio: min(0.95, max(0.55, minLeftRatio)),
maxRightRatio: max(0.05, min(0.45, maxRightRatio)),
centerMin: centerMin,
centerMax: centerMax
)
// Ensure no overlap
if !thresholds.isValid {
print("⚠️ Computed thresholds overlap, using defaults")
thresholds = GazeThresholds.defaultThresholds
}
self.computedThresholds = thresholds
print("✓ Calibration thresholds calculated:")
print(" Left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))")
print(" Center: \(String(format: "%.3f", thresholds.centerMin))-\(String(format: "%.3f", thresholds.centerMax))")
print(" Right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))")
}
}
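A rough usage sketch of this model (illustrative only; the sample ratios below are made up):
// Feed a few made-up samples and derive thresholds.
var data = CalibrationData()
data.addSample(GazeSample(leftRatio: 0.52, rightRatio: 0.50), for: .center)
data.addSample(GazeSample(leftRatio: 0.71, rightRatio: 0.69), for: .left)
data.addSample(GazeSample(leftRatio: 0.31, rightRatio: 0.29), for: .right)
data.calculateThresholds()
if let t = data.computedThresholds, t.isValid {
    print("center range \(t.centerMin)...\(t.centerMax)")
}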

View File

@@ -0,0 +1,237 @@
//
// CalibrationManager.swift
// Gaze
//
// Created by Mike Freno on 1/15/26.
//
import Foundation
import Combine
@MainActor
class CalibrationManager: ObservableObject {
static let shared = CalibrationManager()
// MARK: - Published Properties
@Published var isCalibrating = false
@Published var currentStep: CalibrationStep?
@Published var currentStepIndex = 0
@Published var samplesCollected = 0
@Published var calibrationData = CalibrationData()
// MARK: - Configuration
private let samplesPerStep = 20 // Collect 20 samples per calibration point (roughly 0.7 seconds at 30 fps)
private let userDefaultsKey = "eyeTrackingCalibration"
private let calibrationValidityDays = 30
// Calibration sequence (9 steps)
private let calibrationSteps: [CalibrationStep] = [
.center,
.left,
.right,
.farLeft,
.farRight,
.up,
.down,
.topLeft,
.topRight
]
// MARK: - Initialization
private init() {
loadCalibration()
}
// MARK: - Calibration Flow
func startCalibration() {
print("🎯 Starting calibration...")
isCalibrating = true
currentStepIndex = 0
currentStep = calibrationSteps[0]
samplesCollected = 0
calibrationData = CalibrationData()
}
func collectSample(leftRatio: Double?, rightRatio: Double?) {
guard isCalibrating, let step = currentStep else { return }
let sample = GazeSample(leftRatio: leftRatio, rightRatio: rightRatio)
calibrationData.addSample(sample, for: step)
samplesCollected += 1
// Move to next step when enough samples collected
if samplesCollected >= samplesPerStep {
advanceToNextStep()
}
}
private func advanceToNextStep() {
currentStepIndex += 1
if currentStepIndex < calibrationSteps.count {
// Move to next calibration point
currentStep = calibrationSteps[currentStepIndex]
samplesCollected = 0
print("📍 Calibration step: \(currentStep?.displayName ?? "unknown")")
} else {
// All steps complete
finishCalibration()
}
}
func skipStep() {
// Allow skipping optional steps (up, down, diagonals)
guard isCalibrating, let step = currentStep else { return }
print("⏭️ Skipping calibration step: \(step.displayName)")
advanceToNextStep()
}
func finishCalibration() {
print("✓ Calibration complete, calculating thresholds...")
calibrationData.calculateThresholds()
calibrationData.isComplete = true
calibrationData.calibrationDate = Date()
saveCalibration()
applyCalibration()
isCalibrating = false
currentStep = nil
currentStepIndex = 0
samplesCollected = 0
print("✓ Calibration saved and applied")
}
func cancelCalibration() {
print("❌ Calibration cancelled")
isCalibrating = false
currentStep = nil
currentStepIndex = 0
samplesCollected = 0
calibrationData = CalibrationData()
}
// MARK: - Persistence
private func saveCalibration() {
do {
let encoder = JSONEncoder()
encoder.dateEncodingStrategy = .iso8601
let data = try encoder.encode(calibrationData)
UserDefaults.standard.set(data, forKey: userDefaultsKey)
print("💾 Calibration data saved to UserDefaults")
} catch {
print("❌ Failed to save calibration: \(error)")
}
}
func loadCalibration() {
guard let data = UserDefaults.standard.data(forKey: userDefaultsKey) else {
print(" No existing calibration found")
return
}
do {
let decoder = JSONDecoder()
decoder.dateDecodingStrategy = .iso8601
calibrationData = try decoder.decode(CalibrationData.self, from: data)
if isCalibrationValid() {
print("✓ Loaded valid calibration from \(calibrationData.calibrationDate)")
applyCalibration()
} else {
print("⚠️ Calibration expired, needs recalibration")
}
} catch {
print("❌ Failed to load calibration: \(error)")
}
}
func clearCalibration() {
UserDefaults.standard.removeObject(forKey: userDefaultsKey)
calibrationData = CalibrationData()
print("🗑️ Calibration data cleared")
}
// MARK: - Validation
func isCalibrationValid() -> Bool {
guard calibrationData.isComplete,
let thresholds = calibrationData.computedThresholds,
thresholds.isValid else {
return false
}
// Check if calibration is not too old
let daysSinceCalibration = Calendar.current.dateComponents(
[.day],
from: calibrationData.calibrationDate,
to: Date()
).day ?? 0
return daysSinceCalibration < calibrationValidityDays
}
func needsRecalibration() -> Bool {
return !isCalibrationValid()
}
// MARK: - Apply Calibration
private func applyCalibration() {
guard let thresholds = calibrationData.computedThresholds else {
print("⚠️ No thresholds to apply")
return
}
let constants = EyeTrackingConstants.shared
// pixelGazeMinRatio is the "looking right" cutoff and pixelGazeMaxRatio the "looking left" cutoff,
// so they map to maxRightRatio and minLeftRatio respectively
constants.pixelGazeMinRatio = thresholds.maxRightRatio
constants.pixelGazeMaxRatio = thresholds.minLeftRatio
print("✓ Applied calibrated thresholds:")
print(" Looking left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))")
print(" Looking right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))")
}
// MARK: - Statistics
func getCalibrationSummary() -> String {
guard calibrationData.isComplete else {
return "No calibration data"
}
let dateFormatter = DateFormatter()
dateFormatter.dateStyle = .medium
dateFormatter.timeStyle = .short
var summary = "Calibrated: \(dateFormatter.string(from: calibrationData.calibrationDate))\n"
if let thresholds = calibrationData.computedThresholds {
summary += "Left threshold: \(String(format: "%.3f", thresholds.minLeftRatio))\n"
summary += "Right threshold: \(String(format: "%.3f", thresholds.maxRightRatio))\n"
summary += "Center range: \(String(format: "%.3f", thresholds.centerMin)) - \(String(format: "%.3f", thresholds.centerMax))"
}
return summary
}
// MARK: - Progress
var progress: Double {
let totalSteps = calibrationSteps.count
let completedSteps = currentStepIndex
let currentProgress = Double(samplesCollected) / Double(samplesPerStep)
return (Double(completedSteps) + currentProgress) / Double(totalSteps)
}
var progressText: String {
"\(currentStepIndex + 1) of \(calibrationSteps.count)"
}
}
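A sketch of how the manager is expected to be driven (illustrative only; the per-frame ratios come from the gaze code shown in EyeTrackingService below):
// Illustrative flow, run on the main actor.
let manager = CalibrationManager.shared
manager.startCalibration()
// For every processed camera frame while calibrating:
// manager.collectSample(leftRatio: leftGazeRatio, rightRatio: rightGazeRatio)
// The manager advances steps automatically and calls finishCalibration()
// after the last step, which saves and applies the computed thresholds.
print(manager.progressText) // e.g. "1 of 9"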

View File

@@ -127,7 +127,7 @@ class EyeTrackingService: NSObject, ObservableObject {
self.videoOutput = output
}
private func processFaceObservations(_ observations: [VNFaceObservation]?, imageSize: CGSize) {
private func processFaceObservations(_ observations: [VNFaceObservation]?, imageSize: CGSize, pixelBuffer: CVPixelBuffer? = nil) {
guard let observations = observations, !observations.isEmpty else {
faceDetected = false
userLookingAtScreen = false
@@ -171,11 +171,13 @@ class EyeTrackingService: NSObject, ObservableObject {
face: face,
landmarks: landmarks,
imageSize: imageSize,
pixelBuffer: pixelBuffer,
shouldLog: enableDebugLogging
)
userLookingAtScreen = !lookingAway
}
private func detectEyesClosed(
leftEye: VNFaceLandmarkRegion2D, rightEye: VNFaceLandmarkRegion2D, shouldLog: Bool
) -> Bool {
@@ -214,7 +216,7 @@ class EyeTrackingService: NSObject, ObservableObject {
}
private func detectLookingAway(
face: VNFaceObservation, landmarks: VNFaceLandmarks2D, imageSize: CGSize, shouldLog: Bool
face: VNFaceObservation, landmarks: VNFaceLandmarks2D, imageSize: CGSize, pixelBuffer: CVPixelBuffer?, shouldLog: Bool
) -> Bool {
let constants = EyeTrackingConstants.shared
@@ -266,99 +268,86 @@ class EyeTrackingService: NSObject, ObservableObject {
}
}
// 2. Eye Gaze Check (Pixel-Based Pupil Detection)
var eyesLookingAway = false
if let pixelBuffer = pixelBuffer,
let leftEye = landmarks.leftEye,
let rightEye = landmarks.rightEye,
constants.pixelGazeEnabled
{
var leftGazeRatio: Double? = nil
var rightGazeRatio: Double? = nil
// Detect left pupil (side = 0)
if let leftResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: leftEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 0
) {
leftGazeRatio = calculateGazeRatio(
pupilPosition: leftResult.pupilPosition,
eyeRegion: leftResult.eyeRegion
)
}
// NEW: Use inter-eye distance method
let gazeOffsets = calculateGazeUsingInterEyeDistance(
leftEye: leftEye,
rightEye: rightEye,
leftPupil: leftPupil,
rightPupil: rightPupil,
imageSize: imageSize,
faceBoundingBox: face.boundingBox
)
let leftRatio = calculatePupilHorizontalRatio(
eye: leftEye,
pupil: leftPupil,
imageSize: imageSize,
faceBoundingBox: face.boundingBox
)
let rightRatio = calculatePupilHorizontalRatio(
eye: rightEye,
pupil: rightPupil,
imageSize: imageSize,
faceBoundingBox: face.boundingBox
)
// Debug logging
if shouldLog {
print(
"👁️ Pupil Ratios (OLD METHOD) - Left: \(String(format: "%.3f", leftRatio)), Right: \(String(format: "%.3f", rightRatio))"
)
print(
"👁️ Gaze Offsets (NEW METHOD) - Left: \(String(format: "%.3f", gazeOffsets.leftGaze)), Right: \(String(format: "%.3f", gazeOffsets.rightGaze))"
)
print(
"👁️ Thresholds - Min: \(constants.minPupilRatio), Max: \(constants.maxPupilRatio)"
) )
} }
// Detect right pupil (side = 1)
if let rightResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: rightEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 1
) {
rightGazeRatio = calculateGazeRatio(
pupilPosition: rightResult.pupilPosition,
eyeRegion: rightResult.eyeRegion
)
}
// CRITICAL: Connect to CalibrationManager
if CalibrationManager.shared.isCalibrating,
let leftRatio = leftGazeRatio,
let rightRatio = rightGazeRatio {
CalibrationManager.shared.collectSample(
leftRatio: leftRatio,
rightRatio: rightRatio
)
}
// Determine looking away using calibrated thresholds
if let leftRatio = leftGazeRatio, let rightRatio = rightGazeRatio {
let avgRatio = (leftRatio + rightRatio) / 2.0
let lookingRight = avgRatio <= constants.pixelGazeMinRatio
let lookingLeft = avgRatio >= constants.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
if shouldLog {
print("👁️ PIXEL GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgRatio)) Away=\(eyesLookingAway)")
print(" Thresholds: Min=\(String(format: "%.3f", constants.pixelGazeMinRatio)) Max=\(String(format: "%.3f", constants.pixelGazeMaxRatio))")
}
} else {
if shouldLog {
print("⚠️ Pixel pupil detection failed for one or both eyes")
}
}
// Update debug values
Task { @MainActor in
debugLeftPupilRatio = leftGazeRatio
debugRightPupilRatio = rightGazeRatio
}
// Normal range for "looking center" is roughly 0.3 to 0.7
// (0.0 = extreme right, 1.0 = extreme left relative to face)
// Note: Camera is mirrored, so logic might be inverted
var leftLookingAway = false
var rightLookingAway = false
// Check min pupil ratio if enabled
/*if constants.minPupilEnabled {*/
/*let minRatio = constants.minPupilRatio*/
/*if leftRatio < minRatio {*/
/*leftLookingAway = true*/
/*}*/
/*if rightRatio < minRatio {*/
/*rightLookingAway = true*/
/*}*/
/*}*/
/*// Check max pupil ratio if enabled*/
/*if constants.maxPupilEnabled {*/
/*let maxRatio = constants.maxPupilRatio*/
/*if leftRatio > maxRatio {*/
/*leftLookingAway = true*/
/*}*/
/*if rightRatio > maxRatio {*/
/*rightLookingAway = true*/
/*}*/
/*}*/
// Consider looking away if EITHER eye is off-center
// Changed from AND to OR logic because requiring both eyes makes detection too restrictive
// This is more sensitive but also more reliable for detecting actual looking away
eyesLookingAway = leftLookingAway || rightLookingAway
if shouldLog {
print(
"👁️ Looking Away - Left: \(leftLookingAway), Right: \(rightLookingAway), Either: \(eyesLookingAway)"
)
} }
} else {
if shouldLog {
if pixelBuffer == nil {
print("⚠️ No pixel buffer available for pupil detection")
} else if !constants.pixelGazeEnabled {
print("⚠️ Pixel gaze detection disabled in constants")
} else {
print("⚠️ Missing eye landmarks for pupil detection")
}
}
}
@@ -366,228 +355,32 @@ class EyeTrackingService: NSObject, ObservableObject {
return isLookingAway
}
private func calculatePupilHorizontalRatio(
eye: VNFaceLandmarkRegion2D,
pupil: VNFaceLandmarkRegion2D,
imageSize: CGSize,
faceBoundingBox: CGRect
) -> Double {
// Use normalizedPoints which are already normalized to face bounding box
let eyePoints = eye.normalizedPoints
let pupilPoints = pupil.normalizedPoints
// Throttle debug logging to every 0.5 seconds
let now = Date()
let shouldLog = now.timeIntervalSince(lastDebugLogTime) >= 0.5
if shouldLog {
lastDebugLogTime = now
print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
print("📊 EYE TRACKING DEBUG DATA")
print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
print("\n🖼️ IMAGE SIZE:")
print(" Width: \(imageSize.width), Height: \(imageSize.height)")
print("\n📦 FACE BOUNDING BOX (normalized):")
print(" Origin: (\(faceBoundingBox.origin.x), \(faceBoundingBox.origin.y))")
print(" Size: (\(faceBoundingBox.size.width), \(faceBoundingBox.size.height))")
print("\n👁️ EYE LANDMARK POINTS (normalized to face bounding box - from Vision):")
print(" Count: \(eyePoints.count)")
let eyeMinX = eyePoints.min(by: { $0.x < $1.x })?.x ?? 0
let eyeMaxX = eyePoints.max(by: { $0.x < $1.x })?.x ?? 0
for (index, point) in eyePoints.enumerated() {
var marker = ""
if abs(point.x - eyeMinX) < 0.0001 {
marker = " ← LEFTMOST (inner corner)"
} else if abs(point.x - eyeMaxX) < 0.0001 {
marker = " ← RIGHTMOST (outer corner)"
}
if index == 0 {
marker += " [FIRST]"
} else if index == eyePoints.count - 1 {
marker += " [LAST]"
}
print(
" [\(index)]: (\(String(format: "%.4f", point.x)), \(String(format: "%.4f", point.y)))\(marker)"
)
}
print("\n👁️ PUPIL LANDMARK POINTS (normalized to face bounding box - from Vision):")
print(" Count: \(pupilPoints.count)")
for (index, point) in pupilPoints.enumerated() {
print(
" [\(index)]: (\(String(format: "%.4f", point.x)), \(String(format: "%.4f", point.y)))"
)
}
if let minPoint = eyePoints.min(by: { $0.x < $1.x }),
let maxPoint = eyePoints.max(by: { $0.x < $1.x })
{
let eyeMinX = minPoint.x
let eyeMaxX = maxPoint.x
let eyeWidth = eyeMaxX - eyeMinX
let pupilCenterX = pupilPoints.map { $0.x }.reduce(0, +) / Double(pupilPoints.count)
let ratio = (pupilCenterX - eyeMinX) / eyeWidth
print("\n📏 CALCULATIONS:")
print(" Eye MinX: \(String(format: "%.4f", eyeMinX))")
print(" Eye MaxX: \(String(format: "%.4f", eyeMaxX))")
print(" Eye Width: \(String(format: "%.4f", eyeWidth))")
// Analyze different point pairs to find better eye width
if eyePoints.count >= 6 {
let cornerWidth = eyePoints[5].x - eyePoints[0].x
print(" Corner-to-Corner Width [0→5]: \(String(format: "%.4f", cornerWidth))")
// Try middle points too
if eyePoints.count >= 4 {
let midWidth = eyePoints[3].x - eyePoints[0].x
print(" Point [0→3] Width: \(String(format: "%.4f", midWidth))")
}
}
print(" Pupil Center X: \(String(format: "%.4f", pupilCenterX))")
print(" Pupil Min X: \(String(format: "%.4f", pupilPoints.min(by: { $0.x < $1.x })?.x ?? 0))")
print(" Pupil Max X: \(String(format: "%.4f", pupilPoints.max(by: { $0.x < $1.x })?.x ?? 0))")
print(" Final Ratio (current method): \(String(format: "%.4f", ratio))")
// Calculate alternate ratios
if eyePoints.count >= 6 {
let cornerWidth = eyePoints[5].x - eyePoints[0].x
if cornerWidth > 0 {
let cornerRatio = (pupilCenterX - eyePoints[0].x) / cornerWidth
print(" Alternate Ratio (using corners [0→5]): \(String(format: "%.4f", cornerRatio))")
}
}
}
print("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
}
guard !eyePoints.isEmpty, !pupilPoints.isEmpty else { return 0.5 }
guard let minPoint = eyePoints.min(by: { $0.x < $1.x }),
let maxPoint = eyePoints.max(by: { $0.x < $1.x })
else {
return 0.5
}
let eyeMinX = minPoint.x
let eyeMaxX = maxPoint.x
let eyeWidth = eyeMaxX - eyeMinX
guard eyeWidth > 0 else { return 0.5 }
let pupilCenterX = pupilPoints.map { $0.x }.reduce(0, +) / Double(pupilPoints.count)
// Calculate ratio (0.0 to 1.0) - already normalized to face bounding box by Vision
let ratio = (pupilCenterX - eyeMinX) / eyeWidth
return ratio
}
/// Calculate gaze ratio using Python GazeTracking algorithm
/// Formula: ratio = pupilX / (eyeCenterX * 2 - 10)
/// Returns: 0.0-1.0 (0.0 = far right, 1.0 = far left)
private func calculateGazeRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
let pupilX = Double(pupilPosition.x)
let eyeCenterX = Double(eyeRegion.center.x)
// Python formula from GazeTracking library
let denominator = (eyeCenterX * 2.0 - 10.0)
guard denominator > 0 else {
// Fallback to simple normalized position
let eyeLeft = Double(eyeRegion.frame.minX)
let eyeRight = Double(eyeRegion.frame.maxX)
let eyeWidth = eyeRight - eyeLeft
guard eyeWidth > 0 else { return 0.5 }
return (pupilX - eyeLeft) / eyeWidth
}
let ratio = pupilX / denominator
// Clamp to valid range
return max(0.0, min(1.0, ratio))
}
/// NEW APPROACH: Calculate gaze using inter-eye distance as reference
/// This works around Vision's limitation that eye landmarks only track the iris, not true eye corners
private func calculateGazeUsingInterEyeDistance(
leftEye: VNFaceLandmarkRegion2D,
rightEye: VNFaceLandmarkRegion2D,
leftPupil: VNFaceLandmarkRegion2D,
rightPupil: VNFaceLandmarkRegion2D,
imageSize: CGSize,
faceBoundingBox: CGRect
) -> (leftGaze: Double, rightGaze: Double) {
// CRITICAL: Convert from face-normalized coordinates to image coordinates
// normalizedPoints are relative to face bounding box, not stable for gaze tracking
// Helper to convert face-normalized point to image coordinates
func toImageCoords(_ point: CGPoint) -> CGPoint {
// Face bounding box origin is in Vision coordinates (bottom-left origin)
let imageX = faceBoundingBox.origin.x + point.x * faceBoundingBox.width
let imageY = faceBoundingBox.origin.y + point.y * faceBoundingBox.height
return CGPoint(x: imageX, y: imageY)
}
// Convert all points to image space
let leftEyePointsImg = leftEye.normalizedPoints.map { toImageCoords($0) }
let rightEyePointsImg = rightEye.normalizedPoints.map { toImageCoords($0) }
let leftPupilPointsImg = leftPupil.normalizedPoints.map { toImageCoords($0) }
let rightPupilPointsImg = rightPupil.normalizedPoints.map { toImageCoords($0) }
// Calculate eye centers (average of all iris boundary points)
let leftEyeCenterX = leftEyePointsImg.map { $0.x }.reduce(0, +) / Double(leftEyePointsImg.count)
let rightEyeCenterX = rightEyePointsImg.map { $0.x }.reduce(0, +) / Double(rightEyePointsImg.count)
// Calculate pupil centers
let leftPupilX = leftPupilPointsImg.map { $0.x }.reduce(0, +) / Double(leftPupilPointsImg.count)
let rightPupilX = rightPupilPointsImg.map { $0.x }.reduce(0, +) / Double(rightPupilPointsImg.count)
// Inter-eye distance (the distance between eye centers) - should be stable now
let interEyeDistance = abs(rightEyeCenterX - leftEyeCenterX)
// Estimate iris width as a fraction of inter-eye distance
// Typical human: inter-pupil distance ~63mm, iris width ~12mm ratio ~1/5
let irisWidth = interEyeDistance / 5.0
// Calculate gaze offset for each eye (positive = looking right, negative = looking left)
let leftGazeOffset = (leftPupilX - leftEyeCenterX) / irisWidth
let rightGazeOffset = (rightPupilX - rightEyeCenterX) / irisWidth
// Throttle debug logging
let now = Date()
let shouldLog = now.timeIntervalSince(lastDebugLogTime) >= 0.5
if shouldLog {
lastDebugLogTime = now
print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
print("📊 INTER-EYE DISTANCE GAZE (IMAGE COORDS)")
print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
print("\n🖼️ IMAGE SPACE:")
print(" Image Size: \(Int(imageSize.width)) x \(Int(imageSize.height))")
print(" Face Box: x=\(String(format: "%.3f", faceBoundingBox.origin.x)) w=\(String(format: "%.3f", faceBoundingBox.width))")
print("\n👁️ EYE CENTERS (image coords):")
print(" Left Eye Center X: \(String(format: "%.4f", leftEyeCenterX)) (\(Int(leftEyeCenterX * imageSize.width))px)")
print(" Right Eye Center X: \(String(format: "%.4f", rightEyeCenterX)) (\(Int(rightEyeCenterX * imageSize.width))px)")
print(" Inter-Eye Distance: \(String(format: "%.4f", interEyeDistance)) (\(Int(interEyeDistance * imageSize.width))px)")
print(" Estimated Iris Width: \(String(format: "%.4f", irisWidth)) (\(Int(irisWidth * imageSize.width))px)")
print("\n👁️ PUPIL POSITIONS (image coords):")
print(" Left Pupil X: \(String(format: "%.4f", leftPupilX)) (\(Int(leftPupilX * imageSize.width))px)")
print(" Right Pupil X: \(String(format: "%.4f", rightPupilX)) (\(Int(rightPupilX * imageSize.width))px)")
print("\n📏 PUPIL OFFSETS FROM EYE CENTER:")
print(" Left Offset: \(String(format: "%.4f", leftPupilX - leftEyeCenterX)) (\(Int((leftPupilX - leftEyeCenterX) * imageSize.width))px)")
print(" Right Offset: \(String(format: "%.4f", rightPupilX - rightEyeCenterX)) (\(Int((rightPupilX - rightEyeCenterX) * imageSize.width))px)")
print("\n📏 GAZE OFFSETS (normalized to iris width):")
print(" Left Gaze Offset: \(String(format: "%.4f", leftGazeOffset)) (0=center, +right, -left)")
print(" Right Gaze Offset: \(String(format: "%.4f", rightGazeOffset)) (0=center, +right, -left)")
print(" Average Gaze: \(String(format: "%.4f", (leftGazeOffset + rightGazeOffset) / 2))")
// Interpretation
let avgGaze = (leftGazeOffset + rightGazeOffset) / 2
var interpretation = ""
if avgGaze < -0.5 {
interpretation = "Looking LEFT"
} else if avgGaze > 0.5 {
interpretation = "Looking RIGHT"
} else {
interpretation = "Looking CENTER"
}
print(" Interpretation: \(interpretation)")
print("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
}
return (leftGazeOffset, rightGazeOffset)
}
} }
extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate {
@@ -616,7 +409,8 @@ extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate {
Task { @MainActor in
self.processFaceObservations(
request.results as? [VNFaceObservation],
imageSize: size,
pixelBuffer: pixelBuffer
)
}
}
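A worked example of the ratio formula above, with illustrative numbers only:
// pupilX = 28, eyeCenterX = 30
// denominator = 30 * 2 - 10 = 50
// ratio = 28 / 50 = 0.56  -> between 0.35 and 0.65, so treated as looking center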

View File

@@ -7,10 +7,20 @@
// Pixel-based pupil detection translated from Python GazeTracking library
// Original: https://github.com/antoinelame/GazeTracking
//
// This implementation closely follows the Python pipeline:
// 1. Isolate eye region with polygon mask (cv2.fillPoly equivalent)
// 2. Bilateral filter (cv2.bilateralFilter(eye_frame, 10, 15, 15))
// 3. Erosion with 3x3 kernel, 3 iterations (cv2.erode)
// 4. Binary threshold (cv2.threshold)
// 5. Find contours, sort by area, use second-largest (cv2.findContours)
// 6. Calculate centroid via moments (cv2.moments)
//
import CoreImage
import Vision
import Accelerate
import ImageIO
import UniformTypeIdentifiers
struct PupilPosition {
let x: CGFloat
@@ -18,24 +28,110 @@ struct PupilPosition {
}
struct EyeRegion {
let frame: CGRect // Bounding box of the eye in image coordinates
let center: CGPoint // Center point of the eye region
let origin: CGPoint
}
/// Calibration state for adaptive thresholding (matches Python Calibration class)
class PupilCalibration {
private let targetFrames = 20
private var thresholdsLeft: [Int] = []
private var thresholdsRight: [Int] = []
var isComplete: Bool {
thresholdsLeft.count >= targetFrames && thresholdsRight.count >= targetFrames
}
func threshold(forSide side: Int) -> Int {
let thresholds = side == 0 ? thresholdsLeft : thresholdsRight
guard !thresholds.isEmpty else { return 50 }
return thresholds.reduce(0, +) / thresholds.count
}
func evaluate(eyeData: [UInt8], width: Int, height: Int, side: Int) {
let bestThreshold = findBestThreshold(eyeData: eyeData, width: width, height: height)
if side == 0 {
thresholdsLeft.append(bestThreshold)
} else {
thresholdsRight.append(bestThreshold)
}
}
/// Finds optimal threshold by targeting ~48% iris coverage (matches Python)
private func findBestThreshold(eyeData: [UInt8], width: Int, height: Int) -> Int {
let averageIrisSize = 0.48
var trials: [Int: Double] = [:]
for threshold in stride(from: 5, to: 100, by: 5) {
let processed = PupilDetector.imageProcessing(
eyeData: eyeData,
width: width,
height: height,
threshold: threshold
)
let irisSize = Self.irisSize(data: processed, width: width, height: height)
trials[threshold] = irisSize
}
let best = trials.min { abs($0.value - averageIrisSize) < abs($1.value - averageIrisSize) }
return best?.key ?? 50
}
/// Returns percentage of dark pixels (iris area)
private static func irisSize(data: [UInt8], width: Int, height: Int) -> Double {
let margin = 5
guard width > margin * 2, height > margin * 2 else {
return 0
}
var blackCount = 0
var totalCount = 0
for y in margin..<(height - margin) {
for x in margin..<(width - margin) {
let index = y * width + x
if data[index] == 0 {
blackCount += 1
}
totalCount += 1
}
}
return totalCount > 0 ? Double(blackCount) / Double(totalCount) : 0
}
func reset() {
thresholdsLeft.removeAll()
thresholdsRight.removeAll()
}
}
class PupilDetector {
static var enableDebugImageSaving = false
private static var debugImageCounter = 0
/// Shared calibration instance
static let calibration = PupilCalibration()
/// Detects pupil position within an isolated eye region
/// Closely follows Python GazeTracking pipeline
/// - Parameters:
/// - pixelBuffer: The camera frame pixel buffer
/// - eyeLandmarks: Vision eye landmarks (6 points around iris)
/// - faceBoundingBox: Face bounding box from Vision
/// - imageSize: Size of the camera frame
/// - side: 0 for left eye, 1 for right eye
/// - threshold: Optional manual threshold (uses calibration if nil)
/// - Returns: Pupil position relative to eye region, or nil if detection fails
static func detectPupil(
in pixelBuffer: CVPixelBuffer,
eyeLandmarks: VNFaceLandmarkRegion2D,
faceBoundingBox: CGRect,
imageSize: CGSize,
side: Int = 0,
threshold: Int? = nil
) -> (pupilPosition: PupilPosition, eyeRegion: EyeRegion)? {
// Step 1: Convert Vision landmarks to pixel coordinates // Step 1: Convert Vision landmarks to pixel coordinates
@@ -47,33 +143,102 @@ class PupilDetector {
guard eyePoints.count >= 6 else { return nil }
// Step 2: Create eye region bounding box with margin
guard let eyeRegion = createEyeRegion(from: eyePoints, imageSize: imageSize) else {
return nil
}
// Step 3: Extract grayscale eye data from pixel buffer
guard let fullFrameData = extractGrayscaleData(from: pixelBuffer) else {
return nil
}
// Step 3: Extract and process eye region from pixel buffer
guard let eyeImage = extractEyeRegion(
from: pixelBuffer,
region: eyeRegion.frame,
mask: eyePoints
let frameWidth = CVPixelBufferGetWidth(pixelBuffer)
let frameHeight = CVPixelBufferGetHeight(pixelBuffer)
// Step 4: Isolate eye with polygon mask (matches Python _isolate method)
guard let (eyeData, eyeWidth, eyeHeight) = isolateEyeWithMask(
frameData: fullFrameData,
frameWidth: frameWidth,
frameHeight: frameHeight,
eyePoints: eyePoints,
region: eyeRegion
) else {
return nil
}
// Step 5: Get threshold (from calibration or parameter)
let effectiveThreshold: Int
if let manualThreshold = threshold {
effectiveThreshold = manualThreshold
} else if calibration.isComplete {
effectiveThreshold = calibration.threshold(forSide: side)
} else {
// Calibrate
calibration.evaluate(eyeData: eyeData, width: eyeWidth, height: eyeHeight, side: side)
effectiveThreshold = calibration.threshold(forSide: side)
}
// Step 6: Process image (bilateral filter + erosion + threshold)
let processedData = imageProcessing(
eyeData: eyeData,
width: eyeWidth,
height: eyeHeight,
threshold: effectiveThreshold
)
// Debug: Save processed images if enabled
if enableDebugImageSaving {
saveDebugImage(data: processedData, width: eyeWidth, height: eyeHeight, name: "processed_eye_\(debugImageCounter)")
debugImageCounter += 1
}
// Step 7: Find contours and compute centroid of second-largest
guard let (centroidX, centroidY) = findPupilFromContours(
data: processedData,
width: eyeWidth,
height: eyeHeight
) else {
return nil
}
let pupilPosition = PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY))
return (pupilPosition, eyeRegion)
}
// MARK: - Debug Helper
private static func saveDebugImage(data: [UInt8], width: Int, height: Int, name: String) {
guard let cgImage = createCGImage(from: data, width: width, height: height) else {
return
}
let url = URL(fileURLWithPath: "/tmp/\(name).png")
guard let destination = CGImageDestinationCreateWithURL(url as CFURL, UTType.png.identifier as CFString, 1, nil) else {
return
}
CGImageDestinationAddImage(destination, cgImage, nil)
CGImageDestinationFinalize(destination)
print("💾 Saved debug image: \(url.path)")
}
private static func createCGImage(from data: [UInt8], width: Int, height: Int) -> CGImage? {
var mutableData = data
guard let context = CGContext(
data: &mutableData,
width: width,
height: height,
bitsPerComponent: 8,
bytesPerRow: width,
space: CGColorSpaceCreateDeviceGray(),
bitmapInfo: CGImageAlphaInfo.none.rawValue
) else {
return nil
}
return context.makeImage()
}
// MARK: - Step 1: Convert Landmarks to Pixel Coordinates // MARK: - Step 1: Convert Landmarks to Pixel Coordinates
private static func landmarksToPixelCoordinates( private static func landmarksToPixelCoordinates(
@@ -82,7 +247,6 @@ class PupilDetector {
imageSize: CGSize
) -> [CGPoint] {
return landmarks.normalizedPoints.map { point in
// Vision coordinates are normalized to face bounding box
let imageX = (faceBoundingBox.origin.x + point.x * faceBoundingBox.width) * imageSize.width
let imageY = (faceBoundingBox.origin.y + point.y * faceBoundingBox.height) * imageSize.height
return CGPoint(x: imageX, y: imageY)
@@ -100,7 +264,6 @@ class PupilDetector {
let minY = points.map { $0.y }.min()! - margin
let maxY = points.map { $0.y }.max()! + margin
// Clamp to image bounds
let clampedMinX = max(0, minX)
let clampedMaxX = min(imageSize.width, maxX)
let clampedMinY = max(0, minY)
@@ -118,149 +281,335 @@ class PupilDetector {
y: frame.height / 2
)
let origin = CGPoint(x: clampedMinX, y: clampedMinY)
return EyeRegion(frame: frame, center: center, origin: origin)
}
// MARK: - Step 3: Extract Eye Region // MARK: - Step 3: Extract Grayscale Data from Pixel Buffer
private static func extractEyeRegion( private static func extractGrayscaleData(from pixelBuffer: CVPixelBuffer) -> [UInt8]? {
from pixelBuffer: CVPixelBuffer, CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
region: CGRect, defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }
mask: [CGPoint]
) -> CIImage? {
let ciImage = CIImage(cvPixelBuffer: pixelBuffer) let width = CVPixelBufferGetWidth(pixelBuffer)
let height = CVPixelBufferGetHeight(pixelBuffer)
let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
// Convert to grayscale guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
let grayscaleImage = ciImage.applyingFilter("CIPhotoEffectNoir") return nil
}
// Crop to eye region let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
let croppedImage = grayscaleImage.cropped(to: region) var grayscaleData = [UInt8](repeating: 0, count: width * height)
return croppedImage let buffer = baseAddress.assumingMemoryBound(to: UInt8.self)
switch pixelFormat {
case kCVPixelFormatType_32BGRA:
for y in 0..<height {
for x in 0..<width {
let offset = y * bytesPerRow + x * 4
let b = Double(buffer[offset])
let g = Double(buffer[offset + 1])
let r = Double(buffer[offset + 2])
let gray = UInt8(0.299 * r + 0.587 * g + 0.114 * b)
grayscaleData[y * width + x] = gray
}
}
case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
guard let yPlane = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else {
return nil
}
let yBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
let yBuffer = yPlane.assumingMemoryBound(to: UInt8.self)
for y in 0..<height {
for x in 0..<width {
grayscaleData[y * width + x] = yBuffer[y * yBytesPerRow + x]
}
}
default:
for y in 0..<height {
for x in 0..<width {
let offset = y * bytesPerRow + x * 4
if offset + 2 < bytesPerRow * height {
let b = Double(buffer[offset])
let g = Double(buffer[offset + 1])
let r = Double(buffer[offset + 2])
let gray = UInt8(0.299 * r + 0.587 * g + 0.114 * b)
grayscaleData[y * width + x] = gray
}
}
}
}
return grayscaleData
} }
// MARK: - Step 4: Process Eye Image // MARK: - Step 4: Isolate Eye with Polygon Mask (matches Python _isolate)
private static func processEyeImage(_ image: CIImage) -> CIImage? { private static func isolateEyeWithMask(
// Apply bilateral filter (preserves edges while smoothing) frameData: [UInt8],
// CIBilateralFilter approximation: use CIMedianFilter + morphology frameWidth: Int,
var processed = image frameHeight: Int,
eyePoints: [CGPoint],
region: EyeRegion
) -> (data: [UInt8], width: Int, height: Int)? {
// 1. Median filter (reduces noise while preserving edges) let minX = Int(region.frame.origin.x)
processed = processed.applyingFilter("CIMedianFilter") let minY = Int(region.frame.origin.y)
let eyeWidth = Int(region.frame.width)
let eyeHeight = Int(region.frame.height)
// 2. Morphological erosion (makes dark regions larger - approximates cv2.erode) guard eyeWidth > 0, eyeHeight > 0 else { return nil }
// Use CIMorphologyMinimum with small radius
processed = processed.applyingFilter("CIMorphologyMinimum", parameters: [
kCIInputRadiusKey: 2.0
])
// 3. Threshold to binary (black/white) // Create output buffer initialized to white (255) - outside mask
// Use CIColorControls to increase contrast, then threshold var eyeData = [UInt8](repeating: 255, count: eyeWidth * eyeHeight)
processed = processed.applyingFilter("CIColorControls", parameters: [
kCIInputContrastKey: 2.0,
kCIInputBrightnessKey: -0.3
])
// Apply color threshold to make it binary // Convert eye points to local coordinates
processed = processed.applyingFilter("CIColorThreshold", parameters: [ let localPoints = eyePoints.map { point in
"inputThreshold": 0.5 CGPoint(x: point.x - CGFloat(minX), y: point.y - CGFloat(minY))
]) }
// For each pixel in eye region, check if inside polygon
for y in 0..<eyeHeight {
for x in 0..<eyeWidth {
let localPoint = CGPoint(x: CGFloat(x), y: CGFloat(y))
if pointInPolygon(point: localPoint, polygon: localPoints) {
let frameX = minX + x
let frameY = minY + y
if frameX >= 0, frameX < frameWidth, frameY >= 0, frameY < frameHeight {
let frameIndex = frameY * frameWidth + frameX
let eyeIndex = y * eyeWidth + x
eyeData[eyeIndex] = frameData[frameIndex]
}
}
}
}
return (eyeData, eyeWidth, eyeHeight)
}
/// Point-in-polygon test using ray casting algorithm
private static func pointInPolygon(point: CGPoint, polygon: [CGPoint]) -> Bool {
guard polygon.count >= 3 else { return false }
var inside = false
var j = polygon.count - 1
for i in 0..<polygon.count {
let pi = polygon[i]
let pj = polygon[j]
if ((pi.y > point.y) != (pj.y > point.y)) &&
(point.x < (pj.x - pi.x) * (point.y - pi.y) / (pj.y - pi.y) + pi.x) {
inside = !inside
}
j = i
}
return inside
}
// MARK: - Step 5: Image Processing (matches Python image_processing)
/// Performs operations on the eye frame to isolate the iris
/// Matches Python: bilateralFilter -> erode -> threshold
static func imageProcessing(
eyeData: [UInt8],
width: Int,
height: Int,
threshold: Int
) -> [UInt8] {
var processed = eyeData
// 1. Bilateral filter approximation
// Python: cv2.bilateralFilter(eye_frame, 10, 15, 15)
processed = bilateralFilter(data: processed, width: width, height: height, d: 10, sigmaColor: 15, sigmaSpace: 15)
// 2. Erosion with 3x3 kernel, 3 iterations
// Python: cv2.erode(new_frame, kernel, iterations=3)
for _ in 0..<3 {
processed = erode3x3(data: processed, width: width, height: height)
}
// 3. Binary threshold
// Python: cv2.threshold(new_frame, threshold, 255, cv2.THRESH_BINARY)[1]
processed = binaryThreshold(data: processed, width: width, height: height, threshold: threshold)
return processed return processed
} }
// MARK: - Step 5: Find Pupil Centroid /// Bilateral filter approximation - preserves edges while smoothing
private static func bilateralFilter(
private static func findPupilCentroid(in image: CIImage) -> PupilPosition? { data: [UInt8],
let context = CIContext() width: Int,
height: Int,
d: Int,
sigmaColor: Double,
sigmaSpace: Double
) -> [UInt8] {
var output = data
let radius = d / 2
// Convert CIImage to CGImage for contour detection // Precompute spatial Gaussian weights
guard let cgImage = context.createCGImage(image, from: image.extent) else { var spatialWeights = [[Double]](repeating: [Double](repeating: 0, count: d), count: d)
return nil for dy in 0..<d {
for dx in 0..<d {
let dist = sqrt(Double((dy - radius) * (dy - radius) + (dx - radius) * (dx - radius)))
spatialWeights[dy][dx] = exp(-dist * dist / (2 * sigmaSpace * sigmaSpace))
}
} }
// Convert to vImage buffer for processing for y in radius..<(height - radius) {
guard let (width, height, data) = cgImageToGrayscaleData(cgImage) else { for x in radius..<(width - radius) {
return nil let centerIndex = y * width + x
let centerValue = Double(data[centerIndex])
var sum = 0.0
var weightSum = 0.0
for dy in 0..<d {
for dx in 0..<d {
let ny = y + dy - radius
let nx = x + dx - radius
let neighborIndex = ny * width + nx
let neighborValue = Double(data[neighborIndex])
let colorDiff = abs(neighborValue - centerValue)
let colorWeight = exp(-colorDiff * colorDiff / (2 * sigmaColor * sigmaColor))
let weight = spatialWeights[dy][dx] * colorWeight
sum += neighborValue * weight
weightSum += weight
}
}
output[centerIndex] = UInt8(max(0, min(255, sum / weightSum)))
}
} }
// Find connected components (contours) return output
guard let (centroidX, centroidY) = findLargestDarkRegionCentroid(
data: data,
width: width,
height: height
) else {
return nil
}
return PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY))
} }
// MARK: - Helper: Convert CGImage to Grayscale Data /// Erosion with 3x3 kernel (minimum filter)
private static func erode3x3(data: [UInt8], width: Int, height: Int) -> [UInt8] {
private static func cgImageToGrayscaleData(_ cgImage: CGImage) -> (width: Int, height: Int, data: [UInt8])? { var output = data
let width = cgImage.width
let height = cgImage.height
var data = [UInt8](repeating: 0, count: width * height) for y in 1..<(height - 1) {
for x in 1..<(width - 1) {
guard let context = CGContext( var minVal: UInt8 = 255
data: &data,
width: width, for dy in -1...1 {
height: height, for dx in -1...1 {
bitsPerComponent: 8, let index = (y + dy) * width + (x + dx)
bytesPerRow: width, minVal = min(minVal, data[index])
space: CGColorSpaceCreateDeviceGray(), }
bitmapInfo: CGImageAlphaInfo.none.rawValue }
) else {
return nil output[y * width + x] = minVal
}
} }
context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height)) return output
return (width, height, data)
} }
// MARK: - Helper: Find Centroid of Largest Dark Region /// Binary threshold
private static func binaryThreshold(data: [UInt8], width: Int, height: Int, threshold: Int) -> [UInt8] {
return data.map { pixel in
Int(pixel) > threshold ? UInt8(255) : UInt8(0)
}
}
private static func findLargestDarkRegionCentroid( // MARK: - Step 6: Find Contours and Centroid (matches Python detect_iris)
/// Finds contours, sorts by area, and returns centroid of second-largest
/// Matches Python: cv2.findContours + cv2.moments
private static func findPupilFromContours(
data: [UInt8], data: [UInt8],
width: Int, width: Int,
height: Int height: Int
) -> (x: Double, y: Double)? { ) -> (x: Double, y: Double)? {
// Calculate image moments to find centroid let contours = findContours(data: data, width: width, height: height)
// m00 = sum of all pixels (area)
// m10 = sum of (x * pixel_value)
// m01 = sum of (y * pixel_value)
// centroid_x = m10 / m00
// centroid_y = m01 / m00
var m00: Double = 0 guard contours.count >= 2 else {
var m10: Double = 0 if let largest = contours.max(by: { $0.count < $1.count }) {
var m01: Double = 0 return computeCentroid(contour: largest)
}
return nil
}
// Sort by area (pixel count) descending
let sorted = contours.sorted { $0.count > $1.count }
// Use second-largest contour (matches Python: contours[-2] after ascending sort)
let targetContour = sorted[1]
return computeCentroid(contour: targetContour)
}
/// Finds connected components of black pixels (value == 0)
private static func findContours(data: [UInt8], width: Int, height: Int) -> [[(x: Int, y: Int)]] {
var visited = [Bool](repeating: false, count: width * height)
var contours: [[(x: Int, y: Int)]] = []
for y in 0..<height { for y in 0..<height {
for x in 0..<width { for x in 0..<width {
let index = y * width + x let index = y * width + x
let pixelValue = 255 - Int(data[index]) // Invert: we want dark regions
if pixelValue > 128 { // Only count dark pixels if data[index] == 0 && !visited[index] {
let weight = Double(pixelValue) var contour: [(x: Int, y: Int)] = []
m00 += weight var stack = [(x, y)]
m10 += Double(x) * weight
m01 += Double(y) * weight while !stack.isEmpty {
let (cx, cy) = stack.removeLast()
let cIndex = cy * width + cx
if cx < 0 || cx >= width || cy < 0 || cy >= height {
continue
}
if visited[cIndex] || data[cIndex] != 0 {
continue
}
visited[cIndex] = true
contour.append((cx, cy))
// 8-connectivity
stack.append((cx + 1, cy))
stack.append((cx - 1, cy))
stack.append((cx, cy + 1))
stack.append((cx, cy - 1))
stack.append((cx + 1, cy + 1))
stack.append((cx - 1, cy - 1))
stack.append((cx + 1, cy - 1))
stack.append((cx - 1, cy + 1))
}
if !contour.isEmpty {
contours.append(contour)
}
} }
} }
} }
return contours
}
/// Computes centroid using image moments (matches cv2.moments)
private static func computeCentroid(contour: [(x: Int, y: Int)]) -> (x: Double, y: Double)? {
guard !contour.isEmpty else { return nil }
let m00 = Double(contour.count)
let m10 = contour.reduce(0.0) { $0 + Double($1.x) }
let m01 = contour.reduce(0.0) { $0 + Double($1.y) }
guard m00 > 0 else { return nil } guard m00 > 0 else { return nil }
let centroidX = m10 / m00 return (m10 / m00, m01 / m00)
let centroidY = m01 / m00
return (centroidX, centroidY)
} }
} }
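One small usage note, a sketch rather than part of this commit: the debug flag declared at the top of PupilDetector can be flipped to dump each thresholded eye crop for tuning.
// Illustration: enable before starting the camera session.
PupilDetector.enableDebugImageSaving = true
// Each detectPupil call then writes /tmp/processed_eye_<n>.png via saveDebugImage.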

View File

@@ -0,0 +1,272 @@
//
// EyeTrackingCalibrationView.swift
// Gaze
//
// Created by Mike Freno on 1/15/26.
//
import SwiftUI
struct EyeTrackingCalibrationView: View {
@StateObject private var calibrationManager = CalibrationManager.shared
@Environment(\.dismiss) private var dismiss
@State private var countdownValue = 3
@State private var isCountingDown = false
var body: some View {
ZStack {
// Full-screen black background
Color.black.ignoresSafeArea()
if calibrationManager.isCalibrating {
calibrationContentView
} else {
introductionScreenView
}
}
.frame(minWidth: 800, minHeight: 600)
}
// MARK: - Introduction Screen
private var introductionScreenView: some View {
VStack(spacing: 30) {
Image(systemName: "eye.circle.fill")
.font(.system(size: 80))
.foregroundColor(.blue)
Text("Eye Tracking Calibration")
.font(.largeTitle)
.fontWeight(.bold)
Text("This calibration will help improve eye tracking accuracy.")
.font(.title3)
.multilineTextAlignment(.center)
.foregroundColor(.secondary)
VStack(alignment: .leading, spacing: 15) {
InstructionRow(icon: "1.circle.fill", text: "Look at each target on the screen")
InstructionRow(icon: "2.circle.fill", text: "Keep your head still, only move your eyes")
InstructionRow(icon: "3.circle.fill", text: "Follow the countdown at each position")
InstructionRow(icon: "4.circle.fill", text: "Takes about 30-45 seconds")
}
.padding(.vertical, 20)
if calibrationManager.calibrationData.isComplete {
VStack(spacing: 10) {
Text("Last calibration:")
.font(.caption)
.foregroundColor(.secondary)
Text(calibrationManager.getCalibrationSummary())
.font(.caption)
.multilineTextAlignment(.center)
.foregroundColor(.secondary)
}
.padding(.vertical)
}
HStack(spacing: 20) {
Button("Cancel") {
dismiss()
}
.keyboardShortcut(.escape, modifiers: [])
Button("Start Calibration") {
startCalibration()
}
.keyboardShortcut(.return, modifiers: [])
.buttonStyle(.borderedProminent)
}
.padding(.top, 20)
}
.padding(60)
.frame(maxWidth: 600)
}
// MARK: - Calibration Content
private var calibrationContentView: some View {
ZStack {
// Progress indicator at top
VStack {
progressBar
Spacer()
}
// Calibration target
if let step = calibrationManager.currentStep {
calibrationTarget(for: step)
}
// Skip button at bottom
VStack {
Spacer()
skipButton
}
}
}
// MARK: - Progress Bar
private var progressBar: some View {
VStack(spacing: 10) {
HStack {
Text("Calibrating...")
.foregroundColor(.white)
Spacer()
Text(calibrationManager.progressText)
.foregroundColor(.white.opacity(0.7))
}
ProgressView(value: calibrationManager.progress)
.progressViewStyle(.linear)
.tint(.blue)
}
.padding()
.background(Color.black.opacity(0.5))
}
// MARK: - Calibration Target
@ViewBuilder
private func calibrationTarget(for step: CalibrationStep) -> some View {
let position = targetPosition(for: step)
VStack(spacing: 20) {
// Target circle with countdown
ZStack {
// Outer ring (pulsing)
Circle()
.stroke(Color.blue.opacity(0.3), lineWidth: 3)
.frame(width: 100, height: 100)
.scaleEffect(isCountingDown ? 1.2 : 1.0)
.animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: isCountingDown)
// Inner circle
Circle()
.fill(Color.blue)
.frame(width: 60, height: 60)
// Countdown number or checkmark
if isCountingDown && countdownValue > 0 {
Text("\(countdownValue)")
.font(.system(size: 36, weight: .bold))
.foregroundColor(.white)
} else if calibrationManager.samplesCollected > 0 {
Image(systemName: "checkmark")
.font(.system(size: 30, weight: .bold))
.foregroundColor(.white)
}
}
// Instruction text
Text(step.instructionText)
.font(.title2)
.foregroundColor(.white)
.padding(.horizontal, 40)
.padding(.vertical, 15)
.background(Color.black.opacity(0.7))
.cornerRadius(10)
}
.position(position)
.onAppear {
startStepCountdown()
}
}
// MARK: - Skip Button
private var skipButton: some View {
Button {
calibrationManager.skipStep()
} label: {
Text("Skip this position")
.foregroundColor(.white)
.padding(.horizontal, 20)
.padding(.vertical, 10)
.background(Color.white.opacity(0.2))
.cornerRadius(8)
}
.padding(.bottom, 40)
}
// MARK: - Helper Methods
private func startCalibration() {
calibrationManager.startCalibration()
}
private func startStepCountdown() {
countdownValue = 3
isCountingDown = true
// Countdown 3, 2, 1
Timer.scheduledTimer(withTimeInterval: 1.0, repeats: true) { timer in
if countdownValue > 0 {
countdownValue -= 1
} else {
timer.invalidate()
isCountingDown = false
}
}
}
private func targetPosition(for step: CalibrationStep) -> CGPoint {
let screenBounds = NSScreen.main?.frame ?? CGRect(x: 0, y: 0, width: 1920, height: 1080)
let width = screenBounds.width
let height = screenBounds.height
let centerX = width / 2
let centerY = height / 2
let margin: CGFloat = 150
switch step {
case .center:
return CGPoint(x: centerX, y: centerY)
case .left:
return CGPoint(x: centerX - width / 4, y: centerY)
case .right:
return CGPoint(x: centerX + width / 4, y: centerY)
case .farLeft:
return CGPoint(x: margin, y: centerY)
case .farRight:
return CGPoint(x: width - margin, y: centerY)
case .up:
return CGPoint(x: centerX, y: margin)
case .down:
return CGPoint(x: centerX, y: height - margin)
case .topLeft:
return CGPoint(x: margin, y: margin)
case .topRight:
return CGPoint(x: width - margin, y: margin)
case .bottomLeft:
return CGPoint(x: margin, y: height - margin)
case .bottomRight:
return CGPoint(x: width - margin, y: height - margin)
}
}
}
// MARK: - Instruction Row
struct InstructionRow: View {
let icon: String
let text: String
var body: some View {
HStack(spacing: 15) {
Image(systemName: icon)
.font(.title2)
.foregroundColor(.blue)
.frame(width: 30)
Text(text)
.font(.body)
}
}
}
#Preview {
EyeTrackingCalibrationView()
}

View File

@@ -21,6 +21,8 @@ struct EnforceModeSetupView: View {
@State private var showDebugView = false
@State private var isViewActive = false
@State private var showAdvancedSettings = false
@State private var showCalibrationWindow = false
@ObservedObject var calibrationManager = CalibrationManager.shared
var body: some View {
VStack(spacing: 0) {
@@ -80,6 +82,7 @@ struct EnforceModeSetupView: View {
if enforceModeService.isEnforceModeEnabled {
testModeButton
calibrationSection
}
if isTestModeActive && enforceModeService.isCameraActive {
@@ -150,6 +153,58 @@ struct EnforceModeSetupView: View {
.buttonStyle(.borderedProminent)
.controlSize(.large)
}
private var calibrationSection: some View {
VStack(alignment: .leading, spacing: 12) {
HStack {
Image(systemName: "target")
.font(.title3)
.foregroundColor(.blue)
Text("Eye Tracking Calibration")
.font(.headline)
}
if calibrationManager.calibrationData.isComplete {
VStack(alignment: .leading, spacing: 8) {
Text(calibrationManager.getCalibrationSummary())
.font(.caption)
.foregroundColor(.secondary)
if calibrationManager.needsRecalibration() {
Label("Calibration expired - recalibration recommended", systemImage: "exclamationmark.triangle.fill")
.font(.caption)
.foregroundColor(.orange)
} else {
Label("Calibration active and valid", systemImage: "checkmark.circle.fill")
.font(.caption)
.foregroundColor(.green)
}
}
} else {
Text("Not calibrated - using default thresholds")
.font(.caption)
.foregroundColor(.secondary)
}
Button(action: {
showCalibrationWindow = true
}) {
HStack {
Image(systemName: "target")
Text(calibrationManager.calibrationData.isComplete ? "Recalibrate" : "Run Calibration")
}
.frame(maxWidth: .infinity)
.padding(.vertical, 8)
}
.buttonStyle(.bordered)
.controlSize(.regular)
}
.padding()
.glassEffectIfAvailable(GlassStyle.regular.tint(.blue.opacity(0.1)), in: .rect(cornerRadius: 12))
.sheet(isPresented: $showCalibrationWindow) {
EyeTrackingCalibrationView()
}
}
private var testModePreviewView: some View {
VStack(spacing: 16) {