diff --git a/Gaze/Constants/EyeTrackingConstants.swift b/Gaze/Constants/EyeTrackingConstants.swift index 6ff03ea..1105c52 100644 --- a/Gaze/Constants/EyeTrackingConstants.swift +++ b/Gaze/Constants/EyeTrackingConstants.swift @@ -54,6 +54,16 @@ class EyeTrackingConstants: ObservableObject { @Published var maxPupilRatio: Double = 0.45 @Published var maxPupilEnabled: Bool = true + // MARK: - Pixel-Based Gaze Detection Thresholds + /// Python GazeTracking thresholds for pixel-based pupil detection + /// Formula: pupilX / (eyeCenterX * 2 - 10) + /// Looking right: ratio ≤ 0.35 + /// Looking center: 0.35 < ratio < 0.65 + /// Looking left: ratio ≥ 0.65 + @Published var pixelGazeMinRatio: Double = 0.35 // Looking right threshold + @Published var pixelGazeMaxRatio: Double = 0.65 // Looking left threshold + @Published var pixelGazeEnabled: Bool = true + private init() {} // MARK: - Reset to Defaults diff --git a/Gaze/Models/CalibrationData.swift b/Gaze/Models/CalibrationData.swift new file mode 100644 index 0000000..91434d5 --- /dev/null +++ b/Gaze/Models/CalibrationData.swift @@ -0,0 +1,198 @@ +// +// CalibrationData.swift +// Gaze +// +// Created by Mike Freno on 1/15/26. +// + +import Foundation + +// MARK: - Calibration Models + +enum CalibrationStep: String, Codable, CaseIterable { + case center + case farLeft + case left + case farRight + case right + case up + case down + case topLeft + case topRight + case bottomLeft + case bottomRight + + var displayName: String { + switch self { + case .center: return "Center" + case .farLeft: return "Far Left" + case .left: return "Left" + case .farRight: return "Far Right" + case .right: return "Right" + case .up: return "Up" + case .down: return "Down" + case .topLeft: return "Top Left" + case .topRight: return "Top Right" + case .bottomLeft: return "Bottom Left" + case .bottomRight: return "Bottom Right" + } + } + + var instructionText: String { + switch self { + case .center: + return "Look at the center of the screen" + case .farLeft: + return "Look as far left as comfortable" + case .left: + return "Look to the left" + case .farRight: + return "Look as far right as comfortable" + case .right: + return "Look to the right" + case .up: + return "Look up" + case .down: + return "Look down" + case .topLeft: + return "Look to the top left corner" + case .topRight: + return "Look to the top right corner" + case .bottomLeft: + return "Look to the bottom left corner" + case .bottomRight: + return "Look to the bottom right corner" + } + } +} + +struct GazeSample: Codable { + let leftRatio: Double? + let rightRatio: Double? + let averageRatio: Double + let timestamp: Date + + init(leftRatio: Double?, rightRatio: Double?) { + self.leftRatio = leftRatio + self.rightRatio = rightRatio + + // Calculate average from available ratios + if let left = leftRatio, let right = rightRatio { + self.averageRatio = (left + right) / 2.0 + } else { + self.averageRatio = leftRatio ?? rightRatio ?? 
0.5 + } + + self.timestamp = Date() + } +} + +struct GazeThresholds: Codable { + let minLeftRatio: Double // Looking left threshold (e.g., 0.65) + let maxRightRatio: Double // Looking right threshold (e.g., 0.35) + let centerMin: Double // Center range minimum + let centerMax: Double // Center range maximum + + var isValid: Bool { + // Ensure thresholds don't overlap + return maxRightRatio < centerMin && + centerMin < centerMax && + centerMax < minLeftRatio + } + + static var defaultThresholds: GazeThresholds { + GazeThresholds( + minLeftRatio: 0.65, + maxRightRatio: 0.35, + centerMin: 0.40, + centerMax: 0.60 + ) + } +} + +struct CalibrationData: Codable { + var samples: [CalibrationStep: [GazeSample]] + var computedThresholds: GazeThresholds? + var calibrationDate: Date + var isComplete: Bool + + init() { + self.samples = [:] + self.computedThresholds = nil + self.calibrationDate = Date() + self.isComplete = false + } + + mutating func addSample(_ sample: GazeSample, for step: CalibrationStep) { + if samples[step] == nil { + samples[step] = [] + } + samples[step]?.append(sample) + } + + func getSamples(for step: CalibrationStep) -> [GazeSample] { + return samples[step] ?? [] + } + + func averageRatio(for step: CalibrationStep) -> Double? { + let stepSamples = getSamples(for: step) + guard !stepSamples.isEmpty else { return nil } + + let sum = stepSamples.reduce(0.0) { $0 + $1.averageRatio } + return sum / Double(stepSamples.count) + } + + func standardDeviation(for step: CalibrationStep) -> Double? { + let stepSamples = getSamples(for: step) + guard stepSamples.count > 1, let mean = averageRatio(for: step) else { return nil } + + let variance = stepSamples.reduce(0.0) { sum, sample in + let diff = sample.averageRatio - mean + return sum + (diff * diff) + } / Double(stepSamples.count - 1) + + return sqrt(variance) + } + + mutating func calculateThresholds() { + // Need at least center, left, and right samples + guard let centerMean = averageRatio(for: .center), + let leftMean = averageRatio(for: .left), + let rightMean = averageRatio(for: .right) else { + print("⚠️ Insufficient calibration data to calculate thresholds") + return + } + + let centerStdDev = standardDeviation(for: .center) ?? 
0.05 + + // Calculate center range (mean ± 0.5 * std_dev) + let centerMin = max(0.0, centerMean - 0.5 * centerStdDev) + let centerMax = min(1.0, centerMean + 0.5 * centerStdDev) + + // Calculate left threshold (midpoint between center and left extremes) + let minLeftRatio = centerMax + (leftMean - centerMax) * 0.5 + + // Calculate right threshold (midpoint between center and right extremes) + let maxRightRatio = centerMin - (centerMin - rightMean) * 0.5 + + // Validate and adjust if needed + var thresholds = GazeThresholds( + minLeftRatio: min(0.95, max(0.55, minLeftRatio)), + maxRightRatio: max(0.05, min(0.45, maxRightRatio)), + centerMin: centerMin, + centerMax: centerMax + ) + + // Ensure no overlap + if !thresholds.isValid { + print("⚠️ Computed thresholds overlap, using defaults") + thresholds = GazeThresholds.defaultThresholds + } + + self.computedThresholds = thresholds + print("✓ Calibration thresholds calculated:") + print(" Left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))") + print(" Center: \(String(format: "%.3f", thresholds.centerMin))-\(String(format: "%.3f", thresholds.centerMax))") + print(" Right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))") + } +} diff --git a/Gaze/Services/CalibrationManager.swift b/Gaze/Services/CalibrationManager.swift new file mode 100644 index 0000000..7c46192 --- /dev/null +++ b/Gaze/Services/CalibrationManager.swift @@ -0,0 +1,237 @@ +// +// CalibrationManager.swift +// Gaze +// +// Created by Mike Freno on 1/15/26. +// + +import Foundation +import Combine + +@MainActor +class CalibrationManager: ObservableObject { + static let shared = CalibrationManager() + + // MARK: - Published Properties + + @Published var isCalibrating = false + @Published var currentStep: CalibrationStep? + @Published var currentStepIndex = 0 + @Published var samplesCollected = 0 + @Published var calibrationData = CalibrationData() + + // MARK: - Configuration + + private let samplesPerStep = 20 // Collect 20 samples per calibration point (~1 second at 30fps) + private let userDefaultsKey = "eyeTrackingCalibration" + private let calibrationValidityDays = 30 + + // Calibration sequence (9 steps) + private let calibrationSteps: [CalibrationStep] = [ + .center, + .left, + .right, + .farLeft, + .farRight, + .up, + .down, + .topLeft, + .topRight + ] + + // MARK: - Initialization + + private init() { + loadCalibration() + } + + // MARK: - Calibration Flow + + func startCalibration() { + print("🎯 Starting calibration...") + isCalibrating = true + currentStepIndex = 0 + currentStep = calibrationSteps[0] + samplesCollected = 0 + calibrationData = CalibrationData() + } + + func collectSample(leftRatio: Double?, rightRatio: Double?) { + guard isCalibrating, let step = currentStep else { return } + + let sample = GazeSample(leftRatio: leftRatio, rightRatio: rightRatio) + calibrationData.addSample(sample, for: step) + samplesCollected += 1 + + // Move to next step when enough samples collected + if samplesCollected >= samplesPerStep { + advanceToNextStep() + } + } + + private func advanceToNextStep() { + currentStepIndex += 1 + + if currentStepIndex < calibrationSteps.count { + // Move to next calibration point + currentStep = calibrationSteps[currentStepIndex] + samplesCollected = 0 + print("📍 Calibration step: \(currentStep?.displayName ?? 
"unknown")") + } else { + // All steps complete + finishCalibration() + } + } + + func skipStep() { + // Allow skipping optional steps (up, down, diagonals) + guard isCalibrating, let step = currentStep else { return } + + print("⏭️ Skipping calibration step: \(step.displayName)") + advanceToNextStep() + } + + func finishCalibration() { + print("✓ Calibration complete, calculating thresholds...") + + calibrationData.calculateThresholds() + calibrationData.isComplete = true + calibrationData.calibrationDate = Date() + + saveCalibration() + applyCalibration() + + isCalibrating = false + currentStep = nil + currentStepIndex = 0 + samplesCollected = 0 + + print("✓ Calibration saved and applied") + } + + func cancelCalibration() { + print("❌ Calibration cancelled") + isCalibrating = false + currentStep = nil + currentStepIndex = 0 + samplesCollected = 0 + calibrationData = CalibrationData() + } + + // MARK: - Persistence + + private func saveCalibration() { + do { + let encoder = JSONEncoder() + encoder.dateEncodingStrategy = .iso8601 + let data = try encoder.encode(calibrationData) + UserDefaults.standard.set(data, forKey: userDefaultsKey) + print("💾 Calibration data saved to UserDefaults") + } catch { + print("❌ Failed to save calibration: \(error)") + } + } + + func loadCalibration() { + guard let data = UserDefaults.standard.data(forKey: userDefaultsKey) else { + print("ℹ️ No existing calibration found") + return + } + + do { + let decoder = JSONDecoder() + decoder.dateDecodingStrategy = .iso8601 + calibrationData = try decoder.decode(CalibrationData.self, from: data) + + if isCalibrationValid() { + print("✓ Loaded valid calibration from \(calibrationData.calibrationDate)") + applyCalibration() + } else { + print("⚠️ Calibration expired, needs recalibration") + } + } catch { + print("❌ Failed to load calibration: \(error)") + } + } + + func clearCalibration() { + UserDefaults.standard.removeObject(forKey: userDefaultsKey) + calibrationData = CalibrationData() + print("🗑️ Calibration data cleared") + } + + // MARK: - Validation + + func isCalibrationValid() -> Bool { + guard calibrationData.isComplete, + let thresholds = calibrationData.computedThresholds, + thresholds.isValid else { + return false + } + + // Check if calibration is not too old + let daysSinceCalibration = Calendar.current.dateComponents( + [.day], + from: calibrationData.calibrationDate, + to: Date() + ).day ?? 
0 + + return daysSinceCalibration < calibrationValidityDays + } + + func needsRecalibration() -> Bool { + return !isCalibrationValid() + } + + // MARK: - Apply Calibration + + private func applyCalibration() { + guard let thresholds = calibrationData.computedThresholds else { + print("⚠️ No thresholds to apply") + return + } + + let constants = EyeTrackingConstants.shared + constants.pixelGazeMinRatio = thresholds.maxRightRatio + constants.pixelGazeMaxRatio = thresholds.minLeftRatio + + print("✓ Applied calibrated thresholds:") + print(" Looking left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))") + print(" Looking right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))") + } + + // MARK: - Statistics + + func getCalibrationSummary() -> String { + guard calibrationData.isComplete else { + return "No calibration data" + } + + let dateFormatter = DateFormatter() + dateFormatter.dateStyle = .medium + dateFormatter.timeStyle = .short + + var summary = "Calibrated: \(dateFormatter.string(from: calibrationData.calibrationDate))\n" + + if let thresholds = calibrationData.computedThresholds { + summary += "Left threshold: \(String(format: "%.3f", thresholds.minLeftRatio))\n" + summary += "Right threshold: \(String(format: "%.3f", thresholds.maxRightRatio))\n" + summary += "Center range: \(String(format: "%.3f", thresholds.centerMin)) - \(String(format: "%.3f", thresholds.centerMax))" + } + + return summary + } + + // MARK: - Progress + + var progress: Double { + let totalSteps = calibrationSteps.count + let completedSteps = currentStepIndex + let currentProgress = Double(samplesCollected) / Double(samplesPerStep) + return (Double(completedSteps) + currentProgress) / Double(totalSteps) + } + + var progressText: String { + "\(currentStepIndex + 1) of \(calibrationSteps.count)" + } +} diff --git a/Gaze/Services/EyeTrackingService.swift b/Gaze/Services/EyeTrackingService.swift index c4d6ba3..305a286 100644 --- a/Gaze/Services/EyeTrackingService.swift +++ b/Gaze/Services/EyeTrackingService.swift @@ -127,7 +127,7 @@ class EyeTrackingService: NSObject, ObservableObject { self.videoOutput = output } - private func processFaceObservations(_ observations: [VNFaceObservation]?, imageSize: CGSize) { + private func processFaceObservations(_ observations: [VNFaceObservation]?, imageSize: CGSize, pixelBuffer: CVPixelBuffer? = nil) { guard let observations = observations, !observations.isEmpty else { faceDetected = false userLookingAtScreen = false @@ -171,11 +171,13 @@ class EyeTrackingService: NSObject, ObservableObject { face: face, landmarks: landmarks, imageSize: imageSize, + pixelBuffer: pixelBuffer, shouldLog: enableDebugLogging ) userLookingAtScreen = !lookingAway } + private func detectEyesClosed( leftEye: VNFaceLandmarkRegion2D, rightEye: VNFaceLandmarkRegion2D, shouldLog: Bool ) -> Bool { @@ -214,7 +216,7 @@ class EyeTrackingService: NSObject, ObservableObject { } private func detectLookingAway( - face: VNFaceObservation, landmarks: VNFaceLandmarks2D, imageSize: CGSize, shouldLog: Bool + face: VNFaceObservation, landmarks: VNFaceLandmarks2D, imageSize: CGSize, pixelBuffer: CVPixelBuffer?, shouldLog: Bool ) -> Bool { let constants = EyeTrackingConstants.shared @@ -266,99 +268,86 @@ } } - // 2. 
Eye Gaze Check (Pixel-Based Pupil Detection) var eyesLookingAway = false - if let leftEye = landmarks.leftEye, + if let pixelBuffer = pixelBuffer, + let leftEye = landmarks.leftEye, let rightEye = landmarks.rightEye, - let leftPupil = landmarks.leftPupil, - let rightPupil = landmarks.rightPupil + constants.pixelGazeEnabled { + var leftGazeRatio: Double? = nil + var rightGazeRatio: Double? = nil - // NEW: Use inter-eye distance method - let gazeOffsets = calculateGazeUsingInterEyeDistance( - leftEye: leftEye, - rightEye: rightEye, - leftPupil: leftPupil, - rightPupil: rightPupil, + // Detect left pupil (side = 0) + if let leftResult = PupilDetector.detectPupil( + in: pixelBuffer, + eyeLandmarks: leftEye, + faceBoundingBox: face.boundingBox, imageSize: imageSize, - faceBoundingBox: face.boundingBox - ) - - let leftRatio = calculatePupilHorizontalRatio( - eye: leftEye, - pupil: leftPupil, - imageSize: imageSize, - faceBoundingBox: face.boundingBox - ) - let rightRatio = calculatePupilHorizontalRatio( - eye: rightEye, - pupil: rightPupil, - imageSize: imageSize, - faceBoundingBox: face.boundingBox - ) - - // Debug logging - if shouldLog { - print( - "👁️ Pupil Ratios (OLD METHOD) - Left: \(String(format: "%.3f", leftRatio)), Right: \(String(format: "%.3f", rightRatio))" - ) - print( - "👁️ Gaze Offsets (NEW METHOD) - Left: \(String(format: "%.3f", gazeOffsets.leftGaze)), Right: \(String(format: "%.3f", gazeOffsets.rightGaze))" - ) - print( - "👁️ Thresholds - Min: \(constants.minPupilRatio), Max: \(constants.maxPupilRatio)" + side: 0 + ) { + leftGazeRatio = calculateGazeRatio( + pupilPosition: leftResult.pupilPosition, + eyeRegion: leftResult.eyeRegion ) } - + + // Detect right pupil (side = 1) + if let rightResult = PupilDetector.detectPupil( + in: pixelBuffer, + eyeLandmarks: rightEye, + faceBoundingBox: face.boundingBox, + imageSize: imageSize, + side: 1 + ) { + rightGazeRatio = calculateGazeRatio( + pupilPosition: rightResult.pupilPosition, + eyeRegion: rightResult.eyeRegion + ) + } + + // CRITICAL: Connect to CalibrationManager + if CalibrationManager.shared.isCalibrating, + let leftRatio = leftGazeRatio, + let rightRatio = rightGazeRatio { + CalibrationManager.shared.collectSample( + leftRatio: leftRatio, + rightRatio: rightRatio + ) + } + + // Determine looking away using calibrated thresholds + if let leftRatio = leftGazeRatio, let rightRatio = rightGazeRatio { + let avgRatio = (leftRatio + rightRatio) / 2.0 + let lookingRight = avgRatio <= constants.pixelGazeMinRatio + let lookingLeft = avgRatio >= constants.pixelGazeMaxRatio + eyesLookingAway = lookingRight || lookingLeft + + if shouldLog { + print("👁️ PIXEL GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgRatio)) Away=\(eyesLookingAway)") + print(" Thresholds: Min=\(String(format: "%.3f", constants.pixelGazeMinRatio)) Max=\(String(format: "%.3f", constants.pixelGazeMaxRatio))") + } + } else { + if shouldLog { + print("⚠️ Pixel pupil detection failed for one or both eyes") + } + } + // Update debug values Task { @MainActor in - debugLeftPupilRatio = leftRatio - debugRightPupilRatio = rightRatio - } - - // Normal range for "looking center" is roughly 0.3 to 0.7 - // (0.0 = extreme right, 1.0 = extreme left relative to face) - // Note: Camera is mirrored, so logic might be inverted - - var leftLookingAway = false - var rightLookingAway = false - - // Check min pupil ratio if enabled - /*if constants.minPupilEnabled {*/ - /*let minRatio = constants.minPupilRatio*/ - /*if 
leftRatio < minRatio {*/ - /*leftLookingAway = true*/ - /*}*/ - /*if rightRatio < minRatio {*/ - /*rightLookingAway = true*/ - /*}*/ - /*}*/ - - /*// Check max pupil ratio if enabled*/ - /*if constants.maxPupilEnabled {*/ - /*let maxRatio = constants.maxPupilRatio*/ - /*if leftRatio > maxRatio {*/ - /*leftLookingAway = true*/ - /*}*/ - /*if rightRatio > maxRatio {*/ - /*rightLookingAway = true*/ - /*}*/ - /*}*/ - - // Consider looking away if EITHER eye is off-center - // Changed from AND to OR logic because requiring both eyes makes detection too restrictive - // This is more sensitive but also more reliable for detecting actual looking away - eyesLookingAway = leftLookingAway || rightLookingAway - - if shouldLog { - print( - "👁️ Looking Away - Left: \(leftLookingAway), Right: \(rightLookingAway), Either: \(eyesLookingAway)" - ) + debugLeftPupilRatio = leftGazeRatio + debugRightPupilRatio = rightGazeRatio } } else { if shouldLog { - print("👁️ Missing pupil or eye landmarks!") + if pixelBuffer == nil { + print("⚠️ No pixel buffer available for pupil detection") + } else if !constants.pixelGazeEnabled { + print("⚠️ Pixel gaze detection disabled in constants") + } else { + print("⚠️ Missing eye landmarks for pupil detection") + } } } @@ -366,228 +355,32 @@ class EyeTrackingService: NSObject, ObservableObject { return isLookingAway } - - private func calculatePupilHorizontalRatio( - eye: VNFaceLandmarkRegion2D, - pupil: VNFaceLandmarkRegion2D, - imageSize: CGSize, - faceBoundingBox: CGRect - ) -> Double { - // Use normalizedPoints which are already normalized to face bounding box - let eyePoints = eye.normalizedPoints - let pupilPoints = pupil.normalizedPoints - - // Throttle debug logging to every 0.5 seconds - let now = Date() - let shouldLog = now.timeIntervalSince(lastDebugLogTime) >= 0.5 - - if shouldLog { - lastDebugLogTime = now - - print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") - print("📊 EYE TRACKING DEBUG DATA") - print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") - - print("\n🖼️ IMAGE SIZE:") - print(" Width: \(imageSize.width), Height: \(imageSize.height)") - - print("\n📦 FACE BOUNDING BOX (normalized):") - print(" Origin: (\(faceBoundingBox.origin.x), \(faceBoundingBox.origin.y))") - print(" Size: (\(faceBoundingBox.size.width), \(faceBoundingBox.size.height))") - - print("\n👁️ EYE LANDMARK POINTS (normalized to face bounding box - from Vision):") - print(" Count: \(eyePoints.count)") - let eyeMinX = eyePoints.min(by: { $0.x < $1.x })?.x ?? 0 - let eyeMaxX = eyePoints.max(by: { $0.x < $1.x })?.x ?? 
0 - for (index, point) in eyePoints.enumerated() { - var marker = "" - if abs(point.x - eyeMinX) < 0.0001 { - marker = " ← LEFTMOST (inner corner)" - } else if abs(point.x - eyeMaxX) < 0.0001 { - marker = " ← RIGHTMOST (outer corner)" - } - if index == 0 { - marker += " [FIRST]" - } else if index == eyePoints.count - 1 { - marker += " [LAST]" - } - print( - " [\(index)]: (\(String(format: "%.4f", point.x)), \(String(format: "%.4f", point.y)))\(marker)" - ) - } - - print("\n👁️ PUPIL LANDMARK POINTS (normalized to face bounding box - from Vision):") - print(" Count: \(pupilPoints.count)") - for (index, point) in pupilPoints.enumerated() { - print( - " [\(index)]: (\(String(format: "%.4f", point.x)), \(String(format: "%.4f", point.y)))" - ) - } - - if let minPoint = eyePoints.min(by: { $0.x < $1.x }), - let maxPoint = eyePoints.max(by: { $0.x < $1.x }) - { - let eyeMinX = minPoint.x - let eyeMaxX = maxPoint.x - let eyeWidth = eyeMaxX - eyeMinX - let pupilCenterX = pupilPoints.map { $0.x }.reduce(0, +) / Double(pupilPoints.count) - let ratio = (pupilCenterX - eyeMinX) / eyeWidth - - print("\n📏 CALCULATIONS:") - print(" Eye MinX: \(String(format: "%.4f", eyeMinX))") - print(" Eye MaxX: \(String(format: "%.4f", eyeMaxX))") - print(" Eye Width: \(String(format: "%.4f", eyeWidth))") - - // Analyze different point pairs to find better eye width - if eyePoints.count >= 6 { - let cornerWidth = eyePoints[5].x - eyePoints[0].x - print(" Corner-to-Corner Width [0→5]: \(String(format: "%.4f", cornerWidth))") - - // Try middle points too - if eyePoints.count >= 4 { - let midWidth = eyePoints[3].x - eyePoints[0].x - print(" Point [0→3] Width: \(String(format: "%.4f", midWidth))") - } - } - - print(" Pupil Center X: \(String(format: "%.4f", pupilCenterX))") - print(" Pupil Min X: \(String(format: "%.4f", pupilPoints.min(by: { $0.x < $1.x })?.x ?? 0))") - print(" Pupil Max X: \(String(format: "%.4f", pupilPoints.max(by: { $0.x < $1.x })?.x ?? 
0))") - print(" Final Ratio (current method): \(String(format: "%.4f", ratio))") - - // Calculate alternate ratios - if eyePoints.count >= 6 { - let cornerWidth = eyePoints[5].x - eyePoints[0].x - if cornerWidth > 0 { - let cornerRatio = (pupilCenterX - eyePoints[0].x) / cornerWidth - print(" Alternate Ratio (using corners [0→5]): \(String(format: "%.4f", cornerRatio))") - } - } - } - - print("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n") - } - - guard !eyePoints.isEmpty, !pupilPoints.isEmpty else { return 0.5 } - - guard let minPoint = eyePoints.min(by: { $0.x < $1.x }), - let maxPoint = eyePoints.max(by: { $0.x < $1.x }) - else { - return 0.5 - } - - let eyeMinX = minPoint.x - let eyeMaxX = maxPoint.x - let eyeWidth = eyeMaxX - eyeMinX - - guard eyeWidth > 0 else { return 0.5 } - - let pupilCenterX = pupilPoints.map { $0.x }.reduce(0, +) / Double(pupilPoints.count) - - // Calculate ratio (0.0 to 1.0) - already normalized to face bounding box by Vision - let ratio = (pupilCenterX - eyeMinX) / eyeWidth - - return ratio - } - /// NEW APPROACH: Calculate gaze using inter-eye distance as reference - /// This works around Vision's limitation that eye landmarks only track the iris, not true eye corners - private func calculateGazeUsingInterEyeDistance( - leftEye: VNFaceLandmarkRegion2D, - rightEye: VNFaceLandmarkRegion2D, - leftPupil: VNFaceLandmarkRegion2D, - rightPupil: VNFaceLandmarkRegion2D, - imageSize: CGSize, - faceBoundingBox: CGRect - ) -> (leftGaze: Double, rightGaze: Double) { + /// Calculate gaze ratio using Python GazeTracking algorithm + /// Formula: ratio = pupilX / (eyeCenterX * 2 - 10) + /// Returns: 0.0-1.0 (0.0 = far right, 1.0 = far left) + private func calculateGazeRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double { + let pupilX = Double(pupilPosition.x) + let eyeCenterX = Double(eyeRegion.center.x) - // CRITICAL: Convert from face-normalized coordinates to image coordinates - // normalizedPoints are relative to face bounding box, not stable for gaze tracking + // Python formula from GazeTracking library + let denominator = (eyeCenterX * 2.0 - 10.0) - // Helper to convert face-normalized point to image coordinates - func toImageCoords(_ point: CGPoint) -> CGPoint { - // Face bounding box origin is in Vision coordinates (bottom-left origin) - let imageX = faceBoundingBox.origin.x + point.x * faceBoundingBox.width - let imageY = faceBoundingBox.origin.y + point.y * faceBoundingBox.height - return CGPoint(x: imageX, y: imageY) + guard denominator > 0 else { + // Fallback to simple normalized position + let eyeLeft = Double(eyeRegion.frame.minX) + let eyeRight = Double(eyeRegion.frame.maxX) + let eyeWidth = eyeRight - eyeLeft + guard eyeWidth > 0 else { return 0.5 } + return (pupilX - eyeLeft) / eyeWidth } - // Convert all points to image space - let leftEyePointsImg = leftEye.normalizedPoints.map { toImageCoords($0) } - let rightEyePointsImg = rightEye.normalizedPoints.map { toImageCoords($0) } - let leftPupilPointsImg = leftPupil.normalizedPoints.map { toImageCoords($0) } - let rightPupilPointsImg = rightPupil.normalizedPoints.map { toImageCoords($0) } + let ratio = pupilX / denominator - // Calculate eye centers (average of all iris boundary points) - let leftEyeCenterX = leftEyePointsImg.map { $0.x }.reduce(0, +) / Double(leftEyePointsImg.count) - let rightEyeCenterX = rightEyePointsImg.map { $0.x }.reduce(0, +) / Double(rightEyePointsImg.count) - - // Calculate pupil centers - let leftPupilX = leftPupilPointsImg.map { $0.x }.reduce(0, +) / 
Double(leftPupilPointsImg.count) - let rightPupilX = rightPupilPointsImg.map { $0.x }.reduce(0, +) / Double(rightPupilPointsImg.count) - - // Inter-eye distance (the distance between eye centers) - should be stable now - let interEyeDistance = abs(rightEyeCenterX - leftEyeCenterX) - - // Estimate iris width as a fraction of inter-eye distance - // Typical human: inter-pupil distance ~63mm, iris width ~12mm → ratio ~1/5 - let irisWidth = interEyeDistance / 5.0 - - // Calculate gaze offset for each eye (positive = looking right, negative = looking left) - let leftGazeOffset = (leftPupilX - leftEyeCenterX) / irisWidth - let rightGazeOffset = (rightPupilX - rightEyeCenterX) / irisWidth - - // Throttle debug logging - let now = Date() - let shouldLog = now.timeIntervalSince(lastDebugLogTime) >= 0.5 - - if shouldLog { - lastDebugLogTime = now - - print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") - print("📊 INTER-EYE DISTANCE GAZE (IMAGE COORDS)") - print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") - - print("\n🖼️ IMAGE SPACE:") - print(" Image Size: \(Int(imageSize.width)) x \(Int(imageSize.height))") - print(" Face Box: x=\(String(format: "%.3f", faceBoundingBox.origin.x)) w=\(String(format: "%.3f", faceBoundingBox.width))") - - print("\n👁️ EYE CENTERS (image coords):") - print(" Left Eye Center X: \(String(format: "%.4f", leftEyeCenterX)) (\(Int(leftEyeCenterX * imageSize.width))px)") - print(" Right Eye Center X: \(String(format: "%.4f", rightEyeCenterX)) (\(Int(rightEyeCenterX * imageSize.width))px)") - print(" Inter-Eye Distance: \(String(format: "%.4f", interEyeDistance)) (\(Int(interEyeDistance * imageSize.width))px)") - print(" Estimated Iris Width: \(String(format: "%.4f", irisWidth)) (\(Int(irisWidth * imageSize.width))px)") - - print("\n👁️ PUPIL POSITIONS (image coords):") - print(" Left Pupil X: \(String(format: "%.4f", leftPupilX)) (\(Int(leftPupilX * imageSize.width))px)") - print(" Right Pupil X: \(String(format: "%.4f", rightPupilX)) (\(Int(rightPupilX * imageSize.width))px)") - - print("\n📏 PUPIL OFFSETS FROM EYE CENTER:") - print(" Left Offset: \(String(format: "%.4f", leftPupilX - leftEyeCenterX)) (\(Int((leftPupilX - leftEyeCenterX) * imageSize.width))px)") - print(" Right Offset: \(String(format: "%.4f", rightPupilX - rightEyeCenterX)) (\(Int((rightPupilX - rightEyeCenterX) * imageSize.width))px)") - - print("\n📏 GAZE OFFSETS (normalized to iris width):") - print(" Left Gaze Offset: \(String(format: "%.4f", leftGazeOffset)) (0=center, +right, -left)") - print(" Right Gaze Offset: \(String(format: "%.4f", rightGazeOffset)) (0=center, +right, -left)") - print(" Average Gaze: \(String(format: "%.4f", (leftGazeOffset + rightGazeOffset) / 2))") - - // Interpretation - let avgGaze = (leftGazeOffset + rightGazeOffset) / 2 - var interpretation = "" - if avgGaze < -0.5 { - interpretation = "Looking LEFT" - } else if avgGaze > 0.5 { - interpretation = "Looking RIGHT" - } else { - interpretation = "Looking CENTER" - } - print(" Interpretation: \(interpretation)") - - print("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n") - } - - return (leftGazeOffset, rightGazeOffset) + // Clamp to valid range + return max(0.0, min(1.0, ratio)) } + } extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate { @@ -616,7 +409,8 @@ extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate { Task { @MainActor in self.processFaceObservations( request.results as? 
[VNFaceObservation], - imageSize: size + imageSize: size, + pixelBuffer: pixelBuffer ) } } diff --git a/Gaze/Services/PupilDetector.swift b/Gaze/Services/PupilDetector.swift index 52c66a3..0164675 100644 --- a/Gaze/Services/PupilDetector.swift +++ b/Gaze/Services/PupilDetector.swift @@ -7,10 +7,20 @@ // Pixel-based pupil detection translated from Python GazeTracking library // Original: https://github.com/antoinelame/GazeTracking // +// This implementation closely follows the Python pipeline: +// 1. Isolate eye region with polygon mask (cv2.fillPoly equivalent) +// 2. Bilateral filter (cv2.bilateralFilter(eye_frame, 10, 15, 15)) +// 3. Erosion with 3x3 kernel, 3 iterations (cv2.erode) +// 4. Binary threshold (cv2.threshold) +// 5. Find contours, sort by area, use second-largest (cv2.findContours) +// 6. Calculate centroid via moments (cv2.moments) +// import CoreImage import Vision import Accelerate +import ImageIO +import UniformTypeIdentifiers struct PupilPosition { let x: CGFloat @@ -18,24 +28,110 @@ struct PupilPosition { } struct EyeRegion { - let frame: CGRect // Bounding box of the eye in image coordinates - let center: CGPoint // Center point of the eye region + let frame: CGRect + let center: CGPoint + let origin: CGPoint +} + +/// Calibration state for adaptive thresholding (matches Python Calibration class) +class PupilCalibration { + private let targetFrames = 20 + private var thresholdsLeft: [Int] = [] + private var thresholdsRight: [Int] = [] + + var isComplete: Bool { + thresholdsLeft.count >= targetFrames && thresholdsRight.count >= targetFrames + } + + func threshold(forSide side: Int) -> Int { + let thresholds = side == 0 ? thresholdsLeft : thresholdsRight + guard !thresholds.isEmpty else { return 50 } + return thresholds.reduce(0, +) / thresholds.count + } + + func evaluate(eyeData: [UInt8], width: Int, height: Int, side: Int) { + let bestThreshold = findBestThreshold(eyeData: eyeData, width: width, height: height) + if side == 0 { + thresholdsLeft.append(bestThreshold) + } else { + thresholdsRight.append(bestThreshold) + } + } + + /// Finds optimal threshold by targeting ~48% iris coverage (matches Python) + private func findBestThreshold(eyeData: [UInt8], width: Int, height: Int) -> Int { + let averageIrisSize = 0.48 + var trials: [Int: Double] = [:] + + for threshold in stride(from: 5, to: 100, by: 5) { + let processed = PupilDetector.imageProcessing( + eyeData: eyeData, + width: width, + height: height, + threshold: threshold + ) + let irisSize = Self.irisSize(data: processed, width: width, height: height) + trials[threshold] = irisSize + } + + let best = trials.min { abs($0.value - averageIrisSize) < abs($1.value - averageIrisSize) } + return best?.key ?? 50 + } + + /// Returns percentage of dark pixels (iris area) + private static func irisSize(data: [UInt8], width: Int, height: Int) -> Double { + let margin = 5 + guard width > margin * 2, height > margin * 2 else { + return 0 + } + + var blackCount = 0 + var totalCount = 0 + + for y in margin..<(height - margin) { + for x in margin..<(width - margin) { + let index = y * width + x + if data[index] == 0 { + blackCount += 1 + } + totalCount += 1 + } + } + + return totalCount > 0 ? 
Double(blackCount) / Double(totalCount) : 0 + } + + func reset() { + thresholdsLeft.removeAll() + thresholdsRight.removeAll() + } } class PupilDetector { - /// Detects pupil position within an isolated eye region using pixel-based analysis + static var enableDebugImageSaving = false + private static var debugImageCounter = 0 + + /// Shared calibration instance + static let calibration = PupilCalibration() + + /// Detects pupil position within an isolated eye region + /// Closely follows Python GazeTracking pipeline /// - Parameters: /// - pixelBuffer: The camera frame pixel buffer /// - eyeLandmarks: Vision eye landmarks (6 points around iris) /// - faceBoundingBox: Face bounding box from Vision /// - imageSize: Size of the camera frame + /// - side: 0 for left eye, 1 for right eye + /// - threshold: Optional manual threshold (uses calibration if nil) /// - Returns: Pupil position relative to eye region, or nil if detection fails static func detectPupil( in pixelBuffer: CVPixelBuffer, eyeLandmarks: VNFaceLandmarkRegion2D, faceBoundingBox: CGRect, - imageSize: CGSize + imageSize: CGSize, + side: Int = 0, + threshold: Int? = nil ) -> (pupilPosition: PupilPosition, eyeRegion: EyeRegion)? { // Step 1: Convert Vision landmarks to pixel coordinates @@ -47,33 +143,102 @@ class PupilDetector { guard eyePoints.count >= 6 else { return nil } - // Step 2: Create eye region bounding box + // Step 2: Create eye region bounding box with margin guard let eyeRegion = createEyeRegion(from: eyePoints, imageSize: imageSize) else { return nil } - // Step 3: Extract and process eye region from pixel buffer - guard let eyeImage = extractEyeRegion( - from: pixelBuffer, - region: eyeRegion.frame, - mask: eyePoints + // Step 3: Extract grayscale eye data from pixel buffer + guard let fullFrameData = extractGrayscaleData(from: pixelBuffer) else { + return nil + } + + let frameWidth = CVPixelBufferGetWidth(pixelBuffer) + let frameHeight = CVPixelBufferGetHeight(pixelBuffer) + + // Step 4: Isolate eye with polygon mask (matches Python _isolate method) + guard let (eyeData, eyeWidth, eyeHeight) = isolateEyeWithMask( + frameData: fullFrameData, + frameWidth: frameWidth, + frameHeight: frameHeight, + eyePoints: eyePoints, + region: eyeRegion ) else { return nil } - // Step 4: Process image to isolate pupil (bilateral filter + threshold) - guard let processedImage = processEyeImage(eyeImage) else { - return nil - } - - // Step 5: Find pupil using contour detection - guard let pupilPosition = findPupilCentroid(in: processedImage) else { + // Step 5: Get threshold (from calibration or parameter) + let effectiveThreshold: Int + if let manualThreshold = threshold { + effectiveThreshold = manualThreshold + } else if calibration.isComplete { + effectiveThreshold = calibration.threshold(forSide: side) + } else { + // Calibrate + calibration.evaluate(eyeData: eyeData, width: eyeWidth, height: eyeHeight, side: side) + effectiveThreshold = calibration.threshold(forSide: side) + } + + // Step 6: Process image (bilateral filter + erosion + threshold) + let processedData = imageProcessing( + eyeData: eyeData, + width: eyeWidth, + height: eyeHeight, + threshold: effectiveThreshold + ) + + // Debug: Save processed images if enabled + if enableDebugImageSaving { + saveDebugImage(data: processedData, width: eyeWidth, height: eyeHeight, name: "processed_eye_\(debugImageCounter)") + debugImageCounter += 1 + } + + // Step 7: Find contours and compute centroid of second-largest + guard let (centroidX, centroidY) = findPupilFromContours( + 
data: processedData, + width: eyeWidth, + height: eyeHeight + ) else { return nil } + let pupilPosition = PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY)) return (pupilPosition, eyeRegion) } + // MARK: - Debug Helper + + private static func saveDebugImage(data: [UInt8], width: Int, height: Int, name: String) { + guard let cgImage = createCGImage(from: data, width: width, height: height) else { + return + } + + let url = URL(fileURLWithPath: "/tmp/\(name).png") + guard let destination = CGImageDestinationCreateWithURL(url as CFURL, UTType.png.identifier as CFString, 1, nil) else { + return + } + + CGImageDestinationAddImage(destination, cgImage, nil) + CGImageDestinationFinalize(destination) + print("💾 Saved debug image: \(url.path)") + } + + private static func createCGImage(from data: [UInt8], width: Int, height: Int) -> CGImage? { + var mutableData = data + guard let context = CGContext( + data: &mutableData, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: width, + space: CGColorSpaceCreateDeviceGray(), + bitmapInfo: CGImageAlphaInfo.none.rawValue + ) else { + return nil + } + return context.makeImage() + } + // MARK: - Step 1: Convert Landmarks to Pixel Coordinates private static func landmarksToPixelCoordinates( @@ -82,7 +247,6 @@ class PupilDetector { imageSize: CGSize ) -> [CGPoint] { return landmarks.normalizedPoints.map { point in - // Vision coordinates are normalized to face bounding box let imageX = (faceBoundingBox.origin.x + point.x * faceBoundingBox.width) * imageSize.width let imageY = (faceBoundingBox.origin.y + point.y * faceBoundingBox.height) * imageSize.height return CGPoint(x: imageX, y: imageY) @@ -100,7 +264,6 @@ class PupilDetector { let minY = points.map { $0.y }.min()! - margin let maxY = points.map { $0.y }.max()! + margin - // Clamp to image bounds let clampedMinX = max(0, minX) let clampedMaxX = min(imageSize.width, maxX) let clampedMinY = max(0, minY) @@ -118,149 +281,335 @@ class PupilDetector { y: frame.height / 2 ) - return EyeRegion(frame: frame, center: center) + let origin = CGPoint(x: clampedMinX, y: clampedMinY) + + return EyeRegion(frame: frame, center: center, origin: origin) } - // MARK: - Step 3: Extract Eye Region + // MARK: - Step 3: Extract Grayscale Data from Pixel Buffer - private static func extractEyeRegion( - from pixelBuffer: CVPixelBuffer, - region: CGRect, - mask: [CGPoint] - ) -> CIImage? { + private static func extractGrayscaleData(from pixelBuffer: CVPixelBuffer) -> [UInt8]? { + CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) + defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) } - let ciImage = CIImage(cvPixelBuffer: pixelBuffer) + let width = CVPixelBufferGetWidth(pixelBuffer) + let height = CVPixelBufferGetHeight(pixelBuffer) + let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer) - // Convert to grayscale - let grayscaleImage = ciImage.applyingFilter("CIPhotoEffectNoir") + guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else { + return nil + } - // Crop to eye region - let croppedImage = grayscaleImage.cropped(to: region) + let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer) + var grayscaleData = [UInt8](repeating: 0, count: width * height) - return croppedImage + let buffer = baseAddress.assumingMemoryBound(to: UInt8.self) + + switch pixelFormat { + case kCVPixelFormatType_32BGRA: + for y in 0.. CIImage? 
{ - // Apply bilateral filter (preserves edges while smoothing) - // CIBilateralFilter approximation: use CIMedianFilter + morphology - var processed = image + private static func isolateEyeWithMask( + frameData: [UInt8], + frameWidth: Int, + frameHeight: Int, + eyePoints: [CGPoint], + region: EyeRegion + ) -> (data: [UInt8], width: Int, height: Int)? { - // 1. Median filter (reduces noise while preserving edges) - processed = processed.applyingFilter("CIMedianFilter") + let minX = Int(region.frame.origin.x) + let minY = Int(region.frame.origin.y) + let eyeWidth = Int(region.frame.width) + let eyeHeight = Int(region.frame.height) - // 2. Morphological erosion (makes dark regions larger - approximates cv2.erode) - // Use CIMorphologyMinimum with small radius - processed = processed.applyingFilter("CIMorphologyMinimum", parameters: [ - kCIInputRadiusKey: 2.0 - ]) + guard eyeWidth > 0, eyeHeight > 0 else { return nil } - // 3. Threshold to binary (black/white) - // Use CIColorControls to increase contrast, then threshold - processed = processed.applyingFilter("CIColorControls", parameters: [ - kCIInputContrastKey: 2.0, - kCIInputBrightnessKey: -0.3 - ]) + // Create output buffer initialized to white (255) - outside mask + var eyeData = [UInt8](repeating: 255, count: eyeWidth * eyeHeight) - // Apply color threshold to make it binary - processed = processed.applyingFilter("CIColorThreshold", parameters: [ - "inputThreshold": 0.5 - ]) + // Convert eye points to local coordinates + let localPoints = eyePoints.map { point in + CGPoint(x: point.x - CGFloat(minX), y: point.y - CGFloat(minY)) + } + + // For each pixel in eye region, check if inside polygon + for y in 0..= 0, frameX < frameWidth, frameY >= 0, frameY < frameHeight { + let frameIndex = frameY * frameWidth + frameX + let eyeIndex = y * eyeWidth + x + eyeData[eyeIndex] = frameData[frameIndex] + } + } + } + } + + return (eyeData, eyeWidth, eyeHeight) + } + + /// Point-in-polygon test using ray casting algorithm + private static func pointInPolygon(point: CGPoint, polygon: [CGPoint]) -> Bool { + guard polygon.count >= 3 else { return false } + + var inside = false + var j = polygon.count - 1 + + for i in 0.. point.y) != (pj.y > point.y)) && + (point.x < (pj.x - pi.x) * (point.y - pi.y) / (pj.y - pi.y) + pi.x) { + inside = !inside + } + j = i + } + + return inside + } + + // MARK: - Step 5: Image Processing (matches Python image_processing) + + /// Performs operations on the eye frame to isolate the iris + /// Matches Python: bilateralFilter -> erode -> threshold + static func imageProcessing( + eyeData: [UInt8], + width: Int, + height: Int, + threshold: Int + ) -> [UInt8] { + var processed = eyeData + + // 1. Bilateral filter approximation + // Python: cv2.bilateralFilter(eye_frame, 10, 15, 15) + processed = bilateralFilter(data: processed, width: width, height: height, d: 10, sigmaColor: 15, sigmaSpace: 15) + + // 2. Erosion with 3x3 kernel, 3 iterations + // Python: cv2.erode(new_frame, kernel, iterations=3) + for _ in 0..<3 { + processed = erode3x3(data: processed, width: width, height: height) + } + + // 3. Binary threshold + // Python: cv2.threshold(new_frame, threshold, 255, cv2.THRESH_BINARY)[1] + processed = binaryThreshold(data: processed, width: width, height: height, threshold: threshold) return processed } - // MARK: - Step 5: Find Pupil Centroid - - private static func findPupilCentroid(in image: CIImage) -> PupilPosition? 
{ - let context = CIContext() + /// Bilateral filter approximation - preserves edges while smoothing + private static func bilateralFilter( + data: [UInt8], + width: Int, + height: Int, + d: Int, + sigmaColor: Double, + sigmaSpace: Double + ) -> [UInt8] { + var output = data + let radius = d / 2 - // Convert CIImage to CGImage for contour detection - guard let cgImage = context.createCGImage(image, from: image.extent) else { - return nil + // Precompute spatial Gaussian weights + var spatialWeights = [[Double]](repeating: [Double](repeating: 0, count: d), count: d) + for dy in 0.. (width: Int, height: Int, data: [UInt8])? { - let width = cgImage.width - let height = cgImage.height + /// Erosion with 3x3 kernel (minimum filter) + private static func erode3x3(data: [UInt8], width: Int, height: Int) -> [UInt8] { + var output = data - var data = [UInt8](repeating: 0, count: width * height) - - guard let context = CGContext( - data: &data, - width: width, - height: height, - bitsPerComponent: 8, - bytesPerRow: width, - space: CGColorSpaceCreateDeviceGray(), - bitmapInfo: CGImageAlphaInfo.none.rawValue - ) else { - return nil + for y in 1..<(height - 1) { + for x in 1..<(width - 1) { + var minVal: UInt8 = 255 + + for dy in -1...1 { + for dx in -1...1 { + let index = (y + dy) * width + (x + dx) + minVal = min(minVal, data[index]) + } + } + + output[y * width + x] = minVal + } } - context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height)) - - return (width, height, data) + return output } - // MARK: - Helper: Find Centroid of Largest Dark Region + /// Binary threshold + private static func binaryThreshold(data: [UInt8], width: Int, height: Int, threshold: Int) -> [UInt8] { + return data.map { pixel in + Int(pixel) > threshold ? UInt8(255) : UInt8(0) + } + } - private static func findLargestDarkRegionCentroid( + // MARK: - Step 6: Find Contours and Centroid (matches Python detect_iris) + + /// Finds contours, sorts by area, and returns centroid of second-largest + /// Matches Python: cv2.findContours + cv2.moments + private static func findPupilFromContours( data: [UInt8], width: Int, height: Int ) -> (x: Double, y: Double)? { - // Calculate image moments to find centroid - // m00 = sum of all pixels (area) - // m10 = sum of (x * pixel_value) - // m01 = sum of (y * pixel_value) - // centroid_x = m10 / m00 - // centroid_y = m01 / m00 + let contours = findContours(data: data, width: width, height: height) - var m00: Double = 0 - var m10: Double = 0 - var m01: Double = 0 + guard contours.count >= 2 else { + if let largest = contours.max(by: { $0.count < $1.count }) { + return computeCentroid(contour: largest) + } + return nil + } + + // Sort by area (pixel count) descending + let sorted = contours.sorted { $0.count > $1.count } + + // Use second-largest contour (matches Python: contours[-2] after ascending sort) + let targetContour = sorted[1] + + return computeCentroid(contour: targetContour) + } + + /// Finds connected components of black pixels (value == 0) + private static func findContours(data: [UInt8], width: Int, height: Int) -> [[(x: Int, y: Int)]] { + var visited = [Bool](repeating: false, count: width * height) + var contours: [[(x: Int, y: Int)]] = [] for y in 0.. 
128 { // Only count dark pixels - let weight = Double(pixelValue) - m00 += weight - m10 += Double(x) * weight - m01 += Double(y) * weight + if data[index] == 0 && !visited[index] { + var contour: [(x: Int, y: Int)] = [] + var stack = [(x, y)] + + while !stack.isEmpty { + let (cx, cy) = stack.removeLast() + let cIndex = cy * width + cx + + if cx < 0 || cx >= width || cy < 0 || cy >= height { + continue + } + if visited[cIndex] || data[cIndex] != 0 { + continue + } + + visited[cIndex] = true + contour.append((cx, cy)) + + // 8-connectivity + stack.append((cx + 1, cy)) + stack.append((cx - 1, cy)) + stack.append((cx, cy + 1)) + stack.append((cx, cy - 1)) + stack.append((cx + 1, cy + 1)) + stack.append((cx - 1, cy - 1)) + stack.append((cx + 1, cy - 1)) + stack.append((cx - 1, cy + 1)) + } + + if !contour.isEmpty { + contours.append(contour) + } } } } + return contours + } + + /// Computes centroid using image moments (matches cv2.moments) + private static func computeCentroid(contour: [(x: Int, y: Int)]) -> (x: Double, y: Double)? { + guard !contour.isEmpty else { return nil } + + let m00 = Double(contour.count) + let m10 = contour.reduce(0.0) { $0 + Double($1.x) } + let m01 = contour.reduce(0.0) { $0 + Double($1.y) } + guard m00 > 0 else { return nil } - let centroidX = m10 / m00 - let centroidY = m01 / m00 - - return (centroidX, centroidY) + return (m10 / m00, m01 / m00) } } diff --git a/Gaze/Views/EyeTrackingCalibrationView.swift b/Gaze/Views/EyeTrackingCalibrationView.swift new file mode 100644 index 0000000..9059ce9 --- /dev/null +++ b/Gaze/Views/EyeTrackingCalibrationView.swift @@ -0,0 +1,272 @@ +// +// EyeTrackingCalibrationView.swift +// Gaze +// +// Created by Mike Freno on 1/15/26. +// + +import SwiftUI + +struct EyeTrackingCalibrationView: View { + @StateObject private var calibrationManager = CalibrationManager.shared + @Environment(\.dismiss) private var dismiss + + @State private var countdownValue = 3 + @State private var isCountingDown = false + + var body: some View { + ZStack { + // Full-screen black background + Color.black.ignoresSafeArea() + + if calibrationManager.isCalibrating { + calibrationContentView + } else { + introductionScreenView + } + } + .frame(minWidth: 800, minHeight: 600) + } + + // MARK: - Introduction Screen + + private var introductionScreenView: some View { + VStack(spacing: 30) { + Image(systemName: "eye.circle.fill") + .font(.system(size: 80)) + .foregroundColor(.blue) + + Text("Eye Tracking Calibration") + .font(.largeTitle) + .fontWeight(.bold) + + Text("This calibration will help improve eye tracking accuracy.") + .font(.title3) + .multilineTextAlignment(.center) + .foregroundColor(.secondary) + + VStack(alignment: .leading, spacing: 15) { + InstructionRow(icon: "1.circle.fill", text: "Look at each target on the screen") + InstructionRow(icon: "2.circle.fill", text: "Keep your head still, only move your eyes") + InstructionRow(icon: "3.circle.fill", text: "Follow the countdown at each position") + InstructionRow(icon: "4.circle.fill", text: "Takes about 30-45 seconds") + } + .padding(.vertical, 20) + + if calibrationManager.calibrationData.isComplete { + VStack(spacing: 10) { + Text("Last calibration:") + .font(.caption) + .foregroundColor(.secondary) + Text(calibrationManager.getCalibrationSummary()) + .font(.caption) + .multilineTextAlignment(.center) + .foregroundColor(.secondary) + } + .padding(.vertical) + } + + HStack(spacing: 20) { + Button("Cancel") { + dismiss() + } + .keyboardShortcut(.escape, modifiers: []) + + Button("Start 
Calibration") { + startCalibration() + } + .keyboardShortcut(.return, modifiers: []) + .buttonStyle(.borderedProminent) + } + .padding(.top, 20) + } + .padding(60) + .frame(maxWidth: 600) + } + + // MARK: - Calibration Content + + private var calibrationContentView: some View { + ZStack { + // Progress indicator at top + VStack { + progressBar + Spacer() + } + + // Calibration target + if let step = calibrationManager.currentStep { + calibrationTarget(for: step) + } + + // Skip button at bottom + VStack { + Spacer() + skipButton + } + } + } + + // MARK: - Progress Bar + + private var progressBar: some View { + VStack(spacing: 10) { + HStack { + Text("Calibrating...") + .foregroundColor(.white) + Spacer() + Text(calibrationManager.progressText) + .foregroundColor(.white.opacity(0.7)) + } + + ProgressView(value: calibrationManager.progress) + .progressViewStyle(.linear) + .tint(.blue) + } + .padding() + .background(Color.black.opacity(0.5)) + } + + // MARK: - Calibration Target + + @ViewBuilder + private func calibrationTarget(for step: CalibrationStep) -> some View { + let position = targetPosition(for: step) + + VStack(spacing: 20) { + // Target circle with countdown + ZStack { + // Outer ring (pulsing) + Circle() + .stroke(Color.blue.opacity(0.3), lineWidth: 3) + .frame(width: 100, height: 100) + .scaleEffect(isCountingDown ? 1.2 : 1.0) + .animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: isCountingDown) + + // Inner circle + Circle() + .fill(Color.blue) + .frame(width: 60, height: 60) + + // Countdown number or checkmark + if isCountingDown && countdownValue > 0 { + Text("\(countdownValue)") + .font(.system(size: 36, weight: .bold)) + .foregroundColor(.white) + } else if calibrationManager.samplesCollected > 0 { + Image(systemName: "checkmark") + .font(.system(size: 30, weight: .bold)) + .foregroundColor(.white) + } + } + + // Instruction text + Text(step.instructionText) + .font(.title2) + .foregroundColor(.white) + .padding(.horizontal, 40) + .padding(.vertical, 15) + .background(Color.black.opacity(0.7)) + .cornerRadius(10) + } + .position(position) + .onAppear { + startStepCountdown() + } + } + + // MARK: - Skip Button + + private var skipButton: some View { + Button { + calibrationManager.skipStep() + } label: { + Text("Skip this position") + .foregroundColor(.white) + .padding(.horizontal, 20) + .padding(.vertical, 10) + .background(Color.white.opacity(0.2)) + .cornerRadius(8) + } + .padding(.bottom, 40) + } + + // MARK: - Helper Methods + + private func startCalibration() { + calibrationManager.startCalibration() + } + + private func startStepCountdown() { + countdownValue = 3 + isCountingDown = true + + // Countdown 3, 2, 1 + Timer.scheduledTimer(withTimeInterval: 1.0, repeats: true) { timer in + if countdownValue > 0 { + countdownValue -= 1 + } else { + timer.invalidate() + isCountingDown = false + } + } + } + + private func targetPosition(for step: CalibrationStep) -> CGPoint { + let screenBounds = NSScreen.main?.frame ?? 
CGRect(x: 0, y: 0, width: 1920, height: 1080) + let width = screenBounds.width + let height = screenBounds.height + + let centerX = width / 2 + let centerY = height / 2 + let margin: CGFloat = 150 + + switch step { + case .center: + return CGPoint(x: centerX, y: centerY) + case .left: + return CGPoint(x: centerX - width / 4, y: centerY) + case .right: + return CGPoint(x: centerX + width / 4, y: centerY) + case .farLeft: + return CGPoint(x: margin, y: centerY) + case .farRight: + return CGPoint(x: width - margin, y: centerY) + case .up: + return CGPoint(x: centerX, y: margin) + case .down: + return CGPoint(x: centerX, y: height - margin) + case .topLeft: + return CGPoint(x: margin, y: margin) + case .topRight: + return CGPoint(x: width - margin, y: margin) + case .bottomLeft: + return CGPoint(x: margin, y: height - margin) + case .bottomRight: + return CGPoint(x: width - margin, y: height - margin) + } + } +} + +// MARK: - Instruction Row + +struct InstructionRow: View { + let icon: String + let text: String + + var body: some View { + HStack(spacing: 15) { + Image(systemName: icon) + .font(.title2) + .foregroundColor(.blue) + .frame(width: 30) + + Text(text) + .font(.body) + } + } +} + +#Preview { + EyeTrackingCalibrationView() +} diff --git a/Gaze/Views/Setup/EnforceModeSetupView.swift b/Gaze/Views/Setup/EnforceModeSetupView.swift index ba8f93f..cadeb95 100644 --- a/Gaze/Views/Setup/EnforceModeSetupView.swift +++ b/Gaze/Views/Setup/EnforceModeSetupView.swift @@ -21,6 +21,8 @@ struct EnforceModeSetupView: View { @State private var showDebugView = false @State private var isViewActive = false @State private var showAdvancedSettings = false + @State private var showCalibrationWindow = false + @ObservedObject var calibrationManager = CalibrationManager.shared var body: some View { VStack(spacing: 0) { @@ -80,6 +82,7 @@ struct EnforceModeSetupView: View { if enforceModeService.isEnforceModeEnabled { testModeButton + calibrationSection } if isTestModeActive && enforceModeService.isCameraActive { @@ -150,6 +153,58 @@ struct EnforceModeSetupView: View { .buttonStyle(.borderedProminent) .controlSize(.large) } + + private var calibrationSection: some View { + VStack(alignment: .leading, spacing: 12) { + HStack { + Image(systemName: "target") + .font(.title3) + .foregroundColor(.blue) + Text("Eye Tracking Calibration") + .font(.headline) + } + + if calibrationManager.calibrationData.isComplete { + VStack(alignment: .leading, spacing: 8) { + Text(calibrationManager.getCalibrationSummary()) + .font(.caption) + .foregroundColor(.secondary) + + if calibrationManager.needsRecalibration() { + Label("Calibration expired - recalibration recommended", systemImage: "exclamationmark.triangle.fill") + .font(.caption) + .foregroundColor(.orange) + } else { + Label("Calibration active and valid", systemImage: "checkmark.circle.fill") + .font(.caption) + .foregroundColor(.green) + } + } + } else { + Text("Not calibrated - using default thresholds") + .font(.caption) + .foregroundColor(.secondary) + } + + Button(action: { + showCalibrationWindow = true + }) { + HStack { + Image(systemName: "target") + Text(calibrationManager.calibrationData.isComplete ? 
"Recalibrate" : "Run Calibration") + } + .frame(maxWidth: .infinity) + .padding(.vertical, 8) + } + .buttonStyle(.bordered) + .controlSize(.regular) + } + .padding() + .glassEffectIfAvailable(GlassStyle.regular.tint(.blue.opacity(0.1)), in: .rect(cornerRadius: 12)) + .sheet(isPresented: $showCalibrationWindow) { + EyeTrackingCalibrationView() + } + } private var testModePreviewView: some View { VStack(spacing: 16) {