diff --git a/Gaze/Services/EyeTrackingService.swift b/Gaze/Services/EyeTrackingService.swift
index 3e50010..869d9b0 100644
--- a/Gaze/Services/EyeTrackingService.swift
+++ b/Gaze/Services/EyeTrackingService.swift
@@ -9,6 +9,7 @@ import AVFoundation
 import Combine
 import Vision
 import simd
+import AppKit
 
 @MainActor
 class EyeTrackingService: NSObject, ObservableObject {
@@ -33,6 +34,21 @@ class EyeTrackingService: NSObject, ObservableObject {
         }
     }
 
+    // Debug eye images for UI display
+    @Published var debugLeftEyeInput: NSImage?
+    @Published var debugRightEyeInput: NSImage?
+    @Published var debugLeftEyeProcessed: NSImage?
+    @Published var debugRightEyeProcessed: NSImage?
+    @Published var debugLeftPupilPosition: PupilPosition?
+    @Published var debugRightPupilPosition: PupilPosition?
+    @Published var debugLeftEyeSize: CGSize?
+    @Published var debugRightEyeSize: CGSize?
+
+    // Eye region positions for video overlay
+    @Published var debugLeftEyeRegion: EyeRegion?
+    @Published var debugRightEyeRegion: EyeRegion?
+    @Published var debugImageSize: CGSize?
+
     // Computed gaze direction for UI overlay
     var gazeDirection: GazeDirection {
         guard let leftH = debugLeftPupilRatio,
@@ -474,41 +490,33 @@ class EyeTrackingService: NSObject, ObservableObject {
         return result
     }
 
-    /// Non-isolated gaze ratio calculation
+    /// Non-isolated horizontal gaze ratio calculation
+    /// pupilPosition.y controls horizontal gaze (left-right) due to image orientation
+    /// Returns 0.0 for left edge, 1.0 for right edge, 0.5 for center
     nonisolated private func calculateGazeRatioSync(
         pupilPosition: PupilPosition,
         eyeRegion: EyeRegion
     ) -> Double {
-        let pupilX = Double(pupilPosition.x)
-        let eyeCenterX = Double(eyeRegion.center.x)
-
-        let denominator = (eyeCenterX * 2.0 - 10.0)
-
-        guard denominator > 0 else {
-            let eyeLeft = Double(eyeRegion.frame.minX)
-            let eyeRight = Double(eyeRegion.frame.maxX)
-            let eyeWidth = eyeRight - eyeLeft
-            guard eyeWidth > 0 else { return 0.5 }
-            return (pupilX - eyeLeft) / eyeWidth
-        }
-
-        let ratio = pupilX / denominator
+        let pupilY = Double(pupilPosition.y)
+        let eyeHeight = Double(eyeRegion.frame.height)
+
+        guard eyeHeight > 0 else { return 0.5 }
+
+        let ratio = pupilY / eyeHeight
         return max(0.0, min(1.0, ratio))
     }
 
     /// Non-isolated vertical gaze ratio calculation
-    /// Returns 0.0 for looking up, 1.0 for looking down, 0.5 for center
+    /// pupilPosition.x controls vertical gaze (up-down) due to image orientation
+    /// Returns 0.0 for top edge (looking up), 1.0 for bottom edge (looking down), 0.5 for center
     nonisolated private func calculateVerticalRatioSync(
         pupilPosition: PupilPosition,
         eyeRegion: EyeRegion
     ) -> Double {
-        let pupilY = Double(pupilPosition.y)
-        let eyeTop = Double(eyeRegion.frame.minY)
-        let eyeBottom = Double(eyeRegion.frame.maxY)
-        let eyeHeight = eyeBottom - eyeTop
+        let pupilX = Double(pupilPosition.x)
+        let eyeWidth = Double(eyeRegion.frame.width)
 
-        guard eyeHeight > 0 else { return 0.5 }
+        guard eyeWidth > 0 else { return 0.5 }
 
-        // Normalize: 0.0 = top of eye region, 1.0 = bottom
-        let ratio = (pupilY - eyeTop) / eyeHeight
+        let ratio = pupilX / eyeWidth
         return max(0.0, min(1.0, ratio))
     }
@@ -780,6 +788,29 @@ extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate {
             self.debugRightVerticalRatio = result.debugRightVerticalRatio
             self.debugYaw = result.debugYaw
             self.debugPitch = result.debugPitch
+
+            // Update debug eye images from PupilDetector
+            if let leftInput = PupilDetector.debugLeftEyeInput {
+                self.debugLeftEyeInput = NSImage(cgImage: leftInput, size: NSSize(width: leftInput.width, height: leftInput.height))
+            }
+            if let rightInput = PupilDetector.debugRightEyeInput {
+                self.debugRightEyeInput = NSImage(cgImage: rightInput, size: NSSize(width: rightInput.width, height: rightInput.height))
+            }
+            if let leftProcessed = PupilDetector.debugLeftEyeProcessed {
+                self.debugLeftEyeProcessed = NSImage(cgImage: leftProcessed, size: NSSize(width: leftProcessed.width, height: leftProcessed.height))
+            }
+            if let rightProcessed = PupilDetector.debugRightEyeProcessed {
+                self.debugRightEyeProcessed = NSImage(cgImage: rightProcessed, size: NSSize(width: rightProcessed.width, height: rightProcessed.height))
+            }
+            self.debugLeftPupilPosition = PupilDetector.debugLeftPupilPosition
+            self.debugRightPupilPosition = PupilDetector.debugRightPupilPosition
+            self.debugLeftEyeSize = PupilDetector.debugLeftEyeSize
+            self.debugRightEyeSize = PupilDetector.debugRightEyeSize
+
+            // Update eye region positions for video overlay
+            self.debugLeftEyeRegion = PupilDetector.debugLeftEyeRegion
+            self.debugRightEyeRegion = PupilDetector.debugRightEyeRegion
+            self.debugImageSize = PupilDetector.debugImageSize
         }
     }
 
diff --git a/Gaze/Services/PupilDetector.swift b/Gaze/Services/PupilDetector.swift
index 58a183d..e7b26f0 100644
--- a/Gaze/Services/PupilDetector.swift
+++ b/Gaze/Services/PupilDetector.swift
@@ -42,21 +42,21 @@ enum GazeDirection: String, Sendable, CaseIterable {
     case downLeft = "↙"
     case down = "↓"
     case downRight = "↘"
-    
+
     /// Thresholds for direction detection
     /// Horizontal: 0.0 = looking right (from camera POV), 1.0 = looking left
    /// Vertical: 0.0 = looking up, 1.0 = looking down
-    private static let horizontalLeftThreshold = 0.55 // Above this = looking left
+    private static let horizontalLeftThreshold = 0.55  // Above this = looking left
     private static let horizontalRightThreshold = 0.45  // Below this = looking right
-    private static let verticalUpThreshold = 0.40 // Below this = looking up
-    private static let verticalDownThreshold = 0.60 // Above this = looking down
-    
+    private static let verticalUpThreshold = 0.40  // Below this = looking up
+    private static let verticalDownThreshold = 0.60  // Above this = looking down
+
     static func from(horizontal: Double, vertical: Double) -> GazeDirection {
         let isLeft = horizontal > horizontalLeftThreshold
         let isRight = horizontal < horizontalRightThreshold
         let isUp = vertical < verticalUpThreshold
         let isDown = vertical > verticalDownThreshold
-        
+
         if isUp {
             if isLeft { return .upLeft }
             if isRight { return .upRight }
@@ -71,7 +71,7 @@ enum GazeDirection: String, Sendable, CaseIterable {
             return .center
         }
     }
-    
+
     /// Grid position (0-2 for x and y)
     var gridPosition: (x: Int, y: Int) {
         switch self {
@@ -226,6 +226,21 @@ final class PupilDetector: @unchecked Sendable {
         nil, nil
     )
     private nonisolated(unsafe) static var _metrics = PupilDetectorMetrics()
+
+    // Debug images for UI display
+    nonisolated(unsafe) static var debugLeftEyeInput: CGImage?
+    nonisolated(unsafe) static var debugRightEyeInput: CGImage?
+    nonisolated(unsafe) static var debugLeftEyeProcessed: CGImage?
+    nonisolated(unsafe) static var debugRightEyeProcessed: CGImage?
+    nonisolated(unsafe) static var debugLeftPupilPosition: PupilPosition?
+    nonisolated(unsafe) static var debugRightPupilPosition: PupilPosition?
+    nonisolated(unsafe) static var debugLeftEyeSize: CGSize?
+    nonisolated(unsafe) static var debugRightEyeSize: CGSize?
+
+    // Eye region positions in image coordinates (for drawing on video)
+    nonisolated(unsafe) static var debugLeftEyeRegion: EyeRegion?
+    nonisolated(unsafe) static var debugRightEyeRegion: EyeRegion?
+    nonisolated(unsafe) static var debugImageSize: CGSize?
 
     nonisolated(unsafe) static let calibration = PupilCalibration()
 
@@ -356,6 +371,14 @@ final class PupilDetector: @unchecked Sendable {
             }
             return nil
         }
+
+        // Store eye region for debug overlay
+        if side == 0 {
+            debugLeftEyeRegion = eyeRegion
+        } else {
+            debugRightEyeRegion = eyeRegion
+        }
+        debugImageSize = imageSize
 
         let frameWidth = CVPixelBufferGetWidth(pixelBuffer)
         let frameHeight = CVPixelBufferGetHeight(pixelBuffer)
@@ -435,13 +458,15 @@ final class PupilDetector: @unchecked Sendable {
 
         // Debug: Save input eye image before processing
         if enableDebugImageSaving && debugImageCounter < 20 {
-            NSLog("📸 Saving eye_input_%d - %dx%d, side=%d, region=(%.0f,%.0f,%.0f,%.0f)",
-                debugImageCounter, eyeWidth, eyeHeight, side,
-                eyeRegion.frame.origin.x, eyeRegion.frame.origin.y,
-                eyeRegion.frame.width, eyeRegion.frame.height)
-            
+            NSLog(
+                "📸 Saving eye_input_%d - %dx%d, side=%d, region=(%.0f,%.0f,%.0f,%.0f)",
+                debugImageCounter, eyeWidth, eyeHeight, side,
+                eyeRegion.frame.origin.x, eyeRegion.frame.origin.y,
+                eyeRegion.frame.width, eyeRegion.frame.height)
+
             // Debug: Print pixel value statistics for input
-            var minVal: UInt8 = 255, maxVal: UInt8 = 0
+            var minVal: UInt8 = 255
+            var maxVal: UInt8 = 0
             var sum: Int = 0
             var darkCount = 0  // pixels <= 90
             for i in 0..<(eyeWidth * eyeHeight) {
@@ -452,8 +477,10 @@ final class PupilDetector: @unchecked Sendable {
                 if v <= 90 { darkCount += 1 }
             }
             let avgVal = Double(sum) / Double(eyeWidth * eyeHeight)
-            NSLog("📊 Eye input stats: min=%d, max=%d, avg=%.1f, darkPixels(<=90)=%d", minVal, maxVal, avgVal, darkCount)
-            
+            NSLog(
+                "📊 Eye input stats: min=%d, max=%d, avg=%.1f, darkPixels(<=90)=%d", minVal, maxVal,
+                avgVal, darkCount)
+
             saveDebugImage(
                 data: eyeBuf, width: eyeWidth, height: eyeHeight,
                 name: "eye_input_\(debugImageCounter)")
@@ -466,6 +493,20 @@ final class PupilDetector: @unchecked Sendable {
             height: eyeHeight,
             threshold: effectiveThreshold
         )
+
+        // Capture debug images for UI display
+        let inputImage = createCGImage(from: eyeBuf, width: eyeWidth, height: eyeHeight)
+        let processedImage = createCGImage(from: tmpBuf, width: eyeWidth, height: eyeHeight)
+        let eyeSize = CGSize(width: eyeWidth, height: eyeHeight)
+        if side == 0 {
+            debugLeftEyeInput = inputImage
+            debugLeftEyeProcessed = processedImage
+            debugLeftEyeSize = eyeSize
+        } else {
+            debugRightEyeInput = inputImage
+            debugRightEyeProcessed = processedImage
+            debugRightEyeSize = eyeSize
+        }
 
         // Debug: Save processed images if enabled
         if enableDebugImageSaving && debugImageCounter < 10 {
@@ -473,11 +514,10 @@ final class PupilDetector: @unchecked Sendable {
             var darkCount = 0  // pixels == 0 (black)
             var whiteCount = 0  // pixels == 255 (white)
             for i in 0..<(eyeWidth * eyeHeight) {
-                if tmpBuf[i] == 0 { darkCount += 1 }
-                else if tmpBuf[i] == 255 { whiteCount += 1 }
+                if tmpBuf[i] == 0 { darkCount += 1 } else if tmpBuf[i] == 255 { whiteCount += 1 }
             }
             NSLog("📊 Processed output stats: darkPixels=%d, whitePixels=%d", darkCount, whiteCount)
-            
+
             saveDebugImage(
                 data: tmpBuf, width: eyeWidth, height: eyeHeight,
                 name: "processed_eye_\(debugImageCounter)")
@@ -493,22 +533,28 @@ final class PupilDetector: @unchecked Sendable {
         ) else {
             if enableDiagnosticLogging {
-                logDebug("👁 PupilDetector: Failed - findPupilFromContours returned nil (not enough dark pixels) for side \(side)")
+                logDebug(
+                    "👁 PupilDetector: Failed - findPupilFromContours returned nil (not enough dark pixels) for side \(side)"
+                )
             }
             return nil
         }
 
         if enableDiagnosticLogging {
-            logDebug("👁 PupilDetector: Success side=\(side) - centroid at (\(String(format: "%.1f", centroidX)), \(String(format: "%.1f", centroidY))) in \(eyeWidth)x\(eyeHeight) region")
+            logDebug(
+                "👁 PupilDetector: Success side=\(side) - centroid at (\(String(format: "%.1f", centroidX)), \(String(format: "%.1f", centroidY))) in \(eyeWidth)x\(eyeHeight) region"
+            )
         }
 
         let pupilPosition = PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY))
 
-        // Cache result
+        // Cache result and debug position
         if side == 0 {
             lastPupilPositions.left = pupilPosition
+            debugLeftPupilPosition = pupilPosition
         } else {
             lastPupilPositions.right = pupilPosition
+            debugRightPupilPosition = pupilPosition
         }
 
         return (pupilPosition, eyeRegion)
diff --git a/Gaze/Views/Components/GazeOverlayView.swift b/Gaze/Views/Components/GazeOverlayView.swift
index e83d39f..e91cb95 100644
--- a/Gaze/Views/Components/GazeOverlayView.swift
+++ b/Gaze/Views/Components/GazeOverlayView.swift
@@ -15,6 +15,7 @@ struct GazeOverlayView: View {
             inFrameIndicator
             gazeDirectionGrid
             ratioDebugView
+            eyeImagesDebugView
         }
         .padding(12)
     }
@@ -89,20 +90,43 @@ struct GazeOverlayView: View {
 
     private var ratioDebugView: some View {
         VStack(alignment: .leading, spacing: 2) {
-            if let leftH = eyeTrackingService.debugLeftPupilRatio,
-                let rightH = eyeTrackingService.debugRightPupilRatio {
-                let avgH = (leftH + rightH) / 2.0
-                Text("H: \(String(format: "%.2f", avgH))")
-                    .font(.system(size: 10, weight: .medium, design: .monospaced))
-                    .foregroundColor(.white)
+            // Show individual L/R ratios
+            HStack(spacing: 8) {
+                if let leftH = eyeTrackingService.debugLeftPupilRatio {
+                    Text("L.H: \(String(format: "%.2f", leftH))")
+                        .font(.system(size: 9, weight: .medium, design: .monospaced))
+                        .foregroundColor(.white)
+                }
+                if let rightH = eyeTrackingService.debugRightPupilRatio {
+                    Text("R.H: \(String(format: "%.2f", rightH))")
+                        .font(.system(size: 9, weight: .medium, design: .monospaced))
+                        .foregroundColor(.white)
+                }
             }
 
-            if let leftV = eyeTrackingService.debugLeftVerticalRatio,
+            HStack(spacing: 8) {
+                if let leftV = eyeTrackingService.debugLeftVerticalRatio {
+                    Text("L.V: \(String(format: "%.2f", leftV))")
+                        .font(.system(size: 9, weight: .medium, design: .monospaced))
+                        .foregroundColor(.white)
+                }
+                if let rightV = eyeTrackingService.debugRightVerticalRatio {
+                    Text("R.V: \(String(format: "%.2f", rightV))")
+                        .font(.system(size: 9, weight: .medium, design: .monospaced))
+                        .foregroundColor(.white)
+                }
+            }
+
+            // Show averaged ratios
+            if let leftH = eyeTrackingService.debugLeftPupilRatio,
+                let rightH = eyeTrackingService.debugRightPupilRatio,
+                let leftV = eyeTrackingService.debugLeftVerticalRatio,
                 let rightV = eyeTrackingService.debugRightVerticalRatio {
+                let avgH = (leftH + rightH) / 2.0
                 let avgV = (leftV + rightV) / 2.0
-                Text("V: \(String(format: "%.2f", avgV))")
-                    .font(.system(size: 10, weight: .medium, design: .monospaced))
-                    .foregroundColor(.white)
+                Text("Avg H:\(String(format: "%.2f", avgH)) V:\(String(format: "%.2f", avgV))")
+                    .font(.system(size: 9, weight: .bold, design: .monospaced))
+                    .foregroundColor(.yellow)
             }
         }
         .padding(.horizontal, 8)
@@ -112,6 +136,105 @@ struct GazeOverlayView: View {
                 .fill(Color.black.opacity(0.5))
         )
     }
+
+    private var eyeImagesDebugView: some View {
+        HStack(spacing: 12) {
+            // Left eye
+            VStack(spacing: 4) {
+                Text("Left")
+                    .font(.system(size: 8, weight: .bold))
+                    .foregroundColor(.white)
+
+                HStack(spacing: 4) {
+                    eyeImageView(
+                        image: eyeTrackingService.debugLeftEyeInput,
+                        pupilPosition: eyeTrackingService.debugLeftPupilPosition,
+                        eyeSize: eyeTrackingService.debugLeftEyeSize,
+                        label: "Input"
+                    )
+                    eyeImageView(
+                        image: eyeTrackingService.debugLeftEyeProcessed,
+                        pupilPosition: eyeTrackingService.debugLeftPupilPosition,
+                        eyeSize: eyeTrackingService.debugLeftEyeSize,
+                        label: "Proc"
+                    )
+                }
+            }
+
+            // Right eye
+            VStack(spacing: 4) {
+                Text("Right")
+                    .font(.system(size: 8, weight: .bold))
+                    .foregroundColor(.white)
+
+                HStack(spacing: 4) {
+                    eyeImageView(
+                        image: eyeTrackingService.debugRightEyeInput,
+                        pupilPosition: eyeTrackingService.debugRightPupilPosition,
+                        eyeSize: eyeTrackingService.debugRightEyeSize,
+                        label: "Input"
+                    )
+                    eyeImageView(
+                        image: eyeTrackingService.debugRightEyeProcessed,
+                        pupilPosition: eyeTrackingService.debugRightPupilPosition,
+                        eyeSize: eyeTrackingService.debugRightEyeSize,
+                        label: "Proc"
+                    )
+                }
+            }
+        }
+        .padding(8)
+        .background(
+            RoundedRectangle(cornerRadius: 8)
+                .fill(Color.black.opacity(0.5))
+        )
+    }
+
+    private func eyeImageView(image: NSImage?, pupilPosition: PupilPosition?, eyeSize: CGSize?, label: String) -> some View {
+        let displaySize: CGFloat = 50
+
+        return VStack(spacing: 2) {
+            ZStack {
+                if let nsImage = image {
+                    Image(nsImage: nsImage)
+                        .resizable()
+                        .interpolation(.none)
+                        .aspectRatio(contentMode: .fit)
+                        .frame(width: displaySize, height: displaySize)
+
+                    // Draw pupil position marker
+                    if let pupil = pupilPosition, let size = eyeSize, size.width > 0, size.height > 0 {
+                        let scaleX = displaySize / size.width
+                        let scaleY = displaySize / size.height
+                        let scale = min(scaleX, scaleY)
+                        let scaledWidth = size.width * scale
+                        let scaledHeight = size.height * scale
+
+                        Circle()
+                            .fill(Color.red)
+                            .frame(width: 4, height: 4)
+                            .offset(
+                                x: (pupil.x * scale) - (scaledWidth / 2),
+                                y: (pupil.y * scale) - (scaledHeight / 2)
+                            )
+                    }
+                } else {
+                    RoundedRectangle(cornerRadius: 4)
+                        .fill(Color.gray.opacity(0.3))
+                        .frame(width: displaySize, height: displaySize)
+                    Text("--")
+                        .font(.system(size: 10))
+                        .foregroundColor(.white.opacity(0.5))
+                }
+            }
+            .frame(width: displaySize, height: displaySize)
+            .clipShape(RoundedRectangle(cornerRadius: 4))
+
+            Text(label)
+                .font(.system(size: 7))
+                .foregroundColor(.white.opacity(0.7))
+        }
+    }
 }
 
 #Preview {
@@ -119,5 +242,5 @@ struct GazeOverlayView: View {
         Color.gray
         GazeOverlayView(eyeTrackingService: EyeTrackingService.shared)
     }
-    .frame(width: 300, height: 200)
+    .frame(width: 400, height: 400)
 }
diff --git a/Gaze/Views/Components/PupilOverlayView.swift b/Gaze/Views/Components/PupilOverlayView.swift
new file mode 100644
index 0000000..b392fb9
--- /dev/null
+++ b/Gaze/Views/Components/PupilOverlayView.swift
@@ -0,0 +1,146 @@
+//
+//  PupilOverlayView.swift
+//  Gaze
+//
+//  Created by Claude on 1/16/26.
+//
+
+import SwiftUI
+
+/// Draws pupil detection markers directly on top of the camera preview
+struct PupilOverlayView: View {
+    @ObservedObject var eyeTrackingService: EyeTrackingService
+
+    var body: some View {
+        GeometryReader { geometry in
+            let viewSize = geometry.size
+
+            // Draw eye regions and pupil markers
+            ZStack {
+                // Left eye
+                if let leftRegion = eyeTrackingService.debugLeftEyeRegion,
+                    let leftPupil = eyeTrackingService.debugLeftPupilPosition,
+                    let imageSize = eyeTrackingService.debugImageSize {
+                    EyeOverlayShape(
+                        eyeRegion: leftRegion,
+                        pupilPosition: leftPupil,
+                        imageSize: imageSize,
+                        viewSize: viewSize,
+                        color: .cyan,
+                        label: "L"
+                    )
+                }
+
+                // Right eye
+                if let rightRegion = eyeTrackingService.debugRightEyeRegion,
+                    let rightPupil = eyeTrackingService.debugRightPupilPosition,
+                    let imageSize = eyeTrackingService.debugImageSize {
+                    EyeOverlayShape(
+                        eyeRegion: rightRegion,
+                        pupilPosition: rightPupil,
+                        imageSize: imageSize,
+                        viewSize: viewSize,
+                        color: .yellow,
+                        label: "R"
+                    )
+                }
+            }
+        }
+    }
+}
+
+/// Helper view for drawing eye overlay
+private struct EyeOverlayShape: View {
+    let eyeRegion: EyeRegion
+    let pupilPosition: PupilPosition
+    let imageSize: CGSize
+    let viewSize: CGSize
+    let color: Color
+    let label: String
+
+    private var transformedCoordinates: (eyeRect: CGRect, pupilPoint: CGPoint) {
+        // Calculate the aspect-fit scaling
+        let imageAspect = imageSize.width / imageSize.height
+        let viewAspect = viewSize.width / viewSize.height
+
+        let scale: CGFloat
+        let offsetX: CGFloat
+        let offsetY: CGFloat
+
+        if imageAspect > viewAspect {
+            // Image is wider - letterbox top/bottom
+            scale = viewSize.width / imageSize.width
+            offsetX = 0
+            offsetY = (viewSize.height - imageSize.height * scale) / 2
+        } else {
+            // Image is taller - pillarbox left/right
+            scale = viewSize.height / imageSize.height
+            offsetX = (viewSize.width - imageSize.width * scale) / 2
+            offsetY = 0
+        }
+
+        // Convert eye region frame from image coordinates to view coordinates
+        // Note: The image is mirrored horizontally in the preview
+        let mirroredX = imageSize.width - eyeRegion.frame.origin.x - eyeRegion.frame.width
+
+        let eyeViewX = mirroredX * scale + offsetX
+        let eyeViewY = eyeRegion.frame.origin.y * scale + offsetY
+        let eyeViewWidth = eyeRegion.frame.width * scale
+        let eyeViewHeight = eyeRegion.frame.height * scale
+
+        // Calculate pupil position in view coordinates
+        // pupilPosition is in local eye region coordinates (0 to eyeWidth, 0 to eyeHeight)
+        // Need to mirror the X coordinate within the eye region
+        let mirroredPupilX = eyeRegion.frame.width - pupilPosition.x
+        let pupilViewX = eyeViewX + mirroredPupilX * scale
+        let pupilViewY = eyeViewY + pupilPosition.y * scale
+
+        return (
+            eyeRect: CGRect(x: eyeViewX, y: eyeViewY, width: eyeViewWidth, height: eyeViewHeight),
+            pupilPoint: CGPoint(x: pupilViewX, y: pupilViewY)
+        )
+    }
+
+    var body: some View {
+        let coords = transformedCoordinates
+        let eyeRect = coords.eyeRect
+        let pupilPoint = coords.pupilPoint
+
+        ZStack {
+            // Eye region rectangle
+            Rectangle()
+                .stroke(color, lineWidth: 2)
+                .frame(width: eyeRect.width, height: eyeRect.height)
+                .position(x: eyeRect.midX, y: eyeRect.midY)
+
+            // Pupil marker (red dot)
+            Circle()
+                .fill(Color.red)
+                .frame(width: 8, height: 8)
+                .position(x: pupilPoint.x, y: pupilPoint.y)
+
+            // Crosshair at pupil position
+            Path { path in
+                path.move(to: CGPoint(x: pupilPoint.x - 6, y: pupilPoint.y))
+                path.addLine(to: CGPoint(x: pupilPoint.x + 6, y: pupilPoint.y))
+                path.move(to: CGPoint(x: pupilPoint.x, y: pupilPoint.y - 6))
+                path.addLine(to: CGPoint(x: pupilPoint.x, y: pupilPoint.y + 6))
+            }
+            .stroke(Color.red, lineWidth: 1)
+
+            // Label
+            Text(label)
+                .font(.system(size: 10, weight: .bold))
+                .foregroundColor(color)
+                .position(x: eyeRect.minX + 8, y: eyeRect.minY - 8)
+        }
+    }
+}
+
+#Preview {
+    ZStack {
+        Color.black
+        PupilOverlayView(eyeTrackingService: EyeTrackingService.shared)
+    }
+    .frame(width: 400, height: 300)
+}
diff --git a/Gaze/Views/Setup/EnforceModeSetupView.swift b/Gaze/Views/Setup/EnforceModeSetupView.swift
index 4c4131e..3e71fe8 100644
--- a/Gaze/Views/Setup/EnforceModeSetupView.swift
+++ b/Gaze/Views/Setup/EnforceModeSetupView.swift
@@ -212,10 +212,20 @@ struct EnforceModeSetupView: View {
         let previewLayer = eyeTrackingService.previewLayer ?? cachedPreviewLayer
 
         if let layer = previewLayer {
-            ZStack(alignment: .topTrailing) {
+            ZStack {
                 CameraPreviewView(previewLayer: layer, borderColor: borderColor)
 
-                GazeOverlayView(eyeTrackingService: eyeTrackingService)
+                // Pupil detection overlay (drawn on video)
+                PupilOverlayView(eyeTrackingService: eyeTrackingService)
+
+                // Debug info overlay (top-right corner)
+                VStack {
+                    HStack {
+                        Spacer()
+                        GazeOverlayView(eyeTrackingService: eyeTrackingService)
+                    }
+                    Spacer()
+                }
             }
             .frame(height: 300)
             .glassEffectIfAvailable(GlassStyle.regular, in: .rect(cornerRadius: 12))
diff --git a/GazeTests/VideoGazeTests.swift b/GazeTests/VideoGazeTests.swift
new file mode 100644
index 0000000..90c0042
--- /dev/null
+++ b/GazeTests/VideoGazeTests.swift
@@ -0,0 +1,205 @@
+//
+//  VideoGazeTests.swift
+//  GazeTests
+//
+//  Created by Claude on 1/16/26.
+//
+
+import XCTest
+import AVFoundation
+import Vision
+@testable import Gaze
+
+final class VideoGazeTests: XCTestCase {
+
+    var logLines: [String] = []
+
+    private func log(_ message: String) {
+        logLines.append(message)
+    }
+
+    /// Process the outer video and log gaze detection results
+    func testOuterVideoGazeDetection() async throws {
+        logLines = []
+
+        let projectPath = "/Users/mike/Code/Gaze/GazeTests/video-test-outer.mp4"
+        guard FileManager.default.fileExists(atPath: projectPath) else {
+            XCTFail("Video file not found at: \(projectPath)")
+            return
+        }
+        try await processVideo(at: URL(fileURLWithPath: projectPath))
+    }
+
+    /// Process the inner video and log gaze detection results
+    func testInnerVideoGazeDetection() async throws {
+        logLines = []
+
+        let projectPath = "/Users/mike/Code/Gaze/GazeTests/video-test-inner.mp4"
+        guard FileManager.default.fileExists(atPath: projectPath) else {
+            XCTFail("Video file not found at: \(projectPath)")
+            return
+        }
+        try await processVideo(at: URL(fileURLWithPath: projectPath))
+    }
+
+    private func processVideo(at url: URL) async throws {
+        log("\n" + String(repeating: "=", count: 60))
+        log("Processing video: \(url.lastPathComponent)")
+        log(String(repeating: "=", count: 60))
+
+        let asset = AVURLAsset(url: url)
+        let duration = try await asset.load(.duration)
+        let durationSeconds = CMTimeGetSeconds(duration)
+        log("Duration: \(String(format: "%.2f", durationSeconds)) seconds")
+
+        guard let track = try await asset.loadTracks(withMediaType: .video).first else {
+            XCTFail("No video track found")
+            return
+        }
+
+        let size = try await track.load(.naturalSize)
+        let frameRate = try await track.load(.nominalFrameRate)
+        log("Size: \(Int(size.width))x\(Int(size.height)), FPS: \(String(format: "%.1f", frameRate))")
+
+        let reader = try AVAssetReader(asset: asset)
+        let outputSettings: [String: Any] = [
+            kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
+        ]
+        let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)
+        reader.add(trackOutput)
+        reader.startReading()
+
+        var frameIndex = 0
+        let sampleInterval = max(1, Int(frameRate / 2))  // Sample ~2 frames per second
+
+        log("\nFrame | Time | Face | H-Ratio L/R | V-Ratio L/R | Direction")
+        log(String(repeating: "-", count: 75))
+
+        // Reset calibration for fresh test
+        PupilDetector.calibration.reset()
+
+        // Disable frame skipping for video testing
+        let originalFrameSkip = PupilDetector.frameSkipCount
+        PupilDetector.frameSkipCount = 1
+        defer { PupilDetector.frameSkipCount = originalFrameSkip }
+
+        var totalFrames = 0
+        var faceDetectedFrames = 0
+        var pupilDetectedFrames = 0
+
+        while let sampleBuffer = trackOutput.copyNextSampleBuffer() {
+            defer {
+                frameIndex += 1
+                PupilDetector.advanceFrame()
+            }
+
+            // Only process every Nth frame
+            if frameIndex % sampleInterval != 0 {
+                continue
+            }
+
+            totalFrames += 1
+
+            guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
+                continue
+            }
+
+            let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+            let timeSeconds = CMTimeGetSeconds(timestamp)
+
+            // Run face detection
+            let request = VNDetectFaceLandmarksRequest()
+            request.revision = VNDetectFaceLandmarksRequestRevision3
+
+            let handler = VNImageRequestHandler(
+                cvPixelBuffer: pixelBuffer,
+                orientation: .leftMirrored,
+                options: [:]
+            )
+
+            try handler.perform([request])
+
+            guard let observations = request.results, !observations.isEmpty,
+                let face = observations.first,
+                let landmarks = face.landmarks,
+                let leftEye = landmarks.leftEye,
+                let rightEye = landmarks.rightEye else {
+                log(String(format: "%5d | %5.1fs | NO | - | - | -", frameIndex, timeSeconds))
+                continue
+            }
+
+            faceDetectedFrames += 1
+
+            let imageSize = CGSize(
+                width: CVPixelBufferGetWidth(pixelBuffer),
+                height: CVPixelBufferGetHeight(pixelBuffer)
+            )
+
+            // Detect pupils
+            var leftHRatio: Double?
+            var rightHRatio: Double?
+            var leftVRatio: Double?
+            var rightVRatio: Double?
+
+            if let leftResult = PupilDetector.detectPupil(
+                in: pixelBuffer,
+                eyeLandmarks: leftEye,
+                faceBoundingBox: face.boundingBox,
+                imageSize: imageSize,
+                side: 0
+            ) {
+                leftHRatio = calculateHorizontalRatio(pupilPosition: leftResult.pupilPosition, eyeRegion: leftResult.eyeRegion)
+                leftVRatio = calculateVerticalRatio(pupilPosition: leftResult.pupilPosition, eyeRegion: leftResult.eyeRegion)
+            }
+
+            if let rightResult = PupilDetector.detectPupil(
+                in: pixelBuffer,
+                eyeLandmarks: rightEye,
+                faceBoundingBox: face.boundingBox,
+                imageSize: imageSize,
+                side: 1
+            ) {
+                rightHRatio = calculateHorizontalRatio(pupilPosition: rightResult.pupilPosition, eyeRegion: rightResult.eyeRegion)
+                rightVRatio = calculateVerticalRatio(pupilPosition: rightResult.pupilPosition, eyeRegion: rightResult.eyeRegion)
+            }
+
+            if let lh = leftHRatio, let rh = rightHRatio,
+                let lv = leftVRatio, let rv = rightVRatio {
+                pupilDetectedFrames += 1
+                let avgH = (lh + rh) / 2.0
+                let avgV = (lv + rv) / 2.0
+                let direction = GazeDirection.from(horizontal: avgH, vertical: avgV)
+                log(String(format: "%5d | %5.1fs | YES | %.2f / %.2f | %.2f / %.2f | %@ %@",
+                    frameIndex, timeSeconds, lh, rh, lv, rv, direction.rawValue, String(describing: direction)))
+            } else {
+                log(String(format: "%5d | %5.1fs | YES | PUPIL FAIL | PUPIL FAIL | -", frameIndex, timeSeconds))
+            }
+        }
+
+        log(String(repeating: "=", count: 75))
+        log("Summary: \(totalFrames) frames sampled, \(faceDetectedFrames) with face, \(pupilDetectedFrames) with pupils")
+        log("Processing complete\n")
+    }
+
+    private func calculateHorizontalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
+        // pupilPosition.y controls horizontal gaze due to image orientation
+        let pupilY = Double(pupilPosition.y)
+        let eyeHeight = Double(eyeRegion.frame.height)
+
+        guard eyeHeight > 0 else { return 0.5 }
+
+        let ratio = pupilY / eyeHeight
+        return max(0.0, min(1.0, ratio))
+    }
+
+    private func calculateVerticalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
+        // pupilPosition.x controls vertical gaze due to image orientation
+        let pupilX = Double(pupilPosition.x)
+        let eyeWidth = Double(eyeRegion.frame.width)
+
+        guard eyeWidth > 0 else { return 0.5 }
+
+        let ratio = pupilX / eyeWidth
+        return max(0.0, min(1.0, ratio))
+    }
+}
diff --git a/GazeTests/video-test-inner.mp4 b/GazeTests/video-test-inner.mp4
new file mode 100644
index 0000000..c0feef3
Binary files /dev/null and b/GazeTests/video-test-inner.mp4 differ
diff --git a/GazeTests/video-test-outer.mp4 b/GazeTests/video-test-outer.mp4
new file mode 100644
index 0000000..9898a25
Binary files /dev/null and b/GazeTests/video-test-outer.mp4 differ