oh baby boy what is happening

Michael Freno
2026-01-16 13:04:45 -05:00
parent 1cb9a2d4d4
commit 127ec66895
8 changed files with 618 additions and 57 deletions

View File

@@ -9,6 +9,7 @@ import AVFoundation
import Combine
import Vision
import simd
import AppKit
@MainActor
class EyeTrackingService: NSObject, ObservableObject {
@@ -33,6 +34,21 @@ class EyeTrackingService: NSObject, ObservableObject {
}
}
// Debug eye images for UI display
@Published var debugLeftEyeInput: NSImage?
@Published var debugRightEyeInput: NSImage?
@Published var debugLeftEyeProcessed: NSImage?
@Published var debugRightEyeProcessed: NSImage?
@Published var debugLeftPupilPosition: PupilPosition?
@Published var debugRightPupilPosition: PupilPosition?
@Published var debugLeftEyeSize: CGSize?
@Published var debugRightEyeSize: CGSize?
// Eye region positions for video overlay
@Published var debugLeftEyeRegion: EyeRegion?
@Published var debugRightEyeRegion: EyeRegion?
@Published var debugImageSize: CGSize?
// Computed gaze direction for UI overlay
var gazeDirection: GazeDirection {
guard let leftH = debugLeftPupilRatio,
@@ -474,41 +490,33 @@ class EyeTrackingService: NSObject, ObservableObject {
return result
}
/// Non-isolated horizontal gaze ratio calculation
/// pupilPosition.y controls horizontal gaze (left-right) due to image orientation
/// Returns 0.0 for left edge, 1.0 for right edge, 0.5 for center
nonisolated private func calculateGazeRatioSync(
pupilPosition: PupilPosition, eyeRegion: EyeRegion
) -> Double {
let pupilY = Double(pupilPosition.y)
let eyeHeight = Double(eyeRegion.frame.height)
guard eyeHeight > 0 else { return 0.5 }
let ratio = pupilY / eyeHeight
return max(0.0, min(1.0, ratio))
}
/// Non-isolated vertical gaze ratio calculation
/// pupilPosition.x controls vertical gaze (up-down) due to image orientation
/// Returns 0.0 for top edge (looking up), 1.0 for bottom edge (looking down), 0.5 for center
nonisolated private func calculateVerticalRatioSync(
pupilPosition: PupilPosition, eyeRegion: EyeRegion
) -> Double {
let pupilX = Double(pupilPosition.x)
let eyeWidth = Double(eyeRegion.frame.width)
guard eyeWidth > 0 else { return 0.5 }
let ratio = pupilX / eyeWidth
return max(0.0, min(1.0, ratio))
}
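The axis swap in these two helpers comes from the rotated camera frame: the pupil's y offset drives left-right gaze, and its x offset drives up-down gaze. A minimal worked example of the math (a standalone sketch, not part of this commit; the pupil and region values are hypothetical):
// Hypothetical detection: pupil at (x: 12, y: 45) inside a 40x60 eye region.
let pupilX = 12.0, pupilY = 45.0
let eyeWidth = 40.0, eyeHeight = 60.0
// Horizontal gaze reads the y axis: 45 / 60 = 0.75 (toward the left edge).
let horizontal = max(0.0, min(1.0, pupilY / eyeHeight))
// Vertical gaze reads the x axis: 12 / 40 = 0.30 (toward the top edge).
let vertical = max(0.0, min(1.0, pupilX / eyeWidth))
With the GazeDirection thresholds defined below, (0.75, 0.30) classifies as up-left.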
@@ -780,6 +788,29 @@ extension EyeTrackingService: AVCaptureVideoDataOutputSampleBufferDelegate {
self.debugRightVerticalRatio = result.debugRightVerticalRatio
self.debugYaw = result.debugYaw
self.debugPitch = result.debugPitch
// Update debug eye images from PupilDetector
if let leftInput = PupilDetector.debugLeftEyeInput {
self.debugLeftEyeInput = NSImage(cgImage: leftInput, size: NSSize(width: leftInput.width, height: leftInput.height))
}
if let rightInput = PupilDetector.debugRightEyeInput {
self.debugRightEyeInput = NSImage(cgImage: rightInput, size: NSSize(width: rightInput.width, height: rightInput.height))
}
if let leftProcessed = PupilDetector.debugLeftEyeProcessed {
self.debugLeftEyeProcessed = NSImage(cgImage: leftProcessed, size: NSSize(width: leftProcessed.width, height: leftProcessed.height))
}
if let rightProcessed = PupilDetector.debugRightEyeProcessed {
self.debugRightEyeProcessed = NSImage(cgImage: rightProcessed, size: NSSize(width: rightProcessed.width, height: rightProcessed.height))
}
self.debugLeftPupilPosition = PupilDetector.debugLeftPupilPosition
self.debugRightPupilPosition = PupilDetector.debugRightPupilPosition
self.debugLeftEyeSize = PupilDetector.debugLeftEyeSize
self.debugRightEyeSize = PupilDetector.debugRightEyeSize
// Update eye region positions for video overlay
self.debugLeftEyeRegion = PupilDetector.debugLeftEyeRegion
self.debugRightEyeRegion = PupilDetector.debugRightEyeRegion
self.debugImageSize = PupilDetector.debugImageSize
}
}

View File

@@ -42,21 +42,21 @@ enum GazeDirection: String, Sendable, CaseIterable {
case downLeft = "↙"
case down = "↓"
case downRight = "↘"
/// Thresholds for direction detection
/// Horizontal: 0.0 = looking right (from camera POV), 1.0 = looking left
/// Vertical: 0.0 = looking up, 1.0 = looking down
private static let horizontalLeftThreshold = 0.55  // Above this = looking left
private static let horizontalRightThreshold = 0.45  // Below this = looking right
private static let verticalUpThreshold = 0.40  // Below this = looking up
private static let verticalDownThreshold = 0.60  // Above this = looking down
static func from(horizontal: Double, vertical: Double) -> GazeDirection {
let isLeft = horizontal > horizontalLeftThreshold
let isRight = horizontal < horizontalRightThreshold
let isUp = vertical < verticalUpThreshold
let isDown = vertical > verticalDownThreshold
if isUp {
if isLeft { return .upLeft }
if isRight { return .upRight }
@@ -71,7 +71,7 @@ enum GazeDirection: String, Sendable, CaseIterable {
return .center
}
}
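A quick sanity check of the threshold bands above (hypothetical inputs; expected results inferred from the thresholds):
GazeDirection.from(horizontal: 0.50, vertical: 0.50) // .center (inside every band)
GazeDirection.from(horizontal: 0.70, vertical: 0.50) // .left (0.70 > 0.55)
GazeDirection.from(horizontal: 0.40, vertical: 0.30) // .upRight (0.40 < 0.45 and 0.30 < 0.40)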
/// Grid position (0-2 for x and y)
var gridPosition: (x: Int, y: Int) {
switch self {
@@ -226,6 +226,21 @@ final class PupilDetector: @unchecked Sendable {
nil, nil
)
private nonisolated(unsafe) static var _metrics = PupilDetectorMetrics()
// Debug images for UI display
nonisolated(unsafe) static var debugLeftEyeInput: CGImage?
nonisolated(unsafe) static var debugRightEyeInput: CGImage?
nonisolated(unsafe) static var debugLeftEyeProcessed: CGImage?
nonisolated(unsafe) static var debugRightEyeProcessed: CGImage?
nonisolated(unsafe) static var debugLeftPupilPosition: PupilPosition?
nonisolated(unsafe) static var debugRightPupilPosition: PupilPosition?
nonisolated(unsafe) static var debugLeftEyeSize: CGSize?
nonisolated(unsafe) static var debugRightEyeSize: CGSize?
// Eye region positions in image coordinates (for drawing on video)
nonisolated(unsafe) static var debugLeftEyeRegion: EyeRegion?
nonisolated(unsafe) static var debugRightEyeRegion: EyeRegion?
nonisolated(unsafe) static var debugImageSize: CGSize?
nonisolated(unsafe) static let calibration = PupilCalibration()
@@ -356,6 +371,14 @@ final class PupilDetector: @unchecked Sendable {
}
return nil
}
// Store eye region for debug overlay
if side == 0 {
debugLeftEyeRegion = eyeRegion
} else {
debugRightEyeRegion = eyeRegion
}
debugImageSize = imageSize
let frameWidth = CVPixelBufferGetWidth(pixelBuffer)
let frameHeight = CVPixelBufferGetHeight(pixelBuffer)
@@ -435,13 +458,15 @@ final class PupilDetector: @unchecked Sendable {
// Debug: Save input eye image before processing
if enableDebugImageSaving && debugImageCounter < 20 {
NSLog("📸 Saving eye_input_%d - %dx%d, side=%d, region=(%.0f,%.0f,%.0f,%.0f)",
debugImageCounter, eyeWidth, eyeHeight, side,
eyeRegion.frame.origin.x, eyeRegion.frame.origin.y,
eyeRegion.frame.width, eyeRegion.frame.height)
NSLog(
"📸 Saving eye_input_%d - %dx%d, side=%d, region=(%.0f,%.0f,%.0f,%.0f)",
debugImageCounter, eyeWidth, eyeHeight, side,
eyeRegion.frame.origin.x, eyeRegion.frame.origin.y,
eyeRegion.frame.width, eyeRegion.frame.height)
// Debug: Print pixel value statistics for input
var minVal: UInt8 = 255
var maxVal: UInt8 = 0
var sum: Int = 0
var darkCount = 0 // pixels <= 90
for i in 0..<(eyeWidth * eyeHeight) {
@@ -452,8 +477,10 @@ final class PupilDetector: @unchecked Sendable {
if v <= 90 { darkCount += 1 }
}
let avgVal = Double(sum) / Double(eyeWidth * eyeHeight)
NSLog("📊 Eye input stats: min=%d, max=%d, avg=%.1f, darkPixels(<=90)=%d", minVal, maxVal, avgVal, darkCount)
NSLog(
"📊 Eye input stats: min=%d, max=%d, avg=%.1f, darkPixels(<=90)=%d", minVal, maxVal,
avgVal, darkCount)
saveDebugImage(
data: eyeBuf, width: eyeWidth, height: eyeHeight,
name: "eye_input_\(debugImageCounter)")
@@ -466,6 +493,20 @@ final class PupilDetector: @unchecked Sendable {
height: eyeHeight,
threshold: effectiveThreshold
)
// Capture debug images for UI display
let inputImage = createCGImage(from: eyeBuf, width: eyeWidth, height: eyeHeight)
let processedImage = createCGImage(from: tmpBuf, width: eyeWidth, height: eyeHeight)
let eyeSize = CGSize(width: eyeWidth, height: eyeHeight)
if side == 0 {
debugLeftEyeInput = inputImage
debugLeftEyeProcessed = processedImage
debugLeftEyeSize = eyeSize
} else {
debugRightEyeInput = inputImage
debugRightEyeProcessed = processedImage
debugRightEyeSize = eyeSize
}
// Debug: Save processed images if enabled
if enableDebugImageSaving && debugImageCounter < 10 {
@@ -473,11 +514,10 @@ final class PupilDetector: @unchecked Sendable {
var darkCount = 0 // pixels == 0 (black)
var whiteCount = 0 // pixels == 255 (white)
for i in 0..<(eyeWidth * eyeHeight) {
if tmpBuf[i] == 0 { darkCount += 1 } else if tmpBuf[i] == 255 { whiteCount += 1 }
}
NSLog("📊 Processed output stats: darkPixels=%d, whitePixels=%d", darkCount, whiteCount)
saveDebugImage(
data: tmpBuf, width: eyeWidth, height: eyeHeight,
name: "processed_eye_\(debugImageCounter)")
@@ -493,22 +533,28 @@ final class PupilDetector: @unchecked Sendable {
)
else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - findPupilFromContours returned nil (not enough dark pixels) for side \(side)")
logDebug(
"👁 PupilDetector: Failed - findPupilFromContours returned nil (not enough dark pixels) for side \(side)"
)
}
return nil
}
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Success side=\(side) - centroid at (\(String(format: "%.1f", centroidX)), \(String(format: "%.1f", centroidY))) in \(eyeWidth)x\(eyeHeight) region")
logDebug(
"👁 PupilDetector: Success side=\(side) - centroid at (\(String(format: "%.1f", centroidX)), \(String(format: "%.1f", centroidY))) in \(eyeWidth)x\(eyeHeight) region"
)
}
let pupilPosition = PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY))
// Cache result and debug position
if side == 0 {
lastPupilPositions.left = pupilPosition
debugLeftPupilPosition = pupilPosition
} else {
lastPupilPositions.right = pupilPosition
debugRightPupilPosition = pupilPosition
}
return (pupilPosition, eyeRegion)

View File

@@ -15,6 +15,7 @@ struct GazeOverlayView: View {
inFrameIndicator
gazeDirectionGrid
ratioDebugView
eyeImagesDebugView
}
.padding(12)
}
@@ -89,20 +90,43 @@ struct GazeOverlayView: View {
private var ratioDebugView: some View {
VStack(alignment: .leading, spacing: 2) {
// Show individual L/R ratios
HStack(spacing: 8) {
if let leftH = eyeTrackingService.debugLeftPupilRatio {
Text("L.H: \(String(format: "%.2f", leftH))")
.font(.system(size: 9, weight: .medium, design: .monospaced))
.foregroundColor(.white)
}
if let rightH = eyeTrackingService.debugRightPupilRatio {
Text("R.H: \(String(format: "%.2f", rightH))")
.font(.system(size: 9, weight: .medium, design: .monospaced))
.foregroundColor(.white)
}
}
HStack(spacing: 8) {
if let leftV = eyeTrackingService.debugLeftVerticalRatio {
Text("L.V: \(String(format: "%.2f", leftV))")
.font(.system(size: 9, weight: .medium, design: .monospaced))
.foregroundColor(.white)
}
if let rightV = eyeTrackingService.debugRightVerticalRatio {
Text("R.V: \(String(format: "%.2f", rightV))")
.font(.system(size: 9, weight: .medium, design: .monospaced))
.foregroundColor(.white)
}
}
// Show averaged ratios
if let leftH = eyeTrackingService.debugLeftPupilRatio,
let rightH = eyeTrackingService.debugRightPupilRatio,
let leftV = eyeTrackingService.debugLeftVerticalRatio,
let rightV = eyeTrackingService.debugRightVerticalRatio {
let avgH = (leftH + rightH) / 2.0
let avgV = (leftV + rightV) / 2.0
Text("V: \(String(format: "%.2f", avgV))")
.font(.system(size: 10, weight: .medium, design: .monospaced))
.foregroundColor(.white)
Text("Avg H:\(String(format: "%.2f", avgH)) V:\(String(format: "%.2f", avgV))")
.font(.system(size: 9, weight: .bold, design: .monospaced))
.foregroundColor(.yellow)
}
}
.padding(.horizontal, 8)
@@ -112,6 +136,105 @@ struct GazeOverlayView: View {
.fill(Color.black.opacity(0.5))
)
}
private var eyeImagesDebugView: some View {
HStack(spacing: 12) {
// Left eye
VStack(spacing: 4) {
Text("Left")
.font(.system(size: 8, weight: .bold))
.foregroundColor(.white)
HStack(spacing: 4) {
eyeImageView(
image: eyeTrackingService.debugLeftEyeInput,
pupilPosition: eyeTrackingService.debugLeftPupilPosition,
eyeSize: eyeTrackingService.debugLeftEyeSize,
label: "Input"
)
eyeImageView(
image: eyeTrackingService.debugLeftEyeProcessed,
pupilPosition: eyeTrackingService.debugLeftPupilPosition,
eyeSize: eyeTrackingService.debugLeftEyeSize,
label: "Proc"
)
}
}
// Right eye
VStack(spacing: 4) {
Text("Right")
.font(.system(size: 8, weight: .bold))
.foregroundColor(.white)
HStack(spacing: 4) {
eyeImageView(
image: eyeTrackingService.debugRightEyeInput,
pupilPosition: eyeTrackingService.debugRightPupilPosition,
eyeSize: eyeTrackingService.debugRightEyeSize,
label: "Input"
)
eyeImageView(
image: eyeTrackingService.debugRightEyeProcessed,
pupilPosition: eyeTrackingService.debugRightPupilPosition,
eyeSize: eyeTrackingService.debugRightEyeSize,
label: "Proc"
)
}
}
}
.padding(8)
.background(
RoundedRectangle(cornerRadius: 8)
.fill(Color.black.opacity(0.5))
)
}
private func eyeImageView(image: NSImage?, pupilPosition: PupilPosition?, eyeSize: CGSize?, label: String) -> some View {
let displaySize: CGFloat = 50
return VStack(spacing: 2) {
ZStack {
if let nsImage = image {
Image(nsImage: nsImage)
.resizable()
.interpolation(.none)
.aspectRatio(contentMode: .fit)
.frame(width: displaySize, height: displaySize)
// Draw pupil position marker
if let pupil = pupilPosition, let size = eyeSize, size.width > 0, size.height > 0 {
let scaleX = displaySize / size.width
let scaleY = displaySize / size.height
let scale = min(scaleX, scaleY)
let scaledWidth = size.width * scale
let scaledHeight = size.height * scale
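// .offset() shifts from the view's center, so subtracting half the scaled image size converts the pupil's top-left-origin coordinate to a centered offset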
Circle()
.fill(Color.red)
.frame(width: 4, height: 4)
.offset(
x: (pupil.x * scale) - (scaledWidth / 2),
y: (pupil.y * scale) - (scaledHeight / 2)
)
}
} else {
RoundedRectangle(cornerRadius: 4)
.fill(Color.gray.opacity(0.3))
.frame(width: displaySize, height: displaySize)
Text("--")
.font(.system(size: 10))
.foregroundColor(.white.opacity(0.5))
}
}
.frame(width: displaySize, height: displaySize)
.clipShape(RoundedRectangle(cornerRadius: 4))
Text(label)
.font(.system(size: 7))
.foregroundColor(.white.opacity(0.7))
}
}
}
#Preview {
@@ -119,5 +242,5 @@ struct GazeOverlayView: View {
Color.gray
GazeOverlayView(eyeTrackingService: EyeTrackingService.shared)
}
.frame(width: 400, height: 400)
}

View File

@@ -0,0 +1,146 @@
//
// PupilOverlayView.swift
// Gaze
//
// Created by Claude on 1/16/26.
//
import SwiftUI
/// Draws pupil detection markers directly on top of the camera preview
struct PupilOverlayView: View {
@ObservedObject var eyeTrackingService: EyeTrackingService
var body: some View {
GeometryReader { geometry in
let viewSize = geometry.size
// Draw eye regions and pupil markers
ZStack {
// Left eye
if let leftRegion = eyeTrackingService.debugLeftEyeRegion,
let leftPupil = eyeTrackingService.debugLeftPupilPosition,
let imageSize = eyeTrackingService.debugImageSize {
EyeOverlayShape(
eyeRegion: leftRegion,
pupilPosition: leftPupil,
imageSize: imageSize,
viewSize: viewSize,
color: .cyan,
label: "L"
)
}
// Right eye
if let rightRegion = eyeTrackingService.debugRightEyeRegion,
let rightPupil = eyeTrackingService.debugRightPupilPosition,
let imageSize = eyeTrackingService.debugImageSize {
EyeOverlayShape(
eyeRegion: rightRegion,
pupilPosition: rightPupil,
imageSize: imageSize,
viewSize: viewSize,
color: .yellow,
label: "R"
)
}
}
}
}
}
/// Helper view for drawing eye overlay
private struct EyeOverlayShape: View {
let eyeRegion: EyeRegion
let pupilPosition: PupilPosition
let imageSize: CGSize
let viewSize: CGSize
let color: Color
let label: String
private var transformedCoordinates: (eyeRect: CGRect, pupilPoint: CGPoint) {
// Calculate the aspect-fit scaling
let imageAspect = imageSize.width / imageSize.height
let viewAspect = viewSize.width / viewSize.height
let scale: CGFloat
let offsetX: CGFloat
let offsetY: CGFloat
if imageAspect > viewAspect {
// Image is wider - letterbox top/bottom
scale = viewSize.width / imageSize.width
offsetX = 0
offsetY = (viewSize.height - imageSize.height * scale) / 2
} else {
// Image is taller - pillarbox left/right
scale = viewSize.height / imageSize.height
offsetX = (viewSize.width - imageSize.width * scale) / 2
offsetY = 0
}
// Convert eye region frame from image coordinates to view coordinates
// Note: The image is mirrored horizontally in the preview
let mirroredX = imageSize.width - eyeRegion.frame.origin.x - eyeRegion.frame.width
let eyeViewX = mirroredX * scale + offsetX
let eyeViewY = eyeRegion.frame.origin.y * scale + offsetY
let eyeViewWidth = eyeRegion.frame.width * scale
let eyeViewHeight = eyeRegion.frame.height * scale
// Calculate pupil position in view coordinates
// pupilPosition is in local eye region coordinates (0 to eyeWidth, 0 to eyeHeight)
// Need to mirror the X coordinate within the eye region
let mirroredPupilX = eyeRegion.frame.width - pupilPosition.x
let pupilViewX = eyeViewX + mirroredPupilX * scale
let pupilViewY = eyeViewY + pupilPosition.y * scale
return (
eyeRect: CGRect(x: eyeViewX, y: eyeViewY, width: eyeViewWidth, height: eyeViewHeight),
pupilPoint: CGPoint(x: pupilViewX, y: pupilViewY)
)
}
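As a worked example of this transform (hypothetical numbers): a 640x480 image aspect-fit into a 400x400 view is wider than the view, so scale = 400 / 640 = 0.625 and offsetY = (400 - 480 * 0.625) / 2 = 50. An eye region at image x = 100 with width 80 mirrors to 640 - 100 - 80 = 460 and lands at view x = 460 * 0.625 = 287.5.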
var body: some View {
let coords = transformedCoordinates
let eyeRect = coords.eyeRect
let pupilPoint = coords.pupilPoint
ZStack {
// Eye region rectangle
Rectangle()
.stroke(color, lineWidth: 2)
.frame(width: eyeRect.width, height: eyeRect.height)
.position(x: eyeRect.midX, y: eyeRect.midY)
// Pupil marker (red dot)
Circle()
.fill(Color.red)
.frame(width: 8, height: 8)
.position(x: pupilPoint.x, y: pupilPoint.y)
// Crosshair at pupil position
Path { path in
path.move(to: CGPoint(x: pupilPoint.x - 6, y: pupilPoint.y))
path.addLine(to: CGPoint(x: pupilPoint.x + 6, y: pupilPoint.y))
path.move(to: CGPoint(x: pupilPoint.x, y: pupilPoint.y - 6))
path.addLine(to: CGPoint(x: pupilPoint.x, y: pupilPoint.y + 6))
}
.stroke(Color.red, lineWidth: 1)
// Label
Text(label)
.font(.system(size: 10, weight: .bold))
.foregroundColor(color)
.position(x: eyeRect.minX + 8, y: eyeRect.minY - 8)
}
}
}
#Preview {
ZStack {
Color.black
PupilOverlayView(eyeTrackingService: EyeTrackingService.shared)
}
.frame(width: 400, height: 300)
}

View File

@@ -212,10 +212,20 @@ struct EnforceModeSetupView: View {
let previewLayer = eyeTrackingService.previewLayer ?? cachedPreviewLayer
if let layer = previewLayer {
ZStack {
CameraPreviewView(previewLayer: layer, borderColor: borderColor)
// Pupil detection overlay (drawn on video)
PupilOverlayView(eyeTrackingService: eyeTrackingService)
// Debug info overlay (top-right corner)
VStack {
HStack {
Spacer()
GazeOverlayView(eyeTrackingService: eyeTrackingService)
}
Spacer()
}
}
.frame(height: 300)
.glassEffectIfAvailable(GlassStyle.regular, in: .rect(cornerRadius: 12))

View File

@@ -0,0 +1,205 @@
//
// VideoGazeTests.swift
// GazeTests
//
// Created by Claude on 1/16/26.
//
import XCTest
import AVFoundation
import Vision
@testable import Gaze
final class VideoGazeTests: XCTestCase {
var logLines: [String] = []
private func log(_ message: String) {
logLines.append(message)
}
/// Process the outer video and log gaze detection results
func testOuterVideoGazeDetection() async throws {
logLines = []
let projectPath = "/Users/mike/Code/Gaze/GazeTests/video-test-outer.mp4"
guard FileManager.default.fileExists(atPath: projectPath) else {
XCTFail("Video file not found at: \(projectPath)")
return
}
try await processVideo(at: URL(fileURLWithPath: projectPath))
}
/// Process the inner video and log gaze detection results
func testInnerVideoGazeDetection() async throws {
logLines = []
let projectPath = "/Users/mike/Code/Gaze/GazeTests/video-test-inner.mp4"
guard FileManager.default.fileExists(atPath: projectPath) else {
XCTFail("Video file not found at: \(projectPath)")
return
}
try await processVideo(at: URL(fileURLWithPath: projectPath))
}
private func processVideo(at url: URL) async throws {
log("\n" + String(repeating: "=", count: 60))
log("Processing video: \(url.lastPathComponent)")
log(String(repeating: "=", count: 60))
let asset = AVURLAsset(url: url)
let duration = try await asset.load(.duration)
let durationSeconds = CMTimeGetSeconds(duration)
log("Duration: \(String(format: "%.2f", durationSeconds)) seconds")
guard let track = try await asset.loadTracks(withMediaType: .video).first else {
XCTFail("No video track found")
return
}
let size = try await track.load(.naturalSize)
let frameRate = try await track.load(.nominalFrameRate)
log("Size: \(Int(size.width))x\(Int(size.height)), FPS: \(String(format: "%.1f", frameRate))")
let reader = try AVAssetReader(asset: asset)
let outputSettings: [String: Any] = [
kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
]
let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)
reader.add(trackOutput)
reader.startReading()
var frameIndex = 0
let sampleInterval = max(1, Int(frameRate / 2)) // Sample ~2 frames per second
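// e.g. at 30 fps, sampleInterval = 15, so frames 0, 15, 30, ... are analyzed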
log("\nFrame | Time | Face | H-Ratio L/R | V-Ratio L/R | Direction")
log(String(repeating: "-", count: 75))
// Reset calibration for fresh test
PupilDetector.calibration.reset()
// Disable frame skipping for video testing
let originalFrameSkip = PupilDetector.frameSkipCount
PupilDetector.frameSkipCount = 1
defer { PupilDetector.frameSkipCount = originalFrameSkip }
var totalFrames = 0
var faceDetectedFrames = 0
var pupilDetectedFrames = 0
while let sampleBuffer = trackOutput.copyNextSampleBuffer() {
defer {
frameIndex += 1
PupilDetector.advanceFrame()
}
// Only process every Nth frame
if frameIndex % sampleInterval != 0 {
continue
}
totalFrames += 1
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
continue
}
let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let timeSeconds = CMTimeGetSeconds(timestamp)
// Run face detection
let request = VNDetectFaceLandmarksRequest()
request.revision = VNDetectFaceLandmarksRequestRevision3
let handler = VNImageRequestHandler(
cvPixelBuffer: pixelBuffer,
orientation: .leftMirrored,
options: [:]
)
try handler.perform([request])
guard let observations = request.results, !observations.isEmpty,
let face = observations.first,
let landmarks = face.landmarks,
let leftEye = landmarks.leftEye,
let rightEye = landmarks.rightEye else {
log(String(format: "%5d | %5.1fs | NO | - | - | -", frameIndex, timeSeconds))
continue
}
faceDetectedFrames += 1
let imageSize = CGSize(
width: CVPixelBufferGetWidth(pixelBuffer),
height: CVPixelBufferGetHeight(pixelBuffer)
)
// Detect pupils
var leftHRatio: Double?
var rightHRatio: Double?
var leftVRatio: Double?
var rightVRatio: Double?
if let leftResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: leftEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 0
) {
leftHRatio = calculateHorizontalRatio(pupilPosition: leftResult.pupilPosition, eyeRegion: leftResult.eyeRegion)
leftVRatio = calculateVerticalRatio(pupilPosition: leftResult.pupilPosition, eyeRegion: leftResult.eyeRegion)
}
if let rightResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: rightEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 1
) {
rightHRatio = calculateHorizontalRatio(pupilPosition: rightResult.pupilPosition, eyeRegion: rightResult.eyeRegion)
rightVRatio = calculateVerticalRatio(pupilPosition: rightResult.pupilPosition, eyeRegion: rightResult.eyeRegion)
}
if let lh = leftHRatio, let rh = rightHRatio,
let lv = leftVRatio, let rv = rightVRatio {
pupilDetectedFrames += 1
let avgH = (lh + rh) / 2.0
let avgV = (lv + rv) / 2.0
let direction = GazeDirection.from(horizontal: avgH, vertical: avgV)
log(String(format: "%5d | %5.1fs | YES | %.2f / %.2f | %.2f / %.2f | %@ %@",
frameIndex, timeSeconds, lh, rh, lv, rv, direction.rawValue, String(describing: direction)))
} else {
log(String(format: "%5d | %5.1fs | YES | PUPIL FAIL | PUPIL FAIL | -", frameIndex, timeSeconds))
}
}
log(String(repeating: "=", count: 75))
log("Summary: \(totalFrames) frames sampled, \(faceDetectedFrames) with face, \(pupilDetectedFrames) with pupils")
log("Processing complete\n")
}
private func calculateHorizontalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
// pupilPosition.y controls horizontal gaze due to image orientation
let pupilY = Double(pupilPosition.y)
let eyeHeight = Double(eyeRegion.frame.height)
guard eyeHeight > 0 else { return 0.5 }
let ratio = pupilY / eyeHeight
return max(0.0, min(1.0, ratio))
}
private func calculateVerticalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
// pupilPosition.x controls vertical gaze due to image orientation
let pupilX = Double(pupilPosition.x)
let eyeWidth = Double(eyeRegion.frame.width)
guard eyeWidth > 0 else { return 0.5 }
let ratio = pupilX / eyeWidth
return max(0.0, min(1.0, ratio))
}
}

Binary file not shown.

Binary file not shown.