Files
Gaze/GazeTests/VideoGazeTests.swift
2026-01-16 22:58:53 -05:00

250 lines
11 KiB
Swift

//
// VideoGazeTests.swift
// GazeTests
//
// Created by Claude on 1/16/26.
//
import XCTest
import AVFoundation
import Vision
@testable import Gaze
/// Regression tests that replay recorded video fixtures through Vision face-landmark
/// detection and `PupilDetector`, then assert on how the sampled frames are classified
/// by `GazeDirection`.
///
/// The fixtures (`video-test-outer.mp4`, `video-test-inner.mp4`) are looked up in the
/// directory that contains this source file, so the tests do not depend on a particular
/// checkout location.
final class VideoGazeTests: XCTestCase {
    /// Diagnostic lines collected during the current test; they are appended to
    /// assertion failure messages instead of being printed.
    var logLines: [String] = []

    /// Appends `message` to `logLines`.
    private func log(_ message: String) {
        logLines.append(message)
    }

    // MARK: - Tests

    /// Process the outer video (looking away from screen) - should detect "looking away"
    func testOuterVideoGazeDetection() async throws {
        logLines = []
        guard let videoURL = existingFixtureURL(named: "video-test-outer.mp4") else { return }

        let stats = try await processVideo(at: videoURL, expectLookingAway: true)
        guard stats.pupilDetectedFrames > 0 else {
            failForMissingPupils()
            return
        }

        // For outer video, most frames should detect gaze outside center
        let nonCenterRatio = Double(stats.nonCenterFrames) / Double(stats.pupilDetectedFrames)
        log("🎯 OUTER video: \(String(format: "%.1f%%", nonCenterRatio * 100)) frames detected as non-center (expected: >50%)")
        logRanges(of: stats)

        // At least 50% should be detected as non-center when looking away
        XCTAssertGreaterThan(nonCenterRatio, 0.5, "Looking away video should have >50% non-center detections. Log:\n\(logLines.joined(separator: "\n"))")
    }

    /// Process the inner video (looking at screen) - should detect "looking at screen"
    func testInnerVideoGazeDetection() async throws {
        logLines = []
        guard let videoURL = existingFixtureURL(named: "video-test-inner.mp4") else { return }

        let stats = try await processVideo(at: videoURL, expectLookingAway: false)
        guard stats.pupilDetectedFrames > 0 else {
            failForMissingPupils()
            return
        }

        // For inner video, most frames should detect gaze at center
        let centerRatio = Double(stats.centerFrames) / Double(stats.pupilDetectedFrames)
        log("🎯 INNER video: \(String(format: "%.1f%%", centerRatio * 100)) frames detected as center (expected: >50%)")
        logRanges(of: stats)

        // At least 50% should be detected as center when looking at screen
        XCTAssertGreaterThan(centerRatio, 0.5, "Looking at screen video should have >50% center detections. Log:\n\(logLines.joined(separator: "\n"))")
    }

    // MARK: - Statistics

    /// Counters and observed ratio ranges accumulated by `processVideo(at:expectLookingAway:)`.
    struct VideoStats {
        /// Frames selected by the sampling interval (whether or not a face was found).
        var totalFrames = 0
        /// Sampled frames in which a face with both eye landmark regions was found.
        var faceDetectedFrames = 0
        /// Sampled frames in which pupil detection succeeded for both eyes.
        var pupilDetectedFrames = 0
        /// Frames classified as `GazeDirection.center`.
        var centerFrames = 0
        /// Frames classified as any other direction.
        var nonCenterFrames = 0
        // The min/max fields start at sentinel extremes and are only meaningful
        // once `pupilDetectedFrames > 0`.
        var minH = Double.greatestFiniteMagnitude
        var maxH = -Double.greatestFiniteMagnitude
        var minV = Double.greatestFiniteMagnitude
        var maxV = -Double.greatestFiniteMagnitude
    }

    // MARK: - Test helpers

    /// Returns the URL of a fixture stored next to this source file, or records a test
    /// failure and returns `nil` when the file does not exist.
    ///
    /// - Parameter fileName: File name of the fixture, e.g. `"video-test-inner.mp4"`.
    private func existingFixtureURL(named fileName: String) -> URL? {
        // `#filePath` is the compile-time path of this file; the fixtures sit in the same folder.
        let fixtureURL = URL(fileURLWithPath: #filePath)
            .deletingLastPathComponent()
            .appendingPathComponent(fileName)
        guard FileManager.default.fileExists(atPath: fixtureURL.path) else {
            XCTFail("Video file not found at: \(fixtureURL.path)")
            return nil
        }
        return fixtureURL
    }

    /// Records a failure for a run in which no sampled frame produced pupils for both eyes,
    /// so no center/non-center ratio can be computed.
    private func failForMissingPupils() {
        XCTFail("No sampled frame had both pupils detected; gaze ratio cannot be evaluated. Log:\n\(logLines.joined(separator: "\n"))")
    }

    /// Logs the observed horizontal and vertical ratio ranges of `stats`.
    private func logRanges(of stats: VideoStats) {
        log(" H-range: \(String(format: "%.3f", stats.minH)) to \(String(format: "%.3f", stats.maxH))")
        log(" V-range: \(String(format: "%.3f", stats.minV)) to \(String(format: "%.3f", stats.maxV))")
    }

    // MARK: - Video processing

    /// Decodes `url`, samples roughly two frames per second, runs face-landmark and pupil
    /// detection on each sampled frame, and tallies the resulting gaze directions.
    ///
    /// Side effects on `PupilDetector`: its calibration is reset, `frameSkipCount` is set to 1
    /// for the duration of the call (and restored on exit), and `advanceFrame()` is called once
    /// per decoded frame, including frames that are not sampled.
    ///
    /// - Parameters:
    ///   - url: Location of the video file.
    ///   - expectLookingAway: Only used to label the log output.
    /// - Returns: The accumulated statistics. They are empty when the asset has no video
    ///   track or the track output cannot be attached (a test failure is recorded in both cases).
    /// - Throws: Errors from asset loading, reader creation, a reader that fails to start or
    ///   fails while decoding, and from `VNImageRequestHandler.perform(_:)`.
    private func processVideo(at url: URL, expectLookingAway: Bool) async throws -> VideoStats {
        var stats = VideoStats()

        let banner = String(repeating: "=", count: 60)
        log("\n" + banner)
        log("Processing video: \(url.lastPathComponent)")
        log("Expected behavior: \(expectLookingAway ? "LOOKING AWAY (non-center)" : "LOOKING AT SCREEN (center)")")
        log(banner)

        let asset = AVURLAsset(url: url)
        let duration = try await asset.load(.duration)
        let durationSeconds = CMTimeGetSeconds(duration)
        log("Duration: \(String(format: "%.2f", durationSeconds)) seconds")

        guard let track = try await asset.loadTracks(withMediaType: .video).first else {
            XCTFail("No video track found")
            return stats
        }
        let size = try await track.load(.naturalSize)
        let frameRate = try await track.load(.nominalFrameRate)
        log("Size: \(Int(size.width))x\(Int(size.height)), FPS: \(String(format: "%.1f", frameRate))")

        let reader = try AVAssetReader(asset: asset)
        let outputSettings: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
        ]
        let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)
        // `add(_:)` raises an Objective-C exception for an incompatible output; check first.
        guard reader.canAdd(trackOutput) else {
            XCTFail("Cannot attach a BGRA track output for \(url.lastPathComponent)")
            return stats
        }
        reader.add(trackOutput)
        // A reader that fails to start would otherwise look like an empty video.
        guard reader.startReading() else {
            throw reader.error ?? CocoaError(.fileReadUnknown)
        }

        var frameIndex = 0
        // Sample ~2 frames per second; a zero or non-finite frame rate falls back to every frame.
        let sampleInterval = frameRate.isFinite ? max(1, Int(frameRate / 2)) : 1

        log("\nFrame | Time | Face | H-Ratio L/R | V-Ratio L/R | Direction")
        log(String(repeating: "-", count: 75))

        // Reset calibration for fresh test
        PupilDetector.calibration.reset()

        // Disable frame skipping for video testing
        let originalFrameSkip = PupilDetector.frameSkipCount
        PupilDetector.frameSkipCount = 1
        defer { PupilDetector.frameSkipCount = originalFrameSkip }

        while let sampleBuffer = trackOutput.copyNextSampleBuffer() {
            // Runs on every exit from the iteration, including each `continue` below.
            defer {
                frameIndex += 1
                PupilDetector.advanceFrame()
            }

            // Only process every Nth frame
            guard frameIndex % sampleInterval == 0 else { continue }
            stats.totalFrames += 1

            guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { continue }
            let timeSeconds = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))

            // Run face detection
            let request = VNDetectFaceLandmarksRequest()
            request.revision = VNDetectFaceLandmarksRequestRevision3
            // NOTE(review): orientation is hard-coded; presumably it matches how the fixtures
            // were recorded. The track's preferred transform is not consulted. Confirm.
            let handler = VNImageRequestHandler(
                cvPixelBuffer: pixelBuffer,
                orientation: .leftMirrored,
                options: [:]
            )
            try handler.perform([request])

            guard let face = request.results?.first,
                  let landmarks = face.landmarks,
                  let leftEye = landmarks.leftEye,
                  let rightEye = landmarks.rightEye else {
                log(String(format: "%5d | %5.1fs | NO | - | - | -", frameIndex, timeSeconds))
                continue
            }
            stats.faceDetectedFrames += 1

            let imageSize = CGSize(
                width: CVPixelBufferGetWidth(pixelBuffer),
                height: CVPixelBufferGetHeight(pixelBuffer)
            )

            // Detect pupils. Both eyes are always attempted, left first, even if the left fails.
            let leftRatios = PupilDetector.detectPupil(
                in: pixelBuffer,
                eyeLandmarks: leftEye,
                faceBoundingBox: face.boundingBox,
                imageSize: imageSize,
                side: 0
            ).map { detection -> (horizontal: Double, vertical: Double) in
                (
                    horizontal: calculateHorizontalRatio(pupilPosition: detection.pupilPosition, eyeRegion: detection.eyeRegion),
                    vertical: calculateVerticalRatio(pupilPosition: detection.pupilPosition, eyeRegion: detection.eyeRegion)
                )
            }
            let rightRatios = PupilDetector.detectPupil(
                in: pixelBuffer,
                eyeLandmarks: rightEye,
                faceBoundingBox: face.boundingBox,
                imageSize: imageSize,
                side: 1
            ).map { detection -> (horizontal: Double, vertical: Double) in
                (
                    horizontal: calculateHorizontalRatio(pupilPosition: detection.pupilPosition, eyeRegion: detection.eyeRegion),
                    vertical: calculateVerticalRatio(pupilPosition: detection.pupilPosition, eyeRegion: detection.eyeRegion)
                )
            }

            guard let left = leftRatios, let right = rightRatios else {
                log(String(format: "%5d | %5.1fs | YES | PUPIL FAIL | PUPIL FAIL | -", frameIndex, timeSeconds))
                continue
            }
            stats.pupilDetectedFrames += 1

            let avgH = (left.horizontal + right.horizontal) / 2.0
            let avgV = (left.vertical + right.vertical) / 2.0

            // Track min/max ranges
            stats.minH = min(stats.minH, avgH)
            stats.maxH = max(stats.maxH, avgH)
            stats.minV = min(stats.minV, avgV)
            stats.maxV = max(stats.maxV, avgV)

            let direction = GazeDirection.from(horizontal: avgH, vertical: avgV)
            if direction == .center {
                stats.centerFrames += 1
            } else {
                stats.nonCenterFrames += 1
            }
            log(String(format: "%5d | %5.1fs | YES | %.2f / %.2f | %.2f / %.2f | %@ %@",
                       frameIndex, timeSeconds,
                       left.horizontal, right.horizontal, left.vertical, right.vertical,
                       direction.rawValue, String(describing: direction)))
        }

        // `copyNextSampleBuffer()` also returns nil on a decode failure; do not report
        // statistics from a truncated read as if the whole video had been processed.
        if reader.status == .failed {
            throw reader.error ?? CocoaError(.fileReadUnknown)
        }

        log(String(repeating: "=", count: 75))
        log("Summary: \(stats.totalFrames) frames sampled, \(stats.faceDetectedFrames) with face, \(stats.pupilDetectedFrames) with pupils")
        log("Center frames: \(stats.centerFrames), Non-center: \(stats.nonCenterFrames)")
        log("Processing complete\n")
        return stats
    }

    // MARK: - Ratio math

    /// Returns the pupil's position along the eye region's height as a ratio clamped to `0...1`,
    /// or `0.5` when the region has no positive height.
    private func calculateHorizontalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
        // pupilPosition.y controls horizontal gaze due to image orientation
        clampedRatio(offset: Double(pupilPosition.y), extent: Double(eyeRegion.frame.height))
    }

    /// Returns the pupil's position along the eye region's width as a ratio clamped to `0...1`,
    /// or `0.5` when the region has no positive width.
    private func calculateVerticalRatio(pupilPosition: PupilPosition, eyeRegion: EyeRegion) -> Double {
        // pupilPosition.x controls vertical gaze due to image orientation
        clampedRatio(offset: Double(pupilPosition.x), extent: Double(eyeRegion.frame.width))
    }

    /// Divides `offset` by `extent` and clamps the result to `0...1`; a non-positive
    /// `extent` yields the neutral value `0.5`.
    private func clampedRatio(offset: Double, extent: Double) -> Double {
        guard extent > 0 else { return 0.5 }
        return max(0.0, min(1.0, offset / extent))
    }
}