This commit is contained in:
Michael Freno
2026-01-27 14:12:24 -05:00
parent fda136f3d4
commit f8868c9253
31 changed files with 2030 additions and 1790 deletions


@@ -0,0 +1,36 @@
//
// CalibrationBridge.swift
// Gaze
//
// Thread-safe calibration access for eye tracking.
//
import Foundation
final class CalibrationBridge: @unchecked Sendable {
nonisolated var thresholds: GazeThresholds? {
CalibrationState.shared.thresholds
}
nonisolated var isComplete: Bool {
CalibrationState.shared.isComplete
}
nonisolated func submitSample(
leftRatio: Double,
rightRatio: Double,
leftVertical: Double?,
rightVertical: Double?,
faceWidthRatio: Double
) {
Task { @MainActor in
CalibrationManager.shared.collectSample(
leftRatio: leftRatio,
rightRatio: rightRatio,
leftVertical: leftVertical,
rightVertical: rightVertical,
faceWidthRatio: faceWidthRatio
)
}
}
}
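For context on how this bridge is meant to be consumed, here is a minimal usage sketch. FrameProcessor and its parameters are hypothetical names, not part of this commit; the only assumption is that a non-main-actor caller wants to forward samples and read calibration state.

import Foundation

// Hypothetical consumer, not part of this diff: a background frame processor
// forwards raw samples through the bridge and reads calibration state without
// hopping to the main actor itself.
final class FrameProcessor {
    private let calibration = CalibrationBridge()

    func handle(leftRatio: Double, rightRatio: Double, faceWidthRatio: Double) {
        // Safe from the capture queue: submitSample hops to the main actor internally.
        calibration.submitSample(
            leftRatio: leftRatio,
            rightRatio: rightRatio,
            leftVertical: nil,
            rightVertical: nil,
            faceWidthRatio: faceWidthRatio
        )
        if calibration.isComplete, let thresholds = calibration.thresholds {
            _ = thresholds // would feed a GazeDetector.Configuration elsewhere
        }
    }
}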


@@ -0,0 +1,123 @@
//
// CameraSessionManager.swift
// Gaze
//
// Manages AVCaptureSession lifecycle for eye tracking.
//
import AVFoundation
import Combine
import Foundation
protocol CameraSessionDelegate: AnyObject {
nonisolated func cameraSession(
_ manager: CameraSessionManager,
didOutput pixelBuffer: CVPixelBuffer,
imageSize: CGSize
)
}
final class CameraSessionManager: NSObject, ObservableObject {
@Published private(set) var isRunning = false
weak var delegate: CameraSessionDelegate?
private var captureSession: AVCaptureSession?
private var videoOutput: AVCaptureVideoDataOutput?
private let videoDataOutputQueue = DispatchQueue(
label: "com.gaze.videoDataOutput",
qos: .userInitiated
)
private var _previewLayer: AVCaptureVideoPreviewLayer?
var previewLayer: AVCaptureVideoPreviewLayer? {
guard let session = captureSession else {
_previewLayer = nil
return nil
}
if let existing = _previewLayer, existing.session === session {
return existing
}
let layer = AVCaptureVideoPreviewLayer(session: session)
layer.videoGravity = .resizeAspectFill
_previewLayer = layer
return layer
}
@MainActor
func start() async throws {
guard !isRunning else { return }
let cameraService = CameraAccessService.shared
if !cameraService.isCameraAuthorized {
try await cameraService.requestCameraAccess()
}
guard cameraService.isCameraAuthorized else {
throw CameraAccessError.accessDenied
}
try setupCaptureSession()
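// Note: startRunning() is a blocking call; Apple recommends invoking it off the main queue so session startup does not stall the UI.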
captureSession?.startRunning()
isRunning = true
}
@MainActor
func stop() {
captureSession?.stopRunning()
captureSession = nil
videoOutput = nil
_previewLayer = nil
isRunning = false
}
private func setupCaptureSession() throws {
let session = AVCaptureSession()
session.sessionPreset = .vga640x480
guard let videoDevice = AVCaptureDevice.default(for: .video) else {
throw EyeTrackingError.noCamera
}
let videoInput = try AVCaptureDeviceInput(device: videoDevice)
guard session.canAddInput(videoInput) else {
throw EyeTrackingError.cannotAddInput
}
session.addInput(videoInput)
let output = AVCaptureVideoDataOutput()
output.videoSettings = [
kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
]
output.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
output.alwaysDiscardsLateVideoFrames = true
guard session.canAddOutput(output) else {
throw EyeTrackingError.cannotAddOutput
}
session.addOutput(output)
self.captureSession = session
self.videoOutput = output
}
}
extension CameraSessionManager: AVCaptureVideoDataOutputSampleBufferDelegate {
nonisolated func captureOutput(
_ output: AVCaptureOutput,
didOutput sampleBuffer: CMSampleBuffer,
from connection: AVCaptureConnection
) {
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
return
}
let size = CGSize(
width: CVPixelBufferGetWidth(pixelBuffer),
height: CVPixelBufferGetHeight(pixelBuffer)
)
delegate?.cameraSession(self, didOutput: pixelBuffer, imageSize: size)
}
}
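A sketch of how a caller might wire this up; EyeTrackingCoordinator is a hypothetical name and error handling is elided, so treat it as an illustration of the delegate flow rather than the app's actual coordinator.

import AVFoundation
import Foundation

// Hypothetical wiring, not part of this commit: a coordinator owns the session
// manager and receives frames on the video data output queue.
final class EyeTrackingCoordinator: CameraSessionDelegate {
    let camera = CameraSessionManager()
    private let pipeline = VisionPipeline()

    @MainActor
    func begin() async throws {
        camera.delegate = self // delegate is weak; keep the coordinator alive elsewhere
        try await camera.start()
    }

    func cameraSession(
        _ manager: CameraSessionManager,
        didOutput pixelBuffer: CVPixelBuffer,
        imageSize: CGSize
    ) {
        // Called on "com.gaze.videoDataOutput", not on the main thread.
        let analysis = pipeline.analyze(pixelBuffer: pixelBuffer, imageSize: imageSize)
        _ = analysis // hand off to GazeDetector.process and publish results
    }
}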


@@ -0,0 +1,101 @@
//
// EyeDebugStateAdapter.swift
// Gaze
//
// Debug state storage for eye tracking UI.
//
import AppKit
import Foundation
@MainActor
final class EyeDebugStateAdapter {
var leftPupilRatio: Double?
var rightPupilRatio: Double?
var leftVerticalRatio: Double?
var rightVerticalRatio: Double?
var yaw: Double?
var pitch: Double?
var enableDebugLogging: Bool = false {
didSet {
PupilDetector.enableDiagnosticLogging = enableDebugLogging
}
}
var leftEyeInput: NSImage?
var rightEyeInput: NSImage?
var leftEyeProcessed: NSImage?
var rightEyeProcessed: NSImage?
var leftPupilPosition: PupilPosition?
var rightPupilPosition: PupilPosition?
var leftEyeSize: CGSize?
var rightEyeSize: CGSize?
var leftEyeRegion: EyeRegion?
var rightEyeRegion: EyeRegion?
var imageSize: CGSize?
var gazeDirection: GazeDirection {
guard let leftH = leftPupilRatio,
let rightH = rightPupilRatio,
let leftV = leftVerticalRatio,
let rightV = rightVerticalRatio else {
return .center
}
let avgHorizontal = (leftH + rightH) / 2.0
let avgVertical = (leftV + rightV) / 2.0
return GazeDirection.from(horizontal: avgHorizontal, vertical: avgVertical)
}
func update(from result: EyeTrackingProcessingResult) {
leftPupilRatio = result.leftPupilRatio
rightPupilRatio = result.rightPupilRatio
leftVerticalRatio = result.leftVerticalRatio
rightVerticalRatio = result.rightVerticalRatio
yaw = result.yaw
pitch = result.pitch
}
func updateEyeImages(from detector: PupilDetector.Type) {
if let leftInput = detector.debugLeftEyeInput {
leftEyeInput = NSImage(cgImage: leftInput, size: NSSize(width: leftInput.width, height: leftInput.height))
}
if let rightInput = detector.debugRightEyeInput {
rightEyeInput = NSImage(cgImage: rightInput, size: NSSize(width: rightInput.width, height: rightInput.height))
}
if let leftProcessed = detector.debugLeftEyeProcessed {
leftEyeProcessed = NSImage(cgImage: leftProcessed, size: NSSize(width: leftProcessed.width, height: leftProcessed.height))
}
if let rightProcessed = detector.debugRightEyeProcessed {
rightEyeProcessed = NSImage(cgImage: rightProcessed, size: NSSize(width: rightProcessed.width, height: rightProcessed.height))
}
leftPupilPosition = detector.debugLeftPupilPosition
rightPupilPosition = detector.debugRightPupilPosition
leftEyeSize = detector.debugLeftEyeSize
rightEyeSize = detector.debugRightEyeSize
leftEyeRegion = detector.debugLeftEyeRegion
rightEyeRegion = detector.debugRightEyeRegion
imageSize = detector.debugImageSize
}
func clear() {
leftPupilRatio = nil
rightPupilRatio = nil
leftVerticalRatio = nil
rightVerticalRatio = nil
yaw = nil
pitch = nil
leftEyeInput = nil
rightEyeInput = nil
leftEyeProcessed = nil
rightEyeProcessed = nil
leftPupilPosition = nil
rightPupilPosition = nil
leftEyeSize = nil
rightEyeSize = nil
leftEyeRegion = nil
rightEyeRegion = nil
imageSize = nil
}
}
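Because the adapter is main-actor isolated while frame processing happens on the capture queue, results have to cross an isolation boundary before they land here. A minimal sketch of that handoff; publishDebugState is a hypothetical helper, not code from this commit.

import Foundation

// Hypothetical helper, not in this diff: push a background processing result
// into the main-actor debug adapter that backs the debug UI.
@MainActor
func publishDebugState(
    _ result: EyeTrackingProcessingResult,
    to adapter: EyeDebugStateAdapter
) {
    adapter.update(from: result)
    adapter.updateEyeImages(from: PupilDetector.self)
}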


@@ -0,0 +1,21 @@
//
// EyeTrackingProcessingResult.swift
// Gaze
//
// Shared processing result for eye tracking pipeline.
//
import Foundation
struct EyeTrackingProcessingResult: Sendable {
let faceDetected: Bool
let isEyesClosed: Bool
let userLookingAtScreen: Bool
let leftPupilRatio: Double?
let rightPupilRatio: Double?
let leftVerticalRatio: Double?
let rightVerticalRatio: Double?
let yaw: Double?
let pitch: Double?
let faceWidthRatio: Double?
}


@@ -0,0 +1,331 @@
//
// GazeDetector.swift
// Gaze
//
// Gaze detection logic and pupil analysis.
//
import Foundation
import Vision
import simd
final class GazeDetector: @unchecked Sendable {
struct GazeResult: Sendable {
let isLookingAway: Bool
let isEyesClosed: Bool
let leftPupilRatio: Double?
let rightPupilRatio: Double?
let leftVerticalRatio: Double?
let rightVerticalRatio: Double?
let yaw: Double?
let pitch: Double?
}
struct Configuration: Sendable {
let thresholds: GazeThresholds?
let isCalibrationComplete: Bool
let eyeClosedEnabled: Bool
let eyeClosedThreshold: CGFloat
let yawEnabled: Bool
let yawThreshold: Double
let pitchUpEnabled: Bool
let pitchUpThreshold: Double
let pitchDownEnabled: Bool
let pitchDownThreshold: Double
let pixelGazeEnabled: Bool
let pixelGazeMinRatio: Double
let pixelGazeMaxRatio: Double
let boundaryForgivenessMargin: Double
let distanceSensitivity: Double
let defaultReferenceFaceWidth: Double
}
private let lock = NSLock()
private var configuration: Configuration
init(configuration: Configuration) {
self.configuration = configuration
}
func updateConfiguration(_ configuration: Configuration) {
lock.lock()
self.configuration = configuration
lock.unlock()
}
nonisolated func process(
analysis: VisionPipeline.FaceAnalysis,
pixelBuffer: CVPixelBuffer
) -> EyeTrackingProcessingResult {
let config: Configuration
lock.lock()
config = configuration
lock.unlock()
guard analysis.faceDetected, let face = analysis.face else {
return EyeTrackingProcessingResult(
faceDetected: false,
isEyesClosed: false,
userLookingAtScreen: false,
leftPupilRatio: nil,
rightPupilRatio: nil,
leftVerticalRatio: nil,
rightVerticalRatio: nil,
yaw: analysis.debugYaw,
pitch: analysis.debugPitch,
faceWidthRatio: nil
)
}
let landmarks = face.landmarks
let yaw = face.yaw?.doubleValue ?? 0.0
let pitch = face.pitch?.doubleValue ?? 0.0
var isEyesClosed = false
if let leftEye = landmarks?.leftEye, let rightEye = landmarks?.rightEye {
isEyesClosed = detectEyesClosed(leftEye: leftEye, rightEye: rightEye, configuration: config)
}
let gazeResult = detectLookingAway(
face: face,
landmarks: landmarks,
imageSize: analysis.imageSize,
pixelBuffer: pixelBuffer,
configuration: config
)
let lookingAway = gazeResult.lookingAway
let userLookingAtScreen = !lookingAway
return EyeTrackingProcessingResult(
faceDetected: true,
isEyesClosed: isEyesClosed,
userLookingAtScreen: userLookingAtScreen,
leftPupilRatio: gazeResult.leftPupilRatio,
rightPupilRatio: gazeResult.rightPupilRatio,
leftVerticalRatio: gazeResult.leftVerticalRatio,
rightVerticalRatio: gazeResult.rightVerticalRatio,
yaw: gazeResult.yaw ?? yaw,
pitch: gazeResult.pitch ?? pitch,
faceWidthRatio: face.boundingBox.width
)
}
private func detectEyesClosed(
leftEye: VNFaceLandmarkRegion2D,
rightEye: VNFaceLandmarkRegion2D,
configuration: Configuration
) -> Bool {
guard configuration.eyeClosedEnabled else { return false }
guard leftEye.pointCount >= 2, rightEye.pointCount >= 2 else { return false }
let leftEyeHeight = calculateEyeHeight(leftEye)
let rightEyeHeight = calculateEyeHeight(rightEye)
let closedThreshold = configuration.eyeClosedThreshold
return leftEyeHeight < closedThreshold && rightEyeHeight < closedThreshold
}
private func calculateEyeHeight(_ eye: VNFaceLandmarkRegion2D) -> CGFloat {
let points = eye.normalizedPoints
guard points.count >= 2 else { return 0 }
let yValues = points.map { $0.y }
let maxY = yValues.max() ?? 0
let minY = yValues.min() ?? 0
return abs(maxY - minY)
}
private struct GazeDetectionResult: Sendable {
var lookingAway: Bool = false
var leftPupilRatio: Double?
var rightPupilRatio: Double?
var leftVerticalRatio: Double?
var rightVerticalRatio: Double?
var yaw: Double?
var pitch: Double?
}
private func detectLookingAway(
face: VNFaceObservation,
landmarks: VNFaceLandmarks2D?,
imageSize: CGSize,
pixelBuffer: CVPixelBuffer,
configuration: Configuration
) -> GazeDetectionResult {
var result = GazeDetectionResult()
let yaw = face.yaw?.doubleValue ?? 0.0
let pitch = face.pitch?.doubleValue ?? 0.0
result.yaw = yaw
result.pitch = pitch
var poseLookingAway = false
if face.pitch != nil {
if configuration.yawEnabled {
let yawThreshold = configuration.yawThreshold
if abs(yaw) > yawThreshold {
poseLookingAway = true
}
}
if !poseLookingAway {
var pitchLookingAway = false
if configuration.pitchUpEnabled && pitch > configuration.pitchUpThreshold {
pitchLookingAway = true
}
if configuration.pitchDownEnabled && pitch < configuration.pitchDownThreshold {
pitchLookingAway = true
}
poseLookingAway = pitchLookingAway
}
}
var eyesLookingAway = false
if let landmarks,
let leftEye = landmarks.leftEye,
let rightEye = landmarks.rightEye,
configuration.pixelGazeEnabled {
var leftGazeRatio: Double? = nil
var rightGazeRatio: Double? = nil
var leftVerticalRatio: Double? = nil
var rightVerticalRatio: Double? = nil
if let leftResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: leftEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 0
) {
leftGazeRatio = calculateGazeRatio(
pupilPosition: leftResult.pupilPosition,
eyeRegion: leftResult.eyeRegion
)
leftVerticalRatio = calculateVerticalRatio(
pupilPosition: leftResult.pupilPosition,
eyeRegion: leftResult.eyeRegion
)
}
if let rightResult = PupilDetector.detectPupil(
in: pixelBuffer,
eyeLandmarks: rightEye,
faceBoundingBox: face.boundingBox,
imageSize: imageSize,
side: 1
) {
rightGazeRatio = calculateGazeRatio(
pupilPosition: rightResult.pupilPosition,
eyeRegion: rightResult.eyeRegion
)
rightVerticalRatio = calculateVerticalRatio(
pupilPosition: rightResult.pupilPosition,
eyeRegion: rightResult.eyeRegion
)
}
result.leftPupilRatio = leftGazeRatio
result.rightPupilRatio = rightGazeRatio
result.leftVerticalRatio = leftVerticalRatio
result.rightVerticalRatio = rightVerticalRatio
if let leftRatio = leftGazeRatio,
let rightRatio = rightGazeRatio {
let avgH = (leftRatio + rightRatio) / 2.0
let avgV: Double
if let leftV = leftVerticalRatio, let rightV = rightVerticalRatio {
avgV = (leftV + rightV) / 2.0
} else {
avgV = 0.5
}
if configuration.isCalibrationComplete,
let thresholds = configuration.thresholds {
let currentFaceWidth = face.boundingBox.width
let refFaceWidth = thresholds.referenceFaceWidth
var distanceScale = 1.0
if refFaceWidth > 0 && currentFaceWidth > 0 {
let rawScale = refFaceWidth / currentFaceWidth
distanceScale = 1.0 + (rawScale - 1.0) * configuration.distanceSensitivity
distanceScale = max(0.5, min(2.0, distanceScale))
}
let centerH = (thresholds.screenLeftBound + thresholds.screenRightBound) / 2.0
let centerV = (thresholds.screenTopBound + thresholds.screenBottomBound) / 2.0
let deltaH = (avgH - centerH) * distanceScale
let deltaV = (avgV - centerV) * distanceScale
let normalizedH = centerH + deltaH
let normalizedV = centerV + deltaV
let margin = configuration.boundaryForgivenessMargin
let isLookingLeft = normalizedH > (thresholds.screenLeftBound + margin)
let isLookingRight = normalizedH < (thresholds.screenRightBound - margin)
let isLookingUp = normalizedV < (thresholds.screenTopBound - margin)
let isLookingDown = normalizedV > (thresholds.screenBottomBound + margin)
eyesLookingAway = isLookingLeft || isLookingRight || isLookingUp || isLookingDown
} else {
let currentFaceWidth = face.boundingBox.width
let refFaceWidth = configuration.defaultReferenceFaceWidth
var distanceScale = 1.0
if refFaceWidth > 0 && currentFaceWidth > 0 {
let rawScale = refFaceWidth / currentFaceWidth
distanceScale = 1.0 + (rawScale - 1.0) * configuration.distanceSensitivity
distanceScale = max(0.5, min(2.0, distanceScale))
}
let centerH = (configuration.pixelGazeMinRatio + configuration.pixelGazeMaxRatio) / 2.0
let normalizedH = centerH + (avgH - centerH) * distanceScale
let lookingRight = normalizedH <= configuration.pixelGazeMinRatio
let lookingLeft = normalizedH >= configuration.pixelGazeMaxRatio
eyesLookingAway = lookingRight || lookingLeft
}
}
}
result.lookingAway = poseLookingAway || eyesLookingAway
return result
}
private func calculateGazeRatio(
pupilPosition: PupilPosition,
eyeRegion: EyeRegion
) -> Double {
let pupilX = Double(pupilPosition.x)
let eyeCenterX = Double(eyeRegion.center.x)
let denominator = (eyeCenterX * 2.0 - 10.0)
guard denominator > 0 else {
let eyeLeft = Double(eyeRegion.frame.minX)
let eyeRight = Double(eyeRegion.frame.maxX)
let eyeWidth = eyeRight - eyeLeft
guard eyeWidth > 0 else { return 0.5 }
return (pupilX - eyeLeft) / eyeWidth
}
let ratio = pupilX / denominator
return max(0.0, min(1.0, ratio))
}
private func calculateVerticalRatio(
pupilPosition: PupilPosition,
eyeRegion: EyeRegion
) -> Double {
// Vertical gaze should track the pupil's y offset within the eye region height,
// mirroring the horizontal computation in calculateGazeRatio.
let pupilY = Double(pupilPosition.y)
let eyeHeight = Double(eyeRegion.frame.height)
guard eyeHeight > 0 else { return 0.5 }
let ratio = pupilY / eyeHeight
return max(0.0, min(1.0, ratio))
}
}
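The distance-scale arithmetic in detectLookingAway is easier to follow with concrete numbers. The walkthrough below mirrors that logic with illustrative values only; the bounds, ratios, and face widths are made up, not values produced by calibration.

import Foundation

// Illustrative numbers only, not real calibration output: the same arithmetic
// detectLookingAway applies before checking the calibrated bounds.
let referenceFaceWidth = 0.30 // face width ratio captured during calibration
let currentFaceWidth = 0.20   // face appears smaller: the user moved farther back
let distanceSensitivity = 1.0

let rawScale = referenceFaceWidth / currentFaceWidth             // 1.5
var distanceScale = 1.0 + (rawScale - 1.0) * distanceSensitivity // 1.5
distanceScale = max(0.5, min(2.0, distanceScale))                // clamped, still 1.5

// Bounds chosen so screenLeftBound > screenRightBound, matching the comparison
// directions used above.
let screenLeftBound = 0.70
let screenRightBound = 0.30
let margin = 0.02
let centerH = (screenLeftBound + screenRightBound) / 2.0         // 0.50

let avgH = 0.62                                                  // averaged pupil ratio
let normalizedH = centerH + (avgH - centerH) * distanceScale     // 0.50 + 0.12 * 1.5 = 0.68

let isLookingLeft = normalizedH > (screenLeftBound + margin)     // 0.68 > 0.72 -> false
let isLookingRight = normalizedH < (screenRightBound - margin)   // 0.68 < 0.28 -> false
print(isLookingLeft, isLookingRight)                             // false false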


@@ -0,0 +1,67 @@
//
// VisionPipeline.swift
// Gaze
//
// Vision processing pipeline for face detection.
//
import Foundation
import Vision
final class VisionPipeline: @unchecked Sendable {
struct FaceAnalysis: Sendable {
let faceDetected: Bool
let face: VNFaceObservation?
let imageSize: CGSize
let debugYaw: Double?
let debugPitch: Double?
}
nonisolated func analyze(
pixelBuffer: CVPixelBuffer,
imageSize: CGSize
) -> FaceAnalysis {
let request = VNDetectFaceLandmarksRequest()
request.revision = VNDetectFaceLandmarksRequestRevision3
if #available(macOS 14.0, *) {
request.constellation = .constellation76Points
}
let handler = VNImageRequestHandler(
cvPixelBuffer: pixelBuffer,
orientation: .upMirrored,
options: [:]
)
do {
try handler.perform([request])
} catch {
return FaceAnalysis(
faceDetected: false,
face: nil,
imageSize: imageSize,
debugYaw: nil,
debugPitch: nil
)
}
guard let face = (request.results as? [VNFaceObservation])?.first else {
return FaceAnalysis(
faceDetected: false,
face: nil,
imageSize: imageSize,
debugYaw: nil,
debugPitch: nil
)
}
return FaceAnalysis(
faceDetected: true,
face: face,
imageSize: imageSize,
debugYaw: face.yaw?.doubleValue,
debugPitch: face.pitch?.doubleValue
)
}
}
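Taken together with GazeDetector, the per-frame path is: analyze the pixel buffer, then classify gaze from the resulting FaceAnalysis. A minimal sketch of that composition; processFrame is a hypothetical free function, not code from this commit.

import Foundation
import Vision

// Hypothetical composition, not part of this diff: one frame's path from Vision
// analysis to a gaze classification. Both calls are nonisolated, so this can run
// directly on the capture queue.
func processFrame(
    _ pixelBuffer: CVPixelBuffer,
    imageSize: CGSize,
    pipeline: VisionPipeline,
    detector: GazeDetector
) -> EyeTrackingProcessingResult {
    let analysis = pipeline.analyze(pixelBuffer: pixelBuffer, imageSize: imageSize)
    return detector.process(analysis: analysis, pixelBuffer: pixelBuffer)
}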