Files
Gaze/Gaze/Services/EyeTracking/PupilDetector.swift
Michael Freno 6e41c4059c building
2026-01-30 12:20:56 -05:00

1112 lines
39 KiB
Swift

//
// PupilDetector.swift
// Gaze
//
// Created by Mike Freno on 1/15/26.
//
// Pixel-based pupil detection translated from Python GazeTracking library
// Original: https://github.com/antoinelame/GazeTracking
//
// Optimized with:
// - Frame skipping (process every Nth frame)
// - vImage/Accelerate for grayscale conversion and erosion
// - Precomputed lookup tables for bilateral filter
// - Efficient contour detection with union-find
//
import Accelerate
import CoreImage
import ImageIO
import UniformTypeIdentifiers
@preconcurrency import Vision
struct PupilPosition: Equatable, Sendable {
let x: CGFloat
let y: CGFloat
}
struct EyeRegion: Sendable {
let frame: CGRect
let center: CGPoint
let origin: CGPoint
}
/// 9-point gaze direction grid
enum GazeDirection: String, Sendable, CaseIterable {
case upLeft = ""
case up = ""
case upRight = ""
case left = ""
case center = ""
case right = ""
case downLeft = ""
case down = ""
case downRight = ""
/// Thresholds for direction detection
/// Based on actual video test data:
/// - Looking at screen (center): H 0.29-0.35
/// - Looking left (away): H 0.62-0.70
/// Horizontal: Lower values = center/right, Higher values = left
/// Vertical: 0.0 = looking up, 1.0 = looking down
private static let horizontalLeftThreshold = 0.50 // Above this = looking left (away)
private static let horizontalRightThreshold = 0.20 // Below this = looking right
private static let verticalUpThreshold = 0.35 // Below this = looking up
private static let verticalDownThreshold = 0.55 // Above this = looking down
static func from(horizontal: Double, vertical: Double) -> GazeDirection {
let isLeft = horizontal > horizontalLeftThreshold
let isRight = horizontal < horizontalRightThreshold
let isUp = vertical < verticalUpThreshold
let isDown = vertical > verticalDownThreshold
if isUp {
if isLeft { return .upLeft }
if isRight { return .upRight }
return .up
} else if isDown {
if isLeft { return .downLeft }
if isRight { return .downRight }
return .down
} else {
if isLeft { return .left }
if isRight { return .right }
return .center
}
}
/// Grid position (0-2 for x and y)
var gridPosition: (x: Int, y: Int) {
switch self {
case .upLeft: return (0, 0)
case .up: return (1, 0)
case .upRight: return (2, 0)
case .left: return (0, 1)
case .center: return (1, 1)
case .right: return (2, 1)
case .downLeft: return (0, 2)
case .down: return (1, 2)
case .downRight: return (2, 2)
}
}
}
/// Calibration state for adaptive thresholding (matches Python Calibration class)
final class PupilCalibration: @unchecked Sendable {
private nonisolated let lock = NSLock()
private nonisolated let targetFrames = 20
private nonisolated(unsafe) var thresholdsLeft: [Int] = []
private nonisolated(unsafe) var thresholdsRight: [Int] = []
nonisolated func isComplete() -> Bool {
lock.lock()
defer { lock.unlock() }
return thresholdsLeft.count >= targetFrames && thresholdsRight.count >= targetFrames
}
nonisolated func threshold(forSide side: Int) -> Int {
lock.lock()
defer { lock.unlock() }
let thresholds = side == 0 ? thresholdsLeft : thresholdsRight
// DEBUG: Use higher default threshold (was 50)
guard !thresholds.isEmpty else { return 90 }
return thresholds.reduce(0, +) / thresholds.count
}
nonisolated func evaluate(eyeData: UnsafePointer<UInt8>, width: Int, height: Int, side: Int) {
let bestThreshold = findBestThreshold(eyeData: eyeData, width: width, height: height)
lock.lock()
defer { lock.unlock() }
if side == 0 {
thresholdsLeft.append(bestThreshold)
} else {
thresholdsRight.append(bestThreshold)
}
}
private nonisolated func findBestThreshold(
eyeData: UnsafePointer<UInt8>, width: Int, height: Int
) -> Int {
let averageIrisSize = 0.48
var bestThreshold = 50
var bestDiff = Double.greatestFiniteMagnitude
let bufferSize = width * height
let tempBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
defer { tempBuffer.deallocate() }
for threshold in stride(from: 5, to: 100, by: 5) {
PupilDetector.imageProcessingOptimized(
input: eyeData,
output: tempBuffer,
width: width,
height: height,
threshold: threshold
)
let irisSize = Self.irisSize(data: tempBuffer, width: width, height: height)
let diff = abs(irisSize - averageIrisSize)
if diff < bestDiff {
bestDiff = diff
bestThreshold = threshold
}
}
return bestThreshold
}
private nonisolated static func irisSize(data: UnsafePointer<UInt8>, width: Int, height: Int)
-> Double
{
let margin = 5
guard width > margin * 2, height > margin * 2 else { return 0 }
var blackCount = 0
let innerWidth = width - margin * 2
let innerHeight = height - margin * 2
for y in margin..<(height - margin) {
let rowStart = y * width + margin
for x in 0..<innerWidth {
if data[rowStart + x] == 0 {
blackCount += 1
}
}
}
let totalCount = innerWidth * innerHeight
return totalCount > 0 ? Double(blackCount) / Double(totalCount) : 0
}
nonisolated func reset() {
lock.lock()
defer { lock.unlock() }
thresholdsLeft.removeAll()
thresholdsRight.removeAll()
}
}
/// Performance metrics for pupil detection
struct PupilDetectorMetrics: Sendable {
var lastProcessingTimeMs: Double = 0
var averageProcessingTimeMs: Double = 0
var frameCount: Int = 0
var processedFrameCount: Int = 0
nonisolated mutating func recordProcessingTime(_ ms: Double) {
lastProcessingTimeMs = ms
processedFrameCount += 1
let alpha = 0.1
averageProcessingTimeMs = averageProcessingTimeMs * (1 - alpha) + ms * alpha
}
}
final class PupilDetector: @unchecked Sendable {
// MARK: - Thread Safety
private static let lock = NSLock()
// MARK: - Configuration
nonisolated(unsafe) static var enableDebugImageSaving: Bool = false // Disabled - causes sandbox errors
nonisolated(unsafe) static var enablePerformanceLogging = false
nonisolated(unsafe) static var enableDiagnosticLogging = false // Disabled - pupil detection now working
nonisolated(unsafe) static var enableDebugLogging: Bool {
#if DEBUG
return true
#else
return false
#endif
}
nonisolated(unsafe) static var frameSkipCount = 10 // Process every Nth frame
// MARK: - State (protected by lock)
private nonisolated(unsafe) static var _debugImageCounter = 0
private nonisolated(unsafe) static var _frameCounter = 0
private nonisolated(unsafe) static var _lastPupilPositions:
(left: PupilPosition?, right: PupilPosition?) = (
nil, nil
)
private nonisolated(unsafe) static var _metrics = PupilDetectorMetrics()
// Debug images for UI display
nonisolated(unsafe) static var debugLeftEyeInput: CGImage?
nonisolated(unsafe) static var debugRightEyeInput: CGImage?
nonisolated(unsafe) static var debugLeftEyeProcessed: CGImage?
nonisolated(unsafe) static var debugRightEyeProcessed: CGImage?
nonisolated(unsafe) static var debugLeftPupilPosition: PupilPosition?
nonisolated(unsafe) static var debugRightPupilPosition: PupilPosition?
nonisolated(unsafe) static var debugLeftEyeSize: CGSize?
nonisolated(unsafe) static var debugRightEyeSize: CGSize?
// Eye region positions in image coordinates (for drawing on video)
nonisolated(unsafe) static var debugLeftEyeRegion: EyeRegion?
nonisolated(unsafe) static var debugRightEyeRegion: EyeRegion?
nonisolated(unsafe) static var debugImageSize: CGSize?
nonisolated static let calibration = PupilCalibration()
// MARK: - Convenience Properties
private nonisolated(unsafe) static var debugImageCounter: Int {
get { _debugImageCounter }
set { _debugImageCounter = newValue }
}
private nonisolated(unsafe) static var frameCounter: Int {
get { _frameCounter }
set { _frameCounter = newValue }
}
private nonisolated(unsafe) static var lastPupilPositions: (left: PupilPosition?, right: PupilPosition?)
{
get { _lastPupilPositions }
set { _lastPupilPositions = newValue }
}
private nonisolated(unsafe) static var metrics: PupilDetectorMetrics {
get { _metrics }
set { _metrics = newValue }
}
// MARK: - Precomputed Tables
private nonisolated static let spatialWeightsLUT: [[Float]] = {
let d = 10
let radius = d / 2
let sigmaSpace: Float = 15.0
var weights = [[Float]](repeating: [Float](repeating: 0, count: d), count: d)
for dy in 0..<d {
for dx in 0..<d {
let dist = sqrt(
Float((dy - radius) * (dy - radius) + (dx - radius) * (dx - radius)))
weights[dy][dx] = exp(-dist * dist / (2 * sigmaSpace * sigmaSpace))
}
}
return weights
}()
private nonisolated static let colorWeightsLUT: [Float] = {
let sigmaColor: Float = 15.0
var lut = [Float](repeating: 0, count: 256)
for diff in 0..<256 {
let d = Float(diff)
lut[diff] = exp(-d * d / (2 * sigmaColor * sigmaColor))
}
return lut
}()
// MARK: - Reusable Buffers
private nonisolated(unsafe) static var grayscaleBuffer: UnsafeMutablePointer<UInt8>?
private nonisolated(unsafe) static var grayscaleBufferSize = 0
private nonisolated(unsafe) static var eyeBuffer: UnsafeMutablePointer<UInt8>?
private nonisolated(unsafe) static var eyeBufferSize = 0
private nonisolated(unsafe) static var tempBuffer: UnsafeMutablePointer<UInt8>?
private nonisolated(unsafe) static var tempBufferSize = 0
// MARK: - Public API
/// Call once per video frame to enable proper frame skipping
nonisolated static func advanceFrame() {
frameCounter += 1
}
/// Detects pupil position with frame skipping for performance
/// Returns cached result on skipped frames
nonisolated static func detectPupil(
in pixelBuffer: CVPixelBuffer,
eyeLandmarks: VNFaceLandmarkRegion2D,
faceBoundingBox: CGRect,
imageSize: CGSize,
side: Int = 0,
threshold: Int? = nil
) -> (pupilPosition: PupilPosition, eyeRegion: EyeRegion)? {
// Frame skipping - return cached result
if frameCounter % frameSkipCount != 0 {
let cachedPosition = side == 0 ? lastPupilPositions.left : lastPupilPositions.right
if let position = cachedPosition {
// Recreate eye region for consistency
let eyePoints = landmarksToPixelCoordinates(
landmarks: eyeLandmarks,
faceBoundingBox: faceBoundingBox,
imageSize: imageSize
)
if let eyeRegion = createEyeRegion(from: eyePoints, imageSize: imageSize) {
return (position, eyeRegion)
}
}
return nil
}
let startTime = CFAbsoluteTimeGetCurrent()
defer {
if enablePerformanceLogging {
let elapsed = (CFAbsoluteTimeGetCurrent() - startTime) * 1000
metrics.recordProcessingTime(elapsed)
if metrics.processedFrameCount % 30 == 0 {
logDebug(
"👁 PupilDetector: \(String(format: "%.2f", elapsed))ms (avg: \(String(format: "%.2f", metrics.averageProcessingTimeMs))ms)"
)
}
}
}
// Step 1: Convert Vision landmarks to pixel coordinates
let eyePoints = landmarksToPixelCoordinates(
landmarks: eyeLandmarks,
faceBoundingBox: faceBoundingBox,
imageSize: imageSize
)
guard eyePoints.count >= 6 else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - eyePoints.count=\(eyePoints.count) < 6")
}
return nil
}
// Step 2: Create eye region bounding box with margin
guard let eyeRegion = createEyeRegion(from: eyePoints, imageSize: imageSize) else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - createEyeRegion returned nil")
}
return nil
}
// Store eye region for debug overlay
if side == 0 {
debugLeftEyeRegion = eyeRegion
} else {
debugRightEyeRegion = eyeRegion
}
debugImageSize = imageSize
let frameWidth = CVPixelBufferGetWidth(pixelBuffer)
let frameHeight = CVPixelBufferGetHeight(pixelBuffer)
let frameSize = frameWidth * frameHeight
// Step 3: Ensure buffers are allocated
ensureBufferCapacity(
frameSize: frameSize, eyeSize: Int(eyeRegion.frame.width * eyeRegion.frame.height))
guard let grayBuffer = grayscaleBuffer,
let eyeBuf = eyeBuffer,
let tmpBuf = tempBuffer
else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - buffers not allocated")
}
return nil
}
// Step 4: Extract grayscale data using vImage
guard
extractGrayscaleDataOptimized(
from: pixelBuffer, to: grayBuffer, width: frameWidth, height: frameHeight)
else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - grayscale extraction failed")
}
return nil
}
// Step 5: Isolate eye with polygon mask
let eyeWidth = Int(eyeRegion.frame.width)
let eyeHeight = Int(eyeRegion.frame.height)
// Early exit for tiny regions (less than 10x10 pixels)
guard eyeWidth >= 10, eyeHeight >= 10 else {
if enableDiagnosticLogging {
logDebug(
"👁 PupilDetector: Failed - eye region too small (\(eyeWidth)x\(eyeHeight))")
}
return nil
}
guard
isolateEyeWithMaskOptimized(
frameData: grayBuffer,
frameWidth: frameWidth,
frameHeight: frameHeight,
eyePoints: eyePoints,
region: eyeRegion,
output: eyeBuf
)
else {
if enableDiagnosticLogging {
logDebug("👁 PupilDetector: Failed - isolateEyeWithMask failed")
}
return nil
}
// Step 6: Get threshold (from calibration or parameter)
let effectiveThreshold: Int
if let manualThreshold = threshold {
effectiveThreshold = manualThreshold
} else if calibration.isComplete() {
effectiveThreshold = calibration.threshold(forSide: side)
} else {
calibration.evaluate(eyeData: eyeBuf, width: eyeWidth, height: eyeHeight, side: side)
effectiveThreshold = calibration.threshold(forSide: side)
}
// Step 7: Process image (bilateral filter + erosion + threshold)
if enableDiagnosticLogging {
logDebug(
"👁 PupilDetector: Using threshold=\(effectiveThreshold) for \(eyeWidth)x\(eyeHeight) eye region"
)
}
// Debug: Save input eye image before processing
if enableDebugImageSaving && debugImageCounter < 20 {
NSLog(
"📸 Saving eye_input_%d - %dx%d, side=%d, region=(%.0f,%.0f,%.0f,%.0f)",
debugImageCounter, eyeWidth, eyeHeight, side,
eyeRegion.frame.origin.x, eyeRegion.frame.origin.y,
eyeRegion.frame.width, eyeRegion.frame.height)
// Debug: Print pixel value statistics for input
var minVal: UInt8 = 255
var maxVal: UInt8 = 0
var sum: Int = 0
var darkCount = 0 // pixels <= 90
for i in 0..<(eyeWidth * eyeHeight) {
let v = eyeBuf[i]
if v < minVal { minVal = v }
if v > maxVal { maxVal = v }
sum += Int(v)
if v <= 90 { darkCount += 1 }
}
let avgVal = Double(sum) / Double(eyeWidth * eyeHeight)
NSLog(
"📊 Eye input stats: min=%d, max=%d, avg=%.1f, darkPixels(<=90)=%d", minVal, maxVal,
avgVal, darkCount)
saveDebugImage(
data: eyeBuf, width: eyeWidth, height: eyeHeight,
name: "eye_input_\(debugImageCounter)")
}
imageProcessingOptimized(
input: eyeBuf,
output: tmpBuf,
width: eyeWidth,
height: eyeHeight,
threshold: effectiveThreshold
)
// Capture debug images for UI display
let inputImage = createCGImage(from: eyeBuf, width: eyeWidth, height: eyeHeight)
let processedImage = createCGImage(from: tmpBuf, width: eyeWidth, height: eyeHeight)
let eyeSize = CGSize(width: eyeWidth, height: eyeHeight)
if side == 0 {
debugLeftEyeInput = inputImage
debugLeftEyeProcessed = processedImage
debugLeftEyeSize = eyeSize
} else {
debugRightEyeInput = inputImage
debugRightEyeProcessed = processedImage
debugRightEyeSize = eyeSize
}
// Debug: Save processed images if enabled
if enableDebugImageSaving && debugImageCounter < 10 {
// Debug: Print pixel value statistics for output
var darkCount = 0 // pixels == 0 (black)
var whiteCount = 0 // pixels == 255 (white)
for i in 0..<(eyeWidth * eyeHeight) {
if tmpBuf[i] == 0 { darkCount += 1 } else if tmpBuf[i] == 255 { whiteCount += 1 }
}
NSLog("📊 Processed output stats: darkPixels=%d, whitePixels=%d", darkCount, whiteCount)
saveDebugImage(
data: tmpBuf, width: eyeWidth, height: eyeHeight,
name: "processed_eye_\(debugImageCounter)")
debugImageCounter += 1
}
// Step 8: Find contours and compute centroid
guard
let (centroidX, centroidY) = findPupilFromContoursOptimized(
data: tmpBuf,
width: eyeWidth,
height: eyeHeight
)
else {
if enableDiagnosticLogging {
logDebug(
"👁 PupilDetector: Failed - findPupilFromContours returned nil (not enough dark pixels) for side \(side)"
)
}
return nil
}
if enableDiagnosticLogging {
logDebug(
"👁 PupilDetector: Success side=\(side) - centroid at (\(String(format: "%.1f", centroidX)), \(String(format: "%.1f", centroidY))) in \(eyeWidth)x\(eyeHeight) region"
)
}
let pupilPosition = PupilPosition(x: CGFloat(centroidX), y: CGFloat(centroidY))
// Cache result and debug position
if side == 0 {
lastPupilPositions.left = pupilPosition
debugLeftPupilPosition = pupilPosition
} else {
lastPupilPositions.right = pupilPosition
debugRightPupilPosition = pupilPosition
}
return (pupilPosition, eyeRegion)
}
// MARK: - Buffer Management
// MARK: - Buffer Management
private nonisolated static func ensureBufferCapacity(frameSize: Int, eyeSize: Int) {
if grayscaleBufferSize < frameSize {
grayscaleBuffer?.deallocate()
grayscaleBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: frameSize)
grayscaleBufferSize = frameSize
}
let requiredEyeSize = max(eyeSize, 10000) // Minimum size for safety
if eyeBufferSize < requiredEyeSize {
eyeBuffer?.deallocate()
tempBuffer?.deallocate()
eyeBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: requiredEyeSize)
tempBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: requiredEyeSize)
eyeBufferSize = requiredEyeSize
}
}
// MARK: - Optimized Grayscale Conversion (vImage)
private nonisolated static func extractGrayscaleDataOptimized(
from pixelBuffer: CVPixelBuffer,
to output: UnsafeMutablePointer<UInt8>,
width: Int,
height: Int
) -> Bool {
CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }
let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
switch pixelFormat {
case kCVPixelFormatType_32BGRA:
guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else { return false }
let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
var srcBuffer = vImage_Buffer(
data: baseAddress,
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: bytesPerRow
)
var dstBuffer = vImage_Buffer(
data: output,
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: width
)
// BGRA to Planar8 grayscale using luminance coefficients
// Y = 0.299*R + 0.587*G + 0.114*B
let matrix: [Int16] = [
28, // B coefficient (0.114 * 256 29, adjusted)
150, // G coefficient (0.587 * 256 150)
77, // R coefficient (0.299 * 256 77)
0, // A coefficient
]
let divisor: Int32 = 256
let error = vImageMatrixMultiply_ARGB8888ToPlanar8(
&srcBuffer,
&dstBuffer,
matrix,
divisor,
nil,
0,
vImage_Flags(kvImageNoFlags)
)
return error == kvImageNoError
case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
guard let yPlane = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else {
return false
}
let yBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
let yBuffer = yPlane.assumingMemoryBound(to: UInt8.self)
// Direct copy of Y plane (already grayscale)
for y in 0..<height {
memcpy(
output.advanced(by: y * width), yBuffer.advanced(by: y * yBytesPerRow), width)
}
return true
default:
// Fallback to manual conversion
guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else { return false }
let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
let buffer = baseAddress.assumingMemoryBound(to: UInt8.self)
for y in 0..<height {
for x in 0..<width {
let offset = y * bytesPerRow + x * 4
let b = Float(buffer[offset])
let g = Float(buffer[offset + 1])
let r = Float(buffer[offset + 2])
output[y * width + x] = UInt8(0.299 * r + 0.587 * g + 0.114 * b)
}
}
return true
}
}
// MARK: - Optimized Eye Isolation
private nonisolated static func isolateEyeWithMaskOptimized(
frameData: UnsafePointer<UInt8>,
frameWidth: Int,
frameHeight: Int,
eyePoints: [CGPoint],
region: EyeRegion,
output: UnsafeMutablePointer<UInt8>
) -> Bool {
let minX = Int(region.frame.origin.x)
let minY = Int(region.frame.origin.y)
let eyeWidth = Int(region.frame.width)
let eyeHeight = Int(region.frame.height)
guard eyeWidth > 0, eyeHeight > 0 else { return false }
// Initialize to WHITE (255) - masked pixels should be bright so they don't affect pupil detection
memset(output, 255, eyeWidth * eyeHeight)
// Convert eye points to local coordinates
let localPoints = eyePoints.map { point in
(x: Float(point.x) - Float(minX), y: Float(point.y) - Float(minY))
}
// Precompute edge data for faster point-in-polygon
let edges = (0..<localPoints.count).map { i in
let p1 = localPoints[i]
let p2 = localPoints[(i + 1) % localPoints.count]
return (x1: p1.x, y1: p1.y, x2: p2.x, y2: p2.y)
}
for y in 0..<eyeHeight {
let py = Float(y)
for x in 0..<eyeWidth {
let px = Float(x)
if pointInPolygonFast(px: px, py: py, edges: edges) {
let frameX = minX + x
let frameY = minY + y
if frameX >= 0, frameX < frameWidth, frameY >= 0, frameY < frameHeight {
output[y * eyeWidth + x] = frameData[frameY * frameWidth + frameX]
}
}
}
}
return true
}
@inline(__always)
private nonisolated static func pointInPolygonFast(
px: Float, py: Float, edges: [(x1: Float, y1: Float, x2: Float, y2: Float)]
) -> Bool {
var inside = false
for edge in edges {
if ((edge.y1 > py) != (edge.y2 > py))
&& (px < (edge.x2 - edge.x1) * (py - edge.y1) / (edge.y2 - edge.y1) + edge.x1)
{
inside = !inside
}
}
return inside
}
// MARK: - Optimized Image Processing
nonisolated static func imageProcessingOptimized(
input: UnsafePointer<UInt8>,
output: UnsafeMutablePointer<UInt8>,
width: Int,
height: Int,
threshold: Int
) {
let size = width * height
guard size > 0 else { return }
// 1. Apply Gaussian Blur (reduces noise)
// We reuse tempBuffer for intermediate steps if available, or just output
// Note: gaussianBlurOptimized writes from input -> output
gaussianBlurOptimized(input: input, output: output, width: width, height: height)
// 2. Apply Erosion (expands dark regions)
// Python: cv2.erode(kernel, iterations=3)
// This helps connect broken parts of the pupil
// Note: erodeOptimized processes in-place on output if input==output
erodeOptimized(input: output, output: output, width: width, height: height, iterations: 3)
// 3. Binary Threshold
for i in 0..<size {
// Python: cv2.threshold(..., cv2.THRESH_BINARY)
// Pixels > threshold become 255 (white), others 0 (black)
// So Pupil is BLACK (0)
output[i] = output[i] > UInt8(threshold) ? 255 : 0
}
}
private nonisolated static func gaussianBlurOptimized(
input: UnsafePointer<UInt8>,
output: UnsafeMutablePointer<UInt8>,
width: Int,
height: Int
) {
// Use a more appropriate convolution for performance
// Using vImageTentConvolve_Planar8 with optimized parameters
var srcBuffer = vImage_Buffer(
data: UnsafeMutableRawPointer(mutating: input),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: width
)
var dstBuffer = vImage_Buffer(
data: UnsafeMutableRawPointer(output),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: width
)
// Kernel size that provides good blur with minimal computational overhead
let kernelSize: UInt32 = 5
vImageTentConvolve_Planar8(
&srcBuffer,
&dstBuffer,
nil,
0, 0,
kernelSize,
kernelSize,
0,
vImage_Flags(kvImageEdgeExtend)
)
}
private nonisolated static func erodeOptimized(
input: UnsafePointer<UInt8>,
output: UnsafeMutablePointer<UInt8>,
width: Int,
height: Int,
iterations: Int
) {
guard iterations > 0 else {
memcpy(output, input, width * height)
return
}
// Copy input to output first so we can use output as working buffer
memcpy(output, input, width * height)
var srcBuffer = vImage_Buffer(
data: UnsafeMutableRawPointer(output),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: width
)
// Allocate temp buffer for ping-pong
let tempData = UnsafeMutablePointer<UInt8>.allocate(capacity: width * height)
defer { tempData.deallocate() }
var dstBuffer = vImage_Buffer(
data: UnsafeMutableRawPointer(tempData),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: width
)
// 3x3 erosion kernel (all ones)
let kernel: [UInt8] = [
1, 1, 1,
1, 1, 1,
1, 1, 1,
]
for i in 0..<iterations {
if i % 2 == 0 {
vImageErode_Planar8(
&srcBuffer, &dstBuffer, 0, 0, kernel, 3, 3, vImage_Flags(kvImageNoFlags))
} else {
vImageErode_Planar8(
&dstBuffer, &srcBuffer, 0, 0, kernel, 3, 3, vImage_Flags(kvImageNoFlags))
}
}
// If odd iterations, result is in dstBuffer (tempData), copy to output
if iterations % 2 == 1 {
memcpy(output, tempData, width * height)
}
// If even iterations, result is already in srcBuffer (output)
}
// MARK: - Optimized Contour Detection
/// Finds the largest connected component of dark pixels and returns its centroid
/// This is much more robust than averaging all dark pixels, as it ignores shadows/noise
private nonisolated static func findPupilFromContoursOptimized(
data: UnsafePointer<UInt8>,
width: Int,
height: Int
) -> (x: Double, y: Double)? {
let size = width * height
// 1. Threshold pass: Identify all dark pixels (0)
// We use a visited array to track processed pixels for flood fill
// Using a flat bool array for performance
var visited = [Bool](repeating: false, count: size)
var maxBlobSize = 0
var maxBlobSumX = 0
var maxBlobSumY = 0
// 2. Iterate through pixels to find connected components
for y in 0..<height {
let rowOffset = y * width
for x in 0..<width {
let idx = rowOffset + x
// If it's a dark pixel (0) and not visited, start a flood fill
if data[idx] == 0 && !visited[idx] {
var currentBlobSize = 0
var currentBlobSumX = 0
var currentBlobSumY = 0
// Stack for DFS/BFS (using array as stack is fast in Swift)
var stack: [Int] = [idx]
visited[idx] = true
while let currentIdx = stack.popLast() {
let cx = currentIdx % width
let cy = currentIdx / width
currentBlobSize += 1
currentBlobSumX += cx
currentBlobSumY += cy
// Check 4 neighbors
// Right
if cx + 1 < width {
let nIdx = currentIdx + 1
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Left
if cx - 1 >= 0 {
let nIdx = currentIdx - 1
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Down
if cy + 1 < height {
let nIdx = currentIdx + width
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
// Up
if cy - 1 >= 0 {
let nIdx = currentIdx - width
if data[nIdx] == 0 && !visited[nIdx] {
visited[nIdx] = true
stack.append(nIdx)
}
}
}
// Check if this is the largest blob so far
if currentBlobSize > maxBlobSize {
maxBlobSize = currentBlobSize
maxBlobSumX = currentBlobSumX
maxBlobSumY = currentBlobSumY
}
}
}
}
if enableDiagnosticLogging && maxBlobSize < 5 {
logDebug("👁 PupilDetector: Largest blob size = \(maxBlobSize) (need >= 5)")
}
// Minimum 5 pixels for valid pupil
guard maxBlobSize >= 5 else { return nil }
return (
x: Double(maxBlobSumX) / Double(maxBlobSize),
y: Double(maxBlobSumY) / Double(maxBlobSize)
)
}
// MARK: - Helper Methods
private nonisolated static func landmarksToPixelCoordinates(
landmarks: VNFaceLandmarkRegion2D,
faceBoundingBox: CGRect,
imageSize: CGSize
) -> [CGPoint] {
// Vision uses bottom-left origin (normalized 0-1), CVPixelBuffer uses top-left
// We need to flip Y: flippedY = 1.0 - y
return landmarks.normalizedPoints.map { point in
let imageX =
(faceBoundingBox.origin.x + point.x * faceBoundingBox.width) * imageSize.width
// Flip Y coordinate for pixel buffer coordinate system
let flippedY = 1.0 - (faceBoundingBox.origin.y + point.y * faceBoundingBox.height)
let imageY = flippedY * imageSize.height
return CGPoint(x: imageX, y: imageY)
}
}
private nonisolated static func createEyeRegion(from points: [CGPoint], imageSize: CGSize)
-> EyeRegion?
{
guard !points.isEmpty else { return nil }
let margin: CGFloat = 5
var minX = CGFloat.greatestFiniteMagnitude
var maxX = -CGFloat.greatestFiniteMagnitude
var minY = CGFloat.greatestFiniteMagnitude
var maxY = -CGFloat.greatestFiniteMagnitude
for point in points {
minX = min(minX, point.x)
maxX = max(maxX, point.x)
minY = min(minY, point.y)
maxY = max(maxY, point.y)
}
minX -= margin
maxX += margin
minY -= margin
maxY += margin
let clampedMinX = max(0, minX)
let clampedMaxX = min(imageSize.width, maxX)
let clampedMinY = max(0, minY)
let clampedMaxY = min(imageSize.height, maxY)
let frame = CGRect(
x: clampedMinX,
y: clampedMinY,
width: clampedMaxX - clampedMinX,
height: clampedMaxY - clampedMinY
)
let center = CGPoint(x: frame.width / 2, y: frame.height / 2)
let origin = CGPoint(x: clampedMinX, y: clampedMinY)
return EyeRegion(frame: frame, center: center, origin: origin)
}
// MARK: - Debug Helpers
private nonisolated static func saveDebugImage(
data: UnsafePointer<UInt8>, width: Int, height: Int, name: String
) {
guard let cgImage = createCGImage(from: data, width: width, height: height) else {
NSLog("⚠️ PupilDetector: createCGImage failed for %@ (%dx%d)", name, width, height)
return
}
let url = URL(fileURLWithPath: "/tmp/gaze_debug/\(name).png")
guard
let destination = CGImageDestinationCreateWithURL(
url as CFURL, UTType.png.identifier as CFString, 1, nil)
else {
NSLog("⚠️ PupilDetector: CGImageDestinationCreateWithURL failed for %@", url.path)
return
}
CGImageDestinationAddImage(destination, cgImage, nil)
let success = CGImageDestinationFinalize(destination)
if success {
NSLog("💾 Saved debug image: %@", url.path)
} else {
NSLog("⚠️ PupilDetector: CGImageDestinationFinalize failed for %@", url.path)
}
}
private nonisolated static func createCGImage(
from data: UnsafePointer<UInt8>, width: Int, height: Int
)
-> CGImage?
{
guard width > 0 && height > 0 else {
print("⚠️ PupilDetector: Invalid dimensions \(width)x\(height)")
return nil
}
// Create a Data object that copies the pixel data
let pixelData = Data(bytes: data, count: width * height)
// Create CGImage from the data using CGDataProvider
guard let provider = CGDataProvider(data: pixelData as CFData) else {
print("⚠️ PupilDetector: CGDataProvider creation failed")
return nil
}
let cgImage = CGImage(
width: width,
height: height,
bitsPerComponent: 8,
bitsPerPixel: 8,
bytesPerRow: width,
space: CGColorSpaceCreateDeviceGray(),
bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue),
provider: provider,
decode: nil,
shouldInterpolate: false,
intent: .defaultIntent
)
if cgImage == nil {
print("⚠️ PupilDetector: CGImage creation failed")
}
return cgImage
}
/// Clean up allocated buffers (call on app termination if needed)
nonisolated static func cleanup() {
grayscaleBuffer?.deallocate()
grayscaleBuffer = nil
grayscaleBufferSize = 0
eyeBuffer?.deallocate()
eyeBuffer = nil
tempBuffer?.deallocate()
tempBuffer = nil
eyeBufferSize = 0
}
}