fix: almost ready for fine tuning
@@ -62,4 +62,20 @@ enum EyeTrackingConstants: Sendable {
     static let pixelGazeMinRatio: Double = 0.35  // Looking right threshold
     static let pixelGazeMaxRatio: Double = 0.65  // Looking left threshold
     static let pixelGazeEnabled: Bool = true
+
+    // MARK: - Screen Boundary Detection (New)
+
+    /// Forgiveness margin for the "gray area" around the screen edge.
+    /// 0.05 means the safe zone is extended by 5% of the range on each side.
+    /// If the gaze falls inside the gray area, the user is still treated as looking at the screen.
+    static let boundaryForgivenessMargin: Double = 0.05
+
+    /// Distance sensitivity factor.
+    /// 1.0 = linear scaling (a face 50% smaller means eye movement is expected to be 50% smaller).
+    /// > 1.0 = more aggressive scaling.
+    static let distanceSensitivity: Double = 1.0
+
+    /// Minimum confidence required for a valid pupil detection before updating the gaze average.
+    /// Helps filter out blinks or noisy frames.
+    static let minimumGazeConfidence: Int = 3  // consecutive valid frames
 }
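These constants feed the distance scaling applied twice later in this diff: scale = 1 + (ref/current - 1) * distanceSensitivity. A minimal standalone sketch of that formula, with the helper name and the sample numbers invented for illustration:

// Hypothetical helper mirroring the scaling used in EyeTrackingService below.
// Widths are face-bounding-box widths as a fraction of the image width.
func distanceScale(referenceWidth: Double, currentWidth: Double, sensitivity: Double = 1.0) -> Double {
    guard referenceWidth > 0, currentWidth > 0 else { return 1.0 }  // uncalibrated: no scaling
    let rawScale = referenceWidth / currentWidth     // 0.5 / 0.25 = 2.0 when twice as far away
    return 1.0 + (rawScale - 1.0) * sensitivity      // sensitivity 1.0 keeps it linear
}

print(distanceScale(referenceWidth: 0.5, currentWidth: 0.25))  // 2.0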
@@ -45,15 +45,15 @@ enum CalibrationStep: String, Codable, CaseIterable {
         case .farLeft:
             return "Look as far left as comfortable"
         case .left:
-            return "Look to the left"
+            return "Look to the left edge of the screen"
         case .farRight:
             return "Look as far right as comfortable"
         case .right:
-            return "Look to the right"
+            return "Look to the right edge of the screen"
         case .up:
-            return "Look up"
+            return "Look to the top edge of the screen"
         case .down:
-            return "Look down"
+            return "Look to the bottom edge of the screen"
         case .topLeft:
             return "Look to the top left corner"
         case .topRight:
@@ -70,42 +70,75 @@ struct GazeSample: Codable {
     let leftRatio: Double?
     let rightRatio: Double?
     let averageRatio: Double
+    let leftVerticalRatio: Double?
+    let rightVerticalRatio: Double?
+    let averageVerticalRatio: Double
+    let faceWidthRatio: Double?  // For distance scaling (face width / image width)
     let timestamp: Date

-    init(leftRatio: Double?, rightRatio: Double?) {
+    init(leftRatio: Double?, rightRatio: Double?, leftVerticalRatio: Double? = nil, rightVerticalRatio: Double? = nil, faceWidthRatio: Double? = nil) {
         self.leftRatio = leftRatio
         self.rightRatio = rightRatio
+        self.leftVerticalRatio = leftVerticalRatio
+        self.rightVerticalRatio = rightVerticalRatio
+        self.faceWidthRatio = faceWidthRatio

-        // Calculate average from available ratios
+        // Calculate average horizontal ratio
        if let left = leftRatio, let right = rightRatio {
             self.averageRatio = (left + right) / 2.0
         } else {
             self.averageRatio = leftRatio ?? rightRatio ?? 0.5
         }
+
+        // Calculate average vertical ratio
+        if let left = leftVerticalRatio, let right = rightVerticalRatio {
+            self.averageVerticalRatio = (left + right) / 2.0
+        } else {
+            self.averageVerticalRatio = leftVerticalRatio ?? rightVerticalRatio ?? 0.5
+        }

         self.timestamp = Date()
     }
 }
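The initializer's fallbacks mean a sample stays usable when only one pupil was detected, and a missing axis defaults to center (0.5). A quick illustration with invented values:

let oneEye = GazeSample(leftRatio: 0.62, rightRatio: nil, leftVerticalRatio: 0.48)
// oneEye.averageRatio == 0.62, oneEye.averageVerticalRatio == 0.48

let horizontalOnly = GazeSample(leftRatio: 0.40, rightRatio: 0.44)
// horizontalOnly.averageRatio == 0.42; vertical falls back to 0.5 (assumed center)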
 struct GazeThresholds: Codable {
-    let minLeftRatio: Double   // Looking left threshold (e.g., 0.65)
-    let maxRightRatio: Double  // Looking right threshold (e.g., 0.35)
-    let centerMin: Double      // Center range minimum
-    let centerMax: Double      // Center range maximum
+    // Horizontal Thresholds
+    let minLeftRatio: Double   // Looking left (≥ value)
+    let maxRightRatio: Double  // Looking right (≤ value)
+
+    // Vertical Thresholds
+    let minUpRatio: Double     // Looking up (≤ value, typically < 0.5)
+    let maxDownRatio: Double   // Looking down (≥ value, typically > 0.5)
+
+    // Screen Bounds (Calibration Zone)
+    // Defines the rectangle of pupil ratios that correspond to looking AT the screen
+    let screenLeftBound: Double
+    let screenRightBound: Double
+    let screenTopBound: Double
+    let screenBottomBound: Double
+
+    // Reference Data for Distance Scaling
+    let referenceFaceWidth: Double  // Average face width during calibration

     var isValid: Bool {
-        // Ensure thresholds don't overlap
-        return maxRightRatio < centerMin &&
-            centerMin < centerMax &&
-            centerMax < minLeftRatio
+        // Basic sanity checks
+        return maxRightRatio < minLeftRatio &&
+            minUpRatio < maxDownRatio &&
+            screenRightBound < screenLeftBound &&  // Assuming lower ratio = right
+            screenTopBound < screenBottomBound     // Assuming lower ratio = up
     }

     static var defaultThresholds: GazeThresholds {
         GazeThresholds(
             minLeftRatio: 0.65,
             maxRightRatio: 0.35,
-            centerMin: 0.40,
-            centerMax: 0.60
+            minUpRatio: 0.40,
+            maxDownRatio: 0.60,
+            screenLeftBound: 0.60,
+            screenRightBound: 0.40,
+            screenTopBound: 0.45,
+            screenBottomBound: 0.55,
+            referenceFaceWidth: 0.0  // 0.0 means unused/uncalibrated
         )
     }
 }
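Putting the screen bounds and the forgiveness margin together, the "looking away" test reduces to a rectangle check. A hedged sketch of that check against the defaults (the helper name is invented; it mirrors the boundary logic added further down in this commit):

// Coordinates follow the commit's convention:
// horizontal 0.0 (right) -> 1.0 (left), vertical 0.0 (up) -> 1.0 (down).
func isOffScreen(h: Double, v: Double, thresholds: GazeThresholds, margin: Double = 0.05) -> Bool {
    return h > thresholds.screenLeftBound + margin     // past the left edge
        || h < thresholds.screenRightBound - margin    // past the right edge
        || v < thresholds.screenTopBound - margin      // past the top edge
        || v > thresholds.screenBottomBound + margin   // past the bottom edge
}

let t = GazeThresholds.defaultThresholds
isOffScreen(h: 0.62, v: 0.50, thresholds: t)  // false: 0.62 sits in the 0.05 gray area past 0.60
isOffScreen(h: 0.70, v: 0.50, thresholds: t)  // true: beyond screenLeftBound + margin (0.65)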
@@ -137,62 +170,95 @@ struct CalibrationData: Codable {
     func averageRatio(for step: CalibrationStep) -> Double? {
         let stepSamples = getSamples(for: step)
         guard !stepSamples.isEmpty else { return nil }
-
-        let sum = stepSamples.reduce(0.0) { $0 + $1.averageRatio }
-        return sum / Double(stepSamples.count)
+        return stepSamples.reduce(0.0) { $0 + $1.averageRatio } / Double(stepSamples.count)
     }

-    func standardDeviation(for step: CalibrationStep) -> Double? {
+    func averageVerticalRatio(for step: CalibrationStep) -> Double? {
         let stepSamples = getSamples(for: step)
-        guard stepSamples.count > 1, let mean = averageRatio(for: step) else { return nil }
-
-        let variance = stepSamples.reduce(0.0) { sum, sample in
-            let diff = sample.averageRatio - mean
-            return sum + (diff * diff)
-        } / Double(stepSamples.count - 1)
-
-        return sqrt(variance)
+        guard !stepSamples.isEmpty else { return nil }
+        return stepSamples.reduce(0.0) { $0 + $1.averageVerticalRatio } / Double(stepSamples.count)
     }

+    func averageFaceWidth(for step: CalibrationStep) -> Double? {
+        let stepSamples = getSamples(for: step)
+        let validSamples = stepSamples.compactMap { $0.faceWidthRatio }
+        guard !validSamples.isEmpty else { return nil }
+        return validSamples.reduce(0.0, +) / Double(validSamples.count)
+    }
     mutating func calculateThresholds() {
-        // Need at least center, left, and right samples
-        guard let centerMean = averageRatio(for: .center),
-            let leftMean = averageRatio(for: .left),
-            let rightMean = averageRatio(for: .right) else {
-            print("⚠️ Insufficient calibration data to calculate thresholds")
-            return
-        }
+        // We need Center, Left, Right, Up, Down samples for a full calibration.
+        // Fallback: if corners (topLeft, etc.) are available, use them to reinforce bounds.

-        let centerStdDev = standardDeviation(for: .center) ?? 0.05
+        let centerH = averageRatio(for: .center) ?? 0.5
+        let centerV = averageVerticalRatio(for: .center) ?? 0.5

-        // Calculate center range (mean ± 0.5 * std_dev)
-        let centerMin = max(0.0, centerMean - 0.5 * centerStdDev)
-        let centerMax = min(1.0, centerMean + 0.5 * centerStdDev)
+        // 1. Horizontal bounds
+        // If the dedicated Left/Right steps are missing, fall back to the corners
+        let leftH = averageRatio(for: .left) ?? averageRatio(for: .topLeft) ?? averageRatio(for: .bottomLeft) ?? (centerH + 0.15)
+        let rightH = averageRatio(for: .right) ?? averageRatio(for: .topRight) ?? averageRatio(for: .bottomRight) ?? (centerH - 0.15)

-        // Calculate left threshold (midpoint between center and left extremes)
-        let minLeftRatio = centerMax + (leftMean - centerMax) * 0.5
+        // 2. Vertical bounds
+        let upV = averageVerticalRatio(for: .up) ?? averageVerticalRatio(for: .topLeft) ?? averageVerticalRatio(for: .topRight) ?? (centerV - 0.15)
+        let downV = averageVerticalRatio(for: .down) ?? averageVerticalRatio(for: .bottomLeft) ?? averageVerticalRatio(for: .bottomRight) ?? (centerV + 0.15)

-        // Calculate right threshold (midpoint between center and right extremes)
-        let maxRightRatio = centerMin - (centerMin - rightMean) * 0.5
+        // 3. Face width reference (average of all center samples)
+        let refFaceWidth = averageFaceWidth(for: .center) ?? 0.0

-        // Validate and adjust if needed
-        var thresholds = GazeThresholds(
-            minLeftRatio: min(0.95, max(0.55, minLeftRatio)),
-            maxRightRatio: max(0.05, min(0.45, maxRightRatio)),
-            centerMin: centerMin,
-            centerMax: centerMax
+        // 4. Compute boundaries with margin
+        // The "screen bound" is exactly where the user looked during calibration.
+        // Thresholds sit slightly BEYOND that, so only gaze past the edge counts as looking away.
+
+        // Note: assuming standard coordinates where:
+        //   Horizontal: 0.0 (right) -> 1.0 (left)
+        //   Vertical:   0.0 (up)   -> 1.0 (down)
+
+        // Looking left = ratio above the screen's left edge
+        let lookLeftThreshold = leftH + 0.05
+        // Looking right = ratio below the screen's right edge
+        let lookRightThreshold = rightH - 0.05
+
+        // Looking up = ratio below the screen's top edge
+        let lookUpThreshold = upV - 0.05
+        // Looking down = ratio above the screen's bottom edge
+        let lookDownThreshold = downV + 0.05
+
+        // `var`, not `let`: the validity check below may swap in the defaults
+        var thresholds = GazeThresholds(
+            minLeftRatio: lookLeftThreshold,
+            maxRightRatio: lookRightThreshold,
+            minUpRatio: lookUpThreshold,
+            maxDownRatio: lookDownThreshold,
+            screenLeftBound: leftH,
+            screenRightBound: rightH,
+            screenTopBound: upV,
+            screenBottomBound: downV,
+            referenceFaceWidth: refFaceWidth
         )

         // Ensure no overlap
         if !thresholds.isValid {
             print("⚠️ Computed thresholds overlap, using defaults")
             thresholds = GazeThresholds.defaultThresholds
         }

         self.computedThresholds = thresholds
         print("✓ Calibration thresholds calculated:")
-        print("  Left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))")
-        print("  Center: \(String(format: "%.3f", thresholds.centerMin))-\(String(format: "%.3f", thresholds.centerMax))")
-        print("  Right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))")
+        print("  H-Range: \(String(format: "%.3f", rightH)) to \(String(format: "%.3f", leftH))")
+        print("  V-Range: \(String(format: "%.3f", upV)) to \(String(format: "%.3f", downV))")
+        print("  Ref Face Width: \(String(format: "%.3f", refFaceWidth))")
     }
 }
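A worked instance of step 4 with invented calibration averages, showing how far the gaze must travel past each calibrated edge before counting as "away":

let leftH = 0.68, rightH = 0.31, upV = 0.42, downV = 0.61

let lookLeftThreshold = leftH + 0.05    // ≈ 0.73
let lookRightThreshold = rightH - 0.05  // ≈ 0.26
let lookUpThreshold = upV - 0.05        // ≈ 0.37
let lookDownThreshold = downV + 0.05    // ≈ 0.66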

+/// Thread-safe storage for active calibration thresholds.
+/// Allows non-isolated code (video processing) to read thresholds without hitting the MainActor.
+class CalibrationState: @unchecked Sendable {
+    static let shared = CalibrationState()
+    private let queue = DispatchQueue(label: "com.gaze.calibrationState", attributes: .concurrent)
+    private var _thresholds: GazeThresholds?
+    private var _isComplete: Bool = false
+
+    var thresholds: GazeThresholds? {
+        get { queue.sync { _thresholds } }
+        set { queue.async(flags: .barrier) { self._thresholds = newValue } }
+    }
+
+    var isComplete: Bool {
+        get { queue.sync { _isComplete } }
+        set { queue.async(flags: .barrier) { self._isComplete = newValue } }
+    }
+}
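The concurrent queue plus barrier writes is the classic reader/writer pattern: any number of simultaneous reads, exclusive writes. A standalone sketch of the same pattern with a generic value in place of GazeThresholds (type and queue label invented):

import Dispatch

final class AtomicBox<T>: @unchecked Sendable {
    private let queue = DispatchQueue(label: "atomic.box", attributes: .concurrent)
    private var _value: T
    init(_ value: T) { self._value = value }
    var value: T {
        get { queue.sync { _value } }                                    // concurrent reads
        set { queue.async(flags: .barrier) { self._value = newValue } }  // exclusive write
    }
}

let box = AtomicBox(0)
DispatchQueue.concurrentPerform(iterations: 100) { i in
    if i.isMultiple(of: 10) { box.value = i }  // writers take the barrier
    _ = box.value                              // readers run in parallel
}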
@@ -56,10 +56,16 @@ class CalibrationManager: ObservableObject {
         calibrationData = CalibrationData()
     }

-    func collectSample(leftRatio: Double?, rightRatio: Double?) {
+    func collectSample(leftRatio: Double?, rightRatio: Double?, leftVertical: Double? = nil, rightVertical: Double? = nil, faceWidthRatio: Double? = nil) {
         guard isCalibrating, let step = currentStep else { return }

-        let sample = GazeSample(leftRatio: leftRatio, rightRatio: rightRatio)
+        let sample = GazeSample(
+            leftRatio: leftRatio,
+            rightRatio: rightRatio,
+            leftVerticalRatio: leftVertical,
+            rightVerticalRatio: rightVertical,
+            faceWidthRatio: faceWidthRatio
+        )
         calibrationData.addSample(sample, for: step)
         samplesCollected += 1
@@ -116,6 +122,10 @@ class CalibrationManager: ObservableObject {
         currentStepIndex = 0
         samplesCollected = 0
         calibrationData = CalibrationData()
+
+        // Reset thread-safe state
+        CalibrationState.shared.isComplete = false
+        CalibrationState.shared.thresholds = nil
     }

     // MARK: - Persistence
@@ -157,6 +167,11 @@ class CalibrationManager: ObservableObject {
     func clearCalibration() {
         UserDefaults.standard.removeObject(forKey: userDefaultsKey)
         calibrationData = CalibrationData()
+
+        // Reset thread-safe state
+        CalibrationState.shared.isComplete = false
+        CalibrationState.shared.thresholds = nil
+
         print("🗑️ Calibration data cleared")
     }
@@ -185,19 +200,22 @@ class CalibrationManager: ObservableObject {

     // MARK: - Apply Calibration

     private func applyCalibration() {
         guard let thresholds = calibrationData.computedThresholds else {
             print("⚠️ No thresholds to apply")
             return
         }

-        // Note: EyeTrackingConstants are static properties that should not be modified.
-        // Any calibrated values should be used separately in the logic, not stored back to the constants.
-        // This is a placeholder for future implementation if dynamic threshold updates are needed.
+        // Push to thread-safe state for background processing
+        CalibrationState.shared.thresholds = thresholds
+        CalibrationState.shared.isComplete = true

         print("✓ Applied calibrated thresholds:")
         print("  Looking left: ≥\(String(format: "%.3f", thresholds.minLeftRatio))")
         print("  Looking right: ≤\(String(format: "%.3f", thresholds.maxRightRatio))")
+        print("  Looking up: ≤\(String(format: "%.3f", thresholds.minUpRatio))")
+        print("  Looking down: ≥\(String(format: "%.3f", thresholds.maxDownRatio))")
+        print("  Screen Bounds: [\(String(format: "%.2f", thresholds.screenRightBound))..\(String(format: "%.2f", thresholds.screenLeftBound))] x [\(String(format: "%.2f", thresholds.screenTopBound))..\(String(format: "%.2f", thresholds.screenBottomBound))]")
     }

     // MARK: - Statistics
@@ -214,9 +232,9 @@ private func applyCalibration() {
         var summary = "Calibrated: \(dateFormatter.string(from: calibrationData.calibrationDate))\n"

         if let thresholds = calibrationData.computedThresholds {
-            summary += "Left threshold: \(String(format: "%.3f", thresholds.minLeftRatio))\n"
-            summary += "Right threshold: \(String(format: "%.3f", thresholds.maxRightRatio))\n"
-            summary += "Center range: \(String(format: "%.3f", thresholds.centerMin)) - \(String(format: "%.3f", thresholds.centerMax))"
+            summary += "H-Range: \(String(format: "%.3f", thresholds.screenRightBound)) to \(String(format: "%.3f", thresholds.screenLeftBound))\n"
+            summary += "V-Range: \(String(format: "%.3f", thresholds.screenTopBound)) to \(String(format: "%.3f", thresholds.screenBottomBound))\n"
+            summary += "Ref Face Width: \(String(format: "%.3f", thresholds.referenceFaceWidth))"
         }

         return summary
@@ -470,19 +470,66 @@ class EyeTrackingService: NSObject, ObservableObject {
         if let leftRatio = leftGazeRatio,
            let rightRatio = rightGazeRatio
         {
+            let faceWidth = face.boundingBox.width
+
             Task { @MainActor in
                 if CalibrationManager.shared.isCalibrating {
                     CalibrationManager.shared.collectSample(
                         leftRatio: leftRatio,
-                        rightRatio: rightRatio
+                        rightRatio: rightRatio,
+                        leftVertical: leftVerticalRatio,
+                        rightVertical: rightVerticalRatio,
+                        faceWidthRatio: faceWidth
                     )
                 }
             }

-            let avgRatio = (leftRatio + rightRatio) / 2.0
-            let lookingRight = avgRatio <= EyeTrackingConstants.pixelGazeMinRatio
-            let lookingLeft = avgRatio >= EyeTrackingConstants.pixelGazeMaxRatio
-            eyesLookingAway = lookingRight || lookingLeft
+            let avgH = (leftRatio + rightRatio) / 2.0
+            // Use 0.5 as the default for vertical if not available
+            let avgV = (leftVerticalRatio != nil && rightVerticalRatio != nil)
+                ? (leftVerticalRatio! + rightVerticalRatio!) / 2.0
+                : 0.5
+
+            // Use calibrated thresholds from the thread-safe state
+            if let thresholds = CalibrationState.shared.thresholds,
+               CalibrationState.shared.isComplete {
+
+                // 1. Distance Scaling
+                let currentFaceWidth = face.boundingBox.width
+                let refFaceWidth = thresholds.referenceFaceWidth
+
+                var distanceScale = 1.0
+                if refFaceWidth > 0 && currentFaceWidth > 0 {
+                    distanceScale = refFaceWidth / currentFaceWidth
+                    distanceScale = 1.0 + (distanceScale - 1.0) * EyeTrackingConstants.distanceSensitivity
+                }
+
+                // 2. Normalize Gaze
+                let centerH = (thresholds.screenLeftBound + thresholds.screenRightBound) / 2.0
+                let centerV = (thresholds.screenTopBound + thresholds.screenBottomBound) / 2.0
+
+                let deltaH = (avgH - centerH) * distanceScale
+                let deltaV = (avgV - centerV) * distanceScale
+
+                let normalizedH = centerH + deltaH
+                let normalizedV = centerV + deltaV
+
+                // 3. Boundary Check
+                let margin = EyeTrackingConstants.boundaryForgivenessMargin
+
+                let isLookingLeft = normalizedH > (thresholds.screenLeftBound + margin)
+                let isLookingRight = normalizedH < (thresholds.screenRightBound - margin)
+                let isLookingUp = normalizedV < (thresholds.screenTopBound - margin)
+                let isLookingDown = normalizedV > (thresholds.screenBottomBound + margin)
+
+                eyesLookingAway = isLookingLeft || isLookingRight || isLookingUp || isLookingDown
+
+            } else {
+                // Fall back to the default constants
+                let lookingRight = avgH <= EyeTrackingConstants.pixelGazeMinRatio
+                let lookingLeft = avgH >= EyeTrackingConstants.pixelGazeMaxRatio
+                eyesLookingAway = lookingRight || lookingLeft
+            }
         }
     }
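The normalization step rescales the gaze delta around the calibrated screen center before the boundary test, so a user sitting further back (smaller face, larger scale) still trips the same bounds. A worked instance with invented numbers:

let screenLeftBound = 0.60, screenRightBound = 0.40
let centerH = (screenLeftBound + screenRightBound) / 2.0   // 0.5
let avgH = 0.56          // raw horizontal ratio this frame
let distanceScale = 2.0  // face appears half as wide as at calibration

let normalizedH = centerH + (avgH - centerH) * distanceScale  // 0.5 + 0.06 * 2 = 0.62
// 0.62 is what gets compared against screenLeftBound + margin, not the raw 0.56.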
@@ -621,6 +668,8 @@ class EyeTrackingService: NSObject, ObservableObject {
     {
         var leftGazeRatio: Double? = nil
         var rightGazeRatio: Double? = nil
+        var leftVerticalRatio: Double? = nil
+        var rightVerticalRatio: Double? = nil

         // Detect left pupil (side = 0)
         if let leftResult = PupilDetector.detectPupil(
@@ -634,6 +683,10 @@ class EyeTrackingService: NSObject, ObservableObject {
                 pupilPosition: leftResult.pupilPosition,
                 eyeRegion: leftResult.eyeRegion
             )
+            leftVerticalRatio = calculateVerticalRatioSync(
+                pupilPosition: leftResult.pupilPosition,
+                eyeRegion: leftResult.eyeRegion
+            )
         }

         // Detect right pupil (side = 1)
@@ -648,6 +701,10 @@ class EyeTrackingService: NSObject, ObservableObject {
                 pupilPosition: rightResult.pupilPosition,
                 eyeRegion: rightResult.eyeRegion
             )
+            rightVerticalRatio = calculateVerticalRatioSync(
+                pupilPosition: rightResult.pupilPosition,
+                eyeRegion: rightResult.eyeRegion
+            )
         }

         // CRITICAL: Connect to CalibrationManager
@@ -655,38 +712,115 @@ class EyeTrackingService: NSObject, ObservableObject {
            let leftRatio = leftGazeRatio,
            let rightRatio = rightGazeRatio
         {
+            // Calculate face width ratio for distance estimation
+            let faceWidthRatio = face.boundingBox.width
+
             CalibrationManager.shared.collectSample(
                 leftRatio: leftRatio,
-                rightRatio: rightRatio
+                rightRatio: rightRatio,
+                leftVertical: leftVerticalRatio,
+                rightVertical: rightVerticalRatio,
+                faceWidthRatio: faceWidthRatio
             )
         }

         // Determine looking away using calibrated thresholds
         if let leftRatio = leftGazeRatio, let rightRatio = rightGazeRatio {
-            let avgRatio = (leftRatio + rightRatio) / 2.0
-            let lookingRight = avgRatio <= EyeTrackingConstants.pixelGazeMinRatio
-            let lookingLeft = avgRatio >= EyeTrackingConstants.pixelGazeMaxRatio
-            eyesLookingAway = lookingRight || lookingLeft
-
-            if shouldLog {
-                print(
-                    "👁️ PIXEL GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgRatio)) Away=\(eyesLookingAway)"
-                )
-                print(
-                    "   Thresholds: Min=\(String(format: "%.3f", EyeTrackingConstants.pixelGazeMinRatio)) Max=\(String(format: "%.3f", EyeTrackingConstants.pixelGazeMaxRatio))"
-                )
-            }
+            let avgH = (leftRatio + rightRatio) / 2.0
+            // Use 0.5 as the default for vertical if not available (though it should be)
+            let avgV = (leftVerticalRatio != nil && rightVerticalRatio != nil)
+                ? (leftVerticalRatio! + rightVerticalRatio!) / 2.0
+                : 0.5
+
+            // Use calibrated thresholds if available.
+            // Read the thread-safe state instead of CalibrationManager.shared (MainActor-isolated).
+            if let thresholds = CalibrationState.shared.thresholds,
+               CalibrationState.shared.isComplete {
+
+                // 1. Distance Scaling
+                // If the current face is SMALLER than the reference, the user is FURTHER away.
+                // Eyes move LESS for the same screen angle, so the deviation must be SCALED UP.
+                let currentFaceWidth = face.boundingBox.width
+                let refFaceWidth = thresholds.referenceFaceWidth
+
+                var distanceScale = 1.0
+                if refFaceWidth > 0 && currentFaceWidth > 0 {
+                    // Simple linear scaling: scale = ref / current
+                    // e.g. ref = 0.5, current = 0.25 (further away) -> scale = 2.0
+                    distanceScale = refFaceWidth / currentFaceWidth
+
+                    // Apply sensitivity tuning
+                    distanceScale = 1.0 + (distanceScale - 1.0) * EyeTrackingConstants.distanceSensitivity
+                }
+
+                // 2. Normalize Gaze (center relative)
+                // Scale the gaze delta around the calibrated screen center,
+                // taken as the midpoint of the calibrated screen bounds.
+                let centerH = (thresholds.screenLeftBound + thresholds.screenRightBound) / 2.0
+                let centerV = (thresholds.screenTopBound + thresholds.screenBottomBound) / 2.0
+
+                let deltaH = (avgH - centerH) * distanceScale
+                let deltaV = (avgV - centerV) * distanceScale
+
+                let normalizedH = centerH + deltaH
+                let normalizedV = centerV + deltaV
+
+                // 3. Boundary Check with Margin
+                // "Forgiveness" expands the safe zone (screen bounds).
+                // If you are IN the margin, you are considered ON SCREEN (safe).
+                // Looking away means passing (bound + margin).

+                let margin = EyeTrackingConstants.boundaryForgivenessMargin
+
+                // Check left (higher ratio)
+                // Screen left is e.g. 0.7; looking left means > 0.7.
+                // To count as looking away, the gaze must exceed (0.7 + margin).
+                let isLookingLeft = normalizedH > (thresholds.screenLeftBound + margin)
+
+                // Check right (lower ratio)
+                // Screen right is e.g. 0.3; looking right means < 0.3.
+                // To count as looking away, the gaze must be below (0.3 - margin).
+                let isLookingRight = normalizedH < (thresholds.screenRightBound - margin)
+
+                // Check up (lower ratio, usually)
+                let isLookingUp = normalizedV < (thresholds.screenTopBound - margin)
+
+                // Check down (higher ratio, usually)
+                let isLookingDown = normalizedV > (thresholds.screenBottomBound + margin)
+
+                eyesLookingAway = isLookingLeft || isLookingRight || isLookingUp || isLookingDown
+
+                if shouldLog {
+                    print("👁️ CALIBRATED GAZE: AvgH=\(String(format: "%.2f", avgH)) AvgV=\(String(format: "%.2f", avgV)) DistScale=\(String(format: "%.2f", distanceScale))")
+                    print("   NormH=\(String(format: "%.2f", normalizedH)) NormV=\(String(format: "%.2f", normalizedV)) Away=\(eyesLookingAway)")
+                    print("   Bounds: H[\(String(format: "%.2f", thresholds.screenRightBound))-\(String(format: "%.2f", thresholds.screenLeftBound))] V[\(String(format: "%.2f", thresholds.screenTopBound))-\(String(format: "%.2f", thresholds.screenBottomBound))]")
+                }
+
+            } else {
+                // Fall back to the default constants
+                let lookingRight = avgH <= EyeTrackingConstants.pixelGazeMinRatio
+                let lookingLeft = avgH >= EyeTrackingConstants.pixelGazeMaxRatio
+                eyesLookingAway = lookingRight || lookingLeft
+            }
+
+            // Update debug values
+            Task { @MainActor in
+                debugLeftPupilRatio = leftGazeRatio
+                debugRightPupilRatio = rightGazeRatio
+                debugLeftVerticalRatio = leftVerticalRatio
+                debugRightVerticalRatio = rightVerticalRatio
+            }
+
+            if shouldLog && !CalibrationState.shared.isComplete {
+                print(
+                    "👁️ RAW GAZE: L=\(String(format: "%.3f", leftRatio)) R=\(String(format: "%.3f", rightRatio)) Avg=\(String(format: "%.3f", avgH)) Away=\(eyesLookingAway)"
+                )
+            }
+        } else {
+            if shouldLog {
+                print("⚠️ Pixel pupil detection failed for one or both eyes")
+            }
+        }

-        // Update debug values
-        Task { @MainActor in
-            debugLeftPupilRatio = leftGazeRatio
-            debugRightPupilRatio = rightGazeRatio
-        }
     } else {
         if shouldLog {
             if pixelBuffer == nil {
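The force-unwrapped ternary above is safe because of the nil checks, but the same fallback can be written without `!`. A small hedged alternative (helper name invented) matching the semantics exactly: both values present means average them, anything else falls back to 0.5:

func averageOrDefault(_ a: Double?, _ b: Double?, fallback: Double = 0.5) -> Double {
    if let a, let b { return (a + b) / 2.0 }
    return fallback
}

averageOrDefault(0.47, 0.51)  // 0.49
averageOrDefault(nil, 0.51)   // 0.5: a single eye does not count, same as the original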
@@ -746,11 +746,24 @@ final class PupilDetector: @unchecked Sendable {
     ) {
         let size = width * height
         guard size > 0 else { return }

-        // SIMPLIFIED: Skip blur to avoid contaminating dark pupil pixels with bright mask pixels
-        // Apply binary threshold directly to input
-
+        // 1. Apply Gaussian blur (reduces noise)
+        // We reuse tempBuffer for intermediate steps if available, or write straight to output
+        // Note: gaussianBlurOptimized writes from input -> output
+        gaussianBlurOptimized(input: input, output: output, width: width, height: height)
+
+        // 2. Apply erosion (expands dark regions)
+        // Python equivalent: cv2.erode(kernel, iterations=3)
+        // This helps connect broken parts of the pupil
+        // Note: erodeOptimized processes in place when input == output
+        erodeOptimized(input: output, output: output, width: width, height: height, iterations: 3)
+
+        // 3. Binary threshold
+        // Python equivalent: cv2.threshold(..., cv2.THRESH_BINARY)
+        // Pixels > threshold become 255 (white), the rest 0 (black),
+        // so the pupil ends up BLACK (0)
         for i in 0..<size {
-            output[i] = input[i] > UInt8(threshold) ? 255 : 0
+            output[i] = output[i] > UInt8(threshold) ? 255 : 0
         }
     }
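erodeOptimized and gaussianBlurOptimized are not shown in this diff; for orientation, a naive grayscale erosion is just a 3x3 minimum filter repeated per iteration. A sketch of the plausible semantics, not the project's implementation:

func erode(_ pixels: inout [UInt8], width: Int, height: Int, iterations: Int) {
    for _ in 0..<iterations {
        let src = pixels  // snapshot so each pass reads the previous pass's output
        for y in 0..<height {
            for x in 0..<width {
                var minValue: UInt8 = 255
                for dy in -1...1 {
                    for dx in -1...1 {
                        let ny = y + dy, nx = x + dx
                        guard ny >= 0, ny < height, nx >= 0, nx < width else { continue }
                        minValue = min(minValue, src[ny * width + nx])
                    }
                }
                pixels[y * width + x] = minValue  // low (dark) values spread outward
            }
        }
    }
}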
@@ -851,47 +864,103 @@ final class PupilDetector: @unchecked Sendable {

     // MARK: - Optimized Contour Detection

-    /// Optimized centroid-of-dark-pixels approach - much faster than union-find
-    /// Returns the centroid of the largest dark region
+    /// Finds the largest connected component of dark pixels and returns its centroid.
+    /// This is much more robust than averaging all dark pixels, as it ignores shadows/noise.
     private nonisolated static func findPupilFromContoursOptimized(
         data: UnsafePointer<UInt8>,
         width: Int,
         height: Int
     ) -> (x: Double, y: Double)? {

-        // Optimized approach: find centroid of all black pixels
-        // This works well for pupil detection since the pupil is the main dark blob
-
-        var sumX: Int = 0
-        var sumY: Int = 0
-        var count: Int = 0
-
-        // After binary thresholding, pixels are 0 (black/pupil) or 255 (white/background)
-        // Use threshold of 128 to catch any pixels that are closer to black
-        let threshold = UInt8(128)
-
-        // Process entire image to get accurate centroid
         let size = width * height

+        // After binary thresholding, dark pixels are exactly 0.
+        // Track processed pixels for the flood fill with a flat Bool array (fast in Swift).
+        var visited = [Bool](repeating: false, count: size)
+
+        var maxBlobSize = 0
+        var maxBlobSumX = 0
+        var maxBlobSumY = 0
+
+        // Iterate through pixels to find connected components
         for y in 0..<height {
             let rowOffset = y * width
             for x in 0..<width {
-                if data[rowOffset + x] < threshold {
-                    sumX += x
-                    sumY += y
-                    count += 1
+                let idx = rowOffset + x
+
+                // If it's a dark pixel (0) and not yet visited, start a flood fill
+                if data[idx] == 0 && !visited[idx] {
+                    var currentBlobSize = 0
+                    var currentBlobSumX = 0
+                    var currentBlobSumY = 0
+
+                    // Explicit stack for iterative DFS (an Array used as a stack is fast in Swift)
+                    var stack: [Int] = [idx]
+                    visited[idx] = true
+
+                    while let currentIdx = stack.popLast() {
+                        let cx = currentIdx % width
+                        let cy = currentIdx / width
+
+                        currentBlobSize += 1
+                        currentBlobSumX += cx
+                        currentBlobSumY += cy
+
+                        // Check the 4 neighbors
+                        // Right
+                        if cx + 1 < width {
+                            let nIdx = currentIdx + 1
+                            if data[nIdx] == 0 && !visited[nIdx] {
+                                visited[nIdx] = true
+                                stack.append(nIdx)
+                            }
+                        }
+                        // Left
+                        if cx - 1 >= 0 {
+                            let nIdx = currentIdx - 1
+                            if data[nIdx] == 0 && !visited[nIdx] {
+                                visited[nIdx] = true
+                                stack.append(nIdx)
+                            }
+                        }
+                        // Down
+                        if cy + 1 < height {
+                            let nIdx = currentIdx + width
+                            if data[nIdx] == 0 && !visited[nIdx] {
+                                visited[nIdx] = true
+                                stack.append(nIdx)
+                            }
+                        }
+                        // Up
+                        if cy - 1 >= 0 {
+                            let nIdx = currentIdx - width
+                            if data[nIdx] == 0 && !visited[nIdx] {
+                                visited[nIdx] = true
+                                stack.append(nIdx)
+                            }
+                        }
+                    }
+
+                    // Keep the largest blob seen so far
+                    if currentBlobSize > maxBlobSize {
+                        maxBlobSize = currentBlobSize
+                        maxBlobSumX = currentBlobSumX
+                        maxBlobSumY = currentBlobSumY
+                    }
                 }
             }
         }

-        if enableDiagnosticLogging && count < 5 {
-            logDebug("👁 PupilDetector: Dark pixel count = \(count) (need >= 5)")
+        if enableDiagnosticLogging && maxBlobSize < 5 {
+            logDebug("👁 PupilDetector: Largest blob size = \(maxBlobSize) (need >= 5)")
         }

-        // Minimum 5 pixels for valid pupil (reduced from 10 for small eye regions)
-        guard count >= 5 else { return nil }
+        // Minimum 5 pixels for a valid pupil
+        guard maxBlobSize >= 5 else { return nil }

         return (
-            x: Double(sumX) / Double(count),
-            y: Double(sumY) / Double(count)
+            x: Double(maxBlobSumX) / Double(maxBlobSize),
+            y: Double(maxBlobSumY) / Double(maxBlobSize)
         )
     }
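A self-contained check of the largest-blob-centroid idea on a toy bitmap (the test harness is invented for illustration; it re-implements the flood fill on a plain Swift array rather than calling the private method):

// 0 = dark (pupil candidate), 255 = background.
// Blob A is 2x2 at the top-left; blob B is 3x3 at the bottom-right and must win.
let width = 6, height = 6
var img = [UInt8](repeating: 255, count: width * height)
for (x, y) in [(0, 0), (1, 0), (0, 1), (1, 1)] { img[y * width + x] = 0 }  // blob A, size 4
for y in 3...5 { for x in 3...5 { img[y * width + x] = 0 } }               // blob B, size 9

var visited = [Bool](repeating: false, count: img.count)
var best = (size: 0, sumX: 0, sumY: 0)
for start in img.indices where img[start] == 0 && !visited[start] {
    var stack = [start]
    visited[start] = true
    var size = 0, sumX = 0, sumY = 0
    while let i = stack.popLast() {
        let x = i % width, y = i / width
        size += 1; sumX += x; sumY += y
        for n in [i - 1, i + 1, i - width, i + width]
        where n >= 0 && n < img.count && img[n] == 0 && !visited[n]
            && abs(n % width - x) <= 1 {  // no wrap-around between rows
            visited[n] = true
            stack.append(n)
        }
    }
    if size > best.size { best = (size, sumX, sumY) }
}
print(Double(best.sumX) / Double(best.size), Double(best.sumY) / Double(best.size))  // 4.0 4.0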