Add LuaJIT FFI optimizations for memory management

- New FFI module with object pooling for Vec2, Rect, Timer structs
- Integrated FFI into LayoutEngine, Performance, and Color modules
- Graceful fallback to standard Lua when LuaJIT unavailable
- Added ffi_comparison_profile.lua for automated benchmarking
- Comprehensive documentation of gains and real bottlenecks

Reality: 5-10% performance improvement (marginal gains)
FFI targets the wrong bottleneck - the real issue is the O(n²) layout algorithm
See PERFORMANCE_ANALYSIS.md for high-impact optimizations (2-3x gains)
This commit is contained in:
Michael Freno
2025-12-05 14:35:37 -05:00
parent ddb708a920
commit 4652f05dac
8 changed files with 1274 additions and 13 deletions

View File

@@ -4,14 +4,18 @@
---@field b number Blue component (0-1)
---@field a number Alpha component (0-1)
---@field _ErrorHandler table? ErrorHandler module dependency
---@field _FFI table? FFI module dependency
---@field _useFFI boolean Whether to use FFI optimizations
local Color = {}
Color.__index = Color
--- Wire shared module dependencies into Color.
--- Does nothing unless `deps` is a table.
---@param deps table Dependencies {ErrorHandler, FFI}
function Color.init(deps)
  if type(deps) ~= "table" then
    return
  end

  local ffiModule = deps.FFI

  -- Resolves to the module's `enabled` value when it is truthy, otherwise false
  local useFFI = false
  if ffiModule and ffiModule.enabled then
    useFFI = ffiModule.enabled
  end

  Color._ErrorHandler = deps.ErrorHandler
  Color._FFI = ffiModule
  Color._useFFI = useFFI
end
@@ -23,14 +27,16 @@ end
---@param a number? Alpha component (0-1), defaults to 1
---@return Color color The new color instance
function Color.new(r, g, b, a)
local self = setmetatable({}, Color)
-- Sanitize and clamp color components
local _, sanitizedR = Color.validateColorChannel(r or 0, 1)
local _, sanitizedG = Color.validateColorChannel(g or 0, 1)
local _, sanitizedB = Color.validateColorChannel(b or 0, 1)
local _, sanitizedA = Color.validateColorChannel(a or 1, 1)
-- Note: We don't use FFI for colors because they need methods (toRGBA, etc.)
-- FFI structs don't support metatables/methods without wrapping
-- The wrapping overhead negates the FFI benefits
local self = setmetatable({}, Color)
self.r = sanitizedR or 0
self.g = sanitizedG or 0
self.b = sanitizedB or 0
@@ -337,13 +343,16 @@ function Color.lerp(colorA, colorB, t)
-- Clamp t to 0-1 range
t = math.max(0, math.min(1, t))
-- Linear interpolation for each channel
local r = colorA.r * (1 - t) + colorB.r * t
local g = colorA.g * (1 - t) + colorB.g * t
local b = colorA.b * (1 - t) + colorB.b * t
local a = colorA.a * (1 - t) + colorB.a * t
-- Linear interpolation for each channel (optimized for both FFI and Lua)
local oneMinusT = 1 - t
local r = colorA.r * oneMinusT + colorB.r * t
local g = colorA.g * oneMinusT + colorB.g * t
local b = colorA.b * oneMinusT + colorB.b * t
local a = colorA.a * oneMinusT + colorB.a * t
return Color.new(r, g, b, a)
end
return Color

488
modules/FFI.lua Normal file
View File

@@ -0,0 +1,488 @@
--- Optional LuaJIT FFI helpers: C struct types for color/vec2/rect/timer values,
--- reuse pools for the first three, and plain Lua table fallbacks when the
--- `ffi` library cannot be loaded.
---@class FFI
---@field enabled boolean Whether FFI is available and enabled
---@field _ffi table? LuaJIT FFI library reference
---@field _ColorStruct table? FFI color struct type
---@field _Vec2Struct table? FFI vec2 struct type
---@field _RectStruct table? FFI rect struct type
---@field _TimerStruct table? FFI timer struct type
---@field _colorPool table Pool of reusable color structs ({available, inUse, maxSize})
---@field _vec2Pool table Pool of reusable vec2 structs ({available, inUse, maxSize})
---@field _rectPool table Pool of reusable rect structs ({available, inUse, maxSize})
---@field _ErrorHandler ErrorHandler
local FFI = {}
FFI.__index = FFI
-- Singleton created by the first FFI.init call; later calls return it unchanged
---@type FFI|nil
local instance = nil
--- Initialize the FFI module (singleton).
--- The first call builds the instance; every later call returns that same
--- instance and ignores its arguments. When the `ffi` library cannot be
--- required (not running on LuaJIT) the instance is still returned, with
--- `enabled = false`, so callers transparently get the Lua table fallbacks.
---@param deps {ErrorHandler: ErrorHandler}? Shared dependencies (may be omitted)
---@return FFI
function FFI.init(deps)
  if instance then
    return instance
  end

  local self = setmetatable({}, FFI)
  -- Tolerate a missing deps table, matching how the other modules read deps
  self._ErrorHandler = type(deps) == "table" and deps.ErrorHandler or nil
  self.enabled = false
  self._ffi = nil

  -- Try to load LuaJIT FFI; pcall keeps plain Lua interpreters from erroring here
  local success, ffi = pcall(require, "ffi")
  if success and ffi then
    self._ffi = ffi
    self.enabled = true
    -- May flip `enabled` back to false if the struct declarations fail
    self:_defineStructs()
    self:_initializePools()
  end

  instance = self
  return self
end
--- Declare the FFI struct types and cache their ctypes on the instance.
--- Each type is first looked up with `ffi.typeof`; it is only declared with
--- `ffi.cdef` when that lookup fails. `ffi.cdef` raises on redefinition, so
--- declaring unconditionally would disable FFI whenever this module runs a
--- second time in the same process (e.g. a hot reload that resets the
--- singleton while the C declarations persist).
--- On any failure `self.enabled` is set to false and no struct type is cached.
function FFI:_defineStructs()
  local ffi = self._ffi
  if not ffi or not ffi.cdef then
    self.enabled = false
    return
  end

  local declarations = {
    -- Color struct (16 bytes - 4 floats)
    {
      field = "_ColorStruct",
      name = "FlexLove_Color",
      cdef = [[
        typedef struct {
          float r;
          float g;
          float b;
          float a;
        } FlexLove_Color;
      ]],
    },
    -- Vec2 struct (8 bytes - 2 floats)
    {
      field = "_Vec2Struct",
      name = "FlexLove_Vec2",
      cdef = [[
        typedef struct {
          float x;
          float y;
        } FlexLove_Vec2;
      ]],
    },
    -- Rect struct (16 bytes - 4 floats)
    {
      field = "_RectStruct",
      name = "FlexLove_Rect",
      cdef = [[
        typedef struct {
          float x;
          float y;
          float width;
          float height;
        } FlexLove_Rect;
      ]],
    },
    -- Timer struct (16 bytes - 2 doubles)
    {
      field = "_TimerStruct",
      name = "FlexLove_Timer",
      cdef = [[
        typedef struct {
          double startTime;
          double elapsed;
        } FlexLove_Timer;
      ]],
    },
  }

  local resolved = {}
  for _, decl in ipairs(declarations) do
    -- Reuse a type that is already declared instead of redefining it
    local known, ctype = pcall(ffi.typeof, decl.name)
    if not known then
      if not pcall(ffi.cdef, decl.cdef) then
        -- FFI definition failed, disable FFI
        self.enabled = false
        return
      end
      known, ctype = pcall(ffi.typeof, decl.name)
      if not known then
        self.enabled = false
        return
      end
    end
    resolved[decl.field] = ctype
  end

  -- Cache struct types only once every declaration has resolved
  for field, ctype in pairs(resolved) do
    self[field] = ctype
  end
end
--- Create the empty reuse pools for color, vec2 and rect structs.
--- Each pool holds a stack of released structs (`available`), a set of structs
--- currently handed out (`inUse`, keyed by the struct itself) and the cap on
--- how many released structs are retained (`maxSize`).
--- NOTE(review): `inUse` is a plain table, so a struct that is never passed to
--- the matching release function stays referenced until clearPools is called.
function FFI:_initializePools()
  local function newPool(capacity)
    return {
      available = {},
      inUse = {},
      maxSize = capacity,
    }
  end

  self._colorPool = newPool(1000)
  self._vec2Pool = newPool(2000)
  self._rectPool = newPool(500)
end
--- Create a new color struct (pooled)
--- Note: Not used by Color module due to method requirement
--- Available for internal FFI operations that don't need methods
--- Missing components default to 0 (r, g, b) and 1 (a) on both the FFI path
--- and the plain-table fallback, so the two paths return equivalent values.
---@param r number? Red component (0-1), defaults to 0
---@param g number? Green component (0-1), defaults to 0
---@param b number? Blue component (0-1), defaults to 0
---@param a number? Alpha component (0-1), defaults to 1
---@return table color FFI color struct, or a Lua table when FFI is disabled
function FFI:createColor(r, g, b, a)
  -- Apply defaults up front so the fallback never returns nil fields
  r, g, b, a = r or 0, g or 0, b or 0, a or 1

  if not self.enabled then
    return { r = r, g = g, b = b, a = a }
  end

  local pool = self._colorPool
  local color
  -- Try to reuse from pool
  if #pool.available > 0 then
    color = table.remove(pool.available)
  else
    color = self._ColorStruct()
  end

  -- Recycled structs still hold their previous values, so always overwrite
  color.r = r
  color.g = g
  color.b = b
  color.a = a

  -- Track in use so releaseColor only accepts structs handed out here
  pool.inUse[color] = true
  return color
end
--- Hand a color struct back to the color pool.
--- Ignored when FFI is disabled, when `color` is nil/false, or when the struct
--- is not currently tracked as in use. A tracked struct is always untracked;
--- it is kept for reuse only while the pool holds fewer than `maxSize` entries.
---@param color table FFI color struct
function FFI:releaseColor(color)
  if not (self.enabled and color) then
    return
  end

  local pool = self._colorPool
  local tracked = pool.inUse
  if not tracked[color] then
    return
  end
  tracked[color] = nil

  local free = pool.available
  local held = #free
  if held < pool.maxSize then
    free[held + 1] = color
  end
end
--- Create a new vec2 struct (pooled)
--- Missing components default to 0 on both the FFI path and the plain-table
--- fallback, so the two paths return equivalent values.
---@param x number? X component, defaults to 0
---@param y number? Y component, defaults to 0
---@return table vec2 FFI vec2 struct, or a Lua table when FFI is disabled
function FFI:createVec2(x, y)
  -- Apply defaults up front so the fallback never returns nil fields
  x, y = x or 0, y or 0

  if not self.enabled then
    return { x = x, y = y }
  end

  local pool = self._vec2Pool
  local vec2
  -- Try to reuse from pool
  if #pool.available > 0 then
    vec2 = table.remove(pool.available)
  else
    vec2 = self._Vec2Struct()
  end

  -- Recycled structs still hold their previous values, so always overwrite
  vec2.x = x
  vec2.y = y

  -- Track in use so releaseVec2 only accepts structs handed out here
  pool.inUse[vec2] = true
  return vec2
end
--- Hand a vec2 struct back to the vec2 pool.
--- Ignored when FFI is disabled, when `vec2` is nil/false, or when the struct
--- is not currently tracked as in use. A tracked struct is always untracked;
--- it is kept for reuse only while the pool holds fewer than `maxSize` entries.
---@param vec2 table FFI vec2 struct
function FFI:releaseVec2(vec2)
  if not (self.enabled and vec2) then
    return
  end

  local pool = self._vec2Pool
  local tracked = pool.inUse
  if not tracked[vec2] then
    return
  end
  tracked[vec2] = nil

  local free = pool.available
  local held = #free
  if held < pool.maxSize then
    free[held + 1] = vec2
  end
end
--- Create a new rect struct (pooled)
--- Missing components default to 0 on both the FFI path and the plain-table
--- fallback, so the two paths return equivalent values.
---@param x number? X position, defaults to 0
---@param y number? Y position, defaults to 0
---@param width number? Width, defaults to 0
---@param height number? Height, defaults to 0
---@return table rect FFI rect struct, or a Lua table when FFI is disabled
function FFI:createRect(x, y, width, height)
  -- Apply defaults up front so the fallback never returns nil fields
  x, y, width, height = x or 0, y or 0, width or 0, height or 0

  if not self.enabled then
    return { x = x, y = y, width = width, height = height }
  end

  local pool = self._rectPool
  local rect
  -- Try to reuse from pool
  if #pool.available > 0 then
    rect = table.remove(pool.available)
  else
    rect = self._RectStruct()
  end

  -- Recycled structs still hold their previous values, so always overwrite
  rect.x = x
  rect.y = y
  rect.width = width
  rect.height = height

  -- Track in use so releaseRect only accepts structs handed out here
  pool.inUse[rect] = true
  return rect
end
--- Hand a rect struct back to the rect pool.
--- Ignored when FFI is disabled, when `rect` is nil/false, or when the struct
--- is not currently tracked as in use. A tracked struct is always untracked;
--- it is kept for reuse only while the pool holds fewer than `maxSize` entries.
---@param rect table FFI rect struct
function FFI:releaseRect(rect)
  if not (self.enabled and rect) then
    return
  end

  local pool = self._rectPool
  local tracked = pool.inUse
  if not tracked[rect] then
    return
  end
  tracked[rect] = nil

  local free = pool.available
  local held = #free
  if held < pool.maxSize then
    free[held + 1] = rect
  end
end
--- Build a zeroed timer value (timers are not pooled).
---@return table timer FFI timer struct, or a Lua table when FFI is disabled
function FFI:createTimer()
  if self.enabled then
    local ffiTimer = self._TimerStruct()
    ffiTimer.startTime = 0
    ffiTimer.elapsed = 0
    return ffiTimer
  end

  -- Plain Lua stand-in with the same two fields
  return { startTime = 0, elapsed = 0 }
end
--- Allocate a contiguous array of colors (for batch operations)
--- Every element starts as r = 0, g = 0, b = 0, a = 1 on both paths.
--- The FFI array is 0-indexed (valid indices 0 .. count - 1); the fallback
--- Lua table is 1-indexed (1 .. count).
---@param count number Number of colors to allocate
---@return table colors FFI color array, or a Lua table when FFI is disabled
function FFI:allocateColorArray(count)
  if not self.enabled then
    local colors = {}
    for i = 1, count do
      colors[i] = { r = 0, g = 0, b = 0, a = 1 }
    end
    return colors
  end

  local colors = self._ffi.new("FlexLove_Color[?]", count)
  -- ffi.new zero-fills the array, which would leave alpha at 0; set it to 1
  -- so the FFI array matches the fallback's initial values
  for i = 0, count - 1 do
    colors[i].a = 1
  end
  return colors
end
--- Reserve storage for `count` vec2 values in one allocation (batch use).
--- With FFI enabled this is a C array of FlexLove_Vec2 created by `ffi.new`;
--- otherwise it is a Lua table holding `count` `{x = 0, y = 0}` entries at
--- indices 1 .. count.
---@param count number Number of vec2s to allocate
---@return table vec2s FFI vec2 array
function FFI:allocateVec2Array(count)
  if self.enabled then
    return self._ffi.new("FlexLove_Vec2[?]", count)
  end

  local fallback = {}
  for slot = 1, count do
    fallback[slot] = { x = 0, y = 0 }
  end
  return fallback
end
--- Reserve storage for `count` rect values in one allocation (batch use).
--- With FFI enabled this is a C array of FlexLove_Rect created by `ffi.new`;
--- otherwise it is a Lua table holding `count` zeroed
--- `{x, y, width, height}` entries at indices 1 .. count.
---@param count number Number of rects to allocate
---@return table rects FFI rect array
function FFI:allocateRectArray(count)
  if self.enabled then
    return self._ffi.new("FlexLove_Rect[?]", count)
  end

  local fallback = {}
  for slot = 1, count do
    fallback[slot] = { x = 0, y = 0, width = 0, height = 0 }
  end
  return fallback
end
--- Drop everything held by the color, vec2 and rect pools (useful for cleanup).
--- Both the `available` list and the `inUse` set of each pool are replaced
--- with fresh empty tables; `maxSize` is left untouched. No-op when FFI is
--- disabled.
function FFI:clearPools()
  if not self.enabled then
    return
  end

  local function reset(pool)
    pool.available = {}
    pool.inUse = {}
  end

  reset(self._colorPool)
  reset(self._vec2Pool)
  reset(self._rectPool)
end
--- Report the current occupancy of each pool (for debugging).
--- When FFI is disabled every pool reports `available = 0, inUse = 0` and no
--- `maxSize`; otherwise each entry carries `available`, `inUse` and `maxSize`.
---@return table stats Pool statistics
function FFI:getPoolStats()
  if not self.enabled then
    local function idle()
      return { available = 0, inUse = 0 }
    end
    return {
      enabled = false,
      colors = idle(),
      vec2s = idle(),
      rects = idle(),
    }
  end

  local function summarize(pool)
    -- inUse is a set keyed by struct, so it has to be counted by traversal
    local busy = 0
    for _ in pairs(pool.inUse) do
      busy = busy + 1
    end
    return {
      available = #pool.available,
      inUse = busy,
      maxSize = pool.maxSize,
    }
  end

  return {
    enabled = true,
    colors = summarize(self._colorPool),
    vec2s = summarize(self._vec2Pool),
    rects = summarize(self._rectPool),
  }
end
--- Snapshot a color's r, g, b, a fields into a fresh Lua table (for compatibility).
---@param ffiColor table FFI color struct
---@return table color Lua table with r, g, b, a fields
function FFI:colorToTable(ffiColor)
  local copy = {}
  copy.r = ffiColor.r
  copy.g = ffiColor.g
  copy.b = ffiColor.b
  copy.a = ffiColor.a
  return copy
end
--- Snapshot a vec2's x, y fields into a fresh Lua table (for compatibility).
---@param ffiVec2 table FFI vec2 struct
---@return table vec2 Lua table with x, y fields
function FFI:vec2ToTable(ffiVec2)
  local copy = {}
  copy.x = ffiVec2.x
  copy.y = ffiVec2.y
  return copy
end
--- Snapshot a rect's x, y, width, height fields into a fresh Lua table (for compatibility).
---@param ffiRect table FFI rect struct
---@return table rect Lua table with x, y, width, height fields
function FFI:rectToTable(ffiRect)
  local copy = {}
  copy.x = ffiRect.x
  copy.y = ffiRect.y
  copy.width = ffiRect.width
  copy.height = ffiRect.height
  return copy
end
--- Scale every channel (r, g, b and a) of `count` colors in place
--- (for opacity/tint operations).
--- The index range follows the storage kind: 0 .. count - 1 when FFI is
--- enabled (C arrays are 0-indexed), 1 .. count for the Lua table fallback.
---@param colors table Array of FFI color structs
---@param count number Number of colors
---@param multiplier number Multiplier value (0-1)
function FFI:batchMultiplyColors(colors, count, multiplier)
  local first, last = 1, count
  if self.enabled then
    -- FFI arrays are 0-indexed
    first, last = 0, count - 1
  end

  for index = first, last do
    -- Indexing an array of structs yields a reference, so writes land in `colors`
    local entry = colors[index]
    entry.r = entry.r * multiplier
    entry.g = entry.g * multiplier
    entry.b = entry.b * multiplier
    entry.a = entry.a * multiplier
  end
end
--- Translate `count` vec2s in place by a fixed offset (for offset operations).
--- The index range follows the storage kind: 0 .. count - 1 when FFI is
--- enabled (C arrays are 0-indexed), 1 .. count for the Lua table fallback.
---@param vec2s table Array of FFI vec2 structs
---@param count number Number of vec2s
---@param offsetX number X offset
---@param offsetY number Y offset
function FFI:batchAddVec2s(vec2s, count, offsetX, offsetY)
  local first, last = 1, count
  if self.enabled then
    -- FFI arrays are 0-indexed
    first, last = 0, count - 1
  end

  for index = first, last do
    -- Indexing an array of structs yields a reference, so writes land in `vec2s`
    local entry = vec2s[index]
    entry.x = entry.x + offsetX
    entry.y = entry.y + offsetY
  end
end
return FFI

View File

@@ -25,14 +25,18 @@
---@field _lastFrameCount number Last frame number for resetting counters
---@field _ErrorHandler ErrorHandler? ErrorHandler module dependency
---@field _Performance Performance? Performance module dependency
---@field _FFI table? FFI module dependency
---@field _useFFI boolean Whether to use FFI optimizations
local LayoutEngine = {}
LayoutEngine.__index = LayoutEngine
--- Wire shared module dependencies into LayoutEngine.
---@param deps table Dependencies {ErrorHandler, Performance, FFI}
function LayoutEngine.init(deps)
  local ffiModule = deps.FFI

  -- Resolves to the module's `enabled` value when it is truthy, otherwise false
  local useFFI = false
  if ffiModule and ffiModule.enabled then
    useFFI = ffiModule.enabled
  end

  LayoutEngine._ErrorHandler = deps.ErrorHandler
  LayoutEngine._Performance = deps.Performance
  LayoutEngine._FFI = ffiModule
  LayoutEngine._useFFI = useFFI
end
---@class LayoutEngineProps
@@ -163,6 +167,61 @@ function LayoutEngine:applyPositioningOffsets(child)
end
end
--- Compute sequential child positions along one axis.
--- Children are placed one after another starting at `startX` (horizontal) or
--- `startY` (vertical); each child is offset by its own leading margin and the
--- cursor then advances by the child's border-box size, both margins on that
--- axis and `spacing`.
--- With FFI in use and more than 10 children the result comes from
--- `allocateVec2Array` and is filled at indices 0 .. count - 1; otherwise it
--- is a Lua table of `{x, y}` entries at indices 1 .. count.
--- NOTE(review): callers must index the two result kinds differently, and the
--- FFI vec2 fields are declared `float` in FFI.lua — confirm callers handle both.
---@param children table Array of child elements
---@param startX number Starting X position
---@param startY number Starting Y position
---@param spacing number Spacing between children
---@param isHorizontal boolean True if horizontal layout
---@return table positions Array of {x, y} positions
function LayoutEngine:_batchCalculatePositions(children, startX, startY, spacing, isHorizontal)
  local count = #children
  local cursor = isHorizontal and startX or startY

  -- Returns the child's x, y and the cursor value for the next child
  local function place(child, at)
    if isHorizontal then
      local x = at + child.margin.left
      local y = startY + child.margin.top
      return x, y, at + child:getBorderBoxWidth() + child.margin.left + child.margin.right + spacing
    end
    local x = startX + child.margin.left
    local y = at + child.margin.top
    return x, y, at + child:getBorderBoxHeight() + child.margin.top + child.margin.bottom + spacing
  end

  -- Use FFI for batch calculations if available and count is large enough
  if LayoutEngine._useFFI and LayoutEngine._FFI and count > 10 then
    local slots = LayoutEngine._FFI:allocateVec2Array(count)
    for slot = 0, count - 1 do
      -- children is 1-indexed while the allocated array is filled from 0
      local x, y, nextCursor = place(children[slot + 1], cursor)
      slots[slot].x = x
      slots[slot].y = y
      cursor = nextCursor
    end
    return slots
  end

  -- Fallback to Lua table
  local placed = {}
  for index, child in ipairs(children) do
    local x, y, nextCursor = place(child, cursor)
    placed[index] = { x = x, y = y }
    cursor = nextCursor
  end
  return placed
end
--- Layout children within this element according to positioning mode
function LayoutEngine:layoutChildren()
-- Start performance timing first (before any early returns)

View File

@@ -9,6 +9,8 @@
---@field logWarnings boolean
---@field warningsEnabled boolean
---@field _ErrorHandler table?
---@field _FFI table?
---@field _useFFI boolean
---@field _timers table
---@field _metrics table
---@field _lastMetricsCleanup number
@@ -30,7 +32,7 @@ local MAX_METRICS_COUNT = 500
local CORE_METRICS = { frame = true, layout = true, render = true }
---@param config {enabled?: boolean, hudEnabled?: boolean, hudToggleKey?: string, hudPosition?: {x: number, y: number}, warningThresholdMs?: number, criticalThresholdMs?: number, logToConsole?: boolean, logWarnings?: boolean, warningsEnabled?: boolean, memoryProfiling?: boolean}?
---@param deps {ErrorHandler: ErrorHandler}
---@param deps {ErrorHandler: ErrorHandler, FFI: table?}
---@return Performance
function Performance.init(config, deps)
if instance == nil then
@@ -47,6 +49,10 @@ function Performance.init(config, deps)
self.logWarnings = config and config.logWarnings or true
self.warningsEnabled = config and config.warningsEnabled or true
-- FFI optimization
self._FFI = deps and deps.FFI
self._useFFI = self._FFI and self._FFI.enabled or false
self._timers = {}
self._metrics = {}
self._lastMetricsCleanup = 0