current org
This commit is contained in:
34
tasks/FRE-17.yaml
Normal file
34
tasks/FRE-17.yaml
Normal file
@@ -0,0 +1,34 @@
---
date: 2026-03-08
day_of_week: Sunday
task_id: FRE-17
title: Add Memory-Efficient Model Loading
status: todo
company_id: FrenoCorp
objective: Implement gradient checkpointing and mixed precision for lower VRAM usage
context: |
  - Qwen3-TTS 1.7B may not fit in low-end GPUs
  - Gradient checkpointing trades compute for memory
  - Mixed precision (FP16) reduces memory by half
issue_type: enhancement
priority: medium
assignee: Atlas
parent_task: FRE-32
goal_id: MVP_Pipeline_Working
blocking_tasks: []
expected_outcome: |
  - Model runs on GPUs with <8GB VRAM
  - Configurable precision (FP32/FP16/BF16)
  - Graceful degradation when memory insufficient
acceptance_criteria:
  - FP16 mode reduces memory usage by ~50%
  - Gradient checkpointing option available
  - Clear error when memory still insufficient

notes:
  - Use torch.cuda.amp for mixed precision
  - Set gradient_checkpointing=True in model config

links:
  tts_model: /home/mike/code/AudiobookPipeline/src/generation/tts_model.py
---
Reference in New Issue
Block a user