automatic failover

This commit is contained in:
2026-05-31 02:01:37 -04:00
parent 925e37938b
commit d2ef124369
2 changed files with 103 additions and 58 deletions

View File

@@ -122,6 +122,10 @@ prompts:
> tasks, only the first two models are used. The third model is only touched when > tasks, only the first two models are used. The third model is only touched when
> a third concurrent task starts. Freed model slots are reused before new ones > a third concurrent task starts. Freed model slots are reused before new ones
> are allocated. > are allocated.
>
> **Automatic failover**: if a provider/API is unreachable or returns an error (rate limit, 503, etc.),
> the task automatically cycles to the next model in the list without counting it
> as a task failure. Each model is tried once; when no further models remain, the last model goes through the normal retry logic before the task is marked as failed.
The keys mirror the nested structure of `RalpiConfig` in `src/types.ts`. The keys mirror the nested structure of `RalpiConfig` in `src/types.ts`.

View File

@@ -54,6 +54,10 @@ class ModelRoundRobin {
this.freeSlots = []; this.freeSlots = [];
} }
/** Number of entries in this round-robin's `models` list. */
get length(): number {
return this.models.length;
}
assign(taskId: string): unknown { assign(taskId: string): unknown {
let index: number; let index: number;
if (this.freeSlots.length > 0) { if (this.freeSlots.length > 0) {
@@ -385,8 +389,8 @@ export async function executeBatch(
projectDir, projectDir,
undefined, undefined,
model, model,
roundRobin,
); );
roundRobin?.release(task.id);
} }
} }
@@ -483,7 +487,8 @@ async function executeBatchParallel(
projectDir, projectDir,
sharedState, sharedState,
assignedModel, assignedModel,
).finally(() => roundRobin?.release(task.id)), roundRobin,
),
}); });
// Limit concurrency // Limit concurrency
@@ -514,75 +519,111 @@ async function executeTask(
projectDir: string = project.sourceDir, projectDir: string = project.sourceDir,
parallelState?: ParallelWidgetState, parallelState?: ParallelWidgetState,
assignedModel?: unknown, assignedModel?: unknown,
roundRobin?: ModelRoundRobin | null,
): Promise<void> { ): Promise<void> {
const maxRetries = config.execution.maxRetries; const maxRetries = config.execution.maxRetries;
let retries = 0;
while (retries <= maxRetries) { // Model failover: when a provider/API is down, cycle through available models.
try { // result.success === false always means an agent-session failure (API error,
// Mark as in progress // provider unreachable, etc.), not a task-work error.
progress.markInProgress(task.id); const maxModelAttempts = roundRobin ? roundRobin.length : 1;
let modelAttempt = 0;
let currentModel: unknown = assignedModel ?? config.model;
// Get dependency reflections while (modelAttempt < maxModelAttempts) {
const depReflections = progress.getDependencyReflections( // Get the next model from round-robin (on first try, use the pre-assigned model)
task.dependencies || [], if (modelAttempt > 0 && roundRobin) {
); currentModel = roundRobin.assign(task.id);
}
// Run the task let retries = 0;
const result = await runTask( while (retries <= maxRetries) {
task, try {
project, // Mark as in progress
config, progress.markInProgress(task.id);
depReflections,
ctx,
sendChatMessage,
projectDir,
parallelState,
assignedModel,
);
if (result.success) { // Get dependency reflections
// Save reflection const depReflections = progress.getDependencyReflections(
if (result.reflection) { task.dependencies || [],
saveReflectionToFile(projectDir, config, result.reflection); );
// Run the task
const result = await runTask(
task,
project,
config,
depReflections,
ctx,
sendChatMessage,
projectDir,
parallelState,
currentModel,
);
if (result.success) {
// Save reflection
if (result.reflection) {
saveReflectionToFile(projectDir, config, result.reflection);
}
// Mark completed with all metadata
progress.markCompleted(
task.id,
result.durationMs,
result.reflection,
result.toolUsage,
result.sessionFile,
result.outputPreview,
result.commitMessages,
result.commitSummary,
);
roundRobin?.release(task.id);
return;
} }
// Mark completed with all metadata // Agent session failed (provider error).
progress.markCompleted( // If we have more models, cycle immediately — don't waste retries.
task.id, if (roundRobin && modelAttempt < maxModelAttempts - 1) {
result.durationMs, roundRobin.release(task.id);
result.reflection, modelAttempt++;
result.toolUsage, ctx.ui.notify(
result.sessionFile, `Task ${task.id}: model failed, trying next (${modelAttempt + 1}/${maxModelAttempts}): ${result.error}`,
result.outputPreview, "warning",
result.commitMessages, );
result.commitSummary, break; // exit retry loop, cycle to next model
); }
return;
}
// Task failed, check if we should retry // No more models — use normal retry logic
if (retries < maxRetries) { if (retries < maxRetries) {
retries = progress.incrementRetry(task.id); retries = progress.incrementRetry(task.id);
ctx.ui.notify( ctx.ui.notify(
`Retrying task ${task.id} (${retries}/${maxRetries}): ${result.error}`, `Retrying task ${task.id} (${retries}/${maxRetries}): ${result.error}`,
"warning", "warning",
); );
// Exponential backoff // Exponential backoff
const delay = config.execution.retryDelayMs * 2 ** (retries - 1); const delay = config.execution.retryDelayMs * 2 ** (retries - 1);
await sleep(delay); await sleep(delay);
} else { } else {
// Max retries exceeded // Max retries exceeded
progress.markFailed(task.id, result.error || "Unknown error"); progress.markFailed(task.id, result.error || "Unknown error");
throw new Error(`Task ${task.id} failed: ${result.error}`); throw new Error(`Task ${task.id} failed: ${result.error}`);
}
} catch (error) {
roundRobin?.release(task.id);
const errorMsg = error instanceof Error ? error.message : String(error);
progress.markFailed(task.id, errorMsg);
throw error;
} }
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
progress.markFailed(task.id, errorMsg);
throw error;
} }
// If we broke out (model cycling), continue the outer loop
modelAttempt++;
} }
// All models exhausted
progress.markFailed(task.id, "All configured models exhausted");
throw new Error(`Task ${task.id} failed: all configured models exhausted`);
} }
// ─── Save Reflection to File ──────────────────────────────────────────────── // ─── Save Reflection to File ────────────────────────────────────────────────