Initial commit: deep-research extension

This commit is contained in:
2026-05-31 13:13:18 -04:00
commit c104d2ed14
12 changed files with 1658 additions and 0 deletions

155
src/agent.ts Normal file
View File

@@ -0,0 +1,155 @@
/**
* Deep Research — Agent Session helper
*
* Uses pi's in-process `createAgentSession` for LLM subtasks
* (query generation, result analysis, report synthesis).
* Pattern borrowed from ralpi's runAgentSession().
*/
import {
createAgentSession,
DefaultResourceLoader,
getAgentDir,
SessionManager,
} from "@earendil-works/pi-coding-agent";
import type { AgentSessionEvent } from "@earendil-works/pi-coding-agent";
/**
* Aggregate tool usage stats for one agent run.
*
* Each field counts `tool_execution_start` events whose tool name equals the
* field name; any other tool name is counted under `other`.
*/
export interface ToolUsage {
read: number;
write: number;
edit: number;
bash: number;
/** Tool invocations whose name is not one of the fields above. */
other: number;
}
/** Outcome of a single `runAnalysisAgent` call. */
export interface AgentResult {
/** False when the run threw, or ended with an error message and no assistant text. */
success: boolean;
/** Trimmed text of the last assistant message that contained text; empty on failure. */
text: string;
/** Error description; only set when `success` is false. */
error?: string;
/** Tool invocation counts gathered while the session ran. */
toolUsage: ToolUsage;
}
/**
 * Run a prompt through an in-process Pi agent session.
 *
 * The session is created in memory with the read-only tools "read", "grep",
 * "find" and "ls", and with extensions, skills, prompt templates, themes and
 * context files disabled. Failures (exceptions, timeout, agent error without
 * any text) are reported through the returned `AgentResult` instead of
 * rejecting the promise.
 *
 * @param systemPrompt - System prompt for the session.
 * @param taskPrompt - Prompt sent to the agent.
 * @param cwd - Working directory handed to the resource loader and session.
 * @param timeoutMs - Abort the run after this many milliseconds; zero or a
 *   negative value disables the timeout.
 * @param onEvent - Optional observer that receives every session event.
 * @param signal - Optional external abort signal.
 * @returns The text of the last assistant message that carried text, plus
 *   tool usage counts. A run that hit the timeout is always a failure, even
 *   if some intermediate assistant text had already been produced.
 */
export async function runAnalysisAgent(
  systemPrompt: string,
  taskPrompt: string,
  cwd: string,
  timeoutMs: number = 120_000,
  onEvent?: (event: AgentSessionEvent) => void,
  signal?: AbortSignal,
): Promise<AgentResult> {
  const toolUsage: ToolUsage = {
    read: 0,
    write: 0,
    edit: 0,
    bash: 0,
    other: 0,
  };

  // State shared with the timer/abort callbacks. Kept on an object so the
  // callbacks always observe the latest values.
  const run: {
    session?: Awaited<ReturnType<typeof createAgentSession>>["session"];
    unsubscribe?: () => void;
    timedOut: boolean;
  } = { timedOut: false };
  const timeoutError = `Agent timed out after ${timeoutMs}ms`;

  const abortSession = () => {
    run.session?.agent.abort();
  };

  // The timer may fire while the session is still being created; the flag
  // records that so the timeout is honoured once setup finishes.
  const timeoutHandle =
    timeoutMs > 0
      ? setTimeout(() => {
          run.timedOut = true;
          abortSession();
        }, timeoutMs)
      : undefined;

  try {
    if (signal?.aborted) throw new Error("Aborted");

    const loader = new DefaultResourceLoader({
      cwd,
      agentDir: getAgentDir(),
      noExtensions: true,
      noSkills: true,
      noPromptTemplates: true,
      noThemes: true,
      noContextFiles: true,
    });
    await loader.reload();

    const result = await createAgentSession({
      cwd,
      sessionManager: SessionManager.inMemory(),
      resourceLoader: loader,
      tools: ["read", "grep", "find", "ls"],
      systemPrompt,
    });
    run.session = result.session;

    // Setup is asynchronous: re-check both cancellation sources before
    // starting the prompt. An abort event dispatched earlier would not reach
    // a listener that is only attached now.
    if (run.timedOut) throw new Error(timeoutError);
    if (signal?.aborted) throw new Error("Aborted");
    signal?.addEventListener("abort", abortSession, { once: true });

    let finalText = "";
    let errorMessage: string | undefined;
    run.unsubscribe = result.session.subscribe((event: AgentSessionEvent) => {
      onEvent?.(event);
      if (event.type === "message_end") {
        const message = event.message as {
          role?: string;
          content?: unknown;
          errorMessage?: string;
        };
        if (message.role !== "assistant") return;
        if (message.errorMessage) errorMessage = message.errorMessage;
        const text = extractAssistantText(message.content);
        if (text) finalText = text;
      }
      if (event.type === "tool_execution_start") {
        const name = event.toolName;
        // Own-property check: `in` would also match inherited keys such as
        // "constructor" or "toString" and corrupt the counters.
        if (Object.prototype.hasOwnProperty.call(toolUsage, name)) {
          toolUsage[name as keyof ToolUsage]++;
        } else {
          toolUsage.other++;
        }
      }
    });

    await result.session.prompt(taskPrompt);
    await result.session.agent.waitForIdle();

    if (run.timedOut) {
      return { success: false, text: "", error: timeoutError, toolUsage };
    }
    if (errorMessage && !finalText) {
      return { success: false, text: "", error: errorMessage, toolUsage };
    }
    return { success: true, text: finalText.trim(), toolUsage };
  } catch (error) {
    return {
      success: false,
      text: "",
      error: run.timedOut
        ? timeoutError
        : error instanceof Error
          ? error.message
          : String(error),
      toolUsage,
    };
  } finally {
    // Single cleanup path for success and failure: stop the timer, detach
    // the listeners, and dispose the session exactly once.
    if (timeoutHandle !== undefined) clearTimeout(timeoutHandle);
    signal?.removeEventListener("abort", abortSession);
    run.unsubscribe?.();
    run.session?.dispose();
  }
}
/**
 * Flatten assistant message content into a single string.
 *
 * A string is returned unchanged. For an array, the `text` of every element
 * whose `type` is "text" is concatenated in order (a missing `text` counts
 * as empty). Any other input yields an empty string.
 */
function extractAssistantText(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces: string[] = [];
  for (const part of content) {
    const isTextPart =
      Boolean(part) &&
      typeof part === "object" &&
      (part as { type?: string }).type === "text";
    if (!isTextPart) {
      continue;
    }
    pieces.push((part as { text?: string }).text ?? "");
  }
  return pieces.join("");
}

159
src/firecrawl.ts Normal file
View File

@@ -0,0 +1,159 @@
/**
* Deep Research — direct Firecrawl HTTP client
*
* Calls the self-hosted Firecrawl API directly (same approach as the
* firecrawl.ts extension)
*/
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import type { SearchResult } from "./types";
/* ── Config ──────────────────────────────────────────────────────── */
/**
 * Resolve the Firecrawl base URL and API key.
 *
 * Values under `firecrawl` in `~/.pi/agent/settings.json` take precedence,
 * then the FIRECRAWL_BASE_URL / FIRECRAWL_API_KEY environment variables, and
 * finally the default base URL. Trailing slashes are stripped from the base
 * URL. If the settings file cannot be read, parsed, or used, only the
 * environment and the default are consulted.
 */
function loadFirecrawlConfig() {
  const fallbackUrl = "http://localhost:3002";
  const trailingSlashes = /\/+$/;
  const settingsFile = path.join(os.homedir(), ".pi", "agent", "settings.json");

  try {
    const rawSettings = fs.readFileSync(settingsFile, "utf-8");
    const section = JSON.parse(rawSettings).firecrawl ?? {};
    const chosenUrl =
      section.baseUrl ?? process.env.FIRECRAWL_BASE_URL ?? fallbackUrl;
    const baseUrl = chosenUrl.replace(trailingSlashes, "");
    const apiKey = section.apiKey ?? process.env.FIRECRAWL_API_KEY;
    return { baseUrl, apiKey };
  } catch {
    const envUrl = process.env.FIRECRAWL_BASE_URL ?? fallbackUrl;
    return {
      baseUrl: envUrl.replace(trailingSlashes, ""),
      apiKey: process.env.FIRECRAWL_API_KEY,
    };
  }
}
// Resolved once when this module is evaluated; the request helpers in this
// file reuse these values, so later changes to the settings file or the
// environment are not picked up.
const { baseUrl: BASE_URL, apiKey: API_KEY } = loadFirecrawlConfig();
/* ── Helpers ──────────────────────────────────────────────────────── */
/**
 * POST a JSON body to `${BASE_URL}/v1/<endpoint>` and return the parsed JSON
 * response.
 *
 * An `Authorization: Bearer …` header is sent only when an API key is
 * configured.
 *
 * @throws Error carrying the HTTP status and the first 500 characters of the
 *   response body when the response is not OK.
 */
async function firecrawlRequest(
  endpoint: string,
  body: Record<string, unknown>,
  signal?: AbortSignal,
): Promise<unknown> {
  const requestHeaders: Record<string, string> = API_KEY
    ? {
        "Content-Type": "application/json",
        Authorization: `Bearer ${API_KEY}`,
      }
    : { "Content-Type": "application/json" };

  const response = await fetch(`${BASE_URL}/v1/${endpoint}`, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(body),
    signal,
  });

  if (response.ok) {
    return response.json();
  }

  const detail = await response.text();
  throw new Error(
    `Firecrawl ${endpoint} failed (${response.status}): ${detail.slice(0, 500)}`,
  );
}
/**
 * Probe the Firecrawl instance by posting a small scrape request for
 * https://example.com with a 10 second timeout.
 *
 * @returns `true` when the response status is OK; `false` for any non-OK
 *   status, network error, or timeout.
 */
export async function isFirecrawlReachable(): Promise<boolean> {
  try {
    const probeHeaders: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (API_KEY) {
      probeHeaders.Authorization = `Bearer ${API_KEY}`;
    }
    const probeBody = JSON.stringify({
      url: "https://example.com",
      formats: ["links"],
    });

    const response = await fetch(`${BASE_URL}/v1/scrape`, {
      method: "POST",
      headers: probeHeaders,
      body: probeBody,
      signal: AbortSignal.timeout(10_000),
    });
    return response.ok;
  } catch {
    return false;
  }
}
/* ── Search ───────────────────────────────────────────────────────── */
/**
 * Search the web and return structured results.
 *
 * Calls Firecrawl's `search` endpoint with scrape options that request the
 * main content of each hit as markdown.
 *
 * @param query - Search query string.
 * @param limit - Maximum number of results to request; capped at 10.
 * @param signal - Optional abort signal forwarded to the HTTP request.
 * @returns Results that have markdown or a description. Fields that are
 *   missing or not strings are normalised to "" so callers can rely on the
 *   `SearchResult` shape. Returns an empty array when the response is not a
 *   successful payload with a `data` array.
 * @throws Error when the HTTP request fails (see `firecrawlRequest`).
 */
export async function searchWeb(
  query: string,
  limit: number = 5,
  signal?: AbortSignal,
): Promise<SearchResult[]> {
  const body: Record<string, unknown> = {
    query,
    limit: Math.min(limit, 10),
    scrapeOptions: {
      formats: ["markdown"],
      onlyMainContent: true,
    },
  };
  const result = await firecrawlRequest("search", body, signal);
  if (!result || typeof result !== "object") return [];

  const res = result as { success?: boolean; data?: unknown };
  if (!res.success || !Array.isArray(res.data)) return [];

  // The payload is untrusted JSON: validate each field instead of asserting
  // its type, so a non-string value can never reach string-only code paths.
  const asText = (value: unknown): string =>
    typeof value === "string" ? value : "";

  const hits: SearchResult[] = [];
  for (const doc of res.data as unknown[]) {
    if (!doc || typeof doc !== "object") continue;
    const fields = doc as Record<string, unknown>;
    const hit: SearchResult = {
      title: asText(fields.title),
      url: asText(fields.url),
      description: asText(fields.description),
      markdown: asText(fields.markdown),
    };
    if (hit.markdown || hit.description) hits.push(hit);
  }
  return hits;
}
/* ── Scrape ───────────────────────────────────────────────────────── */
/**
 * Scrape a single URL and return its title, markdown content and links.
 *
 * Both the "markdown" and "links" formats are requested; without "links" the
 * response carries no link list and `links` would always be empty.
 *
 * @param url - Page to scrape.
 * @param signal - Optional abort signal forwarded to the HTTP request.
 * @returns The scraped page, or `null` when the response is not a successful
 *   payload with a `data` object.
 * @throws Error when the HTTP request fails (see `firecrawlRequest`).
 */
export async function scrapeUrl(
  url: string,
  signal?: AbortSignal,
): Promise<{ title: string; markdown: string; links: string[] } | null> {
  const result = await firecrawlRequest(
    "scrape",
    { url, formats: ["markdown", "links"] },
    signal,
  );
  if (!result || typeof result !== "object") return null;

  const res = result as { success?: boolean; data?: unknown };
  if (!res.success || !res.data || typeof res.data !== "object") return null;

  const data = res.data as Record<string, unknown>;
  const metadata =
    data.metadata && typeof data.metadata === "object"
      ? (data.metadata as Record<string, unknown>)
      : {};
  const asText = (value: unknown): string =>
    typeof value === "string" ? value : "";

  return {
    // Scrape responses report the page title under `metadata`; a top-level
    // `title` is still honoured as a fallback.
    title: asText(metadata.title) || asText(data.title),
    markdown: asText(data.markdown),
    links: Array.isArray(data.links)
      ? data.links.filter((link): link is string => typeof link === "string")
      : [],
  };
}

261
src/queries.ts Normal file
View File

@@ -0,0 +1,261 @@
/**
* Deep Research — Search query generation & refinement
*
* Uses an LLM agent to generate search queries from different research
* angles, then analyzes results to produce follow-up queries.
*/
import type { SearchQuery, Finding, ResearchRound } from "./types";
import { runAnalysisAgent } from "./agent";
/**
* System prompt for the initial query-generation agent.
*
* Asks for a bare JSON array of `{ query, rationale, angle }` objects, which
* is the shape `generateQueries` parses from the reply.
*/
const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic.
Guidelines:
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking)
- Each query should target a specific facet of the question
- Queries should use keywords that search engines rank well (avoid overly long questions)
- Cover contrasting viewpoints and alternative approaches
- Include queries for finding authoritative sources (docs, papers, official sites)
- Prioritize recent information where relevant
Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": why this query will help answer the research question
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
Example:
[
{"query": "Rust async/await performance benchmarks 2024", "rationale": "Understanding current performance characteristics", "angle": "technical"},
{"query": "Rust vs Go concurrency patterns comparison", "rationale": "Comparative analysis helps contextualize trade-offs", "angle": "comparative"}
]
`;
/**
* System prompt for the follow-up query agent.
*
* Requests the same `{ query, rationale, angle }` JSON array shape as the
* initial query prompt, focused on gaps in the findings gathered so far.
*/
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.
Look for:
- Claims made without sufficient evidence
- Conflicting information that needs resolution
- Angles that haven't been explored yet
- Missing authoritative sources
- Practical implications that need more detail
- Recent developments that might have updated findings
Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": what gap this query fills or what angle it explores
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
`;
/**
 * Parse an LLM reply that should contain a JSON array of search queries.
 *
 * Models frequently wrap JSON in a Markdown code fence or add a sentence
 * around it, so the reply is tried as-is, then as the contents of the first
 * code fence, then as the outermost `[...]` span.
 *
 * @returns At most `count` queries with a non-empty `query`; an empty array
 *   when no candidate parses to an array or no entry is usable.
 */
function parseQueryList(text: string, count: number): SearchQuery[] {
  const trimmed = text.trim();
  const candidates: string[] = [trimmed];

  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(trimmed);
  if (fenced?.[1]) candidates.push(fenced[1].trim());

  const open = trimmed.indexOf("[");
  const close = trimmed.lastIndexOf("]");
  if (open !== -1 && close > open) {
    candidates.push(trimmed.slice(open, close + 1));
  }

  const asText = (value: unknown, fallback: string): string =>
    typeof value === "string" && value.trim().length > 0
      ? value.trim()
      : fallback;

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // not valid JSON — try the next candidate
    }
    if (!Array.isArray(parsed)) continue;

    return parsed
      .filter(
        (entry): entry is Record<string, unknown> =>
          typeof entry === "object" && entry !== null,
      )
      .map((entry) => ({
        query: asText(entry.query, ""),
        rationale: asText(entry.rationale, ""),
        angle: asText(entry.angle, "technical"),
      }))
      .filter((q) => q.query.length > 0)
      .slice(0, count);
  }
  return [];
}

/**
 * Generate initial search queries for a research question.
 *
 * Falls back to template-based queries whenever the agent call fails or its
 * reply yields no usable query, so a failed LLM call alone does not leave
 * the caller without queries.
 *
 * @param question - The research question.
 * @param count - Number of queries to request (and the maximum returned).
 * @param cwd - Working directory for the agent session.
 * @param signal - Optional abort signal forwarded to the agent.
 */
export async function generateQueries(
  question: string,
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const taskPrompt = `Research question: ${question}
Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`;
  const result = await runAnalysisAgent(
    GENERATE_QUERIES_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );
  if (!result.success || !result.text) {
    return generateFallbackQueries(question, count);
  }

  const queries = parseQueryList(result.text, count);
  return queries.length > 0
    ? queries
    : generateFallbackQueries(question, count);
}

/**
 * Generate follow-up queries based on findings from previous rounds.
 *
 * @param question - The research question.
 * @param rounds - Completed rounds; their queries and findings are
 *   summarised in the prompt.
 * @param count - Number of queries to request (and the maximum returned).
 * @param cwd - Working directory for the agent session.
 * @param signal - Optional abort signal forwarded to the agent.
 * @returns Parsed follow-up queries, or an empty array when the agent call
 *   fails or its reply contains no usable query.
 */
export async function generateFollowUpQueries(
  question: string,
  rounds: ResearchRound[],
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  // Build a summary of findings so far
  const findingsSummary = rounds
    .flatMap((r) => r.findings)
    .map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`)
    .join("\n");
  // Query and rationale are separated explicitly; without a separator the
  // two run together into one unreadable string.
  const exploredAngles = rounds
    .flatMap((r) => r.queries)
    .map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`)
    .join("\n");
  const taskPrompt = `Research question: ${question}
Queries already explored:
${exploredAngles}
Findings so far:
${findingsSummary || "(no findings yet)"}
Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`;
  const result = await runAnalysisAgent(
    FOLLOWUP_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );
  if (!result.success || !result.text) {
    return [];
  }
  return parseQueryList(result.text, count);
}
/**
 * Template-based queries used when the LLM cannot supply any.
 *
 * Produces one query per predefined angle, in a fixed order, by appending
 * the angle's description to the question. At most `count` queries are
 * returned and never more than the number of predefined angles.
 */
function generateFallbackQueries(
  question: string,
  count: number,
): SearchQuery[] {
  const perspectives: ReadonlyArray<readonly [string, string]> = [
    ["technical", "technical details and specifications"],
    ["practical", "practical examples, tutorials, and best practices"],
    ["comparative", "comparisons with alternatives"],
    ["critical", "limitations, challenges, and criticisms"],
    ["forward-looking", "future trends and developments"],
  ];

  return perspectives
    .filter((_, position) => position < count)
    .map(([angle, desc]) => ({
      query: `${question} ${desc}`,
      rationale: `Exploring ${desc} related to the research question`,
      angle: angle as SearchQuery["angle"],
    }));
}
/**
* System prompt for the result-analysis agent.
*
* Requests a bare JSON array of findings with the fields `title`, `summary`,
* `sources`, `keyQuotes` and `confidence`, which `analyzeResults` parses.
*/
const ANALYZE_SYSTEM = `You are a research analyst. Given search results for a specific query, extract key findings.
For each finding:
- Give it a concise title
- Summarize what was found in 1-3 sentences
- List which source URLs support this finding
- Include 1-2 key quotes from the sources
- Rate your confidence (high/medium/low) based on source authority and consistency
Output ONLY a JSON array of objects with fields:
- "title": concise finding title
- "summary": 1-3 sentence summary
- "sources": array of source URLs
- "keyQuotes": array of 1-2 key quotes
- "confidence": "high" | "medium" | "low"`;
/**
 * Analyze search results for a specific query and extract findings.
 *
 * Each result contributes its title, URL, description and the first 3000
 * characters of its markdown to the prompt.
 *
 * @param query - The search query the results belong to.
 * @param results - Search results to analyse.
 * @param cwd - Working directory for the agent session.
 * @param signal - Optional abort signal forwarded to the agent.
 * @returns Findings that have both a title and a summary. Returns an empty
 *   array when there is nothing to analyse, the agent call fails, or the
 *   reply contains no parseable JSON array.
 */
export async function analyzeResults(
  query: string,
  results: {
    title: string;
    url: string;
    description: string;
    markdown: string;
  }[],
  cwd: string,
  signal?: AbortSignal,
): Promise<Finding[]> {
  // Without evidence the agent could only invent findings; skip the call.
  if (results.length === 0) return [];

  const resultsText = results
    .map(
      (r, i) =>
        `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`,
    )
    .join("\n\n");
  const taskPrompt = `Search query: "${query}"
Search results:
${resultsText}
Extract key findings from these results.`;
  const result = await runAnalysisAgent(
    ANALYZE_SYSTEM,
    taskPrompt,
    cwd,
    90_000,
    undefined,
    signal,
  );
  if (!result.success || !result.text) return [];
  return parseFindingsReply(result.text);
}

/**
 * Parse an LLM reply that should contain a JSON array of findings.
 *
 * The reply is tried as-is, then as the contents of the first Markdown code
 * fence, then as the outermost `[...]` span. Entries that are not objects
 * are skipped individually so one malformed entry does not discard the rest.
 */
function parseFindingsReply(text: string): Finding[] {
  const trimmed = text.trim();
  const candidates: string[] = [trimmed];

  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(trimmed);
  if (fenced?.[1]) candidates.push(fenced[1].trim());

  const open = trimmed.indexOf("[");
  const close = trimmed.lastIndexOf("]");
  if (open !== -1 && close > open) {
    candidates.push(trimmed.slice(open, close + 1));
  }

  const asText = (value: unknown): string =>
    typeof value === "string" ? value : "";
  const asTextList = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter((item): item is string => typeof item === "string")
      : [];
  const asConfidence = (value: unknown): Finding["confidence"] =>
    value === "high" || value === "medium" || value === "low"
      ? value
      : "medium";

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // not valid JSON — try the next candidate
    }
    if (!Array.isArray(parsed)) continue;

    return parsed
      .filter(
        (entry): entry is Record<string, unknown> =>
          typeof entry === "object" && entry !== null,
      )
      .map((entry) => ({
        title: asText(entry.title),
        summary: asText(entry.summary),
        sources: asTextList(entry.sources),
        keyQuotes: asTextList(entry.keyQuotes),
        confidence: asConfidence(entry.confidence),
      }))
      .filter((f) => f.title.length > 0 && f.summary.length > 0);
  }
  return [];
}

170
src/report.ts Normal file
View File

@@ -0,0 +1,170 @@
/**
* Deep Research — Report synthesis
*
* Takes all research rounds and synthesizes a comprehensive report
* using an LLM agent.
*/
import type { ResearchRound, ResearchConfig } from "./types";
import { runAnalysisAgent } from "./agent";
/**
* System prompt for the report-synthesis agent.
*
* NOTE(review): item 3 asks for URLs cited in [brackets] while the style
* guidelines ask for inline `[source](url)` citations — confirm which format
* is intended and align the two.
*/
const SYNTHESIS_SYSTEM = `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.
Your report should:
1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question)
2. Organize findings by theme, not by search query
3. Include specific evidence from sources (cite URLs in [brackets])
4. Note areas of disagreement or uncertainty
5. Identify knowledge gaps that remain
6. End with actionable conclusions
Style guidelines:
- Use clear section headings (## level)
- Write in an objective, authoritative tone
- Include bullet points for listing evidence
- Use inline citations like [source](url)
- Note the confidence level for key claims
- Be thorough but concise — every paragraph should add value`;
/**
 * Synthesize a research report from all rounds.
 *
 * Builds an evidence document (overview counts, findings grouped by research
 * round, and the list of queries per round) and asks the synthesis agent to
 * turn it into a report. Findings are grouped by round because a finding
 * does not record which query or angle produced it; labelling a whole round
 * with a single query's angle would misattribute the evidence.
 *
 * @param question - The research question.
 * @param rounds - Completed research rounds.
 * @param config - Research configuration; `format` selects the report style.
 * @param cwd - Working directory for the agent session.
 * @param signal - Optional abort signal forwarded to the agent.
 * @returns The agent's report, or a plain report assembled directly from the
 *   findings when the agent call fails or returns no text.
 */
export async function synthesizeReport(
  question: string,
  rounds: ResearchRound[],
  config: ResearchConfig,
  cwd: string,
  signal?: AbortSignal,
): Promise<string> {
  // Build the evidence summary
  const allFindings = rounds.flatMap((r) => r.findings);
  const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0);
  const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0);

  // Build structured evidence text
  let evidenceText = `## Research Question\n${question}\n\n`;
  evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`;
  evidenceText += `- Total searches executed: ${totalSearches}\n`;
  evidenceText += `- Total pages analyzed: ${totalPages}\n`;
  evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`;

  for (const round of rounds) {
    if (round.findings.length === 0) continue;
    // Distinct angles searched in this round, in first-seen order.
    const angles = Array.from(new Set(round.queries.map((q) => q.angle)));
    evidenceText += `## Findings from round ${round.round}\n`;
    if (angles.length > 0) {
      evidenceText += `Angles searched in this round: ${angles.join(", ")}\n`;
    }
    evidenceText += `\n`;
    for (const finding of round.findings) {
      evidenceText += `### ${finding.title}\n`;
      evidenceText += `**Confidence:** ${finding.confidence}\n`;
      evidenceText += `${finding.summary}\n\n`;
      for (const quote of finding.keyQuotes) {
        evidenceText += `> ${quote}\n\n`;
      }
      if (finding.sources.length > 0) {
        evidenceText += `Sources: ${finding.sources.map((s: string) => `[${s}](${s})`).join(", ")}\n\n`;
      }
    }
  }

  // Also include raw search context for depth
  evidenceText += `## Raw Search Context\n\n`;
  for (const round of rounds) {
    evidenceText += `### Round ${round.round}\n`;
    for (const q of round.queries) {
      evidenceText += `- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`;
    }
    evidenceText += `\n`;
  }

  const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report.
${evidenceText}
Write a thorough report that answers the original question: "${question}"
Format: ${config.format === "structured" ? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate." : "Well-formatted markdown report with ## headings, bullet points, and inline citations."}`;
  const result = await runAnalysisAgent(
    SYNTHESIS_SYSTEM,
    taskPrompt,
    cwd,
    120_000,
    undefined,
    signal,
  );
  if (result.success && result.text) {
    return result.text;
  }
  // Fallback: generate a simple report from the evidence
  return generateFallbackReport(question, rounds);
}
/**
 * Assemble a plain Markdown report without the LLM.
 *
 * Layout: title, a one-line executive summary, a "Key Findings" section
 * (omitted when there are no findings) listing each finding with its
 * confidence, summary, first key quote and sources, and a "Search
 * Methodology" section with per-round queries and counts.
 */
function generateFallbackReport(
  question: string,
  rounds: ResearchRound[],
): string {
  const out: string[] = [
    `# Research Report: ${question}`,
    "",
    "## Executive Summary",
    "",
    `This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`,
    "",
  ];

  const findings = rounds.flatMap((r) => r.findings);
  if (findings.length > 0) {
    out.push("## Key Findings", "");
    findings.forEach((entry) => {
      out.push(
        `### ${entry.title}`,
        `*Confidence: ${entry.confidence}*`,
        "",
        entry.summary,
        "",
      );
      if (entry.keyQuotes.length > 0) {
        out.push(`> ${entry.keyQuotes[0]}`, "");
      }
      if (entry.sources.length > 0) {
        out.push("Sources:");
        entry.sources.forEach((src) => {
          out.push(`- [${src}](${src})`);
        });
        out.push("");
      }
    });
  }

  out.push("## Search Methodology", "");
  rounds.forEach((r) => {
    const quotedQueries = r.queries.map((q) => `"${q.query}"`).join(", ");
    out.push(
      `### Round ${r.round}`,
      `Queries: ${quotedQueries}`,
      `Pages scraped: ${r.results.length}`,
      `Findings: ${r.findings.length}`,
      "",
    );
  });

  return out.join("\n");
}

254
src/research.ts Normal file
View File

@@ -0,0 +1,254 @@
/**
* Deep Research — Core research orchestration
*
* Manages the multi-round deep research process:
* 1. Generate initial search queries
* 2. Execute the round's queries one at a time via Firecrawl (short pause between requests)
* 3. Analyze results and extract findings
* 4. Generate follow-up queries
* 5. Iterate for depth rounds
* 6. Synthesize final report
*
* Widget and progress callback patterns borrowed from ralpi's executor.
*/
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
import type {
ResearchConfig,
SearchResult,
ResearchRound,
ResearchReport,
} from "./types";
import { searchWeb } from "./firecrawl";
import {
generateQueries,
generateFollowUpQueries,
analyzeResults,
} from "./queries";
import { synthesizeReport } from "./report";
/**
* Progress callback for UI updates.
*
* Invoked by `runDeepResearch` at each phase change and once per query while
* searching and analyzing.
*/
export type ResearchProgress = (update: {
// Current stage of the research pipeline.
phase:
| "generating_queries"
| "searching"
| "analyzing"
| "synthesizing"
| "complete";
// 1-based round number; omitted for the synthesizing/complete updates.
round?: number;
// Configured number of rounds (`config.depth`).
totalRounds?: number;
// Short human-readable status line.
message: string;
// Extra context; the search phase passes the query's rationale here.
detail?: string;
fraction?: number; // 0-1
}) => void;
/**
 * Run a complete deep research session.
 *
 * Generates initial queries, then for each round (up to `config.depth`)
 * searches every query, analyses each query's own results, and records the
 * round. Rounds after the first use follow-up queries derived from earlier
 * rounds and stop early when none are produced. Finally the rounds are
 * synthesized into a report.
 *
 * Individual search or analysis failures are reported through `onProgress`
 * and do not abort the round.
 *
 * @param config - Question, depth (rounds), breadth (queries per round) and
 *   report format.
 * @param ctx - Extension context; only `cwd` is used here.
 * @param onProgress - Receives status updates throughout the run.
 * @param signal - Optional abort signal, checked between steps and forwarded
 *   to the search and agent calls.
 * @throws Error "Research cancelled" when the signal is aborted, and
 *   "Failed to generate any search queries" when no initial query exists.
 */
export async function runDeepResearch(
  config: ResearchConfig,
  ctx: ExtensionContext,
  onProgress: ResearchProgress,
  signal?: AbortSignal,
): Promise<ResearchReport> {
  const startTime = Date.now();
  const rounds: ResearchRound[] = [];
  let totalSearches = 0;
  let totalPages = 0;

  // ── Round 1: Generate initial queries ──────────────────────────────
  onProgress({
    phase: "generating_queries",
    round: 1,
    totalRounds: config.depth,
    message: "Generating initial search queries...",
    fraction: 0,
  });
  if (signal?.aborted) throw new Error("Research cancelled");
  const queries = await generateQueries(
    config.question,
    config.breadth,
    ctx.cwd,
    signal,
  );
  if (queries.length === 0) {
    throw new Error("Failed to generate any search queries");
  }

  // ── Execute rounds ─────────────────────────────────────────────────
  for (let round = 1; round <= config.depth; round++) {
    if (signal?.aborted) throw new Error("Research cancelled");
    const currentQueries =
      round === 1
        ? queries
        : await generateFollowUpQueries(
            config.question,
            rounds,
            config.breadth,
            ctx.cwd,
            signal,
          );
    if (!currentQueries || currentQueries.length === 0) {
      // No follow-up queries to generate — stop here
      break;
    }

    // ── Search phase ──────────────────────────────────────────────────
    onProgress({
      phase: "searching",
      round,
      totalRounds: config.depth,
      message: `Searching with ${currentQueries.length} queries...`,
      fraction: 0.25,
    });

    // resultsByQuery[i] holds the pages credited to currentQueries[i].
    // A URL seen more than once in the round is kept only for the first
    // query that returned it, so every page is analysed once and under a
    // query that actually produced it.
    const seenUrls = new Set<string>();
    const resultsByQuery: SearchResult[][] = [];
    for (let i = 0; i < currentQueries.length; i++) {
      if (signal?.aborted) throw new Error("Research cancelled");
      const q = currentQueries[i];
      onProgress({
        phase: "searching",
        round,
        totalRounds: config.depth,
        message: `Searching: "${q.query.slice(0, 60)}..."`,
        detail: q.rationale,
        fraction: 0.25 + (i / currentQueries.length) * 0.25,
      });

      const freshResults: SearchResult[] = [];
      try {
        const results = await searchWeb(q.query, 5, signal);
        for (const r of results) {
          if (seenUrls.has(r.url)) continue;
          seenUrls.add(r.url);
          freshResults.push(r);
        }
      } catch (error) {
        // Individual search failure shouldn't crash the whole round
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "searching",
          round,
          totalRounds: config.depth,
          message: `Search failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
        });
      }
      resultsByQuery.push(freshResults);

      // Small delay between searches to avoid rate limits
      if (i < currentQueries.length - 1) {
        await new Promise((r) => setTimeout(r, 300));
      }
    }
    totalSearches += currentQueries.length;

    const uniqueResults = resultsByQuery.flatMap((group) => group);
    totalPages += uniqueResults.length;

    // ── Analyze phase ──────────────────────────────────────────────────
    onProgress({
      phase: "analyzing",
      round,
      totalRounds: config.depth,
      message: `Analyzing ${uniqueResults.length} search results...`,
      fraction: 0.6,
    });

    const allFindings: ResearchRound["findings"] = [];
    for (let i = 0; i < currentQueries.length; i++) {
      if (signal?.aborted) throw new Error("Research cancelled");
      const q = currentQueries[i];
      const queryResults = resultsByQuery[i] ?? [];
      if (queryResults.length === 0) continue;
      onProgress({
        phase: "analyzing",
        round,
        totalRounds: config.depth,
        message: `Analyzing results for "${q.query.slice(0, 40)}..."`,
        fraction: 0.6 + (i / currentQueries.length) * 0.2,
      });
      try {
        const findings = await analyzeResults(
          q.query,
          queryResults,
          ctx.cwd,
          signal,
        );
        allFindings.push(...findings);
      } catch (error) {
        // Analysis failure shouldn't crash the round, but surface it
        // instead of dropping it silently.
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "analyzing",
          round,
          totalRounds: config.depth,
          message: `Analysis failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.6 + ((i + 1) / currentQueries.length) * 0.2,
        });
      }
    }

    // Record this round
    rounds.push({
      round,
      queries: currentQueries,
      results: uniqueResults,
      findings: allFindings,
      followUpTopics: allFindings
        .filter((f) => f.confidence === "low")
        .map((f) => f.title),
    });
  }

  // ── Synthesis phase ─────────────────────────────────────────────────
  onProgress({
    phase: "synthesizing",
    message: "Synthesizing research into final report...",
    fraction: 0.9,
  });
  if (signal?.aborted) throw new Error("Research cancelled");
  const finalReport = await synthesizeReport(
    config.question,
    rounds,
    config,
    ctx.cwd,
    signal,
  );
  const durationMs = Date.now() - startTime;
  onProgress({
    phase: "complete",
    message: "Research complete!",
    fraction: 1.0,
  });
  return {
    question: config.question,
    rounds,
    finalReport,
    totalSearches,
    totalPagesScraped: totalPages,
    durationMs,
  };
}

55
src/types.ts Normal file
View File

@@ -0,0 +1,55 @@
/**
* Deep Research — type definitions
*/
/** A single search result from Firecrawl */
export interface SearchResult {
/** Page title; empty string when the response did not provide one. */
title: string;
/** Page URL; used as the de-duplication key within a research round. */
url: string;
/** Short description of the page; may be empty. */
description: string;
/** Page content as markdown; may be empty. */
markdown: string;
}
/** A finding extracted from search results by an analysis agent */
export interface Finding {
/** Concise title of the finding. */
title: string;
/** Short summary of what was found. */
summary: string;
/** URLs of the sources the agent listed as supporting the finding. */
sources: string[];
/** Quotes the agent selected from the sources. */
keyQuotes: string[];
/** Agent-assigned confidence; the parser defaults unrecognised values to "medium". */
confidence: "high" | "medium" | "low";
}
/** A generated search query with its intent/rationale */
export interface SearchQuery {
/** The text sent to the search endpoint. */
query: string;
/** Why this query helps answer the research question. */
rationale: string;
/**
* Research angle label. The prompts ask for one of "technical", "practical",
* "comparative", "critical", "forward-looking" or "authoritative", but the
* value comes from model output and is not validated against that list.
*/
angle: string;
}
/** Output from one research round */
export interface ResearchRound {
/** 1-based round number. */
round: number;
/** Queries executed in this round. */
queries: SearchQuery[];
/** Search results of the round after de-duplication by URL. */
results: SearchResult[];
/** Findings extracted from this round's results. */
findings: Finding[];
/** Any follow-up questions/angles the analysis suggests */
followUpTopics: string[];
}
/** Configuration for a research session */
export interface ResearchConfig {
/** The research question to investigate. */
question: string;
depth: number; // 1-3 rounds
breadth: number; // queries per round (1-5)
/** Report style requested from the synthesis agent. */
format: "markdown" | "structured";
}
/** Final research report */
export interface ResearchReport {
/** The research question that was investigated. */
question: string;
/** Every completed round, in execution order. */
rounds: ResearchRound[];
/** The synthesized report text. */
finalReport: string;
/** Number of search queries issued across all rounds (failed searches included). */
totalSearches: number;
/** Sum of the per-round de-duplicated result counts. */
totalPagesScraped: number;
/** Wall-clock duration of the run in milliseconds. */
durationMs: number;
}