Initial commit: deep-research extension

This commit is contained in:
2026-05-31 13:13:18 -04:00
commit c104d2ed14
12 changed files with 1658 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
node_modules/
dist/
*.js
*.js.map

1
.pi-lens/cache/review-graph.json vendored Normal file

File diff suppressed because one or more lines are too long

521
index.ts Normal file
View File

@@ -0,0 +1,521 @@
/**
* deep-research — Multi-round deep web research powered by Firecrawl
*
* Registers:
* - `deep_research` tool — callable by the LLM to conduct deep research
* - `/deep-research` command — interactive session invocation
*
* Architecture:
* Each research round generates queries, searches in parallel via
* Firecrawl, analyzes results with agent sessions, then generates
* follow-up queries. A final synthesis step produces the report.
*
* Patterns borrowed from:
* - firecrawl.ts extension (direct Firecrawl HTTP calls)
* - ralpi executor (agent sessions, widget updates, progress UX)
* - subagent extension (structured tool rendering)
*/
import type {
ExtensionAPI,
ExtensionCommandContext,
ExtensionContext,
} from "@earendil-works/pi-coding-agent";
import { Type } from "typebox";
import { Box, Text } from "@earendil-works/pi-tui";
import { runDeepResearch, type ResearchProgress } from "./src/research";
import { isFirecrawlReachable } from "./src/firecrawl";
import type { ResearchConfig, ResearchReport } from "./src/types";
/* ── Constants ────────────────────────────────────────────────────── */
/** Braille glyphs used as animation frames; the progress widgets index into this array with a counter advanced by a 100 ms interval. */
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
/** Icon shown in the progress widget, looked up by the `phase` of a progress update; unknown phases render no icon. */
const PHASE_ICONS: Record<string, string> = {
generating_queries: "🔍",
searching: "🌐",
analyzing: "📊",
synthesizing: "📝",
complete: "✅",
};
/* ── Helpers ──────────────────────────────────────────────────────── */
/**
 * Format a millisecond duration as `"<m>m <s>s"`, or just `"<s>s"` when it
 * is shorter than one minute. Fractions of a second are floored away.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const wholeMinutes = Math.floor(totalSeconds / 60);
  return wholeMinutes > 0
    ? `${wholeMinutes}m ${totalSeconds % 60}s`
    : `${totalSeconds}s`;
}
/**
 * Shorten `s` to at most `max` characters, ending in "..." when text was cut.
 *
 * When `max` is 3 or less there is no room for both text and an ellipsis, so
 * only (a prefix of) the ellipsis is returned. Previously a `max` below 3
 * produced a negative slice end and a result longer than `max`.
 *
 * @param s   Text to shorten.
 * @param max Maximum length of the returned string.
 * @returns `s` unchanged if it already fits, otherwise the shortened text.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;
  // Too narrow for "text...": emit as many dots as fit (exactly "..." at max = 3).
  if (max <= 3) return "...".slice(0, Math.max(0, max));
  return s.slice(0, max - 3) + "...";
}
/* ── Tool Definition ──────────────────────────────────────────────── */
/**
 * Parameter schema for the `deep_research` tool.
 *
 * Only `question` is required. The `default` values declared here mirror the
 * fallbacks that the tool's `execute` applies itself (`depth ?? 2`,
 * `breadth ?? 3`, `format ?? "markdown"`).
 */
const DeepResearchParams = Type.Object({
question: Type.String({
description: "The research question to investigate",
}),
// Number of research rounds.
depth: Type.Optional(
Type.Integer({
description:
"Number of research rounds (1-3). Each round uses findings from the previous to generate deeper follow-up queries. Default: 2",
minimum: 1,
maximum: 3,
default: 2,
}),
),
// Number of search queries issued per round.
breadth: Type.Optional(
Type.Integer({
description:
"Number of search queries per round (1-5). More queries = broader coverage. Default: 3",
minimum: 1,
maximum: 5,
default: 3,
}),
),
// Report style requested from the synthesis step.
format: Type.Optional(
Type.Union([Type.Literal("markdown"), Type.Literal("structured")], {
description:
'Output format for the research report. "markdown" for prose, "structured" for detailed sections. Default: "markdown"',
default: "markdown",
}),
),
// Output options; `showRoundDetails` appends a per-round methodology section to the report text.
details: Type.Optional(
Type.Object({
showRoundDetails: Type.Optional(
Type.Boolean({
description:
"Include per-round search details in the output. Default: false",
}),
),
}),
),
});
/**
 * Structured summary attached as `details` to a successful `deep_research`
 * tool result and read back by the tool's `renderResult`.
 */
interface ResearchDetails {
/** One entry per research round. */
rounds: Array<{
/** Round number as reported by the research run. */
round: number;
/** Query strings issued in this round. */
queries: string[];
/** Number of findings extracted in this round. */
findingsCount: number;
/** Number of search results (pages) gathered in this round. */
resultsCount: number;
}>;
/** Total searches across all rounds, as reported by the research run. */
totalSearches: number;
/** Total pages scraped across all rounds, as reported by the research run. */
totalPagesScraped: number;
/** Duration of the whole run in milliseconds. */
durationMs: number;
}
/* ── Extension Entry ───────────────────────────────────────────────── */
/**
 * Build the widget lines for one progress update: a spinner/icon/message
 * line, an optional detail line and an optional progress bar.
 *
 * @param update    Progress update emitted by the research run.
 * @param spinner   Spinner frame to show at the start of the first line.
 * @param showRound Whether to append " Round x/y" when the update carries it.
 */
function buildProgressLines(
  update: Parameters<ResearchProgress>[0],
  spinner: string,
  showRound: boolean,
): string[] {
  const icon = PHASE_ICONS[update.phase] ?? "";
  const roundInfo =
    showRound && update.round && update.totalRounds
      ? ` Round ${update.round}/${update.totalRounds}`
      : "";
  const lines: string[] = [
    `${spinner} ${icon} ${truncate(update.message, 80)}${roundInfo}`,
  ];
  if (update.detail) {
    lines.push(` ${truncate(update.detail, 76)}`);
  }
  if (update.fraction !== undefined) {
    const barLen = 15;
    // Clamp so a fraction outside [0, 1] cannot hand String.repeat a
    // negative count (which throws RangeError).
    const filled = Math.max(
      0,
      Math.min(barLen, Math.round(barLen * update.fraction)),
    );
    const bar = "█".repeat(filled) + "░".repeat(barLen - filled);
    lines.push(` ${bar}`);
  }
  return lines;
}

/**
 * Extension entry point. Registers the `deep_research` tool, the
 * `/deep-research` command, and a `session_start` hook that warns when the
 * Firecrawl endpoint is unreachable.
 */
export default function (pi: ExtensionAPI) {
  pi.registerTool({
    name: "deep_research",
    label: "Deep Research",
    description: [
      "Conduct multi-round deep web research on any topic using Firecrawl.",
      "Generates diverse search queries, searches the web in parallel, analyzes results, and produces a comprehensive report.",
      "Supports iterative refinement: each round builds on findings from the previous one.",
      "Parameters: question (required), depth (1-3, default 2), breadth (1-5, default 3), format (markdown|structured).",
    ].join(" "),
    promptSnippet:
      "deep_research — multi-round deep web research via Firecrawl with iterative query refinement",
    promptGuidelines: [
      "Use deep_research for complex, multi-faceted questions that benefit from multiple search angles and iterative refinement.",
      "The tool handles query generation, web search, result analysis, and report synthesis automatically.",
      "For simple fact-finding questions, use firecrawl_search directly instead.",
    ],
    parameters: DeepResearchParams,
    /**
     * Run the research and return the report text plus a `ResearchDetails`
     * summary. Failures are returned as an `isError` result rather than
     * thrown. Progress is mirrored to the "deep-research" widget and, when
     * provided, to `onUpdate`.
     */
    async execute(
      _toolCallId: string,
      params: {
        question: string;
        depth?: number;
        breadth?: number;
        format?: "markdown" | "structured";
        details?: { showRoundDetails?: boolean };
      },
      signal: AbortSignal | undefined,
      onUpdate: ((partial: any) => void) | undefined,
      ctx: any,
    ) {
      const config: ResearchConfig = {
        question: params.question,
        depth: params.depth ?? 2,
        breadth: params.breadth ?? 3,
        format: params.format ?? "markdown",
      };
      // The timer only advances the frame index; the widget itself is
      // redrawn whenever a progress update arrives.
      let spinnerIdx = 0;
      const spinnerTimer = setInterval(() => {
        spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length;
      }, 100);
      let researchResult: ResearchReport | null = null;
      const onProgress: ResearchProgress = (update) => {
        const lines = buildProgressLines(
          update,
          SPINNER_FRAMES[spinnerIdx] ?? "",
          true,
        );
        ctx.ui.setWidget("deep-research", lines);
        // Stream partial results via onUpdate
        if (onUpdate) {
          onUpdate({
            content: [{ type: "text", text: lines.join("\n") }],
            details: {
              phase: update.phase,
              round: update.round,
              message: update.message,
              fraction: update.fraction,
            },
          });
        }
      };
      try {
        // Initial status
        ctx.ui.setStatus(
          "deep-research",
          `🌐 Researching: ${truncate(config.question, 40)}`,
        );
        onProgress({
          phase: "generating_queries",
          message: "Starting deep research...",
          fraction: 0,
        });
        researchResult = await runDeepResearch(
          config,
          ctx,
          onProgress,
          signal,
        );
        // ── Build the tool result ──────────────────────────────────
        const details: ResearchDetails = {
          rounds: researchResult.rounds.map((r) => ({
            round: r.round,
            queries: r.queries.map((q) => q.query),
            findingsCount: r.findings.length,
            resultsCount: r.results.length,
          })),
          totalSearches: researchResult.totalSearches,
          totalPagesScraped: researchResult.totalPagesScraped,
          durationMs: researchResult.durationMs,
        };
        let output = researchResult.finalReport;
        if (params.details?.showRoundDetails ?? false) {
          output += `\n\n---\n\n## Research Methodology\n\n`;
          for (const round of researchResult.rounds) {
            output += `### Round ${round.round}\n\n`;
            output += `**Queries:**\n`;
            for (const q of round.queries) {
              output += `- "${q.query}" (${q.angle}) — ${q.rationale}\n`;
            }
            output += `\n**Results scraped:** ${round.results.length}\n`;
            output += `**Findings extracted:** ${round.findings.length}\n\n`;
          }
          output += `**Total searches:** ${researchResult.totalSearches}\n`;
          output += `**Total pages scraped:** ${researchResult.totalPagesScraped}\n`;
          output += `**Duration:** ${formatDuration(researchResult.durationMs)}\n`;
        }
        return {
          content: [{ type: "text", text: output }],
          details,
        };
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return {
          content: [
            {
              type: "text",
              text: `Research failed: ${message}`,
            },
          ],
          details: {
            error: message,
            phase: researchResult
              ? `completed ${researchResult.rounds.length} rounds`
              : "preparation",
          },
          isError: true,
        };
      } finally {
        // Single cleanup path for success and failure.
        clearInterval(spinnerTimer);
        ctx.ui.setWidget("deep-research", undefined);
        ctx.ui.setStatus("deep-research", undefined);
      }
    },
    // ── TUI: Render the tool call (collapsed view) ──────────────────
    renderCall(
      args: {
        question: string;
        depth?: number;
        breadth?: number;
        format?: string;
      },
      theme: any,
      _context: any,
    ) {
      const question = truncate(args.question ?? "?", 70);
      const depth = args.depth ?? 2;
      const breadth = args.breadth ?? 3;
      const format = args.format ?? "markdown";
      const text =
        theme.fg("toolTitle", theme.bold("deep_research ")) +
        theme.fg("accent", `"${question}"`) +
        theme.fg("muted", ` [depth:${depth} breadth:${breadth} ${format}]`);
      return new Text(text, 0, 0);
    },
    // ── TUI: Render the tool result (expanded/collapsed) ─────────────
    renderResult(
      result: any,
      { expanded }: { expanded: boolean },
      theme: any,
      _context: any,
    ) {
      // `details` is only a ResearchDetails on success. The error result
      // built by execute (and the partial updates it streams) carry other
      // shapes without `rounds`, so fall back to plain text for those
      // instead of dereferencing a missing array.
      const rawDetails = result.details as Partial<ResearchDetails> | undefined;
      if (!rawDetails || !Array.isArray(rawDetails.rounds)) {
        const text = result.content?.[0]?.text ?? "(no output)";
        return new Text(text, 0, 0);
      }
      const details = rawDetails as ResearchDetails;
      const container = new Box();
      // ── Collapsed view ────────────────────────────────────────────
      if (!expanded) {
        const totalRounds = details.rounds.length;
        const totalFindings = details.rounds.reduce(
          (s, r) => s + r.findingsCount,
          0,
        );
        const duration = formatDuration(details.durationMs);
        let text = "";
        text +=
          theme.fg("success", "✓ ") +
          theme.fg("toolTitle", theme.bold("deep research"));
        // Leading space keeps the counts from running into the title.
        text += theme.fg(
          "muted",
          ` ${totalRounds} rounds, ${totalFindings} findings`,
        );
        text += theme.fg("dim", ` (${duration})`);
        text += "\n";
        for (const round of details.rounds) {
          const icon =
            round.findingsCount > 0
              ? theme.fg("success", "✓")
              : theme.fg("muted", "·");
          text += ` ${icon} ${theme.fg("accent", `Round ${round.round}:`)} `;
          text += theme.fg(
            "dim",
            `${round.queries.length} queries, ${round.resultsCount} pages, ${round.findingsCount} findings`,
          );
          text += "\n";
        }
        text += theme.fg("muted", "(Ctrl+O to expand)");
        container.addChild(new Text(text, 0, 0));
        return container;
      }
      // ── Expanded view ─────────────────────────────────────────────
      const headerText =
        theme.fg("toolTitle", theme.bold("Deep Research Results")) +
        "\n" +
        theme.fg("dim", `Duration: ${formatDuration(details.durationMs)} | `) +
        theme.fg("dim", `Searches: ${details.totalSearches} | `) +
        theme.fg("dim", `Pages scraped: ${details.totalPagesScraped}`);
      container.addChild(new Text(headerText, 0, 0));
      for (const round of details.rounds) {
        container.addChild(new Text("", 0, 0)); // Spacer
        const roundHeader = `Round ${round.round}`;
        container.addChild(
          new Text(theme.fg("toolTitle", theme.bold(roundHeader)), 0, 0),
        );
        container.addChild(
          new Text(
            theme.fg(
              "dim",
              `${round.queries.length} queries → ${round.resultsCount} pages → ${round.findingsCount} findings`,
            ),
            0,
            0,
          ),
        );
        for (const q of round.queries) {
          container.addChild(
            new Text(
              theme.fg("muted", " · ") + theme.fg("accent", truncate(q, 70)),
              0,
              0,
            ),
          );
        }
      }
      return container;
    },
  });
  // ── Command ───────────────────────────────────────────────────────
  pi.registerCommand("deep-research", {
    description:
      "Conduct multi-round deep web research on any topic via Firecrawl. Usage: /deep-research <question>",
    /**
     * Interactive entry: asks for depth and breadth, runs the research with
     * a live widget, then posts the report as a user message.
     */
    handler: async (args: string, ctx: ExtensionCommandContext) => {
      if (!args || args.trim().length === 0) {
        ctx.ui.notify(
          "Usage: /deep-research <your research question>",
          "error",
        );
        return;
      }
      // Ask about depth/breadth; a dismissed prompt falls back to the
      // middle option.
      const depthStr = await ctx.ui.select("Research depth?", [
        "1 round (quick survey)",
        "2 rounds (standard)",
        "3 rounds (deep dive)",
      ]);
      const depth = depthStr?.startsWith("1")
        ? 1
        : depthStr?.startsWith("3")
          ? 3
          : 2;
      const breadthStr = await ctx.ui.select("Research breadth?", [
        "1 query/round (narrow)",
        "3 queries/round (balanced)",
        "5 queries/round (broad)",
      ]);
      const breadth = breadthStr?.startsWith("1")
        ? 1
        : breadthStr?.startsWith("5")
          ? 5
          : 3;
      ctx.ui.setStatus(
        "deep-research",
        `🌐 Researching: ${truncate(args, 40)}`,
      );
      const config: ResearchConfig = {
        question: args,
        depth,
        breadth,
        format: "markdown",
      };
      let spinnerIdx = 0;
      const spinnerTimer = setInterval(() => {
        spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length;
      }, 100);
      // Stops the spinner and clears the widget/status line.
      const stopProgress = () => {
        clearInterval(spinnerTimer);
        ctx.ui.setWidget("deep-research", undefined);
        ctx.ui.setStatus("deep-research", undefined);
      };
      try {
        const onProgress: ResearchProgress = (update) => {
          ctx.ui.setWidget(
            "deep-research",
            buildProgressLines(update, SPINNER_FRAMES[spinnerIdx] ?? "", false),
          );
        };
        const report = await runDeepResearch(config, ctx, onProgress);
        stopProgress();
        // Show notification
        ctx.ui.notify(
          `Research complete: ${report.rounds.length} rounds, ${report.totalSearches} searches, ${report.totalPagesScraped} pages in ${formatDuration(report.durationMs)}`,
          "info",
        );
        // Send the report as a user message
        pi.sendUserMessage(
          `## Deep Research: ${args}\n\n${report.finalReport}\n\n---\n*${report.rounds.length} rounds · ${report.totalSearches} searches · ${report.totalPagesScraped} pages · ${formatDuration(report.durationMs)}*`,
        );
      } catch (error) {
        stopProgress();
        // Interpolate the actual error text; the expression previously sat
        // inside the template without `${}` and was printed literally.
        ctx.ui.notify(
          `Research failed: ${error instanceof Error ? error.message : String(error)}`,
          "error",
        );
      }
    },
  });
  // ── Startup check ─────────────────────────────────────────────────
  pi.on("session_start", async (_event: unknown, ctx: ExtensionContext) => {
    const reachable = await isFirecrawlReachable();
    if (!reachable) {
      ctx.ui.notify(
        "Deep Research: Firecrawl endpoint unreachable — searches will fail. Check FIRECRAWL_BASE_URL in settings.json or env.",
        "warning",
      );
    }
  });
}

47
package-lock.json generated Normal file
View File

@@ -0,0 +1,47 @@
{
"name": "deep-research",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "deep-research",
"version": "1.0.0",
"devDependencies": {
"@types/node": "^20.0.0",
"typescript": "^5.3.0"
}
},
"node_modules/@types/node": {
"version": "20.19.41",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.41.tgz",
"integrity": "sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.21.0"
}
},
"node_modules/typescript": {
"version": "5.9.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"dev": true,
"license": "MIT"
}
}
}

15
package.json Normal file
View File

@@ -0,0 +1,15 @@
{
"name": "deep-research",
"version": "1.0.0",
"description": "Deep research extension for pi — parallel web research via Firecrawl with iterative query refinement",
"private": true,
"pi": {
"extensions": [
"./index.ts"
]
},
"devDependencies": {
"@types/node": "^20.0.0",
"typescript": "^5.3.0"
}
}

155
src/agent.ts Normal file
View File

@@ -0,0 +1,155 @@
/**
* Deep Research — Agent Session helper
*
* Uses pi's in-process `createAgentSession` for LLM subtasks
* (query generation, result analysis, report synthesis).
* Pattern borrowed from ralpi's runAgentSession().
*/
import {
createAgentSession,
DefaultResourceLoader,
getAgentDir,
SessionManager,
} from "@earendil-works/pi-coding-agent";
import type { AgentSessionEvent } from "@earendil-works/pi-coding-agent";
/**
 * Aggregate tool usage stats: how many tool executions of each kind were
 * started during an agent session. Tool names without a dedicated counter
 * are tallied under `other`.
 */
export interface ToolUsage {
read: number;
write: number;
edit: number;
bash: number;
other: number;
}
/** Outcome of one `runAnalysisAgent` call. */
export interface AgentResult {
/** False when the session threw, or ended with an error message and no assistant text. */
success: boolean;
/** Trimmed text of the last assistant message that had text content; empty on failure. */
text: string;
/** Error description; only set when `success` is false. */
error?: string;
/** Tool execution counts observed during the session. */
toolUsage: ToolUsage;
}
/**
 * Run a prompt through an in-process Pi agent session and return the text of
 * the last assistant message.
 *
 * The session is created in memory with read-only tools (`read`, `grep`,
 * `find`, `ls`) and with extensions, skills, prompt templates, themes and
 * context files disabled.
 *
 * @param systemPrompt System prompt for the session.
 * @param taskPrompt   User prompt to run.
 * @param cwd          Working directory for the resource loader and session.
 * @param timeoutMs    Abort the agent after this many milliseconds; `0` or a
 *                     negative value disables the timeout. The clock starts
 *                     before the session is created.
 * @param onEvent      Optional observer that receives every session event.
 * @param signal       Optional abort signal; aborting it aborts the agent.
 * @returns The result; never rejects — thrown errors are reported through
 *          `success: false` and `error`.
 */
export async function runAnalysisAgent(
  systemPrompt: string,
  taskPrompt: string,
  cwd: string,
  timeoutMs: number = 120_000,
  onEvent?: (event: AgentSessionEvent) => void,
  signal?: AbortSignal,
): Promise<AgentResult> {
  const toolUsage: ToolUsage = {
    read: 0,
    write: 0,
    edit: 0,
    bash: 0,
    other: 0,
  };
  type Session = Awaited<ReturnType<typeof createAgentSession>>["session"];
  let session: Session | undefined;
  let unsubscribe: (() => void) | undefined;
  // Shared by the timeout and the external signal; a no-op until the
  // session exists.
  const abortSession = () => {
    session?.agent.abort();
  };
  const timeoutHandle =
    timeoutMs > 0 ? setTimeout(abortSession, timeoutMs) : null;
  signal?.addEventListener("abort", abortSession, { once: true });
  try {
    const loader = new DefaultResourceLoader({
      cwd,
      agentDir: getAgentDir(),
      noExtensions: true,
      noSkills: true,
      noPromptTemplates: true,
      noThemes: true,
      noContextFiles: true,
    });
    await loader.reload();
    const created = await createAgentSession({
      cwd,
      sessionManager: SessionManager.inMemory(),
      resourceLoader: loader,
      tools: ["read", "grep", "find", "ls"],
      systemPrompt,
    });
    session = created.session;
    let finalText = "";
    let errorMessage: string | undefined;
    unsubscribe = session.subscribe((event: AgentSessionEvent) => {
      onEvent?.(event);
      if (event.type === "message_end") {
        const message = event.message as {
          role?: string;
          content?: unknown;
          errorMessage?: string;
        };
        if (message.role !== "assistant") return;
        if (message.errorMessage) errorMessage = message.errorMessage;
        const text = extractAssistantText(message.content);
        // Keep the latest non-empty assistant text.
        if (text) finalText = text;
      }
      if (event.type === "tool_execution_start") {
        const name = event.toolName;
        // Own-property check: `in` would also match inherited keys such as
        // "toString" and corrupt the counters.
        if (Object.prototype.hasOwnProperty.call(toolUsage, name)) {
          (toolUsage as unknown as Record<string, number>)[name]++;
        } else {
          toolUsage.other++;
        }
      }
    });
    // An abort that happened before or during session creation would not
    // have reached the agent, so check explicitly before prompting.
    if (signal?.aborted) throw new Error("Aborted");
    await session.prompt(taskPrompt);
    await session.agent.waitForIdle();
    if (errorMessage && !finalText) {
      return { success: false, text: "", error: errorMessage, toolUsage };
    }
    return { success: true, text: finalText.trim(), toolUsage };
  } catch (error) {
    return {
      success: false,
      text: "",
      error: error instanceof Error ? error.message : String(error),
      toolUsage,
    };
  } finally {
    // One cleanup path for success and failure: previously the abort
    // listener and the event subscription leaked on errors, and the session
    // was disposed twice on success.
    if (timeoutHandle) clearTimeout(timeoutHandle);
    signal?.removeEventListener("abort", abortSession);
    unsubscribe?.();
    session?.dispose();
  }
}
/**
 * Flatten assistant message content to plain text.
 *
 * A string is returned as-is. For an array, the `text` of every element
 * whose `type` is `"text"` is concatenated in order. Anything else yields
 * the empty string.
 */
function extractAssistantText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  let combined = "";
  for (const part of content) {
    if (!part || typeof part !== "object") continue;
    const block = part as { type?: string; text?: string };
    if (block.type !== "text") continue;
    combined += block.text ?? "";
  }
  return combined;
}

159
src/firecrawl.ts Normal file
View File

@@ -0,0 +1,159 @@
/**
* Deep Research — direct Firecrawl HTTP client
*
* Calls the self-hosted Firecrawl API directly (same approach as the
* firecrawl.ts extension)
*/
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import type { SearchResult } from "./types";
/* ── Config ──────────────────────────────────────────────────────── */
/**
 * Resolve the Firecrawl base URL and API key.
 *
 * Values under the `firecrawl` key of `~/.pi/agent/settings.json` take
 * precedence, then the `FIRECRAWL_BASE_URL` / `FIRECRAWL_API_KEY` environment
 * variables, then `http://localhost:3002` for the URL. If the settings file
 * cannot be read, parsed or used, only the environment and the default
 * apply. Trailing slashes are stripped from the base URL.
 */
function loadFirecrawlConfig() {
  const settingsFile = path.join(os.homedir(), ".pi", "agent", "settings.json");
  const envBaseUrl = () =>
    process.env.FIRECRAWL_BASE_URL ?? "http://localhost:3002";
  try {
    const parsed = JSON.parse(fs.readFileSync(settingsFile, "utf-8"));
    const section = parsed.firecrawl ?? {};
    const rawBaseUrl = section.baseUrl ?? envBaseUrl();
    return {
      baseUrl: rawBaseUrl.replace(/\/+$/, ""),
      apiKey: section.apiKey ?? process.env.FIRECRAWL_API_KEY,
    };
  } catch {
    return {
      baseUrl: envBaseUrl().replace(/\/+$/, ""),
      apiKey: process.env.FIRECRAWL_API_KEY,
    };
  }
}
// Resolved once, when this module is first loaded.
const { baseUrl: BASE_URL, apiKey: API_KEY } = loadFirecrawlConfig();
/* ── Helpers ──────────────────────────────────────────────────────── */
/**
 * POST a JSON body to `${BASE_URL}/v1/<endpoint>` and return the parsed JSON
 * response. A bearer token is sent when an API key is configured.
 *
 * @throws Error with the status code and up to 500 characters of the
 *         response body when the response status is not OK.
 */
async function firecrawlRequest(
  endpoint: string,
  body: Record<string, unknown>,
  signal?: AbortSignal,
): Promise<unknown> {
  const response = await fetch(`${BASE_URL}/v1/${endpoint}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
    },
    body: JSON.stringify(body),
    signal,
  });
  if (response.ok) return response.json();
  const errorBody = await response.text();
  throw new Error(
    `Firecrawl ${endpoint} failed (${response.status}): ${errorBody.slice(0, 500)}`,
  );
}
/**
 * Probe the Firecrawl endpoint by asking it to scrape https://example.com
 * (links only) with a 10 second timeout.
 *
 * @returns `true` when the probe gets an OK response; `false` on any other
 *          status or on any thrown error.
 */
export async function isFirecrawlReachable(): Promise<boolean> {
  try {
    const probeHeaders: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (API_KEY) probeHeaders.Authorization = `Bearer ${API_KEY}`;
    const probe = await fetch(`${BASE_URL}/v1/scrape`, {
      method: "POST",
      headers: probeHeaders,
      body: JSON.stringify({ url: "https://example.com", formats: ["links"] }),
      signal: AbortSignal.timeout(10_000),
    });
    return probe.ok;
  } catch {
    return false;
  }
}
/* ── Search ───────────────────────────────────────────────────────── */
/**
 * Run a web search through Firecrawl's `search` endpoint, requesting the
 * main content of each hit as markdown.
 *
 * @param query  Search query string.
 * @param limit  Maximum number of results to request; capped at 10.
 * @param signal Optional abort signal forwarded to the HTTP request.
 * @returns The hits that have markdown or a description; an empty array when
 *          the response is unsuccessful or carries no data.
 */
export async function searchWeb(
  query: string,
  limit: number = 5,
  signal?: AbortSignal,
): Promise<SearchResult[]> {
  const payload = await firecrawlRequest(
    "search",
    {
      query,
      limit: Math.min(limit, 10),
      scrapeOptions: {
        formats: ["markdown"],
        onlyMainContent: true,
      },
    },
    signal,
  );
  if (!payload || typeof payload !== "object") return [];
  const { success, data } = payload as {
    success?: boolean;
    data?: Record<string, unknown>[];
    error?: string;
  };
  if (!success || !data) return [];
  return data.flatMap((doc) => {
    const hit = {
      title: (doc.title as string) ?? "",
      url: (doc.url as string) ?? "",
      description: (doc.description as string) ?? "",
      markdown: (doc.markdown as string) ?? "",
    };
    // Drop hits that carry no usable text at all.
    return hit.markdown || hit.description ? [hit] : [];
  });
}
/* ── Scrape ───────────────────────────────────────────────────────── */
/**
 * Scrape a single URL through Firecrawl's `scrape` endpoint and return its
 * markdown content.
 *
 * @param url    Page to scrape.
 * @param signal Optional abort signal forwarded to the HTTP request.
 * @returns Title, markdown and links of the page, or `null` when the
 *          response is unsuccessful or carries no data. Fields that are
 *          missing or of the wrong type come back as `""` / `[]`.
 * @throws Whatever `firecrawlRequest` throws (e.g. on a non-OK HTTP status).
 */
export async function scrapeUrl(
  url: string,
  signal?: AbortSignal,
): Promise<{ title: string; markdown: string; links: string[] } | null> {
  const result = await firecrawlRequest(
    "scrape",
    { url, formats: ["markdown"] },
    signal,
  );
  if (!result || typeof result !== "object") return null;
  const res = result as {
    success?: boolean;
    data?: Record<string, unknown>;
    error?: string;
  };
  if (!res.success || !res.data) return null;
  const { data } = res;
  const asString = (value: unknown): string =>
    typeof value === "string" ? value : "";
  // Firecrawl's v1 scrape response reports the page title under
  // `data.metadata.title`; keep the top-level lookup first for compatibility.
  const metadata =
    data.metadata && typeof data.metadata === "object"
      ? (data.metadata as Record<string, unknown>)
      : {};
  // NOTE(review): only the "markdown" format is requested above, so `links`
  // is presumably always empty unless the server adds it anyway — confirm
  // whether callers need "links" requested as well.
  const links = Array.isArray(data.links)
    ? data.links.filter((link): link is string => typeof link === "string")
    : [];
  return {
    title: asString(data.title) || asString(metadata.title),
    markdown: asString(data.markdown),
    links,
  };
}

261
src/queries.ts Normal file
View File

@@ -0,0 +1,261 @@
/**
* Deep Research — Search query generation & refinement
*
* Uses an LLM agent to generate search queries from different research
* angles, then analyzes results to produce follow-up queries.
*/
import type { SearchQuery, Finding, ResearchRound } from "./types";
import { runAnalysisAgent } from "./agent";
/**
 * System prompt for the initial query-generation agent. It instructs the
 * model to reply with a JSON array of `{query, rationale, angle}` objects,
 * which `generateQueries` then parses.
 */
const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic.
Guidelines:
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking)
- Each query should target a specific facet of the question
- Queries should use keywords that search engines rank well (avoid overly long questions)
- Cover contrasting viewpoints and alternative approaches
- Include queries for finding authoritative sources (docs, papers, official sites)
- Prioritize recent information where relevant
Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": why this query will help answer the research question
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
Example:
[
{"query": "Rust async/await performance benchmarks 2024", "rationale": "Understanding current performance characteristics", "angle": "technical"},
{"query": "Rust vs Go concurrency patterns comparison", "rationale": "Comparative analysis helps contextualize trade-offs", "angle": "comparative"}
]
`;
/**
 * System prompt for the follow-up query agent used after the first round. It
 * asks for the same JSON array shape (`{query, rationale, angle}`) as the
 * initial query prompt, focused on gaps in the findings so far.
 */
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.
Look for:
- Claims made without sufficient evidence
- Conflicting information that needs resolution
- Angles that haven't been explored yet
- Missing authoritative sources
- Practical implications that need more detail
- Recent developments that might have updated findings
Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": what gap this query fills or what angle it explores
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
`;
/**
 * Parse an LLM reply into at most `count` search queries.
 *
 * The reply is first parsed as-is; if that fails, the text between the first
 * "[" and the last "]" is tried, which recovers arrays wrapped in Markdown
 * code fences or surrounding prose. Entries that are not objects or have an
 * empty `query` are dropped before the `count` limit is applied.
 *
 * @returns The parsed queries, or an empty array when no JSON array is found.
 */
function parseSearchQueries(raw: string, count: number): SearchQuery[] {
  const candidates = [raw.trim()];
  const start = raw.indexOf("[");
  const end = raw.lastIndexOf("]");
  if (start !== -1 && end > start) candidates.push(raw.slice(start, end + 1));

  let items: unknown[] = [];
  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(candidate);
      if (Array.isArray(parsed)) {
        items = parsed;
        break;
      }
    } catch {
      // Not valid JSON in this form; try the next candidate.
    }
  }

  return items
    .filter(
      (item): item is Record<string, unknown> =>
        !!item && typeof item === "object",
    )
    .map((q) => ({
      query: String(q.query ?? ""),
      rationale: String(q.rationale ?? ""),
      angle: String(q.angle ?? "technical") as SearchQuery["angle"],
    }))
    .filter((q) => q.query.length > 0)
    .slice(0, count);
}

/**
 * Generate initial search queries for a research question.
 *
 * Asks an analysis agent (60 s timeout) for `count` queries. If the agent
 * fails or its reply yields no usable query, template-based fallback queries
 * are returned instead.
 *
 * @param question Research question.
 * @param count    Maximum number of queries to return.
 * @param cwd      Working directory passed to the agent session.
 * @param signal   Optional abort signal passed to the agent session.
 */
export async function generateQueries(
  question: string,
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const taskPrompt = `Research question: ${question}
Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`;
  const result = await runAnalysisAgent(
    GENERATE_QUERIES_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );
  if (result.success && result.text) {
    const queries = parseSearchQueries(result.text, count);
    // An array of only empty/invalid entries is as useless as no array.
    if (queries.length > 0) return queries;
  }
  return generateFallbackQueries(question, count);
}

/**
 * Generate follow-up queries based on findings from previous rounds.
 *
 * The agent (60 s timeout) is shown every query already issued and every
 * finding so far. Unlike `generateQueries` there is no fallback.
 *
 * @param question Research question.
 * @param rounds   Completed rounds whose queries and findings are summarized.
 * @param count    Maximum number of queries to return.
 * @param cwd      Working directory passed to the agent session.
 * @param signal   Optional abort signal passed to the agent session.
 * @returns Follow-up queries, or an empty array when the agent fails or its
 *          reply cannot be parsed.
 */
export async function generateFollowUpQueries(
  question: string,
  rounds: ResearchRound[],
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  // Build a summary of findings so far
  const findingsSummary = rounds
    .flatMap((r) => r.findings)
    .map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`)
    .join("\n");
  // Separate query and rationale; they were previously concatenated with
  // nothing in between, which made the list unreadable for the model.
  const exploredAngles = rounds
    .flatMap((r) => r.queries)
    .map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`)
    .join("\n");
  const taskPrompt = `Research question: ${question}
Queries already explored:
${exploredAngles}
Findings so far:
${findingsSummary}
Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`;
  const result = await runAnalysisAgent(
    FOLLOWUP_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );
  if (!result.success || !result.text) {
    return [];
  }
  return parseSearchQueries(result.text, count);
}
/**
 * Template-based queries used when the LLM call fails: the question followed
 * by a fixed description for each of up to five angles, in a fixed order.
 *
 * @param question Research question used as the prefix of every query.
 * @param count    Number of queries wanted; at most five are produced.
 */
function generateFallbackQueries(
  question: string,
  count: number,
): SearchQuery[] {
  const templates: ReadonlyArray<readonly [string, string]> = [
    ["technical", "technical details and specifications"],
    ["practical", "practical examples, tutorials, and best practices"],
    ["comparative", "comparisons with alternatives"],
    ["critical", "limitations, challenges, and criticisms"],
    ["forward-looking", "future trends and developments"],
  ];
  const wanted = Math.min(count, templates.length);
  return templates
    .filter((_, index) => index < wanted)
    .map(([angle, desc]) => ({
      query: `${question} ${desc}`,
      rationale: `Exploring ${desc} related to the research question`,
      angle: angle as SearchQuery["angle"],
    }));
}
/**
 * System prompt for the result-analysis agent. It instructs the model to
 * reply with a JSON array of `{title, summary, sources, keyQuotes,
 * confidence}` objects, which `analyzeResults` then parses.
 */
const ANALYZE_SYSTEM = `You are a research analyst. Given search results for a specific query, extract key findings.
For each finding:
- Give it a concise title
- Summarize what was found in 1-3 sentences
- List which source URLs support this finding
- Include 1-2 key quotes from the sources
- Rate your confidence (high/medium/low) based on source authority and consistency
Output ONLY a JSON array of objects with fields:
- "title": concise finding title
- "summary": 1-3 sentence summary
- "sources": array of source URLs
- "keyQuotes": array of 1-2 key quotes
- "confidence": "high" | "medium" | "low"`;
/**
 * Parse an LLM reply into findings.
 *
 * The reply is first parsed as-is; if that fails, the text between the first
 * "[" and the last "]" is tried, which recovers arrays wrapped in Markdown
 * code fences or surrounding prose. Entries that are not objects, or that
 * lack a title or summary, are dropped. An unrecognized confidence value
 * becomes "medium".
 *
 * @returns The parsed findings, or an empty array when no JSON array is found.
 */
function parseFindings(raw: string): Finding[] {
  const candidates = [raw.trim()];
  const start = raw.indexOf("[");
  const end = raw.lastIndexOf("]");
  if (start !== -1 && end > start) candidates.push(raw.slice(start, end + 1));

  let items: unknown[] = [];
  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(candidate);
      if (Array.isArray(parsed)) {
        items = parsed;
        break;
      }
    } catch {
      // Not valid JSON in this form; try the next candidate.
    }
  }

  const confidenceLevels = ["high", "medium", "low"];
  return items
    .filter(
      (item): item is Record<string, unknown> =>
        !!item && typeof item === "object",
    )
    .map((f) => ({
      title: String(f.title ?? ""),
      summary: String(f.summary ?? ""),
      sources: Array.isArray(f.sources) ? f.sources.map(String) : [],
      keyQuotes: Array.isArray(f.keyQuotes) ? f.keyQuotes.map(String) : [],
      confidence: (confidenceLevels.includes(String(f.confidence))
        ? String(f.confidence)
        : "medium") as Finding["confidence"],
    }))
    .filter((f) => f.title.length > 0 && f.summary.length > 0);
}

/**
 * Analyze search results for a specific query and extract findings.
 *
 * Each result contributes its title, URL, description and the first 3000
 * characters of its markdown to the prompt; the agent runs with a 90 s
 * timeout.
 *
 * @param query   The search query the results belong to.
 * @param results Search results to analyze.
 * @param cwd     Working directory passed to the agent session.
 * @param signal  Optional abort signal passed to the agent session.
 * @returns Extracted findings; an empty array when there are no results, the
 *          agent fails, or its reply cannot be parsed.
 */
export async function analyzeResults(
  query: string,
  results: {
    title: string;
    url: string;
    description: string;
    markdown: string;
  }[],
  cwd: string,
  signal?: AbortSignal,
): Promise<Finding[]> {
  // Nothing to analyze: skip the agent call rather than asking the model to
  // extract findings from an empty result list.
  if (results.length === 0) return [];
  const resultsText = results
    .map(
      (r, i) =>
        `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`,
    )
    .join("\n\n");
  const taskPrompt = `Search query: "${query}"
Search results:
${resultsText}
Extract key findings from these results.`;
  const result = await runAnalysisAgent(
    ANALYZE_SYSTEM,
    taskPrompt,
    cwd,
    90_000,
    undefined,
    signal,
  );
  if (!result.success || !result.text) return [];
  return parseFindings(result.text);
}

170
src/report.ts Normal file
View File

@@ -0,0 +1,170 @@
/**
* Deep Research — Report synthesis
*
* Takes all research rounds and synthesizes a comprehensive report
* using an LLM agent.
*/
import type { ResearchRound, ResearchConfig } from "./types";
import { runAnalysisAgent } from "./agent";
/**
 * System prompt for the report-synthesis agent: describes the expected
 * report structure (executive summary, themed findings, cited evidence,
 * uncertainty, gaps, conclusions) and the writing style.
 */
const SYNTHESIS_SYSTEM = `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.
Your report should:
1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question)
2. Organize findings by theme, not by search query
3. Include specific evidence from sources (cite URLs in [brackets])
4. Note areas of disagreement or uncertainty
5. Identify knowledge gaps that remain
6. End with actionable conclusions
Style guidelines:
- Use clear section headings (## level)
- Write in an objective, authoritative tone
- Include bullet points for listing evidence
- Use inline citations like [source](url)
- Note the confidence level for key claims
- Be thorough but concise — every paragraph should add value`;
/**
 * Build the final research report for `question` from every completed round.
 *
 * Findings are bucketed by search angle, flattened into a markdown evidence
 * digest, and handed to the synthesis agent. When the agent does not come
 * back with a successful, non-empty result, a plain report assembled directly
 * from the findings is returned instead.
 *
 * @param question - The research question the report has to answer.
 * @param rounds - All research rounds gathered so far.
 * @param config - Session configuration; only `format` is read here.
 * @param cwd - Working directory forwarded to the analysis agent.
 * @param signal - Optional abort signal forwarded to the analysis agent.
 * @returns The agent's report text, or the fallback report.
 */
export async function synthesizeReport(
  question: string,
  rounds: ResearchRound[],
  config: ResearchConfig,
  cwd: string,
  signal?: AbortSignal,
): Promise<string> {
  // Tally the headline numbers and bucket findings by angle in one pass.
  let searchCount = 0;
  let pageCount = 0;
  let findingCount = 0;
  const byAngle = new Map<string, ResearchRound["findings"]>();

  for (const current of rounds) {
    searchCount += current.queries.length;
    pageCount += current.results.length;
    findingCount += current.findings.length;

    // Register every angle of this round first so bucket order follows
    // the order in which angles were queried.
    for (const { angle } of current.queries) {
      if (!byAngle.has(angle)) byAngle.set(angle, []);
    }
    if (current.findings.length === 0) continue;

    // Findings carry no angle of their own, so they are filed under the
    // angle of the round's first query ("technical" if there is none).
    const roundAngle = current.queries[0]?.angle ?? "technical";
    let bucket = byAngle.get(roundAngle);
    if (bucket === undefined) {
      bucket = [];
      byAngle.set(roundAngle, bucket);
    }
    for (const item of current.findings) bucket.push(item);
  }

  // Assemble the evidence digest piece by piece, then join once.
  const parts: string[] = [
    `## Research Question\n${question}\n\n`,
    `## Overview\n- Rounds of research: ${rounds.length}\n`,
    `- Total searches executed: ${searchCount}\n`,
    `- Total pages analyzed: ${pageCount}\n`,
    `- Key findings extracted: ${findingCount}\n\n`,
  ];

  byAngle.forEach((items, angle) => {
    // Angles that collected no findings are left out of the digest.
    if (items.length === 0) return;
    parts.push(`## Angle: ${angle}\n\n`);
    for (const item of items) {
      parts.push(
        `### ${item.title}\n`,
        `**Confidence:** ${item.confidence}\n`,
        `${item.summary}\n\n`,
      );
      // Only the first key quote is carried into the digest.
      if (item.keyQuotes.length > 0) {
        parts.push(`> ${item.keyQuotes[0]}\n\n`);
      }
      if (item.sources.length > 0) {
        const links = item.sources
          .map((s: string) => `[${s}](${s})`)
          .join(", ");
        parts.push(`Sources: ${links}\n\n`);
      }
    }
  });

  // List the queries of each round so the agent sees how the evidence was gathered.
  parts.push(`## Raw Search Context\n\n`);
  for (const current of rounds) {
    parts.push(`### Round ${current.round}\n`);
    for (const q of current.queries) {
      parts.push(`- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`);
    }
    parts.push(`\n`);
  }
  const evidenceText = parts.join("");

  let formatHint: string;
  if (config.format === "structured") {
    formatHint =
      "Structured report with numbered sections, clear hierarchies, and data tables where appropriate.";
  } else {
    formatHint =
      "Well-formatted markdown report with ## headings, bullet points, and inline citations.";
  }

  const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report.
${evidenceText}
Write a thorough report that answers the original question: "${question}"
Format: ${formatHint}`;

  const outcome = await runAnalysisAgent(
    SYNTHESIS_SYSTEM,
    taskPrompt,
    cwd,
    120_000,
    undefined,
    signal,
  );

  // No usable agent output: fall back to a report built from the raw evidence.
  if (!outcome.success || !outcome.text) {
    return generateFallbackReport(question, rounds);
  }
  return outcome.text;
}
/**
 * Assemble a plain markdown report straight from the collected findings,
 * with no LLM involved. Used when agent-based synthesis yields nothing usable.
 *
 * @param question - The research question; appears in the report title.
 * @param rounds - The research rounds whose findings and queries are listed.
 * @returns The report as one newline-joined markdown string.
 */
function generateFallbackReport(
  question: string,
  rounds: ResearchRound[],
): string {
  const out: string[] = [
    `# Research Report: ${question}`,
    "",
    "## Executive Summary",
    "",
    `This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`,
    "",
  ];

  const everyFinding = rounds.flatMap((r) => r.findings);
  if (everyFinding.length !== 0) {
    out.push("## Key Findings", "");
    everyFinding.forEach((item) => {
      out.push(
        `### ${item.title}`,
        `*Confidence: ${item.confidence}*`,
        "",
        item.summary,
        "",
      );
      // Only the first key quote is shown.
      if (item.keyQuotes.length > 0) {
        out.push(`> ${item.keyQuotes[0]}`, "");
      }
      if (item.sources.length > 0) {
        out.push("Sources:");
        item.sources.forEach((src) => {
          out.push(`- [${src}](${src})`);
        });
        out.push("");
      }
    });
  }

  // Per-round bookkeeping: which queries ran and how much they produced.
  out.push("## Search Methodology", "");
  for (const current of rounds) {
    const quotedQueries = current.queries
      .map((q) => `"${q.query}"`)
      .join(", ");
    out.push(
      `### Round ${current.round}`,
      `Queries: ${quotedQueries}`,
      `Pages scraped: ${current.results.length}`,
      `Findings: ${current.findings.length}`,
      "",
    );
  }

  return out.join("\n");
}

254
src/research.ts Normal file
View File

@@ -0,0 +1,254 @@
/**
* Deep Research — Core research orchestration
*
* Manages the multi-round deep research process:
* 1. Generate initial search queries
* 2. Execute all queries in parallel via Firecrawl
* 3. Analyze results and extract findings
* 4. Generate follow-up queries
* 5. Iterate for depth rounds
* 6. Synthesize final report
*
* Widget and progress callback patterns borrowed from ralpi's executor.
*/
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
import type {
ResearchConfig,
SearchResult,
ResearchRound,
ResearchReport,
} from "./types";
import { searchWeb } from "./firecrawl";
import {
generateQueries,
generateFollowUpQueries,
analyzeResults,
} from "./queries";
import { synthesizeReport } from "./report";
/**
 * Progress callback for UI updates.
 *
 * `runDeepResearch` invokes it at each phase transition and repeatedly while
 * it works through the queries of a round.
 */
export type ResearchProgress = (update: {
// Stage of the session this update describes.
phase:
| "generating_queries"
| "searching"
| "analyzing"
| "synthesizing"
| "complete";
// 1-based research round; the synthesizing and complete updates omit it.
round?: number;
// Configured number of rounds (`config.depth`); omitted together with `round`.
totalRounds?: number;
// Short human-readable status line.
message: string;
// Optional extra context; the search phase passes the query's rationale here.
detail?: string;
fraction?: number; // 0-1
}) => void;
/**
 * Run a complete deep research session.
 *
 * Flow: generate the initial queries, then for up to `config.depth` rounds
 * search each query, de-duplicate the results by URL, analyze each query's
 * own results into findings, and record the round. Rounds after the first
 * use follow-up queries derived from the rounds so far; the loop stops early
 * when no follow-up queries are produced. Finally all rounds are synthesized
 * into a report.
 *
 * A failed search or a failed analysis for a single query is reported through
 * `onProgress` and does not abort the round.
 *
 * @param config - Question, depth (max rounds), breadth and report format.
 * @param ctx - Extension context; only `ctx.cwd` is read here.
 * @param onProgress - Callback receiving phase/progress updates for the UI.
 * @param signal - Optional abort signal, checked before each major step and
 *   forwarded to the search, query-generation, analysis and synthesis calls.
 * @returns The report together with per-round data and summary counters.
 * @throws Error "Research cancelled" when `signal` is aborted at a checkpoint.
 * @throws Error "Failed to generate any search queries" when the initial
 *   query generation returns nothing.
 */
export async function runDeepResearch(
  config: ResearchConfig,
  ctx: ExtensionContext,
  onProgress: ResearchProgress,
  signal?: AbortSignal,
): Promise<ResearchReport> {
  const startTime = Date.now();
  const rounds: ResearchRound[] = [];
  let totalSearches = 0;
  let totalPages = 0;

  // ── Round 1: Generate initial queries ──────────────────────────────
  onProgress({
    phase: "generating_queries",
    round: 1,
    totalRounds: config.depth,
    message: "Generating initial search queries...",
    fraction: 0,
  });
  if (signal?.aborted) throw new Error("Research cancelled");
  const queries = await generateQueries(
    config.question,
    config.breadth,
    ctx.cwd,
    signal,
  );
  if (queries.length === 0) {
    throw new Error("Failed to generate any search queries");
  }

  // ── Execute rounds ─────────────────────────────────────────────────
  for (let round = 1; round <= config.depth; round++) {
    if (signal?.aborted) throw new Error("Research cancelled");
    const isFirstRound = round === 1;
    const currentQueries = isFirstRound
      ? queries
      : await generateFollowUpQueries(
          config.question,
          rounds,
          config.breadth,
          ctx.cwd,
          signal,
        );
    if (!currentQueries || currentQueries.length === 0) {
      // No follow-up queries to generate — stop here
      break;
    }

    // ── Search phase ──────────────────────────────────────────────────
    onProgress({
      phase: "searching",
      round,
      totalRounds: config.depth,
      message: `Searching with ${currentQueries.length} queries...`,
      fraction: 0.25,
    });

    // URLs already collected in this round; de-duplication happens as
    // results arrive so every page stays attached to the query that
    // surfaced it first.
    const seen = new Set<string>();
    const uniqueResults: SearchResult[] = [];
    // resultsByQuery[i] holds the unique results contributed by currentQueries[i].
    const resultsByQuery: SearchResult[][] = [];

    for (let i = 0; i < currentQueries.length; i++) {
      if (signal?.aborted) throw new Error("Research cancelled");
      const q = currentQueries[i];
      const ownResults: SearchResult[] = [];
      resultsByQuery.push(ownResults);
      onProgress({
        phase: "searching",
        round,
        totalRounds: config.depth,
        message: `Searching: "${q.query.slice(0, 60)}..."`,
        detail: q.rationale,
        fraction: 0.25 + (i / currentQueries.length) * 0.25,
      });
      try {
        const results = await searchWeb(q.query, 5, signal);
        for (const result of results) {
          if (seen.has(result.url)) continue;
          seen.add(result.url);
          uniqueResults.push(result);
          ownResults.push(result);
        }
      } catch (error) {
        // Individual search failure shouldn't crash the whole round
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "searching",
          round,
          totalRounds: config.depth,
          message: `Search failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
        });
      }
      // Small delay between searches to avoid rate limits
      if (i < currentQueries.length - 1) {
        await new Promise((r) => setTimeout(r, 300));
      }
    }
    // Counts every attempted query, including ones whose search failed.
    totalSearches += currentQueries.length;
    totalPages += uniqueResults.length;

    // ── Analyze phase ──────────────────────────────────────────────────
    onProgress({
      phase: "analyzing",
      round,
      totalRounds: config.depth,
      message: `Analyzing ${uniqueResults.length} search results...`,
      fraction: 0.6,
    });

    // Analyze each query against the results it actually produced, rather
    // than an arbitrary positional slice of the combined list.
    const allFindings: ResearchRound["findings"] = [];
    for (let i = 0; i < currentQueries.length; i++) {
      if (signal?.aborted) throw new Error("Research cancelled");
      const q = currentQueries[i];
      const queryResults = resultsByQuery[i] ?? [];
      // Nothing new came back for this query (failed search, empty result
      // set, or only pages an earlier query already surfaced).
      if (queryResults.length === 0) continue;
      onProgress({
        phase: "analyzing",
        round,
        totalRounds: config.depth,
        message: `Analyzing results for "${q.query.slice(0, 40)}..."`,
        fraction: 0.6 + (i / currentQueries.length) * 0.2,
      });
      try {
        const findings = await analyzeResults(
          q.query,
          queryResults,
          ctx.cwd,
          signal,
        );
        allFindings.push(...findings);
      } catch (error) {
        // Analysis failure shouldn't crash the round, but surface it
        // instead of dropping it silently.
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "analyzing",
          round,
          totalRounds: config.depth,
          message: `Analysis failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.6 + ((i + 1) / currentQueries.length) * 0.2,
        });
      }
    }

    // Record this round
    rounds.push({
      round,
      queries: currentQueries,
      results: uniqueResults,
      findings: allFindings,
      // Low-confidence findings are flagged as topics worth following up.
      followUpTopics: allFindings
        .filter((f) => f.confidence === "low")
        .map((f) => f.title),
    });
  }

  // ── Synthesis phase ─────────────────────────────────────────────────
  onProgress({
    phase: "synthesizing",
    message: "Synthesizing research into final report...",
    fraction: 0.9,
  });
  if (signal?.aborted) throw new Error("Research cancelled");
  const finalReport = await synthesizeReport(
    config.question,
    rounds,
    config,
    ctx.cwd,
    signal,
  );
  const durationMs = Date.now() - startTime;
  onProgress({
    phase: "complete",
    message: "Research complete!",
    fraction: 1.0,
  });
  return {
    question: config.question,
    rounds,
    finalReport,
    totalSearches,
    totalPagesScraped: totalPages,
    durationMs,
  };
}

55
src/types.ts Normal file
View File

@@ -0,0 +1,55 @@
/**
* Deep Research — type definitions
*/
/**
 * A single search result from Firecrawl.
 *
 * `runDeepResearch` collects these from `searchWeb` and de-duplicates them
 * by `url` within each round.
 */
export interface SearchResult {
// Result title. NOTE(review): presumably the page title — confirm in src/firecrawl.ts.
title: string;
// Result URL; serves as the identity key for de-duplication.
url: string;
// NOTE(review): presumably the search snippet — confirm in src/firecrawl.ts.
description: string;
// NOTE(review): presumably the scraped page content as markdown — confirm in src/firecrawl.ts.
markdown: string;
}
/**
 * A finding extracted from search results by an analysis agent.
 *
 * The report code prints `title`, `confidence`, `summary`, the first entry of
 * `keyQuotes`, and every entry of `sources` as a markdown link.
 */
export interface Finding {
// Heading under which the finding is printed in reports.
title: string;
// Body text of the finding.
summary: string;
// Source references; reports render each one as `[src](src)`, so they are expected to be URLs.
sources: string[];
// Supporting quotes; only the first one is shown in reports.
keyQuotes: string[];
// Findings rated "low" are recorded as a round's follow-up topics.
confidence: "high" | "medium" | "low";
}
/** A generated search query with its intent/rationale */
export interface SearchQuery {
// The search text handed to `searchWeb` (and to the analysis step as context).
query: string;
// Why the query was chosen; shown as the progress `detail` while searching.
rationale: string;
// Research angle label; report synthesis groups findings by this value.
angle: string;
}
/** Output from one research round */
export interface ResearchRound {
// 1-based round number.
round: number;
// Queries executed in this round.
queries: SearchQuery[];
// Search results of this round, de-duplicated by URL.
results: SearchResult[];
// Findings extracted from this round's results.
findings: Finding[];
/** Any follow-up questions/angles the analysis suggests */
// `runDeepResearch` fills this with the titles of low-confidence findings.
followUpTopics: string[];
}
/** Configuration for a research session */
export interface ResearchConfig {
// The research question driving query generation and the final report.
question: string;
// Upper bound on the number of research rounds; the session may stop earlier
// when no follow-up queries are produced.
depth: number; // 1-3 rounds
// Forwarded to the query generators as the requested number of queries.
breadth: number; // queries per round (1-5)
// "structured" asks the synthesis agent for numbered sections and tables;
// any other value asks for a regular markdown report.
format: "markdown" | "structured";
}
/** Final research report */
export interface ResearchReport {
// The research question, copied from the session configuration.
question: string;
// Every completed round, in execution order.
rounds: ResearchRound[];
// The synthesized report text (agent output or the fallback report).
finalReport: string;
// Number of queries attempted across all rounds, including failed searches.
totalSearches: number;
// Sum over rounds of the per-round URL-unique result counts.
totalPagesScraped: number;
// Wall-clock duration of the session in milliseconds.
durationMs: number;
}

16
tsconfig.json Normal file
View File

@@ -0,0 +1,16 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ES2022",
"moduleResolution": "node",
"lib": ["ES2022"],
"noEmit": true,
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
},
"include": ["index.ts", "src/**/*"],
"exclude": ["node_modules", "dist"]
}