Initial commit: deep-research extension
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
node_modules/
|
||||
dist/
|
||||
*.js
|
||||
*.js.map
|
||||
1
.pi-lens/cache/review-graph.json
vendored
Normal file
1
.pi-lens/cache/review-graph.json
vendored
Normal file
File diff suppressed because one or more lines are too long
521
index.ts
Normal file
521
index.ts
Normal file
@@ -0,0 +1,521 @@
|
||||
/**
|
||||
* deep-research — Multi-round deep web research powered by Firecrawl
|
||||
*
|
||||
* Registers:
|
||||
* - `deep_research` tool — callable by the LLM to conduct deep research
|
||||
* - `/deep-research` command — interactive session invocation
|
||||
*
|
||||
* Architecture:
|
||||
* Each research round generates queries, searches in parallel via
|
||||
* Firecrawl, analyzes results with agent sessions, then generates
|
||||
* follow-up queries. A final synthesis step produces the report.
|
||||
*
|
||||
* Patterns borrowed from:
|
||||
* - firecrawl.ts extension (direct Firecrawl HTTP calls)
|
||||
* - ralpi executor (agent sessions, widget updates, progress UX)
|
||||
* - subagent extension (structured tool rendering)
|
||||
*/
|
||||
import type {
|
||||
ExtensionAPI,
|
||||
ExtensionCommandContext,
|
||||
ExtensionContext,
|
||||
} from "@earendil-works/pi-coding-agent";
|
||||
import { Type } from "typebox";
|
||||
import { Box, Text } from "@earendil-works/pi-tui";
|
||||
import { runDeepResearch, type ResearchProgress } from "./src/research";
|
||||
import { isFirecrawlReachable } from "./src/firecrawl";
|
||||
import type { ResearchConfig, ResearchReport } from "./src/types";
|
||||
|
||||
/* ── Constants ────────────────────────────────────────────────────── */
|
||||
|
||||
/** Braille spinner glyphs; the progress widget shows one frame at a time. */
const SPINNER_FRAMES = Array.from("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏");

/** Icon displayed next to a progress message, keyed by research phase name. */
const PHASE_ICONS: Record<string, string> = {
  generating_queries: "🔍",
  searching: "🌐",
  analyzing: "📊",
  synthesizing: "📝",
  complete: "✅",
};
|
||||
|
||||
/* ── Helpers ──────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Format a millisecond duration as a short human-readable string.
 *
 * Durations under one minute render as `"<s>s"`, longer ones as
 * `"<m>m <s>s"`. Fractions of a second are truncated. Negative or
 * non-finite inputs are treated as zero so the UI never shows values
 * such as `"-1s"` or `"NaNs"`.
 *
 * @param ms - Elapsed time in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`;
}
|
||||
|
||||
/**
 * Shorten a string to at most `max` characters.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut
 * and suffixed with `"..."` so the result is exactly `max` characters.
 * When `max` is smaller than the ellipsis itself there is no room for a
 * suffix, so the string is hard-cut instead (a negative `max` yields `""`).
 * Without that guard a negative slice end would count from the end of the
 * string and produce output longer than `max`.
 *
 * @param s - Text to shorten.
 * @param max - Maximum length of the returned string.
 * @returns `s` itself, or a shortened copy no longer than `max`.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;
  if (max < 3) return s.slice(0, Math.max(0, max));
  return s.slice(0, max - 3) + "...";
}
|
||||
|
||||
/* ── Tool Definition ──────────────────────────────────────────────── */
|
||||
|
||||
/* Individual parameter schemas, composed into DeepResearchParams below. */

/** The question to research (the only required parameter). */
const questionSchema = Type.String({
  description: "The research question to investigate",
});

/** Number of research rounds. */
const depthSchema = Type.Integer({
  description:
    "Number of research rounds (1-3). Each round uses findings from the previous to generate deeper follow-up queries. Default: 2",
  minimum: 1,
  maximum: 3,
  default: 2,
});

/** Number of search queries issued per round. */
const breadthSchema = Type.Integer({
  description:
    "Number of search queries per round (1-5). More queries = broader coverage. Default: 3",
  minimum: 1,
  maximum: 5,
  default: 3,
});

/** Report output format. */
const formatSchema = Type.Union(
  [Type.Literal("markdown"), Type.Literal("structured")],
  {
    description:
      'Output format for the research report. "markdown" for prose, "structured" for detailed sections. Default: "markdown"',
    default: "markdown",
  },
);

/** Optional output tweaks. */
const detailsSchema = Type.Object({
  showRoundDetails: Type.Optional(
    Type.Boolean({
      description:
        "Include per-round search details in the output. Default: false",
    }),
  ),
});

/** Parameter schema for the `deep_research` tool. */
const DeepResearchParams = Type.Object({
  question: questionSchema,
  depth: Type.Optional(depthSchema),
  breadth: Type.Optional(breadthSchema),
  format: Type.Optional(formatSchema),
  details: Type.Optional(detailsSchema),
});
|
||||
|
||||
/** Summary of a single research round as reported in tool result details. */
interface ResearchRoundSummary {
  /** Round number as reported by the research run. */
  round: number;
  /** Query strings issued in this round. */
  queries: string[];
  /** Number of findings extracted in this round. */
  findingsCount: number;
  /** Number of results collected in this round. */
  resultsCount: number;
}

/** Structured `details` payload attached to a successful tool result. */
interface ResearchDetails {
  /** One summary entry per research round. */
  rounds: ResearchRoundSummary[];
  /** Total searches performed across all rounds. */
  totalSearches: number;
  /** Total pages scraped across all rounds. */
  totalPagesScraped: number;
  /** Duration of the whole run in milliseconds. */
  durationMs: number;
}
|
||||
|
||||
/* ── Extension Entry ───────────────────────────────────────────────── */
|
||||
|
||||
export default function (pi: ExtensionAPI) {
|
||||
pi.registerTool({
|
||||
name: "deep_research",
|
||||
label: "Deep Research",
|
||||
description: [
|
||||
"Conduct multi-round deep web research on any topic using Firecrawl.",
|
||||
"Generates diverse search queries, searches the web in parallel, analyzes results, and produces a comprehensive report.",
|
||||
"Supports iterative refinement: each round builds on findings from the previous one.",
|
||||
"Parameters: question (required), depth (1-3, default 2), breadth (1-5, default 3), format (markdown|structured).",
|
||||
].join(" "),
|
||||
promptSnippet:
|
||||
"deep_research — multi-round deep web research via Firecrawl with iterative query refinement",
|
||||
promptGuidelines: [
|
||||
"Use deep_research for complex, multi-faceted questions that benefit from multiple search angles and iterative refinement.",
|
||||
"The tool handles query generation, web search, result analysis, and report synthesis automatically.",
|
||||
"For simple fact-finding questions, use firecrawl_search directly instead.",
|
||||
],
|
||||
parameters: DeepResearchParams,
|
||||
|
||||
/**
 * Run one `deep_research` tool call.
 *
 * Builds the research config from the call parameters (depth 2, breadth 3
 * and "markdown" when omitted), forwards progress to the "deep-research"
 * widget and to `onUpdate`, and returns the final report as text content
 * with per-round statistics in `details`.
 *
 * Failures are not thrown: they are reported as a result with
 * `isError: true` and the error message in both `content` and `details`.
 * The spinner timer, widget and status line are always cleared on exit.
 *
 * @param _toolCallId - Unused.
 * @param params - Validated tool parameters.
 * @param signal - Optional abort signal passed through to the research run.
 * @param onUpdate - Optional sink for streamed partial results.
 * @param ctx - Extension context; `ctx.ui` is used for widget and status.
 */
async execute(
  _toolCallId: string,
  params: {
    question: string;
    depth?: number;
    breadth?: number;
    format?: "markdown" | "structured";
    details?: { showRoundDetails?: boolean };
  },
  signal: AbortSignal | undefined,
  onUpdate: ((partial: any) => void) | undefined,
  ctx: any,
) {
  const config: ResearchConfig = {
    question: params.question,
    depth: params.depth ?? 2,
    breadth: params.breadth ?? 3,
    format: params.format ?? "markdown",
  };

  // The timer only advances the frame index; the widget is repainted
  // whenever a progress update arrives.
  let spinnerIdx = 0;
  const spinnerTimer = setInterval(() => {
    spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length;
  }, 100);

  let researchResult: ResearchReport | null = null;

  // Wire progress updates to both the widget and onUpdate.
  const onProgress: ResearchProgress = (update) => {
    const icon = PHASE_ICONS[update.phase] ?? "";
    const spinner = SPINNER_FRAMES[spinnerIdx];
    const roundInfo =
      update.round && update.totalRounds
        ? ` Round ${update.round}/${update.totalRounds}`
        : "";

    const lines: string[] = [
      `${spinner} ${icon} ${truncate(update.message, 80)}${roundInfo}`,
    ];
    if (update.detail) {
      lines.push(` ${truncate(update.detail, 76)}`);
    }
    if (update.fraction !== undefined) {
      const barLen = 15;
      // Clamp to [0, 1]: String.prototype.repeat throws a RangeError on a
      // negative count, which an out-of-range fraction would produce.
      const fraction = Number.isFinite(update.fraction)
        ? Math.min(1, Math.max(0, update.fraction))
        : 0;
      const filled = Math.round(barLen * fraction);
      lines.push(` ${"█".repeat(filled)}${"░".repeat(barLen - filled)}`);
    }
    ctx.ui.setWidget("deep-research", lines);

    // Stream the same lines as a partial result.
    if (onUpdate) {
      onUpdate({
        content: [{ type: "text", text: lines.join("\n") }],
        details: {
          phase: update.phase,
          round: update.round,
          message: update.message,
          fraction: update.fraction,
        },
      });
    }
  };

  try {
    // Initial status
    ctx.ui.setStatus(
      "deep-research",
      `🌐 Researching: ${truncate(config.question, 40)}`,
    );

    onProgress({
      phase: "generating_queries",
      message: "Starting deep research...",
      fraction: 0,
    });

    researchResult = await runDeepResearch(config, ctx, onProgress, signal);

    // ── Build the tool result ──────────────────────────────────

    const details: ResearchDetails = {
      rounds: researchResult.rounds.map((r) => ({
        round: r.round,
        queries: r.queries.map((q) => q.query),
        findingsCount: r.findings.length,
        resultsCount: r.results.length,
      })),
      totalSearches: researchResult.totalSearches,
      totalPagesScraped: researchResult.totalPagesScraped,
      durationMs: researchResult.durationMs,
    };

    let output = researchResult.finalReport;
    if (params.details?.showRoundDetails ?? false) {
      output += `\n\n---\n\n## Research Methodology\n\n`;
      for (const round of researchResult.rounds) {
        output += `### Round ${round.round}\n\n`;
        output += `**Queries:**\n`;
        for (const q of round.queries) {
          output += `- "${q.query}" (${q.angle}) — ${q.rationale}\n`;
        }
        output += `\n**Results scraped:** ${round.results.length}\n`;
        output += `**Findings extracted:** ${round.findings.length}\n\n`;
      }
      output += `**Total searches:** ${researchResult.totalSearches}\n`;
      output += `**Total pages scraped:** ${researchResult.totalPagesScraped}\n`;
      output += `**Duration:** ${formatDuration(researchResult.durationMs)}\n`;
    }

    return {
      content: [{ type: "text", text: output }],
      details,
    };
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);

    return {
      content: [
        {
          type: "text",
          text: `Research failed: ${message}`,
        },
      ],
      details: {
        error: message,
        // researchResult is only set once the run has returned, so a
        // non-null value means the failure happened while formatting.
        phase: researchResult
          ? `completed ${researchResult.rounds.length} rounds`
          : "preparation",
      },
      isError: true,
    };
  } finally {
    // Single cleanup path for success and failure alike.
    clearInterval(spinnerTimer);
    ctx.ui.setWidget("deep-research", undefined);
    ctx.ui.setStatus("deep-research", undefined);
  }
},
|
||||
|
||||
// ── TUI: Render the tool call (collapsed view) ──────────────────
|
||||
|
||||
/**
 * Render the one-line summary of a `deep_research` call:
 * tool name, quoted (truncated) question, then depth/breadth/format.
 * Missing arguments are shown with the tool's defaults.
 */
renderCall(
  args: {
    question: string;
    depth?: number;
    breadth?: number;
    format?: string;
  },
  theme: any,
  _context: any,
) {
  const shownQuestion = truncate(args.question ?? "?", 70);
  const options = ` [depth:${args.depth ?? 2} breadth:${args.breadth ?? 3} ${args.format ?? "markdown"}]`;

  const titlePart = theme.fg("toolTitle", theme.bold("deep_research "));
  const questionPart = theme.fg("accent", `"${shownQuestion}"`);
  const optionsPart = theme.fg("muted", options);

  return new Text(titlePart + questionPart + optionsPart, 0, 0);
},
|
||||
|
||||
// ── TUI: Render the tool result (expanded/collapsed) ─────────────
|
||||
|
||||
/**
 * Render a `deep_research` result in the TUI.
 *
 * Collapsed: a one-line headline plus one line per round.
 * Expanded: overall totals followed by each round's statistics and queries.
 *
 * Results whose `details` carry no `rounds` array are rendered as their
 * plain text content. That covers the error result and the streamed
 * progress updates produced by `execute`, neither of which has the
 * `ResearchDetails` shape.
 */
renderResult(
  result: any,
  { expanded }: { expanded: boolean },
  theme: any,
  _context: any,
) {
  const rawDetails = result.details as Partial<ResearchDetails> | undefined;

  // Guard before touching `rounds`: reading `.length` or iterating an
  // undefined `rounds` would throw and break rendering.
  if (!rawDetails || !Array.isArray(rawDetails.rounds)) {
    const text = result.content?.[0]?.text ?? "(no output)";
    return new Text(text, 0, 0);
  }

  const details = rawDetails as ResearchDetails;
  const container = new Box();

  // ── Collapsed view ────────────────────────────────────────────

  if (!expanded) {
    const totalRounds = details.rounds.length;
    const totalFindings = details.rounds.reduce(
      (sum, r) => sum + r.findingsCount,
      0,
    );
    const duration = formatDuration(details.durationMs);

    let text = "";
    text +=
      theme.fg("success", "✓ ") +
      theme.fg("toolTitle", theme.bold("deep research"));
    text += theme.fg(
      "muted",
      ` — ${totalRounds} rounds, ${totalFindings} findings`,
    );
    text += theme.fg("dim", ` (${duration})`);
    text += "\n";

    for (const round of details.rounds) {
      const icon =
        round.findingsCount > 0
          ? theme.fg("success", "✓")
          : theme.fg("muted", "·");
      text += ` ${icon} ${theme.fg("accent", `Round ${round.round}:`)} `;
      text += theme.fg(
        "dim",
        `${round.queries.length} queries, ${round.resultsCount} pages, ${round.findingsCount} findings`,
      );
      text += "\n";
    }

    text += theme.fg("muted", "(Ctrl+O to expand)");
    container.addChild(new Text(text, 0, 0));
    return container;
  }

  // ── Expanded view ─────────────────────────────────────────────

  const headerText =
    theme.fg("toolTitle", theme.bold("Deep Research Results")) +
    "\n" +
    theme.fg("dim", `Duration: ${formatDuration(details.durationMs)} | `) +
    theme.fg("dim", `Searches: ${details.totalSearches} | `) +
    theme.fg("dim", `Pages scraped: ${details.totalPagesScraped}`);
  container.addChild(new Text(headerText, 0, 0));

  for (const round of details.rounds) {
    container.addChild(new Text("", 0, 0)); // Spacer
    const roundHeader = `Round ${round.round}`;
    container.addChild(
      new Text(theme.fg("toolTitle", theme.bold(roundHeader)), 0, 0),
    );
    container.addChild(
      new Text(
        theme.fg(
          "dim",
          `${round.queries.length} queries → ${round.resultsCount} pages → ${round.findingsCount} findings`,
        ),
        0,
        0,
      ),
    );
    for (const q of round.queries) {
      container.addChild(
        new Text(
          theme.fg("muted", " · ") + theme.fg("accent", truncate(q, 70)),
          0,
          0,
        ),
      );
    }
  }

  return container;
},
|
||||
});
|
||||
|
||||
// ── Command ───────────────────────────────────────────────────────
|
||||
|
||||
// `/deep-research <question>`: asks for depth and breadth, runs the
// research with a live progress widget, then posts the report as a user
// message. Failures are reported through a notification.
pi.registerCommand("deep-research", {
  description:
    "Conduct multi-round deep web research on any topic via Firecrawl. Usage: /deep-research <question>",
  handler: async (args: string, ctx: ExtensionCommandContext) => {
    // Trim once so surrounding whitespace never reaches the research
    // question, the status line or the report heading.
    const question = (args ?? "").trim();
    if (question.length === 0) {
      ctx.ui.notify(
        "Usage: /deep-research <your research question>",
        "error",
      );
      return;
    }

    // Ask about depth/breadth. An unrecognised or missing answer falls
    // back to the standard setting (2 rounds, 3 queries).
    const depthStr = await ctx.ui.select("Research depth?", [
      "1 round (quick survey)",
      "2 rounds (standard)",
      "3 rounds (deep dive)",
    ]);
    const depth = depthStr?.startsWith("1")
      ? 1
      : depthStr?.startsWith("3")
        ? 3
        : 2;

    const breadthStr = await ctx.ui.select("Research breadth?", [
      "1 query/round (narrow)",
      "3 queries/round (balanced)",
      "5 queries/round (broad)",
    ]);
    const breadth = breadthStr?.startsWith("1")
      ? 1
      : breadthStr?.startsWith("5")
        ? 5
        : 3;

    ctx.ui.setStatus(
      "deep-research",
      `🌐 Researching: ${truncate(question, 40)}`,
    );

    const config: ResearchConfig = {
      question,
      depth,
      breadth,
      format: "markdown",
    };

    // The timer only advances the frame index; the widget is repainted
    // whenever a progress update arrives.
    let spinnerIdx = 0;
    const spinnerTimer = setInterval(() => {
      spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length;
    }, 100);

    try {
      const onProgress: ResearchProgress = (update) => {
        const icon = PHASE_ICONS[update.phase] ?? "";
        const spinner = SPINNER_FRAMES[spinnerIdx];
        const lines: string[] = [
          `${spinner} ${icon} ${truncate(update.message, 80)}`,
        ];
        if (update.detail) {
          lines.push(` ${truncate(update.detail, 76)}`);
        }
        if (update.fraction !== undefined) {
          const barLen = 15;
          // Clamp to [0, 1]: String.prototype.repeat throws a RangeError
          // on a negative count.
          const fraction = Number.isFinite(update.fraction)
            ? Math.min(1, Math.max(0, update.fraction))
            : 0;
          const filled = Math.round(barLen * fraction);
          lines.push(` ${"█".repeat(filled)}${"░".repeat(barLen - filled)}`);
        }
        ctx.ui.setWidget("deep-research", lines);
      };

      const report = await runDeepResearch(config, ctx, onProgress);

      ctx.ui.notify(
        `Research complete: ${report.rounds.length} rounds, ${report.totalSearches} searches, ${report.totalPagesScraped} pages in ${formatDuration(report.durationMs)}`,
        "info",
      );

      // Send the report as a user message
      pi.sendUserMessage(
        `## Deep Research: ${question}\n\n${report.finalReport}\n\n---\n*${report.rounds.length} rounds · ${report.totalSearches} searches · ${report.totalPagesScraped} pages · ${formatDuration(report.durationMs)}*`,
      );
    } catch (error: unknown) {
      // Interpolate the actual message; without `${}` the user would see
      // the expression source text instead of the error.
      const message = error instanceof Error ? error.message : String(error);
      ctx.ui.notify(`Research failed: ${message}`, "error");
    } finally {
      // Single cleanup path for success and failure alike.
      clearInterval(spinnerTimer);
      ctx.ui.setWidget("deep-research", undefined);
      ctx.ui.setStatus("deep-research", undefined);
    }
  },
});
|
||||
|
||||
// ── Startup check ─────────────────────────────────────────────────
|
||||
|
||||
// On session start, probe Firecrawl and warn the user if it cannot be reached.
pi.on("session_start", async (_event: unknown, ctx: ExtensionContext) => {
  if (await isFirecrawlReachable()) return;

  ctx.ui.notify(
    "Deep Research: Firecrawl endpoint unreachable — searches will fail. Check FIRECRAWL_BASE_URL in settings.json or env.",
    "warning",
  );
});
|
||||
}
|
||||
47
package-lock.json
generated
Normal file
47
package-lock.json
generated
Normal file
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"name": "deep-research",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "deep-research",
|
||||
"version": "1.0.0",
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.0.0",
|
||||
"typescript": "^5.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.19.41",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.41.tgz",
|
||||
"integrity": "sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.9.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
15
package.json
Normal file
15
package.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "deep-research",
|
||||
"version": "1.0.0",
|
||||
"description": "Deep research extension for pi — parallel web research via Firecrawl with iterative query refinement",
|
||||
"private": true,
|
||||
"pi": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
]
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.0.0",
|
||||
"typescript": "^5.3.0"
|
||||
}
|
||||
}
|
||||
155
src/agent.ts
Normal file
155
src/agent.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Deep Research — Agent Session helper
|
||||
*
|
||||
* Uses pi's in-process `createAgentSession` for LLM subtasks
|
||||
* (query generation, result analysis, report synthesis).
|
||||
* Pattern borrowed from ralpi's runAgentSession().
|
||||
*/
|
||||
import {
|
||||
createAgentSession,
|
||||
DefaultResourceLoader,
|
||||
getAgentDir,
|
||||
SessionManager,
|
||||
} from "@earendil-works/pi-coding-agent";
|
||||
import type { AgentSessionEvent } from "@earendil-works/pi-coding-agent";
|
||||
|
||||
/**
 * Counters of tool executions observed during one agent session.
 * Tools whose name is not one of the named counters are tallied under `other`.
 */
export interface ToolUsage {
  /** Executions of the `read` tool. */
  read: number;
  /** Executions of the `write` tool. */
  write: number;
  /** Executions of the `edit` tool. */
  edit: number;
  /** Executions of the `bash` tool. */
  bash: number;
  /** Executions of any other tool. */
  other: number;
}
|
||||
|
||||
/** Outcome of running a prompt through an agent session. */
export interface AgentResult {
  /** Whether the run is considered successful. */
  success: boolean;
  /** Assistant text produced by the run; empty on failure. */
  text: string;
  /** Error message, present when the run failed. */
  error?: string;
  /** Tool executions counted during the run. */
  toolUsage: ToolUsage;
}
|
||||
|
||||
/**
 * Run a prompt through an in-process Pi agent session and return the text
 * of the last assistant message.
 *
 * The session is created in memory with extensions, skills, prompt
 * templates, themes and context files disabled, and with only the
 * `read`, `grep`, `find` and `ls` tools enabled.
 *
 * This function never rejects: every failure is reported through
 * `success: false` and `error`. All resources (timeout, abort listener,
 * event subscription, session) are released on every exit path.
 *
 * @param systemPrompt - System prompt for the session.
 * @param taskPrompt - User prompt to run.
 * @param cwd - Working directory for the resource loader and session.
 * @param timeoutMs - Abort the agent after this many milliseconds; `0` or
 *   less disables the timeout.
 * @param onEvent - Optional observer called with every session event.
 * @param signal - Optional abort signal; aborting it aborts the agent.
 * @returns The run outcome together with the tool usage counters.
 */
export async function runAnalysisAgent(
  systemPrompt: string,
  taskPrompt: string,
  cwd: string,
  timeoutMs: number = 120_000,
  onEvent?: (event: AgentSessionEvent) => void,
  signal?: AbortSignal,
): Promise<AgentResult> {
  const toolUsage: ToolUsage = {
    read: 0,
    write: 0,
    edit: 0,
    bash: 0,
    other: 0,
  };

  type Session = Awaited<ReturnType<typeof createAgentSession>>["session"];
  let session: Session | undefined;
  let unsubscribe: (() => void) | undefined;
  let timedOut = false;

  // Safe to call before the session exists: it is then a no-op.
  const abortSession = () => session?.agent.abort();

  const timeoutHandle: ReturnType<typeof setTimeout> | null =
    timeoutMs > 0
      ? setTimeout(() => {
          timedOut = true;
          abortSession();
        }, timeoutMs)
      : null;

  try {
    // Skip session setup entirely when the caller has already aborted.
    if (signal?.aborted) throw new Error("Aborted");

    const loader = new DefaultResourceLoader({
      cwd,
      agentDir: getAgentDir(),
      noExtensions: true,
      noSkills: true,
      noPromptTemplates: true,
      noThemes: true,
      noContextFiles: true,
    });
    await loader.reload();

    const created = await createAgentSession({
      cwd,
      sessionManager: SessionManager.inMemory(),
      resourceLoader: loader,
      tools: ["read", "grep", "find", "ls"],
      systemPrompt,
    });
    session = created.session;

    signal?.addEventListener("abort", abortSession, { once: true });

    let finalText = "";
    let errorMessage: string | undefined;

    unsubscribe = session.subscribe((event: AgentSessionEvent) => {
      onEvent?.(event);

      if (event.type === "message_end") {
        const message = event.message as {
          role?: string;
          content?: unknown;
          errorMessage?: string;
        };
        if (message.role !== "assistant") return;
        if (message.errorMessage) errorMessage = message.errorMessage;
        // Keep the latest non-empty assistant text.
        const text = extractAssistantText(message.content);
        if (text) finalText = text;
      }

      if (event.type === "tool_execution_start") {
        const name = event.toolName;
        // Own-property check: `in` would also match inherited keys such
        // as "toString" and corrupt the counters object.
        if (Object.prototype.hasOwnProperty.call(toolUsage, name)) {
          (toolUsage as unknown as Record<string, number>)[name]++;
        } else {
          toolUsage.other++;
        }
      }
    });

    // The signal may have fired while the session was being created,
    // before the listener above was attached.
    if (signal?.aborted) throw new Error("Aborted");

    await session.prompt(taskPrompt);
    await session.agent.waitForIdle();

    if (!finalText && (errorMessage || timedOut)) {
      return {
        success: false,
        text: "",
        error: errorMessage ?? `Agent timed out after ${timeoutMs}ms`,
        toolUsage,
      };
    }

    return { success: true, text: finalText.trim(), toolUsage };
  } catch (error: unknown) {
    return {
      success: false,
      text: "",
      error: error instanceof Error ? error.message : String(error),
      toolUsage,
    };
  } finally {
    if (timeoutHandle) clearTimeout(timeoutHandle);
    signal?.removeEventListener("abort", abortSession);
    unsubscribe?.();
    session?.dispose();
  }
}
|
||||
|
||||
/**
 * Pull the plain text out of an assistant message's `content`.
 *
 * A string is returned as-is. For an array, the `text` of every object
 * entry whose `type` is `"text"` is concatenated in order (a missing
 * `text` contributes nothing). Any other value yields `""`.
 */
function extractAssistantText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: unknown[] = [];
  for (const part of content) {
    if (!part || typeof part !== "object") continue;
    const { type, text } = part as { type?: string; text?: string };
    if (type !== "text") continue;
    pieces.push(text ?? "");
  }
  return pieces.join("");
}
|
||||
159
src/firecrawl.ts
Normal file
159
src/firecrawl.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Deep Research — direct Firecrawl HTTP client
|
||||
*
|
||||
* Calls the self-hosted Firecrawl API directly (same approach as the
|
||||
* firecrawl.ts extension)
|
||||
*/
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import * as os from "node:os";
|
||||
import type { SearchResult } from "./types";
|
||||
|
||||
/* ── Config ──────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Resolve the Firecrawl base URL and API key.
 *
 * Values come from the `firecrawl` section of `~/.pi/agent/settings.json`,
 * then from `FIRECRAWL_BASE_URL` / `FIRECRAWL_API_KEY`, and the base URL
 * finally defaults to `http://localhost:3002`. If the settings file cannot
 * be read or used, only the environment and the default apply. Trailing
 * slashes are removed from the base URL.
 */
function loadFirecrawlConfig() {
  const stripTrailingSlashes = (url: string): string =>
    url.replace(/\/+$/, "");
  const settingsFile = path.join(os.homedir(), ".pi", "agent", "settings.json");

  try {
    const parsed = JSON.parse(fs.readFileSync(settingsFile, "utf-8"));
    const section = parsed.firecrawl ?? {};
    const baseUrl = stripTrailingSlashes(
      section.baseUrl ??
        process.env.FIRECRAWL_BASE_URL ??
        "http://localhost:3002",
    );
    const apiKey = section.apiKey ?? process.env.FIRECRAWL_API_KEY;
    return { baseUrl, apiKey };
  } catch {
    // Settings missing or unusable: environment and default only.
    const baseUrl = stripTrailingSlashes(
      process.env.FIRECRAWL_BASE_URL ?? "http://localhost:3002",
    );
    return { baseUrl, apiKey: process.env.FIRECRAWL_API_KEY };
  }
}
|
||||
|
||||
// Configuration is resolved once, when this module is loaded.
const firecrawlConfig = loadFirecrawlConfig();
const BASE_URL = firecrawlConfig.baseUrl;
const API_KEY = firecrawlConfig.apiKey;
|
||||
|
||||
/* ── Helpers ──────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * POST a JSON body to `${BASE_URL}/v1/<endpoint>` and return the parsed
 * JSON response. A bearer `Authorization` header is sent only when an API
 * key is configured.
 *
 * @throws Error when the response status is not OK; the message carries
 *   the status code and up to 500 characters of the response body.
 */
async function firecrawlRequest(
  endpoint: string,
  body: Record<string, unknown>,
  signal?: AbortSignal,
): Promise<unknown> {
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
  };

  const response = await fetch(`${BASE_URL}/v1/${endpoint}`, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(body),
    signal,
  });

  if (response.ok) return response.json();

  const errorBody = await response.text();
  throw new Error(
    `Firecrawl ${endpoint} failed (${response.status}): ${errorBody.slice(0, 500)}`,
  );
}
|
||||
|
||||
/**
 * Probe the Firecrawl endpoint by requesting a links-only scrape of
 * `https://example.com` with a 10 second timeout.
 *
 * @returns `true` when the request completes with an OK status; `false`
 *   on a non-OK status or any thrown error (including the timeout).
 */
export async function isFirecrawlReachable(): Promise<boolean> {
  try {
    const probeHeaders = {
      "Content-Type": "application/json",
      ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
    };
    const probeBody = JSON.stringify({
      url: "https://example.com",
      formats: ["links"],
    });

    const response = await fetch(`${BASE_URL}/v1/scrape`, {
      method: "POST",
      headers: probeHeaders,
      body: probeBody,
      signal: AbortSignal.timeout(10_000),
    });
    return response.ok;
  } catch {
    return false;
  }
}
|
||||
|
||||
/* ── Search ───────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Search the web through Firecrawl's `search` endpoint, asking it to
 * scrape each hit as markdown (main content only).
 *
 * @param query - Search query string.
 * @param limit - Desired number of results; coerced to an integer between
 *   1 and 10 before being sent (non-finite values fall back to 5).
 * @param signal - Optional abort signal for the HTTP request.
 * @returns Results that have markdown or a description. An empty array is
 *   returned when the response is not a successful payload with a `data`
 *   array.
 * @throws Error when the HTTP request itself fails (see firecrawlRequest).
 */
export async function searchWeb(
  query: string,
  limit: number = 5,
  signal?: AbortSignal,
): Promise<SearchResult[]> {
  // Never forward 0, negative or fractional limits to the API.
  const safeLimit = Number.isFinite(limit)
    ? Math.min(Math.max(Math.trunc(limit), 1), 10)
    : 5;

  const body: Record<string, unknown> = {
    query,
    limit: safeLimit,
    scrapeOptions: {
      formats: ["markdown"],
      onlyMainContent: true,
    },
  };

  const result = await firecrawlRequest("search", body, signal);

  if (!result || typeof result !== "object") return [];

  const { success, data } = result as { success?: unknown; data?: unknown };
  // `data` must really be an array; a truthy non-array would crash `.map`.
  if (!success || !Array.isArray(data)) return [];

  // Only genuine strings are kept; anything else becomes "".
  const asText = (value: unknown): string =>
    typeof value === "string" ? value : "";

  return data
    .filter(
      (doc): doc is Record<string, unknown> =>
        !!doc && typeof doc === "object",
    )
    .map((doc) => ({
      title: asText(doc.title),
      url: asText(doc.url),
      description: asText(doc.description),
      markdown: asText(doc.markdown),
    }))
    .filter((r) => r.markdown || r.description);
}
|
||||
|
||||
/* ── Scrape ───────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Scrape a single URL through Firecrawl's `scrape` endpoint.
 *
 * Both the `markdown` and `links` formats are requested so that the
 * returned `links` can actually be populated. The title is taken from
 * `data.title` when present, otherwise from `data.metadata.title`.
 *
 * @param url - Page to scrape.
 * @param signal - Optional abort signal for the HTTP request.
 * @returns Title, markdown and links of the page, or `null` when the
 *   response is not a successful payload with `data`.
 * @throws Error when the HTTP request itself fails (see firecrawlRequest).
 */
export async function scrapeUrl(
  url: string,
  signal?: AbortSignal,
): Promise<{ title: string; markdown: string; links: string[] } | null> {
  const result = await firecrawlRequest(
    "scrape",
    { url, formats: ["markdown", "links"] },
    signal,
  );

  if (!result || typeof result !== "object") return null;

  const { success, data } = result as { success?: unknown; data?: unknown };
  if (!success || !data || typeof data !== "object") return null;

  const page = data as Record<string, unknown>;
  const metadata =
    page.metadata && typeof page.metadata === "object"
      ? (page.metadata as Record<string, unknown>)
      : {};

  const title = [page.title, metadata.title].find(
    (candidate): candidate is string => typeof candidate === "string",
  );
  const links = Array.isArray(page.links)
    ? page.links.filter((link): link is string => typeof link === "string")
    : [];

  return {
    title: title ?? "",
    markdown: typeof page.markdown === "string" ? page.markdown : "",
    links,
  };
}
|
||||
261
src/queries.ts
Normal file
261
src/queries.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
/**
|
||||
* Deep Research — Search query generation & refinement
|
||||
*
|
||||
* Uses an LLM agent to generate search queries from different research
|
||||
* angles, then analyzes results to produce follow-up queries.
|
||||
*/
|
||||
import type { SearchQuery, Finding, ResearchRound } from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
/**
 * System prompt for the first-round query generator. It asks the model to
 * answer with only a JSON array of `{ query, rationale, angle }` objects.
 */
const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic.

Guidelines:
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking)
- Each query should target a specific facet of the question
- Queries should use keywords that search engines rank well (avoid overly long questions)
- Cover contrasting viewpoints and alternative approaches
- Include queries for finding authoritative sources (docs, papers, official sites)
- Prioritize recent information where relevant

Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": why this query will help answer the research question
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"

Example:
[
{"query": "Rust async/await performance benchmarks 2024", "rationale": "Understanding current performance characteristics", "angle": "technical"},
{"query": "Rust vs Go concurrency patterns comparison", "rationale": "Comparative analysis helps contextualize trade-offs", "angle": "comparative"}
]
`;
|
||||
|
||||
/**
 * System prompt for the follow-up query generator. It asks the model to
 * find gaps in the findings so far and to answer with only a JSON array
 * of `{ query, rationale, angle }` objects.
 */
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.

Look for:
- Claims made without sufficient evidence
- Conflicting information that needs resolution
- Angles that haven't been explored yet
- Missing authoritative sources
- Practical implications that need more detail
- Recent developments that might have updated findings

Output ONLY a JSON array of objects with fields:
- "query": the search query string
- "rationale": what gap this query fills or what angle it explores
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
`;
|
||||
|
||||
/**
 * Generate the initial search queries for a research question.
 *
 * The analysis agent is asked for a JSON array of queries. Its reply is
 * parsed tolerantly: if the whole reply is not valid JSON, the text
 * between the first `[` and the last `]` is tried, which covers replies
 * wrapped in code fences or surrounding prose. Entries without a usable
 * string `query` are dropped before the list is cut to `count`.
 *
 * Falls back to `generateFallbackQueries` when the agent fails, returns
 * no text, or yields no usable query.
 *
 * @param question - The research question.
 * @param count - Maximum number of queries to return.
 * @param cwd - Working directory passed to the agent session.
 * @param signal - Optional abort signal passed to the agent session.
 * @returns Up to `count` search queries.
 */
export async function generateQueries(
  question: string,
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const taskPrompt = `Research question: ${question}

Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`;

  const result = await runAnalysisAgent(
    GENERATE_QUERIES_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) {
    return generateFallbackQueries(question, count);
  }

  // Try the reply as-is first, then its outermost [...] span.
  const parseJsonArray = (raw: string): unknown[] | null => {
    const candidates = [raw.trim()];
    const start = raw.indexOf("[");
    const end = raw.lastIndexOf("]");
    if (start !== -1 && end > start) candidates.push(raw.slice(start, end + 1));

    for (const candidate of candidates) {
      try {
        const parsed: unknown = JSON.parse(candidate);
        if (Array.isArray(parsed)) return parsed;
      } catch {
        // Not valid JSON; try the next candidate.
      }
    }
    return null;
  };

  const asText = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const queries = (parseJsonArray(result.text) ?? [])
    .filter(
      (item): item is Record<string, unknown> =>
        !!item && typeof item === "object",
    )
    .map((item) => ({
      query: asText(item.query),
      rationale: asText(item.rationale),
      angle: asText(item.angle) || "technical",
    }))
    // Filter before slicing so unusable entries do not use up the quota.
    .filter((q) => q.query.length > 0)
    .slice(0, count);

  return queries.length > 0
    ? queries
    : generateFallbackQueries(question, count);
}
|
||||
|
||||
/**
 * Generate follow-up queries based on findings from previous rounds.
 *
 * Sends the agent every query already run and every finding collected so far,
 * then normalizes its JSON reply. Unlike `generateQueries` there is no
 * template fallback: an empty array is returned when the agent call fails or
 * yields nothing usable.
 *
 * @param question - The research question being investigated.
 * @param rounds - All completed rounds (their queries and findings feed the prompt).
 * @param count - Maximum number of follow-up queries to return.
 * @param cwd - Working directory handed to the analysis agent.
 * @param signal - Optional abort signal forwarded to the analysis agent.
 * @returns Up to `count` new queries; queries whose text was already used in
 *   an earlier round (case-insensitive) are dropped.
 */
export async function generateFollowUpQueries(
  question: string,
  rounds: ResearchRound[],
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const previousQueries = rounds.flatMap((r) => r.queries);

  // Build a summary of findings so far
  const findingsSummary = rounds
    .flatMap((r) => r.findings)
    .map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`)
    .join("\n");

  const exploredAngles = previousQueries
    .map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`)
    .join("\n");

  const taskPrompt = `Research question: ${question}

Queries already explored:
${exploredAngles}

Findings so far:
${findingsSummary}

Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`;

  // NOTE(review): 60_000 is presumably a timeout in milliseconds and the
  // `undefined` an unused optional argument — confirm against ./agent.
  const result = await runAnalysisAgent(
    FOLLOWUP_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) {
    return [];
  }

  // Models often wrap the array in a markdown fence or add a sentence around
  // it; if the raw reply is not valid JSON, retry on the outermost [...] span.
  const replyText = result.text.trim();
  let parsed: unknown;
  try {
    parsed = JSON.parse(replyText);
  } catch {
    const start = replyText.indexOf("[");
    const end = replyText.lastIndexOf("]");
    try {
      parsed =
        start !== -1 && end > start
          ? JSON.parse(replyText.slice(start, end + 1))
          : undefined;
    } catch {
      parsed = undefined;
    }
  }

  if (!Array.isArray(parsed)) return [];

  // Seed the de-duplication set with everything already searched so a
  // follow-up round never repeats an earlier query verbatim.
  const seen = new Set(previousQueries.map((q) => q.query.trim().toLowerCase()));
  const followUps: SearchQuery[] = [];

  for (const entry of parsed) {
    // Invalid entries are skipped before counting towards `count`.
    if (followUps.length >= count) break;
    if (typeof entry !== "object" || entry === null) continue;

    const raw = entry as Record<string, unknown>;
    const query = typeof raw.query === "string" ? raw.query.trim() : "";
    const key = query.toLowerCase();
    if (query.length === 0 || seen.has(key)) continue;
    seen.add(key);

    followUps.push({
      query,
      rationale: typeof raw.rationale === "string" ? raw.rationale : "",
      angle:
        typeof raw.angle === "string" && raw.angle.length > 0
          ? raw.angle
          : "technical",
    });
  }

  return followUps;
}
|
||||
|
||||
/**
 * Build template queries without the LLM.
 *
 * Appends a fixed description to the question for each of five angles
 * (technical, practical, comparative, critical, forward-looking), in that
 * order, and returns at most `count` of them — never more than five.
 */
function generateFallbackQueries(
  question: string,
  count: number,
): SearchQuery[] {
  const templates: ReadonlyArray<readonly [string, string]> = [
    ["technical", "technical details and specifications"],
    ["practical", "practical examples, tutorials, and best practices"],
    ["comparative", "comparisons with alternatives"],
    ["critical", "limitations, challenges, and criticisms"],
    ["forward-looking", "future trends and developments"],
  ];

  const limit = Math.min(count, templates.length);

  return templates
    .filter((_, index) => index < limit)
    .map(([angle, focus]) => ({
      query: `${question} ${focus}`,
      rationale: `Exploring ${focus} related to the research question`,
      angle,
    }));
}
|
||||
|
||||
/**
 * System prompt for the per-query analysis step. It asks the model to turn
 * search results into findings and pins the reply to a JSON array of
 * `{ title, summary, sources, keyQuotes, confidence }` objects.
 */
const ANALYZE_SYSTEM = [
  `You are a research analyst. Given search results for a specific query, extract key findings.`,
  ``,
  `For each finding:`,
  `- Give it a concise title`,
  `- Summarize what was found in 1-3 sentences`,
  `- List which source URLs support this finding`,
  `- Include 1-2 key quotes from the sources`,
  `- Rate your confidence (high/medium/low) based on source authority and consistency`,
  ``,
  `Output ONLY a JSON array of objects with fields:`,
  `- "title": concise finding title`,
  `- "summary": 1-3 sentence summary`,
  `- "sources": array of source URLs`,
  `- "keyQuotes": array of 1-2 key quotes`,
  `- "confidence": "high" | "medium" | "low"`,
].join("\n");
|
||||
|
||||
/**
 * Analyze search results for a specific query and extract findings.
 *
 * Each result is rendered into the prompt with its title, URL, description
 * and the first 3000 characters of its markdown. The agent's JSON reply is
 * normalized entry by entry, so one malformed entry no longer discards the
 * valid ones.
 *
 * @param query - The search query the results belong to.
 * @param results - Pages returned for that query.
 * @param cwd - Working directory handed to the analysis agent.
 * @param signal - Optional abort signal forwarded to the analysis agent.
 * @returns Findings that have both a title and a summary; an empty array when
 *   there is nothing to analyze, the agent call fails, or the reply contains
 *   no parseable JSON array.
 */
export async function analyzeResults(
  query: string,
  results: {
    title: string;
    url: string;
    description: string;
    markdown: string;
  }[],
  cwd: string,
  signal?: AbortSignal,
): Promise<Finding[]> {
  // Nothing to analyze: skip the agent call instead of inviting it to invent
  // findings from an empty result list.
  if (results.length === 0) return [];

  const resultsText = results
    .map(
      (r, i) =>
        // `?? ""` guards against a page arriving without markdown at runtime.
        `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${(r.markdown ?? "").slice(0, 3000)}`,
    )
    .join("\n\n");

  const taskPrompt = `Search query: "${query}"

Search results:
${resultsText}

Extract key findings from these results.`;

  // NOTE(review): 90_000 is presumably a timeout in milliseconds and the
  // `undefined` an unused optional argument — confirm against ./agent.
  const result = await runAnalysisAgent(
    ANALYZE_SYSTEM,
    taskPrompt,
    cwd,
    90_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) return [];

  // Models often wrap the array in a markdown fence or add a sentence around
  // it; if the raw reply is not valid JSON, retry on the outermost [...] span.
  const replyText = result.text.trim();
  let parsed: unknown;
  try {
    parsed = JSON.parse(replyText);
  } catch {
    const start = replyText.indexOf("[");
    const end = replyText.lastIndexOf("]");
    try {
      parsed =
        start !== -1 && end > start
          ? JSON.parse(replyText.slice(start, end + 1))
          : undefined;
    } catch {
      parsed = undefined;
    }
  }

  if (!Array.isArray(parsed)) return [];

  const findings: Finding[] = [];
  for (const entry of parsed) {
    // Skip null / primitive entries rather than letting them throw.
    if (typeof entry !== "object" || entry === null) continue;

    const raw = entry as Record<string, unknown>;
    const title = typeof raw.title === "string" ? raw.title.trim() : "";
    const summary = typeof raw.summary === "string" ? raw.summary.trim() : "";
    if (title.length === 0 || summary.length === 0) continue;

    const onlyStrings = (value: unknown): string[] =>
      Array.isArray(value)
        ? value.filter((item): item is string => typeof item === "string")
        : [];

    // Accept "High" / "LOW" etc.; anything unrecognized defaults to "medium".
    const level = String(raw.confidence ?? "").toLowerCase();
    const confidence: Finding["confidence"] =
      level === "high" || level === "low" ? level : "medium";

    findings.push({
      title,
      summary,
      sources: onlyStrings(raw.sources),
      keyQuotes: onlyStrings(raw.keyQuotes),
      confidence,
    });
  }

  return findings;
}
|
||||
170
src/report.ts
Normal file
170
src/report.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
/**
|
||||
* Deep Research — Report synthesis
|
||||
*
|
||||
* Takes all research rounds and synthesizes a comprehensive report
|
||||
* using an LLM agent.
|
||||
*/
|
||||
import type { ResearchRound, ResearchConfig } from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
/**
 * System prompt for the final synthesis step: describes the required report
 * structure (summary, themed findings, disagreements, gaps, conclusions) and
 * the writing style the model should follow.
 */
const SYNTHESIS_SYSTEM = [
  `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.`,
  ``,
  `Your report should:`,
  `1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question)`,
  `2. Organize findings by theme, not by search query`,
  `3. Include specific evidence from sources (cite URLs in [brackets])`,
  `4. Note areas of disagreement or uncertainty`,
  `5. Identify knowledge gaps that remain`,
  `6. End with actionable conclusions`,
  ``,
  `Style guidelines:`,
  `- Use clear section headings (## level)`,
  `- Write in an objective, authoritative tone`,
  `- Include bullet points for listing evidence`,
  `- Use inline citations like [source](url)`,
  `- Note the confidence level for key claims`,
  `- Be thorough but concise — every paragraph should add value`,
].join("\n");
|
||||
|
||||
/**
 * Synthesize a research report from all rounds.
 *
 * Builds a markdown evidence document (question, overview counts, findings,
 * and the queries run in each round), hands it to the analysis agent with the
 * synthesis system prompt, and returns the agent's text. When the agent call
 * fails or returns no text, the report is produced by
 * `generateFallbackReport` instead.
 *
 * Findings are grouped by the round that produced them. A `Finding` does not
 * record which query (and therefore which angle) it came from, so labelling
 * findings by angle would require guessing.
 *
 * @param question - The original research question.
 * @param rounds - All completed research rounds.
 * @param config - Session configuration; only `format` is read here.
 * @param cwd - Working directory handed to the analysis agent.
 * @param signal - Optional abort signal forwarded to the analysis agent.
 * @returns The report text.
 */
export async function synthesizeReport(
  question: string,
  rounds: ResearchRound[],
  config: ResearchConfig,
  cwd: string,
  signal?: AbortSignal,
): Promise<string> {
  // Build the evidence summary
  const allFindings = rounds.flatMap((r) => r.findings);
  const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0);
  const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0);

  // Build structured evidence text
  let evidenceText = `## Research Question\n${question}\n\n`;
  evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`;
  evidenceText += `- Total searches executed: ${totalSearches}\n`;
  evidenceText += `- Total pages analyzed: ${totalPages}\n`;
  evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`;

  for (const round of rounds) {
    if (round.findings.length === 0) continue;
    evidenceText += `## Findings: Round ${round.round}\n\n`;
    for (const finding of round.findings) {
      evidenceText += `### ${finding.title}\n`;
      evidenceText += `**Confidence:** ${finding.confidence}\n`;
      evidenceText += `${finding.summary}\n\n`;
      // Only the first key quote is included in the evidence.
      if (finding.keyQuotes.length > 0) {
        evidenceText += `> ${finding.keyQuotes[0]}\n\n`;
      }
      if (finding.sources.length > 0) {
        evidenceText += `Sources: ${finding.sources.map((s: string) => `[${s}](${s})`).join(", ")}\n\n`;
      }
    }
  }

  // Also include raw search context for depth
  evidenceText += `## Raw Search Context\n\n`;
  for (const round of rounds) {
    evidenceText += `### Round ${round.round}\n`;
    for (const q of round.queries) {
      evidenceText += `- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`;
    }
    evidenceText += `\n`;
  }

  const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report.

${evidenceText}

Write a thorough report that answers the original question: "${question}"

Format: ${config.format === "structured" ? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate." : "Well-formatted markdown report with ## headings, bullet points, and inline citations."}`;

  // NOTE(review): 120_000 is presumably a timeout in milliseconds and the
  // `undefined` an unused optional argument — confirm against ./agent.
  const result = await runAnalysisAgent(
    SYNTHESIS_SYSTEM,
    taskPrompt,
    cwd,
    120_000,
    undefined,
    signal,
  );

  if (result.success && result.text) {
    return result.text;
  }

  // Fallback: generate a simple report from the evidence
  return generateFallbackReport(question, rounds);
}
|
||||
|
||||
/**
 * Assemble a plain markdown report directly from the collected rounds.
 *
 * The output has a title, a one-sentence executive summary, a "Key Findings"
 * section (omitted when there are no findings) and a "Search Methodology"
 * section listing each round's queries and counts. For each finding only the
 * first key quote is shown.
 */
function generateFallbackReport(
  question: string,
  rounds: ResearchRound[],
): string {
  const header = [
    `# Research Report: ${question}`,
    "",
    "## Executive Summary",
    "",
    `This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`,
    "",
  ];

  const findings = rounds.flatMap((r) => r.findings);

  const findingLines = findings.flatMap((finding) => {
    const block = [
      `### ${finding.title}`,
      `*Confidence: ${finding.confidence}*`,
      "",
      finding.summary,
      "",
    ];
    if (finding.keyQuotes.length > 0) {
      block.push(`> ${finding.keyQuotes[0]}`, "");
    }
    if (finding.sources.length > 0) {
      block.push(
        "Sources:",
        ...finding.sources.map((src) => `- [${src}](${src})`),
        "",
      );
    }
    return block;
  });

  const findingsSection =
    findings.length > 0 ? ["## Key Findings", "", ...findingLines] : [];

  const methodologyLines = rounds.flatMap((round) => [
    `### Round ${round.round}`,
    `Queries: ${round.queries.map((q) => `"${q.query}"`).join(", ")}`,
    `Pages scraped: ${round.results.length}`,
    `Findings: ${round.findings.length}`,
    "",
  ]);

  return [
    ...header,
    ...findingsSection,
    "## Search Methodology",
    "",
    ...methodologyLines,
  ].join("\n");
}
|
||||
254
src/research.ts
Normal file
254
src/research.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* Deep Research — Core research orchestration
|
||||
*
|
||||
* Manages the multi-round deep research process:
|
||||
* 1. Generate initial search queries
|
||||
 * 2. Execute each round's queries one after another via Firecrawl (with a short delay between requests)
|
||||
* 3. Analyze results and extract findings
|
||||
* 4. Generate follow-up queries
|
||||
* 5. Iterate for depth rounds
|
||||
* 6. Synthesize final report
|
||||
*
|
||||
* Widget and progress callback patterns borrowed from ralpi's executor.
|
||||
*/
|
||||
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
||||
import type {
|
||||
ResearchConfig,
|
||||
SearchResult,
|
||||
ResearchRound,
|
||||
ResearchReport,
|
||||
} from "./types";
|
||||
import { searchWeb } from "./firecrawl";
|
||||
import {
|
||||
generateQueries,
|
||||
generateFollowUpQueries,
|
||||
analyzeResults,
|
||||
} from "./queries";
|
||||
import { synthesizeReport } from "./report";
|
||||
|
||||
/**
 * Callback through which a research session reports what it is doing, so the
 * UI can be updated.
 */
export type ResearchProgress = (update: {
  /** Stage of the pipeline this update refers to. */
  phase:
    | "generating_queries"
    | "searching"
    | "analyzing"
    | "synthesizing"
    | "complete";
  /** Round the update belongs to, when the phase is tied to a round. */
  round?: number;
  /** Number of rounds configured for the session. */
  totalRounds?: number;
  /** Short status line. */
  message: string;
  /** Optional secondary text accompanying the message. */
  detail?: string;
  /** Progress indicator in the range 0-1. */
  fraction?: number;
}) => void;
|
||||
|
||||
/**
 * Run a complete deep research session.
 *
 * Flow:
 *  1. Generate the initial queries (`config.breadth` of them).
 *  2. For each round up to `config.depth`: run the round's queries one after
 *     another through `searchWeb` (300 ms pause between requests), analyze the
 *     pages each query returned, and record the round. Rounds after the first
 *     use follow-up queries; the loop stops early when none are produced.
 *  3. Synthesize the final report from all recorded rounds.
 *
 * Pages are de-duplicated by URL within a round, and each page is analyzed
 * under the first query that returned it, so the analysis prompt always pairs
 * a query with its own results.
 *
 * A failed search or analysis for one query is reported through `onProgress`
 * and does not abort the round. The `fraction` values are per-round phase
 * markers: they restart at 0.25 for every round and are not cumulative across
 * rounds.
 *
 * @param config - Question, depth (rounds), breadth (queries per round), format.
 * @param ctx - Extension context; only `ctx.cwd` is read here.
 * @param onProgress - Receives a status update at each phase and step.
 * @param signal - Optional abort signal, checked between steps and forwarded
 *   to the search and agent calls.
 * @returns The report together with all rounds and aggregate counters.
 * @throws Error "Research cancelled" when `signal` is aborted.
 * @throws Error "Failed to generate any search queries" when no initial query
 *   could be produced.
 */
export async function runDeepResearch(
  config: ResearchConfig,
  ctx: ExtensionContext,
  onProgress: ResearchProgress,
  signal?: AbortSignal,
): Promise<ResearchReport> {
  const startTime = Date.now();
  const rounds: ResearchRound[] = [];
  let totalSearches = 0;
  let totalPages = 0;

  const throwIfCancelled = (): void => {
    if (signal?.aborted) throw new Error("Research cancelled");
  };

  // ── Round 1: Generate initial queries ──────────────────────────────

  onProgress({
    phase: "generating_queries",
    round: 1,
    totalRounds: config.depth,
    message: "Generating initial search queries...",
    fraction: 0,
  });

  throwIfCancelled();

  const initialQueries = await generateQueries(
    config.question,
    config.breadth,
    ctx.cwd,
    signal,
  );

  if (initialQueries.length === 0) {
    throw new Error("Failed to generate any search queries");
  }

  // ── Execute rounds ─────────────────────────────────────────────────

  for (let round = 1; round <= config.depth; round++) {
    throwIfCancelled();

    const currentQueries =
      round === 1
        ? initialQueries
        : await generateFollowUpQueries(
            config.question,
            rounds,
            config.breadth,
            ctx.cwd,
            signal,
          );

    if (currentQueries.length === 0) {
      // No follow-up queries were generated — stop here
      break;
    }

    // ── Search phase ────────────────────────────────────────────────

    onProgress({
      phase: "searching",
      round,
      totalRounds: config.depth,
      message: `Searching with ${currentQueries.length} queries...`,
      fraction: 0.25,
    });

    // resultsByQuery[i] holds the pages first returned by currentQueries[i];
    // seenUrls de-duplicates across all queries of this round.
    const seenUrls = new Set<string>();
    const resultsByQuery: SearchResult[][] = [];

    for (const [i, q] of currentQueries.entries()) {
      throwIfCancelled();

      onProgress({
        phase: "searching",
        round,
        totalRounds: config.depth,
        message: `Searching: "${q.query.slice(0, 60)}..."`,
        detail: q.rationale,
        fraction: 0.25 + (i / currentQueries.length) * 0.25,
      });

      const freshResults: SearchResult[] = [];
      try {
        const results = await searchWeb(q.query, 5, signal);
        for (const page of results) {
          if (seenUrls.has(page.url)) continue;
          seenUrls.add(page.url);
          freshResults.push(page);
        }
      } catch (error) {
        // A cancelled search is a cancellation, not a search failure.
        throwIfCancelled();

        // Individual search failure shouldn't crash the whole round
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "searching",
          round,
          totalRounds: config.depth,
          message: `Search failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
        });
      }
      resultsByQuery.push(freshResults);

      // Small delay between searches to avoid rate limits
      if (i < currentQueries.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 300));
      }
    }

    totalSearches += currentQueries.length;

    const uniqueResults = resultsByQuery.flat();
    totalPages += uniqueResults.length;

    // ── Analyze phase ───────────────────────────────────────────────

    onProgress({
      phase: "analyzing",
      round,
      totalRounds: config.depth,
      message: `Analyzing ${uniqueResults.length} search results...`,
      fraction: 0.6,
    });

    const allFindings: ResearchRound["findings"] = [];

    for (const [i, q] of currentQueries.entries()) {
      throwIfCancelled();

      // Analyze exactly the pages this query returned.
      const queryResults = resultsByQuery[i] ?? [];
      if (queryResults.length === 0) continue;

      onProgress({
        phase: "analyzing",
        round,
        totalRounds: config.depth,
        message: `Analyzing results for "${q.query.slice(0, 40)}..."`,
        fraction: 0.6 + (i / currentQueries.length) * 0.2,
      });

      try {
        const findings = await analyzeResults(
          q.query,
          queryResults,
          ctx.cwd,
          signal,
        );
        allFindings.push(...findings);
      } catch (error) {
        throwIfCancelled();

        // Analysis failure shouldn't crash the round, but it should be visible
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "analyzing",
          round,
          totalRounds: config.depth,
          message: `Analysis failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.6 + ((i + 1) / currentQueries.length) * 0.2,
        });
      }
    }

    // Record this round; low-confidence findings become follow-up topics
    rounds.push({
      round,
      queries: currentQueries,
      results: uniqueResults,
      findings: allFindings,
      followUpTopics: allFindings
        .filter((f) => f.confidence === "low")
        .map((f) => f.title),
    });
  }

  // ── Synthesis phase ────────────────────────────────────────────────

  onProgress({
    phase: "synthesizing",
    message: "Synthesizing research into final report...",
    fraction: 0.9,
  });

  throwIfCancelled();

  const finalReport = await synthesizeReport(
    config.question,
    rounds,
    config,
    ctx.cwd,
    signal,
  );

  const durationMs = Date.now() - startTime;

  onProgress({
    phase: "complete",
    message: "Research complete!",
    fraction: 1.0,
  });

  return {
    question: config.question,
    rounds,
    finalReport,
    totalSearches,
    totalPagesScraped: totalPages,
    durationMs,
  };
}
|
||||
55
src/types.ts
Normal file
55
src/types.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Deep Research — type definitions
|
||||
*/
|
||||
|
||||
/**
 * One page returned by a Firecrawl search.
 */
export interface SearchResult {
  /** Title of the page. */
  title: string;
  /** Address of the page. */
  url: string;
  /** Short description accompanying the search hit. */
  description: string;
  /** Page content as markdown text. */
  markdown: string;
}
|
||||
|
||||
/**
 * A single finding that an analysis agent extracted from search results.
 */
export interface Finding {
  /** Concise title of the finding. */
  title: string;
  /** Short summary of what was found. */
  summary: string;
  /** URLs of the sources supporting the finding. */
  sources: string[];
  /** Quotes taken from the sources. */
  keyQuotes: string[];
  /** How much the analysis trusts the finding. */
  confidence: "high" | "medium" | "low";
}
|
||||
|
||||
/**
 * A generated search query together with the reasoning behind it.
 */
export interface SearchQuery {
  /** Text sent to the search engine. */
  query: string;
  /** Why this query is expected to help. */
  rationale: string;
  /** Label for the perspective the query explores; typed as a free-form string. */
  angle: string;
}
|
||||
|
||||
/**
 * Everything produced by one research round.
 */
export interface ResearchRound {
  /** Number of the round. */
  round: number;
  /** Queries that were run in this round. */
  queries: SearchQuery[];
  /** Pages collected in this round. */
  results: SearchResult[];
  /** Findings extracted from those pages. */
  findings: Finding[];
  /** Follow-up questions or angles suggested by the analysis. */
  followUpTopics: string[];
}
|
||||
|
||||
/**
 * Settings for one research session.
 */
export interface ResearchConfig {
  /** The question to research. */
  question: string;
  /** Number of research rounds (1-3). */
  depth: number;
  /** Number of queries per round (1-5). */
  breadth: number;
  /** Output style of the final report. */
  format: "markdown" | "structured";
}
|
||||
|
||||
/**
 * Result of a finished research session.
 */
export interface ResearchReport {
  /** The question that was researched. */
  question: string;
  /** All rounds that were recorded. */
  rounds: ResearchRound[];
  /** Text of the final report. */
  finalReport: string;
  /** Total number of searches counted for the session. */
  totalSearches: number;
  /** Total number of pages counted for the session. */
  totalPagesScraped: number;
  /** Duration of the session in milliseconds. */
  durationMs: number;
}
|
||||
16
tsconfig.json
Normal file
16
tsconfig.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ES2022",
|
||||
"moduleResolution": "node",
|
||||
"lib": ["ES2022"],
|
||||
"noEmit": true,
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["index.ts", "src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user