Initial commit: deep-research extension
This commit is contained in:
155
src/agent.ts
Normal file
155
src/agent.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Deep Research — Agent Session helper
|
||||
*
|
||||
* Uses pi's in-process `createAgentSession` for LLM subtasks
|
||||
* (query generation, result analysis, report synthesis).
|
||||
* Pattern borrowed from ralpi's runAgentSession().
|
||||
*/
|
||||
import {
|
||||
createAgentSession,
|
||||
DefaultResourceLoader,
|
||||
getAgentDir,
|
||||
SessionManager,
|
||||
} from "@earendil-works/pi-coding-agent";
|
||||
import type { AgentSessionEvent } from "@earendil-works/pi-coding-agent";
|
||||
|
||||
/**
 * Per-tool invocation counters gathered while an agent session runs.
 * A counter is bumped each time a tool execution starts.
 */
export interface ToolUsage {
  /** Executions of the `read` tool. */
  read: number;
  /** Executions of the `write` tool. */
  write: number;
  /** Executions of the `edit` tool. */
  edit: number;
  /** Executions of the `bash` tool. */
  bash: number;
  /** Executions of any tool that has no dedicated counter. */
  other: number;
}
|
||||
|
||||
/** Outcome of a single agent run. */
export interface AgentResult {
  /** `false` when the run failed or produced only an error. */
  success: boolean;
  /** Text of the last assistant message (empty on failure). */
  text: string;
  /** Failure description; only set when `success` is `false`. */
  error?: string;
  /** Tool invocation counters collected during the run. */
  toolUsage: ToolUsage;
}
|
||||
|
||||
/**
 * Run a prompt through an in-process Pi agent session and collect the
 * final assistant text.
 *
 * The session is created in memory with extensions, skills, prompt
 * templates, themes and context files disabled, and is limited to the
 * `read`, `grep`, `find` and `ls` tools.
 *
 * This function never rejects: every failure is reported through the
 * returned `AgentResult` (`success: false` plus `error`).
 *
 * @param systemPrompt System prompt for the session.
 * @param taskPrompt   User prompt to execute.
 * @param cwd          Working directory for the resource loader and session.
 * @param timeoutMs    Abort the agent after this many ms (`<= 0` disables it).
 * @param onEvent      Optional observer that receives every session event.
 * @param signal       Optional external cancellation signal.
 * @returns The last assistant text plus tool usage counters.
 */
export async function runAnalysisAgent(
  systemPrompt: string,
  taskPrompt: string,
  cwd: string,
  timeoutMs: number = 120_000,
  onEvent?: (event: AgentSessionEvent) => void,
  signal?: AbortSignal,
): Promise<AgentResult> {
  type Session = Awaited<ReturnType<typeof createAgentSession>>["session"];

  const toolUsage: ToolUsage = {
    read: 0,
    write: 0,
    edit: 0,
    bash: 0,
    other: 0,
  };

  let session: Session | undefined;
  let unsubscribe: (() => void) | undefined;
  let timedOut = false;

  const timeoutMessage = `Agent timed out after ${timeoutMs}ms`;
  const abortHandler = () => session?.agent.abort();

  // The timer may fire before the session exists; remember that it fired so
  // the timeout is still honoured once session creation finishes.
  const timeoutHandle: ReturnType<typeof setTimeout> | null =
    timeoutMs > 0
      ? setTimeout(() => {
          timedOut = true;
          session?.agent.abort();
        }, timeoutMs)
      : null;

  try {
    const loader = new DefaultResourceLoader({
      cwd,
      agentDir: getAgentDir(),
      noExtensions: true,
      noSkills: true,
      noPromptTemplates: true,
      noThemes: true,
      noContextFiles: true,
    });
    await loader.reload();

    const result = await createAgentSession({
      cwd,
      sessionManager: SessionManager.inMemory(),
      resourceLoader: loader,
      tools: ["read", "grep", "find", "ls"],
      systemPrompt,
    });
    session = result.session;

    // An already-aborted signal never fires "abort" again, so check it
    // explicitly before registering the listener.
    if (signal?.aborted) throw new Error("Aborted");
    if (timedOut) throw new Error(timeoutMessage);
    signal?.addEventListener("abort", abortHandler, { once: true });

    let finalText = "";
    let errorMessage: string | undefined;

    unsubscribe = session.subscribe((event: AgentSessionEvent) => {
      onEvent?.(event);

      if (event.type === "message_end") {
        const message = event.message as {
          role?: string;
          content?: unknown;
          errorMessage?: string;
        };
        if (message.role !== "assistant") return;
        if (message.errorMessage) errorMessage = message.errorMessage;
        const text = extractAssistantText(message.content);
        if (text) finalText = text;
      }

      if (event.type === "tool_execution_start") {
        const name = event.toolName;
        // Own-property check: `in` would also match inherited keys such as
        // "toString" and corrupt the counters.
        if (Object.prototype.hasOwnProperty.call(toolUsage, name)) {
          (toolUsage as unknown as Record<string, number>)[name]++;
        } else {
          toolUsage.other++;
        }
      }
    });

    await session.prompt(taskPrompt);
    await session.agent.waitForIdle();

    if (errorMessage && !finalText) {
      return { success: false, text: "", error: errorMessage, toolUsage };
    }
    if (timedOut && !finalText) {
      return { success: false, text: "", error: timeoutMessage, toolUsage };
    }

    return { success: true, text: finalText.trim(), toolUsage };
  } catch (error) {
    return {
      success: false,
      text: "",
      error: error instanceof Error ? error.message : String(error),
      toolUsage,
    };
  } finally {
    // Single cleanup point for success and failure alike: stop the timer,
    // detach from the (possibly long-lived) signal, and release the session
    // exactly once.
    if (timeoutHandle) clearTimeout(timeoutHandle);
    signal?.removeEventListener("abort", abortHandler);
    unsubscribe?.();
    session?.dispose();
  }
}
|
||||
|
||||
/**
 * Flatten assistant message content into plain text.
 *
 * A string is returned unchanged. An array is scanned for objects whose
 * `type` is `"text"` and their `text` values are concatenated. Anything
 * else yields an empty string.
 */
function extractAssistantText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  let combined = "";
  for (const part of content) {
    if (!part || typeof part !== "object") continue;
    const block = part as { type?: string; text?: string };
    if (block.type !== "text") continue;
    combined += block.text ?? "";
  }
  return combined;
}
|
||||
159
src/firecrawl.ts
Normal file
159
src/firecrawl.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Deep Research — direct Firecrawl HTTP client
|
||||
*
|
||||
* Calls the self-hosted Firecrawl API directly (same approach as the
|
||||
* firecrawl.ts extension)
|
||||
*/
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import * as os from "node:os";
|
||||
import type { SearchResult } from "./types";
|
||||
|
||||
/* ── Config ──────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Resolve the Firecrawl endpoint and API key.
 *
 * Values under the `firecrawl` key of `~/.pi/agent/settings.json` win over
 * the `FIRECRAWL_BASE_URL` / `FIRECRAWL_API_KEY` environment variables. If
 * the settings file cannot be read or used, only the environment (and the
 * localhost default) is consulted. Trailing slashes are stripped from the
 * base URL.
 */
function loadFirecrawlConfig() {
  const DEFAULT_BASE_URL = "http://localhost:3002";
  const settingsFile = path.join(os.homedir(), ".pi", "agent", "settings.json");

  try {
    const rawSettings = fs.readFileSync(settingsFile, "utf-8");
    const firecrawlSettings = JSON.parse(rawSettings).firecrawl ?? {};
    const configuredUrl =
      firecrawlSettings.baseUrl ??
      process.env.FIRECRAWL_BASE_URL ??
      DEFAULT_BASE_URL;
    return {
      baseUrl: configuredUrl.replace(/\/+$/, ""),
      apiKey: firecrawlSettings.apiKey ?? process.env.FIRECRAWL_API_KEY,
    };
  } catch {
    // Missing/unreadable/invalid settings: environment-only configuration.
    const envUrl = process.env.FIRECRAWL_BASE_URL ?? DEFAULT_BASE_URL;
    return {
      baseUrl: envUrl.replace(/\/+$/, ""),
      apiKey: process.env.FIRECRAWL_API_KEY,
    };
  }
}

// Resolved once at module load and shared by every request helper below.
const { baseUrl: BASE_URL, apiKey: API_KEY } = loadFirecrawlConfig();
|
||||
|
||||
/* ── Helpers ──────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * POST a JSON body to a Firecrawl v1 endpoint and return the parsed reply.
 *
 * @param endpoint Path segment appended to `${BASE_URL}/v1/`.
 * @param body     JSON-serialisable request payload.
 * @param signal   Optional signal forwarded to `fetch`.
 * @throws Error when the response status is not OK; the message carries the
 *         status code and up to 500 characters of the response body.
 */
async function firecrawlRequest(
  endpoint: string,
  body: Record<string, unknown>,
  signal?: AbortSignal,
): Promise<unknown> {
  const requestHeaders: Record<string, string> = API_KEY
    ? {
        "Content-Type": "application/json",
        Authorization: `Bearer ${API_KEY}`,
      }
    : { "Content-Type": "application/json" };

  const response = await fetch(`${BASE_URL}/v1/${endpoint}`, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(body),
    signal,
  });

  if (response.ok) return response.json();

  const errorBody = await response.text();
  throw new Error(
    `Firecrawl ${endpoint} failed (${response.status}): ${errorBody.slice(0, 500)}`,
  );
}
|
||||
|
||||
/**
 * Probe the configured Firecrawl instance.
 *
 * Issues a scrape of `https://example.com` (links format) with a 10 second
 * timeout. Resolves `true` only when the response status is OK; any network
 * error, timeout, or non-OK status resolves `false`.
 */
export async function isFirecrawlReachable(): Promise<boolean> {
  const probeHeaders: Record<string, string> = {
    "Content-Type": "application/json",
  };
  if (API_KEY) probeHeaders.Authorization = `Bearer ${API_KEY}`;

  try {
    const { ok } = await fetch(`${BASE_URL}/v1/scrape`, {
      method: "POST",
      headers: probeHeaders,
      body: JSON.stringify({ url: "https://example.com", formats: ["links"] }),
      signal: AbortSignal.timeout(10_000),
    });
    return ok;
  } catch {
    return false;
  }
}
|
||||
|
||||
/* ── Search ───────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Run a web search through Firecrawl's search endpoint, asking it to scrape
 * each hit as main-content markdown.
 *
 * @param query  Search query string.
 * @param limit  Requested number of results (capped at 10).
 * @param signal Optional cancellation signal.
 * @returns Hits that carry markdown or a description; an empty array when
 *          the reply is not a successful object with `data`.
 * @throws Whatever the underlying Firecrawl request throws.
 */
export async function searchWeb(
  query: string,
  limit: number = 5,
  signal?: AbortSignal,
): Promise<SearchResult[]> {
  const payload = await firecrawlRequest(
    "search",
    {
      query,
      limit: Math.min(limit, 10),
      scrapeOptions: {
        formats: ["markdown"],
        onlyMainContent: true,
      },
    },
    signal,
  );

  if (!payload || typeof payload !== "object") return [];

  const { success, data } = payload as {
    success?: boolean;
    data?: Record<string, unknown>[];
    error?: string;
  };
  if (!success || !data) return [];

  return data.flatMap((doc) => {
    const hit: SearchResult = {
      title: (doc.title as string) ?? "",
      url: (doc.url as string) ?? "",
      description: (doc.description as string) ?? "",
      markdown: (doc.markdown as string) ?? "",
    };
    // Drop hits that carry no usable text at all.
    return hit.markdown || hit.description ? [hit] : [];
  });
}
|
||||
|
||||
/* ── Scrape ───────────────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Scrape a single URL and return its title, markdown content and links.
 *
 * Requests both the `markdown` and `links` formats so the `links` field of
 * the result can actually be populated. The title is read from the
 * response's `metadata.title` (where the v1 scrape endpoint reports it),
 * falling back to a top-level `title` if one is present.
 *
 * @param url    Page to scrape.
 * @param signal Optional cancellation signal.
 * @returns The scraped page, or `null` when the reply is not a successful
 *          object with `data`.
 * @throws Whatever the underlying Firecrawl request throws.
 */
export async function scrapeUrl(
  url: string,
  signal?: AbortSignal,
): Promise<{ title: string; markdown: string; links: string[] } | null> {
  const result = await firecrawlRequest(
    "scrape",
    { url, formats: ["markdown", "links"] },
    signal,
  );

  if (!result || typeof result !== "object") return null;

  const res = result as {
    success?: boolean;
    data?: Record<string, unknown>;
    error?: string;
  };

  if (!res.success || !res.data) return null;

  const data = res.data;
  const metadata =
    data.metadata && typeof data.metadata === "object"
      ? (data.metadata as Record<string, unknown>)
      : {};

  let title = "";
  if (typeof metadata.title === "string") title = metadata.title;
  else if (typeof data.title === "string") title = data.title;

  return {
    title,
    markdown: typeof data.markdown === "string" ? data.markdown : "",
    // Validate at runtime instead of trusting a cast.
    links: Array.isArray(data.links)
      ? data.links.filter((link): link is string => typeof link === "string")
      : [],
  };
}
|
||||
261
src/queries.ts
Normal file
261
src/queries.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
/**
|
||||
* Deep Research — Search query generation & refinement
|
||||
*
|
||||
* Uses an LLM agent to generate search queries from different research
|
||||
* angles, then analyzes results to produce follow-up queries.
|
||||
*/
|
||||
import type { SearchQuery, Finding, ResearchRound } from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic.
|
||||
|
||||
Guidelines:
|
||||
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking)
|
||||
- Each query should target a specific facet of the question
|
||||
- Queries should use keywords that search engines rank well (avoid overly long questions)
|
||||
- Cover contrasting viewpoints and alternative approaches
|
||||
- Include queries for finding authoritative sources (docs, papers, official sites)
|
||||
- Prioritize recent information where relevant
|
||||
|
||||
Output ONLY a JSON array of objects with fields:
|
||||
- "query": the search query string
|
||||
- "rationale": why this query will help answer the research question
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
|
||||
|
||||
Example:
|
||||
[
|
||||
{"query": "Rust async/await performance benchmarks 2024", "rationale": "Understanding current performance characteristics", "angle": "technical"},
|
||||
{"query": "Rust vs Go concurrency patterns comparison", "rationale": "Comparative analysis helps contextualize trade-offs", "angle": "comparative"}
|
||||
]
|
||||
`;
|
||||
|
||||
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.
|
||||
|
||||
Look for:
|
||||
- Claims made without sufficient evidence
|
||||
- Conflicting information that needs resolution
|
||||
- Angles that haven't been explored yet
|
||||
- Missing authoritative sources
|
||||
- Practical implications that need more detail
|
||||
- Recent developments that might have updated findings
|
||||
|
||||
Output ONLY a JSON array of objects with fields:
|
||||
- "query": the search query string
|
||||
- "rationale": what gap this query fills or what angle it explores
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
|
||||
`;
|
||||
|
||||
/**
 * Extract a JSON array from raw LLM output.
 *
 * Tries the whole text first, then the contents of a Markdown code fence,
 * then the span between the first `[` and the last `]`, so replies wrapped
 * in fences or surrounded by prose are still usable.
 *
 * @returns The parsed array, or `null` when no candidate parses to an array.
 */
function parseJsonArray(raw: string): unknown[] | null {
  const candidates: string[] = [raw.trim()];

  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i);
  if (fenced) candidates.push(fenced[1].trim());

  const open = raw.indexOf("[");
  const close = raw.lastIndexOf("]");
  if (open !== -1 && close > open) candidates.push(raw.slice(open, close + 1));

  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(candidate);
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // Not valid JSON in this form; try the next candidate.
    }
  }
  return null;
}

/**
 * Convert loosely-typed parsed items into at most `count` search queries.
 * Entries that are not objects or lack a non-empty string `query` are
 * skipped, so they do not consume the quota.
 */
function toSearchQueries(items: unknown[], count: number): SearchQuery[] {
  const queries: SearchQuery[] = [];
  for (const item of items) {
    if (queries.length >= count) break;
    if (!item || typeof item !== "object") continue;

    const record = item as Record<string, unknown>;
    const query = typeof record.query === "string" ? record.query.trim() : "";
    if (!query) continue;

    queries.push({
      query,
      rationale: typeof record.rationale === "string" ? record.rationale : "",
      angle:
        typeof record.angle === "string" && record.angle
          ? record.angle
          : "technical",
    });
  }
  return queries;
}

/**
 * Generate initial search queries for a research question.
 *
 * Asks the analysis agent for `count` queries (60 s timeout). Whenever the
 * agent fails or its output yields no usable query, template-based fallback
 * queries are returned instead.
 *
 * @param question Research question.
 * @param count    Maximum number of queries to return.
 * @param cwd      Working directory handed to the agent session.
 * @param signal   Optional cancellation signal.
 */
export async function generateQueries(
  question: string,
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const taskPrompt = `Research question: ${question}

Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`;

  const result = await runAnalysisAgent(
    GENERATE_QUERIES_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );

  if (result.success && result.text) {
    const parsed = parseJsonArray(result.text);
    if (parsed) {
      const queries = toSearchQueries(parsed, count);
      // Only trust the LLM output when it produced something usable.
      if (queries.length > 0) return queries;
    }
  }

  return generateFallbackQueries(question, count);
}

/**
 * Generate follow-up queries based on findings from previous rounds.
 *
 * The prompt lists every query already explored and a summary of every
 * finding so far (60 s timeout).
 *
 * @param question Research question.
 * @param rounds   Completed research rounds.
 * @param count    Maximum number of queries to return.
 * @param cwd      Working directory handed to the agent session.
 * @param signal   Optional cancellation signal.
 * @returns Follow-up queries, or an empty array when the agent fails or
 *          returns nothing usable.
 */
export async function generateFollowUpQueries(
  question: string,
  rounds: ResearchRound[],
  count: number,
  cwd: string,
  signal?: AbortSignal,
): Promise<SearchQuery[]> {
  const findingsSummary =
    rounds
      .flatMap((r) => r.findings)
      .map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`)
      .join("\n") || "(no findings yet)";

  const exploredAngles = rounds
    .flatMap((r) => r.queries)
    .map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`)
    .join("\n");

  const taskPrompt = `Research question: ${question}

Queries already explored:
${exploredAngles}

Findings so far:
${findingsSummary}

Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`;

  const result = await runAnalysisAgent(
    FOLLOWUP_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) return [];

  const parsed = parseJsonArray(result.text);
  return parsed ? toSearchQueries(parsed, count) : [];
}

/**
 * Fallback query generation when the LLM call fails.
 *
 * Appends a fixed description for each research angle to the question and
 * returns at most `count` queries (never more than the five known angles).
 */
function generateFallbackQueries(
  question: string,
  count: number,
): SearchQuery[] {
  const angles = [
    { angle: "technical", desc: "technical details and specifications" },
    {
      angle: "practical",
      desc: "practical examples, tutorials, and best practices",
    },
    { angle: "comparative", desc: "comparisons with alternatives" },
    { angle: "critical", desc: "limitations, challenges, and criticisms" },
    { angle: "forward-looking", desc: "future trends and developments" },
  ];

  const queries: SearchQuery[] = [];
  for (let i = 0; i < Math.min(count, angles.length); i++) {
    const { angle, desc } = angles[i];
    queries.push({
      query: `${question} ${desc}`,
      rationale: `Exploring ${desc} related to the research question`,
      angle,
    });
  }
  return queries;
}
|
||||
|
||||
const ANALYZE_SYSTEM = `You are a research analyst. Given search results for a specific query, extract key findings.
|
||||
|
||||
For each finding:
|
||||
- Give it a concise title
|
||||
- Summarize what was found in 1-3 sentences
|
||||
- List which source URLs support this finding
|
||||
- Include 1-2 key quotes from the sources
|
||||
- Rate your confidence (high/medium/low) based on source authority and consistency
|
||||
|
||||
Output ONLY a JSON array of objects with fields:
|
||||
- "title": concise finding title
|
||||
- "summary": 1-3 sentence summary
|
||||
- "sources": array of source URLs
|
||||
- "keyQuotes": array of 1-2 key quotes
|
||||
- "confidence": "high" | "medium" | "low"`;
|
||||
|
||||
/**
 * Analyze search results for a specific query and extract findings.
 *
 * Each result contributes its title, URL, description and the first 3000
 * characters of its markdown to the prompt (90 s agent timeout). The
 * agent's reply is expected to contain a JSON array; replies wrapped in a
 * Markdown code fence or surrounded by prose are tolerated.
 *
 * @param query   The search query the results belong to.
 * @param results Search hits to analyze.
 * @param cwd     Working directory handed to the agent session.
 * @param signal  Optional cancellation signal.
 * @returns Findings that have both a title and a summary; an empty array
 *          when the agent fails or its output cannot be parsed.
 */
export async function analyzeResults(
  query: string,
  results: {
    title: string;
    url: string;
    description: string;
    markdown: string;
  }[],
  cwd: string,
  signal?: AbortSignal,
): Promise<Finding[]> {
  const resultsText = results
    .map(
      (r, i) =>
        `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`,
    )
    .join("\n\n");

  const taskPrompt = `Search query: "${query}"

Search results:
${resultsText}

Extract key findings from these results.`;

  const result = await runAnalysisAgent(
    ANALYZE_SYSTEM,
    taskPrompt,
    cwd,
    90_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) return [];

  // Try the raw text, then a fenced block, then the outermost [...] span.
  const raw = result.text;
  const candidates: string[] = [raw.trim()];
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i);
  if (fenced) candidates.push(fenced[1].trim());
  const open = raw.indexOf("[");
  const close = raw.lastIndexOf("]");
  if (open !== -1 && close > open) candidates.push(raw.slice(open, close + 1));

  let parsed: unknown[] | null = null;
  for (const candidate of candidates) {
    try {
      const value: unknown = JSON.parse(candidate);
      if (Array.isArray(value)) {
        parsed = value;
        break;
      }
    } catch {
      // Not valid JSON in this form; try the next candidate.
    }
  }
  if (!parsed) return [];

  const findings: Finding[] = [];
  for (const item of parsed) {
    // Skip malformed entries instead of discarding every finding.
    if (!item || typeof item !== "object") continue;
    const f = item as Record<string, unknown>;

    const title = String(f.title ?? "");
    const summary = String(f.summary ?? "");
    if (!title || !summary) continue;

    const confidence = String(f.confidence);
    findings.push({
      title,
      summary,
      sources: Array.isArray(f.sources) ? f.sources.map(String) : [],
      keyQuotes: Array.isArray(f.keyQuotes) ? f.keyQuotes.map(String) : [],
      confidence:
        confidence === "high" || confidence === "medium" || confidence === "low"
          ? confidence
          : "medium",
    });
  }
  return findings;
}
|
||||
170
src/report.ts
Normal file
170
src/report.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
/**
|
||||
* Deep Research — Report synthesis
|
||||
*
|
||||
* Takes all research rounds and synthesizes a comprehensive report
|
||||
* using an LLM agent.
|
||||
*/
|
||||
import type { ResearchRound, ResearchConfig } from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
const SYNTHESIS_SYSTEM = `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.
|
||||
|
||||
Your report should:
|
||||
1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question)
|
||||
2. Organize findings by theme, not by search query
|
||||
3. Include specific evidence from sources (cite URLs in [brackets])
|
||||
4. Note areas of disagreement or uncertainty
|
||||
5. Identify knowledge gaps that remain
|
||||
6. End with actionable conclusions
|
||||
|
||||
Style guidelines:
|
||||
- Use clear section headings (## level)
|
||||
- Write in an objective, authoritative tone
|
||||
- Include bullet points for listing evidence
|
||||
- Use inline citations like [source](url)
|
||||
- Note the confidence level for key claims
|
||||
- Be thorough but concise — every paragraph should add value`;
|
||||
|
||||
/**
 * Synthesize a research report from all rounds.
 *
 * Builds an evidence document (overview counts, findings grouped by the
 * round that produced them, and the list of queries per round), asks the
 * analysis agent to turn it into a report (120 s timeout), and falls back
 * to a mechanically generated report when the agent fails or returns no
 * text.
 *
 * Findings are grouped by round because a finding carries no link to the
 * query, and therefore the angle, that produced it.
 *
 * @param question Research question.
 * @param rounds   Completed research rounds.
 * @param config   Research configuration; only `format` is read here.
 * @param cwd      Working directory handed to the agent session.
 * @param signal   Optional cancellation signal.
 * @returns The report text.
 */
export async function synthesizeReport(
  question: string,
  rounds: ResearchRound[],
  config: ResearchConfig,
  cwd: string,
  signal?: AbortSignal,
): Promise<string> {
  const allFindings = rounds.flatMap((r) => r.findings);
  const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0);
  const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0);

  // Build structured evidence text
  let evidenceText = `## Research Question\n${question}\n\n`;
  evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`;
  evidenceText += `- Total searches executed: ${totalSearches}\n`;
  evidenceText += `- Total pages analyzed: ${totalPages}\n`;
  evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`;

  for (const round of rounds) {
    if (round.findings.length === 0) continue;

    const angles = Array.from(new Set(round.queries.map((q) => q.angle)));
    evidenceText += `## Findings from round ${round.round}`;
    if (angles.length > 0) {
      evidenceText += ` (angles searched: ${angles.join(", ")})`;
    }
    evidenceText += `\n\n`;

    for (const finding of round.findings) {
      evidenceText += `### ${finding.title}\n`;
      evidenceText += `**Confidence:** ${finding.confidence}\n`;
      evidenceText += `${finding.summary}\n\n`;
      if (finding.keyQuotes.length > 0) {
        evidenceText += `> ${finding.keyQuotes[0]}\n\n`;
      }
      if (finding.sources.length > 0) {
        evidenceText += `Sources: ${finding.sources.map((s: string) => `[${s}](${s})`).join(", ")}\n\n`;
      }
    }
  }

  // Also include raw search context for depth
  evidenceText += `## Raw Search Context\n\n`;
  for (const round of rounds) {
    evidenceText += `### Round ${round.round}\n`;
    for (const q of round.queries) {
      evidenceText += `- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`;
    }
    evidenceText += `\n`;
  }

  const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report.

${evidenceText}

Write a thorough report that answers the original question: "${question}"

Format: ${config.format === "structured" ? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate." : "Well-formatted markdown report with ## headings, bullet points, and inline citations."}`;

  const result = await runAnalysisAgent(
    SYNTHESIS_SYSTEM,
    taskPrompt,
    cwd,
    120_000,
    undefined,
    signal,
  );

  if (result.success && result.text) {
    return result.text;
  }

  // Fallback: generate a simple report from the evidence
  return generateFallbackReport(question, rounds);
}
|
||||
|
||||
/**
 * Build a plain markdown report directly from the collected rounds, for use
 * when LLM synthesis is unavailable.
 *
 * Layout: title, a one-sentence executive summary, a "Key Findings" section
 * (omitted when there are no findings) showing each finding's confidence,
 * summary, first key quote and sources, then a "Search Methodology" section
 * listing queries, page count and finding count per round.
 */
function generateFallbackReport(
  question: string,
  rounds: ResearchRound[],
): string {
  const out: string[] = [
    `# Research Report: ${question}`,
    "",
    "## Executive Summary",
    "",
    `This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`,
    "",
  ];

  const findings = rounds.flatMap((r) => r.findings);
  if (findings.length > 0) {
    out.push("## Key Findings", "");
    findings.forEach((f) => {
      out.push(`### ${f.title}`, `*Confidence: ${f.confidence}*`, "", f.summary, "");
      if (f.keyQuotes.length > 0) {
        out.push(`> ${f.keyQuotes[0]}`, "");
      }
      if (f.sources.length > 0) {
        out.push("Sources:", ...f.sources.map((src) => `- [${src}](${src})`), "");
      }
    });
  }

  out.push("## Search Methodology", "");
  rounds.forEach((r) => {
    const quotedQueries = r.queries.map((q) => `"${q.query}"`).join(", ");
    out.push(
      `### Round ${r.round}`,
      `Queries: ${quotedQueries}`,
      `Pages scraped: ${r.results.length}`,
      `Findings: ${r.findings.length}`,
      "",
    );
  });

  return out.join("\n");
}
|
||||
254
src/research.ts
Normal file
254
src/research.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* Deep Research — Core research orchestration
|
||||
*
|
||||
* Manages the multi-round deep research process:
|
||||
* 1. Generate initial search queries
|
||||
 *   2. Execute the queries one after another via Firecrawl (short delay between requests)
|
||||
* 3. Analyze results and extract findings
|
||||
* 4. Generate follow-up queries
|
||||
* 5. Iterate for depth rounds
|
||||
* 6. Synthesize final report
|
||||
*
|
||||
* Widget and progress callback patterns borrowed from ralpi's executor.
|
||||
*/
|
||||
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
||||
import type {
|
||||
ResearchConfig,
|
||||
SearchResult,
|
||||
ResearchRound,
|
||||
ResearchReport,
|
||||
} from "./types";
|
||||
import { searchWeb } from "./firecrawl";
|
||||
import {
|
||||
generateQueries,
|
||||
generateFollowUpQueries,
|
||||
analyzeResults,
|
||||
} from "./queries";
|
||||
import { synthesizeReport } from "./report";
|
||||
|
||||
/**
 * Callback invoked by the research loop to report progress to the UI.
 */
export type ResearchProgress = (update: {
  /** Stage of the pipeline currently running. */
  phase:
    | "generating_queries"
    | "searching"
    | "analyzing"
    | "synthesizing"
    | "complete";
  /** 1-based round number, when the update belongs to a round. */
  round?: number;
  /** Configured number of rounds. */
  totalRounds?: number;
  /** Short human-readable status line. */
  message: string;
  /** Optional extra context (e.g. the rationale of the current query). */
  detail?: string;
  /** Completion estimate in the range 0-1. */
  fraction?: number;
}) => void;
|
||||
|
||||
/**
 * Run a complete deep research session.
 *
 * Flow: generate initial queries, then for each round (up to
 * `config.depth`) search every query sequentially, de-duplicate hits by
 * URL, analyze each query's own hits, and record the round. Rounds after
 * the first use follow-up queries derived from earlier rounds; the loop
 * stops early when no follow-up queries are produced. Finally the rounds
 * are synthesized into a report.
 *
 * @param config     Research question, depth, breadth and output format.
 * @param ctx        Extension context; only `cwd` is read here.
 * @param onProgress Progress callback for UI updates.
 * @param signal     Optional cancellation signal.
 * @returns The finished report with per-round data and totals.
 * @throws Error("Research cancelled") when `signal` is aborted.
 * @throws Error("Failed to generate any search queries") when no initial
 *         query could be produced.
 */
export async function runDeepResearch(
  config: ResearchConfig,
  ctx: ExtensionContext,
  onProgress: ResearchProgress,
  signal?: AbortSignal,
): Promise<ResearchReport> {
  const startTime = Date.now();
  const rounds: ResearchRound[] = [];
  let totalSearches = 0;
  let totalPages = 0;

  const throwIfCancelled = (): void => {
    if (signal?.aborted) throw new Error("Research cancelled");
  };

  // ── Round 1: Generate initial queries ──────────────────────────────

  onProgress({
    phase: "generating_queries",
    round: 1,
    totalRounds: config.depth,
    message: "Generating initial search queries...",
    fraction: 0,
  });

  throwIfCancelled();

  const queries = await generateQueries(
    config.question,
    config.breadth,
    ctx.cwd,
    signal,
  );

  if (queries.length === 0) {
    throw new Error("Failed to generate any search queries");
  }

  // ── Execute rounds ─────────────────────────────────────────────────

  for (let round = 1; round <= config.depth; round++) {
    throwIfCancelled();

    const currentQueries =
      round === 1
        ? queries
        : await generateFollowUpQueries(
            config.question,
            rounds,
            config.breadth,
            ctx.cwd,
            signal,
          );

    if (!currentQueries || currentQueries.length === 0) {
      // No follow-up queries to run — stop here
      break;
    }

    // ── Search phase ────────────────────────────────────────────────

    onProgress({
      phase: "searching",
      round,
      totalRounds: config.depth,
      message: `Searching with ${currentQueries.length} queries...`,
      fraction: 0.25,
    });

    // Hits are stored per originating query so the analysis phase sees each
    // query with its own results. A URL is kept only for the first query
    // that returned it.
    const seenUrls = new Set<string>();
    const resultsByQuery: SearchResult[][] = currentQueries.map(() => []);

    for (let i = 0; i < currentQueries.length; i++) {
      throwIfCancelled();

      const q = currentQueries[i];
      onProgress({
        phase: "searching",
        round,
        totalRounds: config.depth,
        message: `Searching: "${q.query.slice(0, 60)}..."`,
        detail: q.rationale,
        fraction: 0.25 + (i / currentQueries.length) * 0.25,
      });

      try {
        const hits = await searchWeb(q.query, 5, signal);
        for (const hit of hits) {
          if (seenUrls.has(hit.url)) continue;
          seenUrls.add(hit.url);
          resultsByQuery[i].push(hit);
        }
      } catch (error) {
        // A request aborted by the caller is a cancellation, not a failure.
        throwIfCancelled();

        // Individual search failure shouldn't crash the whole round
        const errorMsg = error instanceof Error ? error.message : String(error);
        onProgress({
          phase: "searching",
          round,
          totalRounds: config.depth,
          message: `Search failed: ${errorMsg.slice(0, 80)}`,
          fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
        });
      }

      // Small delay between searches to avoid rate limits
      if (i < currentQueries.length - 1) {
        await new Promise((r) => setTimeout(r, 300));
      }
    }

    totalSearches += currentQueries.length;

    const uniqueResults = resultsByQuery.flat();
    totalPages += uniqueResults.length;

    // ── Analyze phase ───────────────────────────────────────────────

    onProgress({
      phase: "analyzing",
      round,
      totalRounds: config.depth,
      message: `Analyzing ${uniqueResults.length} search results...`,
      fraction: 0.6,
    });

    const allFindings: ResearchRound["findings"] = [];

    for (let i = 0; i < currentQueries.length; i++) {
      throwIfCancelled();

      const q = currentQueries[i];
      const queryResults = resultsByQuery[i];
      if (queryResults.length === 0) continue;

      onProgress({
        phase: "analyzing",
        round,
        totalRounds: config.depth,
        message: `Analyzing results for "${q.query.slice(0, 40)}..."`,
        fraction: 0.6 + (i / currentQueries.length) * 0.2,
      });

      try {
        const findings = await analyzeResults(
          q.query,
          queryResults,
          ctx.cwd,
          signal,
        );
        allFindings.push(...findings);
      } catch {
        // Analysis failure shouldn't crash the round
      }
    }

    // Record this round
    rounds.push({
      round,
      queries: currentQueries,
      results: uniqueResults,
      findings: allFindings,
      followUpTopics: allFindings
        .filter((f) => f.confidence === "low")
        .map((f) => f.title),
    });
  }

  // ── Synthesis phase ────────────────────────────────────────────────

  onProgress({
    phase: "synthesizing",
    message: "Synthesizing research into final report...",
    fraction: 0.9,
  });

  throwIfCancelled();

  const finalReport = await synthesizeReport(
    config.question,
    rounds,
    config,
    ctx.cwd,
    signal,
  );

  const durationMs = Date.now() - startTime;

  onProgress({
    phase: "complete",
    message: "Research complete!",
    fraction: 1.0,
  });

  return {
    question: config.question,
    rounds,
    finalReport,
    totalSearches,
    totalPagesScraped: totalPages,
    durationMs,
  };
}
|
||||
55
src/types.ts
Normal file
55
src/types.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Deep Research — type definitions
|
||||
*/
|
||||
|
||||
/** One hit returned by a Firecrawl search. */
export interface SearchResult {
  /** Page title (empty string when absent). */
  title: string;
  /** Page URL; used as the de-duplication key. */
  url: string;
  /** Short description of the hit. */
  description: string;
  /** Scraped page content as markdown. */
  markdown: string;
}
|
||||
|
||||
/** A single insight the analysis agent extracted from search results. */
export interface Finding {
  /** Concise headline for the finding. */
  title: string;
  /** Short summary of what was found. */
  summary: string;
  /** URLs of the sources backing the finding. */
  sources: string[];
  /** Key quotes taken from the sources. */
  keyQuotes: string[];
  /** Confidence rating assigned by the analysis agent. */
  confidence: "high" | "medium" | "low";
}
|
||||
|
||||
/** A search query together with the reasoning behind it. */
export interface SearchQuery {
  /** Query string sent to the search engine. */
  query: string;
  /** Why this query helps answer the research question. */
  rationale: string;
  /** Research angle label (e.g. "technical", "comparative"). */
  angle: string;
}
|
||||
|
||||
/** Everything produced by one research round. */
export interface ResearchRound {
  /** 1-based round number. */
  round: number;
  /** Queries executed in this round. */
  queries: SearchQuery[];
  /** Search hits collected in this round. */
  results: SearchResult[];
  /** Findings extracted from this round's results. */
  findings: Finding[];
  /** Any follow-up questions/angles the analysis suggests */
  followUpTopics: string[];
}
|
||||
|
||||
/** Settings that drive one research session. */
export interface ResearchConfig {
  /** The question being researched. */
  question: string;
  /** Number of research rounds (1-3). */
  depth: number;
  /** Queries per round (1-5). */
  breadth: number;
  /** Output style requested for the final report. */
  format: "markdown" | "structured";
}
|
||||
|
||||
/** Result of a completed research session. */
export interface ResearchReport {
  /** The question that was researched. */
  question: string;
  /** Per-round queries, results and findings. */
  rounds: ResearchRound[];
  /** Synthesized report text. */
  finalReport: string;
  /** Number of search queries executed across all rounds. */
  totalSearches: number;
  /** Number of unique pages collected across all rounds. */
  totalPagesScraped: number;
  /** Wall-clock duration of the session in milliseconds. */
  durationMs: number;
}
|
||||
Reference in New Issue
Block a user