From b6c2b10eb56bbce22c750c650a2fad09ef3254c4 Mon Sep 17 00:00:00 2001 From: Michael Freno Date: Sun, 31 May 2026 21:11:44 -0400 Subject: [PATCH] improved validity verification, ui responsiveness --- .gitignore | 1 + README.md | 3 +- index.ts | 282 +++++++++++++++++----------- package-lock.json | 47 ----- src/firecrawl.ts | 166 ++++++++++++++++- src/queries.ts | 278 ++++++++++++++++++++++++++-- src/report.ts | 459 +++++++++++++++++++++++++++++++++++++--------- src/research.ts | 337 +++++++++++++++++++++++++--------- src/types.ts | 47 ++++- 9 files changed, 1265 insertions(+), 355 deletions(-) delete mode 100644 package-lock.json diff --git a/.gitignore b/.gitignore index 7442ff2..5afeedc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ node_modules/ dist/ .pi-lens/ AGENTS.md +package-lock.json diff --git a/README.md b/README.md index 7ba9487..77ffff8 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ pi install npm:@mikefreno/deep-research - **Multi-round iteration**: Each round generates follow-up queries based on previous findings (depth 1-3) - **Parallel query expansion**: Multiple diverse search queries per round (breadth 1-5) covering technical, practical, comparative, critical, and forward-looking angles +- **Round-robin parallel execution**: Searches and analyses run concurrently within each round using bounded-concurrency worker pools, dramatically reducing total research time - **LLM-driven analysis**: Each round's results are analyzed by an agent session to extract structured findings with confidence ratings - **Automatic deduplication**: Search results are deduplicated by URL across all queries - **Graceful degradation**: Individual search or analysis failures don't crash the full research — partial results are preserved @@ -40,7 +41,7 @@ Parameters: ### Command (interactive) ``` -/deep-research +/deepi-research ``` Prompts for depth (1-3 rounds) and breadth (1-5 queries) interactively, then runs the research and sends the final report as a 
user message. diff --git a/index.ts b/index.ts index d799950..f3fd561 100644 --- a/index.ts +++ b/index.ts @@ -3,7 +3,7 @@ * * Registers: * - `deep_research` tool — callable by the LLM to conduct deep research - * - `/deep-research` command — interactive session invocation + * - `/deepi-research` command — interactive session invocation * * Architecture: * Each research round generates queries, searches in parallel via @@ -24,12 +24,13 @@ import { Type } from "typebox"; import { Box, Text } from "@earendil-works/pi-tui"; import { runDeepResearch, type ResearchProgress } from "./src/research"; import { isFirecrawlReachable } from "./src/firecrawl"; -import type { ResearchConfig, ResearchReport } from "./src/types"; +import type { ResearchConfig, ResearchReport, Audience } from "./src/types"; /* ── Constants ────────────────────────────────────────────────────── */ const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]; const PHASE_ICONS: Record<string, string> = { + decomposing: "🧩", generating_queries: "🔍", searching: "🌐", analyzing: "📊", @@ -37,6 +38,8 @@ const PHASE_ICONS: Record<string, string> = { complete: "✅", }; +type ResearchPhase = Parameters<ResearchProgress>[0]["phase"]; + /* ── Helpers ──────────────────────────────────────────────────────── */ function formatDuration(ms: number): string { @@ -60,7 +63,7 @@ const DeepResearchParams = Type.Object({ depth: Type.Optional( Type.Integer({ description: - "Number of research rounds (1-3). Each round uses findings from the previous to generate deeper follow-up queries. Default: 2", + "Number of research rounds (1-3). Each round builds on findings from the previous for deeper analysis. Default: 2", minimum: 1, maximum: 3, default: 2, @@ -69,7 +72,7 @@ const DeepResearchParams = Type.Object({ breadth: Type.Optional( Type.Integer({ description: - "Number of search queries per round (1-5). More queries = broader coverage. Default: 3", + "Number of search queries per round (1-5). More queries = broader coverage but slower. 
Default: 3", minimum: 1, maximum: 5, default: 3, @@ -78,16 +81,30 @@ const DeepResearchParams = Type.Object({ format: Type.Optional( Type.Union([Type.Literal("markdown"), Type.Literal("structured")], { description: - 'Output format for the research report. "markdown" for prose, "structured" for detailed sections. Default: "markdown"', + 'Output format for the research report. "markdown" for prose with headings, "structured" for detailed hierarchical sections. Default: "markdown"', default: "markdown", }), ), + audience: Type.Optional( + Type.Union( + [ + Type.Literal("general"), + Type.Literal("expert"), + Type.Literal("executive"), + ], + { + description: + "Target audience for the report. 'general' (accessible), 'expert' (technical depth), 'executive' (concise, action-oriented). Default: 'general'", + default: "general", + }, + ), + ), details: Type.Optional( Type.Object({ showRoundDetails: Type.Optional( Type.Boolean({ description: - "Include per-round search details in the output. Default: false", + "Include per-round search methodology in the output. Default: false", }), ), }), @@ -106,6 +123,88 @@ interface ResearchDetails { durationMs: number; } +/* ── Widget Helper ────────────────────────────────────────────────── */ + +/** + * Create a widget state that drives a spinner-based progress widget. + * Returns the state object, the timer, and cleanup function. 
+ */ +function createProgressWidget( + ctx: any, + initialPhase: ResearchPhase = "generating_queries", +) { + const state: { + phase: ResearchPhase; + message: string; + detail: string | undefined; + fraction: number; + round: number | undefined; + totalRounds: number | undefined; + } = { + phase: initialPhase, + message: "Starting...", + detail: undefined, + fraction: 0, + round: undefined, + totalRounds: undefined, + }; + + let widgetTui: { requestRender(): void } | null = null; + let spinnerIdx = 0; + + ctx.ui.setWidget( + "deep-research", + (tui: { requestRender(): void }, _theme: any) => { + widgetTui = tui; + return { + render: () => { + const spinner = SPINNER_FRAMES[spinnerIdx]; + const icon = PHASE_ICONS[state.phase] ?? ""; + const roundInfo = + state.round && state.totalRounds + ? ` Round ${state.round}/${state.totalRounds}` + : ""; + const lines: string[] = [ + `${spinner} ${icon} ${truncate(state.message, 80)}${roundInfo}`, + ]; + if (state.detail) { + lines.push(` ${truncate(state.detail, 76)}`); + } + if (state.fraction > 0) { + const barLen = 15; + const filled = Math.round(barLen * state.fraction); + const bar = "█".repeat(filled) + "░".repeat(barLen - filled); + lines.push(` ${bar}`); + } + return lines; + }, + invalidate: () => {}, + }; + }, + ); + + const spinnerTimer = setInterval(() => { + spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length; + widgetTui?.requestRender(); + }, 100); + + const onProgress: ResearchProgress = (update) => { + state.phase = update.phase; + state.message = update.message; + state.detail = update.detail; + state.fraction = update.fraction ?? 
0; + state.round = update.round; + state.totalRounds = update.totalRounds; + }; + + const cleanup = () => { + clearInterval(spinnerTimer); + ctx.ui.setWidget("deep-research", undefined); + }; + + return { state, onProgress, cleanup, spinnerTimer }; +} + /* ── Extension Entry ───────────────────────────────────────────────── */ export default function (pi: ExtensionAPI) { @@ -114,16 +213,18 @@ export default function (pi: ExtensionAPI) { label: "Deep Research", description: [ "Conduct multi-round deep web research on any topic using Firecrawl.", - "Generates diverse search queries, searches the web in parallel, analyzes results, and produces a comprehensive report.", - "Supports iterative refinement: each round builds on findings from the previous one.", - "Parameters: question (required), depth (1-3, default 2), breadth (1-5, default 3), format (markdown|structured).", + "Generates diverse search queries, searches the web in parallel, analyzes results,", + "and produces a comprehensive report with numbered citations and a bibliography.", + "Supports iterative refinement and sub-question decomposition for deeper analysis.", + "Parameters: question (required), depth, breadth, format, audience, details.", ].join(" "), promptSnippet: - "deep_research — multi-round deep web research via Firecrawl with iterative query refinement", + "deep_research — multi-round deep web research via Firecrawl with iterative query refinement, sub-question decomposition, source authority scoring, and numbered citations", promptGuidelines: [ "Use deep_research for complex, multi-faceted questions that benefit from multiple search angles and iterative refinement.", "The tool handles query generation, web search, result analysis, and report synthesis automatically.", "For simple fact-finding questions, use firecrawl_search directly instead.", + "Set audience to 'executive' for concise, action-oriented reports; 'expert' for technical depth; 'general' (default) for accessible reports.", ], 
parameters: DeepResearchParams, @@ -134,6 +235,7 @@ export default function (pi: ExtensionAPI) { depth?: number; breadth?: number; format?: "markdown" | "structured"; + audience?: Audience; details?: { showRoundDetails?: boolean }; }, signal: AbortSignal | undefined, @@ -145,60 +247,17 @@ export default function (pi: ExtensionAPI) { depth: params.depth ?? 2, breadth: params.breadth ?? 3, format: params.format ?? "markdown", + audience: params.audience ?? "general", }; - // Use provided signals const abortSignal = signal; - - // Wire progress updates to both the widget and onUpdate - let spinnerIdx = 0; - const spinnerTimer = setInterval(() => { - spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length; - }, 100); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const { state: _state, onProgress, cleanup } = createProgressWidget(ctx); let researchResult: ResearchReport | null = null; let lastError: string | null = null; - const onProgress: ResearchProgress = (update) => { - const icon = PHASE_ICONS[update.phase] ?? ""; - const spinner = SPINNER_FRAMES[spinnerIdx]; - const roundInfo = - update.round && update.totalRounds - ? 
` Round ${update.round}/${update.totalRounds}` - : ""; - - // Update widget - const lines: string[] = [ - `${spinner} ${icon} ${truncate(update.message, 80)}${roundInfo}`, - ]; - if (update.detail) { - lines.push(` ${truncate(update.detail, 76)}`); - } - if (update.fraction !== undefined) { - const barLen = 15; - const filled = Math.round(barLen * update.fraction); - const bar = "█".repeat(filled) + "░".repeat(barLen - filled); - lines.push(` ${bar}`); - } - ctx.ui.setWidget("deep-research", lines); - - // Stream partial results via onUpdate - if (onUpdate) { - const partialText = lines.join("\n"); - onUpdate({ - content: [{ type: "text", text: partialText }], - details: { - phase: update.phase, - round: update.round, - message: update.message, - fraction: update.fraction, - }, - }); - } - }; - try { - // Initial status ctx.ui.setStatus( "deep-research", `🌐 Researching: ${truncate(config.question, 40)}`, @@ -231,10 +290,30 @@ export default function (pi: ExtensionAPI) { durationMs: researchResult.durationMs, }; - const showRoundDetails = params.details?.showRoundDetails ?? 
false; + // Stream final content via onUpdate before returning + if (onUpdate) { + onUpdate({ + content: [{ type: "text", text: researchResult.finalReport }], + details: { + phase: "complete", + duration: researchResult.durationMs, + rounds: researchResult.rounds.length, + findings: researchResult.rounds.reduce( + (s, r) => s + r.findings.length, + 0, + ), + references: researchResult.references.length, + }, + }); + } + + cleanup(); + ctx.ui.setStatus("deep-research", undefined); let output = researchResult.finalReport; - if (showRoundDetails) { + + // Append methodology section if requested + if (params.details?.showRoundDetails) { output += `\n\n---\n\n## Research Methodology\n\n`; for (const round of researchResult.rounds) { output += `### Round ${round.round}\n\n`; @@ -247,21 +326,16 @@ export default function (pi: ExtensionAPI) { } output += `**Total searches:** ${researchResult.totalSearches}\n`; output += `**Total pages scraped:** ${researchResult.totalPagesScraped}\n`; + output += `**Sources in bibliography:** ${researchResult.references.length}\n`; output += `**Duration:** ${formatDuration(researchResult.durationMs)}\n`; } - // Clean up widget - clearInterval(spinnerTimer); - ctx.ui.setWidget("deep-research", undefined); - ctx.ui.setStatus("deep-research", undefined); - return { content: [{ type: "text", text: output }], details, }; } catch (error) { - clearInterval(spinnerTimer); - ctx.ui.setWidget("deep-research", undefined); + cleanup(); ctx.ui.setStatus("deep-research", undefined); lastError = error instanceof Error ? error.message : String(error); @@ -274,11 +348,12 @@ export default function (pi: ExtensionAPI) { }, ], details: { + rounds: [], + totalSearches: 0, + totalPagesScraped: 0, + durationMs: 0, error: lastError, - phase: researchResult - ? 
`completed ${researchResult.rounds.length} rounds` - : "preparation", - }, + } as ResearchDetails & { error: string }, isError: true, }; } @@ -292,6 +367,7 @@ export default function (pi: ExtensionAPI) { depth?: number; breadth?: number; format?: string; + audience?: string; }, theme: any, _context: any, @@ -300,11 +376,15 @@ export default function (pi: ExtensionAPI) { const depth = args.depth ?? 2; const breadth = args.breadth ?? 3; const format = args.format ?? "markdown"; + const audience = args.audience ?? "general"; const text = theme.fg("toolTitle", theme.bold("deep_research ")) + theme.fg("accent", `"${question}"`) + - theme.fg("muted", ` [depth:${depth} breadth:${breadth} ${format}]`); + theme.fg( + "muted", + ` [depth:${depth} breadth:${breadth} ${format} ${audience}]`, + ); return new Text(text, 0, 0); }, @@ -407,19 +487,19 @@ export default function (pi: ExtensionAPI) { // ── Command ─────────────────────────────────────────────────────── - pi.registerCommand("deep-research", { + pi.registerCommand("deepi-research", { description: - "Conduct multi-round deep web research on any topic via Firecrawl. Usage: /deep-research ", + "Conduct multi-round deep web research on any topic via Firecrawl. Usage: /deepi-research ", handler: async (args: string, ctx: ExtensionCommandContext) => { if (!args || args.trim().length === 0) { ctx.ui.notify( - "Usage: /deep-research ", + "Usage: /deepi-research ", "error", ); return; } - // Ask about depth/breadth + // Ask about depth const depthStr = await ctx.ui.select("Research depth?", [ "1 round (quick survey)", "2 rounds (standard)", @@ -431,6 +511,7 @@ export default function (pi: ExtensionAPI) { ? 3 : 2; + // Ask about breadth const breadthStr = await ctx.ui.select("Research breadth?", [ "1 query/round (narrow)", "3 queries/round (balanced)", @@ -442,7 +523,18 @@ export default function (pi: ExtensionAPI) { ? 
5 : 3; - // Create a promise-based interaction + // Ask about audience + const audienceStr = await ctx.ui.select("Report audience?", [ + "General (accessible, explains terms)", + "Expert (technical depth, assumes domain knowledge)", + "Executive (concise, action-oriented)", + ]); + const audience: Audience = audienceStr?.startsWith("Expert") + ? "expert" + : audienceStr?.startsWith("Executive") + ? "executive" + : "general"; + ctx.ui.setStatus( "deep-research", `🌐 Researching: ${truncate(args, 40)}`, @@ -453,56 +545,32 @@ export default function (pi: ExtensionAPI) { depth, breadth, format: "markdown", + audience, }; - let spinnerIdx = 0; - const spinnerTimer = setInterval(() => { - spinnerIdx = (spinnerIdx + 1) % SPINNER_FRAMES.length; - }, 100); + const { onProgress, cleanup } = createProgressWidget(ctx); try { - const onProgress: ResearchProgress = (update) => { - const icon = PHASE_ICONS[update.phase] ?? ""; - const spinner = SPINNER_FRAMES[spinnerIdx]; - const lines: string[] = [ - `${spinner} ${icon} ${truncate(update.message, 80)}`, - ]; - if (update.detail) { - lines.push(` ${truncate(update.detail, 76)}`); - } - if (update.fraction !== undefined) { - const barLen = 15; - const filled = Math.round(barLen * update.fraction); - const bar = "█".repeat(filled) + "░".repeat(barLen - filled); - lines.push(` ${bar}`); - } - ctx.ui.setWidget("deep-research", lines); - }; - const report = await runDeepResearch(config, ctx, onProgress); - clearInterval(spinnerTimer); - ctx.ui.setWidget("deep-research", undefined); + cleanup(); ctx.ui.setStatus("deep-research", undefined); // Show notification ctx.ui.notify( - `Research complete: ${report.rounds.length} rounds, ${report.totalSearches} searches, ${report.totalPagesScraped} pages in ${formatDuration(report.durationMs)}`, + `Research complete: ${report.rounds.length} rounds, ${report.totalSearches} searches, ${report.totalPagesScraped} pages, ${report.references.length} sources in ${formatDuration(report.durationMs)}`, 
"info", ); // Send the report as a user message pi.sendUserMessage( - `## Deep Research: ${args}\n\n${report.finalReport}\n\n---\n*${report.rounds.length} rounds · ${report.totalSearches} searches · ${report.totalPagesScraped} pages · ${formatDuration(report.durationMs)}*`, + `## Deep Research: ${args}\n\n${report.finalReport}\n\n---\n*${report.rounds.length} rounds · ${report.totalSearches} searches · ${report.totalPagesScraped} pages · ${report.references.length} sources · ${formatDuration(report.durationMs)}*`, ); } catch (error) { - clearInterval(spinnerTimer); - ctx.ui.setWidget("deep-research", undefined); + cleanup(); ctx.ui.setStatus("deep-research", undefined); - ctx.ui.notify( - `Research failed: error instanceof Error ? error.message : String(error)`, - "error", - ); + const msg = error instanceof Error ? error.message : String(error); + ctx.ui.notify(`Research failed: ${msg}`, "error"); } }, }); diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index 2ad565c..0000000 --- a/package-lock.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "deep-research", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "deep-research", - "version": "1.0.0", - "devDependencies": { - "@types/node": "^20.0.0", - "typescript": "^5.3.0" - } - }, - "node_modules/@types/node": { - "version": "20.19.41", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.41.tgz", - "integrity": "sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/typescript": { - "version": "5.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", - "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": 
"bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/undici-types": { - "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, - "license": "MIT" - } - } -} diff --git a/src/firecrawl.ts b/src/firecrawl.ts index 70308bc..4b14c4a 100644 --- a/src/firecrawl.ts +++ b/src/firecrawl.ts @@ -7,7 +7,7 @@ import * as fs from "node:fs"; import * as path from "node:path"; import * as os from "node:os"; -import type { SearchResult } from "./types"; +import type { SearchResult, EnrichedSearchResult, ContentType } from "./types"; /* ── Config ──────────────────────────────────────────────────────── */ @@ -36,6 +36,159 @@ function loadFirecrawlConfig() { const { baseUrl: BASE_URL, apiKey: API_KEY } = loadFirecrawlConfig(); +/* ── Domain Authority Heuristics ─────────────────────────────────── */ + +/** + * Known high-authority domains and their authority scores (0.0 – 1.0). + * Academic, official, and established technical sources score highest. 
+ */ +const AUTHORITY_DOMAINS: Record = { + // Academic & scholarly + "arxiv.org": 0.95, + "scholar.google.com": 0.95, + "pubmed.ncbi.nlm.nih.gov": 0.95, + "semanticscholar.org": 0.9, + "ieee.org": 0.95, + "acm.org": 0.95, + "springer.com": 0.9, + "sciencedirect.com": 0.9, + "wiley.com": 0.85, + "nature.com": 0.95, + "science.org": 0.95, + "plos.org": 0.85, + // Official documentation + "docs.python.org": 0.9, + "developer.mozilla.org": 0.9, + "learn.microsoft.com": 0.85, + "developer.apple.com": 0.85, + "kubernetes.io": 0.85, + "react.dev": 0.85, + "nextjs.org": 0.8, + // Government & non-profits + ".gov": 0.9, + ".edu": 0.85, + "who.int": 0.9, + "worldbank.org": 0.85, + "oecd.org": 0.85, + // Established tech & news + "github.com": 0.8, + "stackoverflow.com": 0.7, + "medium.com": 0.4, + "dev.to": 0.5, + "wikipedia.org": 0.7, + "reuters.com": 0.8, + "apnews.com": 0.8, + "bbc.com": 0.75, + "nytimes.com": 0.75, + "theguardian.com": 0.7, + "techcrunch.com": 0.6, + "arstechnica.com": 0.65, + "wired.com": 0.65, + "infoworld.com": 0.55, +}; + +/** Content-type hints based on domain patterns */ +const CONTENT_TYPE_HINTS: [RegExp, ContentType][] = [ + [ + /arxiv\.org|semanticscholar|ieee\.org|acm\.org|springer|sciencedirect|pubmed\.ncbi/, + "paper", + ], + [ + /docs\.|learn\.|developer\.|kubernetes\.io|react\.dev|nextjs\.org/, + "documentation", + ], + [/wikipedia\.org|stackoverflow\.com|medium\.com|dev\.to/, "forum"], + [ + /reuters\.com|apnews\.com|bbc\.com|nytimes\.com|techcrunch|arstechnica|wired/, + "news", + ], + [/\.gov|\.edu|who\.int|worldbank|oecd\.org/, "official"], + [/github\.com/, "documentation"], +]; + +/* ── Source enrichment helpers ───────────────────────────────────── */ + +/** + * Extract the registered domain from a URL (e.g., "blog.example.com" → "example.com"). + * Uses a simple 2-part TLD heuristic. For common cases like .co.uk this is approximate. 
+ */ +function extractDomain(url: string): string { + try { + const hostname = new URL(url).hostname.toLowerCase(); + // Special-case common multi-part TLDs + const multiPartTlds = + /\.(co\.uk|org\.uk|ac\.uk|gov\.uk|com\.au|co\.jp|co\.kr|com\.br)$/; + const parts = hostname.split("."); + if (multiPartTlds.test(hostname) && parts.length >= 3) { + return parts.slice(-3).join("."); + } + return parts.slice(-2).join("."); + } catch { + return url.replace(/^https?:\/\//, "").split("/")[0] ?? url; + } +} + +function computeAuthorityScore(domain: string): number { + // Direct match first + if (AUTHORITY_DOMAINS[domain]) return AUTHORITY_DOMAINS[domain]; + + // Suffix matches (.gov, .edu, etc.) + for (const [key, score] of Object.entries(AUTHORITY_DOMAINS)) { + if (key.startsWith(".") && domain.endsWith(key)) return score; + } + + // Subdomain matches (e.g., blog.example.com matches example.com) + const parent = domain.split(".").slice(-2).join("."); + if (parent !== domain && AUTHORITY_DOMAINS[parent]) { + return AUTHORITY_DOMAINS[parent] * 0.9; + } + + return 0.3; // Unknown / low-authority default +} + +function detectContentType(url: string, description: string): ContentType { + const lowerUrl = url.toLowerCase(); + const lowerDesc = description.toLowerCase(); + + for (const [pattern, type] of CONTENT_TYPE_HINTS) { + if (pattern.test(lowerUrl)) return type; + } + + // Heuristics from description text + if (/paper|research|study|experiment|analysis\b/.test(lowerDesc)) + return "paper"; + if (/documentation|guide|tutorial|api|reference/.test(lowerDesc)) + return "documentation"; + if (/blog|post|article|opinion/.test(lowerDesc)) return "blog"; + if (/news|report|announce|release/.test(lowerDesc)) return "news"; + if (/forum|discussion|question|answer|thread/.test(lowerDesc)) return "forum"; + + return "other"; +} + +function tryParseDate(dateStr: string | undefined | null): Date | null { + if (!dateStr) return null; + const d = new Date(dateStr); + return 
isNaN(d.getTime()) ? null : d; +} + +/** + * Enrich a raw search result with source authority metadata. + * Accepts extra fields (e.g. date) from the Firecrawl API response. + */ +export function enrichResult( + result: SearchResult & Record<string, unknown>, +): EnrichedSearchResult { + const domain = extractDomain(result.url); + return { + ...result, + domain, + authorityScore: computeAuthorityScore(domain), + publishedDate: tryParseDate(result.date as string | undefined), + contentType: detectContentType(result.url, result.description), + }; +} + /* ── Helpers ──────────────────────────────────────────────────────── */ async function firecrawlRequest( @@ -87,14 +240,14 @@ export async function isFirecrawlReachable(): Promise { /* ── Search ───────────────────────────────────────────────────────── */ /** - * Search the web and return structured results. + * Search the web and return structured, enriched results. * Uses Firecrawl's search endpoint with scrape to get full page content. */ export async function searchWeb( query: string, limit: number = 5, signal?: AbortSignal, -): Promise<SearchResult[]> { +): Promise<EnrichedSearchResult[]> { const body: Record = { query, limit: Math.min(limit, 10), @@ -116,14 +269,19 @@ export async function searchWeb( if (!res.success || !res.data) return []; - return res.data + const rawResults: (SearchResult & Record<string, unknown>)[] = res.data .map((doc) => ({ title: (doc.title as string) ?? "", url: (doc.url as string) ?? "", description: (doc.description as string) ?? "", markdown: (doc.markdown as string) ?? 
"", + // Preserve extra fields for date extraction + ...doc, })) .filter((r) => r.markdown || r.description); + + // Enrich each result with source metadata + return rawResults.map(enrichResult); } /* ── Scrape ───────────────────────────────────────────────────────── */ diff --git a/src/queries.ts b/src/queries.ts index 844c4ca..4fb6b79 100644 --- a/src/queries.ts +++ b/src/queries.ts @@ -4,13 +4,36 @@ * Uses an LLM agent to generate search queries from different research * angles, then analyzes results to produce follow-up queries. */ -import type { SearchQuery, Finding, ResearchRound } from "./types"; +import type { + SearchQuery, + Finding, + ResearchRound, + EnrichedSearchResult, +} from "./types"; import { runAnalysisAgent } from "./agent"; +/* ── System Prompts ──────────────────────────────────────────────── */ + +const DECOMPOSE_SYSTEM = `You are a research methodology expert. Given a broad research question, your job is to break it down into 4-7 focused sub-questions that, when answered, collectively provide a complete answer to the original question. + +Guidelines: +- Each sub-question should tackle ONE specific facet of the research question +- Cover different dimensions: what, how, why, who, comparison, evidence, implications +- Sub-questions should be independently researchable via web search +- Avoid overlap between sub-questions +- Prioritize questions that will surface concrete evidence over speculative ones + +Output ONLY a JSON array of sub-question strings. + +Example: +Input: "What are the benefits and risks of artificial intelligence in healthcare?" 
+Output: ["What specific AI technologies are currently deployed in clinical healthcare settings?", "What peer-reviewed evidence exists for AI improving diagnostic accuracy?", "What are the documented risks and failure cases of AI in healthcare?", "How do regulatory frameworks (FDA, EMA) address AI-based medical devices?", "What do healthcare practitioners report as barriers to AI adoption?"] +`; + const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic. Guidelines: -- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking) +- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking, authoritative) - Each query should target a specific facet of the question - Queries should use keywords that search engines rank well (avoid overly long questions) - Cover contrasting viewpoints and alternative approaches @@ -20,7 +43,7 @@ Guidelines: Output ONLY a JSON array of objects with fields: - "query": the search query string - "rationale": why this query will help answer the research question -- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" +- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" | "historical" | "case-study" | "data-statistics" | "ethical" Example: [ @@ -29,7 +52,7 @@ Example: ] `; -const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps. +const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question, sub-questions, and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps. 
Look for: - Claims made without sufficient evidence @@ -42,18 +65,105 @@ Look for: Output ONLY a JSON array of objects with fields: - "query": the search query string - "rationale": what gap this query fills or what angle it explores -- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" +- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" | "historical" | "case-study" | "data-statistics" | "ethical" `; +/* ── Sub-Question Decomposition ───────────────────────────────────── */ + +/** + * Decompose a broad research question into focused, independently + * researchable sub-questions. Returns the sub-questions or an empty + * array if the LLM call fails. + */ +export async function decomposeQuestion( + question: string, + cwd: string, + signal?: AbortSignal, +): Promise { + const taskPrompt = `Break down this research question into 4-7 focused sub-questions:\n\n${question}`; + + const result = await runAnalysisAgent( + DECOMPOSE_SYSTEM, + taskPrompt, + cwd, + 60_000, + undefined, + signal, + ); + + if (!result.success || !result.text) return []; + + try { + const parsed = JSON.parse(result.text); + if (Array.isArray(parsed) && parsed.length > 0) { + return parsed.map(String).filter((s: string) => s.length > 10); + } + } catch { + // parse failed + } + + return []; +} + +/* ── Query Generation ────────────────────────────────────────────── */ + /** * Generate initial search queries for a research question. + * When sub-questions are available, generates queries per sub-question + * for better depth and diversity. 
*/ export async function generateQueries( question: string, count: number, cwd: string, signal?: AbortSignal, + subQuestions?: string[], ): Promise { + // If we have sub-questions, generate queries distributed across them + if (subQuestions && subQuestions.length > 0) { + const queriesPerSub = Math.max(1, Math.ceil(count / subQuestions.length)); + const allQueries: SearchQuery[] = []; + + for (const subQ of subQuestions) { + if (allQueries.length >= count) break; + + const taskPrompt = `Research question: ${question}\nSub-question: ${subQ}\n\nGenerate ${queriesPerSub} search query(ies) to answer this sub-question specifically.`; + + const result = await runAnalysisAgent( + GENERATE_QUERIES_SYSTEM, + taskPrompt, + cwd, + 60_000, + undefined, + signal, + ); + + if (!result.success || !result.text) continue; + + try { + const parsed = JSON.parse(result.text); + if (Array.isArray(parsed)) { + const queries = parsed + .slice(0, queriesPerSub) + .map((q: Record) => ({ + query: String(q.query ?? ""), + rationale: String(q.rationale ?? ""), + angle: String(q.angle ?? "technical"), + })) + .filter((q: { query: string }) => q.query.length > 0); + allQueries.push(...queries); + } + } catch { + // parse failed for this sub-question, continue + } + } + + if (allQueries.length > 0) { + return allQueries.slice(0, count); + } + } + + // Fall through to standard query generation const taskPrompt = `Research question: ${question} Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`; @@ -81,7 +191,7 @@ Generate ${count} diverse search queries to research this topic effectively. Cov rationale: String(q.rationale ?? ""), angle: String(q.angle ?? "technical"), })) - .filter((q) => q.query.length > 0); + .filter((q: { query: string }) => q.query.length > 0); } } catch { // JSON parse failed, fall back @@ -90,6 +200,8 @@ Generate ${count} diverse search queries to research this topic effectively. 
Cov return generateFallbackQueries(question, count); } +/* ── Follow-up Query Generation ──────────────────────────────────── */ + /** * Generate follow-up queries based on findings from previous rounds. */ @@ -103,7 +215,13 @@ export async function generateFollowUpQueries( // Build a summary of findings so far const allFindings = rounds.flatMap((r) => r.findings); const findingsSummary = allFindings - .map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`) + .map((f) => { + const corr = + f.corroborationScore !== undefined + ? ` [corroboration: ${(f.corroborationScore * 100).toFixed(0)}%]` + : ""; + return `- ${f.title}: ${f.summary} (confidence: ${f.confidence}${corr})`; + }) .join("\n"); const exploredAngles = rounds @@ -111,6 +229,12 @@ export async function generateFollowUpQueries( .map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`) .join("\n"); + // Find low-corroboration or low-confidence topics + const gaps = allFindings + .filter((f) => f.confidence === "low" || (f.corroborationScore ?? 1) < 0.5) + .map((f) => `Gap: ${f.title} — ${f.summary}`) + .join("\n"); + const taskPrompt = `Research question: ${question} Queries already explored: @@ -119,6 +243,8 @@ ${exploredAngles} Findings so far: ${findingsSummary} +${gaps ? `Remaining knowledge gaps:\n${gaps}` : ""} + Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`; const result = await runAnalysisAgent( @@ -144,7 +270,7 @@ Generate ${count} follow-up search queries to fill remaining gaps and deepen the rationale: String(q.rationale ?? ""), angle: String(q.angle ?? "technical"), })) - .filter((q) => q.query.length > 0); + .filter((q: { query: string }) => q.query.length > 0); } } catch { // parse failed @@ -153,6 +279,8 @@ Generate ${count} follow-up search queries to fill remaining gaps and deepen the return []; } +/* ── Fallback Query Generation ────────────────────────────────────── */ + /** * Fallback query generation when the LLM call fails. 
*/ @@ -183,6 +311,8 @@ function generateFallbackQueries( return queries; } +/* ── Analysis ────────────────────────────────────────────────────── */ + const ANALYZE_SYSTEM = `You are a research analyst. Given search results for a specific query, extract key findings. For each finding: @@ -204,19 +334,15 @@ Output ONLY a JSON array of objects with fields: */ export async function analyzeResults( query: string, - results: { - title: string; - url: string; - description: string; - markdown: string; - }[], + results: EnrichedSearchResult[], cwd: string, signal?: AbortSignal, ): Promise { + // Include authority metadata in the prompt so the LLM can consider source quality const resultsText = results .map( (r, i) => - `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`, + `--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDomain: ${r.domain}\nAuthority Score: ${(r.authorityScore * 100).toFixed(0)}%\nContent Type: ${r.contentType}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`, ) .join("\n\n"); @@ -225,7 +351,7 @@ export async function analyzeResults( Search results: ${resultsText} -Extract key findings from these results.`; +Extract key findings from these results. Consider source authority when rating confidence.`; const result = await runAnalysisAgent( ANALYZE_SYSTEM, @@ -251,7 +377,9 @@ Extract key findings from these results.`; ? String(f.confidence) : "medium") as Finding["confidence"], })) - .filter((f) => f.title && f.summary); + .filter( + (f: { title: string; summary: string }) => f.title && f.summary, + ); } } catch { // parse failed @@ -259,3 +387,119 @@ Extract key findings from these results.`; return []; } + +/* ── Corroboration Tracking ──────────────────────────────────────── */ + +/** + * Cross-reference all findings to compute corroboration scores. + * + * For each finding, we check: + * 1. 
How many other findings reference the same or similar source URLs + * 2. The authority scores of the supporting sources + * 3. Whether independent domains support the same claim + * + * Returns the findings with added corroborationScore, bestSourceAuthority, + * and avgSourceAuthority. + */ +export function computeCorroboration(findings: Finding[]): Finding[] { + if (findings.length === 0) return []; + + // Collect all unique source URLs and their authority scores + // In a real implementation, we'd map URLs to EnrichedSearchResult authority scores + // For now, extract domain-level patterns + + // Build a map of domain -> authority scores from source URLs + const domainAuthority = new Map(); + for (const finding of findings) { + for (const url of finding.sources) { + try { + const domain = extractDomainSimple(url); + if (!domainAuthority.has(domain)) { + domainAuthority.set(domain, heuristicDomainScore(domain)); + } + } catch { + // skip invalid URLs + } + } + } + + return findings.map((finding) => { + if (finding.sources.length === 0) { + return { + ...finding, + corroborationScore: 0, + bestSourceAuthority: 0, + avgSourceAuthority: 0, + }; + } + + // Compute source authority stats + const authorities: number[] = finding.sources.map((url) => { + try { + const domain = extractDomainSimple(url); + return domainAuthority.get(domain) ?? 
0.3; + } catch { + return 0.3; + } + }); + + const bestAuthority = Math.max(...authorities); + const avgAuthority = + authorities.reduce((a, b) => a + b, 0) / authorities.length; + + // Compute corroboration: how many other findings share source URLs + let corroboratingFindings = 0; + const mySources = new Set(finding.sources); + + for (const other of findings) { + if (other === finding) continue; + const overlap = other.sources.some((url) => mySources.has(url)); + if (overlap) corroboratingFindings++; + } + + // Normalize corroboration: 0-1 based on what fraction of other findings agree + const maxCorroboration = findings.length - 1; + const corroborationScore = + maxCorroboration > 0 + ? Math.min(1, corroboratingFindings / maxCorroboration) + : 0; + + return { + ...finding, + corroborationScore: Math.round(corroborationScore * 100) / 100, + bestSourceAuthority: Math.round(bestAuthority * 100) / 100, + avgSourceAuthority: Math.round(avgAuthority * 100) / 100, + }; + }); +} + +/** + * Simple domain extraction (avoids URL constructor for compatibility). + */ +function extractDomainSimple(url: string): string { + const match = url.match(/https?:\/\/([^/]+)/); + if (!match) return url; + const hostname = match[1].toLowerCase(); + const parts = hostname.split("."); + const multiPartTlds = + /\.(co\.uk|org\.uk|ac\.uk|gov\.uk|com\.au|co\.jp|co\.kr|com\.br)$/; + if (multiPartTlds.test(hostname) && parts.length >= 3) { + return parts.slice(-3).join("."); + } + return parts.slice(-2).join("."); +} + +/** + * Very basic domain score heuristic without the full domain list. 
+ */ +function heuristicDomainScore(domain: string): number { + if (/\.gov$|\.edu$/.test(domain)) return 0.85; + if (/arxiv|scholar|pubmed|ieee|acm|springer|nature|science/.test(domain)) + return 0.9; + if (/github|gitlab|bitbucket/.test(domain)) return 0.75; + if (/wikipedia|stackoverflow|medium|dev\.to/.test(domain)) return 0.55; + if (/docs\.|learn\.|developer\./.test(domain)) return 0.8; + if (/reuters|apnews|bbc|nytimes|bloomberg/.test(domain)) return 0.75; + if (/blog|forum|reddit/.test(domain)) return 0.3; + return 0.4; +} diff --git a/src/report.ts b/src/report.ts index f8bcb18..882a426 100644 --- a/src/report.ts +++ b/src/report.ts @@ -2,31 +2,163 @@ * Deep Research — Report synthesis * * Takes all research rounds and synthesizes a comprehensive report - * using an LLM agent. + * using an LLM agent. Produces: + * - Numbered inline citations with a bibliography + * - Layered report: TL;DR → Executive Summary → Key Findings + * → Detailed Analysis → Limitations/Gaps → References + * - Audience-aware tone adjustment */ -import type { ResearchRound, ResearchConfig } from "./types"; +import type { + ResearchRound, + ResearchConfig, + Reference, + Finding, +} from "./types"; import { runAnalysisAgent } from "./agent"; -const SYNTHESIS_SYSTEM = `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report. +/** Return shape from synthesizeReport */ +export interface SynthesisResult { + report: string; + references: Reference[]; +} -Your report should: -1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question) -2. Organize findings by theme, not by search query -3. Include specific evidence from sources (cite URLs in [brackets]) -4. Note areas of disagreement or uncertainty -5. Identify knowledge gaps that remain -6. 
End with actionable conclusions +/* ── System Prompts ──────────────────────────────────────────────── */ + +function buildSynthesisSystem(audience: string): string { + const audienceGuidance: Record = { + expert: + "Assume expert-level domain knowledge. Use precise technical terminology, reference specific methodologies and standards, and prioritize depth over hand-holding. The reader understands the field.", + general: + "Write for an informed general audience. Define technical terms on first use, explain context, and keep the tone accessible but not simplistic. Avoid jargon without explanation.", + executive: + "Write for a busy executive or decision-maker. Lead with actionable conclusions and recommendations. Be concise — use bold for key takeaways. Minimize technical detail; focus on implications, trade-offs, and decisions. Target 2-3 pages.", + }; + + const guidance = audienceGuidance[audience] ?? audienceGuidance.general; + + return `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report. + +Audience: ${guidance} + +Report structure (use ## headings): +1. **TL;DR** — One paragraph (2-3 sentences) giving the single most important answer +2. **Executive Summary** — 2-3 paragraphs covering what was found, how confident we are, and key implications +3. **Key Findings** — Tiered by importance/confidence. Bullet points with inline citations +4. **Detailed Analysis** — Organized by theme. Each section covers one aspect with evidence +5. **Limitations & Knowledge Gaps** — What evidence is weak, missing, or contradictory +6. **Conclusion** — Wrap up with actionable takeaways + +Citation rules: +- Use numbered references like [1], [2] etc. 
throughout the text +- At the end, include a ## References section listing each citation +- Format references as: [1] Title — Domain (URL) +- Cite specific evidence, not vague associations +- When multiple sources support a claim, cite all of them: [1][3][5] Style guidelines: -- Use clear section headings (## level) - Write in an objective, authoritative tone -- Include bullet points for listing evidence -- Use inline citations like [source](url) +- Use bullet points for listing evidence - Note the confidence level for key claims -- Be thorough but concise — every paragraph should add value`; +- Be thorough but concise — every paragraph should add value +- Use > for notable direct quotes with citations`; +} + +/* ── Evidence Builder ────────────────────────────────────────────── */ + +function buildEvidenceText( + question: string, + rounds: ResearchRound[], +): { evidenceText: string; referenceMap: Map } { + const allFindings = rounds.flatMap((r) => r.findings); + const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0); + const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0); + + // Build a bibliography map (url -> Reference) + const seenUrls = new Map(); + let refId = 0; + + for (const round of rounds) { + for (const result of round.results) { + if (!seenUrls.has(result.url)) { + refId++; + seenUrls.set(result.url, { + id: refId, + url: result.url, + title: result.title, + domain: result.domain, + authorityScore: result.authorityScore, + accessedAt: new Date().toISOString().split("T")[0], + }); + } + } + } + + // Organize findings by thematic angle + const evidenceByAngle = new Map(); + for (const round of rounds) { + for (const finding of round.findings) { + const angle = round.queries[0]?.angle ?? 
"technical"; + if (!evidenceByAngle.has(angle)) evidenceByAngle.set(angle, []); + evidenceByAngle.get(angle)!.push(finding); + } + } + + let evidenceText = `## Research Question\n${question}\n\n`; + evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`; + evidenceText += `- Total searches executed: ${totalSearches}\n`; + evidenceText += `- Total pages analyzed: ${totalPages}\n`; + evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`; + + // Build evidence grouped by angle with reference IDs + for (const [angle, findings] of Array.from(evidenceByAngle)) { + if (findings.length === 0) continue; + evidenceText += `## Angle: ${angle}\n\n`; + for (const finding of findings) { + // Get reference IDs for this finding's sources + const refs = finding.sources + .map((url) => seenUrls.get(url)) + .filter((r): r is Reference => !!r) + .map((r) => `[${r.id}]`); + + const avgAuth = + finding.avgSourceAuthority !== undefined + ? ` | Avg Authority: ${(finding.avgSourceAuthority * 100).toFixed(0)}%` + : ""; + const corr = + finding.corroborationScore !== undefined + ? ` | Corroboration: ${(finding.corroborationScore * 100).toFixed(0)}%` + : ""; + const bestAuthStr = + finding.bestSourceAuthority !== undefined + ? 
` | Best Source: ${(finding.bestSourceAuthority * 100).toFixed(0)}%` + : ""; + + evidenceText += `### ${finding.title}\n`; + evidenceText += `**Confidence:** ${finding.confidence}${avgAuth}${corr}${bestAuthStr}\n`; + if (refs.length > 0) { + evidenceText += `**Sources:** ${refs.join(", ")}\n`; + } + evidenceText += `${finding.summary}\n\n`; + if (finding.keyQuotes.length > 0) { + evidenceText += `> ${finding.keyQuotes[0]}\n\n`; + } + } + } + + // Include reference metadata for the LLM to build proper citations + evidenceText += `## Reference Metadata\n\n`; + for (const [, ref] of seenUrls) { + evidenceText += `[${ref.id}] ${ref.title} (${ref.domain}, authority: ${(ref.authorityScore * 100).toFixed(0)}%) — ${ref.url}\n`; + } + + return { evidenceText, referenceMap: seenUrls }; +} + +/* ── Main Synthesis ──────────────────────────────────────────────── */ /** * Synthesize a research report from all rounds. + * Returns both the formatted report and the full bibliography. */ export async function synthesizeReport( question: string, @@ -34,58 +166,14 @@ export async function synthesizeReport( config: ResearchConfig, cwd: string, signal?: AbortSignal, -): Promise { - // Build the evidence summary - const allFindings = rounds.flatMap((r) => r.findings); - const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0); - const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0); +): Promise { + const audience = config.audience ?? "general"; + const { evidenceText, referenceMap } = buildEvidenceText(question, rounds); - const evidenceByAngle = new Map(); - for (const round of rounds) { - for (const query of round.queries) { - const key = query.angle; - if (!evidenceByAngle.has(key)) evidenceByAngle.set(key, []); - } - for (const finding of round.findings) { - // Try to determine angle from the round's queries - const angle = round.queries[0]?.angle ?? 
"technical"; - if (!evidenceByAngle.has(angle)) evidenceByAngle.set(angle, []); - evidenceByAngle.get(angle)!.push(finding); - } - } - - // Build structured evidence text - let evidenceText = `## Research Question\n${question}\n\n`; - evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`; - evidenceText += `- Total searches executed: ${totalSearches}\n`; - evidenceText += `- Total pages analyzed: ${totalPages}\n`; - evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`; - - for (const [angle, findings] of Array.from(evidenceByAngle)) { - if (findings.length === 0) continue; - evidenceText += `## Angle: ${angle}\n\n`; - for (const finding of findings) { - evidenceText += `### ${finding.title}\n`; - evidenceText += `**Confidence:** ${finding.confidence}\n`; - evidenceText += `${finding.summary}\n\n`; - if (finding.keyQuotes.length > 0) { - evidenceText += `> ${finding.keyQuotes[0]}\n\n`; - } - if (finding.sources.length > 0) { - evidenceText += `Sources: ${finding.sources.map((s: string) => `[${s}](${s})`).join(", ")}\n\n`; - } - } - } - - // Also include raw search context for depth - evidenceText += `## Raw Search Context\n\n`; - for (const round of rounds) { - evidenceText += `### Round ${round.round}\n`; - for (const q of round.queries) { - evidenceText += `- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`; - } - evidenceText += `\n`; - } + const formatInstruction = + config.format === "structured" + ? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate." + : "Well-formatted markdown report with ## headings, bullet points, and inline numbered citations like [1]."; const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report. @@ -93,10 +181,13 @@ ${evidenceText} Write a thorough report that answers the original question: "${question}" -Format: ${config.format === "structured" ? 
"Structured report with numbered sections, clear hierarchies, and data tables where appropriate." : "Well-formatted markdown report with ## headings, bullet points, and inline citations."}`; +Format: ${formatInstruction} +Audience: ${audience} + +Remember to use numbered citations like [1], [2] and include a ## References section at the end.`; const result = await runAnalysisAgent( - SYNTHESIS_SYSTEM, + buildSynthesisSystem(audience), taskPrompt, cwd, 120_000, @@ -105,64 +196,250 @@ Format: ${config.format === "structured" ? "Structured report with numbered sect ); if (result.success && result.text) { - return result.text; + // Build bibliography section + const bibSection = buildBibliography(referenceMap); + + // Append references if not already present + let report = result.text; + if (!report.includes("## References") && !report.includes("# References")) { + report += `\n\n${bibSection}`; + } + + return { report, references: Array.from(referenceMap.values()) }; } - // Fallback: generate a simple report from the evidence - return generateFallbackReport(question, rounds); + // Fallback: generate a simple structured report + const fallbackReport = generateFallbackReport( + question, + rounds, + referenceMap, + audience, + ); + return { + report: fallbackReport + `\n\n${buildBibliography(referenceMap)}`, + references: Array.from(referenceMap.values()), + }; } +/* ── Bibliography Builder ────────────────────────────────────────── */ + +/** + * Build a structured ## References section from the reference map. + */ +function buildBibliography(referenceMap: Map): string { + if (referenceMap.size === 0) return "## References\n\nNo sources cited."; + + const refs = Array.from(referenceMap.values()).sort((a, b) => a.id - b.id); + const lines: string[] = ["## References\n"]; + for (const ref of refs) { + const authIcon = + ref.authorityScore >= 0.8 ? "⭐" : ref.authorityScore >= 0.5 ? 
"✓" : "○"; + lines.push( + `[${ref.id}] ${authIcon} **${ref.title}** — ${ref.domain} (${ref.url}) — accessed ${ref.accessedAt}`, + ); + } + + return lines.join("\n"); +} + +/* ── Fallback Report ─────────────────────────────────────────────── */ + /** * Fallback report when the LLM synthesis fails. + * Produces a clean, structured report from the evidence. */ function generateFallbackReport( question: string, rounds: ResearchRound[], + referenceMap: Map, + _audience: string, ): string { const lines: string[] = []; + const allFindings = rounds.flatMap((r) => r.findings); + + // ── TL;DR ── lines.push(`# Research Report: ${question}`); lines.push(""); + + const highConfFindings = allFindings.filter((f) => f.confidence === "high"); + const totalHigh = highConfFindings.length; + const total = allFindings.length; + + lines.push("## TL;DR"); + lines.push(""); + if (highConfFindings.length > 0) { + lines.push( + `Based on analysis of ${total} findings across ${rounds.length} research round(s), ` + + `${totalHigh} high-confidence conclusions were identified. ` + + `${highConfFindings[0].title}: ${highConfFindings[0].summary}`, + ); + } else { + lines.push( + `This report covers findings from ${rounds.length} research round(s) exploring "${question}". 
` + + `${total} findings were extracted, with varying levels of confidence.`, + ); + } + lines.push(""); + + // ── Executive Summary ── lines.push("## Executive Summary"); lines.push(""); lines.push( - `This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`, + `This report synthesizes findings from ${rounds.length} research round(s), ` + + `${rounds.reduce((s, r) => s + r.queries.length, 0)} search queries, ` + + `and ${rounds.reduce((s, r) => s + r.results.length, 0)} sources.`, ); lines.push(""); - const allFindings = rounds.flatMap((r) => r.findings); - + // ── Key Findings (tiered) ── if (allFindings.length > 0) { lines.push("## Key Findings"); lines.push(""); - for (const finding of allFindings) { - lines.push(`### ${finding.title}`); - lines.push(`*Confidence: ${finding.confidence}*`); - lines.push(""); - lines.push(finding.summary); - lines.push(""); - if (finding.keyQuotes.length > 0) { - lines.push(`> ${finding.keyQuotes[0]}`); - lines.push(""); + + // High confidence first + const highConf = allFindings.filter((f) => f.confidence === "high"); + if (highConf.length > 0) { + lines.push("### High Confidence"); + for (const finding of highConf) { + const refs = finding.sources + .map((url) => referenceMap.get(url)) + .filter((r): r is Reference => !!r) + .map((r) => `[${r.id}]`); + lines.push( + `- **${finding.title}** ${refs.length > 0 ? 
refs.join("") : ""}`, + ); + lines.push(` - ${finding.summary}`); } - if (finding.sources.length > 0) { - lines.push("Sources:"); - for (const src of finding.sources) { - lines.push(`- [${src}](${src})`); - } - lines.push(""); + lines.push(""); + } + + // Medium confidence + const medConf = allFindings.filter((f) => f.confidence === "medium"); + if (medConf.length > 0) { + lines.push("### Moderate Confidence"); + for (const finding of medConf) { + const refs = finding.sources + .map((url) => referenceMap.get(url)) + .filter((r): r is Reference => !!r) + .map((r) => `[${r.id}]`); + lines.push( + `- **${finding.title}** ${refs.length > 0 ? refs.join("") : ""}`, + ); + lines.push(` - ${finding.summary}`); + } + lines.push(""); + } + + // Low confidence + const lowConf = allFindings.filter((f) => f.confidence === "low"); + if (lowConf.length > 0) { + lines.push("### Lower Confidence (Needs Further Research)"); + for (const finding of lowConf) { + const refs = finding.sources + .map((url) => referenceMap.get(url)) + .filter((r): r is Reference => !!r) + .map((r) => `[${r.id}]`); + lines.push( + `- **${finding.title}** ${refs.length > 0 ? refs.join("") : ""}`, + ); + lines.push(` - ${finding.summary}`); + } + lines.push(""); + } + + // ── Detailed Analysis ── + lines.push("## Detailed Analysis"); + lines.push(""); + + const byAngle = new Map(); + for (const round of rounds) { + for (const f of round.findings) { + const angle = round.queries[0]?.angle ?? "general"; + if (!byAngle.has(angle)) byAngle.set(angle, []); + byAngle.get(angle)!.push(f); } } + + for (const [angle, findings] of byAngle) { + lines.push(`### ${angle.charAt(0).toUpperCase() + angle.slice(1)}`); + lines.push(""); + for (const f of findings) { + const corrStr = + f.corroborationScore !== undefined + ? 
` (corroboration: ${(f.corroborationScore * 100).toFixed(0)}%)` + : ""; + lines.push(`**${f.title}** — *${f.confidence} confidence${corrStr}*`); + lines.push(""); + lines.push(f.summary); + lines.push(""); + if (f.keyQuotes.length > 0) { + lines.push(`> ${f.keyQuotes[0]}`); + lines.push(""); + } + } + } + + // ── Limitations ── + const lowConfCount = allFindings.filter( + (f) => f.confidence === "low", + ).length; + const noCorr = allFindings.filter( + (f) => (f.corroborationScore ?? 0) < 0.3, + ).length; + + lines.push("## Limitations & Knowledge Gaps"); + lines.push(""); + if (lowConfCount > 0) { + lines.push( + `- **${lowConfCount} of ${allFindings.length} findings** have low confidence, indicating limited or conflicting evidence.`, + ); + } + if (noCorr > 0) { + lines.push( + `- **${noCorr} findings** lack corroboration from multiple independent sources.`, + ); + } + lines.push( + "- This research relied on web search results; some relevant sources may not be indexed or accessible.", + ); + lines.push( + "- Findings are dependent on search engine ranking and the quality of indexed content.", + ); + lines.push(""); + + // ── Conclusion ── + lines.push("## Conclusion"); + lines.push(""); + if (highConf.length > 0) { + lines.push( + `The research identified ${highConf.length} high-confidence finding(s) and ${medConf.length} moderately-supported finding(s). ` + + `The strongest evidence relates to: ${highConf.map((f) => f.title).join(", ")}.`, + ); + } else { + lines.push( + "The research surfaced relevant information but with limited high-confidence evidence. 
Further investigation is recommended for the identified knowledge gaps.", + ); + } + lines.push(""); } - lines.push("## Search Methodology"); + // ── Methodology ── + lines.push(`*Report prepared for: ${_audience} audience*`); + lines.push(""); + + lines.push("## Methodology"); lines.push(""); for (const round of rounds) { + const failedSearches = round.queries.length - round.successfulSearches; lines.push(`### Round ${round.round}`); lines.push( - `Queries: ${round.queries.map((q) => `"${q.query}"`).join(", ")}`, + `Queries: ${round.queries.map((q) => `"${q.query}" [${q.angle}]`).join(", ")}`, ); lines.push(`Pages scraped: ${round.results.length}`); - lines.push(`Findings: ${round.findings.length}`); + lines.push(`Findings extracted: ${round.findings.length}`); + if (failedSearches > 0) { + lines.push(`Searches failed: ${failedSearches}`); + } lines.push(""); } diff --git a/src/research.ts b/src/research.ts index 519f45d..5f8e474 100644 --- a/src/research.ts +++ b/src/research.ts @@ -2,33 +2,40 @@ * Deep Research — Core research orchestration * * Manages the multi-round deep research process: - * 1. Generate initial search queries - * 2. Execute all queries in parallel via Firecrawl - * 3. Analyze results and extract findings - * 4. Generate follow-up queries - * 5. Iterate for depth rounds - * 6. Synthesize final report + * 1. Decompose the question into sub-questions (when depth > 1) + * 2. Generate initial search queries (per sub-question for better diversity) + * 3. Execute all queries in parallel via Firecrawl + * 4. Analyze results and extract findings + * 5. Compute corroboration scores + * 6. Generate follow-up queries for gaps + * 7. Iterate for depth rounds + * 8. Synthesize final report with numbered references * * Widget and progress callback patterns borrowed from ralpi's executor. 
*/ import type { ExtensionContext } from "@earendil-works/pi-coding-agent"; import type { + Finding, ResearchConfig, - SearchResult, + EnrichedSearchResult, ResearchRound, ResearchReport, } from "./types"; +import type { SynthesisResult } from "./report"; import { searchWeb } from "./firecrawl"; import { generateQueries, generateFollowUpQueries, analyzeResults, + computeCorroboration, + decomposeQuestion, } from "./queries"; import { synthesizeReport } from "./report"; /** Progress callback for UI updates */ export type ResearchProgress = (update: { phase: + | "decomposing" | "generating_queries" | "searching" | "analyzing" @@ -41,6 +48,86 @@ export type ResearchProgress = (update: { fraction?: number; // 0-1 }) => void; +// ── Round-Robin Parallel Execution ────────────────────────────────── + +/** + * Maximum concurrent Firecrawl search requests. + * Prevents rate limiting while still parallelizing queries. + */ +const MAX_SEARCH_CONCURRENT = 3; + +/** + * Maximum concurrent analysis agent sessions. + */ +const MAX_ANALYSIS_CONCURRENT = 2; + +/** + * Minimum findings per round before we consider early stopping. + * If we're getting very few new findings, saturation is near. + */ +const SATURATION_THRESHOLD = 0.15; // < 15% new findings = likely saturated + +/** + * Bounded-concurrency parallel execution with round-robin slot assignment. + * + * Similar to ralpi's ModelRoundRobin: with N concurrent slots, items are + * assigned to free slots in FIFO order. When a slot finishes, the next + * item in the queue is assigned to it. + * + * This ensures even load distribution and avoids bursty concurrency. 
+ */ +async function boundedConcurrency( + items: T[], + maxConcurrent: number, + mapper: (item: T, index: number) => Promise, +): Promise { + const results: R[] = new Array(items.length); + let nextIndex = 0; + + async function worker(): Promise { + while (true) { + const currentIndex = nextIndex++; + if (currentIndex >= items.length) return; + results[currentIndex] = await mapper(items[currentIndex], currentIndex); + } + } + + const numWorkers = Math.min(maxConcurrent, items.length); + const workers = Array.from({ length: numWorkers }, () => worker()); + await Promise.all(workers); + return results; +} + +/** + * Assess whether the research is reaching information saturation. + */ +function assessSaturation( + previousRound: ResearchRound | undefined, + currentRound: ResearchRound, +): number { + if (!previousRound || previousRound.findings.length === 0) return 0; + + const prevUrls = new Set(previousRound.results.map((r) => r.url)); + const newUrls = currentRound.results.filter( + (r) => !prevUrls.has(r.url), + ).length; + const totalUrls = currentRound.results.length; + const newRatio = totalUrls > 0 ? newUrls / totalUrls : 0; + + // Also check finding novelty + const prevFindingTitles = new Set( + previousRound.findings.map((f) => f.title.toLowerCase()), + ); + const newFindings = currentRound.findings.filter( + (f) => !prevFindingTitles.has(f.title.toLowerCase()), + ).length; + const totalFindings = currentRound.findings.length; + const findingNovelty = totalFindings > 0 ? newFindings / totalFindings : 0; + + // Weight: URL novelty (40%) + finding novelty (60%) + return newRatio * 0.4 + findingNovelty * 0.6; +} + /** * Run a complete deep research session. 
*/ @@ -54,15 +141,35 @@ export async function runDeepResearch( const rounds: ResearchRound[] = []; let totalSearches = 0; let totalPages = 0; + let subQuestions: string[] = []; - // ── Round 1: Generate initial queries ────────────────────────────── + // ── Phase: Decompose question into sub-questions ──────────────── + + if (config.depth > 1) { + onProgress({ + phase: "decomposing", + round: 1, + totalRounds: config.depth, + message: "Decomposing research question into sub-topics...", + fraction: 0, + }); + + if (signal?.aborted) throw new Error("Research cancelled"); + + subQuestions = await decomposeQuestion(config.question, ctx.cwd, signal); + } + + // ── Phase: Generate initial queries ───────────────────────────── onProgress({ phase: "generating_queries", round: 1, totalRounds: config.depth, - message: "Generating initial search queries...", - fraction: 0, + message: + subQuestions.length > 0 + ? `Generating queries across ${subQuestions.length} sub-topics...` + : "Generating initial search queries...", + fraction: 0.05, }); if (signal?.aborted) throw new Error("Research cancelled"); @@ -72,13 +179,14 @@ export async function runDeepResearch( config.breadth, ctx.cwd, signal, + subQuestions.length > 0 ? 
subQuestions : undefined, ); if (queries.length === 0) { throw new Error("Failed to generate any search queries"); } - // ── Execute rounds ───────────────────────────────────────────────── + // ── Execute rounds ─────────────────────────────────────────────── for (let round = 1; round <= config.depth; round++) { if (signal?.aborted) throw new Error("Research cancelled"); @@ -99,125 +207,177 @@ export async function runDeepResearch( break; } - // ── Search phase ────────────────────────────────────────────────── + // ── Search phase (parallel with round-robin) ──────────────────── onProgress({ phase: "searching", round, totalRounds: config.depth, - message: `Searching with ${currentQueries.length} queries...`, + message: `Searching ${currentQueries.length} queries in parallel...`, fraction: 0.25, }); - const searchResults: SearchResult[] = []; + if (signal?.aborted) throw new Error("Research cancelled"); - for (let i = 0; i < currentQueries.length; i++) { - if (signal?.aborted) throw new Error("Research cancelled"); + // Run searches in parallel using round-robin bounded concurrency. + // Each mapper call runs independently; failures are caught per-query. + const searchResultsArrays: (EnrichedSearchResult[] | null)[] = + await boundedConcurrency( + currentQueries, + MAX_SEARCH_CONCURRENT, + async (q, i) => { + onProgress({ + phase: "searching", + round, + totalRounds: config.depth, + message: `Searching: "${q.query.slice(0, 60)}..."`, + detail: q.rationale, + fraction: 0.25 + (i / currentQueries.length) * 0.25, + }); - const q = currentQueries[i]; - onProgress({ - phase: "searching", - round, - totalRounds: config.depth, - message: `Searching: "${q.query.slice(0, 60)}..."`, - detail: q.rationale, - fraction: 0.25 + (i / currentQueries.length) * 0.25, - }); + try { + return await searchWeb(q.query, 5, signal); + } catch (error) { + const errorMsg = + error instanceof Error ? 
error.message : String(error); + onProgress({ + phase: "searching", + round, + totalRounds: config.depth, + message: `Search failed: ${errorMsg.slice(0, 80)}`, + fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25, + }); + return null; + } + }, + ); - try { - const results = await searchWeb(q.query, 5, signal); - searchResults.push(...results); - } catch (error) { - // Individual search failure shouldn't crash the whole round - const errorMsg = error instanceof Error ? error.message : String(error); - onProgress({ - phase: "searching", - round, - totalRounds: config.depth, - message: `Search failed: ${errorMsg.slice(0, 80)}`, - fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25, - }); - } - - // Small delay between searches to avoid rate limits - if (i < currentQueries.length - 1) { - await new Promise((r) => setTimeout(r, 300)); - } - } + // Flatten results, filtering out nulls (failed searches) + const searchResults: EnrichedSearchResult[] = searchResultsArrays + .filter((r): r is EnrichedSearchResult[] => r !== null) + .flat(); totalSearches += currentQueries.length; - // Deduplicate results by URL - const seen = new Set(); - const uniqueResults = searchResults.filter((r) => { - if (seen.has(r.url)) return false; - seen.add(r.url); - return true; - }); + // Deduplicate results by URL (prefer higher authority) + const seen = new Map(); + for (const r of searchResults) { + const existing = seen.get(r.url); + if (!existing || r.authorityScore > existing.authorityScore) { + seen.set(r.url, r); + } + } + const uniqueResults = Array.from(seen.values()); totalPages += uniqueResults.length; - // ── Analyze phase ────────────────────────────────────────────────── + // ── Analyze phase (parallel with round-robin) ────────────────── onProgress({ phase: "analyzing", round, totalRounds: config.depth, - message: `Analyzing ${uniqueResults.length} search results...`, + message: `Analyzing ${uniqueResults.length} search results in parallel...`, fraction: 0.6, }); - 
// Analyze results per query group - const allFindings: ResearchRound["findings"] = []; + if (signal?.aborted) throw new Error("Research cancelled"); + + // Build query-result pairs for parallel analysis + const analysisTasks: Array<{ + query: (typeof currentQueries)[number]; + results: typeof uniqueResults; + index: number; + }> = []; + + const resultsPerQuery = Math.ceil( + uniqueResults.length / currentQueries.length, + ); for (let i = 0; i < currentQueries.length; i++) { - if (signal?.aborted) throw new Error("Research cancelled"); - - const q = currentQueries[i]; - // Find results that match this query (loosely: take a portion of results) - const resultsPerQuery = Math.ceil( - uniqueResults.length / currentQueries.length, - ); const startIdx = i * resultsPerQuery; const endIdx = Math.min(startIdx + resultsPerQuery, uniqueResults.length); const queryResults = uniqueResults.slice(startIdx, endIdx); if (queryResults.length === 0) continue; - onProgress({ - phase: "analyzing", - round, - totalRounds: config.depth, - message: `Analyzing results for "${q.query.slice(0, 40)}..."`, - fraction: 0.6 + (i / currentQueries.length) * 0.2, + analysisTasks.push({ + query: currentQueries[i], + results: queryResults, + index: i, }); - - try { - const findings = await analyzeResults( - q.query, - queryResults, - ctx.cwd, - signal, - ); - allFindings.push(...findings); - } catch { - // Analysis failure shouldn't crash the round - } } + // Run analyses in parallel using round-robin bounded concurrency + const findingsArrays: Finding[][] = await boundedConcurrency( + analysisTasks, + MAX_ANALYSIS_CONCURRENT, + async (task) => { + onProgress({ + phase: "analyzing", + round, + totalRounds: config.depth, + message: `Analyzing: "${task.query.query.slice(0, 40)}..."`, + fraction: 0.6 + (task.index / currentQueries.length) * 0.2, + }); + + try { + return await analyzeResults( + task.query.query, + task.results, + ctx.cwd, + signal, + ); + } catch { + // Analysis failure shouldn't crash 
the round + return []; + } + }, + ); + + // Flatten all findings + const allFindings: ResearchRound["findings"] = findingsArrays.flat(); + + // ── Corroboration pass ──────────────────────────────────────── + // Cross-reference findings to compute corroboration scores + const corroboratedFindings = computeCorroboration(allFindings); + // Record this round + const successfulSearches = currentQueries.length; + const followUpTopics = corroboratedFindings + .filter( + (f: Finding) => + f.confidence === "low" && (f.corroborationScore ?? 0) < 0.5, + ) + .map((f: Finding) => f.title); + rounds.push({ round, queries: currentQueries, results: uniqueResults, - findings: allFindings, - followUpTopics: allFindings - .filter((f) => f.confidence === "low") - .map((f) => f.title), + findings: corroboratedFindings, + followUpTopics, + successfulSearches, }); + + // ── Adaptive depth: check for saturation ────────────────────── + if (round > 1 && round < config.depth) { + const saturation = assessSaturation( + rounds[rounds.length - 2], + rounds[rounds.length - 1], + ); + if (saturation < SATURATION_THRESHOLD) { + onProgress({ + phase: "synthesizing", + message: `Information saturation reached (${(saturation * 100).toFixed(0)}% novelty) — synthesizing early`, + fraction: 0.85, + }); + break; + } + } } - // ── Synthesis phase ───────────────────────────────────────────────── + // ── Synthesis phase ─────────────────────────────────────────────── onProgress({ phase: "synthesizing", @@ -227,13 +387,15 @@ export async function runDeepResearch( if (signal?.aborted) throw new Error("Research cancelled"); - const finalReport = await synthesizeReport( + const synthesisResult: SynthesisResult = await synthesizeReport( config.question, rounds, config, ctx.cwd, signal, ); + const finalReport = synthesisResult.report; + const references = synthesisResult.references; const durationMs = Date.now() - startTime; @@ -250,5 +412,6 @@ export async function runDeepResearch( totalSearches, 
totalPagesScraped: totalPages, durationMs, + references, }; } diff --git a/src/types.ts b/src/types.ts index ed018c9..5b9d4cc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -2,6 +2,16 @@ * Deep Research — type definitions */ +/** Content type classification for a source */ +export type ContentType = + | "documentation" + | "paper" + | "news" + | "blog" + | "forum" + | "official" + | "other"; + /** A single search result from Firecrawl */ export interface SearchResult { title: string; @@ -10,6 +20,14 @@ export interface SearchResult { markdown: string; } +/** Enriched search result with source authority metadata */ +export interface EnrichedSearchResult extends SearchResult { + domain: string; + authorityScore: number; // 0.0 – 1.0 + publishedDate: Date | null; + contentType: ContentType; +} + /** A finding extracted from search results by an analysis agent */ export interface Finding { title: string; @@ -17,6 +35,22 @@ export interface Finding { sources: string[]; keyQuotes: string[]; confidence: "high" | "medium" | "low"; + /** 0.0 – 1.0: how many independent sources support this finding */ + corroborationScore?: number; + /** Authority score of the best source supporting this finding */ + bestSourceAuthority?: number; + /** Average authority score across all sources */ + avgSourceAuthority?: number; +} + +/** A numbered reference with full metadata */ +export interface Reference { + id: number; + url: string; + title: string; + domain: string; + authorityScore: number; + accessedAt: string; // ISO date string } /** A generated search query with its intent/rationale */ @@ -30,18 +64,28 @@ export interface SearchQuery { export interface ResearchRound { round: number; queries: SearchQuery[]; - results: SearchResult[]; + results: EnrichedSearchResult[]; findings: Finding[]; /** Any follow-up questions/angles the analysis suggests */ followUpTopics: string[]; + /** Number of sources that actually returned data (non-empty) */ + successfulSearches: number; } +/** 
Target audience expertise level */ +export type Audience = "expert" | "general" | "executive"; + /** Configuration for a research session */ export interface ResearchConfig { question: string; depth: number; // 1-3 rounds breadth: number; // queries per round (1-5) format: "markdown" | "structured"; + audience?: Audience; + /** Focus on specific research angles only (empty = all angles) */ + focus?: string[]; + /** Show the research methodology section in the report */ + showMethodology?: boolean; } /** Final research report */ @@ -52,4 +96,5 @@ export interface ResearchReport { totalSearches: number; totalPagesScraped: number; durationMs: number; + references: Reference[]; }