improved validity verification, ui responsiveness
This commit is contained in:
166
src/firecrawl.ts
166
src/firecrawl.ts
@@ -7,7 +7,7 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import * as os from "node:os";
|
||||
import type { SearchResult } from "./types";
|
||||
import type { SearchResult, EnrichedSearchResult, ContentType } from "./types";
|
||||
|
||||
/* ── Config ──────────────────────────────────────────────────────── */
|
||||
|
||||
@@ -36,6 +36,159 @@ function loadFirecrawlConfig() {
|
||||
|
||||
const { baseUrl: BASE_URL, apiKey: API_KEY } = loadFirecrawlConfig();
|
||||
|
||||
/* ── Domain Authority Heuristics ─────────────────────────────────── */
|
||||
|
||||
/**
 * Hand-curated authority table, keyed by hostname or by TLD suffix.
 *
 * Values are authority scores in the range 0.0 – 1.0. Keys that start with a
 * dot (".gov", ".edu") are suffix rules rather than concrete hostnames.
 */
const AUTHORITY_DOMAINS: Record<string, number> = {
  // ── Scholarly publishers and indexes ──
  "arxiv.org": 0.95,
  "scholar.google.com": 0.95,
  "pubmed.ncbi.nlm.nih.gov": 0.95,
  "semanticscholar.org": 0.9,
  "ieee.org": 0.95,
  "acm.org": 0.95,
  "springer.com": 0.9,
  "sciencedirect.com": 0.9,
  "wiley.com": 0.85,
  "nature.com": 0.95,
  "science.org": 0.95,
  "plos.org": 0.85,

  // ── First-party technical documentation ──
  "docs.python.org": 0.9,
  "developer.mozilla.org": 0.9,
  "learn.microsoft.com": 0.85,
  "developer.apple.com": 0.85,
  "kubernetes.io": 0.85,
  "react.dev": 0.85,
  "nextjs.org": 0.8,

  // ── Government, education, international bodies ──
  ".gov": 0.9,
  ".edu": 0.85,
  "who.int": 0.9,
  "worldbank.org": 0.85,
  "oecd.org": 0.85,

  // ── Developer platforms, community sites, news outlets ──
  "github.com": 0.8,
  "stackoverflow.com": 0.7,
  "medium.com": 0.4,
  "dev.to": 0.5,
  "wikipedia.org": 0.7,
  "reuters.com": 0.8,
  "apnews.com": 0.8,
  "bbc.com": 0.75,
  "nytimes.com": 0.75,
  "theguardian.com": 0.7,
  "techcrunch.com": 0.6,
  "arstechnica.com": 0.65,
  "wired.com": 0.65,
  "infoworld.com": 0.55,
};
|
||||
|
||||
/**
 * URL-pattern → content-type rules.
 *
 * Order matters: consumers take the first pattern that matches, so earlier
 * rows win over later ones (e.g. the generic "docs." rule precedes github.com).
 */
const CONTENT_TYPE_HINTS: [RegExp, ContentType][] = [
  [/arxiv\.org|semanticscholar|ieee\.org|acm\.org|springer|sciencedirect|pubmed\.ncbi/, "paper"],
  [/docs\.|learn\.|developer\.|kubernetes\.io|react\.dev|nextjs\.org/, "documentation"],
  [/wikipedia\.org|stackoverflow\.com|medium\.com|dev\.to/, "forum"],
  [/reuters\.com|apnews\.com|bbc\.com|nytimes\.com|techcrunch|arstechnica|wired/, "news"],
  [/\.gov|\.edu|who\.int|worldbank|oecd\.org/, "official"],
  [/github\.com/, "documentation"],
];
|
||||
|
||||
/* ── Source enrichment helpers ───────────────────────────────────── */
|
||||
|
||||
/**
 * Extract the registered domain from a URL (e.g., "blog.example.com" → "example.com").
 *
 * Uses a simple "last two labels" heuristic with a short allow-list of
 * multi-part TLDs (.co.uk, .com.au, …), so the result is approximate for
 * suffixes that are not on the list.
 *
 * IP-literal hosts are returned unchanged: slicing "192.168.1.10" to its last
 * two labels would produce the meaningless "1.10". A trailing root dot
 * ("example.com.") is dropped before splitting so it does not count as a label.
 *
 * @param url Absolute URL. Input that `URL` cannot parse falls back to a
 *   plain string split.
 * @returns The lower-cased registered domain, the IP literal, or, for
 *   unparseable input, everything up to the first "/" after an optional
 *   http(s) scheme (not lower-cased).
 */
function extractDomain(url: string): string {
  let hostname: string;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    // Not a parseable URL: best-effort strip of scheme and path.
    return url.replace(/^https?:\/\//, "").split("/")[0] ?? url;
  }

  // "example.com." and "example.com" name the same host.
  hostname = hostname.replace(/\.$/, "");

  // IPv6 literals come back bracketed; IPv4 literals are four dotted numbers.
  const isIpLiteral =
    hostname.startsWith("[") || /^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname);
  if (isIpLiteral) return hostname;

  // Special-case common multi-part TLDs
  const multiPartTlds =
    /\.(co\.uk|org\.uk|ac\.uk|gov\.uk|com\.au|co\.jp|co\.kr|com\.br)$/;
  const parts = hostname.split(".");
  if (multiPartTlds.test(hostname) && parts.length >= 3) {
    return parts.slice(-3).join(".");
  }
  return parts.slice(-2).join(".");
}
|
||||
|
||||
/**
 * Look up an authority score for a host or domain name.
 *
 * Resolution order:
 *   1. exact key in AUTHORITY_DOMAINS;
 *   2. suffix rules — keys that start with "." (".gov", ".edu");
 *   3. nearest listed parent domain, discounted by 10%;
 *   4. 0.3 for anything unknown.
 *
 * Lookups are restricted to the table's own keys, so a name such as
 * "constructor" cannot pick up an inherited Object.prototype member. The
 * parent search walks every suffix of at least two labels, longest first, so
 * "api.docs.python.org" can reach the "docs.python.org" entry.
 *
 * @param domain Lower-cased hostname or registered domain.
 * @returns A score between 0 and 1.
 */
function computeAuthorityScore(domain: string): number {
  const lookup = (key: string): number | undefined =>
    Object.prototype.hasOwnProperty.call(AUTHORITY_DOMAINS, key)
      ? AUTHORITY_DOMAINS[key]
      : undefined;

  // Direct match first
  const direct = lookup(domain);
  if (direct !== undefined) return direct;

  // Suffix matches (.gov, .edu, etc.)
  for (const [key, score] of Object.entries(AUTHORITY_DOMAINS)) {
    if (key.startsWith(".") && domain.endsWith(key)) return score;
  }

  // Subdomain matches (e.g., blog.example.com matches example.com).
  // Start at 1 to skip the full name (already tried above); stop while at
  // least two labels remain so a bare TLD is never looked up.
  const labels = domain.split(".");
  for (let start = 1; start <= labels.length - 2; start++) {
    const parentScore = lookup(labels.slice(start).join("."));
    if (parentScore !== undefined) return parentScore * 0.9;
  }

  return 0.3; // Unknown / low-authority default
}
|
||||
|
||||
/**
 * Classify a search hit as paper / documentation / blog / news / forum /
 * official / other.
 *
 * URL patterns from CONTENT_TYPE_HINTS are consulted first; if none match,
 * keyword rules are applied to the description. In both stages the first
 * matching rule wins. Matching is done on lower-cased text.
 *
 * @param url Result URL.
 * @param description Result description / snippet.
 * @returns The detected content type, or "other" when nothing matches.
 */
function detectContentType(url: string, description: string): ContentType {
  const urlText = url.toLowerCase();
  const descText = description.toLowerCase();

  const urlRule = CONTENT_TYPE_HINTS.find(([pattern]) => pattern.test(urlText));
  if (urlRule) return urlRule[1];

  // Keyword fallbacks, evaluated top to bottom.
  const descriptionRules: [RegExp, ContentType][] = [
    [/paper|research|study|experiment|analysis\b/, "paper"],
    [/documentation|guide|tutorial|api|reference/, "documentation"],
    [/blog|post|article|opinion/, "blog"],
    [/news|report|announce|release/, "news"],
    [/forum|discussion|question|answer|thread/, "forum"],
  ];
  const descRule = descriptionRules.find(([pattern]) => pattern.test(descText));

  return descRule ? descRule[1] : "other";
}
|
||||
|
||||
/**
 * Parse a date string leniently.
 *
 * @param dateStr Anything the `Date` constructor accepts; may be absent.
 * @returns A valid `Date`, or `null` when the input is missing/empty or does
 *   not parse to a real timestamp.
 */
function tryParseDate(dateStr: string | undefined | null): Date | null {
  const parsed = dateStr ? new Date(dateStr) : null;
  if (parsed === null || Number.isNaN(parsed.getTime())) {
    return null;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Enrich a raw search result with source authority metadata.
 *
 * Accepts extra fields (e.g. `date`) from the Firecrawl API response and
 * passes them through unchanged.
 *
 * The authority score is taken as the higher of two lookups: one on the full
 * hostname (minus a leading "www.") and one on the registered domain.
 * Scoring only the registered domain would make host-specific table entries
 * such as "docs.python.org" or "scholar.google.com" unreachable, because
 * `extractDomain` collapses them to "python.org" / "google.com". Taking the
 * maximum means a plain subdomain of a listed site (e.g. "en.wikipedia.org")
 * is not penalised relative to scoring the registered domain alone.
 *
 * @param result Raw search result plus any extra API fields.
 * @returns The same fields plus `domain`, `authorityScore`, `publishedDate`
 *   and `contentType`.
 */
export function enrichResult(
  result: SearchResult & Record<string, unknown>,
): EnrichedSearchResult {
  const domain = extractDomain(result.url);

  // Full hostname for host-specific authority entries; falls back to the
  // registered domain when the URL cannot be parsed.
  let hostname = domain;
  try {
    hostname = new URL(result.url).hostname.toLowerCase().replace(/^www\./, "");
  } catch {
    // Unparseable URL: keep the extractDomain() fallback value.
  }

  const authorityScore = Math.max(
    computeAuthorityScore(hostname),
    computeAuthorityScore(domain),
  );

  return {
    ...result,
    domain,
    authorityScore,
    publishedDate: tryParseDate(result.date as string | undefined),
    contentType: detectContentType(result.url, result.description),
  };
}
|
||||
|
||||
/* ── Helpers ──────────────────────────────────────────────────────── */
|
||||
|
||||
async function firecrawlRequest(
|
||||
@@ -87,14 +240,14 @@ export async function isFirecrawlReachable(): Promise<boolean> {
|
||||
/* ── Search ───────────────────────────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Search the web and return structured results.
|
||||
* Search the web and return structured, enriched results.
|
||||
* Uses Firecrawl's search endpoint with scrape to get full page content.
|
||||
*/
|
||||
export async function searchWeb(
|
||||
query: string,
|
||||
limit: number = 5,
|
||||
signal?: AbortSignal,
|
||||
): Promise<SearchResult[]> {
|
||||
): Promise<EnrichedSearchResult[]> {
|
||||
const body: Record<string, unknown> = {
|
||||
query,
|
||||
limit: Math.min(limit, 10),
|
||||
@@ -116,14 +269,19 @@ export async function searchWeb(
|
||||
|
||||
if (!res.success || !res.data) return [];
|
||||
|
||||
return res.data
|
||||
const rawResults: (SearchResult & Record<string, unknown>)[] = res.data
|
||||
.map((doc) => ({
|
||||
title: (doc.title as string) ?? "",
|
||||
url: (doc.url as string) ?? "",
|
||||
description: (doc.description as string) ?? "",
|
||||
markdown: (doc.markdown as string) ?? "",
|
||||
// Preserve extra fields for date extraction
|
||||
...doc,
|
||||
}))
|
||||
.filter((r) => r.markdown || r.description);
|
||||
|
||||
// Enrich each result with source metadata
|
||||
return rawResults.map(enrichResult);
|
||||
}
|
||||
|
||||
/* ── Scrape ───────────────────────────────────────────────────────── */
|
||||
|
||||
278
src/queries.ts
278
src/queries.ts
@@ -4,13 +4,36 @@
|
||||
* Uses an LLM agent to generate search queries from different research
|
||||
* angles, then analyzes results to produce follow-up queries.
|
||||
*/
|
||||
import type { SearchQuery, Finding, ResearchRound } from "./types";
|
||||
import type {
|
||||
SearchQuery,
|
||||
Finding,
|
||||
ResearchRound,
|
||||
EnrichedSearchResult,
|
||||
} from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
/* ── System Prompts ──────────────────────────────────────────────── */
|
||||
|
||||
/**
 * System prompt for the sub-question decomposition step.
 * Instructs the model to reply with a bare JSON array of strings.
 */
const DECOMPOSE_SYSTEM = `You are a research methodology expert. Given a broad research question, your job is to break it down into 4-7 focused sub-questions that, when answered, collectively provide a complete answer to the original question.

Guidelines:
- Each sub-question should tackle ONE specific facet of the research question
- Cover different dimensions: what, how, why, who, comparison, evidence, implications
- Sub-questions should be independently researchable via web search
- Avoid overlap between sub-questions
- Prioritize questions that will surface concrete evidence over speculative ones

Output ONLY a JSON array of sub-question strings.

Example:
Input: "What are the benefits and risks of artificial intelligence in healthcare?"
Output: ["What specific AI technologies are currently deployed in clinical healthcare settings?", "What peer-reviewed evidence exists for AI improving diagnostic accuracy?", "What are the documented risks and failure cases of AI in healthcare?", "How do regulatory frameworks (FDA, EMA) address AI-based medical devices?", "What do healthcare practitioners report as barriers to AI adoption?"]
`;
|
||||
|
||||
const GENERATE_QUERIES_SYSTEM = `You are a research methodology expert. Your role is to generate effective web search queries that will yield high-quality, diverse information about a research topic.
|
||||
|
||||
Guidelines:
|
||||
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking)
|
||||
- Create queries from DIFFERENT angles (technical, practical, comparative, critical, forward-looking, authoritative)
|
||||
- Each query should target a specific facet of the question
|
||||
- Queries should use keywords that search engines rank well (avoid overly long questions)
|
||||
- Cover contrasting viewpoints and alternative approaches
|
||||
@@ -20,7 +43,7 @@ Guidelines:
|
||||
Output ONLY a JSON array of objects with fields:
|
||||
- "query": the search query string
|
||||
- "rationale": why this query will help answer the research question
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" | "historical" | "case-study" | "data-statistics" | "ethical"
|
||||
|
||||
Example:
|
||||
[
|
||||
@@ -29,7 +52,7 @@ Example:
|
||||
]
|
||||
`;
|
||||
|
||||
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.
|
||||
const FOLLOWUP_SYSTEM = `You are a research analyst. Given the research question, sub-questions, and findings so far, your job is to identify what's still unknown and generate follow-up search queries to fill those gaps.
|
||||
|
||||
Look for:
|
||||
- Claims made without sufficient evidence
|
||||
@@ -42,18 +65,105 @@ Look for:
|
||||
Output ONLY a JSON array of objects with fields:
|
||||
- "query": the search query string
|
||||
- "rationale": what gap this query fills or what angle it explores
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative"
|
||||
- "angle": one of "technical" | "practical" | "comparative" | "critical" | "forward-looking" | "authoritative" | "historical" | "case-study" | "data-statistics" | "ethical"
|
||||
`;
|
||||
|
||||
/* ── Sub-Question Decomposition ───────────────────────────────────── */
|
||||
|
||||
/**
 * Decompose a broad research question into focused, independently
 * researchable sub-questions.
 *
 * The agent is expected to reply with a JSON array of strings. Entries that
 * are not strings are discarded rather than coerced: coercing an object with
 * `String()` yields "[object Object]", which is longer than the minimum
 * length and would otherwise be returned as a sub-question. Remaining entries
 * are trimmed, and anything of 10 characters or fewer is dropped.
 *
 * @param question The research question to break down.
 * @param cwd Working directory forwarded to the analysis agent.
 * @param signal Optional abort signal forwarded to the analysis agent.
 * @returns The sub-questions, or an empty array if the agent call fails, the
 *   reply is not valid JSON, or the reply is not an array.
 */
export async function decomposeQuestion(
  question: string,
  cwd: string,
  signal?: AbortSignal,
): Promise<string[]> {
  const taskPrompt = `Break down this research question into 4-7 focused sub-questions:\n\n${question}`;

  const result = await runAnalysisAgent(
    DECOMPOSE_SYSTEM,
    taskPrompt,
    cwd,
    60_000,
    undefined,
    signal,
  );

  if (!result.success || !result.text) return [];

  let parsed: unknown;
  try {
    parsed = JSON.parse(result.text);
  } catch {
    // Reply was not valid JSON — treated the same as a failed call.
    return [];
  }

  if (!Array.isArray(parsed)) return [];

  return parsed
    .filter((entry): entry is string => typeof entry === "string")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 10);
}
|
||||
|
||||
/* ── Query Generation ────────────────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Generate initial search queries for a research question.
|
||||
* When sub-questions are available, generates queries per sub-question
|
||||
* for better depth and diversity.
|
||||
*/
|
||||
export async function generateQueries(
|
||||
question: string,
|
||||
count: number,
|
||||
cwd: string,
|
||||
signal?: AbortSignal,
|
||||
subQuestions?: string[],
|
||||
): Promise<SearchQuery[]> {
|
||||
// If we have sub-questions, generate queries distributed across them
|
||||
if (subQuestions && subQuestions.length > 0) {
|
||||
const queriesPerSub = Math.max(1, Math.ceil(count / subQuestions.length));
|
||||
const allQueries: SearchQuery[] = [];
|
||||
|
||||
for (const subQ of subQuestions) {
|
||||
if (allQueries.length >= count) break;
|
||||
|
||||
const taskPrompt = `Research question: ${question}\nSub-question: ${subQ}\n\nGenerate ${queriesPerSub} search query(ies) to answer this sub-question specifically.`;
|
||||
|
||||
const result = await runAnalysisAgent(
|
||||
GENERATE_QUERIES_SYSTEM,
|
||||
taskPrompt,
|
||||
cwd,
|
||||
60_000,
|
||||
undefined,
|
||||
signal,
|
||||
);
|
||||
|
||||
if (!result.success || !result.text) continue;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(result.text);
|
||||
if (Array.isArray(parsed)) {
|
||||
const queries = parsed
|
||||
.slice(0, queriesPerSub)
|
||||
.map((q: Record<string, unknown>) => ({
|
||||
query: String(q.query ?? ""),
|
||||
rationale: String(q.rationale ?? ""),
|
||||
angle: String(q.angle ?? "technical"),
|
||||
}))
|
||||
.filter((q: { query: string }) => q.query.length > 0);
|
||||
allQueries.push(...queries);
|
||||
}
|
||||
} catch {
|
||||
// parse failed for this sub-question, continue
|
||||
}
|
||||
}
|
||||
|
||||
if (allQueries.length > 0) {
|
||||
return allQueries.slice(0, count);
|
||||
}
|
||||
}
|
||||
|
||||
// Fall through to standard query generation
|
||||
const taskPrompt = `Research question: ${question}
|
||||
|
||||
Generate ${count} diverse search queries to research this topic effectively. Cover different angles.`;
|
||||
@@ -81,7 +191,7 @@ Generate ${count} diverse search queries to research this topic effectively. Cov
|
||||
rationale: String(q.rationale ?? ""),
|
||||
angle: String(q.angle ?? "technical"),
|
||||
}))
|
||||
.filter((q) => q.query.length > 0);
|
||||
.filter((q: { query: string }) => q.query.length > 0);
|
||||
}
|
||||
} catch {
|
||||
// JSON parse failed, fall back
|
||||
@@ -90,6 +200,8 @@ Generate ${count} diverse search queries to research this topic effectively. Cov
|
||||
return generateFallbackQueries(question, count);
|
||||
}
|
||||
|
||||
/* ── Follow-up Query Generation ──────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Generate follow-up queries based on findings from previous rounds.
|
||||
*/
|
||||
@@ -103,7 +215,13 @@ export async function generateFollowUpQueries(
|
||||
// Build a summary of findings so far
|
||||
const allFindings = rounds.flatMap((r) => r.findings);
|
||||
const findingsSummary = allFindings
|
||||
.map((f) => `- ${f.title}: ${f.summary} (confidence: ${f.confidence})`)
|
||||
.map((f) => {
|
||||
const corr =
|
||||
f.corroborationScore !== undefined
|
||||
? ` [corroboration: ${(f.corroborationScore * 100).toFixed(0)}%]`
|
||||
: "";
|
||||
return `- ${f.title}: ${f.summary} (confidence: ${f.confidence}${corr})`;
|
||||
})
|
||||
.join("\n");
|
||||
|
||||
const exploredAngles = rounds
|
||||
@@ -111,6 +229,12 @@ export async function generateFollowUpQueries(
|
||||
.map((q) => `[${q.angle}] ${q.query} — ${q.rationale}`)
|
||||
.join("\n");
|
||||
|
||||
// Find low-corroboration or low-confidence topics
|
||||
const gaps = allFindings
|
||||
.filter((f) => f.confidence === "low" || (f.corroborationScore ?? 1) < 0.5)
|
||||
.map((f) => `Gap: ${f.title} — ${f.summary}`)
|
||||
.join("\n");
|
||||
|
||||
const taskPrompt = `Research question: ${question}
|
||||
|
||||
Queries already explored:
|
||||
@@ -119,6 +243,8 @@ ${exploredAngles}
|
||||
Findings so far:
|
||||
${findingsSummary}
|
||||
|
||||
${gaps ? `Remaining knowledge gaps:\n${gaps}` : ""}
|
||||
|
||||
Generate ${count} follow-up search queries to fill remaining gaps and deepen the research.`;
|
||||
|
||||
const result = await runAnalysisAgent(
|
||||
@@ -144,7 +270,7 @@ Generate ${count} follow-up search queries to fill remaining gaps and deepen the
|
||||
rationale: String(q.rationale ?? ""),
|
||||
angle: String(q.angle ?? "technical"),
|
||||
}))
|
||||
.filter((q) => q.query.length > 0);
|
||||
.filter((q: { query: string }) => q.query.length > 0);
|
||||
}
|
||||
} catch {
|
||||
// parse failed
|
||||
@@ -153,6 +279,8 @@ Generate ${count} follow-up search queries to fill remaining gaps and deepen the
|
||||
return [];
|
||||
}
|
||||
|
||||
/* ── Fallback Query Generation ────────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Fallback query generation when the LLM call fails.
|
||||
*/
|
||||
@@ -183,6 +311,8 @@ function generateFallbackQueries(
|
||||
return queries;
|
||||
}
|
||||
|
||||
/* ── Analysis ────────────────────────────────────────────────────── */
|
||||
|
||||
const ANALYZE_SYSTEM = `You are a research analyst. Given search results for a specific query, extract key findings.
|
||||
|
||||
For each finding:
|
||||
@@ -204,19 +334,15 @@ Output ONLY a JSON array of objects with fields:
|
||||
*/
|
||||
export async function analyzeResults(
|
||||
query: string,
|
||||
results: {
|
||||
title: string;
|
||||
url: string;
|
||||
description: string;
|
||||
markdown: string;
|
||||
}[],
|
||||
results: EnrichedSearchResult[],
|
||||
cwd: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<Finding[]> {
|
||||
// Include authority metadata in the prompt so the LLM can consider source quality
|
||||
const resultsText = results
|
||||
.map(
|
||||
(r, i) =>
|
||||
`--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`,
|
||||
`--- Result ${i + 1} ---\nTitle: ${r.title}\nURL: ${r.url}\nDomain: ${r.domain}\nAuthority Score: ${(r.authorityScore * 100).toFixed(0)}%\nContent Type: ${r.contentType}\nDescription: ${r.description}\nContent:\n${r.markdown.slice(0, 3000)}`,
|
||||
)
|
||||
.join("\n\n");
|
||||
|
||||
@@ -225,7 +351,7 @@ export async function analyzeResults(
|
||||
Search results:
|
||||
${resultsText}
|
||||
|
||||
Extract key findings from these results.`;
|
||||
Extract key findings from these results. Consider source authority when rating confidence.`;
|
||||
|
||||
const result = await runAnalysisAgent(
|
||||
ANALYZE_SYSTEM,
|
||||
@@ -251,7 +377,9 @@ Extract key findings from these results.`;
|
||||
? String(f.confidence)
|
||||
: "medium") as Finding["confidence"],
|
||||
}))
|
||||
.filter((f) => f.title && f.summary);
|
||||
.filter(
|
||||
(f: { title: string; summary: string }) => f.title && f.summary,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
// parse failed
|
||||
@@ -259,3 +387,119 @@ Extract key findings from these results.`;
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/* ── Corroboration Tracking ──────────────────────────────────────── */
|
||||
|
||||
/**
 * Annotate every finding with corroboration and source-authority figures.
 *
 * For each finding this computes:
 *   - `bestSourceAuthority` / `avgSourceAuthority`: max and mean of the
 *     heuristic domain scores of its source URLs (0.3 for a URL whose domain
 *     cannot be scored);
 *   - `corroborationScore`: the fraction of the OTHER findings that cite at
 *     least one identical source URL.
 *
 * All three values are rounded to two decimals. A finding without sources
 * gets 0 for all three. The input array and its items are not mutated.
 *
 * @param findings Findings collected so far.
 * @returns New finding objects carrying the three extra fields.
 */
export function computeCorroboration(findings: Finding[]): Finding[] {
  if (findings.length === 0) return [];

  const FALLBACK_AUTHORITY = 0.3;
  const toTwoDecimals = (value: number): number => Math.round(value * 100) / 100;

  // Domain → heuristic score, filled on first use so each domain is scored once.
  const scoreByDomain = new Map<string, number>();
  const authorityOf = (url: string): number => {
    try {
      const domain = extractDomainSimple(url);
      let score = scoreByDomain.get(domain);
      if (score === undefined) {
        score = heuristicDomainScore(domain);
        scoreByDomain.set(domain, score);
      }
      return score;
    } catch {
      // Domain extraction failed for this entry.
      return FALLBACK_AUTHORITY;
    }
  };

  const otherCount = findings.length - 1;

  return findings.map((finding) => {
    if (finding.sources.length === 0) {
      return {
        ...finding,
        corroborationScore: 0,
        bestSourceAuthority: 0,
        avgSourceAuthority: 0,
      };
    }

    // Source authority stats
    const scores = finding.sources.map((url) => authorityOf(url));
    const best = Math.max(...scores);
    let total = 0;
    for (const score of scores) total += score;
    const mean = total / scores.length;

    // Other findings that cite at least one of the same URLs
    const cited = new Set(finding.sources);
    const overlapping = findings.filter(
      (other) => other !== finding && other.sources.some((url) => cited.has(url)),
    ).length;

    // Normalise to 0–1; a lone finding has nothing to be corroborated by.
    const ratio = otherCount > 0 ? Math.min(1, overlapping / otherCount) : 0;

    return {
      ...finding,
      corroborationScore: toTwoDecimals(ratio),
      bestSourceAuthority: toTwoDecimals(best),
      avgSourceAuthority: toTwoDecimals(mean),
    };
  });
}
|
||||
|
||||
/**
 * Simple domain extraction (avoids URL constructor for compatibility).
 *
 * Pulls the host out of an http(s) URL with a regex and reduces it to the
 * registered domain using a "last two labels" rule plus a short list of
 * multi-part TLDs.
 *
 * Only the host itself is captured: userinfo ("user@"), port (":8080"),
 * query ("?x=1") and fragment ("#top") are excluded, so
 * "https://example.com:8080/a" and "https://example.com/a" map to the same
 * domain. IP-literal hosts are returned whole rather than being cut down to
 * their last two octets.
 *
 * @param url URL string; anything without an http(s) scheme is returned as-is.
 * @returns Lower-cased registered domain, an IP literal, or the input itself.
 */
function extractDomainSimple(url: string): string {
  // Group 1 is either a bracketed IPv6 literal or a run of host characters.
  const match = url.match(/https?:\/\/(?:[^/?#@]*@)?(\[[^\]]*\]|[^/?#:]+)/);
  const rawHost = match?.[1];
  if (!rawHost) return url;

  // Drop a trailing root dot so it does not count as a label.
  const hostname = rawHost.toLowerCase().replace(/\.$/, "");

  const isIpLiteral =
    hostname.startsWith("[") || /^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname);
  if (isIpLiteral) return hostname;

  const parts = hostname.split(".");
  const multiPartTlds =
    /\.(co\.uk|org\.uk|ac\.uk|gov\.uk|com\.au|co\.jp|co\.kr|com\.br)$/;
  if (multiPartTlds.test(hostname) && parts.length >= 3) {
    return parts.slice(-3).join(".");
  }
  return parts.slice(-2).join(".");
}
|
||||
|
||||
/**
 * Rough authority estimate for a domain, based on substring patterns only.
 *
 * Rules are tried top to bottom and the first match decides the score;
 * domains that match nothing get 0.4.
 *
 * @param domain Domain name to score.
 * @returns A score between 0 and 1.
 */
function heuristicDomainScore(domain: string): number {
  const rules: [RegExp, number][] = [
    [/\.gov$|\.edu$/, 0.85],
    [/arxiv|scholar|pubmed|ieee|acm|springer|nature|science/, 0.9],
    [/github|gitlab|bitbucket/, 0.75],
    [/wikipedia|stackoverflow|medium|dev\.to/, 0.55],
    [/docs\.|learn\.|developer\./, 0.8],
    [/reuters|apnews|bbc|nytimes|bloomberg/, 0.75],
    [/blog|forum|reddit/, 0.3],
  ];

  for (const [pattern, score] of rules) {
    if (pattern.test(domain)) return score;
  }
  return 0.4;
}
|
||||
|
||||
459
src/report.ts
459
src/report.ts
@@ -2,31 +2,163 @@
|
||||
* Deep Research — Report synthesis
|
||||
*
|
||||
* Takes all research rounds and synthesizes a comprehensive report
|
||||
* using an LLM agent.
|
||||
* using an LLM agent. Produces:
|
||||
* - Numbered inline citations with a bibliography
|
||||
* - Layered report: TL;DR → Executive Summary → Key Findings
|
||||
* → Detailed Analysis → Limitations/Gaps → References
|
||||
* - Audience-aware tone adjustment
|
||||
*/
|
||||
import type { ResearchRound, ResearchConfig } from "./types";
|
||||
import type {
|
||||
ResearchRound,
|
||||
ResearchConfig,
|
||||
Reference,
|
||||
Finding,
|
||||
} from "./types";
|
||||
import { runAnalysisAgent } from "./agent";
|
||||
|
||||
const SYNTHESIS_SYSTEM = `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.
|
||||
/**
 * What `synthesizeReport` resolves with: the rendered report together with
 * the bibliography entries it was built from.
 */
export interface SynthesisResult {
  /** The synthesized report text. */
  report: string;
  /** Bibliography entries for the report. */
  references: Reference[];
}
|
||||
|
||||
Your report should:
|
||||
1. Start with an executive summary (2-3 paragraphs covering the key answer to the research question)
|
||||
2. Organize findings by theme, not by search query
|
||||
3. Include specific evidence from sources (cite URLs in [brackets])
|
||||
4. Note areas of disagreement or uncertainty
|
||||
5. Identify knowledge gaps that remain
|
||||
6. End with actionable conclusions
|
||||
/* ── System Prompts ──────────────────────────────────────────────── */
|
||||
|
||||
function buildSynthesisSystem(audience: string): string {
|
||||
const audienceGuidance: Record<string, string> = {
|
||||
expert:
|
||||
"Assume expert-level domain knowledge. Use precise technical terminology, reference specific methodologies and standards, and prioritize depth over hand-holding. The reader understands the field.",
|
||||
general:
|
||||
"Write for an informed general audience. Define technical terms on first use, explain context, and keep the tone accessible but not simplistic. Avoid jargon without explanation.",
|
||||
executive:
|
||||
"Write for a busy executive or decision-maker. Lead with actionable conclusions and recommendations. Be concise — use bold for key takeaways. Minimize technical detail; focus on implications, trade-offs, and decisions. Target 2-3 pages.",
|
||||
};
|
||||
|
||||
const guidance = audienceGuidance[audience] ?? audienceGuidance.general;
|
||||
|
||||
return `You are a senior research analyst synthesizing findings from multiple web searches into a comprehensive, well-structured report.
|
||||
|
||||
Audience: ${guidance}
|
||||
|
||||
Report structure (use ## headings):
|
||||
1. **TL;DR** — One paragraph (2-3 sentences) giving the single most important answer
|
||||
2. **Executive Summary** — 2-3 paragraphs covering what was found, how confident we are, and key implications
|
||||
3. **Key Findings** — Tiered by importance/confidence. Bullet points with inline citations
|
||||
4. **Detailed Analysis** — Organized by theme. Each section covers one aspect with evidence
|
||||
5. **Limitations & Knowledge Gaps** — What evidence is weak, missing, or contradictory
|
||||
6. **Conclusion** — Wrap up with actionable takeaways
|
||||
|
||||
Citation rules:
|
||||
- Use numbered references like [1], [2] etc. throughout the text
|
||||
- At the end, include a ## References section listing each citation
|
||||
- Format references as: [1] Title — Domain (URL)
|
||||
- Cite specific evidence, not vague associations
|
||||
- When multiple sources support a claim, cite all of them: [1][3][5]
|
||||
|
||||
Style guidelines:
|
||||
- Use clear section headings (## level)
|
||||
- Write in an objective, authoritative tone
|
||||
- Include bullet points for listing evidence
|
||||
- Use inline citations like [source](url)
|
||||
- Use bullet points for listing evidence
|
||||
- Note the confidence level for key claims
|
||||
- Be thorough but concise — every paragraph should add value`;
|
||||
- Be thorough but concise — every paragraph should add value
|
||||
- Use > for notable direct quotes with citations`;
|
||||
}
|
||||
|
||||
/* ── Evidence Builder ────────────────────────────────────────────── */
|
||||
|
||||
/**
 * Build the evidence digest for the synthesis prompt, plus the bibliography
 * it cites from.
 *
 * - Each distinct result URL gets a 1-based reference id in first-seen order.
 * - Findings are grouped under the angle of their round's FIRST query
 *   ("technical" when the round has no queries).
 * - Each finding lists confidence, any authority/corroboration figures that
 *   are present, the ids of its sources that appear in the bibliography, its
 *   summary and its first key quote.
 * - A trailing "Reference Metadata" section lists every reference.
 *
 * @param question The research question, echoed at the top of the digest.
 * @param rounds All research rounds so far.
 * @returns The digest text and the url → Reference map.
 */
function buildEvidenceText(
  question: string,
  rounds: ResearchRound[],
): { evidenceText: string; referenceMap: Map<string, Reference> } {
  const percent = (value: number): string => (value * 100).toFixed(0);

  // Bibliography: one entry per distinct URL, numbered in first-seen order.
  const referenceMap = new Map<string, Reference>();
  for (const { results } of rounds) {
    for (const page of results) {
      if (referenceMap.has(page.url)) continue;
      referenceMap.set(page.url, {
        id: referenceMap.size + 1,
        url: page.url,
        title: page.title,
        domain: page.domain,
        authorityScore: page.authorityScore,
        accessedAt: new Date().toISOString().split("T")[0],
      });
    }
  }

  // Totals for the overview, and findings bucketed by angle.
  let searchCount = 0;
  let pageCount = 0;
  let findingCount = 0;
  const findingsByAngle = new Map<string, Finding[]>();
  for (const round of rounds) {
    searchCount += round.queries.length;
    pageCount += round.results.length;
    findingCount += round.findings.length;

    const angle = round.queries[0]?.angle ?? "technical";
    for (const finding of round.findings) {
      const bucket = findingsByAngle.get(angle);
      if (bucket) {
        bucket.push(finding);
      } else {
        findingsByAngle.set(angle, [finding]);
      }
    }
  }

  const sections: string[] = [
    `## Research Question\n${question}\n\n`,
    `## Overview\n- Rounds of research: ${rounds.length}\n`,
    `- Total searches executed: ${searchCount}\n`,
    `- Total pages analyzed: ${pageCount}\n`,
    `- Key findings extracted: ${findingCount}\n\n`,
  ];

  findingsByAngle.forEach((findings, angle) => {
    sections.push(`## Angle: ${angle}\n\n`);

    for (const finding of findings) {
      // Only sources that made it into the bibliography can be cited.
      const citations: string[] = [];
      for (const url of finding.sources) {
        const ref = referenceMap.get(url);
        if (ref) citations.push(`[${ref.id}]`);
      }

      const metrics = [
        finding.avgSourceAuthority !== undefined
          ? ` | Avg Authority: ${percent(finding.avgSourceAuthority)}%`
          : "",
        finding.corroborationScore !== undefined
          ? ` | Corroboration: ${percent(finding.corroborationScore)}%`
          : "",
        finding.bestSourceAuthority !== undefined
          ? ` | Best Source: ${percent(finding.bestSourceAuthority)}%`
          : "",
      ].join("");

      sections.push(`### ${finding.title}\n`);
      sections.push(`**Confidence:** ${finding.confidence}${metrics}\n`);
      if (citations.length > 0) {
        sections.push(`**Sources:** ${citations.join(", ")}\n`);
      }
      sections.push(`${finding.summary}\n\n`);
      if (finding.keyQuotes.length > 0) {
        sections.push(`> ${finding.keyQuotes[0]}\n\n`);
      }
    }
  });

  // Reference metadata so the LLM can build proper citations.
  sections.push(`## Reference Metadata\n\n`);
  referenceMap.forEach((ref) => {
    sections.push(
      `[${ref.id}] ${ref.title} (${ref.domain}, authority: ${percent(ref.authorityScore)}%) — ${ref.url}\n`,
    );
  });

  return { evidenceText: sections.join(""), referenceMap };
}
|
||||
|
||||
/* ── Main Synthesis ──────────────────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Synthesize a research report from all rounds.
|
||||
* Returns both the formatted report and the full bibliography.
|
||||
*/
|
||||
export async function synthesizeReport(
|
||||
question: string,
|
||||
@@ -34,58 +166,14 @@ export async function synthesizeReport(
|
||||
config: ResearchConfig,
|
||||
cwd: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<string> {
|
||||
// Build the evidence summary
|
||||
const allFindings = rounds.flatMap((r) => r.findings);
|
||||
const totalSearches = rounds.reduce((sum, r) => sum + r.queries.length, 0);
|
||||
const totalPages = rounds.reduce((sum, r) => sum + r.results.length, 0);
|
||||
): Promise<SynthesisResult> {
|
||||
const audience = config.audience ?? "general";
|
||||
const { evidenceText, referenceMap } = buildEvidenceText(question, rounds);
|
||||
|
||||
const evidenceByAngle = new Map<string, ResearchRound["findings"]>();
|
||||
for (const round of rounds) {
|
||||
for (const query of round.queries) {
|
||||
const key = query.angle;
|
||||
if (!evidenceByAngle.has(key)) evidenceByAngle.set(key, []);
|
||||
}
|
||||
for (const finding of round.findings) {
|
||||
// Try to determine angle from the round's queries
|
||||
const angle = round.queries[0]?.angle ?? "technical";
|
||||
if (!evidenceByAngle.has(angle)) evidenceByAngle.set(angle, []);
|
||||
evidenceByAngle.get(angle)!.push(finding);
|
||||
}
|
||||
}
|
||||
|
||||
// Build structured evidence text
|
||||
let evidenceText = `## Research Question\n${question}\n\n`;
|
||||
evidenceText += `## Overview\n- Rounds of research: ${rounds.length}\n`;
|
||||
evidenceText += `- Total searches executed: ${totalSearches}\n`;
|
||||
evidenceText += `- Total pages analyzed: ${totalPages}\n`;
|
||||
evidenceText += `- Key findings extracted: ${allFindings.length}\n\n`;
|
||||
|
||||
for (const [angle, findings] of Array.from(evidenceByAngle)) {
|
||||
if (findings.length === 0) continue;
|
||||
evidenceText += `## Angle: ${angle}\n\n`;
|
||||
for (const finding of findings) {
|
||||
evidenceText += `### ${finding.title}\n`;
|
||||
evidenceText += `**Confidence:** ${finding.confidence}\n`;
|
||||
evidenceText += `${finding.summary}\n\n`;
|
||||
if (finding.keyQuotes.length > 0) {
|
||||
evidenceText += `> ${finding.keyQuotes[0]}\n\n`;
|
||||
}
|
||||
if (finding.sources.length > 0) {
|
||||
evidenceText += `Sources: ${finding.sources.map((s: string) => `[${s}](${s})`).join(", ")}\n\n`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also include raw search context for depth
|
||||
evidenceText += `## Raw Search Context\n\n`;
|
||||
for (const round of rounds) {
|
||||
evidenceText += `### Round ${round.round}\n`;
|
||||
for (const q of round.queries) {
|
||||
evidenceText += `- **"${q.query}"** (${q.angle}) — ${q.rationale}\n`;
|
||||
}
|
||||
evidenceText += `\n`;
|
||||
}
|
||||
const formatInstruction =
|
||||
config.format === "structured"
|
||||
? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate."
|
||||
: "Well-formatted markdown report with ## headings, bullet points, and inline numbered citations like [1].";
|
||||
|
||||
const taskPrompt = `Synthesize the following research findings into a comprehensive, well-structured report.
|
||||
|
||||
@@ -93,10 +181,13 @@ ${evidenceText}
|
||||
|
||||
Write a thorough report that answers the original question: "${question}"
|
||||
|
||||
Format: ${config.format === "structured" ? "Structured report with numbered sections, clear hierarchies, and data tables where appropriate." : "Well-formatted markdown report with ## headings, bullet points, and inline citations."}`;
|
||||
Format: ${formatInstruction}
|
||||
Audience: ${audience}
|
||||
|
||||
Remember to use numbered citations like [1], [2] and include a ## References section at the end.`;
|
||||
|
||||
const result = await runAnalysisAgent(
|
||||
SYNTHESIS_SYSTEM,
|
||||
buildSynthesisSystem(audience),
|
||||
taskPrompt,
|
||||
cwd,
|
||||
120_000,
|
||||
@@ -105,64 +196,250 @@ Format: ${config.format === "structured" ? "Structured report with numbered sect
|
||||
);
|
||||
|
||||
if (result.success && result.text) {
|
||||
return result.text;
|
||||
// Build bibliography section
|
||||
const bibSection = buildBibliography(referenceMap);
|
||||
|
||||
// Append references if not already present
|
||||
let report = result.text;
|
||||
if (!report.includes("## References") && !report.includes("# References")) {
|
||||
report += `\n\n${bibSection}`;
|
||||
}
|
||||
|
||||
return { report, references: Array.from(referenceMap.values()) };
|
||||
}
|
||||
|
||||
// Fallback: generate a simple report from the evidence
|
||||
return generateFallbackReport(question, rounds);
|
||||
// Fallback: generate a simple structured report
|
||||
const fallbackReport = generateFallbackReport(
|
||||
question,
|
||||
rounds,
|
||||
referenceMap,
|
||||
audience,
|
||||
);
|
||||
return {
|
||||
report: fallbackReport + `\n\n${buildBibliography(referenceMap)}`,
|
||||
references: Array.from(referenceMap.values()),
|
||||
};
|
||||
}
|
||||
|
||||
/* ── Bibliography Builder ────────────────────────────────────────── */
|
||||
|
||||
/**
 * Render the `## References` section for a report.
 *
 * Entries are listed in ascending reference-id order, one per line, in the
 * form `[id] <icon> **title** — domain (url) — accessed <date>`. The icon
 * reflects the authority score: "⭐" for >= 0.8, "✓" for >= 0.5, "○" otherwise.
 *
 * @param referenceMap - References keyed by URL (only the values are read).
 * @returns Markdown text starting with the `## References` heading; a fixed
 *   "No sources cited." body when the map is empty.
 */
function buildBibliography(referenceMap: Map<string, Reference>): string {
  if (referenceMap.size === 0) return "## References\n\nNo sources cited.";

  // Array.from yields a fresh array, so sorting does not disturb the map.
  const ordered = Array.from(referenceMap.values()).sort((a, b) => a.id - b.id);

  // The trailing "\n" on the heading becomes a blank line after the join below.
  const lines: string[] = ["## References\n"];
  for (const ref of ordered) {
    const authIcon =
      ref.authorityScore >= 0.8 ? "⭐" : ref.authorityScore >= 0.5 ? "✓" : "○";

    // Keep each entry on one line and never emit an empty bold span ("****"):
    // collapse whitespace runs (including newlines) in the title and fall
    // back to the domain, then the URL, when nothing printable is left.
    const title = ref.title.replace(/\s+/g, " ").trim() || ref.domain || ref.url;

    lines.push(
      `[${ref.id}] ${authIcon} **${title}** — ${ref.domain} (${ref.url}) — accessed ${ref.accessedAt}`,
    );
  }

  return lines.join("\n");
}
|
||||
|
||||
/* ── Fallback Report ─────────────────────────────────────────────── */
|
||||
|
||||
/**
|
||||
* Fallback report when the LLM synthesis fails.
|
||||
* Produces a clean, structured report from the evidence.
|
||||
*/
|
||||
function generateFallbackReport(
|
||||
question: string,
|
||||
rounds: ResearchRound[],
|
||||
referenceMap: Map<string, Reference>,
|
||||
_audience: string,
|
||||
): string {
|
||||
const lines: string[] = [];
|
||||
const allFindings = rounds.flatMap((r) => r.findings);
|
||||
|
||||
// ── TL;DR ──
|
||||
lines.push(`# Research Report: ${question}`);
|
||||
lines.push("");
|
||||
|
||||
const highConfFindings = allFindings.filter((f) => f.confidence === "high");
|
||||
const totalHigh = highConfFindings.length;
|
||||
const total = allFindings.length;
|
||||
|
||||
lines.push("## TL;DR");
|
||||
lines.push("");
|
||||
if (highConfFindings.length > 0) {
|
||||
lines.push(
|
||||
`Based on analysis of ${total} findings across ${rounds.length} research round(s), ` +
|
||||
`${totalHigh} high-confidence conclusions were identified. ` +
|
||||
`${highConfFindings[0].title}: ${highConfFindings[0].summary}`,
|
||||
);
|
||||
} else {
|
||||
lines.push(
|
||||
`This report covers findings from ${rounds.length} research round(s) exploring "${question}". ` +
|
||||
`${total} findings were extracted, with varying levels of confidence.`,
|
||||
);
|
||||
}
|
||||
lines.push("");
|
||||
|
||||
// ── Executive Summary ──
|
||||
lines.push("## Executive Summary");
|
||||
lines.push("");
|
||||
lines.push(
|
||||
`This report summarizes findings from ${rounds.length} research round(s) exploring the question above.`,
|
||||
`This report synthesizes findings from ${rounds.length} research round(s), ` +
|
||||
`${rounds.reduce((s, r) => s + r.queries.length, 0)} search queries, ` +
|
||||
`and ${rounds.reduce((s, r) => s + r.results.length, 0)} sources.`,
|
||||
);
|
||||
lines.push("");
|
||||
|
||||
const allFindings = rounds.flatMap((r) => r.findings);
|
||||
|
||||
// ── Key Findings (tiered) ──
|
||||
if (allFindings.length > 0) {
|
||||
lines.push("## Key Findings");
|
||||
lines.push("");
|
||||
for (const finding of allFindings) {
|
||||
lines.push(`### ${finding.title}`);
|
||||
lines.push(`*Confidence: ${finding.confidence}*`);
|
||||
lines.push("");
|
||||
lines.push(finding.summary);
|
||||
lines.push("");
|
||||
if (finding.keyQuotes.length > 0) {
|
||||
lines.push(`> ${finding.keyQuotes[0]}`);
|
||||
lines.push("");
|
||||
|
||||
// High confidence first
|
||||
const highConf = allFindings.filter((f) => f.confidence === "high");
|
||||
if (highConf.length > 0) {
|
||||
lines.push("### High Confidence");
|
||||
for (const finding of highConf) {
|
||||
const refs = finding.sources
|
||||
.map((url) => referenceMap.get(url))
|
||||
.filter((r): r is Reference => !!r)
|
||||
.map((r) => `[${r.id}]`);
|
||||
lines.push(
|
||||
`- **${finding.title}** ${refs.length > 0 ? refs.join("") : ""}`,
|
||||
);
|
||||
lines.push(` - ${finding.summary}`);
|
||||
}
|
||||
if (finding.sources.length > 0) {
|
||||
lines.push("Sources:");
|
||||
for (const src of finding.sources) {
|
||||
lines.push(`- [${src}](${src})`);
|
||||
}
|
||||
lines.push("");
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// Medium confidence
|
||||
const medConf = allFindings.filter((f) => f.confidence === "medium");
|
||||
if (medConf.length > 0) {
|
||||
lines.push("### Moderate Confidence");
|
||||
for (const finding of medConf) {
|
||||
const refs = finding.sources
|
||||
.map((url) => referenceMap.get(url))
|
||||
.filter((r): r is Reference => !!r)
|
||||
.map((r) => `[${r.id}]`);
|
||||
lines.push(
|
||||
`- **${finding.title}** ${refs.length > 0 ? refs.join("") : ""}`,
|
||||
);
|
||||
lines.push(` - ${finding.summary}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// Low confidence
|
||||
const lowConf = allFindings.filter((f) => f.confidence === "low");
|
||||
if (lowConf.length > 0) {
|
||||
lines.push("### Lower Confidence (Needs Further Research)");
|
||||
for (const finding of lowConf) {
|
||||
const refs = finding.sources
|
||||
.map((url) => referenceMap.get(url))
|
||||
.filter((r): r is Reference => !!r)
|
||||
.map((r) => `[${r.id}]`);
|
||||
lines.push(
|
||||
`- **${finding.title}** ${refs.length > 0 ? refs.join("") : ""}`,
|
||||
);
|
||||
lines.push(` - ${finding.summary}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// ── Detailed Analysis ──
|
||||
lines.push("## Detailed Analysis");
|
||||
lines.push("");
|
||||
|
||||
const byAngle = new Map<string, Finding[]>();
|
||||
for (const round of rounds) {
|
||||
for (const f of round.findings) {
|
||||
const angle = round.queries[0]?.angle ?? "general";
|
||||
if (!byAngle.has(angle)) byAngle.set(angle, []);
|
||||
byAngle.get(angle)!.push(f);
|
||||
}
|
||||
}
|
||||
|
||||
for (const [angle, findings] of byAngle) {
|
||||
lines.push(`### ${angle.charAt(0).toUpperCase() + angle.slice(1)}`);
|
||||
lines.push("");
|
||||
for (const f of findings) {
|
||||
const corrStr =
|
||||
f.corroborationScore !== undefined
|
||||
? ` (corroboration: ${(f.corroborationScore * 100).toFixed(0)}%)`
|
||||
: "";
|
||||
lines.push(`**${f.title}** — *${f.confidence} confidence${corrStr}*`);
|
||||
lines.push("");
|
||||
lines.push(f.summary);
|
||||
lines.push("");
|
||||
if (f.keyQuotes.length > 0) {
|
||||
lines.push(`> ${f.keyQuotes[0]}`);
|
||||
lines.push("");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Limitations ──
|
||||
const lowConfCount = allFindings.filter(
|
||||
(f) => f.confidence === "low",
|
||||
).length;
|
||||
const noCorr = allFindings.filter(
|
||||
(f) => (f.corroborationScore ?? 0) < 0.3,
|
||||
).length;
|
||||
|
||||
lines.push("## Limitations & Knowledge Gaps");
|
||||
lines.push("");
|
||||
if (lowConfCount > 0) {
|
||||
lines.push(
|
||||
`- **${lowConfCount} of ${allFindings.length} findings** have low confidence, indicating limited or conflicting evidence.`,
|
||||
);
|
||||
}
|
||||
if (noCorr > 0) {
|
||||
lines.push(
|
||||
`- **${noCorr} findings** lack corroboration from multiple independent sources.`,
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
"- This research relied on web search results; some relevant sources may not be indexed or accessible.",
|
||||
);
|
||||
lines.push(
|
||||
"- Findings are dependent on search engine ranking and the quality of indexed content.",
|
||||
);
|
||||
lines.push("");
|
||||
|
||||
// ── Conclusion ──
|
||||
lines.push("## Conclusion");
|
||||
lines.push("");
|
||||
if (highConf.length > 0) {
|
||||
lines.push(
|
||||
`The research identified ${highConf.length} high-confidence finding(s) and ${medConf.length} moderately-supported finding(s). ` +
|
||||
`The strongest evidence relates to: ${highConf.map((f) => f.title).join(", ")}.`,
|
||||
);
|
||||
} else {
|
||||
lines.push(
|
||||
"The research surfaced relevant information but with limited high-confidence evidence. Further investigation is recommended for the identified knowledge gaps.",
|
||||
);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
lines.push("## Search Methodology");
|
||||
// ── Methodology ──
|
||||
lines.push(`*Report prepared for: ${_audience} audience*`);
|
||||
lines.push("");
|
||||
|
||||
lines.push("## Methodology");
|
||||
lines.push("");
|
||||
for (const round of rounds) {
|
||||
const failedSearches = round.queries.length - round.successfulSearches;
|
||||
lines.push(`### Round ${round.round}`);
|
||||
lines.push(
|
||||
`Queries: ${round.queries.map((q) => `"${q.query}"`).join(", ")}`,
|
||||
`Queries: ${round.queries.map((q) => `"${q.query}" [${q.angle}]`).join(", ")}`,
|
||||
);
|
||||
lines.push(`Pages scraped: ${round.results.length}`);
|
||||
lines.push(`Findings: ${round.findings.length}`);
|
||||
lines.push(`Findings extracted: ${round.findings.length}`);
|
||||
if (failedSearches > 0) {
|
||||
lines.push(`Searches failed: ${failedSearches}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
|
||||
337
src/research.ts
337
src/research.ts
@@ -2,33 +2,40 @@
|
||||
* Deep Research — Core research orchestration
|
||||
*
|
||||
* Manages the multi-round deep research process:
|
||||
* 1. Generate initial search queries
|
||||
* 2. Execute all queries in parallel via Firecrawl
|
||||
* 3. Analyze results and extract findings
|
||||
* 4. Generate follow-up queries
|
||||
* 5. Iterate for depth rounds
|
||||
* 6. Synthesize final report
|
||||
* 1. Decompose the question into sub-questions (when depth > 1)
|
||||
* 2. Generate initial search queries (per sub-question for better diversity)
|
||||
* 3. Execute all queries in parallel via Firecrawl
|
||||
* 4. Analyze results and extract findings
|
||||
* 5. Compute corroboration scores
|
||||
* 6. Generate follow-up queries for gaps
|
||||
* 7. Iterate for depth rounds
|
||||
* 8. Synthesize final report with numbered references
|
||||
*
|
||||
* Widget and progress callback patterns borrowed from ralpi's executor.
|
||||
*/
|
||||
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
||||
import type {
|
||||
Finding,
|
||||
ResearchConfig,
|
||||
SearchResult,
|
||||
EnrichedSearchResult,
|
||||
ResearchRound,
|
||||
ResearchReport,
|
||||
} from "./types";
|
||||
import type { SynthesisResult } from "./report";
|
||||
import { searchWeb } from "./firecrawl";
|
||||
import {
|
||||
generateQueries,
|
||||
generateFollowUpQueries,
|
||||
analyzeResults,
|
||||
computeCorroboration,
|
||||
decomposeQuestion,
|
||||
} from "./queries";
|
||||
import { synthesizeReport } from "./report";
|
||||
|
||||
/** Progress callback for UI updates */
|
||||
export type ResearchProgress = (update: {
|
||||
phase:
|
||||
| "decomposing"
|
||||
| "generating_queries"
|
||||
| "searching"
|
||||
| "analyzing"
|
||||
@@ -41,6 +48,86 @@ export type ResearchProgress = (update: {
|
||||
fraction?: number; // 0-1
|
||||
}) => void;
|
||||
|
||||
// ── Round-Robin Parallel Execution ──────────────────────────────────
|
||||
|
||||
/**
|
||||
* Maximum concurrent Firecrawl search requests.
|
||||
* Prevents rate limiting while still parallelizing queries.
|
||||
*/
|
||||
const MAX_SEARCH_CONCURRENT = 3;
|
||||
|
||||
/**
|
||||
* Maximum concurrent analysis agent sessions.
|
||||
*/
|
||||
const MAX_ANALYSIS_CONCURRENT = 2;
|
||||
|
||||
/**
|
||||
* Novelty cutoff for early stopping, as a 0–1 ratio (not a count of findings).
|
||||
* When assessSaturation() scores a round below this value, the remaining rounds are skipped.
|
||||
*/
|
||||
const SATURATION_THRESHOLD = 0.15; // < 15% new findings = likely saturated
|
||||
|
||||
/**
 * Map `items` through an async `mapper`, keeping at most `maxConcurrent`
 * mapper calls in flight at any time.
 *
 * A fixed pool of worker loops shares a single cursor: each worker claims the
 * next unclaimed index, awaits the mapper for it, then claims another until
 * the list is exhausted. Items are therefore started in input order, and each
 * result is written to its item's index, so the output order matches `items`
 * regardless of completion order.
 *
 * @param items - Inputs to process; an empty array resolves to `[]`.
 * @param maxConcurrent - Upper bound on simultaneous mapper calls. Values
 *   below 1 (or `NaN`) are treated as 1 so every item is still processed;
 *   fractional values are rounded down.
 * @param mapper - Async transform invoked with the item and its index.
 * @returns Results positioned at the same indices as their inputs.
 * @throws Rejects with the first mapper rejection (via `Promise.all`);
 *   callers that need per-item fault tolerance must catch inside `mapper`.
 */
async function boundedConcurrency<T, R>(
  items: T[],
  maxConcurrent: number,
  mapper: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  let cursor = 0;

  // Without this clamp a limit of 0, a negative number, or NaN would spawn no
  // workers and the function would resolve to an array of holes.
  const limit = maxConcurrent >= 1 ? Math.floor(maxConcurrent) : 1;

  const worker = async (): Promise<void> => {
    // The claim (read + increment) is synchronous, so two workers can never
    // take the same index even though they interleave at every await.
    for (let index = cursor++; index < items.length; index = cursor++) {
      results[index] = await mapper(items[index], index);
    }
  };

  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
||||
|
||||
/**
 * Score how much new information `currentRound` added compared with
 * `previousRound`.
 *
 * Despite the name, the value returned is a *novelty* score in [0, 1]:
 * 1 means everything in the current round is new, 0 means nothing is. It is
 * the weighted sum of
 *   - URL novelty (40%): share of current results whose URL was not in the
 *     previous round's results, and
 *   - finding novelty (60%): share of current findings whose title
 *     (case-insensitive) was not among the previous round's findings.
 * Only the immediately preceding round is used as the baseline.
 *
 * @param previousRound - Baseline round, or `undefined` if there is none.
 * @param currentRound - The round that just completed.
 * @returns Novelty score in [0, 1]. When there is no usable baseline (no
 *   previous round, or one that produced no findings) the score is 1, so a
 *   caller that stops on low scores does not stop for lack of a baseline.
 */
function assessSaturation(
  previousRound: ResearchRound | undefined,
  currentRound: ResearchRound,
): number {
  // Nothing to compare against: treat the round as entirely novel. Returning
  // 0 here would read as "fully saturated" to a caller that compares the
  // score with a minimum-novelty threshold.
  if (!previousRound || previousRound.findings.length === 0) return 1;

  const knownUrls = new Set(previousRound.results.map((r) => r.url));
  const urlTotal = currentRound.results.length;
  const urlFresh = currentRound.results.filter(
    (r) => !knownUrls.has(r.url),
  ).length;
  // An empty round contributes no novelty rather than dividing by zero.
  const urlNovelty = urlTotal > 0 ? urlFresh / urlTotal : 0;

  const knownTitles = new Set(
    previousRound.findings.map((f) => f.title.toLowerCase()),
  );
  const findingTotal = currentRound.findings.length;
  const findingFresh = currentRound.findings.filter(
    (f) => !knownTitles.has(f.title.toLowerCase()),
  ).length;
  const findingNovelty = findingTotal > 0 ? findingFresh / findingTotal : 0;

  // Weight: URL novelty (40%) + finding novelty (60%)
  return urlNovelty * 0.4 + findingNovelty * 0.6;
}
|
||||
|
||||
/**
|
||||
* Run a complete deep research session.
|
||||
*/
|
||||
@@ -54,15 +141,35 @@ export async function runDeepResearch(
|
||||
const rounds: ResearchRound[] = [];
|
||||
let totalSearches = 0;
|
||||
let totalPages = 0;
|
||||
let subQuestions: string[] = [];
|
||||
|
||||
// ── Round 1: Generate initial queries ──────────────────────────────
|
||||
// ── Phase: Decompose question into sub-questions ────────────────
|
||||
|
||||
if (config.depth > 1) {
|
||||
onProgress({
|
||||
phase: "decomposing",
|
||||
round: 1,
|
||||
totalRounds: config.depth,
|
||||
message: "Decomposing research question into sub-topics...",
|
||||
fraction: 0,
|
||||
});
|
||||
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
|
||||
subQuestions = await decomposeQuestion(config.question, ctx.cwd, signal);
|
||||
}
|
||||
|
||||
// ── Phase: Generate initial queries ─────────────────────────────
|
||||
|
||||
onProgress({
|
||||
phase: "generating_queries",
|
||||
round: 1,
|
||||
totalRounds: config.depth,
|
||||
message: "Generating initial search queries...",
|
||||
fraction: 0,
|
||||
message:
|
||||
subQuestions.length > 0
|
||||
? `Generating queries across ${subQuestions.length} sub-topics...`
|
||||
: "Generating initial search queries...",
|
||||
fraction: 0.05,
|
||||
});
|
||||
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
@@ -72,13 +179,14 @@ export async function runDeepResearch(
|
||||
config.breadth,
|
||||
ctx.cwd,
|
||||
signal,
|
||||
subQuestions.length > 0 ? subQuestions : undefined,
|
||||
);
|
||||
|
||||
if (queries.length === 0) {
|
||||
throw new Error("Failed to generate any search queries");
|
||||
}
|
||||
|
||||
// ── Execute rounds ─────────────────────────────────────────────────
|
||||
// ── Execute rounds ───────────────────────────────────────────────
|
||||
|
||||
for (let round = 1; round <= config.depth; round++) {
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
@@ -99,125 +207,177 @@ export async function runDeepResearch(
|
||||
break;
|
||||
}
|
||||
|
||||
// ── Search phase ──────────────────────────────────────────────────
|
||||
// ── Search phase (parallel with round-robin) ────────────────────
|
||||
|
||||
onProgress({
|
||||
phase: "searching",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Searching with ${currentQueries.length} queries...`,
|
||||
message: `Searching ${currentQueries.length} queries in parallel...`,
|
||||
fraction: 0.25,
|
||||
});
|
||||
|
||||
const searchResults: SearchResult[] = [];
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
|
||||
for (let i = 0; i < currentQueries.length; i++) {
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
// Run searches in parallel using round-robin bounded concurrency.
|
||||
// Each mapper call runs independently; failures are caught per-query.
|
||||
const searchResultsArrays: (EnrichedSearchResult[] | null)[] =
|
||||
await boundedConcurrency(
|
||||
currentQueries,
|
||||
MAX_SEARCH_CONCURRENT,
|
||||
async (q, i) => {
|
||||
onProgress({
|
||||
phase: "searching",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Searching: "${q.query.slice(0, 60)}..."`,
|
||||
detail: q.rationale,
|
||||
fraction: 0.25 + (i / currentQueries.length) * 0.25,
|
||||
});
|
||||
|
||||
const q = currentQueries[i];
|
||||
onProgress({
|
||||
phase: "searching",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Searching: "${q.query.slice(0, 60)}..."`,
|
||||
detail: q.rationale,
|
||||
fraction: 0.25 + (i / currentQueries.length) * 0.25,
|
||||
});
|
||||
try {
|
||||
return await searchWeb(q.query, 5, signal);
|
||||
} catch (error) {
|
||||
const errorMsg =
|
||||
error instanceof Error ? error.message : String(error);
|
||||
onProgress({
|
||||
phase: "searching",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Search failed: ${errorMsg.slice(0, 80)}`,
|
||||
fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
|
||||
});
|
||||
return null;
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
const results = await searchWeb(q.query, 5, signal);
|
||||
searchResults.push(...results);
|
||||
} catch (error) {
|
||||
// Individual search failure shouldn't crash the whole round
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
onProgress({
|
||||
phase: "searching",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Search failed: ${errorMsg.slice(0, 80)}`,
|
||||
fraction: 0.25 + ((i + 1) / currentQueries.length) * 0.25,
|
||||
});
|
||||
}
|
||||
|
||||
// Small delay between searches to avoid rate limits
|
||||
if (i < currentQueries.length - 1) {
|
||||
await new Promise((r) => setTimeout(r, 300));
|
||||
}
|
||||
}
|
||||
// Flatten results, filtering out nulls (failed searches)
|
||||
const searchResults: EnrichedSearchResult[] = searchResultsArrays
|
||||
.filter((r): r is EnrichedSearchResult[] => r !== null)
|
||||
.flat();
|
||||
|
||||
totalSearches += currentQueries.length;
|
||||
|
||||
// Deduplicate results by URL
|
||||
const seen = new Set<string>();
|
||||
const uniqueResults = searchResults.filter((r) => {
|
||||
if (seen.has(r.url)) return false;
|
||||
seen.add(r.url);
|
||||
return true;
|
||||
});
|
||||
// Deduplicate results by URL (prefer higher authority)
|
||||
const seen = new Map<string, EnrichedSearchResult>();
|
||||
for (const r of searchResults) {
|
||||
const existing = seen.get(r.url);
|
||||
if (!existing || r.authorityScore > existing.authorityScore) {
|
||||
seen.set(r.url, r);
|
||||
}
|
||||
}
|
||||
const uniqueResults = Array.from(seen.values());
|
||||
|
||||
totalPages += uniqueResults.length;
|
||||
|
||||
// ── Analyze phase ──────────────────────────────────────────────────
|
||||
// ── Analyze phase (parallel with round-robin) ──────────────────
|
||||
|
||||
onProgress({
|
||||
phase: "analyzing",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Analyzing ${uniqueResults.length} search results...`,
|
||||
message: `Analyzing ${uniqueResults.length} search results in parallel...`,
|
||||
fraction: 0.6,
|
||||
});
|
||||
|
||||
// Analyze results per query group
|
||||
const allFindings: ResearchRound["findings"] = [];
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
|
||||
// Build query-result pairs for parallel analysis
|
||||
const analysisTasks: Array<{
|
||||
query: (typeof currentQueries)[number];
|
||||
results: typeof uniqueResults;
|
||||
index: number;
|
||||
}> = [];
|
||||
|
||||
const resultsPerQuery = Math.ceil(
|
||||
uniqueResults.length / currentQueries.length,
|
||||
);
|
||||
|
||||
for (let i = 0; i < currentQueries.length; i++) {
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
|
||||
const q = currentQueries[i];
|
||||
// Find results that match this query (loosely: take a portion of results)
|
||||
const resultsPerQuery = Math.ceil(
|
||||
uniqueResults.length / currentQueries.length,
|
||||
);
|
||||
const startIdx = i * resultsPerQuery;
|
||||
const endIdx = Math.min(startIdx + resultsPerQuery, uniqueResults.length);
|
||||
const queryResults = uniqueResults.slice(startIdx, endIdx);
|
||||
|
||||
if (queryResults.length === 0) continue;
|
||||
|
||||
onProgress({
|
||||
phase: "analyzing",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Analyzing results for "${q.query.slice(0, 40)}..."`,
|
||||
fraction: 0.6 + (i / currentQueries.length) * 0.2,
|
||||
analysisTasks.push({
|
||||
query: currentQueries[i],
|
||||
results: queryResults,
|
||||
index: i,
|
||||
});
|
||||
|
||||
try {
|
||||
const findings = await analyzeResults(
|
||||
q.query,
|
||||
queryResults,
|
||||
ctx.cwd,
|
||||
signal,
|
||||
);
|
||||
allFindings.push(...findings);
|
||||
} catch {
|
||||
// Analysis failure shouldn't crash the round
|
||||
}
|
||||
}
|
||||
|
||||
// Run analyses in parallel using round-robin bounded concurrency
|
||||
const findingsArrays: Finding[][] = await boundedConcurrency(
|
||||
analysisTasks,
|
||||
MAX_ANALYSIS_CONCURRENT,
|
||||
async (task) => {
|
||||
onProgress({
|
||||
phase: "analyzing",
|
||||
round,
|
||||
totalRounds: config.depth,
|
||||
message: `Analyzing: "${task.query.query.slice(0, 40)}..."`,
|
||||
fraction: 0.6 + (task.index / currentQueries.length) * 0.2,
|
||||
});
|
||||
|
||||
try {
|
||||
return await analyzeResults(
|
||||
task.query.query,
|
||||
task.results,
|
||||
ctx.cwd,
|
||||
signal,
|
||||
);
|
||||
} catch {
|
||||
// Analysis failure shouldn't crash the round
|
||||
return [];
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// Flatten all findings
|
||||
const allFindings: ResearchRound["findings"] = findingsArrays.flat();
|
||||
|
||||
// ── Corroboration pass ────────────────────────────────────────
|
||||
// Cross-reference findings to compute corroboration scores
|
||||
const corroboratedFindings = computeCorroboration(allFindings);
|
||||
|
||||
// Record this round
|
||||
const successfulSearches = currentQueries.length;
|
||||
const followUpTopics = corroboratedFindings
|
||||
.filter(
|
||||
(f: Finding) =>
|
||||
f.confidence === "low" && (f.corroborationScore ?? 0) < 0.5,
|
||||
)
|
||||
.map((f: Finding) => f.title);
|
||||
|
||||
rounds.push({
|
||||
round,
|
||||
queries: currentQueries,
|
||||
results: uniqueResults,
|
||||
findings: allFindings,
|
||||
followUpTopics: allFindings
|
||||
.filter((f) => f.confidence === "low")
|
||||
.map((f) => f.title),
|
||||
findings: corroboratedFindings,
|
||||
followUpTopics,
|
||||
successfulSearches,
|
||||
});
|
||||
|
||||
// ── Adaptive depth: check for saturation ──────────────────────
|
||||
if (round > 1 && round < config.depth) {
|
||||
const saturation = assessSaturation(
|
||||
rounds[rounds.length - 2],
|
||||
rounds[rounds.length - 1],
|
||||
);
|
||||
if (saturation < SATURATION_THRESHOLD) {
|
||||
onProgress({
|
||||
phase: "synthesizing",
|
||||
message: `Information saturation reached (${(saturation * 100).toFixed(0)}% novelty) — synthesizing early`,
|
||||
fraction: 0.85,
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Synthesis phase ─────────────────────────────────────────────────
|
||||
// ── Synthesis phase ───────────────────────────────────────────────
|
||||
|
||||
onProgress({
|
||||
phase: "synthesizing",
|
||||
@@ -227,13 +387,15 @@ export async function runDeepResearch(
|
||||
|
||||
if (signal?.aborted) throw new Error("Research cancelled");
|
||||
|
||||
const finalReport = await synthesizeReport(
|
||||
const synthesisResult: SynthesisResult = await synthesizeReport(
|
||||
config.question,
|
||||
rounds,
|
||||
config,
|
||||
ctx.cwd,
|
||||
signal,
|
||||
);
|
||||
const finalReport = synthesisResult.report;
|
||||
const references = synthesisResult.references;
|
||||
|
||||
const durationMs = Date.now() - startTime;
|
||||
|
||||
@@ -250,5 +412,6 @@ export async function runDeepResearch(
|
||||
totalSearches,
|
||||
totalPagesScraped: totalPages,
|
||||
durationMs,
|
||||
references,
|
||||
};
|
||||
}
|
||||
|
||||
47
src/types.ts
47
src/types.ts
@@ -2,6 +2,16 @@
|
||||
* Deep Research — type definitions
|
||||
*/
|
||||
|
||||
/**
 * Content type classification for a source.
 *
 * Carried on every enriched search result as
 * `EnrichedSearchResult.contentType`.
 */
|
||||
export type ContentType =
|
||||
| "documentation"
|
||||
| "paper"
|
||||
| "news"
|
||||
| "blog"
|
||||
| "forum"
|
||||
| "official"
|
||||
| "other";
|
||||
|
||||
/** A single search result from Firecrawl */
|
||||
export interface SearchResult {
|
||||
title: string;
|
||||
@@ -10,6 +20,14 @@ export interface SearchResult {
|
||||
markdown: string;
|
||||
}
|
||||
|
||||
/**
 * Enriched search result with source authority metadata.
 *
 * Extends the raw `SearchResult` with the fields below; this is the element
 * type of `ResearchRound.results`.
 */
|
||||
export interface EnrichedSearchResult extends SearchResult {
|
||||
domain: string;
|
||||
// When two results in a round share a URL, the research loop keeps the one
// with the higher authorityScore.
authorityScore: number; // 0.0 – 1.0
|
||||
// NOTE(review): presumably null when no publication date could be determined — confirm where results are enriched.
publishedDate: Date | null;
|
||||
contentType: ContentType;
|
||||
}
|
||||
|
||||
/** A finding extracted from search results by an analysis agent */
|
||||
export interface Finding {
|
||||
title: string;
|
||||
@@ -17,6 +35,22 @@ export interface Finding {
|
||||
sources: string[];
|
||||
keyQuotes: string[];
|
||||
confidence: "high" | "medium" | "low";
|
||||
/** 0.0 – 1.0: how many independent sources support this finding */
|
||||
corroborationScore?: number;
|
||||
/** Authority score of the best source supporting this finding */
|
||||
bestSourceAuthority?: number;
|
||||
/** Average authority score across all sources */
|
||||
avgSourceAuthority?: number;
|
||||
}
|
||||
|
||||
/**
 * A numbered reference with full metadata.
 *
 * References are what the report cites: the evidence text and bibliography
 * print them as `[id]`, and the final `ResearchReport` exposes them as
 * `references`.
 */
|
||||
export interface Reference {
|
||||
id: number; // citation number, rendered as [id]; the bibliography is sorted by it
|
||||
url: string;
|
||||
title: string;
|
||||
domain: string;
|
||||
authorityScore: number; // bibliography icon thresholds: >= 0.8 "⭐", >= 0.5 "✓", else "○"
|
||||
accessedAt: string; // ISO date string
|
||||
}
|
||||
|
||||
/** A generated search query with its intent/rationale */
|
||||
@@ -30,18 +64,28 @@ export interface SearchQuery {
|
||||
export interface ResearchRound {
  // One completed search → analyze pass of a research session.

  /** Round number as assigned by the research loop (which counts from 1). */
  round: number;
  /** The queries executed in this round. */
  queries: SearchQuery[];
  /**
   * URL-deduplicated search results for the round. Declared once, as the
   * enriched type: `EnrichedSearchResult` extends `SearchResult`, so readers
   * that only need the base fields are unaffected.
   */
  results: EnrichedSearchResult[];
  /** Findings extracted from `results`. */
  findings: Finding[];
  /** Any follow-up questions/angles the analysis suggests */
  followUpTopics: string[];
  /**
   * Number of searches in this round counted as successful. The fallback
   * report derives the failed count as `queries.length - successfulSearches`.
   */
  successfulSearches: number;
}
|
||||
|
||||
/**
 * Target audience expertise level.
 *
 * Optional on `ResearchConfig`; report synthesis falls back to "general"
 * when it is not set.
 */
|
||||
export type Audience = "expert" | "general" | "executive";
|
||||
|
||||
/**
 * Configuration for a research session.
 *
 * Read by the research loop (depth, breadth, question) and by report
 * synthesis (format, audience).
 */
|
||||
export interface ResearchConfig {
|
||||
// The question being researched; it is also quoted in the synthesis prompt.
question: string;
|
||||
depth: number; // 1-3 rounds; a depth > 1 also enables the sub-question decomposition step
|
||||
breadth: number; // queries per round (1-5)
|
||||
// "structured" selects the numbered-sections report instruction; otherwise the markdown instruction is used.
format: "markdown" | "structured";
|
||||
// Report synthesis treats a missing audience as "general".
audience?: Audience;
|
||||
/** Focus on specific research angles only (empty = all angles) */
|
||||
focus?: string[];
|
||||
/** Show the research methodology section in the report */
|
||||
showMethodology?: boolean;
|
||||
}
|
||||
|
||||
/** Final research report */
|
||||
@@ -52,4 +96,5 @@ export interface ResearchReport {
|
||||
totalSearches: number;
|
||||
totalPagesScraped: number;
|
||||
durationMs: number;
|
||||
references: Reference[];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user