feat(asl): Complete Deep Research V2.0 core development

Backend:
- Add SSE streaming client (unifuncsSseClient) replacing async polling
- Add paragraph-based reasoning parser with mergeConsecutiveThinking
- Add requirement expansion service (DeepSeek-V3 PICOS+MeSH)
- Add Word export service with Pandoc, inline hyperlinks, and reference link expansion
- Add deep research V2 worker with 2s log flush and Chinese source prompt
- Add config for 5 curated data sources (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals)
- Add 4 API endpoints (generate-requirement/tasks/task-status/export-word)
- Update Prisma schema with 6 new V2.0 fields on AslResearchTask
- Add DB migration for V2.0 fields
- Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section)

Frontend:
- Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal)
- Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView
- Add react-markdown + remark-gfm for report rendering
- Add custom link component showing visible URLs after references
- Add useDeepResearchTask polling hook
- Add deep research TypeScript types

Tests:
- Add E2E test, smoke test, and Chinese data source test scripts

Docs:
- Update ASL module status (v2.0 - core features complete)
- Update system status (v6.1 - ASL V2.0 milestone)
- Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results)
- Update module auth specification (test script guidelines)
- Update V2.0 development plan

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-23 13:21:52 +08:00
parent b06daecacd
commit 8f06d4f929
39 changed files with 5605 additions and 417 deletions


@@ -0,0 +1,115 @@
/**
 * Reasoning Content Parser
 *
 * Parses the incremental reasoning_content text returned by Unifuncs into
 * structured log entries.
 *
 * Core strategy: split on paragraph breaks (\n\n); thinking lines inside the
 * same paragraph are merged into a single log entry, while action lines
 * (search / read / analyze) each become a standalone entry.
 */

export interface ExecutionLogEntry {
  type: 'thinking' | 'searching' | 'reading' | 'analyzing' | 'summary' | 'info';
  title: string;
  text: string;
  ts: string;
}

// Action-detection patterns. Each carries both Chinese and English
// alternatives so actions are recognised in either language.
const SEARCH_PATTERN = /(?:搜索|searching|search(?:ing)?\s+for|查找|检索|looking\s+for)[:\s]+(.+)/i;
const READ_PATTERN = /(?:阅读|reading|read(?:ing)?|访问|打开|visiting|open(?:ing)?)\s*[:\s]*(https?:\/\/\S+|\S+\.(?:com|org|net|gov|cn)\S*)/i;
const ANALYZE_PATTERN = /(?:分析|analyz|发现|总结|归纳|结论|found|result|finding|conclud|summariz)/i;

/**
 * Parse an increment of reasoning text into paragraph-level log entries.
 * Consecutive thinking lines are merged into one entry; action lines
 * (search / read / analyze) each become a standalone entry.
 */
export function parseReasoningIncrement(
  newText: string,
  _previousLength: number
): ExecutionLogEntry[] {
  if (!newText) return [];

  const entries: ExecutionLogEntry[] = [];
  const now = new Date().toISOString();
  const paragraphs = newText.split(/\n{2,}/);

  for (const para of paragraphs) {
    const lines = para.split('\n').filter(l => l.trim());
    if (lines.length === 0) continue;

    let thinkingBuf: string[] = [];
    const flushThinking = () => {
      if (thinkingBuf.length === 0) return;
      const text = thinkingBuf.join('').slice(0, 800);
      if (text.length > 10) {
        entries.push({ type: 'thinking', title: '思考', text, ts: now });
      }
      thinkingBuf = [];
    };

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;

      const searchMatch = trimmed.match(SEARCH_PATTERN);
      if (searchMatch) {
        flushThinking();
        entries.push({ type: 'searching', title: '搜索', text: searchMatch[1].trim(), ts: now });
        continue;
      }

      const readMatch = trimmed.match(READ_PATTERN);
      if (readMatch) {
        flushThinking();
        entries.push({ type: 'reading', title: '阅读页面', text: readMatch[1].trim(), ts: now });
        continue;
      }

      if (ANALYZE_PATTERN.test(trimmed) && trimmed.length > 20) {
        flushThinking();
        entries.push({ type: 'analyzing', title: '分析', text: trimmed.slice(0, 500), ts: now });
        continue;
      }

      thinkingBuf.push(trimmed);
    }

    flushThinking();
  }

  return entries;
}

/**
 * Merge consecutive 'thinking' entries into a single entry.
 * Called by the worker before writing to the DB, to reduce fragmentation.
 */
export function mergeConsecutiveThinking(entries: ExecutionLogEntry[]): ExecutionLogEntry[] {
  if (entries.length <= 1) return entries;

  const merged: ExecutionLogEntry[] = [];
  let current = { ...entries[0] };

  for (let i = 1; i < entries.length; i++) {
    if (entries[i].type === 'thinking' && current.type === 'thinking') {
      current.text = (current.text + ' ' + entries[i].text).slice(0, 800);
    } else {
      merged.push(current);
      current = { ...entries[i] };
    }
  }
  merged.push(current);

  return merged;
}

/**
 * Extract summary-level log entries from the full reasoning_content in one pass.
 */
export function parseFullReasoning(fullText: string): ExecutionLogEntry[] {
  if (!fullText) return [];
  return parseReasoningIncrement(fullText, 0);
}
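
A minimal usage sketch (editor's illustration, not part of the diff; the relative import path and the sample reasoning text are assumptions):

import { parseReasoningIncrement, mergeConsecutiveThinking } from './reasoningParser.js'; // path assumed

// A hypothetical incremental chunk: one thinking line, then two action lines.
const chunk = [
  '先梳理 ASL 灌注成像在临床中的应用背景与研究现状',
  '搜索: ASL perfusion MRI clinical applications',
  '',
  '阅读: https://pubmed.ncbi.nlm.nih.gov/12345678/',
].join('\n');

const entries = mergeConsecutiveThinking(parseReasoningIncrement(chunk, 0));
// => [ { type: 'thinking', ... },
//      { type: 'searching', text: 'ASL perfusion MRI clinical applications', ... },
//      { type: 'reading', text: 'https://pubmed.ncbi.nlm.nih.gov/12345678/', ... } ]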


@@ -0,0 +1,113 @@
/**
 * Deep Research V2.0 — Result Parser
 *
 * Responsibilities:
 * 1. Split the Unifuncs content into synthesisReport + resultList.
 * 2. safeParseJsonList: four-layer fault-tolerant JSON parsing.
 */
import { logger } from '../../../common/logging/index.js';

export interface LiteratureItem {
  title: string;
  authors?: string;
  journal?: string;
  year?: number | string;
  doi?: string;
  pmid?: string;
  url?: string;
  abstract?: string;
  studyType?: string;
}

/**
 * Split the synthesis report and the literature list out of the content
 * returned by Unifuncs.
 */
export function parseContent(content: string): {
  synthesisReport: string;
  resultList: LiteratureItem[] | null;
} {
  if (!content) {
    return { synthesisReport: '', resultList: null };
  }

  // Preferred path: a fenced ```json block carries the literature list;
  // everything around it is the synthesis report.
  const jsonBlockMatch = content.match(/```json\s*([\s\S]*?)```/);
  if (jsonBlockMatch) {
    const beforeJson = content.slice(0, content.indexOf('```json')).trim();
    const jsonRaw = jsonBlockMatch[1];
    const resultList = safeParseJsonList(jsonRaw);
    const afterJsonEnd = content.indexOf('```', content.indexOf('```json') + 7) + 3;
    const afterJson = content.slice(afterJsonEnd).trim();
    const synthesisReport = (beforeJson + (afterJson ? '\n\n' + afterJson : '')).trim();
    return { synthesisReport: synthesisReport || content, resultList };
  }

  // Fallback: no JSON block — recover bare PubMed links as a minimal list.
  const links = extractPubMedLinks(content);
  if (links.length > 0) {
    const resultList: LiteratureItem[] = links.map(url => ({
      title: '',
      url,
      pmid: extractPmidFromUrl(url) || undefined,
    }));
    return { synthesisReport: content, resultList };
  }

  return { synthesisReport: content, resultList: null };
}

/**
 * Four-layer fault-tolerant JSON parsing: strip code fences, drop trailing
 * commas, JSON.parse, then per-object regex recovery.
 */
export function safeParseJsonList(raw: string | null): LiteratureItem[] | null {
  if (!raw) return null;

  // Layer 1: strip any markdown code fences.
  let cleaned = raw.replace(/```json\s*/gi, '').replace(/```\s*/g, '');
  // Layer 2: remove trailing commas before } or ].
  cleaned = cleaned.replace(/,\s*([}\]])/g, '$1');

  // Layer 3: standard JSON.parse.
  try {
    const parsed = JSON.parse(cleaned);
    return Array.isArray(parsed) ? parsed : [parsed];
  } catch {
    logger.warn('[resultParser] Standard JSON.parse failed, trying regex extraction');
  }

  // Layer 4: extract flat (non-nested) {...} objects one at a time.
  const objects: any[] = [];
  const regex = /\{[^{}]*\}/g;
  let match;
  while ((match = regex.exec(cleaned)) !== null) {
    try {
      objects.push(JSON.parse(match[0]));
    } catch {
      // skip unparseable fragment
    }
  }
  if (objects.length > 0) {
    logger.info('[resultParser] Regex extraction recovered items', { count: objects.length });
    return objects;
  }

  logger.warn('[resultParser] All parsing strategies failed');
  return null;
}

function extractPubMedLinks(content: string): string[] {
  const linkSet = new Set<string>();
  const pattern = /https?:\/\/pubmed\.ncbi\.nlm\.nih\.gov\/(\d+)\/?/gi;
  let match;
  while ((match = pattern.exec(content)) !== null) {
    linkSet.add(`https://pubmed.ncbi.nlm.nih.gov/${match[1]}/`);
  }
  return Array.from(linkSet);
}

function extractPmidFromUrl(url: string): string | null {
  const m = url.match(/pubmed\.ncbi\.nlm\.nih\.gov\/(\d+)/);
  return m ? m[1] : null;
}
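
A minimal usage sketch (editor's illustration, not part of the diff; the import path and sample payload are assumptions). It exercises the fenced-JSON path, including the trailing-comma repair in layer 2:

import { parseContent } from './resultParser.js'; // path assumed

// Hypothetical Unifuncs content: report text, then a fenced JSON list
// with a trailing comma that layer 2 of safeParseJsonList removes.
const content = [
  '## 综合报告',
  'Evidence summary for ASL perfusion imaging ...',
  '',
  '```json',
  '[{"title": "ASL perfusion in acute stroke", "pmid": "12345678", "year": 2024,}]',
  '```',
].join('\n');

const { synthesisReport, resultList } = parseContent(content);
// synthesisReport => the report text with the JSON block removed
// resultList      => [{ title: 'ASL perfusion in acute stroke', pmid: '12345678', year: 2024 }]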