feat(asl): Complete Deep Research V2.0 core development
Backend:
- Add SSE streaming client (unifuncsSseClient) replacing async polling
- Add paragraph-based reasoning parser with mergeConsecutiveThinking
- Add requirement expansion service (DeepSeek-V3 PICOS+MeSH)
- Add Word export service with Pandoc, inline hyperlinks, reference link expansion
- Add deep research V2 worker with 2s log flush and Chinese source prompt
- Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals)
- Add 4 API endpoints (generate-requirement/tasks/task-status/export-word)
- Update Prisma schema with 6 new V2.0 fields on AslResearchTask
- Add DB migration for V2.0 fields
- Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section)

Frontend:
- Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal)
- Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView
- Add react-markdown + remark-gfm for report rendering
- Add custom link component showing visible URLs after references
- Add useDeepResearchTask polling hook
- Add deep research TypeScript types

Tests:
- Add E2E test, smoke test, and Chinese data source test scripts

Docs:
- Update ASL module status (v2.0 - core features complete)
- Update system status (v6.1 - ASL V2.0 milestone)
- Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results)
- Update module auth specification (test script guidelines)
- Update V2.0 development plan

Co-authored-by: Cursor <cursoragent@cursor.com>
backend/src/modules/asl/utils/reasoningParser.ts (new file, 115 lines)
@@ -0,0 +1,115 @@
/**
 * Reasoning Content parser
 *
 * Parses the incremental reasoning_content text returned by Unifuncs into
 * structured log entries.
 *
 * Core strategy: split on paragraph breaks (\n\n); thinking content within the
 * same paragraph is merged into a single log entry, and only action lines
 * (search / read / analyze) become entries of their own.
 */

import { logger } from '../../../common/logging/index.js';

export interface ExecutionLogEntry {
  type: 'thinking' | 'searching' | 'reading' | 'analyzing' | 'summary' | 'info';
  title: string;
  text: string;
  ts: string;
}

const SEARCH_PATTERN = /(?:搜索|searching|search(?:ing)?\s+for|查找|检索|looking\s+for)[::\s]+(.+)/i;
const READ_PATTERN = /(?:阅读|reading|read(?:ing)?|访问|打开|visiting|open(?:ing)?)\s*[::\s]*(https?:\/\/\S+|\S+\.(?:com|org|net|gov|cn)\S*)/i;
const ANALYZE_PATTERN = /(?:分析|analyz|发现|总结|归纳|结论|found|result|finding|conclud|summariz)/i;

/**
 * Parse an increment of reasoning text into paragraph-level log entries.
 * Consecutive thinking lines merge into a single entry; action lines
 * (search/read/analyze) each become their own entry.
 */
export function parseReasoningIncrement(
  newText: string,
  _previousLength: number
): ExecutionLogEntry[] {
  if (!newText) return [];

  const entries: ExecutionLogEntry[] = [];
  const now = new Date().toISOString();

  const paragraphs = newText.split(/\n{2,}/);

  for (const para of paragraphs) {
    const lines = para.split('\n').filter(l => l.trim());
    if (lines.length === 0) continue;

    let thinkingBuf: string[] = [];

    const flushThinking = () => {
      if (thinkingBuf.length === 0) return;
      const text = thinkingBuf.join('').slice(0, 800); // cap each entry at 800 chars
      if (text.length > 10) { // drop fragments too short to be meaningful
        entries.push({ type: 'thinking', title: '思考', text, ts: now }); // title: "Thinking"
      }
      thinkingBuf = [];
    };

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;

      const searchMatch = trimmed.match(SEARCH_PATTERN);
      if (searchMatch) {
        flushThinking();
        entries.push({ type: 'searching', title: '搜索', text: searchMatch[1].trim(), ts: now }); // title: "Searching"
        continue;
      }

      const readMatch = trimmed.match(READ_PATTERN);
      if (readMatch) {
        flushThinking();
        entries.push({ type: 'reading', title: '阅读页面', text: readMatch[1].trim(), ts: now }); // title: "Reading page"
        continue;
      }

      if (ANALYZE_PATTERN.test(trimmed) && trimmed.length > 20) {
        flushThinking();
        entries.push({ type: 'analyzing', title: '分析', text: trimmed.slice(0, 500), ts: now }); // title: "Analyzing"
        continue;
      }

      thinkingBuf.push(trimmed);
    }

    flushThinking();
  }

  return entries;
}

/**
 * Merge consecutive entries of the same type (thinking) into one.
 * Called by the worker before writing to the DB, to reduce fragmentation.
 */
export function mergeConsecutiveThinking(entries: ExecutionLogEntry[]): ExecutionLogEntry[] {
  if (entries.length <= 1) return entries;

  const merged: ExecutionLogEntry[] = [];
  let current = { ...entries[0] };

  for (let i = 1; i < entries.length; i++) {
    if (entries[i].type === 'thinking' && current.type === 'thinking') {
      current.text = (current.text + ' ' + entries[i].text).slice(0, 800);
    } else {
      merged.push(current);
      current = { ...entries[i] };
    }
  }
  merged.push(current);

  return merged;
}

/**
 * Extract summary-level log entries from the full reasoning_content in one pass.
 */
export function parseFullReasoning(fullText: string): ExecutionLogEntry[] {
  if (!fullText) return [];
  return parseReasoningIncrement(fullText, 0);
}
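For orientation, a minimal sketch of how a worker might consume this parser. The async iterable of SSE deltas and the collectLogs helper are hypothetical, not part of this commit; only parseReasoningIncrement, mergeConsecutiveThinking, and ExecutionLogEntry come from the file above.

import {
  parseReasoningIncrement,
  mergeConsecutiveThinking,
  type ExecutionLogEntry,
} from './reasoningParser.js';

// Hypothetical worker-side consumer: parse each SSE reasoning delta as it
// arrives, then collapse fragmented 'thinking' entries before a DB flush
// (e.g. the 2s log flush mentioned in the commit message).
async function collectLogs(deltas: AsyncIterable<string>): Promise<ExecutionLogEntry[]> {
  const raw: ExecutionLogEntry[] = [];
  let seenLength = 0;
  for await (const chunk of deltas) {
    raw.push(...parseReasoningIncrement(chunk, seenLength));
    seenLength += chunk.length;
  }
  return mergeConsecutiveThinking(raw);
}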
backend/src/modules/asl/utils/resultParser.ts (new file, 113 lines)
@@ -0,0 +1,113 @@
/**
 * Deep Research V2.0 — result parser
 *
 * Responsibilities:
 * 1. Split the Unifuncs content into synthesisReport + resultList
 * 2. safeParseJsonList: crash-proof JSON parsing with 4 fallback layers
 *    (strip code fences → repair trailing commas → JSON.parse → per-object regex recovery)
 */

import { logger } from '../../../common/logging/index.js';

export interface LiteratureItem {
  title: string;
  authors?: string;
  journal?: string;
  year?: number | string;
  doi?: string;
  pmid?: string;
  url?: string;
  abstract?: string;
  studyType?: string;
}

/**
 * Split the synthesis report and the literature list out of the content
 * returned by Unifuncs.
 */
export function parseContent(content: string): {
  synthesisReport: string;
  resultList: LiteratureItem[] | null;
} {
  if (!content) {
    return { synthesisReport: '', resultList: null };
  }

  const jsonBlockMatch = content.match(/```json\s*([\s\S]*?)```/);

  if (jsonBlockMatch) {
    const beforeJson = content.slice(0, content.indexOf('```json')).trim();
    const jsonRaw = jsonBlockMatch[1];

    const resultList = safeParseJsonList(jsonRaw);

    const afterJsonEnd = content.indexOf('```', content.indexOf('```json') + 7) + 3;
    const afterJson = content.slice(afterJsonEnd).trim();

    const synthesisReport = (beforeJson + (afterJson ? '\n\n' + afterJson : '')).trim();

    return { synthesisReport: synthesisReport || content, resultList };
  }

  // No JSON block: fall back to harvesting PubMed links from the report body.
  const links = extractPubMedLinks(content);
  if (links.length > 0) {
    const resultList: LiteratureItem[] = links.map(url => ({
      title: '',
      url,
      pmid: extractPmidFromUrl(url) || undefined,
    }));
    return { synthesisReport: content, resultList };
  }

  return { synthesisReport: content, resultList: null };
}

/**
 * Crash-proof JSON parsing with 4 fallback layers.
 */
export function safeParseJsonList(raw: string | null): LiteratureItem[] | null {
  if (!raw) return null;

  // Layer 1: strip any markdown code fences.
  let cleaned = raw.replace(/```json\s*/gi, '').replace(/```\s*/g, '');

  // Layer 2: remove trailing commas before } or ].
  cleaned = cleaned.replace(/,\s*([}\]])/g, '$1');

  // Layer 3: standard parse.
  try {
    const parsed = JSON.parse(cleaned);
    return Array.isArray(parsed) ? parsed : [parsed];
  } catch {
    logger.warn('[resultParser] Standard JSON.parse failed, trying regex extraction');
  }

  // Layer 4: recover individual flat objects (LiteratureItem has no nesting).
  const objects: any[] = [];
  const regex = /\{[^{}]*\}/g;
  let match;
  while ((match = regex.exec(cleaned)) !== null) {
    try {
      objects.push(JSON.parse(match[0]));
    } catch {
      // skip unparseable fragment
    }
  }

  if (objects.length > 0) {
    logger.info('[resultParser] Regex extraction recovered items', { count: objects.length });
    return objects;
  }

  logger.warn('[resultParser] All parsing strategies failed');
  return null;
}

function extractPubMedLinks(content: string): string[] {
  const linkSet = new Set<string>();
  const pattern = /https?:\/\/pubmed\.ncbi\.nlm\.nih\.gov\/(\d+)\/?/gi;
  let match;
  while ((match = pattern.exec(content)) !== null) {
    linkSet.add(`https://pubmed.ncbi.nlm.nih.gov/${match[1]}/`);
  }
  return Array.from(linkSet);
}

function extractPmidFromUrl(url: string): string | null {
  const m = url.match(/pubmed\.ncbi\.nlm\.nih\.gov\/(\d+)/);
  return m ? m[1] : null;
}
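As a quick illustration of the split logic, a sketch with made-up input; the report text, fenced JSON, and field values are invented for the example and are not from this commit.

import { parseContent } from './resultParser.js';

// A report followed by a fenced JSON literature list; note the trailing
// comma, which plain JSON.parse would reject but safeParseJsonList repairs.
const sample = [
  '## Synthesis',
  'ASL perfusion findings across the included trials...',
  '```json',
  '[{"title": "CBF changes in early AD", "pmid": "12345678", "year": 2023,}]',
  '```',
].join('\n');

const { synthesisReport, resultList } = parseContent(sample);
// synthesisReport -> the markdown report with the JSON block removed
// resultList     -> [{ title: 'CBF changes in early AD', pmid: '12345678', year: 2023 }]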