feat(asl): Complete Deep Research V2.0 core development

Backend:
- Add SSE streaming client (unifuncsSseClient) replacing async polling
- Add paragraph-based reasoning parser with mergeConsecutiveThinking
- Add requirement expansion service (DeepSeek-V3 PICOS+MeSH)
- Add Word export service with Pandoc, inline hyperlinks, reference link expansion
- Add deep research V2 worker with 2s log flush and Chinese source prompt
- Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals)
- Add 4 API endpoints (generate-requirement/tasks/task-status/export-word)
- Update Prisma schema with 6 new V2.0 fields on AslResearchTask
- Add DB migration for V2.0 fields
- Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section)

Frontend:
- Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal)
- Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView
- Add react-markdown + remark-gfm for report rendering
- Add custom link component showing visible URLs after references
- Add useDeepResearchTask polling hook
- Add deep research TypeScript types

Tests:
- Add E2E test, smoke test, and Chinese data source test scripts

Docs:
- Update ASL module status (v2.0 - core features complete)
- Update system status (v6.1 - ASL V2.0 milestone)
- Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results)
- Update module auth specification (test script guidelines)
- Update V2.0 development plan

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-23 13:21:52 +08:00
parent b06daecacd
commit 8f06d4f929
39 changed files with 5605 additions and 417 deletions

View File

@@ -0,0 +1,140 @@
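/**
 * Deep Research V2.0 — Word export service
 *
 * Concatenates synthesisReport and resultList into one complete Markdown document,
 * then calls the Python microservice (Pandoc) to convert it to .docx.
 *
 * In the literature list each title is itself the hyperlink, so that it is clickable in Word.
 */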
import axios from 'axios';
import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
import type { LiteratureItem } from '../utils/resultParser.js';
// Base URL of the service that hosts the `/api/convert/docx` endpoint used below.
// Read from the EXTRACTION_SERVICE_URL environment variable; an unset or empty
// value falls back to the local default.
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
/**
 * Exports a completed Deep Research task as a Word (.docx) document.
 *
 * The task's synthesis report and literature list are assembled into a single
 * Markdown document, which is then posted to the conversion endpoint at
 * `${EXTRACTION_SERVICE_URL}/api/convert/docx`.
 */
class WordExportService {
  /**
   * Loads a task, renders it to Markdown and converts the result to .docx.
   *
   * @param taskId - Id of the `aslResearchTask` record to export.
   * @returns The document bytes and a suggested filename of the form
   *   `DeepResearch_<query>_<YYYYMMDD>.docx` (date taken from the UTC ISO timestamp).
   * @throws Error when the task does not exist, its status is not `completed`,
   *   or the conversion request fails.
   */
  async exportTaskToWord(taskId: string): Promise<{
    buffer: Buffer;
    filename: string;
  }> {
    const task = await prisma.aslResearchTask.findUnique({
      where: { id: taskId },
    });
    if (!task) throw new Error('任务不存在');
    if (task.status !== 'completed') throw new Error('任务尚未完成');

    const markdown = this.buildMarkdown(
      task.query,
      task.synthesisReport,
      task.resultList as LiteratureItem[] | null,
      task.completedAt,
    );
    const docxBuffer = await this.convertToDocx(markdown, task.query);

    // Keep only CJK ideographs (U+4E00–U+9FA5) and ASCII alphanumerics, capped at 30 characters.
    const safeQuery = task.query.replace(/[^\u4e00-\u9fa5a-zA-Z0-9]/g, '').slice(0, 30);
    const dateStr = new Date().toISOString().slice(0, 10).replace(/-/g, '');
    const filename = `DeepResearch_${safeQuery}_${dateStr}.docx`;
    return { buffer: docxBuffer, filename };
  }

  /**
   * Assembles the Markdown document: header, optional synthesis report,
   * optional literature list, and a closing footer line.
   *
   * @param query - Research topic shown in the header.
   * @param report - Synthesis report in Markdown; the section is omitted when null or empty.
   * @param resultList - Literature entries; the section is omitted unless this is a non-empty array.
   * @param completedAt - Completion time shown in the header; the current time is used when null.
   * @returns The complete Markdown text.
   */
  private buildMarkdown(
    query: string,
    report: string | null,
    resultList: LiteratureItem[] | null,
    completedAt: Date | null,
  ): string {
    const parts: string[] = [];
    parts.push(`# Deep Research 报告\n`);
    parts.push(`检索主题: ${query}\n`);
    parts.push(`生成时间: ${completedAt ? new Date(completedAt).toLocaleString('zh-CN') : new Date().toLocaleString('zh-CN')}\n`);
    parts.push('---\n');

    if (report) {
      parts.push('## 综合分析报告\n');
      // Strip `**bold**` markers, keeping the inner text.
      let cleaned = report.replace(/\*\*([^*]+)\*\*/g, '$1');
      cleaned = this.expandReferenceLinks(cleaned);
      parts.push(cleaned);
      parts.push('\n');
    }

    // The caller supplies resultList through a type assertion, so confirm at
    // runtime that it really is an array before iterating over it.
    if (Array.isArray(resultList) && resultList.length > 0) {
      parts.push('---\n');
      parts.push(`## 文献清单(共 ${resultList.length} 篇)\n`);
      resultList.forEach((item, idx) => {
        const title = item.title || '(无标题)';
        // Fall back to the PubMed page when only a PMID is available.
        const url = item.url || (item.pmid ? `https://pubmed.ncbi.nlm.nih.gov/${item.pmid}/` : '');
        // Escape brackets/backslashes so a title cannot terminate the link text early.
        const titleLine = url ? `[${this.escapeLinkText(title)}](${url})` : title;
        parts.push(`### ${idx + 1}. ${titleLine}\n`);

        const details: string[] = [];
        if (item.authors) details.push(`作者: ${item.authors}`);
        if (item.journal) details.push(`期刊: ${item.journal}`);
        if (item.year) details.push(`年份: ${item.year}`);
        if (item.studyType) details.push(`研究类型: ${item.studyType}`);
        if (item.pmid) details.push(`PMID: ${item.pmid}`);
        if (details.length > 0) {
          parts.push(details.join(' | '));
        }
        if (url) {
          parts.push(`\n链接: ${url}`);
        }
        parts.push('\n');
      });
    }

    parts.push('---\n');
    parts.push('*本报告由 AI Clinical Research 平台 Deep Research 引擎自动生成*\n');
    return parts.join('\n');
  }

  /**
   * Backslash-escapes `\`, `[` and `]` so the text can be placed inside the
   * square brackets of a Markdown link without ending the link text early.
   */
  private escapeLinkText(text: string): string {
    return text.replace(/[\\[\]]/g, '\\$&');
  }

  /**
   * Rewrites reference links of the form `[[N]](url)` as `[N](url) (url)`,
   * so the reference stays a link and the full URL is also visible beside it.
   *
   * The URL may contain one level of balanced parentheses (as in DOIs such as
   * `10.1016/S0140-6736(20)30183-5`); such URLs are kept whole rather than
   * being cut at the first `)`.
   */
  private expandReferenceLinks(text: string): string {
    return text.replace(
      /\[\[(\d+)\]\]\((https?:\/\/(?:[^\s()]|\([^\s()]*\))+)\)/g,
      '[$1]($2) ($2)',
    );
  }

  /**
   * Posts the Markdown to the conversion endpoint and returns the .docx bytes.
   *
   * @param markdown - Document content to convert.
   * @param title - Used (truncated to 50 characters) in the `title` field of the request.
   * @returns The converted document as a Buffer.
   * @throws Error wrapping the underlying failure message when the request fails.
   */
  private async convertToDocx(markdown: string, title: string): Promise<Buffer> {
    try {
      logger.info('[WordExport] Converting Markdown → Word');
      const response = await axios.post(
        `${EXTRACTION_SERVICE_URL}/api/convert/docx`,
        {
          content: markdown,
          use_template: true,
          title: `Deep Research: ${title.slice(0, 50)}`,
        },
        {
          responseType: 'arraybuffer',
          timeout: 30000,
        },
      );
      // Measure the Buffer rather than the raw payload so the logged size is
      // defined whichever binary type the HTTP client hands back.
      const docx = Buffer.from(response.data);
      logger.info(`[WordExport] Conversion success, size: ${docx.length} bytes`);
      return docx;
    } catch (error) {
      logger.error('[WordExport] Conversion failed:', error);
      throw new Error(`Word 转换失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
// Module-level WordExportService instance exported for use by other modules.
export const wordExportService = new WordExportService();