Backend: - Add SSE streaming client (unifuncsSseClient) replacing async polling - Add paragraph-based reasoning parser with mergeConsecutiveThinking - Add requirement expansion service (DeepSeek-V3 PICOS+MeSH) - Add Word export service with Pandoc, inline hyperlinks, reference link expansion - Add deep research V2 worker with 2s log flush and Chinese source prompt - Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals) - Add 4 API endpoints (generate-requirement/tasks/task-status/export-word) - Update Prisma schema with 6 new V2.0 fields on AslResearchTask - Add DB migration for V2.0 fields - Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section) Frontend: - Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal) - Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView - Add react-markdown + remark-gfm for report rendering - Add custom link component showing visible URLs after references - Add useDeepResearchTask polling hook - Add deep research TypeScript types Tests: - Add E2E test, smoke test, and Chinese data source test scripts Docs: - Update ASL module status (v2.0 - core features complete) - Update system status (v6.1 - ASL V2.0 milestone) - Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results) - Update module auth specification (test script guidelines) - Update V2.0 development plan Co-authored-by: Cursor <cursoragent@cursor.com>
141 lines
4.4 KiB
TypeScript
141 lines
4.4 KiB
TypeScript
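/**
 * Deep Research V2.0 — Word export service
 *
 * Concatenates synthesisReport + resultList into one complete Markdown document,
 * then calls the Python microservice (Pandoc) to convert it to .docx.
 *
 * In the literature list each title is itself a hyperlink, so it stays clickable in Word.
 */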
|
||
|
||
import axios from 'axios';
|
||
import { prisma } from '../../../config/database.js';
|
||
import { logger } from '../../../common/logging/index.js';
|
||
import type { LiteratureItem } from '../utils/resultParser.js';
|
||
|
||
// Base URL of the service that receives the Markdown → .docx conversion request.
// Falls back to http://localhost:8000 when the environment variable is unset or empty.
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
|
||
|
||
/**
 * Exports a completed Deep Research task as a Word (.docx) document.
 *
 * The task's synthesis report and literature list are rendered into a single
 * Markdown document, which is then posted to the conversion service and
 * returned as a binary buffer.
 */
class WordExportService {
  /**
   * Loads a task, renders it to Markdown and converts the result to .docx.
   *
   * @param taskId - Id of the `aslResearchTask` record to export.
   * @returns The .docx bytes together with a suggested download filename.
   * @throws Error when the task does not exist, its status is not
   *   `completed`, or the conversion request fails.
   */
  async exportTaskToWord(taskId: string): Promise<{
    buffer: Buffer;
    filename: string;
  }> {
    const task = await prisma.aslResearchTask.findUnique({
      where: { id: taskId },
    });

    if (!task) throw new Error('任务不存在');
    if (task.status !== 'completed') throw new Error('任务尚未完成');

    const markdown = this.buildMarkdown(
      task.query,
      task.synthesisReport,
      task.resultList as LiteratureItem[] | null,
      task.completedAt,
    );

    const docxBuffer = await this.convertToDocx(markdown, task.query);

    // Keep only CJK ideographs and ASCII alphanumerics so the query is safe
    // to embed in a filename, and cap it at 30 characters.
    const safeQuery = task.query.replace(/[^\u4e00-\u9fa5a-zA-Z0-9]/g, '').slice(0, 30);
    // YYYYMMDD derived from the ISO (UTC) timestamp of the export.
    const dateStr = new Date().toISOString().slice(0, 10).replace(/-/g, '');
    const filename = `DeepResearch_${safeQuery}_${dateStr}.docx`;

    return { buffer: docxBuffer, filename };
  }

  /**
   * Renders the report header, the synthesis report and the literature list
   * as one Markdown document.
   *
   * @param query - The research topic shown in the header.
   * @param report - Synthesis report in Markdown; the section is skipped when empty.
   * @param resultList - Literature items; the section is skipped when null or empty.
   * @param completedAt - Completion time shown in the header; the current time is used when null.
   * @returns The assembled Markdown text.
   */
  private buildMarkdown(
    query: string,
    report: string | null,
    resultList: LiteratureItem[] | null,
    completedAt: Date | null,
  ): string {
    const parts: string[] = [];

    parts.push(`# Deep Research 报告\n`);
    parts.push(`检索主题: ${query}\n`);
    parts.push(`生成时间: ${completedAt ? new Date(completedAt).toLocaleString('zh-CN') : new Date().toLocaleString('zh-CN')}\n`);
    parts.push('---\n');

    if (report) {
      parts.push('## 综合分析报告\n');
      // Strip **bold** markers, keeping the emphasized text itself.
      let cleaned = report.replace(/\*\*([^*]+)\*\*/g, '$1');
      cleaned = this.expandReferenceLinks(cleaned);
      parts.push(cleaned);
      parts.push('\n');
    }

    if (resultList && resultList.length > 0) {
      parts.push('---\n');
      parts.push(`## 文献清单(共 ${resultList.length} 篇)\n`);

      resultList.forEach((item, idx) => {
        // A heading must stay on one line, so collapse any embedded line
        // breaks or runs of whitespace in the title.
        const title = (item.title || '').replace(/\s+/g, ' ').trim() || '(无标题)';
        // Prefer the item's own URL; otherwise derive a PubMed link from the PMID.
        const url = item.url || (item.pmid ? `https://pubmed.ncbi.nlm.nih.gov/${item.pmid}/` : '');

        // The title itself is the hyperlink so that it is clickable in Word.
        // Both halves are escaped so brackets in the title or parentheses in
        // the URL cannot terminate the Markdown link early.
        const titleLine = url
          ? `[${this.escapeLinkText(title)}](${this.encodeLinkDestination(url)})`
          : title;
        parts.push(`### ${idx + 1}. ${titleLine}\n`);

        const details: string[] = [];
        if (item.authors) details.push(`作者: ${item.authors}`);
        if (item.journal) details.push(`期刊: ${item.journal}`);
        if (item.year) details.push(`年份: ${item.year}`);
        if (item.studyType) details.push(`研究类型: ${item.studyType}`);
        if (item.pmid) details.push(`PMID: ${item.pmid}`);

        if (details.length > 0) {
          parts.push(details.join(' | '));
        }

        if (url) {
          // Also print the URL as visible text next to the clickable title.
          parts.push(`\n链接: ${url}`);
        }

        parts.push('\n');
      });
    }

    parts.push('---\n');
    parts.push('*本报告由 AI Clinical Research 平台 Deep Research 引擎自动生成*\n');

    return parts.join('\n');
  }

  /**
   * Backslash-escapes the characters that would end or corrupt the text part
   * of a Markdown inline link (`\`, `[` and `]`).
   *
   * @param text - Raw link text.
   * @returns Text that is safe to place between `[` and `]`.
   */
  private escapeLinkText(text: string): string {
    return text.replace(/[\\[\]]/g, '\\$&');
  }

  /**
   * Percent-encodes the characters that would end the destination part of a
   * Markdown inline link early (spaces and parentheses).
   *
   * @param url - Raw URL.
   * @returns URL that is safe to place between `(` and `)`.
   */
  private encodeLinkDestination(url: string): string {
    const encoded: Record<string, string> = { '(': '%28', ')': '%29', ' ': '%20' };
    return url.trim().replace(/[() ]/g, (ch) => encoded[ch] ?? ch);
  }

  /**
   * Rewrites reference links of the form `[[N]](url)` as `[N](url) (url)`,
   * so the reference stays a link and the full URL is also visible in Word.
   *
   * @param text - Markdown text that may contain `[[N]](url)` references.
   * @returns The text with every matching reference expanded.
   */
  private expandReferenceLinks(text: string): string {
    return text.replace(
      /\[\[(\d+)\]\]\((https?:\/\/[^\s)]+)\)/g,
      '[$1]($2) ($2)'
    );
  }

  /**
   * Posts the Markdown to the conversion endpoint and returns the .docx bytes.
   *
   * @param markdown - Full Markdown document to convert.
   * @param title - Research topic; its first 50 characters are sent as part of the document title.
   * @returns The converted document as a Buffer.
   * @throws Error wrapping the underlying failure message when the request fails.
   */
  private async convertToDocx(markdown: string, title: string): Promise<Buffer> {
    try {
      logger.info('[WordExport] Converting Markdown → Word');

      const response = await axios.post(
        `${EXTRACTION_SERVICE_URL}/api/convert/docx`,
        {
          content: markdown,
          use_template: true,
          title: `Deep Research: ${title.slice(0, 50)}`,
        },
        {
          responseType: 'arraybuffer',
          timeout: 30000,
        }
      );

      // Convert first and measure the Buffer: an ArrayBuffer payload has
      // `byteLength` but no `length`, which would log "undefined bytes".
      const docx = Buffer.from(response.data);
      logger.info(`[WordExport] Conversion success, size: ${docx.length} bytes`);
      return docx;
    } catch (error) {
      logger.error('[WordExport] Conversion failed:', error);
      throw new Error(`Word 转换失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
|
||
|
||
// Module-level instance of the export service, created once when the module is loaded.
export const wordExportService = new WordExportService();
|