Files
AIclinicalresearch/backend/src/modules/asl/services/wordExportService.ts
HaHafeng 8f06d4f929 feat(asl): Complete Deep Research V2.0 core development
Backend:
- Add SSE streaming client (unifuncsSseClient) replacing async polling
- Add paragraph-based reasoning parser with mergeConsecutiveThinking
- Add requirement expansion service (DeepSeek-V3 PICOS+MeSH)
- Add Word export service with Pandoc, inline hyperlinks, reference link expansion
- Add deep research V2 worker with 2s log flush and Chinese source prompt
- Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals)
- Add 4 API endpoints (generate-requirement/tasks/task-status/export-word)
- Update Prisma schema with 6 new V2.0 fields on AslResearchTask
- Add DB migration for V2.0 fields
- Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section)

Frontend:
- Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal)
- Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView
- Add react-markdown + remark-gfm for report rendering
- Add custom link component showing visible URLs after references
- Add useDeepResearchTask polling hook
- Add deep research TypeScript types

Tests:
- Add E2E test, smoke test, and Chinese data source test scripts

Docs:
- Update ASL module status (v2.0 - core features complete)
- Update system status (v6.1 - ASL V2.0 milestone)
- Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results)
- Update module auth specification (test script guidelines)
- Update V2.0 development plan

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-23 13:21:52 +08:00

141 lines
4.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Deep Research V2.0 — Word 导出服务
*
* 将 synthesisReport + resultList 拼接为完整 Markdown,
* 调用 Python 微服务(Pandoc)转换为 .docx。
*
* 文献清单中标题直接作为超链接,确保 Word 中可点击。
*/
import axios from 'axios';
import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
import type { LiteratureItem } from '../utils/resultParser.js';
// Base URL of the service that performs the Markdown -> .docx conversion.
// Read from the EXTRACTION_SERVICE_URL environment variable; falls back to
// http://localhost:8000 when that variable is unset or empty.
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
/**
 * Builds a Markdown document from a completed research task and converts it
 * to a .docx file by POSTing the Markdown to the conversion service.
 */
class WordExportService {
  /**
   * Exports one research task as a Word document.
   *
   * @param taskId - Id of the `aslResearchTask` record to export.
   * @returns The .docx bytes plus a filename derived from the task query and
   *   today's date.
   * @throws Error when the task does not exist, its status is not
   *   `completed`, or the conversion request fails.
   */
  async exportTaskToWord(taskId: string): Promise<{
    buffer: Buffer;
    filename: string;
  }> {
    const task = await prisma.aslResearchTask.findUnique({
      where: { id: taskId },
    });
    if (!task) throw new Error('任务不存在');
    if (task.status !== 'completed') throw new Error('任务尚未完成');

    const markdown = this.buildMarkdown(
      task.query,
      task.synthesisReport,
      task.resultList as LiteratureItem[] | null,
      task.completedAt,
    );
    const docxBuffer = await this.convertToDocx(markdown, task.query);

    // Keep only CJK ideographs, ASCII letters and digits so the name is safe
    // to use as a filename, and cap it at 30 characters.
    const safeQuery = task.query.replace(/[^\u4e00-\u9fa5a-zA-Z0-9]/g, '').slice(0, 30);
    // YYYYMMDD taken from the ISO (UTC) representation of the current time.
    const dateStr = new Date().toISOString().slice(0, 10).replace(/-/g, '');
    const filename = `DeepResearch_${safeQuery}_${dateStr}.docx`;
    return { buffer: docxBuffer, filename };
  }

  /**
   * Assembles the full Markdown document: header, optional synthesis report,
   * optional literature list and a footer line.
   *
   * @param query - Research topic shown in the header.
   * @param report - Synthesis report Markdown; the section is omitted when
   *   null or empty.
   * @param resultList - Literature items; the section is omitted when the
   *   value is null, empty or not an array.
   * @param completedAt - Completion time shown in the header; the current
   *   time is used when null.
   * @returns The Markdown text, with parts joined by newlines.
   */
  private buildMarkdown(
    query: string,
    report: string | null,
    resultList: LiteratureItem[] | null,
    completedAt: Date | null,
  ): string {
    const parts: string[] = [];
    const generatedAt = (completedAt ? new Date(completedAt) : new Date()).toLocaleString('zh-CN');

    parts.push(`# Deep Research 报告\n`);
    parts.push(`检索主题: ${query}\n`);
    parts.push(`生成时间: ${generatedAt}\n`);
    parts.push('---\n');

    if (report) {
      parts.push('## 综合分析报告\n');
      // Drop `**…**` bold markers, keeping the enclosed text.
      let cleaned = report.replace(/\*\*([^*]+)\*\*/g, '$1');
      cleaned = this.expandReferenceLinks(cleaned);
      parts.push(cleaned);
      parts.push('\n');
    }

    // The caller hands this value over through a type assertion, so its
    // runtime shape is not guaranteed; anything that is not an array is
    // treated as "no literature list" instead of throwing on `.forEach`.
    if (Array.isArray(resultList) && resultList.length > 0) {
      parts.push('---\n');
      parts.push(`## 文献清单(共 ${resultList.length} 篇)\n`);

      resultList.forEach((item, idx) => {
        const title = item.title || '(无标题)';
        // Prefer the item's own URL; otherwise derive a PubMed link from the PMID.
        const url = item.url || (item.pmid ? `https://pubmed.ncbi.nlm.nih.gov/${item.pmid}/` : '');
        // The title becomes the link text so the heading is clickable in Word.
        const titleLine = url ? `[${this.escapeLinkText(title)}](${url})` : title;
        parts.push(`### ${idx + 1}. ${titleLine}\n`);

        const details: string[] = [];
        if (item.authors) details.push(`作者: ${item.authors}`);
        if (item.journal) details.push(`期刊: ${item.journal}`);
        if (item.year) details.push(`年份: ${item.year}`);
        if (item.studyType) details.push(`研究类型: ${item.studyType}`);
        if (item.pmid) details.push(`PMID: ${item.pmid}`);
        if (details.length > 0) {
          parts.push(details.join(' | '));
        }
        if (url) {
          // Also print the raw URL so it stays visible as text.
          parts.push(`\n链接: ${url}`);
        }
        parts.push('\n');
      });
    }

    parts.push('---\n');
    parts.push('*本报告由 AI Clinical Research 平台 Deep Research 引擎自动生成*\n');
    return parts.join('\n');
  }

  /**
   * Backslash-escapes `\`, `[` and `]` so arbitrary text can be used as
   * Markdown link text without an unbalanced bracket ending the link early.
   * Other Markdown characters are left alone; they only affect formatting,
   * not the link structure.
   */
  private escapeLinkText(text: string): string {
    return text.replace(/[\\[\]]/g, '\\$&');
  }

  /**
   * Rewrites reference links of the form `[[N]](url)` to `[N](url) (url)`,
   * so the full URL is visible next to the citation in the Word output.
   *
   * The URL may contain one level of balanced parentheses (as found in some
   * DOIs, e.g. `10.1016/S0140-6736(20)30183-5`); stopping at the first `)`
   * would truncate such URLs and leave the remainder as stray text.
   *
   * @param text - Markdown text to scan.
   * @returns The text with every matching reference link expanded.
   */
  private expandReferenceLinks(text: string): string {
    return text.replace(
      /\[\[(\d+)\]\]\((https?:\/\/(?:[^\s()]|\([^\s()]*\))+)\)/g,
      '[$1]($2) ($2)'
    );
  }

  /**
   * Sends the Markdown to the conversion service and returns the .docx bytes.
   *
   * @param markdown - Document content to convert.
   * @param title - Source for the document title; only the first 50
   *   characters are sent.
   * @returns The converted document as a Buffer.
   * @throws Error wrapping the underlying failure message when the request
   *   fails.
   */
  private async convertToDocx(markdown: string, title: string): Promise<Buffer> {
    try {
      logger.info('[WordExport] Converting Markdown → Word');
      const response = await axios.post(
        `${EXTRACTION_SERVICE_URL}/api/convert/docx`,
        {
          content: markdown,
          use_template: true,
          title: `Deep Research: ${title.slice(0, 50)}`,
        },
        {
          responseType: 'arraybuffer',
          timeout: 30000,
        }
      );
      // Normalise to a Buffer before measuring: an ArrayBuffer has no
      // `length` property, so logging `response.data.length` could print
      // `undefined`.
      const docx = Buffer.from(response.data);
      logger.info(`[WordExport] Conversion success, size: ${docx.length} bytes`);
      return docx;
    } catch (error) {
      logger.error('[WordExport] Conversion failed:', error);
      throw new Error(`Word 转换失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
// Module-level WordExportService instance, created once when this module is
// loaded and exported for use by importers.
export const wordExportService = new WordExportService();