feat(asl): Complete Deep Research V2.0 core development

Backend: - Add SSE streaming client (unifuncsSseClient) replacing async polling - Add paragraph-based reasoning parser with mergeConsecutiveThinking - Add requirement expansion service (DeepSeek-V3 PICOS+MeSH) - Add Word export service with Pandoc, inline hyperlinks, reference link expansion - Add deep research V2 worker with 2s log flush and Chinese source prompt - Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals) - Add 4 API endpoints (generate-requirement/tasks/task-status/export-word) - Update Prisma schema with 6 new V2.0 fields on AslResearchTask - Add DB migration for V2.0 fields - Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section) Frontend: - Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal) - Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView - Add react-markdown + remark-gfm for report rendering - Add custom link component showing visible URLs after references - Add useDeepResearchTask polling hook - Add deep research TypeScript types Tests: - Add E2E test, smoke test, and Chinese data source test scripts Docs: - Update ASL module status (v2.0 - core features complete) - Update system status (v6.1 - ASL V2.0 milestone) - Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results) - Update module auth specification (test script guidelines) - Update V2.0 development plan Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-23 13:21:52 +08:00
parent b06daecacd
commit 8f06d4f929
39 changed files with 5605 additions and 417 deletions
--- a/backend/src/modules/asl/services/wordExportService.ts
+++ b/backend/src/modules/asl/services/wordExportService.ts
@@ -0,0 +1,140 @@
+/**
+ * Deep Research V2.0 — Word 导出服务
+ *
+ * 将 synthesisReport + resultList 拼接为完整 Markdown，
+ * 调用 Python 微服务（Pandoc）转换为 .docx。
+ *
+ * 文献清单中标题直接作为超链接，确保 Word 中可点击。
+ */
+
+import axios from 'axios';
+import { prisma } from '../../../config/database.js';
+import { logger } from '../../../common/logging/index.js';
+import type { LiteratureItem } from '../utils/resultParser.js';
+
+const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000'; // Base URL of the conversion service; falls back to localhost:8000 when the env var is unset or empty.
+
+/**
+ * Renders a completed Deep Research task as Markdown and converts it to a
+ * Word (.docx) document by POSTing the Markdown to the extraction service's
+ * `/api/convert/docx` endpoint.
+ */
+class WordExportService {
+
+  /**
+   * Export a completed research task as a Word document.
+   *
+   * @param taskId - Id of the `aslResearchTask` record to export.
+   * @returns The .docx bytes and a suggested download filename.
+   * @throws Error when the task does not exist, is not in `completed`
+   *         status, or the Markdown → Word conversion fails.
+   */
+  async exportTaskToWord(taskId: string): Promise<{
+    buffer: Buffer;
+    filename: string;
+  }> {
+    const task = await prisma.aslResearchTask.findUnique({
+      where: { id: taskId },
+    });
+
+    if (!task) throw new Error('任务不存在');
+    if (task.status !== 'completed') throw new Error('任务尚未完成');
+
+    const markdown = this.buildMarkdown(
+      task.query,
+      task.synthesisReport,
+      task.resultList as LiteratureItem[] | null,
+      task.completedAt,
+    );
+
+    const docxBuffer = await this.convertToDocx(markdown, task.query);
+
+    return { buffer: docxBuffer, filename: this.buildFilename(task.query) };
+  }
+
+  /**
+   * Build the download filename `DeepResearch_<query>_<YYYYMMDD>.docx`.
+   *
+   * The query is reduced to CJK/alphanumeric characters (max 30). When nothing
+   * survives that filter the segment is dropped instead of leaving a double
+   * underscore. The date uses local time so it agrees with the locally
+   * formatted "generated at" line inside the report.
+   */
+  private buildFilename(query: string, now: Date = new Date()): string {
+    const safeQuery = query.replace(/[^\u4e00-\u9fa5a-zA-Z0-9]/g, '').slice(0, 30);
+    const pad = (value: number): string => String(value).padStart(2, '0');
+    const dateStr = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}`;
+    const segments = ['DeepResearch', safeQuery, dateStr].filter((s) => s.length > 0);
+    return `${segments.join('_')}.docx`;
+  }
+
+  /**
+   * Assemble the full Markdown document: header, optional synthesis report,
+   * optional literature list, and footer.
+   *
+   * @param query - Research topic shown in the header.
+   * @param report - Synthesis report Markdown, or null to omit the section.
+   * @param resultList - Literature entries, or null/empty to omit the section.
+   * @param completedAt - Completion time; the current time is used when null.
+   * @returns The Markdown text, sections joined by newlines.
+   */
+  private buildMarkdown(
+    query: string,
+    report: string | null,
+    resultList: LiteratureItem[] | null,
+    completedAt: Date | null,
+  ): string {
+    const parts: string[] = [];
+    const generatedAt = (completedAt ? new Date(completedAt) : new Date()).toLocaleString('zh-CN');
+
+    parts.push(`# Deep Research 报告\n`);
+    parts.push(`检索主题： ${query}\n`);
+    parts.push(`生成时间： ${generatedAt}\n`);
+    parts.push('---\n');
+
+    if (report) {
+      parts.push('## 综合分析报告\n');
+      // Strip **bold** markers, then make reference URLs visible next to each citation.
+      let cleaned = report.replace(/\*\*([^*]+)\*\*/g, '$1');
+      cleaned = this.expandReferenceLinks(cleaned);
+      parts.push(cleaned);
+      parts.push('\n');
+    }
+
+    // resultList originates from a JSON column that is only cast to an array
+    // type by the caller, so verify the shape before iterating.
+    if (Array.isArray(resultList) && resultList.length > 0) {
+      parts.push('---\n');
+      parts.push(`## 文献清单（共 ${resultList.length} 篇）\n`);
+
+      resultList.forEach((item, idx) => {
+        this.appendLiteratureItem(parts, item, idx);
+      });
+    }
+
+    parts.push('---\n');
+    parts.push('*本报告由 AI Clinical Research 平台 Deep Research 引擎自动生成*\n');
+
+    return parts.join('\n');
+  }
+
+  /**
+   * Append one literature entry: a numbered heading whose title is a
+   * hyperlink (when a URL is known), a `|`-separated detail line, and the
+   * plain URL.
+   */
+  private appendLiteratureItem(parts: string[], item: LiteratureItem, idx: number): void {
+    // A heading must stay on one line, so fold any line breaks in the title.
+    const title = String(item.title || '(无标题)').replace(/[\r\n]+/g, ' ');
+    const url = item.url || (item.pmid ? `https://pubmed.ncbi.nlm.nih.gov/${item.pmid}/` : '');
+
+    const titleLine = url
+      ? `[${this.escapeLinkText(title)}](${this.encodeLinkUrl(url)})`
+      : title;
+    parts.push(`### ${idx + 1}. ${titleLine}\n`);
+
+    const details: string[] = [];
+    if (item.authors) details.push(`作者: ${item.authors}`);
+    if (item.journal) details.push(`期刊: ${item.journal}`);
+    if (item.year) details.push(`年份: ${item.year}`);
+    if (item.studyType) details.push(`研究类型: ${item.studyType}`);
+    if (item.pmid) details.push(`PMID: ${item.pmid}`);
+
+    if (details.length > 0) {
+      parts.push(details.join(' | '));
+    }
+
+    if (url) {
+      parts.push(`\n链接: ${url}`);
+    }
+
+    parts.push('\n');
+  }
+
+  /** Backslash-escape characters that would terminate or nest Markdown link text. */
+  private escapeLinkText(text: string): string {
+    return text.replace(/[\\\[\]]/g, '\\$&');
+  }
+
+  /**
+   * Percent-encode characters that would end or corrupt a Markdown link
+   * destination (ASCII whitespace, parentheses, angle brackets).
+   */
+  private encodeLinkUrl(url: string): string {
+    return url
+      .trim()
+      .replace(
+        /[ \t\r\n()<>]/g,
+        (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, '0')}`,
+      );
+  }
+
+  /**
+   * Rewrite `[[N]](url)` citations as `[N](url) (url)`: the citation number
+   * stays a clickable link and the full URL is also printed next to it so it
+   * is visible in the Word document.
+   *
+   * The URL pattern accepts one level of balanced parentheses so that links
+   * such as `https://doi.org/10.1016/S0140-6736(20)30183-5` are not cut off
+   * at their first `)`.
+   */
+  private expandReferenceLinks(text: string): string {
+    return text.replace(
+      /\[\[(\d+)\]\]\((https?:\/\/(?:[^\s()]|\([^\s()]*\))+)\)/g,
+      '[$1]($2) ($2)'
+    );
+  }
+
+  /**
+   * Convert Markdown to .docx via the extraction service.
+   *
+   * @param markdown - Full Markdown document to convert.
+   * @param title - Document title; only the first 50 characters are sent.
+   * @returns The converted document bytes.
+   * @throws Error wrapping the underlying failure message when the request fails.
+   */
+  private async convertToDocx(markdown: string, title: string): Promise<Buffer> {
+    try {
+      logger.info('[WordExport] Converting Markdown → Word');
+
+      const response = await axios.post<ArrayBuffer>(
+        `${EXTRACTION_SERVICE_URL}/api/convert/docx`,
+        {
+          content: markdown,
+          use_template: true,
+          title: `Deep Research: ${title.slice(0, 50)}`,
+        },
+        {
+          responseType: 'arraybuffer',
+          timeout: 30000,
+        }
+      );
+
+      // Normalise to a Buffer first: its length is defined whether the HTTP
+      // client handed back a Buffer or a plain ArrayBuffer.
+      const docx = Buffer.from(response.data);
+      logger.info(`[WordExport] Conversion success, size: ${docx.length} bytes`);
+      return docx;
+    } catch (error) {
+      logger.error('[WordExport] Conversion failed:', error);
+      throw new Error(`Word 转换失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
+    }
+  }
+}
+
+export const wordExportService = new WordExportService(); // Module-level instance exported for use by other modules.