/**
 * M2 HITL workbench integration test.
 *
 * How to run (the backend service must be started first):
 *   cd backend && npx tsx src/modules/asl/extraction/__tests__/m2-hitl-test.ts
 *
 * Phases:
 *   Phase 1: new M2 API endpoints (result detail / review / SSE / export)
 *   Phase 2: DynamicPromptBuilder unit checks
 *   Phase 3: ExtractionValidator fuzzyQuoteMatch unit checks
 *   Phase 4: ExtractionEventBus unit checks
 *   Phase 5: Excel export end-to-end check
 *   Phase 6: resume-after-refresh routing convention (URL -> correct step)
 */
import { PrismaClient } from '@prisma/client';
import jwt from 'jsonwebtoken';
import { buildExtractionPrompt } from '../services/DynamicPromptBuilder.js';
import { extractionValidator } from '../services/ExtractionValidator.js';
import { extractionEventBus } from '../services/ExtractionEventBus.js';

const prisma = new PrismaClient();
const API_BASE = 'http://localhost:3001/api/v1/asl/extraction';
// `||` (not `??`) on purpose: an empty JWT_SECRET env var also falls back to the default.
const JWT_SECRET = process.env.JWT_SECRET || 'your-secret-key-change-in-production';

/** Resolves after `ms` milliseconds. */
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

/** Minimal shape of the JSON bodies this script inspects. */
interface ApiEnvelope {
  success?: boolean;
  data?: { id?: unknown };
}

// Pattern for the HTML tags used by the table fixtures in this file.
const HTML_TABLE_TAG = /<\/?(table|tr|td)\b[^>]*>/i;

let passed = 0;
let failed = 0;

/** Records a passing check and prints it. */
function ok(name: string): void {
  passed++;
  console.log(` ✅ ${name}`);
}

/** Records a failing check and prints it together with the reason. */
function fail(name: string, reason: string): void {
  failed++;
  console.log(` ❌ ${name}: ${reason}`);
}

/**
 * Soft assertion: never throws, only updates the pass/fail counters so that the
 * remaining checks still run.
 *
 * @param condition - outcome of the check
 * @param name - label printed for the check
 * @param reason - detail printed when the check fails
 */
function assert(condition: boolean, name: string, reason = 'Assertion failed'): void {
  if (condition) {
    ok(name);
  } else {
    fail(name, reason);
  }
}

let cachedToken: string | null = null;

/** Signs a one-hour SUPER_ADMIN JWT for the given user and tenant. */
function makeTestToken(userId: string, tenantId: string): string {
  return jwt.sign(
    { userId, phone: '13800000001', role: 'SUPER_ADMIN', tenantId },
    JWT_SECRET,
    { expiresIn: '1h', issuer: 'aiclinical', subject: userId },
  );
}

/**
 * Returns a bearer token for the first SUPER_ADMIN user found in the database.
 * The token is created once and reused for the rest of the run.
 *
 * @throws Error when no SUPER_ADMIN user exists.
 */
async function getAuthToken(): Promise<string> {
  if (cachedToken) return cachedToken;

  const admin = await prisma.user.findFirst({ where: { role: 'SUPER_ADMIN' } });
  if (!admin) throw new Error('无 SUPER_ADMIN 用户，无法执行 API 测试');

  cachedToken = makeTestToken(admin.id, admin.tenantId);
  return cachedToken;
}

/**
 * `fetch` against `API_BASE` with JSON content type and bearer auth.
 * Headers supplied by the caller override the defaults.
 *
 * @param path - path appended to `API_BASE`
 * @param options - regular `fetch` options
 */
async function fetchWithAuth(path: string, options: RequestInit = {}): Promise<Response> {
  const token = await getAuthToken();

  const headers = new Headers({
    'Content-Type': 'application/json',
    Authorization: `Bearer ${token}`,
  });
  // Going through `Headers` keeps caller overrides working for every HeadersInit
  // form (plain object, tuple array, Headers instance); object spread only
  // handles the plain-object form.
  new Headers(options.headers).forEach((value, key) => headers.set(key, value));

  return fetch(`${API_BASE}${path}`, { ...options, headers });
}

// ══════════════════════════════════════════════════════
// Phase 1: new M2 API endpoints
// ══════════════════════════════════════════════════════

/**
 * Exercises the detail, review, SSE and export endpoints against the most recent
 * completed extraction result.
 *
 * Side effect: the review call sets that result's review status to `approved`.
 *
 * @returns ids of the result/task used, or nulls when no completed result exists.
 */
async function phase1() {
  console.log('\n📡 Phase 1: M2 API 端点验证');

  // Pick an existing completed result to test against.
  const result = await prisma.aslExtractionResult.findFirst({
    where: { status: 'completed' },
    orderBy: { createdAt: 'desc' },
  });
  if (!result) {
    console.log(' ⚠️ 无已完成的提取结果，跳过 API 端点测试（请先运行 M1 pipeline）');
    return { resultId: null, taskId: null };
  }

  // 1.1 GET /results/:resultId — result detail
  const detailRes = await fetchWithAuth(`/results/${result.id}`);
  const detailJson = (await detailRes.json()) as ApiEnvelope;
  assert(
    detailRes.ok && Boolean(detailJson.success),
    '获取单条结果详情',
    `status=${detailRes.status}`,
  );
  assert(detailJson.data?.id === result.id, '结果 ID 正确');

  // 1.2 PUT /results/:resultId/review — review
  const reviewRes = await fetchWithAuth(`/results/${result.id}/review`, {
    method: 'PUT',
    body: JSON.stringify({ reviewStatus: 'approved' }),
  });
  const reviewJson = (await reviewRes.json()) as ApiEnvelope;
  assert(reviewRes.ok && Boolean(reviewJson.success), '审核接口返回成功');

  // Confirm the review status was persisted.
  const updatedResult = await prisma.aslExtractionResult.findUnique({
    where: { id: result.id },
  });
  assert(updatedResult?.reviewStatus === 'approved', 'DB reviewStatus 已更新为 approved');
  // `!= null` so that a missing row (undefined) fails instead of passing.
  assert(updatedResult?.reviewedAt != null, 'DB reviewedAt 已设置');

  // 1.3 GET /tasks/:taskId/stream — SSE endpoint (connection check only)
  const sseToken = await getAuthToken();
  const sseRes = await fetch(
    `${API_BASE}/tasks/${result.taskId}/stream?token=${encodeURIComponent(sseToken)}`,
    { headers: { Authorization: `Bearer ${sseToken}` } },
  );
  const sseContentType = sseRes.headers.get('content-type') ?? '';
  // Require both a successful status and the SSE content type; accepting either
  // one alone would let any 200 response pass a check named "returns event-stream".
  assert(
    sseRes.ok && sseContentType.includes('text/event-stream'),
    'SSE 端点返回 event-stream',
    `status=${sseRes.status}, content-type=${sseContentType}`,
  );
  // Only the headers matter here. Drop the stream without waiting for it, and
  // swallow a cancel rejection, which a synchronous try/catch would not catch.
  void sseRes.body?.cancel().catch(() => undefined);

  // 1.4 GET /tasks/:taskId/export — Excel export
  const exportRes = await fetchWithAuth(`/tasks/${result.taskId}/export`);
  if (exportRes.ok) {
    const blob = await exportRes.blob();
    assert(blob.size > 0, 'Excel 导出成功且非空', `size=${blob.size}`);
  } else {
    // Possibly there are no approved results for this task.
    const errText = await exportRes.text();
    console.log(` ⚠️ 导出可能无 approved 结果: ${errText}`);
    // A client error still shows the endpoint is reachable; a 5xx is a real failure.
    assert(
      exportRes.status >= 400 && exportRes.status < 500,
      'Excel 导出端点可达（无 approved 数据时预期 400）',
      `status=${exportRes.status}`,
    );
  }

  return { resultId: result.id, taskId: result.taskId };
}

// ══════════════════════════════════════════════════════
// Phase 2: DynamicPromptBuilder unit checks
// ══════════════════════════════════════════════════════

/**
 * Checks the prompts built for text-only input and for text plus HTML tables.
 *
 * NOTE(review): the `<FULL_TEXT>` / `<HIGH_FIDELITY_TABLES>` markers and the table
 * fixture were reconstructed from the assertion labels after markup had been
 * stripped from this file — confirm them against DynamicPromptBuilder.
 */
function phase2(): void {
  console.log('\n🧩 Phase 2: DynamicPromptBuilder 单元测试');

  const schema = {
    baseTemplateCode: 'RCT_ONCO',
    outcomeType: 'survival',
    schema: {
      metadata: ['study_id', 'authors', 'year'],
      baseline: ['total_n', 'median_age'],
    },
  };

  // 2.1 Text-only mode (no MinerU tables)
  const result1 = buildExtractionPrompt('This is the full text of the paper.', [], schema);
  assert(result1.systemPrompt.includes('clinical research'), 'System prompt 包含角色定义');
  assert(result1.userPrompt.includes('<FULL_TEXT>'), 'User prompt 包含 FULL_TEXT 标签');
  assert(
    !result1.userPrompt.includes('<HIGH_FIDELITY_TABLES>'),
    '纯文本模式不含 HIGH_FIDELITY_TABLES',
  );

  // 2.2 MinerU tables + Markdown text
  const tables = ['<table><tr><td>OS median: 12.3 months</td></tr></table>'];
  const result2 = buildExtractionPrompt('Full text here.', tables, schema);
  assert(
    result2.userPrompt.includes('<HIGH_FIDELITY_TABLES>'),
    '混合模式包含 HIGH_FIDELITY_TABLES',
  );
  assert(result2.userPrompt.includes('<FULL_TEXT>'), '混合模式包含 FULL_TEXT');
  assert(result2.systemPrompt.includes('AUTHORITATIVE'), 'System prompt 声明表格优先级');

  // 2.3 Schema is embedded
  assert(result1.userPrompt.includes('RCT_ONCO'), 'Schema study type 正确嵌入');
  assert(result1.userPrompt.includes('"study_id"'), 'Schema 字段正确嵌入');

  // 2.4 Quote instruction
  assert(result1.userPrompt.includes('quote'), 'Prompt 包含 quote 指令');
}

// ══════════════════════════════════════════════════════
// Phase 3: ExtractionValidator fuzzyQuoteMatch unit checks
// ══════════════════════════════════════════════════════

/** Checks quote matching confidence levels and the quote search scope builder. */
function phase3(): void {
  console.log('\n🔍 Phase 3: fuzzyQuoteMatch 单元测试');

  const sourceText =
    ' The median overall survival was 12.3 months (95% CI, 10.1-15.7) in the pembrolizumab group ' +
    'versus 8.9 months in the placebo group (HR 0.69; 95% CI, 0.56-0.85; P < 0.001). ' +
    'A total of 305 patients were enrolled across 50 centers. \n';
  // Lower-case, collapse whitespace, then drop everything except word
  // characters, whitespace and CJK ideographs.
  const normalizedSource = sourceText
    .toLowerCase()
    .replace(/[\s\u00A0]+/g, ' ')
    .replace(/[^\w\s\u4e00-\u9fff]/g, '')
    .trim();

  // 3.1 Exact substring -> high
  const r1 = extractionValidator.fuzzyQuoteMatch(
    sourceText,
    normalizedSource,
    'median overall survival was 12.3 months',
  );
  assert(r1.confidence === 'high', '精确子串匹配 → high', `got ${r1.confidence}`);
  assert(r1.matchScore >= 0.95, '精确匹配 score ≥ 0.95', `got ${r1.matchScore}`);

  // 3.2 Quote containing punctuation -> high
  const r2 = extractionValidator.fuzzyQuoteMatch(
    sourceText,
    normalizedSource,
    'median overall survival was 12.3 months (95% CI, 10.1-15.7)',
  );
  assert(r2.confidence === 'high', '标点差异匹配 → high', `got ${r2.confidence}`);

  // 3.3 High keyword coverage -> high or medium
  const r3 = extractionValidator.fuzzyQuoteMatch(
    sourceText,
    normalizedSource,
    'overall survival 12.3 months pembrolizumab group versus 8.9 months placebo group',
  );
  assert(
    r3.confidence === 'high' || r3.confidence === 'medium',
    '高覆盖率关键词匹配 → high/medium',
    `got ${r3.confidence}`,
  );

  // 3.4 No match at all -> low
  const r4 = extractionValidator.fuzzyQuoteMatch(
    sourceText,
    normalizedSource,
    'This quote is completely fabricated by the LLM and has no match whatsoever',
  );
  assert(r4.confidence === 'low', '不匹配 → low', `got ${r4.confidence}`);
  assert(r4.matchScore < 0.5, '不匹配 score < 0.5', `got ${r4.matchScore}`);

  // 3.5 verifyAllQuotes integration
  const extractedData = {
    metadata: {
      study_id: 'Gandhi 2018',
      study_id_quote: 'Gandhi 2018',
      total_n: 305,
      total_n_quote: '305 patients were enrolled across 50 centers',
    },
    outcomes: {
      os_median: 12.3,
      os_median_quote: 'The median overall survival was 12.3 months',
      fake_field: 'fake',
      fake_field_quote: 'completely fabricated hallucination not in source text at all',
    },
  };
  const scope = extractionValidator.buildQuoteSearchScope(sourceText, []);
  const verification = extractionValidator.verifyAllQuotes(extractedData, scope);
  assert(
    verification.metadata?.total_n?.confidence === 'high',
    'verifyAllQuotes: total_n → high',
    `got ${verification.metadata?.total_n?.confidence}`,
  );
  assert(
    verification.outcomes?.os_median?.confidence === 'high',
    'verifyAllQuotes: os_median → high',
    `got ${verification.outcomes?.os_median?.confidence}`,
  );
  assert(
    verification.outcomes?.fake_field?.confidence === 'low',
    'verifyAllQuotes: fake_field → low',
    `got ${verification.outcomes?.fake_field?.confidence}`,
  );

  // 3.6 buildQuoteSearchScope with an HTML table
  const tableHtml = '<table><tr><td>PFS median 6.9 months</td></tr></table>';
  const scopeWithTable = extractionValidator.buildQuoteSearchScope('Full text.', [tableHtml]);
  assert(scopeWithTable.includes('PFS median 6.9 months'), 'searchScope 包含 HTML 表格纯文本');
  assert(!HTML_TABLE_TAG.test(scopeWithTable), 'searchScope 不含 HTML 标签');
}

// ══════════════════════════════════════════════════════
// Phase 4: ExtractionEventBus unit checks
// ══════════════════════════════════════════════════════

/** Checks subscribe/emit, recent-log retrieval, unsubscribe and cleanup. */
async function phase4(): Promise<void> {
  console.log('\n📢 Phase 4: ExtractionEventBus 单元测试');

  const testTaskId = 'test-eventbus-' + Date.now();
  // Entry shape is owned by the event bus; only `source` and `timestamp` are read here.
  const received: any[] = [];

  // 4.1 Subscribe + emit
  const unsub = extractionEventBus.subscribe(testTaskId, (entry) => {
    received.push(entry);
  });

  extractionEventBus.emit(testTaskId, { source: 'MinerU', message: 'Processing page 1', level: 'info' });
  extractionEventBus.emit(testTaskId, { source: 'DeepSeek', message: 'Extracting fields', level: 'info' });
  extractionEventBus.emit(testTaskId, { source: 'System', message: 'Error occurred', level: 'error' });

  // Short pause in case delivery is asynchronous.
  await sleep(50);

  assert(received.length === 3, 'EventBus 收到 3 条消息', `got ${received.length}`);
  assert(received[0]?.source === 'MinerU', 'EventBus 消息 source 正确');
  assert(received[0]?.timestamp !== undefined, 'EventBus 自动添加 timestamp');

  // 4.2 getRecentLogs
  const recent = extractionEventBus.getRecentLogs(testTaskId);
  assert(recent.length === 3, 'getRecentLogs 返回 3 条', `got ${recent.length}`);

  // 4.3 Unsubscribe
  unsub();
  extractionEventBus.emit(testTaskId, { source: 'System', message: 'After unsub', level: 'info' });
  await sleep(50);
  assert(received.length === 3, '取消订阅后不再接收', `got ${received.length}`);

  // 4.4 Cleanup
  extractionEventBus.cleanup(testTaskId);
  const afterCleanup = extractionEventBus.getRecentLogs(testTaskId);
  assert(afterCleanup.length === 0, 'cleanup 后日志清空');
}

// ══════════════════════════════════════════════════════
// Phase 5: Excel export end-to-end check
// ══════════════════════════════════════════════════════

/**
 * Downloads the export for `ctx.taskId` and checks status, headers and size.
 * Skipped when there is no task id or the task has no approved result.
 */
async function phase5(ctx: { taskId: string | null }): Promise<void> {
  console.log('\n📊 Phase 5: Excel 导出端到端验证');

  if (!ctx.taskId) {
    console.log(' ⚠️ 无可用 taskId，跳过导出测试');
    return;
  }

  // Make sure at least one approved result exists.
  const approvedCount = await prisma.aslExtractionResult.count({
    where: { taskId: ctx.taskId, reviewStatus: 'approved' },
  });
  if (approvedCount === 0) {
    console.log(' ⚠️ 无 approved 结果，跳过导出测试');
    return;
  }

  const exportRes = await fetchWithAuth(`/tasks/${ctx.taskId}/export`);
  assert(exportRes.ok, 'Excel 导出 HTTP 200');

  const contentType = exportRes.headers.get('content-type') || '';
  assert(
    contentType.includes('spreadsheet') || contentType.includes('octet-stream'),
    'Content-Type 为 Excel 格式',
    `got ${contentType}`,
  );

  const disposition = exportRes.headers.get('content-disposition') || '';
  assert(disposition.includes('.xlsx'), 'Content-Disposition 包含 .xlsx', `got ${disposition}`);

  const blob = await exportRes.blob();
  assert(blob.size > 100, `Excel 文件大小合理 (${blob.size} bytes)`);
}

// ══════════════════════════════════════════════════════
// Phase 6: resume-after-refresh routing convention
// ══════════════════════════════════════════════════════

/**
 * Checks the agreed route layout only. The frontend router is not exercised;
 * the routes are literals defined here, so this documents the convention
 * rather than testing behaviour.
 */
function phase6(): void {
  console.log('\n🔄 Phase 6: 断点恢复路由设计验证');

  const routes = [
    '/literature/extraction/setup',
    '/literature/extraction/progress/some-task-id',
    '/literature/extraction/workbench/some-task-id',
  ] as const;

  for (const route of routes) {
    assert(route.startsWith('/literature/extraction/'), `路由前缀正确: ${route}`);
  }

  assert(routes[1].includes('/progress/'), 'Progress 路由包含 taskId');
  assert(routes[2].includes('/workbench/'), 'Workbench 路由包含 taskId');

  ok('断点恢复路由设计正确（刷新后 URL 可定位到正确步骤 + taskId）');
}

// ══════════════════════════════════════════════════════
// Main
// ══════════════════════════════════════════════════════

/**
 * Runs all phases in order, prints the summary, and exits with status 1 when any
 * check failed or a phase threw, 0 otherwise.
 */
async function main(): Promise<void> {
  console.log('═══════════════════════════════════════════');
  console.log(' M2 HITL 工作台集成测试');
  console.log('═══════════════════════════════════════════');

  try {
    const ctx = await phase1();
    phase2();
    phase3();
    await phase4();
    await phase5(ctx);
    phase6();
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n💥 未捕获异常:', message);
    failed++;
  } finally {
    await prisma.$disconnect();
  }

  console.log('\n═══════════════════════════════════════════');
  console.log(` 结果: ✅ ${passed} 通过, ❌ ${failed} 失败`);
  console.log('═══════════════════════════════════════════');

  process.exit(failed > 0 ? 1 : 0);
}

// A rejection escaping main() (e.g. from $disconnect) must still yield a failing exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});