refactor(asl): ASL frontend architecture refactoring with left navigation

- feat: Create ASLLayout component with 7-module left navigation - feat: Implement Title Screening Settings page with optimized PICOS layout - feat: Add placeholder pages for Workbench and Results - fix: Fix nested routing structure for React Router v6 - fix: Resolve Spin component warning in MainLayout - fix: Add QueryClientProvider to App.tsx - style: Optimize PICOS form layout (P+I left, C+O+S right) - style: Align Inclusion/Exclusion criteria side-by-side - docs: Add architecture refactoring and routing fix reports Ref: Week 2 Frontend Development Scope: ASL module MVP - Title Abstract Screening
2025-11-18 21:51:51 +08:00
parent e3e7e028e8
commit 3634933ece
213 changed files with 20054 additions and 442 deletions
--- a/backend/scripts/test-stroke-screening-lenient.ts
+++ b/backend/scripts/test-stroke-screening-lenient.ts
@@ -0,0 +1,205 @@
+/**
+ * 卒中数据测试 - 宽松模式
+ * 
+ * 测试目的：验证宽松Prompt是否能提高初筛准确率
+ * 
+ * 策略：
+ * - 宁可多纳入，也不要错过
+ * - 只排除明显不符合的
+ * - 边界情况倾向于纳入
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import * as XLSX from 'xlsx';
+import { fileURLToPath } from 'url';
+import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
+
+const __filename = fileURLToPath(import.meta.url); // file-system path of this module, derived from its module URL
+const __dirname = path.dirname(__filename); // directory containing this script; used as the base for the relative Excel path
+
+// PICOS criteria (population, intervention, comparison, outcome, study design) handed to the screening service
+const picoCriteria = {
+  population: '非心源性缺血性卒中患者、亚洲人群',
+  intervention: '抗血小板药物/抗凝药物/溶栓药物（阿司匹林、氯吡格雷、替格瑞洛、达比加群等）',
+  comparison: '安慰剂或常规治疗',
+  outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
+  studyDesign: 'SR、RCT、RWE、OBS'
+};
+
+const inclusionCriteria = `
+1. 研究对象为非心源性缺血性卒中患者
+2. 研究人群为亚洲人群（优先）
+3. 干预措施为抗血小板/抗凝/溶栓药物
+4. 对照组为安慰剂或常规治疗
+5. 研究时间在2020年之后
+6. 研究设计为SR、RCT、RWE、OBS
+`; // inclusion criteria text, passed unchanged to dualModelScreening (leading/trailing newlines are part of the string)
+
+const exclusionCriteria = `
+1. 综述、病例报告、会议摘要
+2. 动物实验、体外实验
+3. 研究人群非亚洲人群（除非有特殊价值）
+4. 研究时间在2020年之前
+5. 心源性卒中或出血性卒中
+`; // exclusion criteria text, passed unchanged to dualModelScreening. NOTE(review): item 1 excludes reviews while the inclusion list accepts SR — confirm this is intended
+
+// Load the labelled test cases from the first worksheet of the Excel workbook.
+const excelPath = path.join(
+  __dirname,
+  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
+);
+
+const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });
+
+// Fail fast when the workbook has no worksheet instead of continuing with an
+// empty dataset and producing a meaningless report later on.
+const firstSheetName = workbook.SheetNames[0];
+const firstSheet = firstSheetName === undefined ? undefined : workbook.Sheets[firstSheetName];
+if (firstSheet === undefined) {
+  throw new Error(`工作簿中没有可用的工作表: ${excelPath}`);
+}
+const data = XLSX.utils.sheet_to_json(firstSheet);
+
+/**
+ * Returns at most `limit` rows whose "Decision" cell contains `keyword`
+ * (case-insensitive). Rows without a "Decision" value never match.
+ */
+const pickByDecision = (keyword: string, limit: number) =>
+  data
+    .filter((row: any) => row['Decision']?.toString().toLowerCase().includes(keyword))
+    .slice(0, limit);
+
+// Sample selection: the first 2 human-included rows and the first 3 human-excluded rows.
+const includedCases = pickByDecision('include', 2);
+const excludedCases = pickByDecision('exclude', 3);
+
+const testCases = [...includedCases, ...excludedCases];
+
+if (testCases.length === 0) {
+  // Without labelled rows every metric downstream would be computed over nothing.
+  console.warn(`⚠️ 未找到 Decision 列标注为 include/exclude 的测试案例: ${excelPath}`);
+}
+
+// Print the start-up banner followed by the run configuration, one console.log call per line.
+const bannerLines = [
+  '\n🚀 开始宽松模式测试\n',
+  '📊 测试配置:',
+  '   - 模型组合: DeepSeek-V3 + Qwen-Max',
+  '   - 筛选风格: 宽松模式（lenient）',
+  `   - 测试样本: ${testCases.length}篇\n`
+];
+for (const bannerLine of bannerLines) {
+  console.log(bannerLine);
+}
+
+interface TestResult { // outcome of screening one test case, compared against the human label
+  caseIndex: number; // 1-based position of the case in the test sample
+  title: string; // article title, truncated to the first 100 characters
+  humanDecision: string; // human label normalized to 'include' or 'exclude'
+  aiDecision: string; // final decision of the screening service; 'pending' is reported as 'uncertain'
+  model1Conclusion: string; // conclusion taken from the `deepseek` part of the screening result
+  model2Conclusion: string; // conclusion taken from the `qwen` part of the screening result
+  isCorrect: boolean; // true when aiDecision equals humanDecision
+  hasConflict: boolean; // conflict flag copied from the screening result
+  confidence: number; // confidence taken from the `deepseek` part only
+  reason: string; // reason taken from the `deepseek` part only
+}
+
+/** Converts a spreadsheet cell of any type to a string; null and undefined become ''. */
+function cellToText(value: unknown): string {
+  return value == null ? '' : String(value);
+}
+
+/** Formats numerator/denominator as a percentage with one decimal, or 'N/A' when the denominator is 0. */
+function formatRate(numerator: number, denominator: number): string {
+  return denominator === 0 ? 'N/A' : `${(numerator / denominator * 100).toFixed(1)}%`;
+}
+
+/** Formats the percentage-point difference to `baselinePercent`, or 'N/A' when the denominator is 0. */
+function formatDelta(numerator: number, denominator: number, baselinePercent: number): string {
+  return denominator === 0
+    ? 'N/A'
+    : `${(numerator / denominator * 100 - baselinePercent).toFixed(1)}%`;
+}
+
+/**
+ * Screens every selected test case with the dual-model service in lenient
+ * mode, compares the AI decision with the human label and prints a report:
+ * overall accuracy, model agreement, per-category accuracy, per-case details,
+ * a comparison table against hard-coded standard-mode figures, and a verdict.
+ *
+ * A case whose screening call throws is logged, counted as failed and left
+ * out of the statistics; the run continues with the next case.
+ *
+ * @returns A promise that resolves once the report has been printed.
+ */
+async function runTest(): Promise<void> {
+  const results: TestResult[] = [];
+  let failedCount = 0;
+
+  for (let i = 0; i < testCases.length; i++) {
+    // Rows come from the spreadsheet parser; treat cell values as unknown and
+    // coerce them, since a cell may hold a number or be missing entirely.
+    const testCase = testCases[i] as Record<string, unknown>;
+    const title = cellToText(testCase['title']);
+    const abstract = cellToText(testCase['abstract']);
+    const humanDecision = cellToText(testCase['Decision']);
+
+    console.log(`[${i + 1}/${testCases.length}] 正在筛选...`);
+    console.log(`标题: ${title.substring(0, 60)}...`);
+    console.log(`人类决策: ${humanDecision}`);
+
+    try {
+      const screeningResult = await llmScreeningService.dualModelScreening(
+        `test-case-${i + 1}`,
+        title,
+        abstract,
+        picoCriteria,
+        inclusionCriteria,
+        exclusionCriteria,
+        ['deepseek-chat', 'qwen-max'],
+        'lenient'  // ⭐ lenient screening style
+      );
+
+      // Anything that does not contain "include" is treated as an exclusion.
+      const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';
+      // 'pending' is reported as 'uncertain', which never matches a human label.
+      const normalizedAI = screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
+      const isCorrect = normalizedAI === normalizedHuman;
+
+      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
+      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
+      console.log(`置信度: ${screeningResult.deepseek.confidence.toFixed(2)}\n`);
+
+      results.push({
+        caseIndex: i + 1,
+        title: title.substring(0, 100),
+        humanDecision: normalizedHuman,
+        aiDecision: normalizedAI,
+        model1Conclusion: screeningResult.deepseek.conclusion,
+        model2Conclusion: screeningResult.qwen.conclusion,
+        isCorrect,
+        hasConflict: screeningResult.hasConflict,
+        confidence: screeningResult.deepseek.confidence,
+        reason: screeningResult.deepseek.reason
+      });
+
+    } catch (error: unknown) {
+      failedCount++;
+      const message = error instanceof Error ? error.message : String(error);
+      console.error(`❌ 筛选失败: ${message}\n`);
+    }
+  }
+
+  // Report header
+  console.log('\n' + '='.repeat(80));
+  console.log('📊 宽松模式测试报告');
+  console.log('='.repeat(80) + '\n');
+
+  if (failedCount > 0) {
+    // Failed cases are not part of any denominator below; make that visible.
+    console.log(`⚠️ 筛选失败: ${failedCount}/${testCases.length} 篇（未计入以下统计）\n`);
+  }
+
+  if (results.length === 0) {
+    // Every metric would be a division by zero; stop instead of printing NaN
+    // and drawing a conclusion from it.
+    console.log('❌ 没有成功的筛选结果，无法生成统计报告\n');
+    return;
+  }
+
+  const total = results.length;
+  const correct = results.filter(r => r.isCorrect).length;
+  const consistent = results.filter(r => !r.hasConflict).length;
+  const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / total;
+
+  console.log(`✅ 准确率: ${formatRate(correct, total)} (${correct}/${total})`);
+  console.log(`✅ 一致率: ${formatRate(consistent, total)} (${consistent}/${total})`);
+  console.log(`✅ 平均置信度: ${avgConfidence.toFixed(2)}\n`);
+
+  // Accuracy grouped by the human decision; a group may be empty.
+  const includedResults = results.filter(r => r.humanDecision === 'include');
+  const excludedResults = results.filter(r => r.humanDecision === 'exclude');
+
+  const includedCorrect = includedResults.filter(r => r.isCorrect).length;
+  const excludedCorrect = excludedResults.filter(r => r.isCorrect).length;
+
+  console.log('📋 分类准确率:');
+  console.log(`   应纳入文献 (Included): ${formatRate(includedCorrect, includedResults.length)} (${includedCorrect}/${includedResults.length})`);
+  console.log(`   应排除文献 (Excluded): ${formatRate(excludedCorrect, excludedResults.length)} (${excludedCorrect}/${excludedResults.length})\n`);
+
+  // Per-case details; the AI reason is only shown for wrong decisions.
+  console.log('📝 详细案例分析:\n');
+  results.forEach(r => {
+    const status = r.isCorrect ? '✅ 正确' : '❌ 错误';
+    console.log(`[案例 ${r.caseIndex}] ${status}`);
+    console.log(`  标题: ${r.title}`);
+    console.log(`  人类决策: ${r.humanDecision}`);
+    console.log(`  AI决策: ${r.aiDecision}`);
+    console.log(`  模型1: ${r.model1Conclusion}, 模型2: ${r.model2Conclusion}`);
+    console.log(`  置信度: ${r.confidence.toFixed(2)}`);
+    if (!r.isCorrect) {
+      console.log(`  AI理由: ${r.reason.substring(0, 150)}...`);
+    }
+    console.log('');
+  });
+
+  // Comparison table. The standard-mode figures (60% / 0% / 100%) are
+  // hard-coded here — presumably from an earlier standard-mode run; verify
+  // them before relying on the "improvement" column.
+  console.log('='.repeat(80));
+  console.log('🔄 与标准模式对比\n');
+  console.log('| 指标 | 标准模式 | 宽松模式 | 改进 |');
+  console.log('|------|----------|----------|------|');
+  console.log(`| 准确率 | 60% | ${formatRate(correct, total)} | ${formatDelta(correct, total, 60)} |`);
+  console.log(`| 召回率(Included) | 0% | ${formatRate(includedCorrect, includedResults.length)} | ${formatDelta(includedCorrect, includedResults.length, 0)} |`);
+  console.log(`| 排除准确率 | 100% | ${formatRate(excludedCorrect, excludedResults.length)} | ${formatDelta(excludedCorrect, excludedResults.length, 100)} |`);
+  console.log('\n' + '='.repeat(80));
+
+  // Verdict based on overall accuracy.
+  const accuracy = correct / total;
+  if (accuracy >= 0.8) {
+    console.log('\n🎉 宽松模式效果显著！准确率≥80%');
+    console.log('💡 建议: 初筛使用宽松模式，全文复筛使用严格模式');
+  } else if (accuracy >= 0.6) {
+    console.log('\n⚠️ 宽松模式有改进，但仍需优化');
+    console.log('💡 建议: 继续调整Prompt或考虑增加Few-shot示例');
+  } else {
+    console.log('\n❌ 宽松模式改进有限');
+    console.log('💡 建议: 问题不在宽松/严格，而在PICOS标准的理解差异');
+    console.log('   → 需要实现用户自定义边界情况功能');
+  }
+
+  console.log('\n✅ 测试完成！\n');
+}
+
+// Script entry point: log an unexpected failure and report it through a
+// non-zero exit code so that callers (shell, CI) can detect it.
+runTest().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
+
+