/*
 * Commit notes:
 * - feat: Create ASLLayout component with 7-module left navigation
 * - feat: Implement Title Screening Settings page with optimized PICOS layout
 * - feat: Add placeholder pages for Workbench and Results
 * - fix: Fix nested routing structure for React Router v6
 * - fix: Resolve Spin component warning in MainLayout
 * - fix: Add QueryClientProvider to App.tsx
 * - style: Optimize PICOS form layout (P+I left, C+O+S right)
 * - style: Align Inclusion/Exclusion criteria side-by-side
 * - docs: Add architecture refactoring and routing fix reports
 *
 * Ref: Week 2 Frontend Development
 * Scope: ASL module MVP - Title Abstract Screening
 *
 * File info: 294 lines, 11 KiB, TypeScript
 */
/**
 * Stroke literature screening test script.
 * Uses real data to verify the screening pipeline's generalization ability.
 */
import XLSX from 'xlsx';
import * as path from 'path';
import { fileURLToPath } from 'url';

import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
// ES modules do not provide __filename/__dirname, so both are derived from
// this module's URL. __dirname anchors the relative path to the test workbook.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * PICOS definition for the stroke study (taken from the test documentation).
 * Keys: population, intervention, comparison, outcome, studyDesign.
 * The values are runtime text handed to the screening service unchanged.
 */
const STROKE_PICOS = {
  population: '非心源性缺血性卒中(NCIS)患者、亚洲人群',
  intervention:
    '抗血小板治疗药物(阿司匹林、氯吡格雷、奥扎格雷、贝前列素、西洛他唑、替罗非班、替格瑞洛、吲哚布芬、沙格雷酯、氯吡格雷阿司匹林、双嘧达莫等)或抗凝药物(阿加曲班、asundexian、milvexian、华法林、低分子肝素、肝素等)或溶栓药物(链激酶、尿激酶、阿替普酶、替奈普酶等)',
  comparison: '安慰剂或常规治疗',
  outcome:
    '疗效安全性:卒中进展、神经功能恶化、卒中复发、残疾、死亡、NIHSS评分变化、VTE、痴呆、认知功能减退、疲乏、抑郁等',
  studyDesign: '系统评价(SR)、随机对照试验(RCT)、真实世界研究(RWE)、观察性研究(OBS)',
};
// Inclusion criteria: eight numbered rules, one per line, with a leading and
// a trailing newline. This text is passed as-is to the screening service, so
// it must contain nothing except the rules themselves.
const INCLUSION_CRITERIA = `
1. 非心源性缺血性卒中、亚洲患者
2. 卒中后接受二级预防治疗的患者(Secondary Stroke Prevention, SSP)
3. 干预措施为抗血小板、抗凝或溶栓药物
4. 报告疗效或安全性结局(卒中进展、复发、残疾、死亡等)
5. 研究类型:系统评价、RCT、真实世界研究、观察性研究
6. 研究时间:2020年之后的文献
7. 包含"二级预防"或"预防复发"或"卒中预防"相关内容
8. 涉及抗血小板或抗凝药物
`;
// Exclusion criteria: eight numbered rules, one per line, with a leading and
// a trailing newline. This text is passed as-is to the screening service, so
// it must contain nothing except the rules themselves.
const EXCLUSION_CRITERIA = `
1. 心源性卒中患者、非亚洲人群
2. 其他类型卒中(非缺血性)
3. 用于急性冠脉综合征(ACS)的抗血小板治疗,未明确提及卒中
4. 房颤(AF)患者
5. 混合人群(包含非卒中患者)
6. 病例报告
7. 非中英文文献
8. 仅包含急性期治疗(如急性期溶栓、取栓),未涉及二级预防
`;
/** One literature record from the test workbook together with its human label. */
interface TestCase {
  /** 1-based position of the case within the selected test set. */
  index: number;
  /** Record identifier (workbook `key` column, or a generated `test-N` id). */
  pmid: string;
  title: string;
  abstract: string;
  /** Human screening label as written in the workbook, e.g. Include/Exclude. */
  humanDecision: string;
  /** Human-provided exclusion reason, when the workbook has one. */
  excludeReason?: string;
}
/** A worksheet row as produced by `sheet_to_json`: column header -> cell value. */
type SheetRow = Record<string, unknown>;

/** Converts a cell value to text; missing cells become the empty string. */
function cellText(value: unknown): string {
  return value == null ? '' : String(value);
}

/** Builds the TestCase for `row`, which occupies 1-based position `index`. */
function buildTestCase(row: SheetRow, index: number): TestCase {
  return {
    index,
    // Cells may hold numbers (e.g. a numeric id); TestCase.pmid is a string.
    pmid: cellText(row['key']) || `test-${index}`,
    title: cellText(row['title']),
    abstract: cellText(row['abstract']),
    humanDecision: cellText(row['Decision']) || 'Unknown',
    excludeReason: cellText(row['Reason for excluded']) || undefined,
  };
}

/**
 * Reads screening test cases from the first worksheet of an Excel workbook.
 *
 * Rows without a `title` or `abstract` are skipped. The remaining rows are
 * split by their `Decision` column (case-insensitive match on "include" /
 * "exclude"), and a mixed sample is returned: the first included rows
 * followed by the first excluded rows, in worksheet order.
 *
 * @param filePath Path of the workbook to read.
 * @param limit Maximum number of cases to return. The budget is split 2:3
 *   between included and excluded rows, so the default of 5 yields up to
 *   2 included + 3 excluded cases. Expected to be a non-negative integer.
 * @returns The selected cases, numbered from 1. Fewer than `limit` cases are
 *   returned when either group has too few rows.
 */
async function readExcelTestCases(filePath: string, limit: number = 5): Promise<TestCase[]> {
  console.log(`📖 读取Excel文件: ${filePath}`);

  const workbook = XLSX.readFile(filePath);
  const sheetName = workbook.SheetNames[0];
  const worksheet = workbook.Sheets[sheetName];
  const rows = XLSX.utils.sheet_to_json(worksheet) as SheetRow[];

  console.log(`✅ 读取到 ${rows.length} 条数据`);

  // Partition usable rows by the human decision (mixed test set).
  const includedRows: SheetRow[] = [];
  const excludedRows: SheetRow[] = [];

  for (const row of rows) {
    // Skip rows that lack a title or an abstract.
    if (!row['title'] || !row['abstract']) {
      continue;
    }

    // cellText guards against non-string cells, which have no toLowerCase().
    const decision = cellText(row['Decision']).toLowerCase();
    if (decision.includes('include')) {
      includedRows.push(row);
    } else if (decision.includes('exclude')) {
      excludedRows.push(row);
    }
  }

  console.log(` - Included案例: ${includedRows.length}条`);
  console.log(` - Excluded案例: ${excludedRows.length}条`);

  // Split the budget 2:3 (included:excluded); limit = 5 gives 2 + 3.
  const budget = Math.max(0, Math.floor(limit));
  const includedQuota = Math.floor((budget * 2) / 5);
  const excludedQuota = budget - includedQuota;

  const pickedIncluded = includedRows.slice(0, includedQuota);
  const pickedExcluded = excludedRows.slice(0, excludedQuota);

  const testCases = [...pickedIncluded, ...pickedExcluded].map((row, position) =>
    buildTestCase(row, position + 1),
  );

  console.log(`✅ 提取 ${testCases.length} 条有效测试案例 (${pickedIncluded.length} Included + ${pickedExcluded.length} Excluded)\n`);
  return testCases;
}
/**
 * Reduces a decision label to a comparable form: anything containing
 * "include" becomes 'include', anything containing "exclude" becomes
 * 'exclude' (so Included/Include and Excluded/Exclude compare equal);
 * any other label is returned lower-cased.
 */
function normalizeDecision(decision: string): string {
  const lower = decision.toLowerCase();
  if (lower.includes('include')) return 'include';
  if (lower.includes('exclude')) return 'exclude';
  return lower;
}

/**
 * Screens one literature record with the dual-model service and compares the
 * AI decision with the human label.
 *
 * @param testCase The record to screen and its human decision.
 * @param models Pair of model identifiers forwarded to `dualModelScreening`.
 * @returns The case, the AI decision ('Include' | 'Exclude' | 'Uncertain',
 *   or 'Error' when the screening call throws), whether it matches the human
 *   label, whether the two models agreed, and the raw service result
 *   (`null` on error). Errors from the screening call are logged, not rethrown.
 */
async function testSingleLiterature(
  testCase: TestCase,
  models: [string, string]
): Promise<{
  testCase: TestCase;
  aiDecision: string;
  isCorrect: boolean;
  hasConsensus: boolean;
  details: any;
}> {
  // Only add an ellipsis when the title was actually truncated.
  const titlePreview =
    testCase.title.length > 100 ? `${testCase.title.substring(0, 100)}...` : testCase.title;

  console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`[${testCase.index}] PMID: ${testCase.pmid}`);
  console.log(`标题: ${titlePreview}`);
  console.log(`人类判断: ${testCase.humanDecision}`);

  try {
    const startTime = Date.now();

    const result = await llmScreeningService.dualModelScreening(
      testCase.pmid || `test-${testCase.index}`,
      testCase.title,
      testCase.abstract,
      STROKE_PICOS,
      INCLUSION_CRITERIA,
      EXCLUSION_CRITERIA,
      models
    );

    const duration = Date.now() - startTime;

    // Map the service's final decision onto Include/Exclude; anything else is Uncertain.
    const aiDecision =
      result.finalDecision === 'include'
        ? 'Include'
        : result.finalDecision === 'exclude'
          ? 'Exclude'
          : 'Uncertain';

    const isCorrect = normalizeDecision(aiDecision) === normalizeDecision(testCase.humanDecision);

    console.log(`AI判断: ${aiDecision}`);
    console.log(`DeepSeek: ${result.deepseek.conclusion} (置信度: ${result.deepseek.confidence})`);
    console.log(`Qwen: ${result.qwen.conclusion} (置信度: ${result.qwen.confidence})`);
    console.log(`一致性: ${result.hasConflict ? '❌ 冲突' : '✅ 一致'}`);
    console.log(`结果: ${isCorrect ? '✅ 正确' : '❌ 错误'}`);
    console.log(`耗时: ${duration}ms`);

    if (!isCorrect) {
      // On a mismatch, print both sides' reasoning to support error analysis.
      console.log(`\n❌ 判断错误!`);
      console.log(`期望: ${testCase.humanDecision}`);
      console.log(`实际: ${aiDecision}`);
      if (testCase.excludeReason) {
        console.log(`人类排除理由: ${testCase.excludeReason}`);
      }
      console.log(`DeepSeek理由: ${result.deepseek.reason}`);
      console.log(`Qwen理由: ${result.qwen.reason}`);
    }

    return {
      testCase,
      aiDecision,
      isCorrect,
      hasConsensus: !result.hasConflict,
      details: result
    };
  } catch (error: unknown) {
    // A failed call counts as an incorrect, non-consensus result.
    console.error(`❌ 测试失败:`, error);
    return {
      testCase,
      aiDecision: 'Error',
      isCorrect: false,
      hasConsensus: false,
      details: null
    };
  }
}
/**
 * Script entry point: loads the stroke test cases from the workbook, screens
 * each one with the 'deepseek-chat' + 'qwen-max' model pair, then prints
 * accuracy and model-consensus statistics with a verdict.
 *
 * Cases are screened one at a time with a 2 s pause between calls. Returns
 * early, after logging, when no usable test case could be loaded.
 */
async function main(): Promise<void> {
  const ACCURACY_TARGET = 85;
  const ACCURACY_MODERATE = 60;
  const CONSENSUS_TARGET = 80;
  const divider = '='.repeat(60);

  console.log('\n🔬 卒中文献筛选测试');
  console.log(divider);
  console.log('目的: 验证系统对不同研究主题的泛化能力\n');

  // Load the test data.
  const excelPath = path.join(__dirname, '../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx');

  let testCases: TestCase[];
  try {
    testCases = await readExcelTestCases(excelPath, 5);
  } catch (error: unknown) {
    // Report why the relative path failed instead of silently swallowing it.
    const reason = error instanceof Error ? error.message : String(error);
    console.error('❌ 读取Excel失败,尝试使用绝对路径...', reason);
    // NOTE(review): machine-specific fallback path; it only exists on the
    // original developer's workstation. Consider an env var or CLI argument.
    const absolutePath = 'D:\\MyCursor\\AIclinicalresearch\\docs\\03-业务模块\\ASL-AI智能文献\\05-测试文档\\03-测试数据\\screening\\Test Cases.xlsx';
    testCases = await readExcelTestCases(absolutePath, 5);
  }

  if (testCases.length === 0) {
    console.error('❌ 没有读取到有效的测试案例');
    return;
  }

  console.log('📋 PICOS标准:');
  console.log(`P: ${STROKE_PICOS.population}`);
  console.log(`I: ${STROKE_PICOS.intervention.substring(0, 80)}...`);
  console.log(`C: ${STROKE_PICOS.comparison}`);
  console.log(`O: ${STROKE_PICOS.outcome.substring(0, 80)}...`);
  console.log(`S: ${STROKE_PICOS.studyDesign}`);

  console.log('\n🚀 开始测试...');
  console.log(`测试样本数: ${testCases.length}`);
  console.log(`测试模型: DeepSeek-V3 + Qwen-Max\n`);

  const results: Awaited<ReturnType<typeof testSingleLiterature>>[] = [];

  for (const [position, testCase] of testCases.entries()) {
    results.push(await testSingleLiterature(testCase, ['deepseek-chat', 'qwen-max']));

    // Pause between calls (not after the last) to stay under API rate limits.
    if (position < testCases.length - 1) {
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  }

  // Summary statistics.
  console.log('\n\n' + divider);
  console.log('📊 测试结果统计');
  console.log(divider);

  const totalTests = results.length;
  const correctCount = results.filter(r => r.isCorrect).length;
  const consensusCount = results.filter(r => r.hasConsensus).length;
  // Keep the unrounded percentages for threshold checks; round for display only,
  // so that e.g. 84.95% is not reported as meeting an 85% target.
  const accuracyPct = totalTests > 0 ? (correctCount / totalTests) * 100 : 0;
  const consensusPct = totalTests > 0 ? (consensusCount / totalTests) * 100 : 0;

  console.log(`\n总测试数: ${totalTests}`);
  console.log(`正确判断: ${correctCount}`);
  console.log(`准确率: ${accuracyPct.toFixed(1)}% ${accuracyPct >= ACCURACY_TARGET ? '✅' : '❌'} (目标≥85%)`);
  console.log(`双模型一致率: ${consensusPct.toFixed(1)}% ${consensusPct >= CONSENSUS_TARGET ? '✅' : '❌'} (目标≥80%)`);

  console.log('\n📋 详细结果:');
  results.forEach((r, i) => {
    console.log(`${i + 1}. ${r.isCorrect ? '✅' : '❌'} PMID:${r.testCase.pmid} - 期望:${r.testCase.humanDecision}, AI:${r.aiDecision}`);
  });

  // Verdict.
  console.log('\n' + divider);
  console.log('🎯 结论');
  console.log(divider);

  if (accuracyPct >= ACCURACY_TARGET) {
    console.log('✅ 测试通过!系统对卒中研究的筛选准确率达标!');
    console.log('📝 建议: 可以继续开发PICOS配置界面,实现MVP。');
  } else if (accuracyPct >= ACCURACY_MODERATE) {
    console.log('⚠️ 准确率中等。系统有一定泛化能力,但需要优化。');
    console.log('📝 建议: 分析错误案例,优化Prompt模板。');
  } else {
    console.log('❌ 准确率较低。当前Prompt对卒中研究泛化能力不足。');
    console.log('📝 建议: 需要重新设计Prompt策略,或考虑用户自定义方案。');
  }

  console.log(divider + '\n');
}
// Run the script. A rejected run is logged and flagged with a non-zero exit
// code so shells and CI can detect the failure.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});