Files
AIclinicalresearch/backend/scripts/test-stroke-screening.ts
HaHafeng 3634933ece refactor(asl): ASL frontend architecture refactoring with left navigation
- feat: Create ASLLayout component with 7-module left navigation
- feat: Implement Title Screening Settings page with optimized PICOS layout
- feat: Add placeholder pages for Workbench and Results
- fix: Fix nested routing structure for React Router v6
- fix: Resolve Spin component warning in MainLayout
- fix: Add QueryClientProvider to App.tsx
- style: Optimize PICOS form layout (P+I left, C+O+S right)
- style: Align Inclusion/Exclusion criteria side-by-side
- docs: Add architecture refactoring and routing fix reports

Ref: Week 2 Frontend Development
Scope: ASL module MVP - Title Abstract Screening
2025-11-18 21:51:51 +08:00

294 lines
11 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 卒中文献筛选测试脚本
* 用真实数据验证泛化能力
*/
import XLSX from 'xlsx';
import * as path from 'path';
import { fileURLToPath } from 'url';
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
// ES modules do not expose __filename/__dirname, so rebuild both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * PICOS definition for the stroke study (taken from the test documentation).
 * The object is handed unchanged to `llmScreeningService.dualModelScreening`.
 */
const STROKE_PICOS = {
  population: '非心源性缺血性卒中NCIS患者、亚洲人群',
  // One continuous string, split per drug class only for readability.
  intervention:
    '抗血小板治疗药物阿司匹林、氯吡格雷、奥扎格雷、贝前列素、西洛他唑、替罗非班、替格瑞洛、吲哚布芬、沙格雷酯、氯吡格雷阿司匹林、双嘧达莫等' +
    '或抗凝药物阿加曲班、asundexian、milvexian、华法林、低分子肝素、肝素等' +
    '或溶栓药物链激酶、尿激酶、阿替普酶、替奈普酶等',
  comparison: '安慰剂或常规治疗',
  outcome:
    '疗效安全性卒中进展、神经功能恶化、卒中复发、残疾、死亡、NIHSS评分变化、VTE、痴呆、认知功能减退、疲乏、抑郁等',
  studyDesign: '系统评价SR、随机对照试验RCT、真实世界研究RWE、观察性研究OBS',
};
/**
 * Inclusion criteria, one numbered rule per line.
 * The empty first and last entries reproduce the leading and trailing newline
 * of the text block.
 */
const INCLUSION_CRITERIA = [
  '',
  '1. 非心源性缺血性卒中、亚洲患者',
  '2. 卒中后接受二级预防治疗的患者Secondary Stroke Prevention, SSP',
  '3. 干预措施为抗血小板、抗凝或溶栓药物',
  '4. 报告疗效或安全性结局(卒中进展、复发、残疾、死亡等)',
  '5. 研究类型系统评价、RCT、真实世界研究、观察性研究',
  '6. 研究时间2020年之后的文献',
  '7. 包含"二级预防"或"预防复发"或"卒中预防"相关内容',
  '8. 涉及抗血小板或抗凝药物',
  '',
].join('\n');
/**
 * Exclusion criteria, one numbered rule per line.
 * The empty first and last entries reproduce the leading and trailing newline
 * of the text block.
 */
const EXCLUSION_CRITERIA = [
  '',
  '1. 心源性卒中患者、非亚洲人群',
  '2. 其他类型卒中(非缺血性)',
  '3. 用于急性冠脉综合征ACS的抗血小板治疗未明确提及卒中',
  '4. 房颤AF患者',
  '5. 混合人群(包含非卒中患者)',
  '6. 病例报告',
  '7. 非中英文文献',
  '8. 仅包含急性期治疗(如急性期溶栓、取栓),未涉及二级预防',
  '',
].join('\n');
/** One spreadsheet row prepared for a screening run. */
interface TestCase {
  /** 1-based position of the case within the selected sample. */
  index: number;
  /** Record identifier: the row's `key` column, or a generated `test-N` fallback. */
  pmid: string;
  /** Article title (`title` column). */
  title: string;
  /** Article abstract (`abstract` column). */
  abstract: string;
  /** Human reviewer's verdict from the `Decision` column (Include/Exclude). */
  humanDecision: string;
  /** Reviewer's stated reason (`Reason for excluded` column), when present. */
  excludeReason?: string;
}
/** Converts one spreadsheet row into a TestCase carrying the given 1-based index. */
function rowToTestCase(row: Record<string, unknown>, index: number): TestCase {
  const key = row['key'];
  const reason = row['Reason for excluded'];
  return {
    index,
    // Spreadsheet cells may hold numbers; coerce so the declared string type is true at runtime.
    pmid: key ? String(key) : `test-${index}`,
    title: String(row['title']),
    abstract: String(row['abstract']),
    humanDecision: String(row['Decision']),
    excludeReason: reason ? String(reason) : undefined,
  };
}

/**
 * Reads the first sheet of an Excel workbook and picks a mixed sample of
 * human-labelled screening cases.
 *
 * Rows without a title or abstract are skipped, as are rows whose `Decision`
 * cell is not text containing "include" or "exclude" (case-insensitive).
 * The sample takes Included rows first, then Excluded rows, both in sheet order.
 *
 * @param filePath Path of the workbook to read.
 * @param limit Maximum number of cases to return. Two fifths (rounded down) are
 *   taken from Included rows and the rest from Excluded rows, so the default of
 *   5 yields up to 2 Included + 3 Excluded. Fractions are floored; negative,
 *   NaN or infinite values yield no cases.
 * @returns The selected cases, indexed from 1. May be shorter than `limit`
 *   when a category has too few rows.
 * @throws Whatever `XLSX.readFile` throws when the file cannot be read.
 */
async function readExcelTestCases(filePath: string, limit: number = 5): Promise<TestCase[]> {
  console.log(`📖 读取Excel文件: ${filePath}`);
  const workbook = XLSX.readFile(filePath);
  const sheetName = workbook.SheetNames[0];
  const worksheet = workbook.Sheets[sheetName];
  const rows = XLSX.utils.sheet_to_json(worksheet) as Record<string, unknown>[];
  console.log(`✅ 读取到 ${rows.length} 条数据`);

  // Bucket usable rows by the human decision.
  const includedRows: Record<string, unknown>[] = [];
  const excludedRows: Record<string, unknown>[] = [];
  for (const row of rows) {
    if (!row['title'] || !row['abstract']) {
      continue;
    }
    const decision = row['Decision'];
    // Non-text cells (numbers, booleans, blanks) cannot be classified.
    if (typeof decision !== 'string') {
      continue;
    }
    const lowered = decision.toLowerCase();
    if (lowered.includes('include')) {
      includedRows.push(row);
    } else if (lowered.includes('exclude')) {
      excludedRows.push(row);
    }
  }
  console.log(` - Included案例: ${includedRows.length}`);
  console.log(` - Excluded案例: ${excludedRows.length}`);

  // Keep the 2:3 Included/Excluded mix, scaled to the requested sample size.
  const requested = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 0;
  const includedQuota = Math.floor((requested * 2) / 5);
  const excludedQuota = requested - includedQuota;
  const pickedIncluded = includedRows.slice(0, includedQuota);
  const pickedExcluded = excludedRows.slice(0, excludedQuota);

  const testCases = [...pickedIncluded, ...pickedExcluded].map((row, position) =>
    rowToTestCase(row, position + 1)
  );
  console.log(`✅ 提取 ${testCases.length} 条有效测试案例 (${pickedIncluded.length} Included + ${pickedExcluded.length} Excluded)\n`);
  return testCases;
}
/**
 * Screens a single literature record with the dual-model service and compares
 * the AI verdict with the human reviewer's decision, logging the outcome.
 *
 * @param testCase The record to screen together with its human label.
 * @param models The pair of model identifiers forwarded to the screening service.
 * @returns The case, the AI verdict ('Include' / 'Exclude' / 'Uncertain', or
 *   'Error' when screening threw), whether it matches the human decision,
 *   whether the two models agreed, and the raw service result (null on error).
 */
async function testSingleLiterature(
  testCase: TestCase,
  models: [string, string]
): Promise<{
  testCase: TestCase;
  aiDecision: string;
  isCorrect: boolean;
  hasConsensus: boolean;
  details: any;
}> {
  // Collapse spelling variants (Included/Include, Excluded/Exclude) to one token.
  const toCanonical = (raw: string) => {
    const lowered = raw.toLowerCase();
    if (lowered.includes('include')) return 'include';
    return lowered.includes('exclude') ? 'exclude' : lowered;
  };

  console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`[${testCase.index}] PMID: ${testCase.pmid}`);
  console.log(`标题: ${testCase.title.substring(0, 100)}...`);
  console.log(`人类判断: ${testCase.humanDecision}`);

  try {
    const startedAt = Date.now();
    const screening = await llmScreeningService.dualModelScreening(
      testCase.pmid || `test-${testCase.index}`,
      testCase.title,
      testCase.abstract,
      STROKE_PICOS,
      INCLUSION_CRITERIA,
      EXCLUSION_CRITERIA,
      models
    );
    const elapsedMs = Date.now() - startedAt;

    // Translate the service's final decision into the Include/Exclude vocabulary.
    let aiDecision: string;
    switch (screening.finalDecision) {
      case 'include':
        aiDecision = 'Include';
        break;
      case 'exclude':
        aiDecision = 'Exclude';
        break;
      default:
        aiDecision = 'Uncertain';
    }

    const isCorrect = toCanonical(aiDecision) === toCanonical(testCase.humanDecision);

    console.log(`AI判断: ${aiDecision}`);
    console.log(`DeepSeek: ${screening.deepseek.conclusion} (置信度: ${screening.deepseek.confidence})`);
    console.log(`Qwen: ${screening.qwen.conclusion} (置信度: ${screening.qwen.confidence})`);
    console.log(`一致性: ${screening.hasConflict ? '❌ 冲突' : '✅ 一致'}`);
    console.log(`结果: ${isCorrect ? '✅ 正确' : '❌ 错误'}`);
    console.log(`耗时: ${elapsedMs}ms`);

    // On a mismatch, dump both sides' reasoning to help diagnose the miss.
    if (isCorrect === false) {
      console.log(`\n❌ 判断错误!`);
      console.log(`期望: ${testCase.humanDecision}`);
      console.log(`实际: ${aiDecision}`);
      if (testCase.excludeReason) {
        console.log(`人类排除理由: ${testCase.excludeReason}`);
      }
      console.log(`DeepSeek理由: ${screening.deepseek.reason}`);
      console.log(`Qwen理由: ${screening.qwen.reason}`);
    }

    const hasConsensus = !screening.hasConflict;
    return { testCase, aiDecision, isCorrect, hasConsensus, details: screening };
  } catch (failure) {
    // A failed screening is reported as an incorrect, non-consensus result.
    console.error(`❌ 测试失败:`, failure);
    return {
      testCase,
      aiDecision: 'Error',
      isCorrect: false,
      hasConsensus: false,
      details: null
    };
  }
}
/**
 * Script entry point: loads the labelled sample from the test spreadsheet,
 * screens every case with the DeepSeek + Qwen pair, then prints accuracy,
 * model-agreement statistics and a verdict.
 *
 * The spreadsheet is looked up in several locations, in order; the first one
 * that can be read wins. If none can be read, the last read error is rethrown.
 */
async function main(): Promise<void> {
  const ACCURACY_TARGET = 85;
  const ACCURACY_FLOOR = 60;
  const CONSENSUS_TARGET = 80;
  const SAMPLE_SIZE = 5;
  const PAUSE_BETWEEN_CALLS_MS = 2000;

  console.log('\n🔬 卒中文献筛选测试');
  console.log('='.repeat(60));
  console.log('目的: 验证系统对不同研究主题的泛化能力\n');

  // Candidate spreadsheet locations:
  //  1. next to the script's parent folder (the path this script always tried first),
  //  2. two levels up - the absolute fallback below places `docs` beside the
  //     folder containing this script's directory, not inside it,
  //  3. the author's absolute fallback path.
  const dataFile = 'docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx';
  const candidatePaths = [
    path.join(__dirname, '..', dataFile),
    path.join(__dirname, '..', '..', dataFile),
    'D:\\MyCursor\\AIclinicalresearch\\docs\\03-业务模块\\ASL-AI智能文献\\05-测试文档\\03-测试数据\\screening\\Test Cases.xlsx',
  ];

  let loaded: TestCase[] | undefined;
  let lastError: unknown;
  for (const candidate of candidatePaths) {
    try {
      loaded = await readExcelTestCases(candidate, SAMPLE_SIZE);
      break;
    } catch (error: unknown) {
      lastError = error;
      console.error(`❌ 读取Excel失败: ${candidate}`);
    }
  }
  if (loaded === undefined) {
    throw lastError;
  }
  const testCases = loaded;

  if (testCases.length === 0) {
    console.error('❌ 没有读取到有效的测试案例');
    return;
  }

  console.log('📋 PICOS标准:');
  console.log(`P: ${STROKE_PICOS.population}`);
  console.log(`I: ${STROKE_PICOS.intervention.substring(0, 80)}...`);
  console.log(`C: ${STROKE_PICOS.comparison}`);
  console.log(`O: ${STROKE_PICOS.outcome.substring(0, 80)}...`);
  console.log(`S: ${STROKE_PICOS.studyDesign}`);
  console.log('\n🚀 开始测试...');
  console.log(`测试样本数: ${testCases.length}`);
  console.log(`测试模型: DeepSeek-V3 + Qwen-Max\n`);

  // Cases run sequentially, with a pause between calls to avoid API rate limits.
  const results: Awaited<ReturnType<typeof testSingleLiterature>>[] = [];
  for (const [position, testCase] of testCases.entries()) {
    results.push(await testSingleLiterature(testCase, ['deepseek-chat', 'qwen-max']));
    const isLast = position === testCases.length - 1;
    if (!isLast) {
      await new Promise(resolve => setTimeout(resolve, PAUSE_BETWEEN_CALLS_MS));
    }
  }

  // Summary statistics
  console.log('\n\n' + '='.repeat(60));
  console.log('📊 测试结果统计');
  console.log('='.repeat(60));
  const totalTests = results.length;
  const correctCount = results.filter(r => r.isCorrect).length;
  const consensusCount = results.filter(r => r.hasConsensus).length;
  const accuracy = totalTests > 0 ? (correctCount / totalTests * 100).toFixed(1) : '0.0';
  const consensusRate = totalTests > 0 ? (consensusCount / totalTests * 100).toFixed(1) : '0.0';
  // Thresholds are checked against the rounded, displayed figures.
  const accuracyValue = parseFloat(accuracy);
  const consensusValue = parseFloat(consensusRate);

  console.log(`\n总测试数: ${totalTests}`);
  console.log(`正确判断: ${correctCount}`);
  console.log(`准确率: ${accuracy}% ${accuracyValue >= ACCURACY_TARGET ? '✅' : '❌'} (目标≥${ACCURACY_TARGET}%)`);
  console.log(`双模型一致率: ${consensusRate}% ${consensusValue >= CONSENSUS_TARGET ? '✅' : '❌'} (目标≥${CONSENSUS_TARGET}%)`);

  console.log('\n📋 详细结果:');
  for (const [i, r] of results.entries()) {
    console.log(`${i + 1}. ${r.isCorrect ? '✅' : '❌'} PMID:${r.testCase.pmid} - 期望:${r.testCase.humanDecision}, AI:${r.aiDecision}`);
  }

  // Verdict
  console.log('\n' + '='.repeat(60));
  console.log('🎯 结论');
  console.log('='.repeat(60));
  if (accuracyValue >= ACCURACY_TARGET) {
    console.log('✅ 测试通过!系统对卒中研究的筛选准确率达标!');
    console.log('📝 建议: 可以继续开发PICOS配置界面实现MVP。');
  } else if (accuracyValue >= ACCURACY_FLOOR) {
    console.log('⚠️ 准确率中等。系统有一定泛化能力,但需要优化。');
    console.log('📝 建议: 分析错误案例优化Prompt模板。');
  } else {
    console.log('❌ 准确率较低。当前Prompt对卒中研究泛化能力不足。');
    console.log('📝 建议: 需要重新设计Prompt策略或考虑用户自定义方案。');
  }
  console.log('='.repeat(60) + '\n');
}
// Run the script. A fatal error is logged and reflected in the exit code so that
// shells and CI can tell the run failed; exitCode is set rather than calling
// process.exit() so pending output is still flushed.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});