Files
AIclinicalresearch/backend/scripts/test-stroke-screening-lenient.ts
HaHafeng 2e8699c217 feat(asl): Week 2 Day 2 - Excel import with template download and intelligent dedup
Features:
- feat: Excel template generation and download (with examples)
- feat: Excel file parsing in memory (cloud-native, no disk write)
- feat: Field validation (title + abstract required)
- feat: Smart deduplication (DOI priority + Title fallback)
- feat: Literature preview table with statistics
- feat: Complete submission flow (create project + import literatures)

Components:
- feat: Create excelUtils.ts with full Excel processing toolkit
- feat: Enhance TitleScreeningSettings page with upload/preview/submit
- feat: Update API interface signatures and export unified aslApi object

Dependencies:
- chore: Add xlsx library for Excel file processing

Ref: Week 2 Frontend Development - Day 2
Scope: ASL Module MVP - Title Abstract Screening
Cloud-Native: Memory parsing, no file persistence
2025-11-19 10:24:47 +08:00

207 lines
7.9 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 卒中数据测试 - 宽松模式
*
* 测试目的：验证宽松Prompt是否能提高初筛准确率
*
* 策略:
* - 宁可多纳入,也不要错过
* - 只排除明显不符合的
* - 边界情况倾向于纳入
*/
import * as fs from 'fs';
import * as path from 'path';
import * as XLSX from 'xlsx';
import { fileURLToPath } from 'url';
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
// ES modules do not define the CommonJS `__filename` / `__dirname` globals, so both are
// rebuilt here from this module's own URL. `__dirname` anchors the relative path to the
// test-case spreadsheet further down.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * PICOS definition of the review question; handed to the screening service
 * unchanged for every test case.
 */
const picoCriteria = {
  population: '非心源性缺血性卒中患者、亚洲人群',
  intervention: '抗血小板药物/抗凝药物/溶栓药物(阿司匹林、氯吡格雷、替格瑞洛、达比加群等)',
  comparison: '安慰剂或常规治疗',
  outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
  studyDesign: 'SR、RCT、RWE、OBS',
};

/**
 * Inclusion rules as a numbered list, one rule per line.
 * The empty first and last entries reproduce the leading and trailing newline
 * of the prompt text.
 */
const inclusionCriteria = [
  '',
  '1. 研究对象为非心源性缺血性卒中患者',
  '2. 研究人群为亚洲人群(优先)',
  '3. 干预措施为抗血小板/抗凝/溶栓药物',
  '4. 对照组为安慰剂或常规治疗',
  '5. 研究时间在2020年之后',
  '6. 研究设计为SR、RCT、RWE、OBS',
  '',
].join('\n');

/**
 * Exclusion rules, laid out the same way as the inclusion rules
 * (leading and trailing newline included).
 */
const exclusionCriteria = [
  '',
  '1. 综述、病例报告、会议摘要',
  '2. 动物实验、体外实验',
  '3. 研究人群非亚洲人群(除非有特殊价值)',
  '4. 研究时间在2020年之前',
  '5. 心源性卒中或出血性卒中',
  '',
].join('\n');
/** One spreadsheet row keyed by column header; cell value types are not known up front. */
type ScreeningRow = Record<string, unknown>;

/** How many human-included and human-excluded rows are sampled for the run. */
const INCLUDED_SAMPLE_SIZE = 2;
const EXCLUDED_SAMPLE_SIZE = 3;

/**
 * Reports whether the row's `Decision` cell contains `keyword`, ignoring case.
 * Rows without a `Decision` value never match.
 */
function decisionContains(row: ScreeningRow, keyword: string): boolean {
  const decision = row['Decision'];
  return decision != null && String(decision).toLowerCase().includes(keyword);
}

// Load the human-labelled test cases from the first worksheet of the workbook.
const excelPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
);
const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });
const firstSheetName = workbook.SheetNames[0];
if (firstSheetName === undefined) {
  // Without this guard an empty workbook yields zero rows and a report full of NaN.
  throw new Error(`No worksheet found in test-case workbook: ${excelPath}`);
}
const data = XLSX.utils.sheet_to_json<ScreeningRow>(workbook.Sheets[firstSheetName]);

// Pick the sample: the first rows labelled "include", then the first rows labelled "exclude".
const includedCases = data
  .filter(row => decisionContains(row, 'include'))
  .slice(0, INCLUDED_SAMPLE_SIZE);
const excludedCases = data
  .filter(row => decisionContains(row, 'exclude'))
  .slice(0, EXCLUDED_SAMPLE_SIZE);
const testCases = [...includedCases, ...excludedCases];

if (testCases.length === 0) {
  // Surface the problem early instead of letting an empty run look like a model failure.
  console.warn(`⚠ 未找到带有 include/exclude 标注的测试案例: ${excelPath}`);
}

console.log('\n🚀 开始宽松模式测试\n');
console.log(`📊 测试配置:`);
console.log(` - 模型组合: DeepSeek-V3 + Qwen-Max`);
console.log(` - 筛选风格: 宽松模式lenient`);
console.log(` - 测试样本: ${testCases.length}\n`);
/**
 * Outcome of screening one test case, as collected by `runTest` for the final report.
 */
interface TestResult {
  // --- which case ---
  /** 1-based position of the case within the run. */
  caseIndex: number;
  /** Case title, cut to at most 100 characters. */
  title: string;

  // --- decisions ---
  /** Human label normalised to `include` or `exclude`. */
  humanDecision: string;
  /** Final AI decision; a `pending` decision is recorded as `uncertain`. */
  aiDecision: string;
  /** Conclusion reported by the first model (DeepSeek). */
  model1Conclusion: string;
  /** Conclusion reported by the second model (Qwen). */
  model2Conclusion: string;

  // --- evaluation ---
  /** Whether the AI decision equals the normalised human decision. */
  isCorrect: boolean;
  /** Conflict flag copied from the screening result. */
  hasConflict: boolean;
  /** Confidence reported by the first model (DeepSeek). */
  confidence: number;
  /** Reason text reported by the first model (DeepSeek). */
  reason: string;
}
/**
 * Standard-mode percentages shown in the comparison table.
 * They are fixed reference values, not measured by this script.
 */
const STANDARD_MODE_BASELINE = { accuracy: 60, includedRecall: 0, excludedAccuracy: 100 } as const;

/** Returns `part / total` as a percentage, or `undefined` when `total` is 0. */
function percentOf(part: number, total: number): number | undefined {
  return total === 0 ? undefined : (part / total) * 100;
}

/** Renders a percentage with one decimal place, or `N/A` when it could not be computed. */
function formatPercent(value: number | undefined): string {
  return value === undefined ? 'N/A' : `${value.toFixed(1)}%`;
}

/** Renders the difference between a percentage and its baseline, or `N/A` when unavailable. */
function formatDelta(value: number | undefined, baseline: number): string {
  return value === undefined ? 'N/A' : `${(value - baseline).toFixed(1)}%`;
}

/** Converts a raw spreadsheet cell to text; empty cells become the empty string. */
function cellText(value: unknown): string {
  return value == null ? '' : String(value);
}

/**
 * Screens every sampled test case in lenient mode and prints an accuracy report.
 *
 * Each case goes through `llmScreeningService.dualModelScreening`, and the final
 * decision is compared with the human label. A case whose screening call throws is
 * logged, counted as failed and left out of the statistics. Percentages with an
 * empty denominator are shown as `N/A` instead of `NaN%`.
 *
 * @returns A promise that resolves once the report has been printed.
 */
async function runTest(): Promise<void> {
  const results: TestResult[] = [];
  let failedCount = 0;

  for (const [i, testCase] of testCases.entries()) {
    // Rows come straight from the spreadsheet, so every cell is coerced to text before
    // any string method is called; a numeric title must not abort the whole run.
    const row = testCase as Record<string, unknown>;
    const title = cellText(row['title']);
    const abstract = cellText(row['abstract']);
    const humanDecision = cellText(row['Decision']);

    console.log(`[${i + 1}/${testCases.length}] 正在筛选...`);
    console.log(`标题: ${title.substring(0, 60)}...`);
    console.log(`人类决策: ${humanDecision}`);

    try {
      const screeningResult = await llmScreeningService.dualModelScreening(
        `test-case-${i + 1}`,
        title,
        abstract,
        picoCriteria,
        inclusionCriteria,
        exclusionCriteria,
        ['deepseek-chat', 'qwen-max'],
        'lenient' // use the lenient screening style
      );

      const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';
      // NOTE(review): an 'uncertain' AI decision can never equal the human label, so it is
      // always scored as incorrect. Confirm this is the intended scoring for lenient mode.
      const normalizedAI = screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
      const isCorrect = normalizedAI === normalizedHuman;

      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
      console.log(`置信度: ${screeningResult.deepseek.confidence.toFixed(2)}\n`);

      results.push({
        caseIndex: i + 1,
        title: title.substring(0, 100),
        humanDecision: normalizedHuman,
        aiDecision: normalizedAI,
        model1Conclusion: screeningResult.deepseek.conclusion,
        model2Conclusion: screeningResult.qwen.conclusion,
        isCorrect,
        hasConflict: screeningResult.hasConflict,
        confidence: screeningResult.deepseek.confidence,
        reason: screeningResult.deepseek.reason
      });
    } catch (error: unknown) {
      failedCount++;
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 筛选失败: ${message}\n`);
    }
  }

  // Report header
  console.log('\n' + '='.repeat(80));
  console.log('📊 宽松模式测试报告');
  console.log('='.repeat(80) + '\n');

  if (failedCount > 0) {
    // Failed cases are missing from every figure below, so say so explicitly.
    console.log(`⚠ 筛选失败的案例: ${failedCount}/${testCases.length} (未计入以下统计)\n`);
  }

  if (results.length === 0) {
    // Nothing to measure: every ratio below would be 0/0.
    console.log('❌ 没有成功完成的筛选结果,无法生成统计报告\n');
    return;
  }

  const correct = results.filter(r => r.isCorrect).length;
  const consistent = results.filter(r => !r.hasConflict).length;
  const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / results.length;
  const accuracy = percentOf(correct, results.length);

  console.log(`✅ 准确率: ${formatPercent(accuracy)} (${correct}/${results.length})`);
  console.log(`✅ 一致率: ${formatPercent(percentOf(consistent, results.length))} (${consistent}/${results.length})`);
  console.log(`✅ 平均置信度: ${avgConfidence.toFixed(2)}\n`);

  // Accuracy split by the human label; either group may be empty.
  const includedResults = results.filter(r => r.humanDecision === 'include');
  const excludedResults = results.filter(r => r.humanDecision === 'exclude');
  const includedCorrect = includedResults.filter(r => r.isCorrect).length;
  const excludedCorrect = excludedResults.filter(r => r.isCorrect).length;
  const includedAccuracy = percentOf(includedCorrect, includedResults.length);
  const excludedAccuracy = percentOf(excludedCorrect, excludedResults.length);

  console.log('📋 分类准确率:');
  console.log(` 应纳入文献 (Included): ${formatPercent(includedAccuracy)} (${includedCorrect}/${includedResults.length})`);
  console.log(` 应排除文献 (Excluded): ${formatPercent(excludedAccuracy)} (${excludedCorrect}/${excludedResults.length})\n`);

  // Per-case breakdown
  console.log('📝 详细案例分析:\n');
  for (const r of results) {
    const status = r.isCorrect ? '✅ 正确' : '❌ 错误';
    console.log(`[案例 ${r.caseIndex}] ${status}`);
    console.log(` 标题: ${r.title}`);
    console.log(` 人类决策: ${r.humanDecision}`);
    console.log(` AI决策: ${r.aiDecision}`);
    console.log(` 模型1: ${r.model1Conclusion}, 模型2: ${r.model2Conclusion}`);
    console.log(` 置信度: ${r.confidence.toFixed(2)}`);
    if (!r.isCorrect) {
      console.log(` AI理由: ${r.reason.substring(0, 150)}...`);
    }
    console.log('');
  }

  // Comparison against the standard-mode baseline
  const baseline = STANDARD_MODE_BASELINE;
  console.log('='.repeat(80));
  console.log('🔄 与标准模式对比\n');
  console.log('| 指标 | 标准模式 | 宽松模式 | 改进 |');
  console.log('|------|----------|----------|------|');
  console.log(`| 准确率 | ${baseline.accuracy}% | ${formatPercent(accuracy)} | ${formatDelta(accuracy, baseline.accuracy)} |`);
  console.log(`| 召回率(Included) | ${baseline.includedRecall}% | ${formatPercent(includedAccuracy)} | ${formatDelta(includedAccuracy, baseline.includedRecall)} |`);
  console.log(`| 排除准确率 | ${baseline.excludedAccuracy}% | ${formatPercent(excludedAccuracy)} | ${formatDelta(excludedAccuracy, baseline.excludedAccuracy)} |`);
  console.log('\n' + '='.repeat(80));

  // Verdict, based on overall accuracy (results is non-empty here).
  const accuracyRatio = correct / results.length;
  if (accuracyRatio >= 0.8) {
    console.log('\n🎉 宽松模式效果显著准确率≥80%');
    console.log('💡 建议: 初筛使用宽松模式,全文复筛使用严格模式');
  } else if (accuracyRatio >= 0.6) {
    console.log('\n⚠ 宽松模式有改进,但仍需优化');
    console.log('💡 建议: 继续调整Prompt或考虑增加Few-shot示例');
  } else {
    console.log('\n❌ 宽松模式改进有限');
    console.log('💡 建议: 问题不在宽松/严格而在PICOS标准的理解差异');
    console.log(' → 需要实现用户自定义边界情况功能');
  }

  console.log('\n✅ 测试完成!\n');
}
// Start the run. A rejection that escapes runTest is written to stderr.
void runTest().catch((reason: unknown) => {
  console.error(reason);
});