Features:
- Backend statistics API (cloud-native Prisma aggregation)
- Results page with hybrid solution (AI consensus + human final decision)
- Excel export (frontend generation, zero disk write, cloud-native)
- PRISMA-style exclusion reason analysis with bar chart
- Batch selection and export (3 export methods)
- Fixed logic contradiction (included records no longer show an exclusion reason)
- Optimized table width (870px, no horizontal scroll)

Components:
- Backend: screeningController.ts - add getProjectStatistics API
- Frontend: ScreeningResults.tsx - complete results page (hybrid solution)
- Frontend: excelExport.ts - Excel export utility (40 columns full info)
- Frontend: ScreeningWorkbench.tsx - add navigation button
- Utils: get-test-projects.mjs - quick test tool

Architecture:
- Cloud-native: backend aggregation reduces network transfer
- Cloud-native: frontend Excel generation (zero file persistence)
- Reuse platform: global prisma instance, logger
- Performance: statistics API < 500ms, Excel export < 3s (1000 records)

Documentation:
- Update module status guide (add Week 4 features)
- Update task breakdown (mark Week 4 completed)
- Update API design spec (add statistics API)
- Update database design (add field usage notes)
- Create Week 4 development plan
- Create Week 4 completion report
- Create technical debt list

Test:
- End-to-end flow test passed
- All features verified
- Performance test passed
- Cloud-native compliance verified

Ref: Week 4 Development Plan
Scope: ASL Module MVP - Title Abstract Screening Results
Cloud-Native: Backend aggregation + Frontend Excel generation
211 lines
7.9 KiB
TypeScript
211 lines
7.9 KiB
TypeScript
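/**
 * Stroke data test - lenient mode
 *
 * Purpose: verify whether the lenient prompt improves initial-screening accuracy.
 *
 * Strategy:
 * - Prefer including too much over missing a relevant study
 * - Exclude only records that clearly do not meet the criteria
 * - Lean towards inclusion for borderline cases
 */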
|
||
|
||
import * as fs from 'fs';
|
||
import * as path from 'path';
|
||
import * as XLSX from 'xlsx';
|
||
import { fileURLToPath } from 'url';
|
||
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
|
||
|
||
// ES modules do not define __filename / __dirname, so derive them from
// import.meta.url; __dirname anchors the relative fixture path used below.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
|
||
|
||
// PICOS criteria passed to the screening service for every test case.
// The values are prompt text and are intentionally kept in Chinese.
const picoCriteria = {
  population: '非心源性缺血性卒中患者、亚洲人群',
  intervention: '抗血小板药物/抗凝药物/溶栓药物(阿司匹林、氯吡格雷、替格瑞洛、达比加群等)',
  comparison: '安慰剂或常规治疗',
  outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
  studyDesign: 'SR、RCT、RWE、OBS'
};
|
||
|
||
// Inclusion criteria, sent verbatim to the screening service as prompt text.
// One numbered rule per line; the literal must contain nothing but the rules.
const inclusionCriteria = `
1. 研究对象为非心源性缺血性卒中患者
2. 研究人群为亚洲人群(优先)
3. 干预措施为抗血小板/抗凝/溶栓药物
4. 对照组为安慰剂或常规治疗
5. 研究时间在2020年之后
6. 研究设计为SR、RCT、RWE、OBS
`;
|
||
|
||
// Exclusion criteria, sent verbatim to the screening service as prompt text.
// One numbered rule per line; the literal must contain nothing but the rules.
const exclusionCriteria = `
1. 综述、病例报告、会议摘要
2. 动物实验、体外实验
3. 研究人群非亚洲人群(除非有特殊价值)
4. 研究时间在2020年之前
5. 心源性卒中或出血性卒中
`;
|
||
|
||
// Load the labelled test cases from the first worksheet of the fixture workbook.
const excelPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
);

const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });

// Fail with a clear message instead of handing an undefined sheet to the parser.
const firstSheetName = workbook.SheetNames[0];
if (firstSheetName === undefined) {
  throw new Error(`No worksheet found in test workbook: ${excelPath}`);
}

// One object per spreadsheet row, keyed by the header row's column names.
const data = XLSX.utils.sheet_to_json(workbook.Sheets[firstSheetName]);
|
||
|
||
// Select the evaluation sample: the first 2 rows labelled "include" and the
// first 3 rows labelled "exclude" by the human reviewer.

/** True when the row's `Decision` cell contains `keyword` (case-insensitive). */
const decisionContains = (row: unknown, keyword: string): boolean => {
  const decision = (row as Record<string, unknown>)['Decision'];
  // Empty cells are absent from the parsed row; treat them as "no match".
  return decision != null && String(decision).toLowerCase().includes(keyword);
};

const includedCases = data.filter((row) => decisionContains(row, 'include')).slice(0, 2);

const excludedCases = data.filter((row) => decisionContains(row, 'exclude')).slice(0, 3);

// Included cases come first, so their case indices in the report are 1..2.
const testCases = [...includedCases, ...excludedCases];
|
||
|
||
// Print the run banner: model pair, screening style and sample size.
console.log('\n🚀 开始宽松模式测试\n');
console.log(`📊 测试配置:`);
console.log(` - 模型组合: DeepSeek-V3 + Qwen-Max`);
console.log(` - 筛选风格: 宽松模式(lenient)`);
console.log(` - 测试样本: ${testCases.length}篇\n`);
|
||
|
||
/** Outcome of screening one test case, as collected by `runTest` for the report. */
interface TestResult {
  /** 1-based position of the case within the selected sample. */
  caseIndex: number;
  /** Article title, truncated to at most 100 characters. */
  title: string;
  /** Human reviewer's decision, normalized to 'include' or 'exclude'. */
  humanDecision: string;
  /** Final decision from the screening service; 'pending' is recorded as 'uncertain'. */
  aiDecision: string;
  /** Conclusion reported by the first model (DeepSeek). */
  model1Conclusion: string;
  /** Conclusion reported by the second model (Qwen). */
  model2Conclusion: string;
  /** Whether `aiDecision` equals `humanDecision`. */
  isCorrect: boolean;
  /** Conflict flag as reported by the screening service. */
  hasConflict: boolean;
  /** Confidence reported by the first model (DeepSeek) only. */
  confidence: number;
  /** Reasoning text reported by the first model (DeepSeek). */
  reason: string;
}
|
||
|
||
/** Standard-mode baseline figures (in percent) that the lenient run is compared against. */
const STANDARD_MODE_BASELINE = {
  accuracy: 60,
  includedRecall: 0,
  excludedAccuracy: 100,
} as const;

/** Returns `hits / total` as a percentage, or `null` when `total` is 0 so the report never prints NaN. */
function toPercent(hits: number, total: number): number | null {
  return total > 0 ? (hits / total) * 100 : null;
}

/** Formats a percentage with one decimal place; `null` (no data) becomes 'N/A'. */
function formatPercent(value: number | null): string {
  return value === null ? 'N/A' : `${value.toFixed(1)}%`;
}

/** Formats the difference between a percentage and its baseline; `null` (no data) becomes 'N/A'. */
function formatDelta(value: number | null, baseline: number): string {
  return value === null ? 'N/A' : `${(value - baseline).toFixed(1)}%`;
}

/**
 * Screens every selected test case with the dual-model service in lenient mode
 * and prints a report: overall accuracy, model agreement, average confidence,
 * per-class accuracy, per-case details and a comparison with the standard-mode
 * baseline.
 *
 * Cases whose screening call throws are logged, counted and left out of the
 * statistics. If no case succeeds, no report is printed.
 *
 * Note: the confidence and reason shown are those of the first model
 * (DeepSeek) only.
 */
async function runTest(): Promise<void> {
  const results: TestResult[] = [];
  let failedCount = 0;

  for (let i = 0; i < testCases.length; i++) {
    // Rows come from a spreadsheet, so cells may be numbers or missing;
    // coerce to strings before using string methods on them.
    const row = testCases[i] as Record<string, unknown>;
    const title = String(row['title'] ?? '');
    const abstract = String(row['abstract'] ?? '');
    const humanDecision = String(row['Decision'] ?? '');

    console.log(`[${i + 1}/${testCases.length}] 正在筛选...`);
    console.log(`标题: ${title.substring(0, 60)}...`);
    console.log(`人类决策: ${humanDecision}`);

    try {
      const screeningResult = await llmScreeningService.dualModelScreening(
        `test-case-${i + 1}`,
        title,
        abstract,
        picoCriteria,
        inclusionCriteria,
        exclusionCriteria,
        ['deepseek-chat', 'qwen-max'],
        'lenient' // Use the lenient screening style
      );

      const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';
      // A 'pending' final decision never matches a human label, so it counts as incorrect.
      const normalizedAI = screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
      const isCorrect = normalizedAI === normalizedHuman;

      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
      console.log(`置信度: ${screeningResult.deepseek.confidence.toFixed(2)}\n`);

      results.push({
        caseIndex: i + 1,
        title: title.substring(0, 100),
        humanDecision: normalizedHuman,
        aiDecision: normalizedAI,
        model1Conclusion: screeningResult.deepseek.conclusion,
        model2Conclusion: screeningResult.qwen.conclusion,
        isCorrect,
        hasConflict: screeningResult.hasConflict,
        confidence: screeningResult.deepseek.confidence,
        reason: screeningResult.deepseek.reason
      });
    } catch (error: unknown) {
      failedCount += 1;
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 筛选失败: ${message}\n`);
    }
  }

  // Make it visible that failed cases are excluded from the statistics below.
  if (failedCount > 0) {
    console.log(`⚠️ ${failedCount} 篇文献筛选失败,未计入以下统计\n`);
  }

  // Without any successful screening every ratio would be 0/0, so stop here.
  if (results.length === 0) {
    console.error('❌ 没有成功完成的筛选,无法生成测试报告\n');
    return;
  }

  // Overall report
  console.log('\n' + '='.repeat(80));
  console.log('📊 宽松模式测试报告');
  console.log('='.repeat(80) + '\n');

  const total = results.length;
  const correct = results.filter(r => r.isCorrect).length;
  const consistent = results.filter(r => !r.hasConflict).length;
  const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / total;
  const accuracy = toPercent(correct, total);

  console.log(`✅ 准确率: ${formatPercent(accuracy)} (${correct}/${total})`);
  console.log(`✅ 一致率: ${formatPercent(toPercent(consistent, total))} (${consistent}/${total})`);
  console.log(`✅ 平均置信度: ${avgConfidence.toFixed(2)}\n`);

  // Per-class statistics, grouped by the human decision
  const includedResults = results.filter(r => r.humanDecision === 'include');
  const excludedResults = results.filter(r => r.humanDecision === 'exclude');

  const includedCorrect = includedResults.filter(r => r.isCorrect).length;
  const excludedCorrect = excludedResults.filter(r => r.isCorrect).length;

  // Either group may be empty (for example when its cases all failed).
  const includedAccuracy = toPercent(includedCorrect, includedResults.length);
  const excludedAccuracy = toPercent(excludedCorrect, excludedResults.length);

  console.log('📋 分类准确率:');
  console.log(` 应纳入文献 (Included): ${formatPercent(includedAccuracy)} (${includedCorrect}/${includedResults.length})`);
  console.log(` 应排除文献 (Excluded): ${formatPercent(excludedAccuracy)} (${excludedCorrect}/${excludedResults.length})\n`);

  // Per-case details
  console.log('📝 详细案例分析:\n');
  for (const r of results) {
    const status = r.isCorrect ? '✅ 正确' : '❌ 错误';
    console.log(`[案例 ${r.caseIndex}] ${status}`);
    console.log(` 标题: ${r.title}`);
    console.log(` 人类决策: ${r.humanDecision}`);
    console.log(` AI决策: ${r.aiDecision}`);
    console.log(` 模型1: ${r.model1Conclusion}, 模型2: ${r.model2Conclusion}`);
    console.log(` 置信度: ${r.confidence.toFixed(2)}`);
    // The model's reasoning is only printed for misclassified cases.
    if (!r.isCorrect) {
      console.log(` AI理由: ${r.reason.substring(0, 150)}...`);
    }
    console.log('');
  }

  // Comparison with the standard-mode baseline
  console.log('='.repeat(80));
  console.log('🔄 与标准模式对比\n');
  console.log('| 指标 | 标准模式 | 宽松模式 | 改进 |');
  console.log('|------|----------|----------|------|');
  console.log(`| 准确率 | ${STANDARD_MODE_BASELINE.accuracy}% | ${formatPercent(accuracy)} | ${formatDelta(accuracy, STANDARD_MODE_BASELINE.accuracy)} |`);
  console.log(`| 召回率(Included) | ${STANDARD_MODE_BASELINE.includedRecall}% | ${formatPercent(includedAccuracy)} | ${formatDelta(includedAccuracy, STANDARD_MODE_BASELINE.includedRecall)} |`);
  console.log(`| 排除准确率 | ${STANDARD_MODE_BASELINE.excludedAccuracy}% | ${formatPercent(excludedAccuracy)} | ${formatDelta(excludedAccuracy, STANDARD_MODE_BASELINE.excludedAccuracy)} |`);
  console.log('\n' + '='.repeat(80));

  // Conclusion, based on overall accuracy
  const accuracyRatio = correct / total;
  if (accuracyRatio >= 0.8) {
    console.log('\n🎉 宽松模式效果显著!准确率≥80%');
    console.log('💡 建议: 初筛使用宽松模式,全文复筛使用严格模式');
  } else if (accuracyRatio >= 0.6) {
    console.log('\n⚠️ 宽松模式有改进,但仍需优化');
    console.log('💡 建议: 继续调整Prompt或考虑增加Few-shot示例');
  } else {
    console.log('\n❌ 宽松模式改进有限');
    console.log('💡 建议: 问题不在宽松/严格,而在PICOS标准的理解差异');
    console.log(' → 需要实现用户自定义边界情况功能');
  }

  console.log('\n✅ 测试完成!\n');
}
|
||
|
||
// Entry point: run the test and make an unexpected failure visible to the
// caller (shell / CI) through a non-zero exit code, not just a log line.
runTest().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|