refactor(asl): ASL frontend architecture refactoring with left navigation

- feat: Create ASLLayout component with 7-module left navigation
- feat: Implement Title Screening Settings page with optimized PICOS layout
- feat: Add placeholder pages for Workbench and Results
- fix: Fix nested routing structure for React Router v6
- fix: Resolve Spin component warning in MainLayout
- fix: Add QueryClientProvider to App.tsx
- style: Optimize PICOS form layout (P+I left, C+O+S right)
- style: Align Inclusion/Exclusion criteria side-by-side
- docs: Add architecture refactoring and routing fix reports

Ref: Week 2 Frontend Development
Scope: ASL module MVP - Title Abstract Screening
This commit is contained in:
2025-11-18 21:51:51 +08:00
parent e3e7e028e8
commit 3634933ece
213 changed files with 20054 additions and 442 deletions

View File

@@ -0,0 +1,348 @@
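/**
 * Stroke dataset test - domestic vs. international model comparison
 *
 * Purpose: compare the domestic models (DeepSeek + Qwen) against the international models (GPT-4o + Claude).
 *
 * Test hypotheses:
 * 1. If the international models are more accurate → the gap is a model-capability problem.
 * 2. If the international models reach a similar accuracy → the gap comes from the prompt or from differences in interpretation.
 */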
import * as fs from 'fs';
import * as path from 'path';
import * as XLSX from 'xlsx';
import { fileURLToPath } from 'url';
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
// ES modules have no built-in __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// ========================================
// 📋 1. Load the PICOS and the screening criteria
// ========================================
console.log('📖 正在读取PICOS和纳排标准...\n');
// Document that holds the PICOS, inclusion criteria and exclusion criteria of the test cases,
// resolved relative to this script's directory.
const picosPath = path.join(
__dirname,
'../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/测试案例的PICOS、纳入标准、排除标准.txt'
);
// NOTE(review): the file is read here (so a missing file throws at start-up), but picosContent
// is not referenced anywhere else in the visible code — the criteria below are hard-coded.
const picosContent = fs.readFileSync(picosPath, 'utf-8');
// PICOS criteria (simplified version); passed unchanged to the screening service for every case.
const picoCriteria = {
population: '非心源性缺血性卒中患者、亚洲人群',
intervention: '抗血小板药物/抗凝药物/溶栓药物（阿司匹林、氯吡格雷、替格瑞洛、达比加群等）',
comparison: '安慰剂或常规治疗',
outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
studyDesign: 'SR、RCT、RWE、OBS'
};
// Inclusion criteria as one multi-line text block (note the leading and trailing newline).
const inclusionCriteria = `
1. 研究对象为非心源性缺血性卒中患者
2. 研究人群为亚洲人群（优先）
3. 干预措施为抗血小板/抗凝/溶栓药物
4. 对照组为安慰剂或常规治疗
5. 研究时间在2020年之后
6. 研究设计为SR、RCT、RWE、OBS
`;
// Exclusion criteria, in the same multi-line text format as the inclusion criteria.
const exclusionCriteria = `
1. 综述、病例报告、会议摘要
2. 动物实验、体外实验
3. 研究人群非亚洲人群（除非有特殊价值）
4. 研究时间在2020年之前
5. 心源性卒中或出血性卒中
`;
console.log('✅ PICOS标准已加载\n');
// ========================================
// 📋 2. Load the test cases
// ========================================
console.log('📖 正在读取测试案例...\n');

/** Number of rows with a human "include" decision to sample. */
const INCLUDED_SAMPLE_SIZE = 2;
/** Number of rows with a human "exclude" decision to sample. */
const EXCLUDED_SAMPLE_SIZE = 3;

const excelPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
);
const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });

// Only the first sheet is used. Fail loudly when the workbook has no sheet instead of
// silently continuing with zero rows.
const sheetName = workbook.SheetNames[0];
const worksheet = sheetName === undefined ? undefined : workbook.Sheets[sheetName];
if (worksheet === undefined) {
  throw new Error(`No worksheet found in ${excelPath}`);
}
const data = XLSX.utils.sheet_to_json<Record<string, unknown>>(worksheet);
console.log(`✅ 读取到 ${data.length} 条数据\n`);

/** Lower-cased text of a row's `Decision` cell; '' when the cell is missing. */
const decisionOf = (row: Record<string, unknown>): string =>
  String(row['Decision'] ?? '').toLowerCase();

// Sample selection: the first N "include" rows followed by the first M "exclude" rows.
const includedCases = data
  .filter(row => decisionOf(row).includes('include'))
  .slice(0, INCLUDED_SAMPLE_SIZE);
const excludedCases = data
  .filter(row => decisionOf(row).includes('exclude'))
  .slice(0, EXCLUDED_SAMPLE_SIZE);
const testCases = [...includedCases, ...excludedCases];

if (includedCases.length < INCLUDED_SAMPLE_SIZE || excludedCases.length < EXCLUDED_SAMPLE_SIZE) {
  console.warn(
    `⚠️ 样本不足: 期望 ${INCLUDED_SAMPLE_SIZE} Included + ${EXCLUDED_SAMPLE_SIZE} Excluded`
  );
}
// Report the counts that were actually selected rather than the intended ones.
console.log(
  `✅ 选择测试样本: ${testCases.length} 篇（${includedCases.length} Included + ${excludedCases.length} Excluded）\n`
);
// ========================================
// 🧪 3. Model pairs under test
// ========================================
/** A two-model combination that is screened as one group. */
interface ModelPair {
  /** Display name of the group, used in logs and in the report. */
  name: string;
  /** Identifier of the first model handed to the screening service. */
  model1: string;
  /** Identifier of the second model handed to the screening service. */
  model2: string;
  /** Free-text description of the group. */
  description: string;
}

// NOTE(review): model2 is 'qwen3-72b' while the description mentions Qwen3-Max — confirm which is intended.
const DOMESTIC_MODEL_PAIR: ModelPair = {
  name: '国内模型组合',
  model1: 'deepseek-chat',
  model2: 'qwen3-72b',
  description: 'DeepSeek-V3 + Qwen3-Max当前使用'
};

const INTERNATIONAL_MODEL_PAIR: ModelPair = {
  name: '国际模型组合',
  model1: 'gpt-4o',
  model2: 'claude-sonnet-4.5',
  description: 'GPT-4o + Claude-4.5（国际顶级模型）'
};

// Order matters: index 0 is the domestic group, index 1 the international group.
const modelPairs: ModelPair[] = [DOMESTIC_MODEL_PAIR, INTERNATIONAL_MODEL_PAIR];
// ========================================
// 🧪 4. Run the tests
// ========================================
/** Outcome of screening one test case with one model pair. */
interface TestResult {
  /** 1-based position of the case within the tested sample. */
  caseIndex: number;
  /** Case title, truncated to at most 100 characters. */
  title: string;
  /** Human reviewer's decision, normalised to 'include' or 'exclude'. */
  humanDecision: string;
  /**
   * Final decision of the model pair; 'uncertain' replaces the service's 'pending',
   * and 'error' marks a case whose screening call threw.
   */
  aiDecision: string;
  /** Raw result of the first model as returned by the screening service; null on error. */
  model1Result: unknown;
  /** Raw result of the second model as returned by the screening service; null on error. */
  model2Result: unknown;
  /** Whether aiDecision equals humanDecision. */
  isCorrect: boolean;
  /** Whether the service reported a conflict between the two models. */
  hasConflict: boolean;
  /** Wall-clock time spent on the case, in milliseconds. */
  processingTime: number;
}
/**
 * Screens every case with one pair of models and records how each result compares
 * with the human decision.
 *
 * Cases are processed one at a time (each service call is awaited before the next starts).
 * A failing call does not abort the run: the case is recorded with `aiDecision: 'error'`.
 *
 * @param pairName Display name of the model pair, used only for logging.
 * @param model1 Identifier of the first model.
 * @param model2 Identifier of the second model.
 * @param cases Spreadsheet rows; the `title`, `abstract` and `Decision` cells are read.
 * @returns One TestResult per input case, in input order.
 */
async function testModelPair(
  pairName: string,
  model1: string,
  model2: string,
  cases: any[]
): Promise<TestResult[]> {
  console.log(`\n${'='.repeat(60)}`);
  console.log(`🧪 测试模型组合: ${pairName}`);
  console.log(`${'='.repeat(60)}\n`);

  const results: TestResult[] = [];

  for (let i = 0; i < cases.length; i++) {
    const caseNumber = i + 1;
    const testCase: Record<string, unknown> = cases[i] ?? {};

    // Spreadsheet cells may be numbers or missing; coerce to strings so the string
    // operations below cannot throw.
    const title = String(testCase['title'] ?? '');
    const abstract = String(testCase['abstract'] ?? '');
    const humanDecision = String(testCase['Decision'] ?? '');

    // Normalised once, so the success and failure paths record the same value.
    const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';
    const shortTitle = title.substring(0, 100);

    console.log(`\n[${caseNumber}/${cases.length}] 正在筛选...`);
    console.log(`标题: ${title.substring(0, 60)}...`);
    console.log(`人类决策: ${humanDecision}`);

    const startTime = Date.now();
    try {
      const screeningResult = await llmScreeningService.dualModelScreening(
        `test-case-${caseNumber}`, // literatureId
        title,
        abstract,
        picoCriteria,
        inclusionCriteria,
        exclusionCriteria,
        [model1, model2], // the models argument is an array
        'standard' // style argument
      );
      const processingTime = Date.now() - startTime;

      // 'pending' is reported as 'uncertain', which never matches a human decision.
      const normalizedAI =
        screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
      const isCorrect = normalizedAI === normalizedHuman;

      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
      console.log(`处理时间: ${(processingTime / 1000).toFixed(2)}秒`);

      results.push({
        caseIndex: caseNumber,
        title: shortTitle,
        humanDecision: normalizedHuman,
        aiDecision: normalizedAI,
        model1Result: screeningResult.model1Result,
        model2Result: screeningResult.model2Result,
        isCorrect,
        hasConflict: screeningResult.hasConflict,
        processingTime
      });
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 筛选失败: ${message}`);
      // NOTE(review): a failed case is stored with hasConflict=false, so it counts as
      // "consistent" in the report metrics — confirm this is intended.
      results.push({
        caseIndex: caseNumber,
        title: shortTitle,
        humanDecision: normalizedHuman,
        aiDecision: 'error',
        model1Result: null,
        model2Result: null,
        isCorrect: false,
        hasConflict: false,
        processingTime: Date.now() - startTime
      });
    }
  }

  return results;
}
// ========================================
// 📊 5. Build the comparison report
// ========================================
/** Aggregate figures for one model pair, formatted for the console table and the JSON report. */
interface PairMetrics {
  /** Share of correct decisions in percent, one decimal place. */
  accuracy: string;
  /** Share of cases without a model conflict in percent, one decimal place. */
  consistency: string;
  /** Mean processing time in seconds, two decimal places. */
  avgTime: string;
  correct: number;
  total: number;
}

/** Summarises a result list; an empty list yields zeroed metrics instead of NaN. */
function calculateMetrics(results: TestResult[]): PairMetrics {
  const total = results.length;
  const correct = results.filter(r => r.isCorrect).length;
  const consistent = results.filter(r => !r.hasConflict).length;
  const totalTimeMs = results.reduce((sum, r) => sum + r.processingTime, 0);
  const percent = (count: number): string =>
    (total === 0 ? 0 : (count / total) * 100).toFixed(1);

  return {
    accuracy: percent(correct),
    consistency: percent(consistent),
    avgTime: (total === 0 ? 0 : totalTimeMs / total / 1000).toFixed(2),
    correct,
    total
  };
}

/**
 * Prints a side-by-side comparison of the two model groups, prints a conclusion based on
 * the accuracy difference (threshold: 10 percentage points) and writes the full report
 * as JSON to `../docs/国内外模型对比测试报告.json` relative to this script.
 *
 * @param domesticResults Results of the domestic model pair.
 * @param internationalResults Results of the international model pair, expected in the same case order.
 */
function generateComparisonReport(
  domesticResults: TestResult[],
  internationalResults: TestResult[]
) {
  console.log(`\n${'='.repeat(80)}`);
  console.log(`📊 国内 vs 国际模型对比报告`);
  console.log(`${'='.repeat(80)}\n`);

  const domesticMetrics = calculateMetrics(domesticResults);
  const internationalMetrics = calculateMetrics(internationalResults);

  // Differences are always "international minus domestic".
  const accuracyDiff =
    parseFloat(internationalMetrics.accuracy) - parseFloat(domesticMetrics.accuracy);
  const consistencyDiff =
    parseFloat(internationalMetrics.consistency) - parseFloat(domesticMetrics.consistency);
  const avgTimeDiff =
    parseFloat(internationalMetrics.avgTime) - parseFloat(domesticMetrics.avgTime);

  // Comparison table
  console.log('| 指标 | 国内模型 | 国际模型 | 差异 |');
  console.log('|------|----------|----------|------|');
  console.log(`| 准确率 | ${domesticMetrics.accuracy}% (${domesticMetrics.correct}/${domesticMetrics.total}) | ${internationalMetrics.accuracy}% (${internationalMetrics.correct}/${internationalMetrics.total}) | ${accuracyDiff.toFixed(1)}% |`);
  console.log(`| 一致率 | ${domesticMetrics.consistency}% | ${internationalMetrics.consistency}% | ${consistencyDiff.toFixed(1)}% |`);
  console.log(`| 平均耗时 | ${domesticMetrics.avgTime}秒 | ${internationalMetrics.avgTime}秒 | ${avgTimeDiff.toFixed(2)}秒 |`);
  console.log('\n');

  // Case-by-case comparison; only cases present in both lists can be compared.
  console.log('📋 逐案例对比:\n');
  if (domesticResults.length !== internationalResults.length) {
    console.warn(
      `⚠️ 两组结果数量不一致（${domesticResults.length} vs ${internationalResults.length}），仅对比共同部分`
    );
  }
  const comparableCount = Math.min(domesticResults.length, internationalResults.length);
  domesticResults.slice(0, comparableCount).forEach((domestic, i) => {
    const international = internationalResults[i];
    if (international === undefined) {
      return;
    }
    console.log(`[案例 ${i + 1}] ${domestic.title}`);
    console.log(` 人类: ${domestic.humanDecision}`);
    console.log(` 国内模型: ${domestic.aiDecision} ${domestic.isCorrect ? '✅' : '❌'}`);
    console.log(` 国际模型: ${international.aiDecision} ${international.isCorrect ? '✅' : '❌'}`);
    if (domestic.aiDecision !== international.aiDecision) {
      console.log(` ⚠️ 两组模型判断不一致!`);
    }
    console.log('');
  });

  // Conclusion
  console.log('\n' + '='.repeat(80));
  console.log('🎯 结论分析\n');
  let analysis: string;
  if (domesticMetrics.total === 0 || internationalMetrics.total === 0) {
    // Without results for both groups no accuracy comparison is meaningful.
    analysis = '数据不足';
    console.log('⚠️ 结论: 至少一组模型没有测试结果，无法比较准确率');
  } else if (Math.abs(accuracyDiff) <= 10) {
    analysis = 'Prompt问题';
    console.log('✅ 结论: 国内外模型准确率相近（差异≤10%）');
    console.log(' → 问题不在模型能力,而在于:');
    console.log(' 1. Prompt设计可能过于严格');
    console.log(' 2. AI vs 人类对"匹配"的理解差异');
    console.log(' 3. 纳排标准本身存在歧义');
    console.log('\n💡 建议: 优化Prompt策略增加宽松/标准/严格三种模式');
  } else if (accuracyDiff > 10) {
    analysis = '国际模型更优';
    console.log('✅ 结论: 国际模型显著优于国内模型（差异>10%）');
    console.log(' → 问题在于模型能力差异');
    console.log(' → 国际模型对医学文献的理解更准确');
    console.log('\n💡 建议: 优先使用GPT-4o或Claude-4.5进行筛选');
  } else {
    analysis = '国内模型更优';
    console.log('✅ 结论: 国内模型优于国际模型（差异>10%）');
    console.log(' → 可能是国内模型对中文医学术语理解更好');
    console.log(' → 或者国内模型更符合中国专家的筛选习惯');
    console.log('\n💡 建议: 继续使用国内模型组合');
  }
  console.log('='.repeat(80) + '\n');

  // Persist the detailed report
  const report = {
    testDate: new Date().toISOString(),
    testCases: testCases.length,
    domesticModels: modelPairs[0],
    internationalModels: modelPairs[1],
    domesticMetrics,
    internationalMetrics,
    domesticResults,
    internationalResults,
    conclusion: {
      accuracyDiff,
      analysis
    }
  };
  const reportPath = path.join(__dirname, '../docs/国内外模型对比测试报告.json');
  // Create the target directory first so a missing folder cannot discard a finished run.
  fs.mkdirSync(path.dirname(reportPath), { recursive: true });
  fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), 'utf-8');
  console.log(`📄 详细报告已保存: ${reportPath}\n`);
}
// ========================================
// 🚀 6. Run the main flow
// ========================================
/** Rough duration of one screening call in seconds; used only for the up-front estimate. */
const ESTIMATED_SECONDS_PER_CASE = 15;
/** Pause between the two model groups, in milliseconds, to avoid API rate limiting. */
const PAUSE_BETWEEN_GROUPS_MS = 2000;

/**
 * Runs the comparison: screens the sample with the domestic pair, waits briefly,
 * screens it with the international pair, then prints and saves the report.
 */
async function main() {
  const [domesticPair, internationalPair] = modelPairs;
  if (!domesticPair || !internationalPair) {
    throw new Error('modelPairs must contain the domestic and the international model pair');
  }

  console.log('\n🚀 开始国内外模型对比测试\n');
  console.log(`测试样本: ${testCases.length}`);
  console.log(`测试组合: 2组`);
  const estimatedSeconds = testCases.length * 2 * ESTIMATED_SECONDS_PER_CASE;
  console.log(`预计耗时: ${estimatedSeconds}秒（约${Math.ceil(estimatedSeconds / 60)}分钟）\n`);

  // Domestic models
  const domesticResults = await testModelPair(
    domesticPair.name,
    domesticPair.model1,
    domesticPair.model2,
    testCases
  );

  // Wait between the groups to avoid API rate limiting
  console.log('\n⏳ 等待2秒后测试国际模型...\n');
  await new Promise(resolve => setTimeout(resolve, PAUSE_BETWEEN_GROUPS_MS));

  // International models
  const internationalResults = await testModelPair(
    internationalPair.name,
    internationalPair.model1,
    internationalPair.model2,
    testCases
  );

  // Comparison report
  generateComparisonReport(domesticResults, internationalResults);
  console.log('✅ 测试完成!\n');
}

// Log the failure AND signal it through the exit code so callers/CI can detect it.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});