/**
 * Tool C - advanced scenario tests (including multiple imputation).
 *
 * Exercises complex data-cleaning scenarios:
 * - multi-condition filtering + grouped statistics
 * - time-series calculations
 * - multiple imputation
 * - complex classification logic
 * - data exploration (no code generation)
 *
 * Run with: node test-tool-c-advanced-scenarios.mjs
 */

import fetch from 'node-fetch';
import FormData from 'form-data';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ==================== Configuration ====================

const API_BASE = 'http://localhost:3000/api/v1/dc/tool-c';
// The test workbook must be prepared manually before running this script.
const TEST_FILE = path.join(__dirname, 'uploads', 'test_data_advanced.xlsx');

// Maximum number of characters of a step error shown in the console.
const ERROR_PREVIEW_LENGTH = 100;

// ==================== Colored output ====================

const colors = {
  reset: '\x1b[0m',
  bright: '\x1b[1m',
  red: '\x1b[31m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  magenta: '\x1b[35m',
  cyan: '\x1b[36m',
};

/**
 * Print a message to stdout wrapped in an ANSI color sequence.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of the `colors` table to apply.
 */
function log(message, color = 'reset') {
  console.log(`${colors[color]}${message}${colors.reset}`);
}

// ==================== Advanced test scenarios ====================

// Each scenario's `message` is sent verbatim to the stream-process endpoint.
// `expectedFeatures` and `multipleOutputs` are descriptive metadata only; this
// script does not assert on them.
const advancedScenarios = [
  {
    id: 1,
    name: '多条件筛选+分组统计',
    message: '筛选出年龄≥18岁、性别为女、BMI≥28的患者,按年龄段(18-30, 30-50, 50+)分组统计人数',
    description: '测试复杂的多条件筛选和分组统计功能',
    expectedFeatures: ['条件筛选', 'BMI过滤', '年龄分组', 'value_counts'],
  },
  {
    id: 2,
    name: '时间序列计算',
    message: '计算每位患者的首次就诊日期和最近就诊日期,以及总就诊次数',
    description: '测试日期时间处理和分组聚合',
    expectedFeatures: ['pd.to_datetime', 'groupby', 'agg', '多列聚合'],
  },
  {
    id: 3,
    name: '多重插补(Multiple Imputation)- 基础版',
    message: '对年龄列的缺失值进行多重插补:生成5个插补数据集,每个数据集用不同的随机种子填补缺失值(基于正态分布)',
    description: '测试统计学中的多重插补方法(基础实现)',
    expectedFeatures: ['np.random.seed', 'np.random.normal', '循环生成多个数据集'],
    multipleOutputs: true,
  },
  {
    id: 4,
    name: '多重插补(MICE算法模拟)',
    message: '对年龄、BMI、血压三列进行链式方程多重插补(MICE):先用中位数初始填补,再迭代3轮,每轮用其他列预测填补当前列',
    description: '测试高级多重插补算法(MICE迭代填补)',
    expectedFeatures: ['多轮迭代', 'fillna', '分列填补', '链式方程'],
  },
  {
    id: 5,
    name: '复杂分类逻辑',
    message: '根据BMI和血压综合判断健康等级:BMI<24且收缩压<140为健康,BMI 24-28或收缩压140-160为预警,其他为危险',
    description: '测试多条件嵌套逻辑判断',
    expectedFeatures: ['np.where', '嵌套条件', '多变量判断'],
  },
  {
    id: 6,
    name: '数据探索(不生成代码)',
    message: '性别列有多少缺失值?年龄列的平均值是多少?BMI列的中位数是多少?',
    description: '测试数据探索问答能力(应直接回答,不生成代码)',
    expectedFeatures: ['直接回答', '不生成代码'],
  },
  {
    id: 7,
    name: '分层多重插补',
    message: '对年龄列的缺失值进行分层多重插补:按性别分组,男性用男性年龄均值填补,女性用女性年龄均值填补,生成3个插补数据集',
    description: '测试分层多重插补(考虑分组)',
    expectedFeatures: ['groupby', 'transform', '分组填补', '多次生成'],
    multipleOutputs: true,
  },
  {
    id: 8,
    name: '缺失模式分析',
    message: '分析数据集的缺失模式:统计每一列的缺失率,找出缺失率超过20%的列,并标记哪些行缺失值超过3个',
    description: '测试缺失值分析功能',
    expectedFeatures: ['isna()', 'sum()', 'mean()', '条件判断'],
  },
];

// ==================== Helpers ====================

/**
 * Upload the test workbook and return the session id created by the server.
 *
 * When the workbook is missing, prints the list of columns it is expected to
 * contain and returns `null` without contacting the server.
 *
 * @returns {Promise<string|null>} `result.data.sessionId` from the upload
 *   response, or `null` when the file is missing or the server reports failure.
 */
async function uploadFile() {
  log('\n📤 步骤1: 上传测试文件...', 'cyan');

  if (!fs.existsSync(TEST_FILE)) {
    log(`❌ 测试文件不存在: ${TEST_FILE}`, 'red');
    log('💡 提示:请创建测试数据文件,包含以下列:', 'yellow');
    log(' - patient_id(患者ID)', 'yellow');
    log(' - gender(性别:男/女)', 'yellow');
    log(' - age(年龄,带缺失值)', 'yellow');
    log(' - BMI(带缺失值)', 'yellow');
    log(' - systolic_bp(收缩压,带缺失值)', 'yellow');
    log(' - visit_date(就诊日期)', 'yellow');
    return null;
  }

  const formData = new FormData();
  formData.append('file', fs.createReadStream(TEST_FILE));

  const response = await fetch(`${API_BASE}/sessions/upload`, {
    method: 'POST',
    body: formData,
  });
  const result = await response.json();

  if (!result.success) {
    log(`❌ 上传失败: ${result.error}`, 'red');
    return null;
  }

  log(`✅ 上传成功: Session ID = ${result.data.sessionId}`, 'green');
  log(` 文件: ${result.data.fileName}`, 'bright');
  log(` 数据: ${result.data.totalRows} 行 × ${result.data.totalCols} 列`, 'bright');
  return result.data.sessionId;
}

/**
 * Parse the payload of one SSE `data:` line into a step object.
 *
 * @param {string} payload - Text following the `data: ` prefix.
 * @returns {object|null} The parsed step, or `null` when the payload is not
 *   JSON or is not a JSON object (such payloads are deliberately skipped).
 */
function parseStep(payload) {
  let parsed;
  try {
    parsed = JSON.parse(payload);
  } catch {
    // Non-JSON payloads are ignored on purpose; only the parse is guarded so
    // that errors in the rendering code are no longer hidden.
    return null;
  }
  return parsed !== null && typeof parsed === 'object' ? parsed : null;
}

/**
 * Turn a step's `error` field into printable text.
 *
 * @param {unknown} error - Value of `step.error`.
 * @returns {string} The error itself when it is a string, otherwise its JSON
 *   form (falling back to `String(error)` if it cannot be serialized).
 */
function describeError(error) {
  if (typeof error === 'string') {
    return error;
  }
  try {
    return JSON.stringify(error) ?? String(error);
  } catch {
    return String(error);
  }
}

/**
 * Print a single progress step received from the stream.
 *
 * @param {object} step - Parsed step object.
 */
function renderStep(step) {
  const iconByStatus = { success: '✅', failed: '❌', retrying: '🔄' };
  const icon = iconByStatus[step.status] ?? '⏳';
  log(` ${icon} Step ${step.step}: ${step.message}`, 'bright');

  // The generated code is carried by the `show_code` step.
  if (step.stepName === 'show_code' && step.data?.code) {
    log(`\n📝 生成的代码:`, 'cyan');
    console.log(step.data.code);
    log(`\n💡 解释: ${step.data.explanation}\n`, 'yellow');
  }

  if (step.error) {
    const text = describeError(step.error);
    // Only mark the text as truncated when something was actually cut off.
    const preview =
      text.length > ERROR_PREVIEW_LENGTH
        ? `${text.substring(0, ERROR_PREVIEW_LENGTH)}...`
        : text;
    log(` ⚠️ 错误: ${preview}`, 'red');
  }
}

/**
 * Print the completion summary shown when the stream emits `[DONE]`.
 *
 * @param {object} scenario - Scenario being run.
 * @param {object|null} lastStep - Last step received before `[DONE]`, if any.
 * @param {number} startTime - `Date.now()` value captured before the request.
 */
function reportCompletion(scenario, lastStep, startTime) {
  const duration = ((Date.now() - startTime) / 1000).toFixed(2);
  log(`\n✅ 场景${scenario.id}完成 (耗时: ${duration}秒)`, 'green');

  if (!lastStep) {
    return;
  }
  if (lastStep.status === 'success') {
    log(` ✓ 执行成功`, 'green');
    if (lastStep.retryCount && lastStep.retryCount > 0) {
      log(` ⚠️ 重试次数: ${lastStep.retryCount + 1}`, 'yellow');
    }
  } else if (lastStep.status === 'failed') {
    log(` ✗ 执行失败: ${lastStep.error}`, 'red');
  }
}

/**
 * Run one scenario against the streaming endpoint and print its progress.
 *
 * @param {string} sessionId - Session id returned by `uploadFile`.
 * @param {object} scenario - Entry of `advancedScenarios`.
 * @returns {Promise<{success: boolean, steps?: number, error?: string}>}
 *   `success` is `false` when the request throws, the response is not OK, or
 *   the last step received has status `failed`; never rejects.
 */
async function testScenario(sessionId, scenario) {
  const divider = '='.repeat(80);
  log(`\n${divider}`, 'magenta');
  log(`📋 场景${scenario.id}: ${scenario.name}`, 'bright');
  log(`📝 描述: ${scenario.description}`, 'cyan');
  log(`💬 用户输入: "${scenario.message}"`, 'yellow');
  log(divider, 'magenta');

  try {
    const startTime = Date.now();

    // Call the streaming API.
    const response = await fetch(`${API_BASE}/ai/stream-process`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        sessionId,
        message: scenario.message,
        maxRetries: 3,
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    log('\n📡 流式响应:', 'cyan');

    let stepCount = 0;
    let lastStep = null;

    const handleLine = (rawLine) => {
      // Tolerate CRLF line endings.
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
      if (!line.startsWith('data: ')) {
        return;
      }
      const payload = line.slice('data: '.length);
      if (payload === '[DONE]') {
        reportCompletion(scenario, lastStep, startTime);
        return;
      }
      const step = parseStep(payload);
      if (step === null) {
        return;
      }
      stepCount += 1;
      lastStep = step;
      renderStep(step);
    };

    // A streaming decoder keeps multi-byte UTF-8 characters intact when they
    // are split across chunk boundaries.
    const decoder = new TextDecoder('utf-8');
    let buffer = '';
    for await (const chunk of response.body) {
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line; keep it for the next chunk.
      buffer = lines.pop() ?? '';
      for (const line of lines) {
        handleLine(line);
      }
    }

    // Flush the decoder and process a final line that had no trailing newline.
    buffer += decoder.decode();
    if (buffer !== '') {
      handleLine(buffer);
    }

    if (lastStep?.status === 'failed') {
      // The stream completed, but the server reported that execution failed.
      return { success: false, steps: stepCount, error: describeError(lastStep.error) };
    }
    return { success: true, steps: stepCount };
  } catch (error) {
    log(`\n❌ 场景${scenario.id}失败: ${error.message}`, 'red');
    return { success: false, error: error.message };
  }
}

// ==================== Main test routine ====================

/**
 * Upload the workbook, run every scenario sequentially, print a summary
 * report, and finally download the session export into `test-output/`.
 *
 * @returns {Promise<void>}
 */
async function runAdvancedTests() {
  const divider = '='.repeat(80);

  log(`\n${divider}`, 'bright');
  log('🧪 工具C高级场景测试(含多重插补)', 'bright');
  log(`${divider}\n`, 'bright');

  // Step 1: upload the file.
  const sessionId = await uploadFile();
  if (!sessionId) {
    log('\n❌ 上传失败,测试终止', 'red');
    return;
  }

  // Step 2: run every scenario, one at a time.
  const results = [];
  for (const scenario of advancedScenarios) {
    const result = await testScenario(sessionId, scenario);
    results.push({ scenario: scenario.name, ...result });

    // Wait one second between scenarios to avoid sending requests too fast.
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  // Step 3: print the test report.
  log(`\n${divider}`, 'bright');
  log('📊 测试报告', 'bright');
  log(divider, 'bright');

  const successCount = results.filter((r) => r.success).length;
  const failCount = results.length - successCount;

  log(`\n✅ 成功: ${successCount}/${results.length}`, 'green');
  log(`❌ 失败: ${failCount}/${results.length}`, failCount > 0 ? 'red' : 'green');

  log('\n📋 详细结果:', 'cyan');
  results.forEach((r, i) => {
    const icon = r.success ? '✅' : '❌';
    log(` ${icon} 场景${i + 1}: ${r.scenario}`, r.success ? 'green' : 'red');
  });

  // Step 4: exercise the export endpoint.
  log('\n📥 测试导出功能...', 'cyan');
  try {
    const exportResponse = await fetch(`${API_BASE}/sessions/${sessionId}/export`);

    if (exportResponse.ok) {
      const buffer = await exportResponse.arrayBuffer();
      const exportPath = path.join(__dirname, 'test-output', `export_${Date.now()}.xlsx`);

      // Make sure the output directory exists (no-op when it already does).
      fs.mkdirSync(path.dirname(exportPath), { recursive: true });

      fs.writeFileSync(exportPath, Buffer.from(buffer));
      log(`✅ 导出成功: ${exportPath} (${(buffer.byteLength / 1024).toFixed(2)}KB)`, 'green');
    } else {
      log(`❌ 导出失败: HTTP ${exportResponse.status}`, 'red');
    }
  } catch (error) {
    log(`❌ 导出异常: ${error.message}`, 'red');
  }

  log(`\n${divider}`, 'bright');
  log('🎉 测试完成!', 'bright');
  log(`${divider}\n`, 'bright');
}

// ==================== Run the tests ====================

runAdvancedTests().catch((error) => {
  log(`\n❌ 测试异常: ${error.message}`, 'red');
  console.error(error);
  process.exit(1);
});