feat(dc): Complete Tool C quick action buttons Phase 1-2 - 7 functions

Summary: - Implement 7 quick action functions (filter, recode, binning, conditional, dropna, compute, pivot) - Refactor to pre-written Python functions architecture (stable and secure) - Add 7 Python operations modules with full type hints - Add 7 frontend Dialog components with user-friendly UI - Fix NaN serialization issues and auto type conversion - Update all related documentation Technical Details: - Python: operations/ module (filter.py, recode.py, binning.py, conditional.py, dropna.py, compute.py, pivot.py) - Backend: QuickActionService.ts with 7 execute methods - Frontend: 7 Dialog components with complete validation - Toolbar: Enable 7 quick action buttons Status: Phase 1-2 completed, basic testing passed, ready for further testing
2025-12-08 17:38:08 +08:00
parent af325348b8
commit f729699510
158 changed files with 13814 additions and 273 deletions
--- a/backend/test-tool-c-advanced-scenarios.mjs
+++ b/backend/test-tool-c-advanced-scenarios.mjs
@@ -0,0 +1,319 @@
+/**
+ * 工具C - 高级场景测试（含多重插补）
+ * 
+ * 测试复杂的数据清洗场景：
+ * - 多条件筛选+分组统计
+ * - 时间序列计算
+ * - 多重插补（Multiple Imputation）
+ * - 复杂分类逻辑
+ * - 数据探索（不生成代码）
+ * 
+ * 运行命令：node test-tool-c-advanced-scenarios.mjs
+ */
+
+import fetch from 'node-fetch';
+import FormData from 'form-data';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+const __filename = fileURLToPath(import.meta.url); // ES modules do not define __filename; derive it from the module URL.
+const __dirname = path.dirname(__filename);
+
+// ==================== Configuration ====================
+// Base URL that every request in this script is built from.
+const API_BASE = 'http://localhost:3000/api/v1/dc/tool-c';
+const TEST_FILE = path.join(__dirname, 'uploads', 'test_data_advanced.xlsx'); // Test data must be prepared beforehand
+
+// ==================== Colored output ====================
+// Terminal escape sequences keyed by the color names that log() accepts.
+const colors = {
+  reset: '\x1b[0m',
+  bright: '\x1b[1m',
+  red: '\x1b[31m',
+  green: '\x1b[32m',
+  yellow: '\x1b[33m',
+  blue: '\x1b[34m',
+  magenta: '\x1b[35m',
+  cyan: '\x1b[36m',
+};
+/** Prints `message` to stdout prefixed with the escape code for `color` (a key of `colors`, default 'reset') and followed by the reset code. */
+function log(message, color = 'reset') {
+  console.log(`${colors[color]}${message}${colors.reset}`);
+}
+
+// ==================== Advanced test scenarios ====================
+// Each entry is sent to the API as `message`; `id`, `name` and `description` are used for logging. `expectedFeatures` and `multipleOutputs` are not read by the code shown here.
+const advancedScenarios = [
+  {
+    id: 1,
+    name: '多条件筛选+分组统计',
+    message: '筛选出年龄≥18岁、性别为女、BMI≥28的患者，按年龄段（18-30, 30-50, 50+）分组统计人数',
+    description: '测试复杂的多条件筛选和分组统计功能',
+    expectedFeatures: ['条件筛选', 'BMI过滤', '年龄分组', 'value_counts'],
+  },
+  {
+    id: 2,
+    name: '时间序列计算',
+    message: '计算每位患者的首次就诊日期和最近就诊日期，以及总就诊次数',
+    description: '测试日期时间处理和分组聚合',
+    expectedFeatures: ['pd.to_datetime', 'groupby', 'agg', '多列聚合'],
+  },
+  {
+    id: 3,
+    name: '多重插补（Multiple Imputation）- 基础版',
+    message: '对年龄列的缺失值进行多重插补：生成5个插补数据集，每个数据集用不同的随机种子填补缺失值（基于正态分布）',
+    description: '测试统计学中的多重插补方法（基础实现）',
+    expectedFeatures: ['np.random.seed', 'np.random.normal', '循环生成多个数据集'],
+    multipleOutputs: true,
+  },
+  {
+    id: 4,
+    name: '多重插补（MICE算法模拟）',
+    message: '对年龄、BMI、血压三列进行链式方程多重插补（MICE）：先用中位数初始填补，再迭代3轮，每轮用其他列预测填补当前列',
+    description: '测试高级多重插补算法（MICE迭代填补）',
+    expectedFeatures: ['多轮迭代', 'fillna', '分列填补', '链式方程'],
+  },
+  {
+    id: 5,
+    name: '复杂分类逻辑',
+    message: '根据BMI和血压综合判断健康等级：BMI<24且收缩压<140为健康，BMI 24-28或收缩压140-160为预警，其他为危险',
+    description: '测试多条件嵌套逻辑判断',
+    expectedFeatures: ['np.where', '嵌套条件', '多变量判断'],
+  },
+  {
+    id: 6,
+    name: '数据探索（不生成代码）',
+    message: '性别列有多少缺失值？年龄列的平均值是多少？BMI列的中位数是多少？',
+    description: '测试数据探索问答能力（应直接回答，不生成代码）',
+    expectedFeatures: ['直接回答', '不生成代码'],
+  },
+  {
+    id: 7,
+    name: '分层多重插补',
+    message: '对年龄列的缺失值进行分层多重插补：按性别分组，男性用男性年龄均值填补，女性用女性年龄均值填补，生成3个插补数据集',
+    description: '测试分层多重插补（考虑分组）',
+    expectedFeatures: ['groupby', 'transform', '分组填补', '多次生成'],
+    multipleOutputs: true,
+  },
+  {
+    id: 8,
+    name: '缺失模式分析',
+    message: '分析数据集的缺失模式：统计每一列的缺失率，找出缺失率超过20%的列，并标记哪些行缺失值超过3个',
+    description: '测试缺失值分析功能',
+    expectedFeatures: ['isna()', 'sum()', 'mean()', '条件判断'],
+  },
+];
+
+// ==================== Helper functions ====================
+/** Uploads TEST_FILE to the sessions/upload endpoint; resolves to the new session ID, or to null when the file is missing or the API reports failure. */
+async function uploadFile() {
+  log('\n📤 步骤1: 上传测试文件...', 'cyan');
+  // Without the fixture there is nothing to upload: print the expected columns and give up.
+  if (!fs.existsSync(TEST_FILE)) {
+    log(`❌ 测试文件不存在: ${TEST_FILE}`, 'red');
+    log('💡 提示：请创建测试数据文件，包含以下列：', 'yellow');
+    log('   - patient_id（患者ID）', 'yellow');
+    log('   - gender（性别：男/女）', 'yellow');
+    log('   - age（年龄，带缺失值）', 'yellow');
+    log('   - BMI（带缺失值）', 'yellow');
+    log('   - systolic_bp（收缩压，带缺失值）', 'yellow');
+    log('   - visit_date（就诊日期）', 'yellow');
+    return null;
+  }
+  // Attach the file as a stream under the form field name 'file'.
+  const formData = new FormData();
+  formData.append('file', fs.createReadStream(TEST_FILE));
+
+  const response = await fetch(`${API_BASE}/sessions/upload`, {
+    method: 'POST',
+    body: formData,
+  });
+  // NOTE(review): the HTTP status is not checked here; a non-JSON error body would make response.json() reject.
+  const result = await response.json();
+
+  if (result.success) {
+    log(`✅ 上传成功: Session ID = ${result.data.sessionId}`, 'green');
+    log(`   文件: ${result.data.fileName}`, 'bright');
+    log(`   数据: ${result.data.totalRows} 行 × ${result.data.totalCols} 列`, 'bright');
+    return result.data.sessionId;
+  } else {
+    log(`❌ 上传失败: ${result.error}`, 'red');
+    return null;
+  }
+}
+
+async function testScenario(sessionId, scenario) {
+  log(`\n${'='.repeat(80)}`, 'magenta');
+  log(`📋 场景${scenario.id}: ${scenario.name}`, 'bright');
+  log(`📝 描述: ${scenario.description}`, 'cyan');
+  log(`💬 用户输入: "${scenario.message}"`, 'yellow');
+  log(`${'='.repeat(80)}`, 'magenta');
+
+  try {
+    const startTime = Date.now();
+
+    // 调用流式API
+    const response = await fetch(`${API_BASE}/ai/stream-process`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        sessionId,
+        message: scenario.message,
+        maxRetries: 3,
+      }),
+    });
+
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}`);
+    }
+
+    log('\n📡 流式响应:', 'cyan');
+    let stepCount = 0;
+    let lastStep = null;
+
+    // 读取SSE流
+    const reader = response.body;
+    let buffer = '';
+
+    for await (const chunk of reader) {
+      buffer += chunk.toString();
+      const lines = buffer.split('\n');
+      buffer = lines.pop() || '';
+
+      for (const line of lines) {
+        if (line.startsWith('data: ')) {
+          const data = line.slice(6);
+
+          if (data === '[DONE]') {
+            const duration = ((Date.now() - startTime) / 1000).toFixed(2);
+            log(`\n✅ 场景${scenario.id}完成 (耗时: ${duration}秒)`, 'green');
+            
+            // 分析结果
+            if (lastStep) {
+              if (lastStep.status === 'success') {
+                log(`   ✓ 执行成功`, 'green');
+                if (lastStep.retryCount && lastStep.retryCount > 0) {
+                  log(`   ⚠️  重试次数: ${lastStep.retryCount + 1}`, 'yellow');
+                }
+              } else if (lastStep.status === 'failed') {
+                log(`   ✗ 执行失败: ${lastStep.error}`, 'red');
+              }
+            }
+            continue;
+          }
+
+          try {
+            const step = JSON.parse(data);
+            stepCount++;
+            lastStep = step;
+
+            // 显示步骤（简化）
+            const icon = step.status === 'success' ? '✅' : 
+                        step.status === 'failed' ? '❌' : 
+                        step.status === 'retrying' ? '🔄' : '⏳';
+            log(`   ${icon} Step ${step.step}: ${step.message}`, 'bright');
+
+            // 显示代码（Step 3）
+            if (step.stepName === 'show_code' && step.data?.code) {
+              log(`\n📝 生成的代码:`, 'cyan');
+              console.log(step.data.code);
+              log(`\n💡 解释: ${step.data.explanation}\n`, 'yellow');
+            }
+
+            // 显示错误
+            if (step.error) {
+              log(`   ⚠️  错误: ${step.error.substring(0, 100)}...`, 'red');
+            }
+          } catch (e) {
+            // 忽略解析错误
+          }
+        }
+      }
+    }
+
+    return { success: true, steps: stepCount };
+  } catch (error) {
+    log(`\n❌ 场景${scenario.id}失败: ${error.message}`, 'red');
+    return { success: false, error: error.message };
+  }
+}
+
+// ==================== Main test function ====================
+/** Runs the whole flow: upload the test file, run every scenario in order on that session, print a summary report, then download the export into test-output/. */
+async function runAdvancedTests() {
+  log('\n' + '='.repeat(80), 'bright');
+  log('🧪 工具C高级场景测试（含多重插补）', 'bright');
+  log('='.repeat(80) + '\n', 'bright');
+
+  // Step 1: Upload the file
+  const sessionId = await uploadFile();
+  if (!sessionId) {
+    log('\n❌ 上传失败，测试终止', 'red');
+    return;
+  }
+
+  // Step 2: Run all scenarios
+  const results = [];
+  for (const scenario of advancedScenarios) {
+    const result = await testScenario(sessionId, scenario);
+    results.push({ scenario: scenario.name, ...result });
+    
+    // Wait 1 second so requests are not sent too quickly
+    await new Promise(resolve => setTimeout(resolve, 1000));
+  }
+
+  // Step 3: Generate the test report
+  log('\n' + '='.repeat(80), 'bright');
+  log('📊 测试报告', 'bright');
+  log('='.repeat(80), 'bright');
+
+  const successCount = results.filter(r => r.success).length;
+  const failCount = results.filter(r => !r.success).length;
+
+  log(`\n✅ 成功: ${successCount}/${results.length}`, 'green');
+  log(`❌ 失败: ${failCount}/${results.length}`, failCount > 0 ? 'red' : 'green');
+
+  log('\n📋 详细结果:', 'cyan');
+  results.forEach((r, i) => {
+    const icon = r.success ? '✅' : '❌';
+    log(`   ${icon} 场景${i + 1}: ${r.scenario}`, r.success ? 'green' : 'red');
+  });
+
+  // Step 4: Test the export feature
+  log('\n📥 测试导出功能...', 'cyan');
+  try {
+    const exportResponse = await fetch(`${API_BASE}/sessions/${sessionId}/export`);
+    if (exportResponse.ok) {
+      const buffer = await exportResponse.arrayBuffer();
+      const exportPath = path.join(__dirname, 'test-output', `export_${Date.now()}.xlsx`);
+      
+      // Make sure the output directory exists
+      const dir = path.dirname(exportPath);
+      if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+      }
+      
+      fs.writeFileSync(exportPath, Buffer.from(buffer));
+      log(`✅ 导出成功: ${exportPath} (${(buffer.byteLength / 1024).toFixed(2)}KB)`, 'green');
+    } else {
+      log(`❌ 导出失败: HTTP ${exportResponse.status}`, 'red');
+    }
+  } catch (error) {
+    log(`❌ 导出异常: ${error.message}`, 'red');
+  }
+
+  log('\n' + '='.repeat(80), 'bright');
+  log('🎉 测试完成！', 'bright');
+  log('='.repeat(80) + '\n', 'bright');
+}
+
+// ==================== Run the tests ====================
+// Entry point: a rejection escaping runAdvancedTests() is logged with its stack and the process exits with code 1.
+runAdvancedTests().catch(error => {
+  log(`\n❌ 测试异常: ${error.message}`, 'red');
+  console.error(error);
+  process.exit(1);
+});
+
+