feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions
--- a/r-statistics-service/tests/run_all_tools_test.js
+++ b/r-statistics-service/tests/run_all_tools_test.js
@@ -0,0 +1,299 @@
+/**
+ * SSA R 统计引擎 — 全工具端到端测试
+ *
+ * 覆盖范围：12 个统计工具 + JIT 护栏 + report_blocks 协议验证
+ *
+ * 运行方式：
+ *   node r-statistics-service/tests/run_all_tools_test.js
+ *
+ * 前置条件：R 服务容器已启动（docker-compose up -d）
+ */
+
+const http = require('http');
+const fs = require('fs');
+const path = require('path');
+
+const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082'; // Base URL of the R service; the R_SERVICE_URL env var overrides the default
+const TIMEOUT = 60000; // Value handed to the `timeout` option of every HTTP request in this script (milliseconds per the Node http API)
+
+// ==================== HTTP ====================
+
+/**
+ * POST a JSON body to the R service.
+ *
+ * @param {string} endpoint - Path (optionally with a query string) resolved against R_URL.
+ * @param {*} body - Value serialized with JSON.stringify and sent as the request body.
+ * @returns {Promise<{status: number, body: *}>} Resolves for any HTTP status; `body` is the
+ *   parsed JSON reply, or the raw response text when the reply is not valid JSON.
+ *   Rejects on request/response errors or when the request's 'timeout' event fires.
+ */
+function post(endpoint, body) {
+  return new Promise((resolve, reject) => {
+    const url = new URL(endpoint, R_URL);
+    const payload = JSON.stringify(body);
+    const req = http.request(
+      {
+        hostname: url.hostname,
+        port: url.port,
+        // pathname alone would silently drop a query string carried by `endpoint`.
+        path: url.pathname + url.search,
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
+        timeout: TIMEOUT,
+      },
+      (res) => {
+        let data = '';
+        // Decode as a stream so a multi-byte UTF-8 character split across two chunks stays intact.
+        res.setEncoding('utf8');
+        res.on('error', reject);
+        res.on('data', (chunk) => { data += chunk; });
+        res.on('end', () => {
+          try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
+          catch { resolve({ status: res.statusCode, body: data }); }
+        });
+      }
+    );
+    req.on('error', reject);
+    // The 'timeout' event does not abort the request by itself; destroy it explicitly.
+    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
+    req.write(payload);
+    req.end();
+  });
+}
+
+/**
+ * GET an endpoint of the R service.
+ *
+ * @param {string} endpoint - Path resolved against R_URL.
+ * @returns {Promise<{status: number, body: *}>} Resolves for any HTTP status; `body` is the
+ *   parsed JSON reply, or the raw response text when the reply is not valid JSON.
+ *   Rejects on request/response errors or when the request's 'timeout' event fires.
+ */
+function get(endpoint) {
+  return new Promise((resolve, reject) => {
+    const url = new URL(endpoint, R_URL);
+    const req = http.get(url, { timeout: TIMEOUT }, (res) => {
+      let data = '';
+      // Decode as a stream so a multi-byte UTF-8 character split across two chunks stays intact.
+      res.setEncoding('utf8');
+      res.on('error', reject);
+      res.on('data', (chunk) => { data += chunk; });
+      res.on('end', () => {
+        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
+        catch { resolve({ status: res.statusCode, body: data }); }
+      });
+    });
+    req.on('error', reject);
+    // Without this listener the `timeout` option has no effect: the event fires but the
+    // request is neither aborted nor rejected, so the caller would wait indefinitely.
+    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
+  });
+}
+
+// ==================== 测试数据 ====================
+
+/**
+ * Load the shared test.csv dataset as an array of row objects.
+ *
+ * Parsing is deliberately naive: lines are split on bare commas, so quoted fields
+ * that contain commas are not supported.
+ *
+ * @returns {Array<Object>} One object per data row, keyed by the header line. Blank or
+ *   missing cells become null, numeric-looking cells become numbers, everything else
+ *   is kept as a string.
+ */
+function loadCSV() {
+  const csvPath = path.join(__dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv');
+  // Accept CRLF as well as LF: splitting on '\n' alone leaves a trailing '\r' on every
+  // interior line, which corrupts string values (and the header name) of the last column.
+  const lines = fs.readFileSync(csvPath, 'utf-8').trim().split(/\r?\n/);
+  const headers = lines[0].split(',');
+  return lines.slice(1).map((line) => {
+    const cells = line.split(',');
+    const row = {};
+    headers.forEach((header, i) => {
+      const cell = cells[i];
+      // Whitespace-only cells are treated as missing; Number(' ') would otherwise yield 0.
+      if (cell === undefined || cell.trim() === '') row[header] = null;
+      else if (!Number.isNaN(Number(cell))) row[header] = Number(cell);
+      else row[header] = cell;
+    });
+    return row;
+  });
+}
+
+/**
+ * Read a JSON fixture that sits next to this script.
+ *
+ * @param {string} name - File name, joined onto this script's directory.
+ * @returns {*} The parsed JSON content.
+ */
+function loadJSON(name) {
+  const fixturePath = path.join(__dirname, name);
+  const raw = fs.readFileSync(fixturePath, 'utf-8');
+  return JSON.parse(raw);
+}
+
+// ==================== 校验 ====================
+
+/**
+ * Check a `report_blocks` payload against the block shapes this script knows about.
+ *
+ * @param {*} blocks - Value of `report_blocks` from a tool response.
+ * @param {string} toolName - Currently unused; kept so existing call sites stay valid.
+ * @returns {string[]} Human-readable issues; an empty array means the payload passed.
+ */
+function validateBlocks(blocks, toolName) {
+  if (!Array.isArray(blocks)) return ['report_blocks 不是数组'];
+  if (blocks.length === 0) return ['report_blocks 为空'];
+  const validTypes = ['markdown', 'table', 'image', 'key_value'];
+  const issues = [];
+  blocks.forEach((b, i) => {
+    // A null or primitive entry would throw on `b.type`; report it instead of crashing.
+    if (b === null || typeof b !== 'object') {
+      issues.push(`blocks[${i}] 不是对象`);
+      return;
+    }
+    if (!validTypes.includes(b.type)) issues.push(`blocks[${i}].type 非法: ${b.type}`);
+    if (b.type === 'table') {
+      if (!Array.isArray(b.headers)) issues.push(`blocks[${i}] table 缺少 headers`);
+      if (!Array.isArray(b.rows)) issues.push(`blocks[${i}] table 缺少 rows`);
+    }
+    if (b.type === 'markdown' && !b.content) issues.push(`blocks[${i}] markdown 缺少 content`);
+    if (b.type === 'image' && !b.data) issues.push(`blocks[${i}] image 缺少 data`);
+    if (b.type === 'key_value' && !Array.isArray(b.items)) issues.push(`blocks[${i}] key_value 缺少 items`);
+  });
+  return issues;
+}
+
+// ==================== 主测试 ====================
+
+/**
+ * Run the end-to-end suite against the R statistics service.
+ *
+ * Steps, in order: health check (exits with code 1 if it fails), an informational
+ * listing of registered tools, one request per statistical tool, the JIT guardrail
+ * requests, and a printed summary.
+ *
+ * On normal completion the process exits with code 1 when any item ended in 'fail'
+ * or 'error', otherwise 0. 'blocked' results and report_blocks warnings do not
+ * affect the exit code.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  console.log('\n╔══════════════════════════════════════════════════════════╗');
+  console.log('║   SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT)    ║');
+  console.log('║   ' + new Date().toISOString().slice(0, 19) + '                              ║');
+  console.log('╚══════════════════════════════════════════════════════════╝\n');
+
+  // 0. Health check: stop early when the service is unreachable or unhealthy.
+  try {
+    const h = await get('/health');
+    // get() resolves for every HTTP status, so a non-2xx reply must be rejected here.
+    if (h.status < 200 || h.status >= 300) throw new Error(`HTTP ${h.status}`);
+    const toolsLoaded = h.body.tools_loaded || 0;
+    console.log(`✅ 健康检查通过  version=${h.body.version}  tools_loaded=${toolsLoaded}  dev_mode=${h.body.dev_mode}\n`);
+  } catch (e) {
+    console.log(`❌ R 服务不可用: ${e.message}\n`);
+    process.exit(1);
+  }
+
+  // 0.1 Tool list: informational only, so a failure is reported but does not stop the run.
+  try {
+    const tl = await get('/api/v1/tools');
+    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
+  } catch (e) {
+    console.log(`⚠️  无法获取工具列表: ${e.message}\n`);
+  }
+
+  const csvData = loadCSV();
+  const ds = { type: 'inline', data: csvData };
+
+  // One entry per executed test: { name, status, ms?, blocksOk?, extra?, error? }.
+  const results = [];
+
+  /**
+   * Call one tool endpoint, print a one-line outcome and append it to `results`.
+   *
+   * @param {string} name - Label used in the console output and the summary.
+   * @param {string} toolCode - Tool identifier appended to /api/v1/skills/.
+   * @param {object} body - Request body sent to the tool.
+   * @param {function(object): object} [checks] - Optional extractor of extra fields to print.
+   */
+  async function run(name, toolCode, body, checks) {
+    const t0 = Date.now();
+    try {
+      const res = await post(`/api/v1/skills/${toolCode}`, body);
+      const ms = Date.now() - t0;
+      const d = res.body;
+      if (d.status === 'success') {
+        const blockIssues = validateBlocks(d.report_blocks, toolCode);
+        const extra = checks ? checks(d) : {};
+        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
+        const hasCode = !!d.reproducible_code;
+        const blocksOk = blockIssues.length === 0;
+        const icon = blocksOk ? '✅' : '⚠️';
+        console.log(`${icon} ${name}  (${ms}ms)  blocks=${(d.report_blocks||[]).length}  plots=${hasPlots?'✓':'✗'}  code=${hasCode?'✓':'✗'}  ${JSON.stringify(extra)}`);
+        if (!blocksOk) blockIssues.forEach(iss => console.log(`     ⚠ ${iss}`));
+        // Block issues are shown as warnings only; the item still counts as a pass.
+        results.push({ name, status: 'pass', ms, blocksOk, extra });
+      } else if (d.status === 'blocked') {
+        console.log(`🔒 ${name}  (${ms}ms)  status=blocked  message=${d.message}`);
+        results.push({ name, status: 'blocked', ms });
+      } else {
+        console.log(`❌ ${name}  (${ms}ms)  error=${d.error_code||''} ${d.message||''}`);
+        results.push({ name, status: 'fail', ms, error: d.message });
+      }
+    } catch (e) {
+      console.log(`❌ ${name}  EXCEPTION: ${e.message}`);
+      results.push({ name, status: 'error', error: e.message });
+    }
+  }
+
+  // ========== Phase 2A tools (the original 7) ==========
+  console.log('─'.repeat(60));
+  console.log('  Phase 2A 工具（原有 7 个）');
+  console.log('─'.repeat(60));
+
+  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
+    data_source: ds,
+    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
+  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));
+
+  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
+    data_source: ds,
+    params: { group_var: 'sex', value_var: 'age' },
+    guardrails: { check_normality: true }
+  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
+    data_source: ds,
+    params: { group_var: 'sex', value_var: 'bmi' }
+  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));
+
+  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
+    data_source: ds,
+    params: { var1: 'sex', var2: 'smoke' }
+  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
+    data_source: ds,
+    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
+  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
+    data_source: ds,
+    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
+  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));
+
+  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
+    data_source: ds,
+    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
+    guardrails: { check_normality: true }
+  }, d => ({ p: d.results?.p_value_fmt }));
+
+  // ========== Phase Deploy tools (5 new) ==========
+  // These send the JSON fixtures as the complete request body.
+  console.log('\n' + '─'.repeat(60));
+  console.log('  Phase Deploy 新工具（5 个）');
+  console.log('─'.repeat(60));
+
+  const fisherData = loadJSON('test_fisher.json');
+  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
+    fisherData,
+    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));
+
+  const anovaData = loadJSON('test_anova_one.json');
+  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
+    anovaData,
+    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));
+
+  const wilcoxData = loadJSON('test_wilcoxon.json');
+  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
+    wilcoxData,
+    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));
+
+  const linearData = loadJSON('test_linear_reg.json');
+  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
+    linearData,
+    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));
+
+  const baselineData = loadJSON('test_baseline_table.json');
+  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
+    baselineData,
+    d => ({
+      sig_vars: d.results?.significant_vars?.length || 0,
+      methods: d.results?.method_info?.length || 0,
+      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
+    }));
+
+  // ========== JIT guardrails ==========
+  console.log('\n' + '─'.repeat(60));
+  console.log('  JIT 护栏检查');
+  console.log('─'.repeat(60));
+
+  const jitTests = [
+    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
+    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
+    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
+    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
+  ];
+
+  for (const jt of jitTests) {
+    const t0 = Date.now();
+    try {
+      const res = await post('/api/v1/guardrails/jit', jt.body);
+      const ms = Date.now() - t0;
+      const d = res.body;
+      if (d.status === 'success') {
+        console.log(`✅ ${jt.name}  (${ms}ms)  checks=${d.checks?.length}  all_passed=${d.all_checks_passed}  suggested=${d.suggested_tool || 'none'}`);
+        results.push({ name: jt.name, status: 'pass', ms });
+      } else {
+        console.log(`❌ ${jt.name}  (${ms}ms)  ${d.message || ''}`);
+        // Keep the message so the summary does not fall back to 'unknown'.
+        results.push({ name: jt.name, status: 'fail', ms, error: d.message });
+      }
+    } catch (e) {
+      console.log(`❌ ${jt.name}  EXCEPTION: ${e.message}`);
+      results.push({ name: jt.name, status: 'error', error: e.message });
+    }
+  }
+
+  // ========== Summary ==========
+  console.log('\n' + '═'.repeat(60));
+  console.log('  测试汇总');
+  console.log('═'.repeat(60));
+
+  const pass = results.filter(r => r.status === 'pass').length;
+  const blocked = results.filter(r => r.status === 'blocked').length;
+  const failures = results.filter(r => r.status === 'fail' || r.status === 'error');
+  const fail = failures.length;
+  const total = results.length;
+
+  console.log(`  通过: ${pass}/${total}    阻塞: ${blocked}    失败: ${fail}`);
+  if (fail > 0) {
+    console.log('\n  失败项:');
+    failures.forEach(r => {
+      console.log(`    ❌ ${r.name}: ${r.error || 'unknown'}`);
+    });
+  }
+
+  // Only results that completed a request carry `ms`. Test the type rather than
+  // truthiness so a 0 ms result is still counted, and avoid 0/0 (NaN) when none were timed.
+  const timed = results.filter(r => typeof r.ms === 'number');
+  const avgText = timed.length > 0
+    ? `${Math.round(timed.reduce((s, r) => s + r.ms, 0) / timed.length)}ms`
+    : 'N/A';
+  console.log(`\n  平均响应时间: ${avgText}`);
+  console.log('═'.repeat(60));
+
+  if (fail === 0) {
+    console.log('🎉 全部测试通过！R 统计引擎 12 工具 + JIT 护栏就绪。\n');
+  } else {
+    console.log('⚠️  存在失败项，请检查 R 服务日志。\n');
+  }
+
+  process.exit(fail > 0 ? 1 : 0);
+}
+
+// Entry point: any exception that escapes main() is logged and the process exits with code 1.
+main().catch(e => { console.error('测试脚本异常:', e); process.exit(1); });