feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer: - SessionBlackboardService with Postgres-Only cache - DataProfileService for data overview generation - PicoInferenceService for LLM-driven PICO extraction - Frontend DataContextCard and VariableDictionaryPanel - E2E tests: 31/31 passed Phase II - Conversation Layer LLM + Intent Router: - ConversationService with SSE streaming - IntentRouterService (rule-first + LLM fallback, 6 intents) - SystemPromptService with 6-segment dynamic assembly - TokenTruncationService for context management - ChatHandlerService as unified chat entry - Frontend SSAChatPane and useSSAChat hook - E2E tests: 38/38 passed Phase III - Method Consultation + AskUser Standardization: - ToolRegistryService with Repository Pattern - MethodConsultService with DecisionTable + LLM enhancement - AskUserService with global interrupt handling - Frontend AskUserCard component - E2E tests: 13/13 passed Phase IV - Dialogue-Driven Analysis + QPER Integration: - ToolOrchestratorService (plan/execute/report) - analysis_plan SSE event for WorkflowPlan transmission - Dual-channel confirmation (ask_user card + workspace button) - PICO as optional hint for LLM parsing - E2E tests: 25/25 passed R Statistics Service: - 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon - Enhanced guardrails and block helpers - Comprehensive test suite (run_all_tools_test.js) Documentation: - Updated system status document (v5.9) - Updated SSA module status and development plan (v1.8) Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25) Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions
--- a/r-statistics-service/tests/run_all_tools_test.js
+++ b/r-statistics-service/tests/run_all_tools_test.js
@@ -0,0 +1,299 @@
+/**
+ * SSA R 统计引擎 — 全工具端到端测试
+ *
+ * 覆盖范围：12 个统计工具 + JIT 护栏 + report_blocks 协议验证
+ *
+ * 运行方式：
+ *   node r-statistics-service/tests/run_all_tools_test.js
+ *
+ * 前置条件：R 服务容器已启动（docker-compose up -d）
+ */
+
+const http = require('http');
+const fs = require('fs');
+const path = require('path');
+
+const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082'; // base URL of the R service; the R_SERVICE_URL env var overrides the local default
+const TIMEOUT = 60000; // passed as the `timeout` option of the HTTP requests made by post() and get()
+
+// ==================== HTTP ====================
+
+function post(endpoint, body) {
+  return new Promise((resolve, reject) => {
+    const url = new URL(endpoint, R_URL);
+    const payload = JSON.stringify(body);
+    const req = http.request(
+      { hostname: url.hostname, port: url.port, path: url.pathname, method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
+        timeout: TIMEOUT },
+      (res) => {
+        let data = '';
+        res.on('data', c => (data += c));
+        res.on('end', () => {
+          try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
+          catch { resolve({ status: res.statusCode, body: data }); }
+        });
+      }
+    );
+    req.on('error', reject);
+    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
+    req.write(payload);
+    req.end();
+  });
+}
+
+function get(endpoint) {
+  return new Promise((resolve, reject) => {
+    const url = new URL(endpoint, R_URL);
+    http.get(url, { timeout: TIMEOUT }, (res) => {
+      let data = '';
+      res.on('data', c => (data += c));
+      res.on('end', () => {
+        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
+        catch { resolve({ status: res.statusCode, body: data }); }
+      });
+    }).on('error', reject);
+  });
+}
+
+// ==================== 测试数据 ====================
+
+function loadCSV() {
+  const csvPath = path.join(__dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv');
+  const lines = fs.readFileSync(csvPath, 'utf-8').trim().split('\n');
+  const headers = lines[0].split(',');
+  return lines.slice(1).map(line => {
+    const vals = line.split(',');
+    const row = {};
+    headers.forEach((h, i) => {
+      const v = vals[i];
+      if (v === '' || v === undefined) row[h] = null;
+      else if (!isNaN(Number(v))) row[h] = Number(v);
+      else row[h] = v;
+    });
+    return row;
+  });
+}
+
+function loadJSON(name) {
+  return JSON.parse(fs.readFileSync(path.join(__dirname, name), 'utf-8'));
+}
+
+// ==================== 校验 ====================
+
+function validateBlocks(blocks, toolName) {
+  const issues = [];
+  if (!Array.isArray(blocks)) { issues.push('report_blocks 不是数组'); return issues; }
+  if (blocks.length === 0) { issues.push('report_blocks 为空'); return issues; }
+  const validTypes = ['markdown', 'table', 'image', 'key_value'];
+  blocks.forEach((b, i) => {
+    if (!validTypes.includes(b.type)) issues.push(`blocks[${i}].type 非法: ${b.type}`);
+    if (b.type === 'table') {
+      if (!Array.isArray(b.headers)) issues.push(`blocks[${i}] table 缺少 headers`);
+      if (!Array.isArray(b.rows)) issues.push(`blocks[${i}] table 缺少 rows`);
+    }
+    if (b.type === 'markdown' && !b.content) issues.push(`blocks[${i}] markdown 缺少 content`);
+    if (b.type === 'image' && !b.data) issues.push(`blocks[${i}] image 缺少 data`);
+    if (b.type === 'key_value' && !Array.isArray(b.items)) issues.push(`blocks[${i}] key_value 缺少 items`);
+  });
+  return issues;
+}
+
+// ==================== 主测试 ====================
+
+async function main() {
+  console.log('\n╔══════════════════════════════════════════════════════════╗');
+  console.log('║   SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT)    ║');
+  console.log('║   ' + new Date().toISOString().slice(0, 19) + '                              ║');
+  console.log('╚══════════════════════════════════════════════════════════╝\n');
+
+  // 0. 健康检查
+  let toolsLoaded = 0;
+  try {
+    const h = await get('/health');
+    toolsLoaded = h.body.tools_loaded || 0;
+    console.log(`✅ 健康检查通过  version=${h.body.version}  tools_loaded=${toolsLoaded}  dev_mode=${h.body.dev_mode}\n`);
+  } catch (e) {
+    console.log(`❌ R 服务不可用: ${e.message}\n`);
+    process.exit(1);
+  }
+
+  // 0.1 工具列表
+  try {
+    const tl = await get('/api/v1/tools');
+    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
+  } catch { /* skip */ }
+
+  const csvData = loadCSV();
+  const ds = { type: 'inline', data: csvData };
+
+  const results = [];
+
+  async function run(name, toolCode, body, checks) {
+    const t0 = Date.now();
+    try {
+      const res = await post(`/api/v1/skills/${toolCode}`, body);
+      const ms = Date.now() - t0;
+      const d = res.body;
+      if (d.status === 'success') {
+        const blockIssues = validateBlocks(d.report_blocks, toolCode);
+        const extra = checks ? checks(d) : {};
+        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
+        const hasCode = !!d.reproducible_code;
+        const blocksOk = blockIssues.length === 0;
+        const icon = blocksOk ? '✅' : '⚠️';
+        console.log(`${icon} ${name}  (${ms}ms)  blocks=${(d.report_blocks||[]).length}  plots=${hasPlots?'✓':'✗'}  code=${hasCode?'✓':'✗'}  ${JSON.stringify(extra)}`);
+        if (!blocksOk) blockIssues.forEach(iss => console.log(`     ⚠ ${iss}`));
+        results.push({ name, status: 'pass', ms, blocksOk, extra });
+      } else if (d.status === 'blocked') {
+        console.log(`🔒 ${name}  (${ms}ms)  status=blocked  message=${d.message}`);
+        results.push({ name, status: 'blocked', ms });
+      } else {
+        console.log(`❌ ${name}  (${ms}ms)  error=${d.error_code||''} ${d.message||''}`);
+        results.push({ name, status: 'fail', ms, error: d.message });
+      }
+    } catch (e) {
+      console.log(`❌ ${name}  EXCEPTION: ${e.message}`);
+      results.push({ name, status: 'error', error: e.message });
+    }
+  }
+
+  // ========== Phase 2A 工具（原有 7 个） ==========
+  console.log('─'.repeat(60));
+  console.log('  Phase 2A 工具（原有 7 个）');
+  console.log('─'.repeat(60));
+
+  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
+    data_source: ds,
+    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
+  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));
+
+  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
+    data_source: ds,
+    params: { group_var: 'sex', value_var: 'age' },
+    guardrails: { check_normality: true }
+  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
+    data_source: ds,
+    params: { group_var: 'sex', value_var: 'bmi' }
+  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));
+
+  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
+    data_source: ds,
+    params: { var1: 'sex', var2: 'smoke' }
+  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
+    data_source: ds,
+    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
+  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));
+
+  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
+    data_source: ds,
+    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
+  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));
+
+  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
+    data_source: ds,
+    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
+    guardrails: { check_normality: true }
+  }, d => ({ p: d.results?.p_value_fmt }));
+
+  // ========== Phase Deploy 新工具（5 个） ==========
+  console.log('\n' + '─'.repeat(60));
+  console.log('  Phase Deploy 新工具（5 个）');
+  console.log('─'.repeat(60));
+
+  const fisherData = loadJSON('test_fisher.json');
+  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
+    fisherData,
+    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));
+
+  const anovaData = loadJSON('test_anova_one.json');
+  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
+    anovaData,
+    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));
+
+  const wilcoxData = loadJSON('test_wilcoxon.json');
+  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
+    wilcoxData,
+    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));
+
+  const linearData = loadJSON('test_linear_reg.json');
+  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
+    linearData,
+    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));
+
+  const baselineData = loadJSON('test_baseline_table.json');
+  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
+    baselineData,
+    d => ({
+      sig_vars: d.results?.significant_vars?.length || 0,
+      methods: d.results?.method_info?.length || 0,
+      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
+    }));
+
+  // ========== JIT 护栏 ==========
+  console.log('\n' + '─'.repeat(60));
+  console.log('  JIT 护栏检查');
+  console.log('─'.repeat(60));
+
+  const jitTests = [
+    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
+    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
+    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
+    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
+  ];
+
+  for (const jt of jitTests) {
+    const t0 = Date.now();
+    try {
+      const res = await post('/api/v1/guardrails/jit', jt.body);
+      const ms = Date.now() - t0;
+      const d = res.body;
+      if (d.status === 'success') {
+        console.log(`✅ ${jt.name}  (${ms}ms)  checks=${d.checks?.length}  all_passed=${d.all_checks_passed}  suggested=${d.suggested_tool || 'none'}`);
+        results.push({ name: jt.name, status: 'pass', ms });
+      } else {
+        console.log(`❌ ${jt.name}  (${ms}ms)  ${d.message || ''}`);
+        results.push({ name: jt.name, status: 'fail', ms });
+      }
+    } catch (e) {
+      console.log(`❌ ${jt.name}  EXCEPTION: ${e.message}`);
+      results.push({ name: jt.name, status: 'error' });
+    }
+  }
+
+  // ========== 汇总 ==========
+  console.log('\n' + '═'.repeat(60));
+  console.log('  测试汇总');
+  console.log('═'.repeat(60));
+
+  const pass = results.filter(r => r.status === 'pass').length;
+  const blocked = results.filter(r => r.status === 'blocked').length;
+  const fail = results.filter(r => r.status === 'fail' || r.status === 'error').length;
+  const total = results.length;
+
+  console.log(`  通过: ${pass}/${total}    阻塞: ${blocked}    失败: ${fail}`);
+  if (fail > 0) {
+    console.log('\n  失败项:');
+    results.filter(r => r.status === 'fail' || r.status === 'error').forEach(r => {
+      console.log(`    ❌ ${r.name}: ${r.error || 'unknown'}`);
+    });
+  }
+
+  const avgMs = Math.round(results.filter(r => r.ms).reduce((s, r) => s + r.ms, 0) / results.filter(r => r.ms).length);
+  console.log(`\n  平均响应时间: ${avgMs}ms`);
+  console.log('═'.repeat(60));
+
+  if (fail === 0) {
+    console.log('🎉 全部测试通过！R 统计引擎 12 工具 + JIT 护栏就绪。\n');
+  } else {
+    console.log('⚠️  存在失败项，请检查 R 服务日志。\n');
+  }
+
+  process.exit(fail > 0 ? 1 : 0);
+}
+
+main().catch(e => { console.error('测试脚本异常:', e); process.exit(1); }); // script entry point: a rejection escaping main() is logged and the process exits with code 1
--- a/r-statistics-service/tests/test_anova_one.json
+++ b/r-statistics-service/tests/test_anova_one.json
@@ -0,0 +1,20 @@
+{
+  "data_source": {
+    "type": "inline",
+    "data": {
+      "group": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
+                 "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
+                 "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"],
+      "score": [23, 25, 27, 24, 22, 26, 28, 21, 29, 24,
+                30, 32, 35, 31, 28, 33, 36, 29, 34, 31,
+                18, 20, 22, 19, 17, 21, 23, 16, 24, 19]
+    }
+  },
+  "params": {
+    "group_var": "group",
+    "value_var": "score"
+  },
+  "guardrails": {
+    "check_normality": true
+  }
+}
--- a/r-statistics-service/tests/test_baseline_table.json
+++ b/r-statistics-service/tests/test_baseline_table.json
@@ -0,0 +1,36 @@
+{
+  "data_source": {
+    "type": "inline",
+    "data": {
+      "group": ["Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug",
+                 "Drug", "Drug", "Drug", "Drug", "Drug",
+                 "Placebo", "Placebo", "Placebo", "Placebo", "Placebo",
+                 "Placebo", "Placebo", "Placebo", "Placebo", "Placebo",
+                 "Placebo", "Placebo", "Placebo", "Placebo", "Placebo"],
+      "age": [45, 52, 38, 61, 44, 55, 49, 57, 42, 50,
+              48, 53, 41, 59, 46,
+              47, 51, 39, 58, 43, 54, 50, 56, 41, 49,
+              46, 52, 40, 60, 44],
+      "sex": ["M", "F", "M", "F", "M", "F", "M", "F", "M", "F",
+              "M", "M", "F", "F", "M",
+              "F", "M", "F", "M", "F", "M", "F", "M", "F", "M",
+              "F", "F", "M", "M", "F"],
+      "sbp": [130, 142, 125, 155, 128, 148, 135, 152, 127, 140,
+              132, 145, 126, 150, 133,
+              128, 138, 122, 150, 126, 142, 135, 148, 124, 136,
+              130, 140, 120, 153, 127],
+      "bmi": [24.5, 28.1, 22.3, 30.5, 23.8, 29.2, 25.6, 31.0, 22.0, 27.5,
+              24.8, 29.5, 21.8, 30.2, 25.1,
+              23.8, 27.2, 21.5, 29.8, 22.9, 28.5, 26.0, 30.1, 21.2, 26.8,
+              24.0, 28.8, 20.8, 31.2, 23.5],
+      "smoking": ["Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No",
+                   "Yes", "Yes", "No", "No", "Yes",
+                   "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes",
+                   "No", "No", "Yes", "Yes", "No"]
+    }
+  },
+  "params": {
+    "group_var": "group",
+    "analyze_vars": ["age", "sex", "sbp", "bmi", "smoking"]
+  }
+}
--- a/r-statistics-service/tests/test_fisher.json
+++ b/r-statistics-service/tests/test_fisher.json
@@ -0,0 +1,15 @@
+{
+  "data_source": {
+    "type": "inline",
+    "data": {
+      "treatment": ["Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug",
+                     "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo"],
+      "outcome": ["Improved", "Improved", "Improved", "Improved", "Improved", "Improved", "Improved", "Not improved", "Not improved", "Not improved",
+                   "Improved", "Improved", "Improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved"]
+    }
+  },
+  "params": {
+    "var1": "treatment",
+    "var2": "outcome"
+  }
+}
--- a/r-statistics-service/tests/test_linear_reg.json
+++ b/r-statistics-service/tests/test_linear_reg.json
@@ -0,0 +1,24 @@
+{
+  "data_source": {
+    "type": "inline",
+    "data": {
+      "sbp": [120, 130, 125, 140, 135, 128, 145, 138, 122, 127,
+              133, 141, 136, 129, 132, 126, 148, 139, 124, 131,
+              137, 143, 134, 128, 150, 142, 123, 130, 136, 144],
+      "age": [25, 35, 30, 45, 40, 32, 50, 42, 28, 33,
+              38, 48, 43, 34, 36, 29, 55, 44, 27, 37,
+              41, 47, 39, 31, 58, 46, 26, 36, 40, 49],
+      "bmi": [22.1, 25.3, 23.5, 28.7, 26.4, 24.0, 30.2, 27.8, 21.5, 23.8,
+              25.6, 29.1, 27.2, 24.5, 25.1, 22.8, 31.5, 28.3, 21.9, 24.9,
+              26.8, 29.5, 26.0, 23.2, 32.1, 28.9, 21.3, 25.0, 26.5, 30.0],
+      "smoke": [0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
+                1, 1, 1, 0, 0, 0, 1, 1, 0, 0,
+                1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
+    }
+  },
+  "params": {
+    "outcome_var": "sbp",
+    "predictors": ["age", "bmi"],
+    "confounders": ["smoke"]
+  }
+}
--- a/r-statistics-service/tests/test_wilcoxon.json
+++ b/r-statistics-service/tests/test_wilcoxon.json
@@ -0,0 +1,15 @@
+{
+  "data_source": {
+    "type": "inline",
+    "data": {
+      "before": [120, 130, 125, 140, 135, 128, 132, 145, 138, 122,
+                  127, 133, 141, 136, 129],
+      "after": [115, 122, 118, 130, 125, 120, 126, 135, 128, 118,
+                121, 125, 132, 128, 122]
+    }
+  },
+  "params": {
+    "before_var": "before",
+    "after_var": "after"
+  }
+}