feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers

Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement
- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging
- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output
- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 18:15:53 +08:00
parent 428a22adf2
commit 371e1c069c
73 changed files with 9242 additions and 706 deletions
--- a/backend/scripts/test-ssa-planner-pipeline.ts
+++ b/backend/scripts/test-ssa-planner-pipeline.ts
@@ -0,0 +1,395 @@
+/**
+ * SSA Phase P — Tracer Bullet 测试脚本
+ *
+ * 验证范围：
+ * 1. ConfigLoader 加载 + Zod 校验 3 个 JSON 配置文件
+ * 2. DecisionTableService 四维匹配（6 种场景）
+ * 3. FlowTemplateService 模板填充 + EPV 截断
+ * 4. Q→P 集成：mock ParsedQuery → WorkflowPlan
+ *
+ * 运行方式：npx tsx scripts/test-ssa-planner-pipeline.ts
+ * 不依赖：数据库、LLM、R 引擎
+ */
+
+import { toolsRegistryLoader, decisionTablesLoader, flowTemplatesLoader, reloadAllConfigs } from '../src/modules/ssa/config/index.js';
+import { decisionTableService } from '../src/modules/ssa/services/DecisionTableService.js';
+import { flowTemplateService } from '../src/modules/ssa/services/FlowTemplateService.js';
+import type { ParsedQuery } from '../src/modules/ssa/types/query.types.js';
+import type { DataProfile } from '../src/modules/ssa/services/DataProfileService.js';
+
+// ────────────────────────────────────────────
+// 工具函数
+// ────────────────────────────────────────────
+
+let passed = 0;
+let failed = 0;
+
+/**
+ * Records the outcome of one check and prints a single result line.
+ *
+ * A truthy `condition` increments `passed`; otherwise `failed` is incremented
+ * and `detail`, when non-empty, is appended to the printed line. A failed
+ * check is only counted and logged — nothing is thrown here.
+ *
+ * @param condition - Result of the check.
+ * @param testName - Label printed after the pass/fail marker.
+ * @param detail - Optional extra context, shown only on failure.
+ */
+function assert(condition: boolean, testName: string, detail?: string) {
+  if (!condition) {
+    const suffix = detail ? ` — ${detail}` : '';
+    console.log(`  ❌ ${testName}${suffix}`);
+    failed++;
+    return;
+  }
+
+  console.log(`  ✅ ${testName}`);
+  passed++;
+}
+
+/**
+ * Prints a section banner: a blank line and a 60-character rule, the title
+ * line, then a closing rule.
+ *
+ * @param title - Heading text printed after the clipboard marker.
+ */
+function section(title: string) {
+  const rule = '─'.repeat(60);
+  console.log(`\n${rule}`);
+  console.log(`📋 ${title}`);
+  console.log(rule);
+}
+
+// ────────────────────────────────────────────
+// Mock 数据
+// ────────────────────────────────────────────
+
+/**
+ * Builds a ParsedQuery for tests from a fixed baseline (two-group comparison
+ * of the continuous outcome `BP` by `Drug`, independent design).
+ *
+ * @param overrides - Fields that replace the baseline values; anything not
+ *   listed keeps the baseline.
+ * @returns The baseline with `overrides` applied on top.
+ */
+function makeParsedQuery(overrides: Partial<ParsedQuery>): ParsedQuery {
+  const baseline: ParsedQuery = {
+    goal: 'comparison',
+    outcome_var: 'BP',
+    outcome_type: 'continuous',
+    predictor_vars: ['Drug'],
+    predictor_types: ['binary'],
+    grouping_var: 'Drug',
+    design: 'independent',
+    confidence: 0.9,
+    reasoning: 'test',
+    needsClarification: false,
+  };
+
+  return { ...baseline, ...overrides };
+}
+
+/**
+ * Builds a small mock data profile with 200 rows and six described columns.
+ *
+ * The outcome column is categorical with two levels: `'1'` occurs
+ * `minEventCount` times and `'0'` takes the remaining rows. The other five
+ * columns (Age, Sex, BMI, Smoking, SBP) are fixed.
+ *
+ * NOTE(review): the result is cast through `any`, so the compiler does not
+ * check this literal against the real DataProfile shape — confirm it still
+ * matches when that type changes. `totalColumns` is 10 although only six
+ * columns are listed.
+ *
+ * @param outcomeVar - Name given to the outcome column.
+ * @param minEventCount - Row count assigned to the `'1'` level of the outcome.
+ */
+function makeMockProfile(outcomeVar: string, minEventCount: number): DataProfile {
+  const totalRows = 200;
+
+  // Describes a column that has no missing values.
+  const completeColumn = (name: string, type: string, unique: number) => ({
+    name,
+    type,
+    missing: 0,
+    missingPercent: 0,
+    unique,
+  });
+
+  const outcomeColumn = {
+    ...completeColumn(outcomeVar, 'categorical', 2),
+    topValues: [
+      { value: '0', count: totalRows - minEventCount },
+      { value: '1', count: minEventCount },
+    ],
+  };
+
+  return {
+    totalRows,
+    totalColumns: 10,
+    columns: [
+      outcomeColumn,
+      completeColumn('Age', 'numeric', 50),
+      completeColumn('Sex', 'categorical', 2),
+      { name: 'BMI', type: 'numeric', missing: 5, missingPercent: 2.5, unique: 80 },
+      completeColumn('Smoking', 'categorical', 2),
+      completeColumn('SBP', 'numeric', 100),
+    ],
+  } as any;
+}
+
+// ────────────────────────────────────────────
+// 测试 1: ConfigLoader + Zod 校验
+// ────────────────────────────────────────────
+
+/**
+ * Test 1: loads the three configuration sets (tool registry, decision table,
+ * flow templates), checks their minimum contents, then triggers a reload of
+ * all configs.
+ *
+ * Each group runs in its own guard so that an exception in one group is
+ * reported as a single failed check and the remaining groups still run.
+ */
+function testConfigLoading() {
+  section('测试 1: ConfigLoader 加载 + Zod 校验');
+
+  // Runs one group of checks; anything thrown becomes one failed assertion.
+  // The thrown value is narrowed from `unknown` so that a non-Error throw
+  // still produces a readable detail instead of being dropped.
+  const guarded = (failureLabel: string, checks: () => void): void => {
+    try {
+      checks();
+    } catch (err: unknown) {
+      assert(false, failureLabel, err instanceof Error ? err.message : String(err));
+    }
+  };
+
+  // Tool registry
+  guarded('工具注册表加载失败', () => {
+    const tools = toolsRegistryLoader.get();
+    assert(!!tools, '工具注册表加载成功');
+    assert(tools.tools.length >= 7, `工具数量 >= 7（实际 ${tools.tools.length}）`);
+    assert(tools.tools.every(t => /^ST_[A-Z_]+$/.test(t.code)), '所有工具 code 格式正确 (ST_XXX)');
+
+    const toolCodes = tools.tools.map(t => t.code);
+    assert(toolCodes.includes('ST_DESCRIPTIVE'), '包含 ST_DESCRIPTIVE');
+    assert(toolCodes.includes('ST_T_TEST_IND'), '包含 ST_T_TEST_IND');
+    assert(toolCodes.includes('ST_LOGISTIC_BINARY'), '包含 ST_LOGISTIC_BINARY');
+  });
+
+  // Decision table
+  guarded('决策表加载失败', () => {
+    const rules = decisionTablesLoader.get();
+    assert(!!rules, '决策表加载成功');
+    assert(rules.length >= 9, `规则数量 >= 9（实际 ${rules.length}）`);
+    assert(rules.every(r => r.id && r.goal && r.primaryTool), '所有规则含必填字段');
+
+    const ids = rules.map(r => r.id);
+    assert(ids.includes('DESC_ANY'), '包含 DESC_ANY 兜底规则');
+    assert(ids.includes('COHORT_STUDY'), '包含 COHORT_STUDY 队列研究规则');
+  });
+
+  // Flow templates
+  guarded('流程模板加载失败', () => {
+    const templates = flowTemplatesLoader.get();
+    assert(!!templates, '流程模板加载成功');
+    assert(templates.templates.length >= 5, `模板数量 >= 5（实际 ${templates.templates.length}）`);
+
+    const ids = templates.templates.map(t => t.id);
+    assert(ids.includes('standard_analysis'), '包含 standard_analysis 模板');
+    assert(ids.includes('cohort_study_standard'), '包含 cohort_study_standard 模板');
+    assert(ids.includes('descriptive_only'), '包含 descriptive_only 模板');
+  });
+
+  // Hot reload of every config file
+  guarded('热更新失败', () => {
+    const results = reloadAllConfigs();
+    assert(results.every(r => r.success), `热更新全部成功（${results.length} 个文件）`);
+  });
+}
+
+// ────────────────────────────────────────────
+// 测试 2: DecisionTableService 四维匹配
+// ────────────────────────────────────────────
+
+/**
+ * Test 2: feeds eight mock queries (scenarios A–H) to
+ * `decisionTableService.match` and checks the selected primary tool,
+ * fallback tool, switch condition and template id where applicable.
+ */
+function testDecisionTableMatching() {
+  section('测试 2: DecisionTableService 四维匹配');
+
+  type Match = ReturnType<typeof decisionTableService.match>;
+
+  // Shared expectations for the two most common checks.
+  const expectPrimary = (tag: string, result: Match, tool: string) =>
+    assert(result.primaryTool === tool, `场景 ${tag}: Primary = ${tool}（实际 ${result.primaryTool}）`);
+  const expectTemplate = (tag: string, result: Match, templateId: string) =>
+    assert(result.templateId === templateId, `场景 ${tag}: Template = ${templateId}（实际 ${result.templateId}）`);
+
+  // Scenario A: continuous outcome, two independent groups → t-test with Mann-Whitney fallback
+  const resultA = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'comparison',
+      outcome_type: 'continuous',
+      predictor_types: ['binary'],
+      design: 'independent',
+    }),
+  );
+  expectPrimary('A', resultA, 'ST_T_TEST_IND');
+  assert(resultA.fallbackTool === 'ST_MANN_WHITNEY', `场景 A: Fallback = ST_MANN_WHITNEY（实际 ${resultA.fallbackTool}）`);
+  assert(resultA.switchCondition !== null, '场景 A: 有 switchCondition（正态性检验）');
+  expectTemplate('A', resultA, 'standard_analysis');
+
+  // Scenario B: paired design
+  const resultB = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'comparison',
+      outcome_type: 'continuous',
+      predictor_types: ['binary'],
+      design: 'paired',
+    }),
+  );
+  expectPrimary('B', resultB, 'ST_T_TEST_PAIRED');
+  expectTemplate('B', resultB, 'paired_analysis');
+
+  // Scenario C: categorical vs categorical → chi-square
+  const resultC = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'comparison',
+      outcome_type: 'categorical',
+      predictor_types: ['categorical'],
+      design: 'independent',
+    }),
+  );
+  expectPrimary('C', resultC, 'ST_CHI_SQUARE');
+
+  // Scenario D: correlation between two continuous variables
+  const resultD = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'correlation',
+      outcome_type: 'continuous',
+      predictor_types: ['continuous'],
+      design: 'independent',
+    }),
+  );
+  expectPrimary('D', resultD, 'ST_CORRELATION');
+
+  // Scenario E: binary outcome regression → logistic regression
+  const resultE = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'regression',
+      outcome_type: 'binary',
+      predictor_types: ['continuous'],
+      design: 'independent',
+    }),
+  );
+  expectPrimary('E', resultE, 'ST_LOGISTIC_BINARY');
+
+  // Scenario F: descriptive goal with no outcome or predictor types
+  const resultF = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'descriptive',
+      outcome_type: null,
+      predictor_types: [],
+    }),
+  );
+  expectPrimary('F', resultF, 'ST_DESCRIPTIVE');
+
+  // Scenario G: cohort study → dedicated template
+  const resultG = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'cohort_study',
+      outcome_type: 'binary',
+      predictor_types: ['categorical'],
+      design: 'independent',
+    }),
+  );
+  expectTemplate('G', resultG, 'cohort_study_standard');
+
+  // Scenario H: variable types outside the declared set, expected to land on descriptive statistics.
+  // NOTE(review): the goal here is still 'descriptive', so this does not exercise an unknown goal —
+  // confirm whether a truly unmatched goal should be covered as well.
+  const resultH = decisionTableService.match(
+    makeParsedQuery({
+      goal: 'descriptive' as any,
+      outcome_type: 'datetime' as any,
+      predictor_types: ['datetime' as any],
+    }),
+  );
+  assert(resultH.primaryTool === 'ST_DESCRIPTIVE', `场景 H: 无精确匹配 → Fallback ST_DESCRIPTIVE（实际 ${resultH.primaryTool}）`);
+}
+
+// ────────────────────────────────────────────
+// 测试 3: FlowTemplateService 模板填充
+// ────────────────────────────────────────────
+
+/**
+ * Test 3: matches five mock queries (scenarios A–E), fills the selected flow
+ * template with `flowTemplateService.fill`, and checks the produced steps.
+ *
+ * Step lookups by index use optional chaining throughout, so a template that
+ * yields fewer steps than expected is reported as failed checks instead of
+ * raising a TypeError that would skip the remaining scenarios.
+ */
+function testFlowTemplateFilling() {
+  section('测试 3: FlowTemplateService 模板填充');
+
+  // Scenario A: standard_analysis with a fallback tool → 3 steps
+  const queryA = makeParsedQuery({
+    goal: 'comparison',
+    outcome_type: 'continuous',
+    predictor_types: ['binary'],
+    outcome_var: 'BP',
+    predictor_vars: ['Drug'],
+    grouping_var: 'Drug',
+  });
+  const matchA = decisionTableService.match(queryA);
+  const fillA = flowTemplateService.fill(matchA, queryA);
+  assert(fillA.steps.length === 3, `场景 A: 3 步流程（实际 ${fillA.steps.length}）`);
+  assert(fillA.steps[0]?.toolCode === 'ST_DESCRIPTIVE', `场景 A 步骤 1: ST_DESCRIPTIVE（实际 ${fillA.steps[0]?.toolCode ?? 'N/A'}）`);
+  assert(fillA.steps[1]?.toolCode === 'ST_T_TEST_IND', `场景 A 步骤 2: ST_T_TEST_IND（实际 ${fillA.steps[1]?.toolCode ?? 'N/A'}）`);
+  assert(fillA.steps[2]?.toolCode === 'ST_MANN_WHITNEY', `场景 A 步骤 3: ST_MANN_WHITNEY（实际 ${fillA.steps[2]?.toolCode ?? 'N/A'}）`);
+  assert(fillA.steps[2]?.isSensitivity === true, '场景 A 步骤 3: isSensitivity = true');
+  assert(fillA.epvWarning === null, '场景 A: 无 EPV 警告');
+
+  // Scenario B: descriptive_only without a fallback → 1 step
+  const queryB = makeParsedQuery({
+    goal: 'descriptive',
+    outcome_type: null,
+    predictor_types: [],
+  });
+  const matchB = decisionTableService.match(queryB);
+  const fillB = flowTemplateService.fill(matchB, queryB);
+  assert(fillB.steps.length === 1, `场景 B: 1 步流程（实际 ${fillB.steps.length}）`);
+  assert(fillB.steps[0]?.toolCode === 'ST_DESCRIPTIVE', '场景 B: ST_DESCRIPTIVE');
+
+  // Scenario C: cohort study → 3 steps (Table 1/2/3)
+  const queryC = makeParsedQuery({
+    goal: 'cohort_study',
+    outcome_var: 'Event',
+    outcome_type: 'binary',
+    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP'],
+    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous'],
+    grouping_var: 'Drug',
+    design: 'independent',
+  });
+  const matchC = decisionTableService.match(queryC);
+  const fillC = flowTemplateService.fill(matchC, queryC);
+  assert(fillC.steps.length === 3, `场景 C: 队列研究 3 步（实际 ${fillC.steps.length}）`);
+  assert(fillC.steps.length > 0 && fillC.steps[0].name.includes('表1'), `场景 C 步骤 1: 表1（实际 "${fillC.steps[0]?.name ?? 'N/A'}"）`);
+  assert(fillC.steps.length > 1 && fillC.steps[1].name.includes('表2'), `场景 C 步骤 2: 表2（实际 "${fillC.steps[1]?.name ?? 'N/A'}"）`);
+  assert(fillC.steps.length > 2 && fillC.steps[2].name.includes('表3'), `场景 C 步骤 3: 表3（实际 "${fillC.steps[2]?.name ?? 'N/A'}"）`);
+
+  // Scenario D: EPV truncation — the mock profile has 30 events for 8 requested
+  // predictors; the test expects at most 3 to survive (30 / 10) plus a warning.
+  const queryD = makeParsedQuery({
+    goal: 'cohort_study',
+    outcome_var: 'Event',
+    outcome_type: 'binary',
+    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP', 'HR', 'Chol', 'LDL'],
+    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous', 'continuous', 'continuous', 'continuous'],
+    grouping_var: 'Drug',
+    design: 'independent',
+  });
+  const profileD = makeMockProfile('Event', 30);
+  const matchD = decisionTableService.match(queryD);
+  const fillD = flowTemplateService.fill(matchD, queryD, profileD);
+
+  const table3Step = fillD.steps.find(s => s.name.includes('表3'));
+  if (table3Step) {
+    const predictors = table3Step.params.predictors as string[] | undefined;
+    if (predictors) {
+      assert(predictors.length <= 3, `场景 D EPV 截断: 自变量 <= 3（实际 ${predictors.length}，原始 8）`);
+    } else {
+      assert(false, '场景 D EPV 截断: 未找到 predictors 参数');
+    }
+  } else {
+    assert(false, '场景 D: 未找到表3 步骤');
+  }
+  assert(fillD.epvWarning !== null, `场景 D: 有 EPV 警告（${fillD.epvWarning?.substring(0, 40)}...）`);
+
+  // Scenario E: paired analysis → 2 steps, none flagged as sensitivity analysis
+  const queryE = makeParsedQuery({
+    goal: 'comparison',
+    outcome_type: 'continuous',
+    predictor_types: ['binary'],
+    design: 'paired',
+    outcome_var: 'BP_after',
+    predictor_vars: ['BP_before'],
+  });
+  const matchE = decisionTableService.match(queryE);
+  const fillE = flowTemplateService.fill(matchE, queryE);
+  assert(fillE.steps.length === 2, `场景 E: 配对分析 2 步（实际 ${fillE.steps.length}）`);
+  assert(fillE.steps.every(s => !s.isSensitivity), '场景 E: 无敏感性分析步骤');
+}
+
+// ────────────────────────────────────────────
+// 测试 4: PlannedTrace 完整性
+// ────────────────────────────────────────────
+
+/**
+ * Test 4: checks that one match + fill round trip exposes the fields a
+ * planned trace is built from (rule id, primary/fallback tool, switch
+ * condition, template id, match score) and that the primary step received
+ * the query's variables.
+ */
+function testPlannedTrace() {
+  section('测试 4: PlannedTrace 数据完整性');
+
+  const parsed = makeParsedQuery({
+    goal: 'comparison',
+    outcome_type: 'continuous',
+    predictor_types: ['binary'],
+    design: 'independent',
+    outcome_var: 'BP',
+    predictor_vars: ['Drug'],
+    grouping_var: 'Drug',
+  });
+  const decision = decisionTableService.match(parsed);
+  const plan = flowTemplateService.fill(decision, parsed);
+
+  // Fields the planned trace is expected to carry
+  assert(decision.rule.id !== '', 'PlannedTrace: matchedRule 非空');
+  assert(decision.primaryTool === 'ST_T_TEST_IND', 'PlannedTrace: primaryTool = ST_T_TEST_IND');
+  assert(decision.fallbackTool === 'ST_MANN_WHITNEY', 'PlannedTrace: fallbackTool = ST_MANN_WHITNEY');
+  assert(decision.switchCondition !== null, 'PlannedTrace: switchCondition 非空');
+  assert(plan.templateId === 'standard_analysis', 'PlannedTrace: templateUsed = standard_analysis');
+  assert(decision.matchScore > 0, `PlannedTrace: matchScore > 0（实际 ${decision.matchScore}）`);
+
+  // Parameters must have been passed through to the primary test step
+  const primaryStep = plan.steps.find(({ role }) => role === 'primary_test');
+  assert(Boolean(primaryStep), 'Primary step 存在');
+  if (!primaryStep) {
+    return;
+  }
+
+  // NOTE(review): either variable matching is enough to pass — confirm whether both should be required.
+  const { params } = primaryStep;
+  const usesExpectedVariable = params.group_var === 'Drug' || params.value_var === 'BP';
+  assert(usesExpectedVariable, 'Primary step 参数包含正确变量');
+}
+
+// ────────────────────────────────────────────
+// 运行所有测试
+// ────────────────────────────────────────────
+
+// Entry point: runs the four test groups in order, prints a summary, and
+// exits with status 1 when any check failed (0 otherwise).
+console.log('\n🧪 SSA Phase P — Tracer Bullet 测试\n');
+console.log('测试范围：ConfigLoader → DecisionTable → FlowTemplate → PlannedTrace');
+console.log('依赖项：无（不需要数据库、LLM、R 引擎）\n');
+
+try {
+  testConfigLoading();
+  testDecisionTableMatching();
+  testFlowTemplateFilling();
+  testPlannedTrace();
+} catch (e: unknown) {
+  // An exception escaping a test group aborts the remaining groups and is
+  // counted as one failure. The value is narrowed first so that a non-Error
+  // throw is printed as itself rather than as "undefined".
+  if (e instanceof Error) {
+    console.error(`\n💥 测试过程中发生未捕获异常：${e.message}`);
+    console.error(e.stack);
+  } else {
+    console.error(`\n💥 测试过程中发生未捕获异常：${String(e)}`);
+  }
+  failed++;
+}
+
+// Summary
+console.log(`\n${'═'.repeat(60)}`);
+console.log(`📊 测试结果汇总：${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
+if (failed === 0) {
+  console.log('🎉 全部通过！P 层 Pipeline 验证成功。');
+} else {
+  console.log(`⚠️ 有 ${failed} 个测试失败，请检查上方输出。`);
+}
+console.log('═'.repeat(60));
+
+process.exit(failed > 0 ? 1 : 0);