feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers

Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-21 18:15:53 +08:00
parent 428a22adf2
commit 371e1c069c
73 changed files with 9242 additions and 706 deletions

View File

@@ -0,0 +1,395 @@
/**
 * SSA Phase P — Tracer Bullet test script
 *
 * Coverage:
 * 1. ConfigLoader loading + Zod validation of the 3 JSON config files
 * 2. DecisionTableService four-dimension matching (8 scenarios, A–H)
 * 3. FlowTemplateService template filling + EPV truncation
 * 4. Q→P integration (mock ParsedQuery → WorkflowPlan)
 *
 * Usage: npx tsx scripts/test-ssa-planner-pipeline.ts
 * Requires no database, LLM, or R engine
 */
import { toolsRegistryLoader, decisionTablesLoader, flowTemplatesLoader, reloadAllConfigs } from '../src/modules/ssa/config/index.js';
import { decisionTableService } from '../src/modules/ssa/services/DecisionTableService.js';
import { flowTemplateService } from '../src/modules/ssa/services/FlowTemplateService.js';
import type { ParsedQuery } from '../src/modules/ssa/types/query.types.js';
import type { DataProfile } from '../src/modules/ssa/services/DataProfileService.js';
// ────────────────────────────────────────────
// 工具函数
// ────────────────────────────────────────────
// Running tallies of check outcomes: incremented as checks are recorded and
// read back when the final summary and exit code are produced.
let passed = 0,
  failed = 0;
/**
 * Records the outcome of a single check and prints one line for it.
 *
 * Never throws: it only bumps the module-level `passed` / `failed` counters,
 * so one failing check does not stop the checks that follow it.
 *
 * @param condition - Result of the check; `true` counts as a pass.
 * @param testName - Label printed for this check.
 * @param detail - Optional extra context, printed only when the check fails.
 */
function assert(condition: boolean, testName: string, detail?: string): void {
  if (condition) {
    // Explicit marker: without it a passing line is indistinguishable from a failing one.
    console.log(`  ✅ ${testName}`);
    passed++;
    return;
  }
  // Keep the detail visually separate from the label instead of gluing them together.
  console.log(`  ❌ ${testName}${detail ? ` — ${detail}` : ''}`);
  failed++;
}
/**
 * Prints a banner that introduces a group of checks: a blank line, a
 * 60-character rule, the title line, and a closing rule.
 *
 * @param title - Heading text shown between the two rules.
 */
function section(title: string): void {
  const rule = '─'.repeat(60);
  const bannerLines = [`\n${rule}`, `📋 ${title}`, rule];
  for (const line of bannerLines) {
    console.log(line);
  }
}
// ────────────────────────────────────────────
// Mock 数据
// ────────────────────────────────────────────
/**
 * Builds a ParsedQuery fixture for the planner checks.
 *
 * The baseline describes an independent two-group comparison of the
 * continuous outcome `BP` by `Drug`; any field present in `overrides`
 * replaces the baseline value.
 *
 * @param overrides - Fields to replace on top of the baseline query.
 * @returns A fresh query object (baseline arrays are re-created on every call).
 */
function makeParsedQuery(overrides: Partial<ParsedQuery>): ParsedQuery {
  const baseline: ParsedQuery = {
    goal: 'comparison',
    outcome_var: 'BP',
    outcome_type: 'continuous',
    predictor_vars: ['Drug'],
    predictor_types: ['binary'],
    grouping_var: 'Drug',
    design: 'independent',
    confidence: 0.9,
    reasoning: 'test',
    needsClarification: false,
  };
  // Later spread wins, so caller-supplied fields take precedence.
  return { ...baseline, ...overrides };
}
/**
 * Builds a small DataProfile fixture with a two-level outcome column and
 * five fixed covariate columns (Age, Sex, BMI, Smoking, SBP).
 *
 * The outcome column reports `minEventCount` rows with value '1' and the
 * remaining rows (out of 200) with value '0'.
 *
 * @param outcomeVar - Name given to the outcome column.
 * @param minEventCount - Row count reported for outcome value '1'.
 * @returns The fixture, cast to DataProfile.
 */
function makeMockProfile(outcomeVar: string, minEventCount: number): DataProfile {
  const totalRows = 200;
  const noMissing = { missing: 0, missingPercent: 0 };

  const outcomeColumn = {
    name: outcomeVar,
    type: 'categorical',
    ...noMissing,
    unique: 2,
    topValues: [
      { value: '0', count: totalRows - minEventCount },
      { value: '1', count: minEventCount },
    ],
  };

  const covariateColumns = [
    { name: 'Age', type: 'numeric', ...noMissing, unique: 50 },
    { name: 'Sex', type: 'categorical', ...noMissing, unique: 2 },
    { name: 'BMI', type: 'numeric', missing: 5, missingPercent: 2.5, unique: 80 },
    { name: 'Smoking', type: 'categorical', ...noMissing, unique: 2 },
    { name: 'SBP', type: 'numeric', ...noMissing, unique: 100 },
  ];

  const profile = {
    totalRows,
    totalColumns: 10,
    columns: [outcomeColumn, ...covariateColumns],
  };

  // NOTE(review): the cast skips type-checking against DataProfile, presumably
  // because the fixture is only a partial profile — confirm against the real type.
  return profile as any;
}
// ────────────────────────────────────────────
// 测试 1: ConfigLoader + Zod 校验
// ────────────────────────────────────────────
/**
 * Test 1: checks that the three config loaders return data of the expected
 * shape and that reloading all configs reports success for every file.
 *
 * Each loader is exercised inside its own try/catch so that a throw from one
 * loader is recorded as a single failed check and the remaining loaders are
 * still exercised.
 */
function testConfigLoading(): void {
  section('测试 1: ConfigLoader 加载 + Zod 校验');

  // A thrown value is not guaranteed to be an Error; narrow before reading
  // `.message` so the failure line always carries a usable reason.
  const reasonOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Tools registry: minimum size, code naming pattern, and three expected codes.
  try {
    const tools = toolsRegistryLoader.get();
    assert(!!tools, '工具注册表加载成功');
    assert(tools.tools.length >= 7, `工具数量 >= 7实际 ${tools.tools.length}`);
    assert(tools.tools.every(t => /^ST_[A-Z_]+$/.test(t.code)), '所有工具 code 格式正确 (ST_XXX)');
    const toolCodes = tools.tools.map(t => t.code);
    assert(toolCodes.includes('ST_DESCRIPTIVE'), '包含 ST_DESCRIPTIVE');
    assert(toolCodes.includes('ST_T_TEST_IND'), '包含 ST_T_TEST_IND');
    assert(toolCodes.includes('ST_LOGISTIC_BINARY'), '包含 ST_LOGISTIC_BINARY');
  } catch (e: unknown) {
    assert(false, '工具注册表加载失败', reasonOf(e));
  }

  // Decision table: minimum rule count, required fields, and two expected rule ids.
  try {
    const rules = decisionTablesLoader.get();
    assert(!!rules, '决策表加载成功');
    assert(rules.length >= 9, `规则数量 >= 9实际 ${rules.length}`);
    assert(rules.every(r => r.id && r.goal && r.primaryTool), '所有规则含必填字段');
    const ruleIds = rules.map(r => r.id);
    assert(ruleIds.includes('DESC_ANY'), '包含 DESC_ANY 兜底规则');
    assert(ruleIds.includes('COHORT_STUDY'), '包含 COHORT_STUDY 队列研究规则');
  } catch (e: unknown) {
    assert(false, '决策表加载失败', reasonOf(e));
  }

  // Flow templates: minimum template count and three expected template ids.
  try {
    const templates = flowTemplatesLoader.get();
    assert(!!templates, '流程模板加载成功');
    assert(templates.templates.length >= 5, `模板数量 >= 5实际 ${templates.templates.length}`);
    const templateIds = templates.templates.map(t => t.id);
    assert(templateIds.includes('standard_analysis'), '包含 standard_analysis 模板');
    assert(templateIds.includes('cohort_study_standard'), '包含 cohort_study_standard 模板');
    assert(templateIds.includes('descriptive_only'), '包含 descriptive_only 模板');
  } catch (e: unknown) {
    assert(false, '流程模板加载失败', reasonOf(e));
  }

  // Hot reload: every per-file result must report success.
  try {
    const results = reloadAllConfigs();
    assert(results.every(r => r.success), `热更新全部成功(${results.length} 个文件)`);
  } catch (e: unknown) {
    assert(false, '热更新失败', reasonOf(e));
  }
}
// ────────────────────────────────────────────
// 测试 2: DecisionTableService 四维匹配
// ────────────────────────────────────────────
/**
 * Test 2: feeds eight mock queries (scenarios A–H) through
 * `decisionTableService.match` and checks the selected primary tool,
 * fallback tool, switch condition and/or template id for each.
 */
function testDecisionTableMatching(): void {
  section('测试 2: DecisionTableService 四维匹配');

  // Builds a query from the shared baseline plus overrides and matches it.
  const resolve = (overrides: Partial<ParsedQuery>) =>
    decisionTableService.match(makeParsedQuery(overrides));

  // Scenario A: two-group comparison of a continuous outcome, independent samples.
  {
    const hit = resolve({
      goal: 'comparison',
      outcome_type: 'continuous',
      predictor_types: ['binary'],
      design: 'independent',
    });
    assert(hit.primaryTool === 'ST_T_TEST_IND', `场景 A: Primary = ST_T_TEST_IND实际 ${hit.primaryTool}`);
    assert(hit.fallbackTool === 'ST_MANN_WHITNEY', `场景 A: Fallback = ST_MANN_WHITNEY实际 ${hit.fallbackTool}`);
    assert(hit.switchCondition !== null, '场景 A: 有 switchCondition正态性检验');
    assert(hit.templateId === 'standard_analysis', `场景 A: Template = standard_analysis实际 ${hit.templateId}`);
  }

  // Scenario B: paired design.
  {
    const hit = resolve({
      goal: 'comparison',
      outcome_type: 'continuous',
      predictor_types: ['binary'],
      design: 'paired',
    });
    assert(hit.primaryTool === 'ST_T_TEST_PAIRED', `场景 B: Primary = ST_T_TEST_PAIRED实际 ${hit.primaryTool}`);
    assert(hit.templateId === 'paired_analysis', `场景 B: Template = paired_analysis实际 ${hit.templateId}`);
  }

  // Scenario C: categorical outcome vs categorical predictor.
  {
    const hit = resolve({
      goal: 'comparison',
      outcome_type: 'categorical',
      predictor_types: ['categorical'],
      design: 'independent',
    });
    assert(hit.primaryTool === 'ST_CHI_SQUARE', `场景 C: Primary = ST_CHI_SQUARE实际 ${hit.primaryTool}`);
  }

  // Scenario D: correlation, continuous vs continuous.
  {
    const hit = resolve({
      goal: 'correlation',
      outcome_type: 'continuous',
      predictor_types: ['continuous'],
      design: 'independent',
    });
    assert(hit.primaryTool === 'ST_CORRELATION', `场景 D: Primary = ST_CORRELATION实际 ${hit.primaryTool}`);
  }

  // Scenario E: regression on a binary outcome.
  {
    const hit = resolve({
      goal: 'regression',
      outcome_type: 'binary',
      predictor_types: ['continuous'],
      design: 'independent',
    });
    assert(hit.primaryTool === 'ST_LOGISTIC_BINARY', `场景 E: Primary = ST_LOGISTIC_BINARY实际 ${hit.primaryTool}`);
  }

  // Scenario F: descriptive goal with no outcome type and no predictors.
  {
    const hit = resolve({
      goal: 'descriptive',
      outcome_type: null,
      predictor_types: [],
    });
    assert(hit.primaryTool === 'ST_DESCRIPTIVE', `场景 F: Primary = ST_DESCRIPTIVE实际 ${hit.primaryTool}`);
  }

  // Scenario G: cohort study.
  {
    const hit = resolve({
      goal: 'cohort_study',
      outcome_type: 'binary',
      predictor_types: ['categorical'],
      design: 'independent',
    });
    assert(hit.templateId === 'cohort_study_standard', `场景 G: Template = cohort_study_standard实际 ${hit.templateId}`);
  }

  // Scenario H: variable types with no exact rule; expects the descriptive tool.
  // NOTE(review): the goal here is still 'descriptive', so this exercises
  // unmatched variable types rather than an unknown goal — confirm the intent.
  {
    const hit = resolve({
      goal: 'descriptive' as any,
      outcome_type: 'datetime' as any,
      predictor_types: ['datetime' as any],
    });
    assert(hit.primaryTool === 'ST_DESCRIPTIVE', `场景 H: 无精确匹配 → Fallback ST_DESCRIPTIVE实际 ${hit.primaryTool}`);
  }
}
// ────────────────────────────────────────────
// 测试 3: FlowTemplateService 模板填充
// ────────────────────────────────────────────
/**
 * Test 3: matches five mock queries (scenarios A–E), fills the resulting
 * flow template via `flowTemplateService.fill`, and checks the step list,
 * sensitivity flags and EPV warning.
 *
 * Steps are always read with optional chaining, so a plan with fewer steps
 * than expected is recorded as failed checks instead of throwing a TypeError
 * that would abort every later scenario.
 */
function testFlowTemplateFilling(): void {
  section('测试 3: FlowTemplateService 模板填充');

  // Scenario A: standard_analysis with a fallback tool → 3 steps.
  const queryA = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const matchA = decisionTableService.match(queryA);
  const fillA = flowTemplateService.fill(matchA, queryA);
  assert(fillA.steps.length === 3, `场景 A: 3 步流程(实际 ${fillA.steps.length}`);
  assert(fillA.steps[0]?.toolCode === 'ST_DESCRIPTIVE', `场景 A 步骤 1: ST_DESCRIPTIVE实际 ${fillA.steps[0]?.toolCode}`);
  assert(fillA.steps[1]?.toolCode === 'ST_T_TEST_IND', `场景 A 步骤 2: ST_T_TEST_IND实际 ${fillA.steps[1]?.toolCode}`);
  assert(fillA.steps[2]?.toolCode === 'ST_MANN_WHITNEY', `场景 A 步骤 3: ST_MANN_WHITNEY实际 ${fillA.steps[2]?.toolCode}`);
  assert(fillA.steps[2]?.isSensitivity === true, '场景 A 步骤 3: isSensitivity = true');
  assert(fillA.epvWarning === null, '场景 A: 无 EPV 警告');

  // Scenario B: descriptive_only, no fallback → 1 step.
  const queryB = makeParsedQuery({
    goal: 'descriptive',
    outcome_type: null,
    predictor_types: [],
  });
  const matchB = decisionTableService.match(queryB);
  const fillB = flowTemplateService.fill(matchB, queryB);
  assert(fillB.steps.length === 1, `场景 B: 1 步流程(实际 ${fillB.steps.length}`);
  assert(fillB.steps[0]?.toolCode === 'ST_DESCRIPTIVE', '场景 B: ST_DESCRIPTIVE');

  // Scenario C: cohort study → 3 steps whose names contain 表1 / 表2 / 表3 in order.
  const queryC = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const matchC = decisionTableService.match(queryC);
  const fillC = flowTemplateService.fill(matchC, queryC);
  assert(fillC.steps.length === 3, `场景 C: 队列研究 3 步(实际 ${fillC.steps.length}`);
  ['表1', '表2', '表3'].forEach((tableLabel, index) => {
    const stepName = fillC.steps[index]?.name;
    assert(
      stepName?.includes(tableLabel) === true,
      `场景 C 步骤 ${index + 1}: ${tableLabel}实际 "${stepName ?? 'N/A'}"`,
    );
  });

  // Scenario D: EPV truncation — the mock profile reports 30 events, and the
  // check expects at most 3 of the 8 requested predictors to survive.
  const queryD = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP', 'HR', 'Chol', 'LDL'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous', 'continuous', 'continuous', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const profileD = makeMockProfile('Event', 30);
  const matchD = decisionTableService.match(queryD);
  const fillD = flowTemplateService.fill(matchD, queryD, profileD);
  const table3Step = fillD.steps.find(s => s.name.includes('表3'));
  const predictors = table3Step?.params.predictors as string[] | undefined;
  if (!table3Step) {
    assert(false, '场景 D: 未找到表3 步骤');
  } else if (!predictors) {
    assert(false, '场景 D EPV 截断: 未找到 predictors 参数');
  } else {
    assert(predictors.length <= 3, `场景 D EPV 截断: 自变量 <= 3实际 ${predictors.length},原始 8`);
  }
  assert(fillD.epvWarning !== null, `场景 D: 有 EPV 警告(${fillD.epvWarning?.substring(0, 40)}...`);

  // Scenario E: paired analysis → 2 steps, none flagged as sensitivity.
  const queryE = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'paired',
    outcome_var: 'BP_after',
    predictor_vars: ['BP_before'],
  });
  const matchE = decisionTableService.match(queryE);
  const fillE = flowTemplateService.fill(matchE, queryE);
  assert(fillE.steps.length === 2, `场景 E: 配对分析 2 步(实际 ${fillE.steps.length}`);
  assert(fillE.steps.every(s => !s.isSensitivity), '场景 E: 无敏感性分析步骤');
}
// ────────────────────────────────────────────
// 测试 4: PlannedTrace 完整性
// ────────────────────────────────────────────
/**
 * Test 4: for one independent two-group comparison query, checks that the
 * match result and the filled template expose the fields a PlannedTrace
 * needs (rule id, primary/fallback tool, switch condition, template id,
 * match score) and that the primary step received the query's variables.
 */
function testPlannedTrace(): void {
  section('测试 4: PlannedTrace 数据完整性');

  const comparisonQuery = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'independent',
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const decision = decisionTableService.match(comparisonQuery);
  const plan = flowTemplateService.fill(decision, comparisonQuery);

  // Fields the trace is expected to carry.
  assert(decision.rule.id !== '', 'PlannedTrace: matchedRule 非空');
  assert(decision.primaryTool === 'ST_T_TEST_IND', 'PlannedTrace: primaryTool = ST_T_TEST_IND');
  assert(decision.fallbackTool === 'ST_MANN_WHITNEY', 'PlannedTrace: fallbackTool = ST_MANN_WHITNEY');
  assert(decision.switchCondition !== null, 'PlannedTrace: switchCondition 非空');
  assert(plan.templateId === 'standard_analysis', 'PlannedTrace: templateUsed = standard_analysis');
  assert(decision.matchScore > 0, `PlannedTrace: matchScore > 0实际 ${decision.matchScore}`);

  // Parameters must have been forwarded to the primary test step.
  const primaryStep = plan.steps.find(step => step.role === 'primary_test');
  assert(Boolean(primaryStep), 'Primary step 存在');
  if (!primaryStep) {
    return;
  }
  // NOTE(review): either variable alone satisfies this check — confirm that
  // accepting just one of group_var / value_var is intended.
  const carriesExpectedVariable =
    primaryStep.params.group_var === 'Drug' || primaryStep.params.value_var === 'BP';
  assert(carriesExpectedVariable, 'Primary step 参数包含正确变量');
}
// ────────────────────────────────────────────
// 运行所有测试
// ────────────────────────────────────────────
// Entry point: print the banner, run the four test groups in order, print the
// summary, and exit with status 1 if any check failed (0 otherwise).
console.log('\n🧪 SSA Phase P — Tracer Bullet 测试\n');
console.log('测试范围ConfigLoader → DecisionTable → FlowTemplate → PlannedTrace');
console.log('依赖项不需要数据库、LLM、R 引擎)\n');

try {
  testConfigLoading();
  testDecisionTableMatching();
  testFlowTemplateFilling();
  testPlannedTrace();
} catch (e: unknown) {
  // A thrown value is not guaranteed to be an Error (it may even be
  // null/undefined), so narrow before touching `.message` / `.stack`.
  const reason = e instanceof Error ? e.message : String(e);
  console.error(`\n💥 测试过程中发生未捕获异常:${reason}`);
  if (e instanceof Error) {
    console.error(e.stack);
  }
  // An exception that escapes a test group counts as one failure.
  failed++;
}

// Summary
const summaryRule = '═'.repeat(60);
console.log(`\n${summaryRule}`);
console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
if (failed === 0) {
  console.log('🎉 全部通过P 层 Pipeline 验证成功。');
} else {
  console.log(`⚠️ 有 ${failed} 个测试失败,请检查上方输出。`);
}
console.log(summaryRule);

// NOTE(review): the explicit exit is kept as in the original; presumably it
// also guards against imported modules keeping the process alive — confirm
// before replacing it with `process.exitCode`.
process.exit(failed > 0 ? 1 : 0);