Files
AIclinicalresearch/backend/scripts/test-ssa-planner-pipeline.ts
HaHafeng 371e1c069c feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers
Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 18:15:53 +08:00

396 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
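/**
 * SSA Phase P — tracer-bullet test script
 *
 * Scope:
 * 1. ConfigLoader: load the 3 JSON config files and validate them with Zod
 * 2. DecisionTableService: four-dimension matching (scenarios A–H)
 * 3. FlowTemplateService: template filling + EPV truncation
 * 4. Q→P integration: mock ParsedQuery → WorkflowPlan
 *
 * Run with: npx tsx scripts/test-ssa-planner-pipeline.ts
 * Does not require a database, an LLM, or the R engine.
 */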
import { toolsRegistryLoader, decisionTablesLoader, flowTemplatesLoader, reloadAllConfigs } from '../src/modules/ssa/config/index.js';
import { decisionTableService } from '../src/modules/ssa/services/DecisionTableService.js';
import { flowTemplateService } from '../src/modules/ssa/services/FlowTemplateService.js';
import type { ParsedQuery } from '../src/modules/ssa/types/query.types.js';
import type { DataProfile } from '../src/modules/ssa/services/DataProfileService.js';
// ────────────────────────────────────────────
// 工具函数
// ────────────────────────────────────────────
// Running tallies of checks that succeeded / did not succeed; both start at zero
// and are reported in the summary printed at the end of the script.
let passed = 0,
  failed = 0;
/**
 * Records the outcome of a single check and prints one line for it.
 *
 * Never throws: a failing check only increments the module-level `failed`
 * counter, so later checks still run. A passing check increments `passed`.
 *
 * @param condition - Result of the check; `true` counts as a pass.
 * @param testName - Label printed for the check.
 * @param detail - Optional extra context; printed only when the check fails.
 */
function assert(condition: boolean, testName: string, detail?: string): void {
  if (condition) {
    // Explicit marker so a pass can be told apart from a failure in the log.
    console.log(`  ✅ ${testName}`);
    passed++;
    return;
  }
  // Separate the detail from the test name instead of gluing them together.
  const suffix = detail ? ` — ${detail}` : '';
  console.log(`  ❌ ${testName}${suffix}`);
  failed++;
}
/**
 * Prints a section header: a blank line, a 60-character rule, the title line
 * prefixed with a clipboard emoji, and a closing rule.
 *
 * @param title - Heading text for the group of checks that follows.
 */
function section(title: string) {
  const rule = '─'.repeat(60);
  console.log('\n' + rule);
  console.log('📋 ' + title);
  console.log(rule);
}
// ────────────────────────────────────────────
// Mock 数据
// ────────────────────────────────────────────
/**
 * Builds a ParsedQuery for tests from a fixed baseline, with any fields in
 * `overrides` replacing the baseline values.
 *
 * The baseline describes an independent-design comparison of the continuous
 * outcome `BP` by the binary predictor/grouping variable `Drug`.
 *
 * @param overrides - Fields that should differ from the baseline.
 * @returns A new query object; the baseline is rebuilt on every call.
 */
function makeParsedQuery(overrides: Partial<ParsedQuery>): ParsedQuery {
  const baseline: ParsedQuery = {
    goal: 'comparison',
    outcome_var: 'BP',
    outcome_type: 'continuous',
    predictor_vars: ['Drug'],
    predictor_types: ['binary'],
    grouping_var: 'Drug',
    design: 'independent',
    confidence: 0.9,
    reasoning: 'test',
    needsClarification: false,
  };
  // Later sources win, so every key present in `overrides` replaces the baseline value.
  return Object.assign(baseline, overrides);
}
/**
 * Builds a mock data profile of 200 rows whose outcome column is a two-level
 * categorical variable with a configurable number of `'1'` values.
 *
 * The outcome column's `topValues` are `'0'` with `200 - minEventCount` rows and
 * `'1'` with `minEventCount` rows. Five fixed covariate columns follow it
 * (Age, Sex, BMI, Smoking, SBP). `totalColumns` is a fixed 10 even though only
 * six columns are described. The result is cast through `any`, so it is not
 * type-checked against DataProfile.
 *
 * @param outcomeVar - Name given to the outcome column.
 * @param minEventCount - Row count assigned to the `'1'` level of the outcome.
 */
function makeMockProfile(outcomeVar: string, minEventCount: number): DataProfile {
  const rowCount = 200;

  // Shared column summary shape; most columns have no missing values.
  const column = (
    name: string,
    type: 'numeric' | 'categorical',
    unique: number,
    missing = 0,
    missingPercent = 0,
  ) => ({ name, type, missing, missingPercent, unique });

  const outcomeColumn = {
    ...column(outcomeVar, 'categorical', 2),
    topValues: [
      { value: '0', count: rowCount - minEventCount },
      { value: '1', count: minEventCount },
    ],
  };

  const profile = {
    totalRows: rowCount,
    totalColumns: 10,
    columns: [
      outcomeColumn,
      column('Age', 'numeric', 50),
      column('Sex', 'categorical', 2),
      column('BMI', 'numeric', 80, 5, 2.5),
      column('Smoking', 'categorical', 2),
      column('SBP', 'numeric', 100),
    ],
  };
  return profile as any;
}
// ────────────────────────────────────────────
// 测试 1: ConfigLoader + Zod 校验
// ────────────────────────────────────────────
/**
 * Test 1: reads the tools registry, decision tables and flow templates through
 * their loaders, checks the expected minimum contents, then reloads all configs.
 *
 * Each loader is exercised in its own try/catch, so an exception from one is
 * recorded as a single failed check and the remaining groups still run.
 */
function testConfigLoading() {
  section('测试 1: ConfigLoader 加载 + Zod 校验');

  // Tools registry
  try {
    const tools = toolsRegistryLoader.get();
    assert(!!tools, '工具注册表加载成功');
    assert(tools.tools.length >= 7, `工具数量 >= 7(实际 ${tools.tools.length})`);
    assert(tools.tools.every(t => /^ST_[A-Z_]+$/.test(t.code)), '所有工具 code 格式正确 (ST_XXX)');
    const toolCodes = tools.tools.map(t => t.code);
    assert(toolCodes.includes('ST_DESCRIPTIVE'), '包含 ST_DESCRIPTIVE');
    assert(toolCodes.includes('ST_T_TEST_IND'), '包含 ST_T_TEST_IND');
    assert(toolCodes.includes('ST_LOGISTIC_BINARY'), '包含 ST_LOGISTIC_BINARY');
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
    assert(false, '工具注册表加载失败', e instanceof Error ? e.message : String(e));
  }

  // Decision tables
  try {
    const rules = decisionTablesLoader.get();
    assert(!!rules, '决策表加载成功');
    assert(rules.length >= 9, `规则数量 >= 9(实际 ${rules.length})`);
    assert(rules.every(r => r.id && r.goal && r.primaryTool), '所有规则含必填字段');
    const ids = rules.map(r => r.id);
    assert(ids.includes('DESC_ANY'), '包含 DESC_ANY 兜底规则');
    assert(ids.includes('COHORT_STUDY'), '包含 COHORT_STUDY 队列研究规则');
  } catch (e: unknown) {
    assert(false, '决策表加载失败', e instanceof Error ? e.message : String(e));
  }

  // Flow templates
  try {
    const templates = flowTemplatesLoader.get();
    assert(!!templates, '流程模板加载成功');
    assert(templates.templates.length >= 5, `模板数量 >= 5(实际 ${templates.templates.length})`);
    const ids = templates.templates.map(t => t.id);
    assert(ids.includes('standard_analysis'), '包含 standard_analysis 模板');
    assert(ids.includes('cohort_study_standard'), '包含 cohort_study_standard 模板');
    assert(ids.includes('descriptive_only'), '包含 descriptive_only 模板');
  } catch (e: unknown) {
    assert(false, '流程模板加载失败', e instanceof Error ? e.message : String(e));
  }

  // Hot reload
  try {
    const results = reloadAllConfigs();
    // `every` is vacuously true for an empty array, so also require at least one result.
    assert(results.length > 0 && results.every(r => r.success), `热更新全部成功(${results.length} 个文件)`);
  } catch (e: unknown) {
    assert(false, '热更新失败', e instanceof Error ? e.message : String(e));
  }
}
// ────────────────────────────────────────────
// 测试 2: DecisionTableService 四维匹配
// ────────────────────────────────────────────
/**
 * Test 2: feeds eight mock queries (scenarios A–H) to
 * `decisionTableService.match` and checks the selected primary tool, and where
 * relevant the fallback tool, switch condition and template id.
 */
function testDecisionTableMatching() {
  section('测试 2: DecisionTableService 四维匹配');

  // Scenario A: continuous outcome, binary predictor, independent design
  // → expects the independent t-test with a Mann-Whitney fallback.
  const queryA = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'independent',
  });
  const matchA = decisionTableService.match(queryA);
  assert(matchA.primaryTool === 'ST_T_TEST_IND', `场景 A: Primary = ST_T_TEST_IND(实际 ${matchA.primaryTool})`);
  assert(matchA.fallbackTool === 'ST_MANN_WHITNEY', `场景 A: Fallback = ST_MANN_WHITNEY(实际 ${matchA.fallbackTool})`);
  // `!= null` rejects both null and undefined; `!== null` would pass on a missing field.
  assert(matchA.switchCondition != null, '场景 A: 有 switchCondition(正态性检验)');
  assert(matchA.templateId === 'standard_analysis', `场景 A: Template = standard_analysis(实际 ${matchA.templateId})`);

  // Scenario B: paired design.
  const queryB = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'paired',
  });
  const matchB = decisionTableService.match(queryB);
  assert(matchB.primaryTool === 'ST_T_TEST_PAIRED', `场景 B: Primary = ST_T_TEST_PAIRED(实际 ${matchB.primaryTool})`);
  assert(matchB.templateId === 'paired_analysis', `场景 B: Template = paired_analysis(实际 ${matchB.templateId})`);

  // Scenario C: categorical vs categorical → chi-square test.
  const queryC = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'categorical',
    predictor_types: ['categorical'],
    design: 'independent',
  });
  const matchC = decisionTableService.match(queryC);
  assert(matchC.primaryTool === 'ST_CHI_SQUARE', `场景 C: Primary = ST_CHI_SQUARE(实际 ${matchC.primaryTool})`);

  // Scenario D: correlation (continuous vs continuous).
  const queryD = makeParsedQuery({
    goal: 'correlation',
    outcome_type: 'continuous',
    predictor_types: ['continuous'],
    design: 'independent',
  });
  const matchD = decisionTableService.match(queryD);
  assert(matchD.primaryTool === 'ST_CORRELATION', `场景 D: Primary = ST_CORRELATION(实际 ${matchD.primaryTool})`);

  // Scenario E: regression on a binary outcome → logistic regression.
  const queryE = makeParsedQuery({
    goal: 'regression',
    outcome_type: 'binary',
    predictor_types: ['continuous'],
    design: 'independent',
  });
  const matchE = decisionTableService.match(queryE);
  assert(matchE.primaryTool === 'ST_LOGISTIC_BINARY', `场景 E: Primary = ST_LOGISTIC_BINARY(实际 ${matchE.primaryTool})`);

  // Scenario F: descriptive goal with no outcome/predictor types.
  const queryF = makeParsedQuery({
    goal: 'descriptive',
    outcome_type: null,
    predictor_types: [],
  });
  const matchF = decisionTableService.match(queryF);
  assert(matchF.primaryTool === 'ST_DESCRIPTIVE', `场景 F: Primary = ST_DESCRIPTIVE(实际 ${matchF.primaryTool})`);

  // Scenario G: cohort study.
  const queryG = makeParsedQuery({
    goal: 'cohort_study',
    outcome_type: 'binary',
    predictor_types: ['categorical'],
    design: 'independent',
  });
  const matchG = decisionTableService.match(queryG);
  assert(matchG.templateId === 'cohort_study_standard', `场景 G: Template = cohort_study_standard(实际 ${matchG.templateId})`);

  // Scenario H: variable types outside the declared ones (`datetime`) are
  // expected to land on descriptive statistics.
  // NOTE(review): the goal here is the valid value 'descriptive', so this does not
  // exercise an unknown goal — confirm whether a separate case is wanted for that.
  const queryH = makeParsedQuery({
    goal: 'descriptive',
    outcome_type: 'datetime' as any,
    predictor_types: ['datetime' as any],
  });
  const matchH = decisionTableService.match(queryH);
  assert(matchH.primaryTool === 'ST_DESCRIPTIVE', `场景 H: 无精确匹配 → Fallback ST_DESCRIPTIVE(实际 ${matchH.primaryTool})`);
}
// ────────────────────────────────────────────
// 测试 3: FlowTemplateService 模板填充
// ────────────────────────────────────────────
/**
 * Test 3: matches five mock queries (scenarios A–E) and passes each match to
 * `flowTemplateService.fill`, then checks the number and identity of the
 * generated steps and, for scenario D, the EPV truncation and warning.
 *
 * Step lookups use optional chaining so that a plan with fewer steps than
 * expected is reported as failed checks instead of throwing and aborting the
 * remaining scenarios.
 */
function testFlowTemplateFilling() {
  section('测试 3: FlowTemplateService 模板填充');

  // Scenario A: standard_analysis with a fallback tool → 3 steps expected.
  const queryA = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const matchA = decisionTableService.match(queryA);
  const fillA = flowTemplateService.fill(matchA, queryA);
  assert(fillA.steps.length === 3, `场景 A: 3 步流程(实际 ${fillA.steps.length})`);
  assert(fillA.steps[0]?.toolCode === 'ST_DESCRIPTIVE', `场景 A 步骤 1: ST_DESCRIPTIVE(实际 ${fillA.steps[0]?.toolCode ?? 'N/A'})`);
  assert(fillA.steps[1]?.toolCode === 'ST_T_TEST_IND', `场景 A 步骤 2: ST_T_TEST_IND(实际 ${fillA.steps[1]?.toolCode ?? 'N/A'})`);
  assert(fillA.steps[2]?.toolCode === 'ST_MANN_WHITNEY', `场景 A 步骤 3: ST_MANN_WHITNEY(实际 ${fillA.steps[2]?.toolCode ?? 'N/A'})`);
  assert(fillA.steps[2]?.isSensitivity === true, '场景 A 步骤 3: isSensitivity = true');
  assert(fillA.epvWarning === null, '场景 A: 无 EPV 警告');

  // Scenario B: descriptive_only, no fallback → 1 step expected.
  const queryB = makeParsedQuery({
    goal: 'descriptive',
    outcome_type: null,
    predictor_types: [],
  });
  const matchB = decisionTableService.match(queryB);
  const fillB = flowTemplateService.fill(matchB, queryB);
  assert(fillB.steps.length === 1, `场景 B: 1 步流程(实际 ${fillB.steps.length})`);
  assert(fillB.steps[0]?.toolCode === 'ST_DESCRIPTIVE', '场景 B: ST_DESCRIPTIVE');

  // Scenario C: cohort study → 3 steps expected (Table 1 / 2 / 3).
  const queryC = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const matchC = decisionTableService.match(queryC);
  const fillC = flowTemplateService.fill(matchC, queryC);
  assert(fillC.steps.length === 3, `场景 C: 队列研究 3 步(实际 ${fillC.steps.length})`);
  assert(fillC.steps.length > 0 && fillC.steps[0].name.includes('表1'), `场景 C 步骤 1: 表1(实际 "${fillC.steps[0]?.name ?? 'N/A'}")`);
  assert(fillC.steps.length > 1 && fillC.steps[1].name.includes('表2'), `场景 C 步骤 2: 表2(实际 "${fillC.steps[1]?.name ?? 'N/A'}")`);
  assert(fillC.steps.length > 2 && fillC.steps[2].name.includes('表3'), `场景 C 步骤 3: 表3(实际 "${fillC.steps[2]?.name ?? 'N/A'}")`);

  // Scenario D: EPV truncation — the mock profile has 30 events and the test
  // expects at most 3 of the 8 requested predictors to be kept (30 / 10).
  const queryD = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP', 'HR', 'Chol', 'LDL'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous', 'continuous', 'continuous', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const profileD = makeMockProfile('Event', 30);
  const matchD = decisionTableService.match(queryD);
  const fillD = flowTemplateService.fill(matchD, queryD, profileD);
  const table3Step = fillD.steps.find(s => s.name.includes('表3'));
  if (!table3Step) {
    assert(false, '场景 D: 未找到表3 步骤');
  } else {
    const predictors = table3Step.params.predictors as string[] | undefined;
    if (!predictors) {
      assert(false, '场景 D EPV 截断: 未找到 predictors 参数');
    } else {
      assert(predictors.length <= 3, `场景 D EPV 截断: 自变量 <= 3(实际 ${predictors.length},原始 8)`);
    }
  }
  // `!= null` rejects both null and undefined; `!== null` would pass on a missing field.
  assert(fillD.epvWarning != null, `场景 D: 有 EPV 警告(${fillD.epvWarning?.substring(0, 40) ?? 'N/A'}...)`);

  // Scenario E: paired analysis → 2 steps expected, none flagged as sensitivity.
  const queryE = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'paired',
    outcome_var: 'BP_after',
    predictor_vars: ['BP_before'],
  });
  const matchE = decisionTableService.match(queryE);
  const fillE = flowTemplateService.fill(matchE, queryE);
  assert(fillE.steps.length === 2, `场景 E: 配对分析 2 步(实际 ${fillE.steps.length})`);
  assert(fillE.steps.every(s => !s.isSensitivity), '场景 E: 无敏感性分析步骤');
}
// ────────────────────────────────────────────
// 测试 4: PlannedTrace 完整性
// ────────────────────────────────────────────
/**
 * Test 4: runs one independent-comparison query through match + fill and checks
 * that the fields a planned trace would report (matched rule, primary/fallback
 * tool, switch condition, template id, match score) are populated, and that the
 * primary step received the query's variables.
 */
function testPlannedTrace() {
  section('测试 4: PlannedTrace 数据完整性');

  const query = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'independent',
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const match = decisionTableService.match(query);
  const fill = flowTemplateService.fill(match, query);

  // Truthiness check: fails for a missing rule, an undefined id, and an empty id
  // alike, where `match.rule.id !== ''` would pass on undefined and throw on a missing rule.
  assert(!!match.rule?.id, 'PlannedTrace: matchedRule 非空');
  assert(match.primaryTool === 'ST_T_TEST_IND', 'PlannedTrace: primaryTool = ST_T_TEST_IND');
  assert(match.fallbackTool === 'ST_MANN_WHITNEY', 'PlannedTrace: fallbackTool = ST_MANN_WHITNEY');
  // `!= null` rejects both null and undefined.
  assert(match.switchCondition != null, 'PlannedTrace: switchCondition 非空');
  assert(fill.templateId === 'standard_analysis', 'PlannedTrace: templateUsed = standard_analysis');
  assert(match.matchScore > 0, `PlannedTrace: matchScore > 0(实际 ${match.matchScore})`);

  // Confirm the query's variables reached the primary step.
  const primaryStep = fill.steps.find(s => s.role === 'primary_test');
  assert(!!primaryStep, 'Primary step 存在');
  if (primaryStep) {
    // NOTE(review): passes when either parameter matches; confirm whether both
    // `group_var` and `value_var` are meant to be required.
    const hasGroupVar = primaryStep.params.group_var === 'Drug';
    const hasValueVar = primaryStep.params.value_var === 'BP';
    assert(hasGroupVar || hasValueVar, 'Primary step 参数包含正确变量');
  }
}
// ────────────────────────────────────────────
// 运行所有测试
// ────────────────────────────────────────────
// Entry point: print the banner, run the four test groups in order, print the
// summary, and exit with status 1 if any check failed (0 otherwise).
console.log('\n🧪 SSA Phase P — Tracer Bullet 测试\n');
console.log('测试范围:ConfigLoader → DecisionTable → FlowTemplate → PlannedTrace');
console.log('依赖项:无(不需要数据库、LLM、R 引擎)\n');
try {
  testConfigLoading();
  testDecisionTableMatching();
  testFlowTemplateFilling();
  testPlannedTrace();
} catch (e: unknown) {
  // An exception escaping a test group skips the groups after it; count it as one
  // failure so the exit status still reports the run as unsuccessful.
  // A thrown value is not guaranteed to be an Error, so narrow before reading its fields.
  const message = e instanceof Error ? e.message : String(e);
  console.error(`\n💥 测试过程中发生未捕获异常:${message}`);
  if (e instanceof Error && e.stack) {
    console.error(e.stack);
  }
  failed++;
}

// Summary
console.log(`\n${'═'.repeat(60)}`);
console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
if (failed === 0) {
  console.log('🎉 全部通过!P 层 Pipeline 验证成功。');
} else {
  console.log(`⚠️ 有 ${failed} 个测试失败,请检查上方输出。`);
}
console.log('═'.repeat(60));
process.exit(failed > 0 ? 1 : 0);