Implement the full QPER intelligent analysis pipeline: - Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement - Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging - Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output - Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification End-to-end test: 40/40 passed across two complete analysis scenarios. Co-authored-by: Cursor <cursoragent@cursor.com>
396 lines
17 KiB
TypeScript
396 lines
17 KiB
TypeScript
/**
 * SSA Phase P — Tracer Bullet test script
 *
 * Scope of verification:
 * 1. ConfigLoader loading + Zod validation of the 3 JSON config files
 * 2. DecisionTableService four-dimension matching (8 scenarios, A–H)
 * 3. FlowTemplateService template filling + EPV truncation
 * 4. Q→P integration: mock ParsedQuery → WorkflowPlan
 *
 * How to run: npx tsx scripts/test-ssa-planner-pipeline.ts
 * Does not depend on: database, LLM, R engine
 */
|
||
|
||
import { toolsRegistryLoader, decisionTablesLoader, flowTemplatesLoader, reloadAllConfigs } from '../src/modules/ssa/config/index.js';
|
||
import { decisionTableService } from '../src/modules/ssa/services/DecisionTableService.js';
|
||
import { flowTemplateService } from '../src/modules/ssa/services/FlowTemplateService.js';
|
||
import type { ParsedQuery } from '../src/modules/ssa/types/query.types.js';
|
||
import type { DataProfile } from '../src/modules/ssa/services/DataProfileService.js';
|
||
|
||
// ────────────────────────────────────────────
|
||
// 工具函数
|
||
// ────────────────────────────────────────────
|
||
|
||
// Number of failed checks so far; bumped by assert() and by the top-level
// runner when a test function throws.
let failed = 0;
// Number of passed checks so far; bumped by assert().
let passed = 0;
|
||
|
||
/**
 * Records the outcome of one check: prints a ✅/❌ line to stdout and bumps the
 * module-level `passed` or `failed` counter. Never throws.
 *
 * @param condition - Result of the check.
 * @param testName - Label printed for the check.
 * @param detail - Optional extra text appended to the label on failure only.
 */
function assert(condition: boolean, testName: string, detail?: string) {
  if (!condition) {
    // An empty or missing detail contributes nothing to the failure line.
    const suffix = detail ? ` — ${detail}` : '';
    console.log(` ❌ ${testName}${suffix}`);
    failed++;
    return;
  }

  console.log(` ✅ ${testName}`);
  passed++;
}
|
||
|
||
/**
 * Prints a section banner to stdout: a blank line, a 60-character rule,
 * the title line, and a closing rule.
 *
 * @param title - Text shown after the 📋 marker.
 */
function section(title: string) {
  const rule = '─'.repeat(60);
  console.log(`\n${rule}`);
  console.log(`📋 ${title}`);
  console.log(rule);
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// Mock 数据
|
||
// ────────────────────────────────────────────
|
||
|
||
/**
 * Builds a ParsedQuery for the tests.
 *
 * The baseline is an independent-design comparison with continuous outcome
 * `BP` and a single binary predictor `Drug` (also the grouping variable);
 * any field present in `overrides` replaces the baseline value.
 *
 * @param overrides - Fields to replace in the baseline query.
 * @returns The baseline query with the overrides applied.
 */
function makeParsedQuery(overrides: Partial<ParsedQuery>): ParsedQuery {
  const baseline: ParsedQuery = {
    goal: 'comparison',
    outcome_var: 'BP',
    outcome_type: 'continuous',
    predictor_vars: ['Drug'],
    predictor_types: ['binary'],
    grouping_var: 'Drug',
    design: 'independent',
    confidence: 0.9,
    reasoning: 'test',
    needsClarification: false,
  };

  return { ...baseline, ...overrides };
}
|
||
|
||
/**
 * Builds a mock data profile of 200 rows for the EPV tests.
 *
 * The first column is the outcome: a two-level categorical whose level '1'
 * occurs `minEventCount` times and level '0' the remaining rows. Five fixed
 * covariate columns (Age, Sex, BMI, Smoking, SBP) follow. `totalColumns` is
 * reported as 10 although only six columns are listed.
 *
 * @param outcomeVar - Name given to the outcome column.
 * @param minEventCount - Count assigned to outcome level '1'.
 * @returns The mock profile; cast through `any`, so it is not checked against
 *   the DataProfile type.
 */
function makeMockProfile(outcomeVar: string, minEventCount: number): DataProfile {
  const totalRows = 200;

  // Column summary; columns have no missing values unless stated.
  const column = (name: string, type: string, unique: number, missing = 0, missingPercent = 0) => ({
    name,
    type,
    missing,
    missingPercent,
    unique,
  });

  const outcomeColumn = {
    ...column(outcomeVar, 'categorical', 2),
    topValues: [
      { value: '0', count: totalRows - minEventCount },
      { value: '1', count: minEventCount },
    ],
  };

  return {
    totalRows,
    totalColumns: 10,
    columns: [
      outcomeColumn,
      column('Age', 'numeric', 50),
      column('Sex', 'categorical', 2),
      column('BMI', 'numeric', 80, 5, 2.5),
      column('Smoking', 'categorical', 2),
      column('SBP', 'numeric', 100),
    ],
  } as any;
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// 测试 1: ConfigLoader + Zod 校验
|
||
// ────────────────────────────────────────────
|
||
|
||
/**
 * Test 1: reads the tools registry, the decision tables and the flow
 * templates through their loaders, checks their contents, then reloads all
 * configs.
 *
 * Each stage is isolated: an exception thrown inside a stage is recorded as
 * one failed check carrying the error message, and later stages still run.
 */
function testConfigLoading() {
  section('测试 1: ConfigLoader 加载 + Zod 校验');

  // Runs one stage; a thrown error becomes a failed check labelled `failureLabel`.
  const stage = (failureLabel: string, body: () => void) => {
    try {
      body();
    } catch (e: any) {
      assert(false, failureLabel, e.message);
    }
  };

  // Tools registry: minimum size, code format, and a few required codes.
  stage('工具注册表加载失败', () => {
    const codePattern = /^ST_[A-Z_]+$/;
    const registry = toolsRegistryLoader.get();
    assert(!!registry, '工具注册表加载成功');

    const entries = registry.tools;
    assert(entries.length >= 7, `工具数量 >= 7(实际 ${entries.length})`);
    assert(entries.every(tool => codePattern.test(tool.code)), '所有工具 code 格式正确 (ST_XXX)');

    const codes = entries.map(tool => tool.code);
    assert(codes.includes('ST_DESCRIPTIVE'), '包含 ST_DESCRIPTIVE');
    assert(codes.includes('ST_T_TEST_IND'), '包含 ST_T_TEST_IND');
    assert(codes.includes('ST_LOGISTIC_BINARY'), '包含 ST_LOGISTIC_BINARY');
  });

  // Decision tables: minimum rule count, mandatory fields, required rule ids.
  stage('决策表加载失败', () => {
    const ruleList = decisionTablesLoader.get();
    assert(!!ruleList, '决策表加载成功');
    assert(ruleList.length >= 9, `规则数量 >= 9(实际 ${ruleList.length})`);
    assert(ruleList.every(rule => rule.id && rule.goal && rule.primaryTool), '所有规则含必填字段');

    const ruleIds = ruleList.map(rule => rule.id);
    assert(ruleIds.includes('DESC_ANY'), '包含 DESC_ANY 兜底规则');
    assert(ruleIds.includes('COHORT_STUDY'), '包含 COHORT_STUDY 队列研究规则');
  });

  // Flow templates: minimum template count and required template ids.
  stage('流程模板加载失败', () => {
    const templateConfig = flowTemplatesLoader.get();
    assert(!!templateConfig, '流程模板加载成功');

    const entries = templateConfig.templates;
    assert(entries.length >= 5, `模板数量 >= 5(实际 ${entries.length})`);

    const templateIds = entries.map(template => template.id);
    assert(templateIds.includes('standard_analysis'), '包含 standard_analysis 模板');
    assert(templateIds.includes('cohort_study_standard'), '包含 cohort_study_standard 模板');
    assert(templateIds.includes('descriptive_only'), '包含 descriptive_only 模板');
  });

  // Hot reload: every reload result must report success.
  stage('热更新失败', () => {
    const outcomes = reloadAllConfigs();
    assert(outcomes.every(outcome => outcome.success), `热更新全部成功(${outcomes.length} 个文件)`);
  });
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// 测试 2: DecisionTableService 四维匹配
|
||
// ────────────────────────────────────────────
|
||
|
||
/**
 * Test 2: runs eight mock queries (scenarios A–H) through
 * `decisionTableService.match` and checks the selected primary tool,
 * fallback tool, switch condition and/or template id of each match.
 */
function testDecisionTableMatching() {
  section('测试 2: DecisionTableService 四维匹配');

  // Builds a query from the overrides and returns its decision-table match.
  const matchFor = (overrides: Partial<ParsedQuery>) =>
    decisionTableService.match(makeParsedQuery(overrides));

  // Shared check + message format for the "Primary = X" assertions.
  const expectPrimary = (tag: string, expected: string, actual: unknown) =>
    assert(actual === expected, `场景 ${tag}: Primary = ${expected}(实际 ${actual})`);

  // Shared check + message format for the "Template = X" assertions.
  const expectTemplate = (tag: string, expected: string, actual: unknown) =>
    assert(actual === expected, `场景 ${tag}: Template = ${expected}(实际 ${actual})`);

  // Scenario A: two-group comparison of a continuous outcome (independent
  // samples) → t-test with a Mann-Whitney fallback.
  const independentMatch = matchFor({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'independent',
  });
  expectPrimary('A', 'ST_T_TEST_IND', independentMatch.primaryTool);
  assert(
    independentMatch.fallbackTool === 'ST_MANN_WHITNEY',
    `场景 A: Fallback = ST_MANN_WHITNEY(实际 ${independentMatch.fallbackTool})`,
  );
  assert(independentMatch.switchCondition !== null, '场景 A: 有 switchCondition(正态性检验)');
  expectTemplate('A', 'standard_analysis', independentMatch.templateId);

  // Scenario B: paired design.
  const pairedMatch = matchFor({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'paired',
  });
  expectPrimary('B', 'ST_T_TEST_PAIRED', pairedMatch.primaryTool);
  expectTemplate('B', 'paired_analysis', pairedMatch.templateId);

  // Scenario C: categorical vs categorical → chi-square test.
  const categoricalMatch = matchFor({
    goal: 'comparison',
    outcome_type: 'categorical',
    predictor_types: ['categorical'],
    design: 'independent',
  });
  expectPrimary('C', 'ST_CHI_SQUARE', categoricalMatch.primaryTool);

  // Scenario D: correlation analysis (continuous vs continuous).
  const correlationMatch = matchFor({
    goal: 'correlation',
    outcome_type: 'continuous',
    predictor_types: ['continuous'],
    design: 'independent',
  });
  expectPrimary('D', 'ST_CORRELATION', correlationMatch.primaryTool);

  // Scenario E: logistic regression.
  const logisticMatch = matchFor({
    goal: 'regression',
    outcome_type: 'binary',
    predictor_types: ['continuous'],
    design: 'independent',
  });
  expectPrimary('E', 'ST_LOGISTIC_BINARY', logisticMatch.primaryTool);

  // Scenario F: descriptive statistics fallback.
  const descriptiveMatch = matchFor({
    goal: 'descriptive',
    outcome_type: null,
    predictor_types: [],
  });
  expectPrimary('F', 'ST_DESCRIPTIVE', descriptiveMatch.primaryTool);

  // Scenario G: cohort study.
  const cohortMatch = matchFor({
    goal: 'cohort_study',
    outcome_type: 'binary',
    predictor_types: ['categorical'],
    design: 'independent',
  });
  expectTemplate('G', 'cohort_study_standard', cohortMatch.templateId);

  // Scenario H: meant to model a query with no exact rule, which should fall
  // back to descriptive statistics.
  // NOTE(review): the goal is still 'descriptive'; only the outcome/predictor
  // types are out-of-range values — confirm this exercises the intended path.
  const unmatchedMatch = matchFor({
    goal: 'descriptive' as any,
    outcome_type: 'datetime' as any,
    predictor_types: ['datetime' as any],
  });
  assert(
    unmatchedMatch.primaryTool === 'ST_DESCRIPTIVE',
    `场景 H: 无精确匹配 → Fallback ST_DESCRIPTIVE(实际 ${unmatchedMatch.primaryTool})`,
  );
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// 测试 3: FlowTemplateService 模板填充
|
||
// ────────────────────────────────────────────
|
||
|
||
/**
 * Test 3: fills flow templates from decision-table matches and checks the
 * resulting step lists (scenarios A–E).
 *
 * Steps are looked up by index with optional chaining, so a template that
 * yields fewer steps than expected is reported as failed checks instead of
 * throwing a TypeError that would abort the remaining scenarios.
 */
function testFlowTemplateFilling() {
  section('测试 3: FlowTemplateService 模板填充');

  // Scenario A: standard_analysis (has a fallback tool → 3 steps).
  const queryA = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const matchA = decisionTableService.match(queryA);
  const fillA = flowTemplateService.fill(matchA, queryA);
  assert(fillA.steps.length === 3, `场景 A: 3 步流程(实际 ${fillA.steps.length})`);
  assert(
    fillA.steps[0]?.toolCode === 'ST_DESCRIPTIVE',
    `场景 A 步骤 1: ST_DESCRIPTIVE(实际 ${fillA.steps[0]?.toolCode ?? 'N/A'})`,
  );
  assert(
    fillA.steps[1]?.toolCode === 'ST_T_TEST_IND',
    `场景 A 步骤 2: ST_T_TEST_IND(实际 ${fillA.steps[1]?.toolCode ?? 'N/A'})`,
  );
  assert(
    fillA.steps[2]?.toolCode === 'ST_MANN_WHITNEY',
    `场景 A 步骤 3: ST_MANN_WHITNEY(实际 ${fillA.steps[2]?.toolCode ?? 'N/A'})`,
  );
  assert(fillA.steps[2]?.isSensitivity === true, '场景 A 步骤 3: isSensitivity = true');
  assert(fillA.epvWarning === null, '场景 A: 无 EPV 警告');

  // Scenario B: descriptive_only (no fallback tool → 1 step).
  const queryB = makeParsedQuery({
    goal: 'descriptive',
    outcome_type: null,
    predictor_types: [],
  });
  const matchB = decisionTableService.match(queryB);
  const fillB = flowTemplateService.fill(matchB, queryB);
  assert(fillB.steps.length === 1, `场景 B: 1 步流程(实际 ${fillB.steps.length})`);
  assert(fillB.steps[0]?.toolCode === 'ST_DESCRIPTIVE', '场景 B: ST_DESCRIPTIVE');

  // Scenario C: cohort study → 3 steps (Table 1/2/3).
  const queryC = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const matchC = decisionTableService.match(queryC);
  const fillC = flowTemplateService.fill(matchC, queryC);
  assert(fillC.steps.length === 3, `场景 C: 队列研究 3 步(实际 ${fillC.steps.length})`);
  assert(
    fillC.steps[0]?.name.includes('表1') === true,
    `场景 C 步骤 1: 表1(实际 "${fillC.steps[0]?.name ?? 'N/A'}")`,
  );
  assert(
    fillC.steps[1]?.name.includes('表2') === true,
    `场景 C 步骤 2: 表2(实际 "${fillC.steps[1]?.name ?? 'N/A'}")`,
  );
  assert(
    fillC.steps[2]?.name.includes('表3') === true,
    `场景 C 步骤 3: 表3(实际 "${fillC.steps[2]?.name ?? 'N/A'}")`,
  );

  // Scenario D: EPV truncation — 30 events / 10 = at most 3 variables.
  const queryD = makeParsedQuery({
    goal: 'cohort_study',
    outcome_var: 'Event',
    outcome_type: 'binary',
    predictor_vars: ['Age', 'Sex', 'BMI', 'Smoking', 'SBP', 'HR', 'Chol', 'LDL'],
    predictor_types: ['continuous', 'binary', 'continuous', 'binary', 'continuous', 'continuous', 'continuous', 'continuous'],
    grouping_var: 'Drug',
    design: 'independent',
  });
  const profileD = makeMockProfile('Event', 30); // only 30 events → max 3 vars
  const matchD = decisionTableService.match(queryD);
  const fillD = flowTemplateService.fill(matchD, queryD, profileD);

  // The truncated predictor list is expected on the step whose name contains "表3".
  const table3Step = fillD.steps.find(s => s.name.includes('表3'));
  if (table3Step) {
    const predictors = table3Step.params.predictors as string[] | undefined;
    if (predictors) {
      assert(predictors.length <= 3, `场景 D EPV 截断: 自变量 <= 3(实际 ${predictors.length},原始 8)`);
    } else {
      assert(false, '场景 D EPV 截断: 未找到 predictors 参数');
    }
  } else {
    assert(false, '场景 D: 未找到表3 步骤');
  }
  assert(fillD.epvWarning !== null, `场景 D: 有 EPV 警告(${fillD.epvWarning?.substring(0, 40)}...)`);

  // Scenario E: paired analysis → 2 steps (no sensitivity step).
  const queryE = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'paired',
    outcome_var: 'BP_after',
    predictor_vars: ['BP_before'],
  });
  const matchE = decisionTableService.match(queryE);
  const fillE = flowTemplateService.fill(matchE, queryE);
  assert(fillE.steps.length === 2, `场景 E: 配对分析 2 步(实际 ${fillE.steps.length})`);
  assert(fillE.steps.every(s => !s.isSensitivity), '场景 E: 无敏感性分析步骤');
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// 测试 4: PlannedTrace 完整性
|
||
// ────────────────────────────────────────────
|
||
|
||
/**
 * Test 4: checks that one match + fill of the baseline comparison query
 * exposes the fields a PlannedTrace is built from (matched rule id, primary
 * and fallback tool, switch condition, template id, match score), and that
 * the primary-test step received the query's variables.
 */
function testPlannedTrace() {
  section('测试 4: PlannedTrace 数据完整性');

  const parsed = makeParsedQuery({
    goal: 'comparison',
    outcome_type: 'continuous',
    predictor_types: ['binary'],
    design: 'independent',
    outcome_var: 'BP',
    predictor_vars: ['Drug'],
    grouping_var: 'Drug',
  });
  const matched = decisionTableService.match(parsed);
  const filled = flowTemplateService.fill(matched, parsed);

  // Information a PlannedTrace should carry.
  assert(matched.rule.id !== '', 'PlannedTrace: matchedRule 非空');
  assert(matched.primaryTool === 'ST_T_TEST_IND', 'PlannedTrace: primaryTool = ST_T_TEST_IND');
  assert(matched.fallbackTool === 'ST_MANN_WHITNEY', 'PlannedTrace: fallbackTool = ST_MANN_WHITNEY');
  assert(matched.switchCondition !== null, 'PlannedTrace: switchCondition 非空');
  assert(filled.templateId === 'standard_analysis', 'PlannedTrace: templateUsed = standard_analysis');
  assert(matched.matchScore > 0, `PlannedTrace: matchScore > 0(实际 ${matched.matchScore})`);

  // Confirm the query variables were passed through to the primary step.
  const primaryStep = filled.steps.find(step => step.role === 'primary_test');
  assert(Boolean(primaryStep), 'Primary step 存在');
  if (!primaryStep) {
    return;
  }

  // Either parameter carrying the expected variable name is accepted.
  const stepParams = primaryStep.params;
  const carriesExpectedVariable = stepParams.group_var === 'Drug' || stepParams.value_var === 'BP';
  assert(carriesExpectedVariable, 'Primary step 参数包含正确变量');
}
|
||
|
||
// ────────────────────────────────────────────
|
||
// 运行所有测试
|
||
// ────────────────────────────────────────────
|
||
|
||
// ── Script entry point ──────────────────────────────────────────────────────
// Runs every test function, prints a summary, and exits with status 1 if any
// check failed or any test function threw (0 otherwise).

console.log('\n🧪 SSA Phase P — Tracer Bullet 测试\n');
console.log('测试范围:ConfigLoader → DecisionTable → FlowTemplate → PlannedTrace');
console.log('依赖项:无(不需要数据库、LLM、R 引擎)\n');

// Each test function gets its own try/catch so that an exception in one does
// not skip the ones after it (which would make the summary under-count).
for (const runTest of [testConfigLoading, testDecisionTableMatching, testFlowTemplateFilling, testPlannedTrace]) {
  try {
    runTest();
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances carry message/stack.
    const message = e instanceof Error ? e.message : String(e);
    console.error(`\n💥 测试过程中发生未捕获异常:${message}`);
    if (e instanceof Error) {
      console.error(e.stack);
    }
    failed++;
  }
}

// Summary
console.log(`\n${'═'.repeat(60)}`);
console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
if (failed === 0) {
  console.log('🎉 全部通过!P 层 Pipeline 验证成功。');
} else {
  console.log(`⚠️ 有 ${failed} 个测试失败,请检查上方输出。`);
}
console.log('═'.repeat(60));

process.exit(failed > 0 ? 1 : 0);
|