feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers

Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-21 18:15:53 +08:00
parent 428a22adf2
commit 371e1c069c
73 changed files with 9242 additions and 706 deletions

View File

@@ -0,0 +1,85 @@
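/**
* SSA ConfigLoader — configuration infrastructure
*
* Generic base class: read a JSON file → validate it with a Zod schema → cache it in memory.
* Supports hot reload (reload() re-reads the file from disk and re-validates it; on failure the old config is kept).
*
* Core principle #6: all business logic is driven by JSON configuration, never hard-coded.
*/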
import { readFileSync } from 'fs';
import { join } from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import type { ZodType } from 'zod';
import { logger } from '../../../common/logging/index.js';
// Derive this module's file path and directory from import.meta.url.
// Config file names passed to ConfigLoader are joined onto __dirname.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
 * Outcome of a single ConfigLoader.reload() call.
 */
export interface ReloadResult {
// true when the file was re-read and passed validation
success: boolean;
// label of the loader that produced this result
file: string;
// failure message; only set when success is false
error?: string;
}
/**
 * Loads one JSON configuration file, validates it against a Zod schema and
 * keeps the validated result in memory.
 *
 * The file is read lazily on the first get() and re-read on every reload().
 * The cached value is only replaced after a successful parse + validation, so
 * a failed reload leaves the previously loaded configuration in place.
 */
export class ConfigLoader<T> {
  /**
   * Last successfully validated configuration, or null when nothing has been
   * loaded yet. The value is wrapped in an object so that "not loaded" can be
   * told apart from a legitimately falsy config value (0, '', false, null).
   */
  private cache: { readonly value: T } | null = null;
  private readonly filePath: string;
  private readonly schema: ZodType<T>;
  private readonly label: string;

  /**
   * @param fileName JSON file name, joined onto this module's directory
   * @param schema   Zod schema the parsed JSON must satisfy
   * @param label    short name used in log lines, error messages and ReloadResult.file
   */
  constructor(fileName: string, schema: ZodType<T>, label: string) {
    this.filePath = join(__dirname, fileName);
    this.schema = schema;
    this.label = label;
  }

  /**
   * Return the configuration, loading it from disk on first use.
   *
   * @returns the validated configuration
   * @throws Error if the first load fails (unreadable file, invalid JSON, or schema violation)
   */
  get(): T {
    return this.cache === null ? this.loadFromDisk() : this.cache.value;
  }

  /**
   * Hot reload: re-read the file from disk and re-validate it.
   * Never throws; on failure the old configuration is kept and the error
   * message is returned in the result.
   *
   * @returns success flag, loader label and (on failure) the error message
   */
  reload(): ReloadResult {
    try {
      this.loadFromDisk();
      logger.info(`[SSA:Config] ${this.label} reloaded successfully`);
      return { success: true, file: this.label };
    } catch (err: unknown) {
      // Anything can be thrown in JS; only Error instances carry a message.
      const message = err instanceof Error ? err.message : String(err);
      logger.error(`[SSA:Config] ${this.label} reload failed, keeping old config`, {
        error: message,
      });
      return { success: false, file: this.label, error: message };
    }
  }

  /**
   * Read, parse and validate the file, then replace the cached value.
   * The cache is assigned last, so any failure leaves it untouched.
   *
   * @returns the freshly validated configuration
   * @throws Error on read failure, JSON syntax error, or schema validation failure
   */
  private loadFromDisk(): T {
    const raw = readFileSync(this.filePath, 'utf-8');

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      throw new Error(`${this.label}: JSON 语法错误 — ${reason}`);
    }

    const result = this.schema.safeParse(parsed);
    if (!result.success) {
      // One line per issue: dotted path to the offending field plus Zod's message.
      const issues = result.error.issues
        .map(i => `  - ${i.path.join('.')}: ${i.message}`)
        .join('\n');
      throw new Error(`${this.label}: Schema 校验失败\n${issues}`);
    }

    this.cache = { value: result.data };
    return result.data;
  }
}

View File

@@ -0,0 +1,132 @@
[
{
"id": "DIFF_CONT_BIN_IND",
"goal": "comparison",
"outcomeType": "continuous",
"predictorType": "binary",
"design": "independent",
"primaryTool": "ST_T_TEST_IND",
"fallbackTool": "ST_MANN_WHITNEY",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
"templateId": "standard_analysis",
"priority": 10,
"description": "两组连续变量比较(独立样本)"
},
{
"id": "DIFF_CONT_BIN_PAIRED",
"goal": "comparison",
"outcomeType": "continuous",
"predictorType": "binary",
"design": "paired",
"primaryTool": "ST_T_TEST_PAIRED",
"fallbackTool": null,
"switchCondition": null,
"templateId": "paired_analysis",
"priority": 10,
"description": "配对设计前后对比"
},
{
"id": "DIFF_CONT_MULTI_IND",
"goal": "comparison",
"outcomeType": "continuous",
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_T_TEST_IND",
"fallbackTool": "ST_MANN_WHITNEY",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
"templateId": "standard_analysis",
"priority": 5,
"description": "多组连续变量比较(暂用 T 检验处理两组场景,ANOVA 待扩展)"
},
{
"id": "DIFF_CAT_CAT_IND",
"goal": "comparison",
"outcomeType": "categorical",
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"templateId": "standard_analysis",
"priority": 10,
"description": "两个分类变量的独立性检验"
},
{
"id": "ASSOC_CONT_CONT",
"goal": "correlation",
"outcomeType": "continuous",
"predictorType": "continuous",
"design": "*",
"primaryTool": "ST_CORRELATION",
"fallbackTool": null,
"switchCondition": null,
"templateId": "standard_analysis",
"priority": 10,
"description": "两个连续变量的相关分析(Pearson/Spearman 自动选择)"
},
{
"id": "ASSOC_CAT_ANY",
"goal": "correlation",
"outcomeType": "categorical",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"templateId": "standard_analysis",
"priority": 5,
"description": "分类变量关联分析"
},
{
"id": "PRED_BIN_ANY",
"goal": "regression",
"outcomeType": "binary",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_LOGISTIC_BINARY",
"fallbackTool": null,
"switchCondition": null,
"templateId": "regression_analysis",
"priority": 10,
"description": "二分类结局的多因素 Logistic 回归"
},
{
"id": "PRED_CONT_ANY",
"goal": "regression",
"outcomeType": "continuous",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CORRELATION",
"fallbackTool": null,
"switchCondition": null,
"templateId": "regression_analysis",
"priority": 5,
"description": "连续结局的回归分析(线性回归待扩展,暂用相关分析)"
},
{
"id": "DESC_ANY",
"goal": "descriptive",
"outcomeType": "*",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_DESCRIPTIVE",
"fallbackTool": null,
"switchCondition": null,
"templateId": "descriptive_only",
"priority": 1,
"description": "纯描述性统计"
},
{
"id": "COHORT_STUDY",
"goal": "cohort_study",
"outcomeType": "binary",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_DESCRIPTIVE",
"fallbackTool": null,
"switchCondition": null,
"templateId": "cohort_study_standard",
"priority": 20,
"description": "队列研究全套分析Table 1→2→3"
}
]

View File

@@ -0,0 +1,69 @@
{
"version": "1.0.0",
"templates": [
{
"id": "standard_analysis",
"name": "标准分析流程",
"description": "适用于差异比较、相关分析等场景的通用三步模板",
"steps": [
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "主分析" },
{ "order": 3, "role": "sensitivity", "tool": "{{fallbackTool}}", "name": "敏感性分析", "condition": "fallback_exists" }
]
},
{
"id": "paired_analysis",
"name": "配对设计分析",
"description": "配对设计的前后对比分析",
"steps": [
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "配对检验" }
]
},
{
"id": "regression_analysis",
"name": "回归建模",
"description": "描述统计 + 多因素回归分析",
"steps": [
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "多因素回归" }
]
},
{
"id": "descriptive_only",
"name": "描述性统计",
"description": "仅做数据概况分析",
"steps": [
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" }
]
},
{
"id": "cohort_study_standard",
"name": "经典队列研究全套分析",
"description": "覆盖 Table 1基线比较→ Table 2单因素筛选→ Table 3多因素回归",
"steps": [
{
"order": 1,
"role": "baseline_table",
"tool": "ST_DESCRIPTIVE",
"name": "表1: 组间基线特征比较",
"paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
},
{
"order": 2,
"role": "univariate_screen",
"tool": "ST_DESCRIPTIVE",
"name": "表2: 结局指标单因素分析",
"paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
},
{
"order": 3,
"role": "multivariate_reg",
"tool": "ST_LOGISTIC_BINARY",
"name": "表3: 多因素 Logistic 回归",
"paramsMapping": { "outcome_var": "{{outcome_var}}", "predictors": "{{epv_capped_predictors}}" }
}
]
}
]
}

View File

@@ -0,0 +1,48 @@
/**
* SSA 配置中心 — 统一管理所有领域 JSON 配置
*
* 每个 ConfigLoader 实例对应一个 JSON 文件 + Zod Schema。
* 提供 reloadAll() 供热更新 API 调用。
*/
import { ConfigLoader, type ReloadResult } from './ConfigLoader.js';
import {
ToolsRegistrySchema,
DecisionTablesSchema,
FlowTemplatesSchema,
type ToolsRegistry,
type DecisionTable,
type FlowTemplatesConfig,
} from './schemas.js';
// Loader for tools_registry.json, validated against ToolsRegistrySchema.
export const toolsRegistryLoader = new ConfigLoader<ToolsRegistry>(
'tools_registry.json',
ToolsRegistrySchema,
'tools_registry'
);
// Loader for decision_tables.json; the file is an array of decision rules,
// validated against DecisionTablesSchema.
export const decisionTablesLoader = new ConfigLoader<DecisionTable[]>(
'decision_tables.json',
DecisionTablesSchema,
'decision_tables'
);
// Loader for flow_templates.json, validated against FlowTemplatesSchema.
export const flowTemplatesLoader = new ConfigLoader<FlowTemplatesConfig>(
'flow_templates.json',
FlowTemplatesSchema,
'flow_templates'
);
/**
 * Hot-reload every SSA configuration file.
 *
 * Loaders are reloaded one after another in a fixed order (tools registry,
 * decision tables, flow templates). Each loader validates its own file and
 * reports its own outcome, so a failure in one does not affect the others.
 *
 * @returns one ReloadResult per loader, in the order above
 */
export function reloadAllConfigs(): ReloadResult[] {
  const outcomes: ReloadResult[] = [];
  for (const loader of [toolsRegistryLoader, decisionTablesLoader, flowTemplatesLoader]) {
    outcomes.push(loader.reload());
  }
  return outcomes;
}
export type { ReloadResult } from './ConfigLoader.js';

View File

@@ -0,0 +1,91 @@
/**
* SSA 领域配置 Zod Schema
*
* 方法学团队编辑 JSON 时的拼写/结构错误在加载时立即拦截。
* 每个 Schema 对应一个 JSON 领域文件。
*/
import { z } from 'zod';
// ────────────────────────────────────────────
// 1. tools_registry.json — E-layer tool registry
// ────────────────────────────────────────────
// One input parameter of a tool. `required` defaults to true when omitted.
const ToolParamSchema = z.object({
name: z.string(),
type: z.enum(['string', 'number', 'boolean', 'string[]', 'number[]']),
required: z.boolean().default(true),
description: z.string().optional(),
default: z.unknown().optional(),
});
// One registered tool. `code` must be "ST_" followed by uppercase letters and
// underscores only (digits are rejected by the pattern).
const ToolDefinitionSchema = z.object({
code: z.string().regex(/^ST_[A-Z_]+$/, 'tool code must match ST_XXX pattern'),
name: z.string().min(1),
category: z.string(),
description: z.string(),
inputParams: z.array(ToolParamSchema),
outputType: z.string(),
prerequisite: z.string().optional(),
// NOTE(review): plain string — not checked against the registered tool codes here.
fallback: z.string().optional(),
});
// Root of tools_registry.json: optional version plus at least one tool.
export const ToolsRegistrySchema = z.object({
version: z.string().optional(),
tools: z.array(ToolDefinitionSchema).min(1),
});
export type ToolDefinition = z.infer<typeof ToolDefinitionSchema>;
export type ToolsRegistry = z.infer<typeof ToolsRegistrySchema>;
// ────────────────────────────────────────────
// 2. decision_tables.json — P-layer decision table
// ────────────────────────────────────────────
// One decision rule. The matching dimensions (goal, outcomeType, predictorType,
// design) and the tool/template ids are plain strings at schema level.
// fallbackTool and switchCondition default to null, priority to 0, when omitted.
const DecisionRuleSchema = z.object({
id: z.string(),
goal: z.string(),
outcomeType: z.string(),
predictorType: z.string(),
design: z.string(),
primaryTool: z.string(),
fallbackTool: z.string().nullable().default(null),
switchCondition: z.string().nullable().default(null),
templateId: z.string(),
priority: z.number().default(0),
description: z.string().optional(),
});
// Root of decision_tables.json: a non-empty array of rules.
export const DecisionTablesSchema = z.array(DecisionRuleSchema).min(1);
export type DecisionRule = z.infer<typeof DecisionRuleSchema>;
// Note: despite the name, DecisionTable is an alias for a single rule;
// the whole table is DecisionTable[].
export type DecisionTable = DecisionRule;
// ────────────────────────────────────────────
// 3. flow_templates.json — P-layer flow templates
// ────────────────────────────────────────────
// One step of a template. `tool`, `condition` and the paramsMapping values are
// plain strings here; any placeholder syntax inside them is not validated by this schema.
const TemplateStepSchema = z.object({
order: z.number(),
role: z.string(),
tool: z.string(),
name: z.string().optional(),
condition: z.string().optional(),
paramsMapping: z.record(z.string(), z.string()).optional(),
});
// One flow template: id, display name and at least one step.
const FlowTemplateSchema = z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
steps: z.array(TemplateStepSchema).min(1),
});
// Root of flow_templates.json: optional version plus at least one template.
export const FlowTemplatesSchema = z.object({
version: z.string().optional(),
templates: z.array(FlowTemplateSchema).min(1),
});
export type TemplateStep = z.infer<typeof TemplateStepSchema>;
export type FlowTemplate = z.infer<typeof FlowTemplateSchema>;
export type FlowTemplatesConfig = z.infer<typeof FlowTemplatesSchema>;

View File

@@ -0,0 +1,87 @@
{
"version": "1.0.0",
"tools": [
{
"code": "ST_DESCRIPTIVE",
"name": "描述性统计",
"category": "basic",
"description": "数据概况、基线特征表",
"inputParams": [
{ "name": "variables", "type": "string[]", "required": true, "description": "分析变量列表" },
{ "name": "group_var", "type": "string", "required": false, "description": "分组变量" }
],
"outputType": "summary"
},
{
"code": "ST_T_TEST_IND",
"name": "独立样本T检验",
"category": "parametric",
"description": "两组连续变量比较(参数方法)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(二分类)" },
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
],
"outputType": "comparison",
"prerequisite": "正态分布",
"fallback": "ST_MANN_WHITNEY"
},
{
"code": "ST_MANN_WHITNEY",
"name": "Mann-Whitney U检验",
"category": "nonparametric",
"description": "两组连续/等级变量比较(非参数方法)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(二分类)" },
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
],
"outputType": "comparison"
},
{
"code": "ST_T_TEST_PAIRED",
"name": "配对T检验",
"category": "parametric",
"description": "配对设计的前后对比",
"inputParams": [
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
],
"outputType": "comparison"
},
{
"code": "ST_CHI_SQUARE",
"name": "卡方检验",
"category": "categorical",
"description": "两个分类变量的独立性检验",
"inputParams": [
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
],
"outputType": "association",
"fallback": "ST_FISHER"
},
{
"code": "ST_CORRELATION",
"name": "相关分析",
"category": "correlation",
"description": "Pearson/Spearman相关系数",
"inputParams": [
{ "name": "var_x", "type": "string", "required": true, "description": "自变量" },
{ "name": "var_y", "type": "string", "required": true, "description": "因变量" },
{ "name": "method", "type": "string", "required": false, "description": "auto/pearson/spearman", "default": "auto" }
],
"outputType": "correlation"
},
{
"code": "ST_LOGISTIC_BINARY",
"name": "二元Logistic回归",
"category": "regression",
"description": "二分类结局的多因素分析",
"inputParams": [
{ "name": "outcome_var", "type": "string", "required": true, "description": "二分类结局变量" },
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
],
"outputType": "regression"
}
]
}