feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers
Implement the full QPER intelligent analysis pipeline: - Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement - Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging - Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output - Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification End-to-end test: 40/40 passed across two complete analysis scenarios. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
85
backend/src/modules/ssa/config/ConfigLoader.ts
Normal file
85
backend/src/modules/ssa/config/ConfigLoader.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* SSA ConfigLoader — 配置化基础设施
|
||||
*
|
||||
* 通用基类:读 JSON 文件 → Zod Schema 校验 → 内存缓存
|
||||
* 支持热更新(reload 时重新读盘 + 重新校验,失败保留旧配置)
|
||||
*
|
||||
* 核心原则第 6 条:一切业务逻辑靠读 JSON 驱动,不写死在代码中。
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname } from 'path';
|
||||
import type { ZodType } from 'zod';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
/**
 * Outcome of one hot-reload attempt for a single config file,
 * as returned by `ConfigLoader.reload()`.
 */
export interface ReloadResult {
  /** `true` when the file was re-read from disk and passed validation. */
  success: boolean;

  /** Label of the config this result belongs to (the loader's label, not a file path). */
  file: string;

  /** Failure detail; only set when `success` is `false`. */
  error?: string;
}
|
||||
|
||||
/**
 * Generic loader for one JSON config file: read from disk, validate with a
 * Zod schema, keep the validated value in memory.
 *
 * `get()` loads lazily on first use and throws if the file is unreadable or
 * invalid. `reload()` re-reads the file and never throws: when anything goes
 * wrong the previously cached config is left untouched and the failure is
 * reported in the returned `ReloadResult`.
 */
export class ConfigLoader<T> {
  /** Last successfully validated config; `null` until the first successful load. */
  private cache: T | null = null;
  private readonly filePath: string;
  private readonly schema: ZodType<T>;
  private readonly label: string;

  /**
   * @param fileName JSON file name, resolved relative to this module's directory.
   * @param schema   Zod schema the parsed JSON has to satisfy.
   * @param label    Short name used in log lines, error messages and `ReloadResult.file`.
   */
  constructor(fileName: string, schema: ZodType<T>, label: string) {
    this.filePath = join(__dirname, fileName);
    this.schema = schema;
    this.label = label;
  }

  /**
   * Returns the cached config, loading it from disk on the first call.
   *
   * @returns The validated config.
   * @throws Error when the file cannot be read, is not valid JSON, or fails
   *         schema validation. Nothing is cached in that case, so the next
   *         call tries again.
   */
  get(): T {
    // Compare against null explicitly: a validated config that happens to be
    // falsy (0, '', false) must still count as "already loaded".
    if (this.cache !== null) {
      return this.cache;
    }
    const loaded = this.loadFromDisk();
    this.cache = loaded;
    return loaded;
  }

  /**
   * Hot reload: re-reads the file from disk and validates it again.
   * On any failure the old cached config is kept and the error detail is
   * returned instead of thrown.
   *
   * @returns A `ReloadResult` describing whether the reload succeeded.
   */
  reload(): ReloadResult {
    try {
      // The cache is only replaced once read + parse + validation all succeeded.
      this.cache = this.loadFromDisk();
      logger.info(`[SSA:Config] ${this.label} reloaded successfully`);
      return { success: true, file: this.label };
    } catch (err: unknown) {
      // Anything can be thrown in JS; only Error instances carry `.message`.
      const message = err instanceof Error ? err.message : String(err);
      logger.error(`[SSA:Config] ${this.label} reload failed, keeping old config`, {
        error: message,
      });
      return { success: false, file: this.label, error: message };
    }
  }

  /**
   * Reads the file, parses it as JSON and validates it against the schema.
   * Does not touch the cache; callers decide what to do with the result.
   *
   * @returns The schema-validated (and schema-defaulted) config.
   * @throws Error on read failure, JSON syntax error or schema violation.
   */
  private loadFromDisk(): T {
    const raw = readFileSync(this.filePath, 'utf-8');
    // Some editors save JSON with a UTF-8 BOM, which JSON.parse rejects.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;

    let parsed: unknown;
    try {
      parsed = JSON.parse(text);
    } catch (e: unknown) {
      const detail = e instanceof Error ? e.message : String(e);
      throw new Error(`${this.label}: JSON 语法错误 — ${detail}`);
    }

    const result = this.schema.safeParse(parsed);
    if (!result.success) {
      // One line per issue: "<dotted path>: <message>".
      const issues = result.error.issues
        .map(i => ` - ${i.path.join('.')}: ${i.message}`)
        .join('\n');
      throw new Error(`${this.label}: Schema 校验失败\n${issues}`);
    }

    return result.data;
  }
}
|
||||
132
backend/src/modules/ssa/config/decision_tables.json
Normal file
132
backend/src/modules/ssa/config/decision_tables.json
Normal file
@@ -0,0 +1,132 @@
|
||||
[
|
||||
{
|
||||
"id": "DIFF_CONT_BIN_IND",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "binary",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_T_TEST_IND",
|
||||
"fallbackTool": "ST_MANN_WHITNEY",
|
||||
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 10,
|
||||
"description": "两组连续变量比较(独立样本)"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CONT_BIN_PAIRED",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "binary",
|
||||
"design": "paired",
|
||||
"primaryTool": "ST_T_TEST_PAIRED",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "paired_analysis",
|
||||
"priority": 10,
|
||||
"description": "配对设计前后对比"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CONT_MULTI_IND",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_T_TEST_IND",
|
||||
"fallbackTool": "ST_MANN_WHITNEY",
|
||||
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
"description": "多组连续变量比较(暂用 T 检验处理两组场景,ANOVA 待扩展)"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CAT_CAT_IND",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "categorical",
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
"fallbackTool": "ST_CHI_SQUARE",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 10,
|
||||
"description": "两个分类变量的独立性检验"
|
||||
},
|
||||
{
|
||||
"id": "ASSOC_CONT_CONT",
|
||||
"goal": "correlation",
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "continuous",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CORRELATION",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 10,
|
||||
"description": "两个连续变量的相关分析(Pearson/Spearman 自动选择)"
|
||||
},
|
||||
{
|
||||
"id": "ASSOC_CAT_ANY",
|
||||
"goal": "correlation",
|
||||
"outcomeType": "categorical",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
"fallbackTool": "ST_CHI_SQUARE",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
"description": "分类变量关联分析"
|
||||
},
|
||||
{
|
||||
"id": "PRED_BIN_ANY",
|
||||
"goal": "regression",
|
||||
"outcomeType": "binary",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_LOGISTIC_BINARY",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "regression_analysis",
|
||||
"priority": 10,
|
||||
"description": "二分类结局的多因素 Logistic 回归"
|
||||
},
|
||||
{
|
||||
"id": "PRED_CONT_ANY",
|
||||
"goal": "regression",
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CORRELATION",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "regression_analysis",
|
||||
"priority": 5,
|
||||
"description": "连续结局的回归分析(线性回归待扩展,暂用相关分析)"
|
||||
},
|
||||
{
|
||||
"id": "DESC_ANY",
|
||||
"goal": "descriptive",
|
||||
"outcomeType": "*",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_DESCRIPTIVE",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "descriptive_only",
|
||||
"priority": 1,
|
||||
"description": "纯描述性统计"
|
||||
},
|
||||
{
|
||||
"id": "COHORT_STUDY",
|
||||
"goal": "cohort_study",
|
||||
"outcomeType": "binary",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_DESCRIPTIVE",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "cohort_study_standard",
|
||||
"priority": 20,
|
||||
"description": "队列研究全套分析(Table 1→2→3)"
|
||||
}
|
||||
]
|
||||
69
backend/src/modules/ssa/config/flow_templates.json
Normal file
69
backend/src/modules/ssa/config/flow_templates.json
Normal file
@@ -0,0 +1,69 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"templates": [
|
||||
{
|
||||
"id": "standard_analysis",
|
||||
"name": "标准分析流程",
|
||||
"description": "适用于差异比较、相关分析等场景的通用三步模板",
|
||||
"steps": [
|
||||
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
|
||||
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "主分析" },
|
||||
{ "order": 3, "role": "sensitivity", "tool": "{{fallbackTool}}", "name": "敏感性分析", "condition": "fallback_exists" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "paired_analysis",
|
||||
"name": "配对设计分析",
|
||||
"description": "配对设计的前后对比分析",
|
||||
"steps": [
|
||||
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
|
||||
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "配对检验" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "regression_analysis",
|
||||
"name": "回归建模",
|
||||
"description": "描述统计 + 多因素回归分析",
|
||||
"steps": [
|
||||
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" },
|
||||
{ "order": 2, "role": "primary_test", "tool": "{{primaryTool}}", "name": "多因素回归" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "descriptive_only",
|
||||
"name": "描述性统计",
|
||||
"description": "仅做数据概况分析",
|
||||
"steps": [
|
||||
{ "order": 1, "role": "descriptive", "tool": "ST_DESCRIPTIVE", "name": "描述性统计" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "cohort_study_standard",
|
||||
"name": "经典队列研究全套分析",
|
||||
"description": "覆盖 Table 1(基线比较)→ Table 2(单因素筛选)→ Table 3(多因素回归)",
|
||||
"steps": [
|
||||
{
|
||||
"order": 1,
|
||||
"role": "baseline_table",
|
||||
"tool": "ST_DESCRIPTIVE",
|
||||
"name": "表1: 组间基线特征比较",
|
||||
"paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 2,
|
||||
"role": "univariate_screen",
|
||||
"tool": "ST_DESCRIPTIVE",
|
||||
"name": "表2: 结局指标单因素分析",
|
||||
"paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 3,
|
||||
"role": "multivariate_reg",
|
||||
"tool": "ST_LOGISTIC_BINARY",
|
||||
"name": "表3: 多因素 Logistic 回归",
|
||||
"paramsMapping": { "outcome_var": "{{outcome_var}}", "predictors": "{{epv_capped_predictors}}" }
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
48
backend/src/modules/ssa/config/index.ts
Normal file
48
backend/src/modules/ssa/config/index.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* SSA 配置中心 — 统一管理所有领域 JSON 配置
|
||||
*
|
||||
* 每个 ConfigLoader 实例对应一个 JSON 文件 + Zod Schema。
|
||||
* 提供 reloadAll() 供热更新 API 调用。
|
||||
*/
|
||||
|
||||
import { ConfigLoader, type ReloadResult } from './ConfigLoader.js';
|
||||
import {
|
||||
ToolsRegistrySchema,
|
||||
DecisionTablesSchema,
|
||||
FlowTemplatesSchema,
|
||||
type ToolsRegistry,
|
||||
type DecisionTable,
|
||||
type FlowTemplatesConfig,
|
||||
} from './schemas.js';
|
||||
|
||||
/** Loader for `tools_registry.json`, validated against `ToolsRegistrySchema`. */
export const toolsRegistryLoader = new ConfigLoader<ToolsRegistry>(
  'tools_registry.json',
  ToolsRegistrySchema,
  'tools_registry',
);

/** Loader for `decision_tables.json` (an array of rules), validated against `DecisionTablesSchema`. */
export const decisionTablesLoader = new ConfigLoader<DecisionTable[]>(
  'decision_tables.json',
  DecisionTablesSchema,
  'decision_tables',
);

/** Loader for `flow_templates.json`, validated against `FlowTemplatesSchema`. */
export const flowTemplatesLoader = new ConfigLoader<FlowTemplatesConfig>(
  'flow_templates.json',
  FlowTemplatesSchema,
  'flow_templates',
);
|
||||
|
||||
/**
 * Hot-reloads every SSA config file.
 *
 * Each loader re-reads and re-validates its own file. `reload()` reports a
 * failure in its result rather than throwing, so one broken file does not
 * prevent the remaining files from being reloaded.
 *
 * @returns One `ReloadResult` per config, in the order: tools registry,
 *          decision tables, flow templates.
 */
export function reloadAllConfigs(): ReloadResult[] {
  const toolsRegistryResult = toolsRegistryLoader.reload();
  const decisionTablesResult = decisionTablesLoader.reload();
  const flowTemplatesResult = flowTemplatesLoader.reload();

  return [toolsRegistryResult, decisionTablesResult, flowTemplatesResult];
}
|
||||
|
||||
export type { ReloadResult } from './ConfigLoader.js';
|
||||
91
backend/src/modules/ssa/config/schemas.ts
Normal file
91
backend/src/modules/ssa/config/schemas.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
/**
|
||||
* SSA 领域配置 Zod Schema
|
||||
*
|
||||
* 方法学团队编辑 JSON 时的拼写/结构错误在加载时立即拦截。
|
||||
* 每个 Schema 对应一个 JSON 领域文件。
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 1. tools_registry.json — E 层工具注册表
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/** Value types a tool parameter may declare. */
const TOOL_PARAM_TYPES = ['string', 'number', 'boolean', 'string[]', 'number[]'] as const;

/** Tool codes are `ST_` followed by upper-case letters and underscores. */
const TOOL_CODE_PATTERN = /^ST_[A-Z_]+$/;

/** Optional free-text field shared by the tool schemas below. */
const optionalToolText = z.string().optional();

/** One input parameter accepted by a tool. */
const ToolParamSchema = z.object({
  name: z.string(),
  type: z.enum(TOOL_PARAM_TYPES),
  // A parameter counts as required unless the JSON explicitly says otherwise.
  required: z.boolean().default(true),
  description: optionalToolText,
  // Default value of the parameter itself; any JSON value is accepted.
  default: z.unknown().optional(),
});

/** One entry of the tool registry. */
const ToolDefinitionSchema = z.object({
  code: z.string().regex(TOOL_CODE_PATTERN, 'tool code must match ST_XXX pattern'),
  name: z.string().min(1),
  category: z.string(),
  description: z.string(),
  inputParams: z.array(ToolParamSchema),
  outputType: z.string(),
  prerequisite: optionalToolText,
  // Free-form string; it is not checked against the registered tool codes here.
  fallback: optionalToolText,
});

/** Shape of `tools_registry.json`: an optional version plus at least one tool. */
export const ToolsRegistrySchema = z.object({
  version: z.string().optional(),
  tools: z.array(ToolDefinitionSchema).min(1),
});

/** A single validated tool definition. */
export type ToolDefinition = z.infer<typeof ToolDefinitionSchema>;

/** The validated content of `tools_registry.json`. */
export type ToolsRegistry = z.infer<typeof ToolsRegistrySchema>;
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 2. decision_tables.json — P 层决策表
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/** String field that may be `null` in the JSON and becomes `null` when omitted. */
const nullableRuleText = z.string().nullable().default(null);

/**
 * One row of the decision table.
 *
 * NOTE(review): the bundled decision_tables.json uses "*" for some of
 * `outcomeType` / `predictorType` / `design`, presumably as a wildcard —
 * the matching itself is not defined here; confirm against the consumer.
 */
const DecisionRuleSchema = z.object({
  id: z.string(),
  goal: z.string(),
  outcomeType: z.string(),
  predictorType: z.string(),
  design: z.string(),
  primaryTool: z.string(),
  fallbackTool: nullableRuleText,
  switchCondition: nullableRuleText,
  templateId: z.string(),
  // Rules without an explicit priority get 0.
  priority: z.number().default(0),
  description: z.string().optional(),
});

/** Shape of `decision_tables.json`: a non-empty array of rules. */
export const DecisionTablesSchema = z.array(DecisionRuleSchema).min(1);

/** A single validated decision rule. */
export type DecisionRule = z.infer<typeof DecisionRuleSchema>;

/** Alias of `DecisionRule`; note that it names ONE rule, not the whole table. */
export type DecisionTable = DecisionRule;
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 3. flow_templates.json — P 层流程模板
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/** Optional free-text field shared by the template schemas below. */
const optionalTemplateText = z.string().optional();

/**
 * One step of a flow template.
 *
 * `tool` is any string; in the bundled flow_templates.json it is either a
 * tool code or a `{{placeholder}}` such as `{{primaryTool}}`. Placeholder
 * resolution is not part of this schema.
 */
const TemplateStepSchema = z.object({
  order: z.number(),
  role: z.string(),
  tool: z.string(),
  name: optionalTemplateText,
  condition: optionalTemplateText,
  // Maps a tool parameter name to a string value or placeholder.
  paramsMapping: z.record(z.string(), z.string()).optional(),
});

/** One flow template: identity plus at least one step. */
const FlowTemplateSchema = z.object({
  id: z.string(),
  name: z.string(),
  description: optionalTemplateText,
  steps: z.array(TemplateStepSchema).min(1),
});

/** Shape of `flow_templates.json`: an optional version plus at least one template. */
export const FlowTemplatesSchema = z.object({
  version: z.string().optional(),
  templates: z.array(FlowTemplateSchema).min(1),
});

/** A single validated template step. */
export type TemplateStep = z.infer<typeof TemplateStepSchema>;

/** A single validated flow template. */
export type FlowTemplate = z.infer<typeof FlowTemplateSchema>;

/** The validated content of `flow_templates.json`. */
export type FlowTemplatesConfig = z.infer<typeof FlowTemplatesSchema>;
|
||||
87
backend/src/modules/ssa/config/tools_registry.json
Normal file
87
backend/src/modules/ssa/config/tools_registry.json
Normal file
@@ -0,0 +1,87 @@
|
||||
{
|
||||
"version": "1.0.0",
|
||||
"tools": [
|
||||
{
|
||||
"code": "ST_DESCRIPTIVE",
|
||||
"name": "描述性统计",
|
||||
"category": "basic",
|
||||
"description": "数据概况、基线特征表",
|
||||
"inputParams": [
|
||||
{ "name": "variables", "type": "string[]", "required": true, "description": "分析变量列表" },
|
||||
{ "name": "group_var", "type": "string", "required": false, "description": "分组变量" }
|
||||
],
|
||||
"outputType": "summary"
|
||||
},
|
||||
{
|
||||
"code": "ST_T_TEST_IND",
|
||||
"name": "独立样本T检验",
|
||||
"category": "parametric",
|
||||
"description": "两组连续变量比较(参数方法)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(二分类)" },
|
||||
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
|
||||
],
|
||||
"outputType": "comparison",
|
||||
"prerequisite": "正态分布",
|
||||
"fallback": "ST_MANN_WHITNEY"
|
||||
},
|
||||
{
|
||||
"code": "ST_MANN_WHITNEY",
|
||||
"name": "Mann-Whitney U检验",
|
||||
"category": "nonparametric",
|
||||
"description": "两组连续/等级变量比较(非参数方法)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(二分类)" },
|
||||
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
|
||||
],
|
||||
"outputType": "comparison"
|
||||
},
|
||||
{
|
||||
"code": "ST_T_TEST_PAIRED",
|
||||
"name": "配对T检验",
|
||||
"category": "parametric",
|
||||
"description": "配对设计的前后对比",
|
||||
"inputParams": [
|
||||
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
|
||||
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
|
||||
],
|
||||
"outputType": "comparison"
|
||||
},
|
||||
{
|
||||
"code": "ST_CHI_SQUARE",
|
||||
"name": "卡方检验",
|
||||
"category": "categorical",
|
||||
"description": "两个分类变量的独立性检验",
|
||||
"inputParams": [
|
||||
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
|
||||
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
|
||||
],
|
||||
"outputType": "association",
|
||||
"fallback": "ST_FISHER"
|
||||
},
|
||||
{
|
||||
"code": "ST_CORRELATION",
|
||||
"name": "相关分析",
|
||||
"category": "correlation",
|
||||
"description": "Pearson/Spearman相关系数",
|
||||
"inputParams": [
|
||||
{ "name": "var_x", "type": "string", "required": true, "description": "自变量" },
|
||||
{ "name": "var_y", "type": "string", "required": true, "description": "因变量" },
|
||||
{ "name": "method", "type": "string", "required": false, "description": "auto/pearson/spearman", "default": "auto" }
|
||||
],
|
||||
"outputType": "correlation"
|
||||
},
|
||||
{
|
||||
"code": "ST_LOGISTIC_BINARY",
|
||||
"name": "二元Logistic回归",
|
||||
"category": "regression",
|
||||
"description": "二分类结局的多因素分析",
|
||||
"inputParams": [
|
||||
{ "name": "outcome_var", "type": "string", "required": true, "description": "二分类结局变量" },
|
||||
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
|
||||
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
|
||||
],
|
||||
"outputType": "regression"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user