feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions

View File

@@ -19,8 +19,8 @@
"predictorType": "binary",
"design": "paired",
"primaryTool": "ST_T_TEST_PAIRED",
"fallbackTool": null,
"switchCondition": null,
"fallbackTool": "ST_WILCOXON",
"switchCondition": "normality_fail: 差值 Shapiro-Wilk P<0.05 时切换 Wilcoxon 符号秩检验",
"templateId": "paired_analysis",
"priority": 10,
"description": "配对设计前后对比"
@@ -31,12 +31,12 @@
"outcomeType": "continuous",
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_T_TEST_IND",
"fallbackTool": "ST_MANN_WHITNEY",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
"primaryTool": "ST_ANOVA_ONE",
"fallbackTool": "ST_ANOVA_ONE",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05 时内部自动切换 Kruskal-Wallis",
"templateId": "standard_analysis",
"priority": 5,
"description": "多组连续变量比较(暂用 T 检验处理两组场景,ANOVA 待扩展)"
"description": "多组连续变量比较(ANOVA / Kruskal-Wallis)"
},
{
"id": "DIFF_CAT_CAT_IND",
@@ -45,12 +45,25 @@
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"fallbackTool": "ST_FISHER",
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
"templateId": "standard_analysis",
"priority": 10,
"description": "两个分类变量的独立性检验"
},
{
"id": "DIFF_CAT_CAT_SMALL",
"goal": "comparison",
"outcomeType": "categorical",
"predictorType": "binary",
"design": "independent",
"primaryTool": "ST_FISHER",
"fallbackTool": null,
"switchCondition": null,
"templateId": "standard_analysis",
"priority": 8,
"description": "小样本分类变量独立性检验(Fisher 精确检验)"
},
{
"id": "ASSOC_CONT_CONT",
"goal": "correlation",
@@ -71,8 +84,8 @@
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"fallbackTool": "ST_FISHER",
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
"templateId": "standard_analysis",
"priority": 5,
"description": "分类变量关联分析"
@@ -96,12 +109,12 @@
"outcomeType": "continuous",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CORRELATION",
"primaryTool": "ST_LINEAR_REG",
"fallbackTool": null,
"switchCondition": null,
"templateId": "regression_analysis",
"priority": 5,
"description": "连续结局的回归分析(线性回归待扩展,暂用相关分析)"
"description": "连续结局的多因素线性回归分析"
},
{
"id": "DESC_ANY",

View File

@@ -45,16 +45,16 @@
{
"order": 1,
"role": "baseline_table",
"tool": "ST_DESCRIPTIVE",
"tool": "ST_BASELINE_TABLE",
"name": "表1: 组间基线特征比较",
"paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
"paramsMapping": { "group_var": "{{grouping_var}}", "analyze_vars": "{{all_predictors}}" }
},
{
"order": 2,
"role": "univariate_screen",
"tool": "ST_DESCRIPTIVE",
"tool": "ST_BASELINE_TABLE",
"name": "表2: 结局指标单因素分析",
"paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
"paramsMapping": { "group_var": "{{outcome_var}}", "analyze_vars": "{{all_predictors}}" }
},
{
"order": 3,

View File

@@ -0,0 +1,58 @@
{
"rules": [
{
"intent": "analyze",
"keywords": ["分析", "检验", "t检验", "卡方", "回归", "比较一下", "跑一下", "执行分析", "做个分析", "方差分析", "ANOVA", "相关分析", "logistic", "生存分析", "Cox", "基线表"],
"excludeKeywords": ["什么方法", "用什么", "应该怎么", "推荐"],
"requires": ["dataOverview"],
"priority": 10
},
{
"intent": "discuss",
"keywords": ["什么意思", "说明什么", "怎么解释", "p值", "置信区间", "结果说明", "为什么显著", "为什么不显著", "临床意义", "效应量"],
"requires": ["dataOverview", "hasAnalysisResults"],
"priority": 9
},
{
"intent": "feedback",
"keywords": ["结果不对", "不太对", "换个方法", "重新分析", "有问题", "不满意", "重做"],
"requires": ["dataOverview", "hasAnalysisResults"],
"priority": 9
},
{
"intent": "explore",
"keywords": ["看看", "分布", "缺失", "概况", "有哪些变量", "数据特征", "异常值", "样本量", "描述一下数据", "多少例", "变量类型"],
"requires": ["dataOverview"],
"priority": 8
},
{
"intent": "consult",
"keywords": ["什么方法", "用什么", "应该怎么分析", "推荐方法", "分析方案", "哪种检验", "怎么选", "前提条件"],
"requires": ["dataOverview"],
"priority": 7
}
],
"contextGuards": {
"explore": {
"requires": ["dataOverview"],
"fallbackMessage": "您还没有上传数据。请先上传 CSV 或 Excel 文件,我就能帮您探索数据了。您也可以先问我统计方法相关的问题。"
},
"analyze": {
"requires": ["dataOverview"],
"fallbackMessage": "您还没有上传数据。请先上传数据文件,我才能帮您执行统计分析。"
},
"consult": {
"requires": ["dataOverview"],
"fallbackMessage": "如果您上传了数据,我可以根据数据特征给出更精准的方法推荐。不过您也可以直接描述研究设计,我来给出一般性建议。"
},
"discuss": {
"requires": ["dataOverview", "hasAnalysisResults"],
"fallbackMessage": "目前还没有分析结果可以讨论。请先执行一次统计分析,然后我们就可以深入讨论结果了。"
},
"feedback": {
"requires": ["dataOverview", "hasAnalysisResults"],
"fallbackMessage": "目前还没有分析结果可以改进。请先执行一次统计分析,如果对结果不满意,我来帮您调整。"
}
},
"defaultIntent": "chat"
}

View File

@@ -82,6 +82,65 @@
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
],
"outputType": "regression"
},
{
"code": "ST_FISHER",
"name": "Fisher精确检验",
"category": "categorical",
"description": "小样本或稀疏列联表的精确独立性检验(卡方检验的替代方法)",
"inputParams": [
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
],
"outputType": "association"
},
{
"code": "ST_ANOVA_ONE",
"name": "单因素方差分析",
"category": "parametric",
"description": "三组及以上独立样本的均值差异比较(含事后多重比较)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(3+水平)" },
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
],
"outputType": "comparison",
"prerequisite": "正态分布 + 方差齐性",
"fallback": "Kruskal-Wallis"
},
{
"code": "ST_WILCOXON",
"name": "Wilcoxon符号秩检验",
"category": "nonparametric",
"description": "配对样本的非参数检验(配对T检验的替代方法)",
"inputParams": [
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
],
"outputType": "comparison"
},
{
"code": "ST_LINEAR_REG",
"name": "线性回归",
"category": "regression",
"description": "连续型结局变量的多因素线性回归分析",
"inputParams": [
{ "name": "outcome_var", "type": "string", "required": true, "description": "连续型结局变量" },
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
],
"outputType": "regression"
},
{
"code": "ST_BASELINE_TABLE",
"name": "基线特征表",
"category": "composite",
"description": "基于 gtsummary 的一键式基线特征表生成,自动判断变量类型、选方法、合并出表(复合工具)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量" },
{ "name": "analyze_vars", "type": "string[]", "required": false, "description": "分析变量列表(不传则自动选取全部)" }
],
"outputType": "baseline_table",
"composite": true
}
]
}