feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development
Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -19,8 +19,8 @@
|
||||
"predictorType": "binary",
|
||||
"design": "paired",
|
||||
"primaryTool": "ST_T_TEST_PAIRED",
|
||||
- "fallbackTool": null,
- "switchCondition": null,
+ "fallbackTool": "ST_WILCOXON",
+ "switchCondition": "normality_fail: 差值 Shapiro-Wilk P<0.05 时切换 Wilcoxon 符号秩检验",
|
||||
"templateId": "paired_analysis",
|
||||
"priority": 10,
|
||||
"description": "配对设计前后对比"
|
||||
@@ -31,12 +31,12 @@
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
- "primaryTool": "ST_T_TEST_IND",
- "fallbackTool": "ST_MANN_WHITNEY",
- "switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
+ "primaryTool": "ST_ANOVA_ONE",
+ "fallbackTool": "ST_ANOVA_ONE",
+ "switchCondition": "normality_fail: Shapiro-Wilk P<0.05 时内部自动切换 Kruskal-Wallis",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
- "description": "多组连续变量比较(暂用 T 检验处理两组场景,ANOVA 待扩展)"
+ "description": "多组连续变量比较(ANOVA / Kruskal-Wallis)"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CAT_CAT_IND",
|
||||
@@ -45,12 +45,25 @@
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
- "fallbackTool": "ST_CHI_SQUARE",
- "switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
+ "fallbackTool": "ST_FISHER",
+ "switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 10,
|
||||
"description": "两个分类变量的独立性检验"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CAT_CAT_SMALL",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "categorical",
|
||||
"predictorType": "binary",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_FISHER",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 8,
|
||||
"description": "小样本分类变量独立性检验(Fisher 精确检验)"
|
||||
},
|
||||
{
|
||||
"id": "ASSOC_CONT_CONT",
|
||||
"goal": "correlation",
|
||||
@@ -71,8 +84,8 @@
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
- "fallbackTool": "ST_CHI_SQUARE",
- "switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
+ "fallbackTool": "ST_FISHER",
+ "switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
"description": "分类变量关联分析"
|
||||
@@ -96,12 +109,12 @@
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
- "primaryTool": "ST_CORRELATION",
+ "primaryTool": "ST_LINEAR_REG",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "regression_analysis",
|
||||
"priority": 5,
|
||||
- "description": "连续结局的回归分析(线性回归待扩展,暂用相关分析)"
+ "description": "连续结局的多因素线性回归分析"
|
||||
},
|
||||
{
|
||||
"id": "DESC_ANY",
|
||||
|
||||
@@ -45,16 +45,16 @@
|
||||
{
|
||||
"order": 1,
|
||||
"role": "baseline_table",
|
||||
- "tool": "ST_DESCRIPTIVE",
+ "tool": "ST_BASELINE_TABLE",
|
||||
"name": "表1: 组间基线特征比较",
|
||||
- "paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
+ "paramsMapping": { "group_var": "{{grouping_var}}", "analyze_vars": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 2,
|
||||
"role": "univariate_screen",
|
||||
- "tool": "ST_DESCRIPTIVE",
+ "tool": "ST_BASELINE_TABLE",
|
||||
"name": "表2: 结局指标单因素分析",
|
||||
- "paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
+ "paramsMapping": { "group_var": "{{outcome_var}}", "analyze_vars": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 3,
|
||||
|
||||
backend/src/modules/ssa/config/intent_rules.json
New file (normal mode), 58 lines added
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"rules": [
|
||||
{
|
||||
"intent": "analyze",
|
||||
"keywords": ["分析", "检验", "t检验", "卡方", "回归", "比较一下", "跑一下", "执行分析", "做个分析", "方差分析", "ANOVA", "相关分析", "logistic", "生存分析", "Cox", "基线表"],
|
||||
"excludeKeywords": ["什么方法", "用什么", "应该怎么", "推荐"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 10
|
||||
},
|
||||
{
|
||||
"intent": "discuss",
|
||||
"keywords": ["什么意思", "说明什么", "怎么解释", "p值", "置信区间", "结果说明", "为什么显著", "为什么不显著", "临床意义", "效应量"],
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"priority": 9
|
||||
},
|
||||
{
|
||||
"intent": "feedback",
|
||||
"keywords": ["结果不对", "不太对", "换个方法", "重新分析", "有问题", "不满意", "重做"],
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"priority": 9
|
||||
},
|
||||
{
|
||||
"intent": "explore",
|
||||
"keywords": ["看看", "分布", "缺失", "概况", "有哪些变量", "数据特征", "异常值", "样本量", "描述一下数据", "多少例", "变量类型"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 8
|
||||
},
|
||||
{
|
||||
"intent": "consult",
|
||||
"keywords": ["什么方法", "用什么", "应该怎么分析", "推荐方法", "分析方案", "哪种检验", "怎么选", "前提条件"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 7
|
||||
}
|
||||
],
|
||||
"contextGuards": {
|
||||
"explore": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "您还没有上传数据。请先上传 CSV 或 Excel 文件,我就能帮您探索数据了。您也可以先问我统计方法相关的问题。"
|
||||
},
|
||||
"analyze": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "您还没有上传数据。请先上传数据文件,我才能帮您执行统计分析。"
|
||||
},
|
||||
"consult": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "如果您上传了数据,我可以根据数据特征给出更精准的方法推荐。不过您也可以直接描述研究设计,我来给出一般性建议。"
|
||||
},
|
||||
"discuss": {
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"fallbackMessage": "目前还没有分析结果可以讨论。请先执行一次统计分析,然后我们就可以深入讨论结果了。"
|
||||
},
|
||||
"feedback": {
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"fallbackMessage": "目前还没有分析结果可以改进。请先执行一次统计分析,如果对结果不满意,我来帮您调整。"
|
||||
}
|
||||
},
|
||||
"defaultIntent": "chat"
|
||||
}
|
||||
@@ -82,6 +82,65 @@
|
||||
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
|
||||
],
|
||||
"outputType": "regression"
|
||||
},
|
||||
{
|
||||
"code": "ST_FISHER",
|
||||
"name": "Fisher精确检验",
|
||||
"category": "categorical",
|
||||
"description": "小样本或稀疏列联表的精确独立性检验(卡方检验的替代方法)",
|
||||
"inputParams": [
|
||||
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
|
||||
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
|
||||
],
|
||||
"outputType": "association"
|
||||
},
|
||||
{
|
||||
"code": "ST_ANOVA_ONE",
|
||||
"name": "单因素方差分析",
|
||||
"category": "parametric",
|
||||
"description": "三组及以上独立样本的均值差异比较(含事后多重比较)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(3+水平)" },
|
||||
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
|
||||
],
|
||||
"outputType": "comparison",
|
||||
"prerequisite": "正态分布 + 方差齐性",
|
||||
"fallback": "Kruskal-Wallis"
|
||||
},
|
||||
{
|
||||
"code": "ST_WILCOXON",
|
||||
"name": "Wilcoxon符号秩检验",
|
||||
"category": "nonparametric",
|
||||
"description": "配对样本的非参数检验(配对T检验的替代方法)",
|
||||
"inputParams": [
|
||||
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
|
||||
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
|
||||
],
|
||||
"outputType": "comparison"
|
||||
},
|
||||
{
|
||||
"code": "ST_LINEAR_REG",
|
||||
"name": "线性回归",
|
||||
"category": "regression",
|
||||
"description": "连续型结局变量的多因素线性回归分析",
|
||||
"inputParams": [
|
||||
{ "name": "outcome_var", "type": "string", "required": true, "description": "连续型结局变量" },
|
||||
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
|
||||
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
|
||||
],
|
||||
"outputType": "regression"
|
||||
},
|
||||
{
|
||||
"code": "ST_BASELINE_TABLE",
|
||||
"name": "基线特征表",
|
||||
"category": "composite",
|
||||
"description": "基于 gtsummary 的一键式基线特征表生成,自动判断变量类型、选方法、合并出表(复合工具)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量" },
|
||||
{ "name": "analyze_vars", "type": "string[]", "required": false, "description": "分析变量列表(不传则自动选取全部)" }
|
||||
],
|
||||
"outputType": "baseline_table",
|
||||
"composite": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user