feat(ssa): Complete T-test end-to-end testing with 9 bug fixes - Phase 1 core 85% complete. R service: missing value auto-filter. Backend: error handling, variable matching, dynamic filename. Frontend: module activation, session isolation, error propagation. Full flow verified.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-19 20:57:00 +08:00
parent 8137e3cde2
commit 49b5c37cb1
86 changed files with 21207 additions and 252 deletions

View File

@@ -1,8 +1,8 @@
# SSA-Pro R 服务开发指南
> **文档版本:** v1.3
> **文档版本:** v1.5
> **创建日期:** 2026-02-18
> **最后更新:** 2026-02-18(纳入 V3.0 终极审查建议)
> **最后更新:** 2026-02-18(纳入专家配置体系 + 统一入口函数)
> **目标读者:** R 统计工程师
---
@@ -15,31 +15,51 @@ r-statistics-service/
├── renv.lock # 📌 包版本锁定文件
├── .Rprofile # renv 初始化
├── plumber.R # Plumber 入口
├── tools/ # 统计工具目录
│ ├── ST_T_TEST_IND.R
│ ├── ST_T_TEST_PAIRED.R
│ ├── ST_ANOVA_ONE.R
│ └── ...
├── templates/ # 📌 代码模板目录(glue)
├── tools/ # 🆕 专家上传的 R 脚本(统一 run_analysis 入口)
│ ├── t_test_ind.R # 独立样本 T 检验
│ ├── t_test_paired.R # 配对样本 T 检验
│ ├── anova_one.R # 单因素方差分析
│ ├── wilcoxon.R # Wilcoxon 秩和检验
│ └── ... # 📌 约 100 个成熟脚本
├── templates/ # 📌 代码交付模板(glue 格式)
│ ├── t_test.R.template
│ ├── anova.R.template
│ └── ...
├── utils/
│ ├── data_loader.R # 🆕 混合数据协议加载器
│ ├── guardrails.R # 护栏函数库
│ ├── data_loader.R # 混合数据协议加载器
│ ├── guardrails.R # 🆕 护栏函数库(支持 Block/Warn/Switch)
│ ├── code_generator.R # 代码生成工具(使用 glue)
│ ├── result_formatter.R # 🆕 结果格式化(p_value_fmt)
│ ├── result_formatter.R # 结果格式化(p_value_fmt)
│ ├── interpretation.R # 🆕 结果解读(基于配置模板)
│ └── error_codes.R # 📌 错误码定义
├── metadata/ # 工具元数据
│ └── tools.yaml # 所有工具定义
├── metadata/ # 工具元数据(由配置中台管理)
│ └── tools.yaml # 备用配置
└── tests/
├── test_tools.R # 单元测试
└── fixtures/ # 🆕 标准测试数据集
└── fixtures/ # 标准测试数据集
├── normal_data.csv
├── skewed_data.csv
└── missing_data.csv
```
### 1.1 🆕 专家 R 脚本规范
> **核心要求**:所有脚本必须使用统一入口函数 `run_analysis(input)`
```r
# 文件头部注释(必填)
#' @tool_code ST_T_TEST_IND
#' @name 独立样本 T 检验
#' @version 1.0.0
#' @description 比较两组独立样本的均值差异
#' @author 统计学专家团队
# 📌 Unified entry point: every tool script must implement run_analysis(input).
# NOTE(review): the structure of `input` and the expected return value are not
# specified in this section - confirm against the service contract.
run_analysis <- function(input) {
# ... implementation goes here ...
}
```
---
## 2. Dockerfile 模板
@@ -725,25 +745,46 @@ generate_boxplot <- function(df, group_var, value_var, tmp_files_ref) {
# 🆕 Large-sample threshold: when a vector has more than this many values,
# check_normality runs its test on a random subsample instead of the full data.
LARGE_SAMPLE_THRESHOLD <- 5000
# 正态性检验(🆕 大样本优化)
check_normality <- function(values, alpha = 0.05) {
# 🆕 Guardrail action types (reported by a check when it does not pass)
ACTION_BLOCK <- "Block" # stop execution
ACTION_WARN <- "Warn" # warn but continue
ACTION_SWITCH <- "Switch" # switch to an alternative method
# 🆕 Structure of a guardrail check result
# (the checks in this file set `action` to NULL when `passed` is TRUE)
# list(
# passed = TRUE/FALSE,
# action = "Block" | "Warn" | "Switch",
# action_target = "ST_XXX" | NULL,
# p_value = 0.05,
# reason = "description"
# )
# 正态性检验(🆕 支持三种 Action)
check_normality <- function(values, alpha = 0.05, action = ACTION_SWITCH, action_target = NULL) {
n <- length(values)
# 样本量过小
if (n < 3) {
return(list(passed = TRUE, reason = "样本量过小,跳过正态性检验", skipped = TRUE))
return(list(
passed = TRUE,
action = NULL,
action_target = NULL,
reason = "样本量过小,跳过正态性检验",
skipped = TRUE
))
}
# 🆕 大样本优化N > 5000 时使用抽样检验
if (n > LARGE_SAMPLE_THRESHOLD) {
# 抽取 1000 个样本进行检验
set.seed(42) # 保证可重复性
set.seed(42)
sampled_values <- sample(values, 1000)
test <- shapiro.test(sampled_values)
passed <- test$p.value >= alpha
return(list(
passed = passed,
action = if (passed) NULL else action,
action_target = if (passed) NULL else action_target,
p_value = test$p.value,
reason = glue("大样本(N={n})抽样检验,{if (passed) '满足正态性' else '不满足正态性'}"),
sampled = TRUE,
@@ -757,6 +798,8 @@ check_normality <- function(values, alpha = 0.05) {
return(list(
passed = passed,
action = if (passed) NULL else action,
action_target = if (passed) NULL else action_target,
p_value = test$p.value,
reason = if (passed) "满足正态性" else "不满足正态性",
sampled = FALSE
@@ -764,7 +807,7 @@ check_normality <- function(values, alpha = 0.05) {
}
# 方差齐性检验 (Levene)
check_homogeneity <- function(df, group_var, value_var, alpha = 0.05) {
check_homogeneity <- function(df, group_var, value_var, alpha = 0.05, action = ACTION_WARN) {
library(car)
formula <- as.formula(paste(value_var, "~", group_var))
@@ -774,20 +817,52 @@ check_homogeneity <- function(df, group_var, value_var, alpha = 0.05) {
return(list(
passed = passed,
action = if (passed) NULL else action,
p_value = p_val,
reason = if (passed) "方差齐性满足" else "方差不齐性"
))
}
# 样本量检验
check_sample_size <- function(n, min_required = 30) {
#' Sample-size guardrail.
#'
#' @param n Observed sample size.
#' @param min_required Minimum acceptable sample size (default 30).
#' @param action Action reported when the check fails (default ACTION_BLOCK).
#' @return A list with `passed` (logical), `action` (NULL when passed,
#'   otherwise `action`), `n` (echoed back) and `reason` (message string).
check_sample_size <- function(n, min_required = 30, action = ACTION_BLOCK) {
  # isTRUE() turns an NA / zero-length / non-scalar comparison into a plain
  # failure, so the guardrail returns a structured result instead of erroring
  # in the `if` conditions below.
  passed <- isTRUE(n >= min_required)

  list(
    passed = passed,
    action = if (passed) NULL else action,
    n = n,
    reason = if (passed) "样本量充足" else paste0("样本量不足, 需要至少 ", min_required)
  )
}
#' 🆕 Run the guardrail chain.
#'
#' Checks are executed in the order in which they appear in
#' `guardrail_configs` (the configuration is expected to be sorted by
#' check_order already). The chain stops at the first failed check whose
#' action is Block or Switch; failed checks with any other action are
#' collected as warnings and the chain continues.
#'
#' @param input Value passed as the first positional argument to every check.
#' @param guardrail_configs List of configs; each config is a list with
#'   `check_code` (name of the check function), and optionally `action_type`
#'   and `action_target`.
#' @return One of
#'   `list(status = "blocked", reason)`,
#'   `list(status = "switch", target_tool, reason)`, or
#'   `list(status = "passed", warnings)` where `warnings` is a list of
#'   `list(check_code, reason)` entries for failed non-blocking checks.
run_guardrail_chain <- function(input, guardrail_configs) {
  warnings <- list()

  for (config in guardrail_configs) {
    # mode = "function" skips same-named non-function objects on the search path.
    check_func <- get(config$check_code, mode = "function")

    # Unset (NULL) options are dropped so the check's own defaults apply.
    optional_args <- Filter(
      Negate(is.null),
      list(action = config$action_type, action_target = config$action_target)
    )

    # Not every check accepts every option (e.g. a check without an
    # `action_target` parameter); forward only what the function declares,
    # otherwise do.call() fails with "unused argument".
    accepted <- names(formals(check_func))
    if (!("..." %in% accepted)) {
      optional_args <- optional_args[names(optional_args) %in% accepted]
    }

    result <- do.call(check_func, c(list(input), optional_args))

    if (!isTRUE(result$passed)) {
      # identical() is safe when result$action is NULL; `==` would yield
      # logical(0) and make `if` error.
      if (identical(result$action, ACTION_BLOCK)) {
        return(list(
          status = "blocked",
          reason = result$reason
        ))
      }
      if (identical(result$action, ACTION_SWITCH)) {
        return(list(
          status = "switch",
          target_tool = result$action_target,
          reason = result$reason
        ))
      }
      # Warn (or unspecified action): record the warning and keep going.
      warnings[[length(warnings) + 1L]] <- list(
        check_code = config$check_code,
        reason = result$reason
      )
    }
  }

  list(status = "passed", warnings = warnings)
}
```
---