feat(ssa): Complete T-test end-to-end testing with 9 bug fixes - Phase 1 core 85% complete. R service: missing value auto-filter. Backend: error handling, variable matching, dynamic filename. Frontend: module activation, session isolation, error propagation. Full flow verified.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
# SSA-Pro R 服务开发指南
|
||||
|
||||
> **文档版本:** v1.3
|
||||
> **文档版本:** v1.5
|
||||
> **创建日期:** 2026-02-18
|
||||
> **最后更新:** 2026-02-18(纳入 V3.0 终极审查建议)
|
||||
> **最后更新:** 2026-02-18(纳入专家配置体系 + 统一入口函数)
|
||||
> **目标读者:** R 统计工程师
|
||||
|
||||
---
|
||||
@@ -15,31 +15,51 @@ r-statistics-service/
|
||||
├── renv.lock # 📌 包版本锁定文件
|
||||
├── .Rprofile # renv 初始化
|
||||
├── plumber.R # Plumber 入口
|
||||
├── tools/ # 统计工具目录
|
||||
│ ├── ST_T_TEST_IND.R
|
||||
│ ├── ST_T_TEST_PAIRED.R
|
||||
│ ├── ST_ANOVA_ONE.R
|
||||
│ └── ...
|
||||
├── templates/ # 📌 代码模板目录(glue)
|
||||
├── tools/ # 🆕 专家上传的 R 脚本(统一 run_analysis 入口)
|
||||
│ ├── t_test_ind.R # 独立样本 T 检验
|
||||
│ ├── t_test_paired.R # 配对样本 T 检验
|
||||
│ ├── anova_one.R # 单因素方差分析
|
||||
│ ├── wilcoxon.R # Wilcoxon 秩和检验
|
||||
│ └── ... # 📌 约 100 个成熟脚本
|
||||
├── templates/ # 📌 代码交付模板(glue 格式)
|
||||
│ ├── t_test.R.template
|
||||
│ ├── anova.R.template
|
||||
│ └── ...
|
||||
├── utils/
|
||||
│ ├── data_loader.R # 🆕 混合数据协议加载器
|
||||
│ ├── guardrails.R # 护栏函数库
|
||||
│ ├── data_loader.R # 混合数据协议加载器
|
||||
│ ├── guardrails.R # 🆕 护栏函数库(支持 Block/Warn/Switch)
|
||||
│ ├── code_generator.R # 代码生成工具(使用 glue)
|
||||
│ ├── result_formatter.R # 🆕 结果格式化(p_value_fmt)
|
||||
│ ├── result_formatter.R # 结果格式化(p_value_fmt)
|
||||
│ ├── interpretation.R # 🆕 结果解读(基于配置模板)
|
||||
│ └── error_codes.R # 📌 错误码定义
|
||||
├── metadata/ # 工具元数据
|
||||
│ └── tools.yaml # 所有工具定义
|
||||
├── metadata/ # 工具元数据(由配置中台管理)
|
||||
│ └── tools.yaml # 备用配置
|
||||
└── tests/
|
||||
├── test_tools.R # 单元测试
|
||||
└── fixtures/ # 🆕 标准测试数据集
|
||||
└── fixtures/ # 标准测试数据集
|
||||
├── normal_data.csv
|
||||
├── skewed_data.csv
|
||||
└── missing_data.csv
|
||||
```
|
||||
|
||||
### 1.1 🆕 专家 R 脚本规范
|
||||
|
||||
> **核心要求**:所有脚本必须使用统一入口函数 `run_analysis(input)`
|
||||
|
||||
```r
|
||||
# 文件头部注释(必填)
|
||||
#' @tool_code ST_T_TEST_IND
|
||||
#' @name 独立样本 T 检验
|
||||
#' @version 1.0.0
|
||||
#' @description 比较两组独立样本的均值差异
|
||||
#' @author 统计学专家团队
|
||||
|
||||
# 📌 Unified entry point (every script must implement this function)
|
||||
run_analysis <- function(input) {
|
||||
# ... implementation logic goes here ...
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Dockerfile 模板
|
||||
@@ -725,25 +745,46 @@ generate_boxplot <- function(df, group_var, value_var, tmp_files_ref) {
|
||||
# 🆕 大样本优化阈值
|
||||
LARGE_SAMPLE_THRESHOLD <- 5000
|
||||
|
||||
# 正态性检验(🆕 大样本优化)
|
||||
check_normality <- function(values, alpha = 0.05) {
|
||||
# 🆕 Guardrail action types
|
||||
ACTION_BLOCK <- "Block" # Stop execution
|
||||
ACTION_WARN <- "Warn" # Warn but continue
|
||||
ACTION_SWITCH <- "Switch" # Switch to the fallback method
|
||||
|
||||
# 🆕 护栏检查结果结构
|
||||
# list(
|
||||
# passed = TRUE/FALSE,
|
||||
# action = "Block" | "Warn" | "Switch",
|
||||
# action_target = "ST_XXX" | NULL,
|
||||
# p_value = 0.05,
|
||||
# reason = "描述"
|
||||
# )
|
||||
|
||||
# 正态性检验(🆕 支持三种 Action)
|
||||
check_normality <- function(values, alpha = 0.05, action = ACTION_SWITCH, action_target = NULL) {
|
||||
n <- length(values)
|
||||
|
||||
# 样本量过小
|
||||
if (n < 3) {
|
||||
return(list(passed = TRUE, reason = "样本量过小,跳过正态性检验", skipped = TRUE))
|
||||
return(list(
|
||||
passed = TRUE,
|
||||
action = NULL,
|
||||
action_target = NULL,
|
||||
reason = "样本量过小,跳过正态性检验",
|
||||
skipped = TRUE
|
||||
))
|
||||
}
|
||||
|
||||
# 🆕 大样本优化:N > 5000 时使用抽样检验
|
||||
if (n > LARGE_SAMPLE_THRESHOLD) {
|
||||
# 抽取 1000 个样本进行检验
|
||||
set.seed(42) # 保证可重复性
|
||||
set.seed(42)
|
||||
sampled_values <- sample(values, 1000)
|
||||
test <- shapiro.test(sampled_values)
|
||||
passed <- test$p.value >= alpha
|
||||
|
||||
return(list(
|
||||
passed = passed,
|
||||
action = if (passed) NULL else action,
|
||||
action_target = if (passed) NULL else action_target,
|
||||
p_value = test$p.value,
|
||||
reason = glue("大样本(N={n})抽样检验,{if (passed) '满足正态性' else '不满足正态性'}"),
|
||||
sampled = TRUE,
|
||||
@@ -757,6 +798,8 @@ check_normality <- function(values, alpha = 0.05) {
|
||||
|
||||
return(list(
|
||||
passed = passed,
|
||||
action = if (passed) NULL else action,
|
||||
action_target = if (passed) NULL else action_target,
|
||||
p_value = test$p.value,
|
||||
reason = if (passed) "满足正态性" else "不满足正态性",
|
||||
sampled = FALSE
|
||||
@@ -764,7 +807,7 @@ check_normality <- function(values, alpha = 0.05) {
|
||||
}
|
||||
|
||||
# 方差齐性检验 (Levene)
|
||||
check_homogeneity <- function(df, group_var, value_var, alpha = 0.05) {
|
||||
check_homogeneity <- function(df, group_var, value_var, alpha = 0.05, action = ACTION_WARN) {
|
||||
library(car)
|
||||
|
||||
formula <- as.formula(paste(value_var, "~", group_var))
|
||||
@@ -774,20 +817,52 @@ check_homogeneity <- function(df, group_var, value_var, alpha = 0.05) {
|
||||
|
||||
return(list(
|
||||
passed = passed,
|
||||
action = if (passed) NULL else action,
|
||||
p_value = p_val,
|
||||
reason = if (passed) "方差齐性满足" else "方差不齐性"
|
||||
))
|
||||
}
|
||||
|
||||
# 样本量检验
|
||||
check_sample_size <- function(n, min_required = 30) {
|
||||
#' Guardrail: check that the sample size reaches a minimum.
#'
#' @param n Observed sample size; a single non-missing number.
#' @param min_required Minimum sample size needed to pass; a single
#'   non-missing number.
#' @param action Action reported when the check fails. The default is the
#'   `ACTION_BLOCK` constant and is only evaluated on failure.
#' @return A list with `passed` (logical), `action` (`NULL` when passed,
#'   otherwise `action`), `n`, and `reason` (human-readable text).
check_sample_size <- function(n, min_required = 30, action = ACTION_BLOCK) {
  # Reject NULL / NA / vector input here; otherwise the comparison below
  # yields NA or logical(0) and `if` fails with an opaque error.
  if (!is.numeric(n) || length(n) != 1L || is.na(n)) {
    stop("`n` must be a single non-missing number", call. = FALSE)
  }
  if (!is.numeric(min_required) || length(min_required) != 1L ||
        is.na(min_required)) {
    stop("`min_required` must be a single non-missing number", call. = FALSE)
  }

  passed <- n >= min_required

  # list() keeps `action = NULL` as a named element, so the result always
  # has the same fields whether or not the check passed.
  list(
    passed = passed,
    action = if (passed) NULL else action,
    n = n,
    reason = if (passed) {
      "样本量充足"
    } else {
      paste0("样本量不足, 需要至少 ", min_required)
    }
  )
}
|
||||
|
||||
#' Run a chain of guardrail checks, stopping at the first Block or Switch.
#'
#' Checks run in the order in which `guardrail_configs` is supplied; this
#' function does not sort them (callers wanting `check_order` ordering must
#' sort beforehand).
#'
#' @param input Value passed as the first positional argument to every
#'   check function.
#' @param guardrail_configs List of configs. Each config is a list with
#'   `check_code` (name of the check function) and optionally `action_type`
#'   and `action_target`.
#' @return A list with `status`:
#'   * `"blocked"` plus `reason` when a failed check reports `ACTION_BLOCK`;
#'   * `"switch"` plus `target_tool` and `reason` when it reports
#'     `ACTION_SWITCH`;
#'   * `"passed"` otherwise.
#'   When failed checks with any other action (e.g. Warn) were encountered,
#'   their reasons are added as a character vector `warnings`.
run_guardrail_chain <- function(input, guardrail_configs) {
  warn_reasons <- character(0)

  # Attach collected warnings only when there are any, so results without
  # warnings keep exactly the original shape.
  finish <- function(outcome) {
    if (length(warn_reasons) > 0) {
      outcome$warnings <- warn_reasons
    }
    outcome
  }

  for (config in guardrail_configs) {
    # mode = "function" skips non-function objects that share the name.
    check_func <- get(config$check_code, mode = "function")

    # Drop unset options so the check's own defaults apply, and forward
    # only the options the check accepts: not every check function takes
    # `action_target`, and an unknown argument would abort the whole chain.
    optional_args <- list(
      action = config$action_type,
      action_target = config$action_target
    )
    is_unset <- vapply(optional_args, is.null, logical(1))
    optional_args <- optional_args[!is_unset]
    accepted <- names(formals(check_func))
    if (!("..." %in% accepted)) {
      optional_args <- optional_args[names(optional_args) %in% accepted]
    }

    result <- do.call(check_func, c(list(input), optional_args))

    if (isTRUE(result$passed)) {
      next
    }

    # identical() is safe when `action` is NULL, unlike `==` inside `if`.
    if (identical(result$action, ACTION_BLOCK)) {
      return(finish(list(
        status = "blocked",
        reason = result$reason
      )))
    }

    if (identical(result$action, ACTION_SWITCH)) {
      # Checks without `action_target` support cannot echo the target
      # back, so fall back to the configured one.
      target <- result$action_target
      if (is.null(target)) {
        target <- config$action_target
      }
      return(finish(list(
        status = "switch",
        target_tool = target,
        reason = result$reason
      )))
    }

    # Warn (or any other action): record the reason and keep going.
    warn_reasons <- c(warn_reasons, as.character(result$reason))
  }

  finish(list(status = "passed"))
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Reference in New Issue
Block a user