#' @tool_code ST_T_TEST_IND #' @name 独立样本 T 检验 #' @version 1.0.0 #' @description 比较两组独立样本的均值差异 #' @author SSA-Pro Team library(glue) library(ggplot2) library(base64enc) # 统一入口函数 run_analysis <- function(input) { # ===== 初始化 ===== logs <- c() log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) } tmp_files <- c() # 确保退出时清理临时文件 on.exit({ if (length(tmp_files) > 0) { unlink(tmp_files) } }, add = TRUE) # ===== 数据加载 ===== log_add("开始加载输入数据") df <- tryCatch( load_input_data(input), error = function(e) { log_add(paste("数据加载失败:", e$message)) return(NULL) } ) if (is.null(df)) { return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败")) } log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列")) p <- input$params guardrails_cfg <- input$guardrails group_var <- p$group_var value_var <- p$value_var # ===== 参数校验 ===== if (!(group_var %in% names(df))) { return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var)) } if (!(value_var %in% names(df))) { return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = value_var)) } # ===== 数据清洗:移除分组变量或数值变量中的缺失值 ===== original_rows <- nrow(df) # 处理分组变量:移除 NA、空字符串、纯空白字符 df <- df[!is.na(df[[group_var]]) & trimws(as.character(df[[group_var]])) != "", ] # 处理数值变量:移除 NA df <- df[!is.na(df[[value_var]]), ] removed_rows <- original_rows - nrow(df) if (removed_rows > 0) { log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)")) } if (nrow(df) < 6) { return(make_error(ERROR_CODES$E004_SAMPLE_TOO_SMALL, n = nrow(df), min_required = 6)) } groups <- unique(df[[group_var]]) if (length(groups) != 2) { return(make_error(ERROR_CODES$E003_INSUFFICIENT_GROUPS, col = group_var, expected = 2, actual = length(groups))) } # ===== 护栏检查 ===== guardrail_results <- list() method_used <- "t.test" warnings_list <- c() # 样本量检查 g1_vals <- df[df[[group_var]] == groups[1], value_var] g2_vals <- df[df[[group_var]] == groups[2], value_var] sample_check <- check_sample_size(min(length(g1_vals), length(g2_vals)), min_required = 3, action = ACTION_BLOCK) guardrail_results <- c(guardrail_results, list(sample_check)) log_add(glue("样本量检查: {sample_check$reason}")) # 正态性检验 if (isTRUE(guardrails_cfg$check_normality)) { log_add("执行正态性检验") for (g in groups) { vals <- df[df[[group_var]] == g, value_var] norm_check <- check_normality(vals, alpha = 0.05, action = ACTION_SWITCH, action_target = "ST_MANN_WHITNEY") guardrail_results <- c(guardrail_results, list(norm_check)) log_add(glue("组[{g}] 正态性检验: p = {round(norm_check$p_value, 4)}, {norm_check$reason}")) } } # 执行护栏链 guardrail_status <- run_guardrail_chain(guardrail_results) if (guardrail_status$status == "blocked") { return(list( status = "blocked", message = guardrail_status$reason, trace_log = logs )) } if (guardrail_status$status == "switch") { log_add(glue("触发方法切换: {guardrail_status$reason} -> {guardrail_status$target_tool}")) # TODO: 调用备选方法 # 目前先继续执行 T 检验,但添加警告 warnings_list <- c(warnings_list, guardrail_status$reason) } if (length(guardrail_status$warnings) > 0) { warnings_list <- c(warnings_list, guardrail_status$warnings) } # ===== 核心计算 ===== log_add("执行 T 检验") result <- t.test(g1_vals, g2_vals, var.equal = FALSE) # ===== 生成图表 ===== log_add("生成箱线图") plot_base64 <- tryCatch({ generate_boxplot(df, group_var, value_var, tmp_files) }, error = function(e) { log_add(paste("图表生成失败:", e$message)) NULL }) # ===== 生成可复现代码 ===== # 获取原始文件名(如果未提供则使用默认值) original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) { input$original_filename } else { "data.csv" } reproducible_code <- glue(' # SSA-Pro 自动生成代码 # 工具: 独立样本 T 检验 # 时间: {Sys.time()} # ================================ # 自动安装依赖 required_packages <- c("ggplot2") new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])] if(length(new_packages)) install.packages(new_packages, repos = "https://cloud.r-project.org") library(ggplot2) # 数据准备 df <- read.csv("{original_filename}") group_var <- "{group_var}" value_var <- "{value_var}" # 独立样本 T 检验 (Welch) g1_vals <- df[df[[group_var]] == "{groups[1]}", value_var] g2_vals <- df[df[[group_var]] == "{groups[2]}", value_var] result <- t.test(g1_vals, g2_vals, var.equal = FALSE) print(result) # 可视化 ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) + geom_boxplot(fill = "#3b82f6", alpha = 0.6) + theme_minimal() + labs(title = paste("Distribution of", value_var, "by", group_var)) ') # ===== 返回结果 ===== log_add("分析完成") return(list( status = "success", message = "分析完成", warnings = if (length(warnings_list) > 0) warnings_list else NULL, results = list( method = result$method, statistic = jsonlite::unbox(as.numeric(result$statistic)), df = jsonlite::unbox(as.numeric(result$parameter)), p_value = jsonlite::unbox(as.numeric(result$p.value)), p_value_fmt = format_p_value(result$p.value), conf_int = as.numeric(result$conf.int), estimate = as.numeric(result$estimate), group_stats = list( list(group = as.character(groups[1]), n = length(g1_vals), mean = mean(g1_vals), sd = sd(g1_vals)), list(group = as.character(groups[2]), n = length(g2_vals), mean = mean(g2_vals), sd = sd(g2_vals)) ) ), plots = if (!is.null(plot_base64)) list(plot_base64) else list(), trace_log = logs, reproducible_code = as.character(reproducible_code) )) } # 辅助函数:生成箱线图 generate_boxplot <- function(df, group_var, value_var, tmp_files_ref) { p <- ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) + geom_boxplot(fill = "#3b82f6", alpha = 0.6) + theme_minimal() + labs(title = paste("Distribution of", value_var, "by", group_var)) tmp_file <- tempfile(fileext = ".png") ggsave(tmp_file, p, width = 6, height = 4, dpi = 100) base64_str <- base64encode(tmp_file) unlink(tmp_file) return(paste0("data:image/png;base64,", base64_str)) }