#' @tool_code ST_MANN_WHITNEY #' @name Mann-Whitney U 检验 #' @version 1.0.0 #' @description 两组独立样本非参数比较(Wilcoxon秩和检验) #' @author SSA-Pro Team library(glue) library(ggplot2) library(base64enc) run_analysis <- function(input) { # ===== 初始化 ===== logs <- c() log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) } on.exit({}, add = TRUE) # ===== 数据加载 ===== log_add("开始加载输入数据") df <- tryCatch( load_input_data(input), error = function(e) { log_add(paste("数据加载失败:", e$message)) return(NULL) } ) if (is.null(df)) { return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败")) } log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列")) p <- input$params group_var <- p$group_var value_var <- p$value_var # ===== 参数校验 ===== if (!(group_var %in% names(df))) { return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var)) } if (!(value_var %in% names(df))) { return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = value_var)) } # ===== 数据清洗 ===== original_rows <- nrow(df) df <- df[!is.na(df[[group_var]]) & trimws(as.character(df[[group_var]])) != "", ] df <- df[!is.na(df[[value_var]]), ] removed_rows <- original_rows - nrow(df) if (removed_rows > 0) { log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)")) } # 分组检查 groups <- unique(df[[group_var]]) if (length(groups) != 2) { return(make_error(ERROR_CODES$E003_INSUFFICIENT_GROUPS, col = group_var, expected = 2, actual = length(groups))) } # 提取两组数据 g1_vals <- df[df[[group_var]] == groups[1], value_var] g2_vals <- df[df[[group_var]] == groups[2], value_var] # ===== 护栏检查 ===== guardrail_results <- list() warnings_list <- c() # 样本量检查(每组至少5个) min_n <- min(length(g1_vals), length(g2_vals)) sample_check <- check_sample_size(min_n, min_required = 5, action = ACTION_BLOCK) guardrail_results <- c(guardrail_results, list(sample_check)) log_add(glue("样本量检查: 每组最小 {min_n}, {sample_check$reason}")) guardrail_status <- run_guardrail_chain(guardrail_results) if (guardrail_status$status == "blocked") { return(list( status = "blocked", message = guardrail_status$reason, trace_log = logs )) } if (length(guardrail_status$warnings) > 0) { warnings_list <- c(warnings_list, guardrail_status$warnings) } # ===== 核心计算 ===== log_add("执行 Mann-Whitney U 检验 (Wilcoxon rank-sum test)") result <- tryCatch({ wilcox.test(g1_vals, g2_vals, exact = FALSE, correct = TRUE) }, error = function(e) { log_add(paste("Mann-Whitney U 检验失败:", e$message)) return(NULL) }) if (is.null(result)) { return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "Mann-Whitney U 检验执行失败")) } # 计算效应量 r = Z / sqrt(N) n1 <- length(g1_vals) n2 <- length(g2_vals) N <- n1 + n2 # 从 U 统计量计算 Z 值 U <- result$statistic mu <- n1 * n2 / 2 sigma <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12) z_value <- (U - mu) / sigma effect_r <- abs(z_value) / sqrt(N) # 效应量解释 effect_interpretation <- if (effect_r < 0.1) "微小" else if (effect_r < 0.3) "小" else if (effect_r < 0.5) "中等" else "大" log_add(glue("U = {round(U, 2)}, Z = {round(z_value, 3)}, p = {round(result$p.value, 4)}, r = {round(effect_r, 3)}")) # ===== 生成图表 ===== log_add("生成箱线图") plot_base64 <- tryCatch({ generate_boxplot(df, group_var, value_var) }, error = function(e) { log_add(paste("图表生成失败:", e$message)) NULL }) # ===== 生成可复现代码 ===== original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) { input$original_filename } else { "data.csv" } reproducible_code <- glue(' # SSA-Pro 自动生成代码 # 工具: Mann-Whitney U 检验 # 时间: {Sys.time()} # ================================ library(ggplot2) # 数据准备 df <- read.csv("{original_filename}") group_var <- "{group_var}" value_var <- "{value_var}" # 数据清洗 df <- df[!is.na(df[[group_var]]) & !is.na(df[[value_var]]), ] # Mann-Whitney U 检验 g1_vals <- df[df[[group_var]] == "{groups[1]}", value_var] g2_vals <- df[df[[group_var]] == "{groups[2]}", value_var] result <- wilcox.test(g1_vals, g2_vals, exact = FALSE, correct = TRUE) print(result) # 计算效应量 r n1 <- length(g1_vals) n2 <- length(g2_vals) U <- result$statistic mu <- n1 * n2 / 2 sigma <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12) z_value <- (U - mu) / sigma effect_r <- abs(z_value) / sqrt(n1 + n2) cat("Effect size r =", round(effect_r, 3), "\\n") # 可视化 ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) + geom_boxplot(fill = "#8b5cf6", alpha = 0.6) + theme_minimal() + labs(title = paste("Distribution of", value_var, "by", group_var)) ') # ===== 构建 report_blocks ===== log_add("构建 report_blocks") blocks <- list() # Block 1: 样本概况(两组 n, median, IQR) g1_label <- as.character(groups[1]) g2_label <- as.character(groups[2]) blocks[[length(blocks) + 1]] <- make_kv_block( title = "样本概况", items = list( list(key = paste0(g1_label, " (n, Median, IQR)"), value = paste0("n=", n1, ", ", round(median(g1_vals), 3), ", ", round(IQR(g1_vals), 3))), list(key = paste0(g2_label, " (n, Median, IQR)"), value = paste0("n=", n2, ", ", round(median(g2_vals), 3), ", ", round(IQR(g2_vals), 3))) ) ) # Block 2: 检验结果(U 统计量, Z 值, P 值, 效应量 r) blocks[[length(blocks) + 1]] <- make_table_block( title = "Mann-Whitney U 检验结果", headers = c("U 统计量", "Z 值", "P 值", "效应量 r", "效应量解释"), rows = list( list( round(as.numeric(U), 4), round(z_value, 4), format_p_value(result$p.value), round(effect_r, 4), effect_interpretation ) ), footnote = "Wilcoxon rank sum test with continuity correction" ) # Block 3: 箱线图(如果 plot_base64 不为 NULL) if (!is.null(plot_base64)) { blocks[[length(blocks) + 1]] <- make_image_block( base64_data = plot_base64, title = paste0(value_var, " by ", group_var), alt = paste("箱线图:", value_var, "按", group_var, "分组") ) } # Block 4: 结论摘要 sig <- if (result$p.value < 0.05) "存在统计学显著差异" else "差异无统计学意义" blocks[[length(blocks) + 1]] <- make_markdown_block( title = "结果摘要", content = paste0( "两组 **", value_var, "** 的比较(Mann-Whitney U 检验):", "U = ", round(as.numeric(U), 2), ",Z = ", round(z_value, 3), ",P ", format_p_value(result$p.value), ",效应量 r = ", round(effect_r, 3), "(", effect_interpretation, ")。", "两组间", sig, "。" ) ) # ===== 返回结果 ===== log_add("分析完成") return(list( status = "success", message = "分析完成", warnings = if (length(warnings_list) > 0) warnings_list else NULL, results = list( method = "Wilcoxon rank sum test with continuity correction", statistic_U = jsonlite::unbox(as.numeric(U)), z_value = jsonlite::unbox(round(z_value, 4)), p_value = jsonlite::unbox(as.numeric(result$p.value)), p_value_fmt = format_p_value(result$p.value), effect_size = list( r = jsonlite::unbox(round(effect_r, 4)), interpretation = effect_interpretation ), group_stats = list( list( group = as.character(groups[1]), n = n1, median = median(g1_vals), iqr = IQR(g1_vals), min = min(g1_vals), max = max(g1_vals) ), list( group = as.character(groups[2]), n = n2, median = median(g2_vals), iqr = IQR(g2_vals), min = min(g2_vals), max = max(g2_vals) ) ) ), report_blocks = blocks, plots = if (!is.null(plot_base64)) list(plot_base64) else list(), trace_log = logs, reproducible_code = as.character(reproducible_code) )) } # 辅助函数:生成箱线图 generate_boxplot <- function(df, group_var, value_var) { p <- ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) + geom_boxplot(fill = "#8b5cf6", alpha = 0.6) + geom_jitter(width = 0.2, alpha = 0.3, size = 1) + theme_minimal() + labs( title = paste("Distribution of", value_var, "by", group_var), subtitle = "Mann-Whitney U Test" ) tmp_file <- tempfile(fileext = ".png") ggsave(tmp_file, p, width = 6, height = 4, dpi = 100) base64_str <- base64encode(tmp_file) unlink(tmp_file) return(paste0("data:image/png;base64,", base64_str)) }