feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end
Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
254
r-statistics-service/tools/chi_square.R
Normal file
254
r-statistics-service/tools/chi_square.R
Normal file
@@ -0,0 +1,254 @@
|
||||
#' @tool_code ST_CHI_SQUARE
|
||||
#' @name 卡方检验
|
||||
#' @version 1.0.0
|
||||
#' @description 两个分类变量的独立性检验
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
#' Chi-square test of independence between two categorical variables.
#'
#' Reads `input$params$var1` / `input$params$var2` (column names) and an
#' optional `input$original_filename`. For a 2x2 table with sparse expected
#' frequencies the test automatically falls back to Fisher's exact test;
#' otherwise Pearson's chi-square is used (with Yates' continuity correction
#' on 2x2 tables) and Cramér's V is reported as effect size.
#' Relies on shared service helpers defined elsewhere (load_input_data,
#' make_error, ERROR_CODES, check_sample_size, run_guardrail_chain,
#' format_p_value, ACTION_BLOCK).
#'
#' @param input Request payload: data reference, `params`, optional filename.
#' @return list with status/message/warnings/results/plots/trace_log/
#'   reproducible_code on success, or an error / "blocked" payload.
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Load data =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  p <- input$params
  var1 <- p$var1
  var2 <- p$var2

  # ===== Validate parameters =====
  if (!(var1 %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = var1))
  }
  if (!(var2 %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = var2))
  }

  # ===== Clean data: drop rows where either variable is NA or blank =====
  original_rows <- nrow(df)
  df <- df[!is.na(df[[var1]]) & trimws(as.character(df[[var1]])) != "", ]
  df <- df[!is.na(df[[var2]]) & trimws(as.character(df[[var2]])) != "", ]

  removed_rows <- original_rows - nrow(df)
  if (removed_rows > 0) {
    log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)"))
  }

  # ===== Guardrail checks =====
  guardrail_results <- list()
  warnings_list <- c()

  # Minimum sample size (blocking: analysis aborts below the threshold)
  sample_check <- check_sample_size(nrow(df), min_required = 10, action = ACTION_BLOCK)
  guardrail_results <- c(guardrail_results, list(sample_check))
  log_add(glue("样本量检查: N = {nrow(df)}, {sample_check$reason}"))

  guardrail_status <- run_guardrail_chain(guardrail_results)

  if (guardrail_status$status == "blocked") {
    return(list(
      status = "blocked",
      message = guardrail_status$reason,
      trace_log = logs
    ))
  }

  # FIX: propagate non-blocking guardrail warnings to the caller.
  # They were silently dropped here, while correlation.R already forwards
  # them — keep the two tools consistent.
  if (length(guardrail_status$warnings) > 0) {
    warnings_list <- c(warnings_list, guardrail_status$warnings)
  }

  # ===== Build contingency table =====
  contingency_table <- table(df[[var1]], df[[var2]])
  log_add(glue("列联表维度: {nrow(contingency_table)} x {ncol(contingency_table)}"))

  # An independence test needs at least two levels on each margin
  if (nrow(contingency_table) < 2 || ncol(contingency_table) < 2) {
    return(make_error(ERROR_CODES$E003_INSUFFICIENT_GROUPS,
                      col = paste(var1, "或", var2),
                      expected = 2,
                      actual = min(nrow(contingency_table), ncol(contingency_table))))
  }

  # ===== Expected-frequency check (decides chi-square vs Fisher) =====
  # FIX: this chisq.test() call only probes the expected counts; suppress
  # its "Chi-squared approximation may be incorrect" warning, which fires
  # in exactly the sparse case we are about to evaluate ourselves.
  expected <- suppressWarnings(chisq.test(contingency_table)$expected)
  low_expected_count <- sum(expected < 5)
  total_cells <- length(expected)
  low_expected_pct <- low_expected_count / total_cells

  use_fisher <- FALSE
  is_2x2 <- nrow(contingency_table) == 2 && ncol(contingency_table) == 2

  # Rule of thumb: > 20% of cells with expected count < 5 is problematic
  if (low_expected_pct > 0.2) {
    warnings_list <- c(warnings_list, glue("期望频数 < 5 的格子占 {round(low_expected_pct * 100, 1)}%"))
    if (is_2x2) {
      use_fisher <- TRUE
      log_add("2x2 表且期望频数不足,自动切换为 Fisher 精确检验")
    } else {
      log_add("期望频数不足,但非 2x2 表,继续使用卡方检验(结果需谨慎解读)")
    }
  }

  # ===== Core computation =====
  if (use_fisher) {
    log_add("执行 Fisher 精确检验")
    result <- fisher.test(contingency_table)
    method_used <- "Fisher's Exact Test"

    output_results <- list(
      method = method_used,
      p_value = jsonlite::unbox(as.numeric(result$p.value)),
      p_value_fmt = format_p_value(result$p.value),
      odds_ratio = if (!is.null(result$estimate)) jsonlite::unbox(as.numeric(result$estimate)) else NULL,
      conf_int = if (!is.null(result$conf.int)) as.numeric(result$conf.int) else NULL
    )
  } else {
    log_add("执行 Pearson 卡方检验")
    result <- chisq.test(contingency_table, correct = is_2x2) # Yates' continuity correction on 2x2 tables
    method_used <- if (is_2x2) "Pearson's Chi-squared test with Yates' continuity correction" else "Pearson's Chi-squared test"

    # Cramér's V effect size: sqrt(chi2 / (n * (min(rows, cols) - 1)))
    n <- sum(contingency_table)
    k <- min(nrow(contingency_table), ncol(contingency_table))
    cramers_v <- sqrt(result$statistic / (n * (k - 1)))

    # Effect-size label (Chinese labels are part of the API payload)
    v_interpretation <- if (cramers_v < 0.1) "微小" else if (cramers_v < 0.3) "小" else if (cramers_v < 0.5) "中等" else "大"

    log_add(glue("χ² = {round(result$statistic, 3)}, df = {result$parameter}, p = {round(result$p.value, 4)}, Cramér's V = {round(cramers_v, 3)}"))

    output_results <- list(
      method = method_used,
      statistic = jsonlite::unbox(as.numeric(result$statistic)),
      df = jsonlite::unbox(as.numeric(result$parameter)),
      p_value = jsonlite::unbox(as.numeric(result$p.value)),
      p_value_fmt = format_p_value(result$p.value),
      effect_size = list(
        cramers_v = jsonlite::unbox(round(as.numeric(cramers_v), 4)),
        interpretation = v_interpretation
      )
    )
  }

  # Attach the contingency table (compact form, no raw data).
  # Convert the table to a plain numeric matrix for JSON serialisation.
  observed_matrix <- matrix(
    as.numeric(contingency_table),
    nrow = nrow(contingency_table),
    ncol = ncol(contingency_table),
    dimnames = list(rownames(contingency_table), colnames(contingency_table))
  )

  output_results$contingency_table <- list(
    row_var = var1,
    col_var = var2,
    row_levels = as.character(rownames(contingency_table)),
    col_levels = as.character(colnames(contingency_table)),
    observed = observed_matrix,
    row_totals = as.numeric(rowSums(contingency_table)),
    col_totals = as.numeric(colSums(contingency_table)),
    grand_total = jsonlite::unbox(sum(contingency_table))
  )

  # ===== Plot =====
  log_add("生成马赛克图")
  plot_base64 <- tryCatch({
    generate_mosaic_plot(contingency_table, var1, var2)
  }, error = function(e) {
    log_add(paste("图表生成失败:", e$message))
    NULL
  })

  # ===== Reproducible script for the user =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: 卡方检验
# 时间: {Sys.time()}
# ================================

library(ggplot2)

# 数据准备
df <- read.csv("{original_filename}")
var1 <- "{var1}"
var2 <- "{var2}"

# 数据清洗
df <- df[!is.na(df[[var1]]) & !is.na(df[[var2]]), ]

# 构建列联表
contingency_table <- table(df[[var1]], df[[var2]])
print(contingency_table)

# 卡方检验
result <- chisq.test(contingency_table)
print(result)

# 计算 Cramer V (效应量)
n <- sum(contingency_table)
k <- min(nrow(contingency_table), ncol(contingency_table))
cramers_v <- sqrt(result$statistic / (n * (k - 1)))
cat("Cramer V =", round(cramers_v, 3), "\\n")

# 可视化(马赛克图)
mosaicplot(contingency_table, main = "Mosaic Plot", color = TRUE)
')

  # ===== Return =====
  log_add("分析完成")

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = output_results,
    plots = if (!is.null(plot_base64)) list(plot_base64) else list(),
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# Helper: render a mosaic-style view (100%-stacked bar chart built with
# ggplot2) of a contingency table and return it as a base64 PNG data URI.
generate_mosaic_plot <- function(contingency_table, var1, var2) {
  # Long-format counts: one row per (row level, column level) cell
  long_counts <- as.data.frame(contingency_table)
  names(long_counts) <- c("Var1", "Var2", "Freq")

  fig <- ggplot(long_counts, aes(x = Var1, y = Freq, fill = Var2))
  fig <- fig + geom_bar(stat = "identity", position = "fill")
  fig <- fig + scale_y_continuous(labels = scales::percent)
  fig <- fig + theme_minimal()
  fig <- fig + labs(
    title = paste("Association between", var1, "and", var2),
    x = var1,
    y = "Proportion",
    fill = var2
  )
  fig <- fig + scale_fill_brewer(palette = "Set2")

  # Render to a temp PNG, encode, and clean up
  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 7, height = 5, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
242
r-statistics-service/tools/correlation.R
Normal file
242
r-statistics-service/tools/correlation.R
Normal file
@@ -0,0 +1,242 @@
|
||||
#' @tool_code ST_CORRELATION
|
||||
#' @name 相关分析
|
||||
#' @version 1.0.0
|
||||
#' @description Pearson/Spearman 相关系数计算
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
#' Correlation analysis (Pearson / Spearman) between two numeric columns.
#'
#' Reads `input$params$var_x`, `input$params$var_y` and optional
#' `input$params$method` ("pearson", "spearman" or "auto"; default "auto").
#' In "auto" mode, normality of both variables (via check_normality)
#' decides Pearson vs Spearman. Relies on shared service helpers
#' (load_input_data, make_error, ERROR_CODES, check_sample_size,
#' check_normality, run_guardrail_chain, format_p_value, ACTION_WARN)
#' and on `%||%` defined at the bottom of this file.
#'
#' @param input Request payload: data reference, `params`, optional filename.
#' @return list with status/message/warnings/results/plots/trace_log/
#'   reproducible_code on success, or an error / "blocked" payload.
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Load data =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  # FIX: removed the dead store `guardrails_cfg <- input$guardrails`
  # (assigned but never read anywhere in this function).
  p <- input$params

  var_x <- p$var_x
  var_y <- p$var_y
  method <- tolower(p$method %||% "auto") # "pearson", "spearman" or "auto"

  # ===== Validate parameters =====
  if (!(var_x %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = var_x))
  }
  if (!(var_y %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = var_y))
  }

  # ===== Clean data: drop rows missing either variable =====
  original_rows <- nrow(df)
  df <- df[!is.na(df[[var_x]]) & !is.na(df[[var_y]]), ]

  removed_rows <- original_rows - nrow(df)
  if (removed_rows > 0) {
    log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)"))
  }

  x_vals <- df[[var_x]]
  y_vals <- df[[var_y]]
  n <- length(x_vals)

  # ===== Guardrail checks =====
  guardrail_results <- list()
  warnings_list <- c()

  # Minimum sample size (warn only — analysis still proceeds)
  sample_check <- check_sample_size(n, min_required = 10, action = ACTION_WARN)
  guardrail_results <- c(guardrail_results, list(sample_check))
  log_add(glue("样本量检查: N = {n}, {sample_check$reason}"))

  guardrail_status <- run_guardrail_chain(guardrail_results)

  if (guardrail_status$status == "blocked") {
    return(list(
      status = "blocked",
      message = guardrail_status$reason,
      trace_log = logs
    ))
  }

  # Forward non-blocking guardrail warnings to the caller
  if (length(guardrail_status$warnings) > 0) {
    warnings_list <- c(warnings_list, guardrail_status$warnings)
  }

  # ===== Method selection =====
  final_method <- method

  if (method == "auto") {
    log_add("自动选择相关方法(检验正态性)")

    # Normality of both variables decides Pearson vs Spearman
    norm_x <- check_normality(x_vals, alpha = 0.05)
    norm_y <- check_normality(y_vals, alpha = 0.05)

    log_add(glue("{var_x} 正态性: p = {round(norm_x$p_value, 4)}, {norm_x$reason}"))
    log_add(glue("{var_y} 正态性: p = {round(norm_y$p_value, 4)}, {norm_y$reason}"))

    if (norm_x$passed && norm_y$passed) {
      final_method <- "pearson"
      log_add("两变量均满足正态性,使用 Pearson 相关")
    } else {
      final_method <- "spearman"
      log_add("存在非正态变量,使用 Spearman 秩相关")
      warnings_list <- c(warnings_list, "变量不满足正态性,自动切换为 Spearman 秩相关")
    }
  }

  # ===== Core computation =====
  log_add(glue("执行 {final_method} 相关分析"))

  result <- cor.test(x_vals, y_vals, method = final_method)

  r_value <- result$estimate
  p_value <- result$p.value

  # Strength label (Chinese labels are part of the API payload)
  r_abs <- abs(r_value)
  r_interpretation <- if (r_abs < 0.1) "可忽略" else if (r_abs < 0.3) "弱" else if (r_abs < 0.5) "中等" else if (r_abs < 0.7) "较强" else "强"

  log_add(glue("r = {round(r_value, 4)}, p = {round(p_value, 4)}, 相关强度: {r_interpretation}"))

  # ===== Plot =====
  log_add("生成散点图")
  plot_base64 <- tryCatch({
    generate_scatter_plot(df, var_x, var_y, r_value, p_value, final_method)
  }, error = function(e) {
    log_add(paste("图表生成失败:", e$message))
    NULL
  })

  # ===== Reproducible script for the user =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: 相关分析
# 时间: {Sys.time()}
# ================================

library(ggplot2)

# 数据准备
df <- read.csv("{original_filename}")
var_x <- "{var_x}"
var_y <- "{var_y}"

# 数据清洗
df <- df[!is.na(df[[var_x]]) & !is.na(df[[var_y]]), ]

# {final_method} 相关分析
result <- cor.test(df[[var_x]], df[[var_y]], method = "{final_method}")
print(result)

# 可视化
ggplot(df, aes(x = .data[[var_x]], y = .data[[var_y]])) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "#3b82f6") +
  theme_minimal() +
  labs(title = paste("Correlation:", var_x, "vs", var_y),
       subtitle = paste("r =", round(result$estimate, 3), ", p =", round(result$p.value, 4)))
')

  # ===== Assemble results =====
  log_add("分析完成")

  output_results <- list(
    method = if (final_method == "pearson") "Pearson product-moment correlation" else "Spearman's rank correlation rho",
    method_code = final_method,
    statistic = jsonlite::unbox(round(as.numeric(r_value), 4)),
    p_value = jsonlite::unbox(as.numeric(p_value)),
    p_value_fmt = format_p_value(p_value),
    interpretation = r_interpretation,
    n = n,
    variables = list(x = var_x, y = var_y),
    descriptive = list(
      x = list(
        variable = var_x,
        mean = round(mean(x_vals), 3),
        sd = round(sd(x_vals), 3),
        median = round(median(x_vals), 3)
      ),
      y = list(
        variable = var_y,
        mean = round(mean(y_vals), 3),
        sd = round(sd(y_vals), 3),
        median = round(median(y_vals), 3)
      )
    )
  )

  # Only Pearson's cor.test returns a confidence interval
  if (final_method == "pearson" && !is.null(result$conf.int)) {
    output_results$conf_int <- as.numeric(result$conf.int)
  }

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = output_results,
    plots = if (!is.null(plot_base64)) list(plot_base64) else list(),
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# Helper: scatter plot with a linear fit, annotated with r and p,
# returned as a base64 PNG data URI.
generate_scatter_plot <- function(df, var_x, var_y, r_value, p_value, method) {
  # Cap the number of plotted points so the image stays light
  if (nrow(df) > 1000) {
    set.seed(42)
    df <- df[sample(nrow(df), 1000), ]
  }

  subtitle_text <- paste0(
    ifelse(method == "pearson", "Pearson ", "Spearman "),
    "r = ", round(r_value, 3),
    ", p ", format_p_value(p_value)
  )

  fig <- ggplot(df, aes(x = .data[[var_x]], y = .data[[var_y]])) +
    geom_point(alpha = 0.5, color = "#64748b", size = 2) +
    geom_smooth(method = "lm", se = TRUE, color = "#3b82f6", fill = "#93c5fd") +
    theme_minimal() +
    labs(
      title = paste("Correlation:", var_x, "vs", var_y),
      subtitle = subtitle_text,
      x = var_x,
      y = var_y
    )

  # Render to a temp PNG, encode, and clean up
  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 6, height = 5, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
|
||||
# Null-coalescing infix operator: yields the left operand unless it is
# NULL, in which case the right operand is returned.
`%||%` <- function(a, b) {
  if (!is.null(a)) {
    a
  } else {
    b
  }
}
|
||||
332
r-statistics-service/tools/descriptive.R
Normal file
332
r-statistics-service/tools/descriptive.R
Normal file
@@ -0,0 +1,332 @@
|
||||
#' @tool_code ST_DESCRIPTIVE
|
||||
#' @name 描述性统计
|
||||
#' @version 1.0.0
|
||||
#' @description 数据概况与基线特征表
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
#' Descriptive statistics / baseline characteristics table.
#'
#' Reads `input$params$variables` (optional; defaults to all columns) and
#' `input$params$group_var` (optional grouping column). Each column is
#' inferred as numeric or categorical, summarised overall or per group,
#' and the first few variables are charted. Relies on shared service
#' helpers (load_input_data, make_error, ERROR_CODES) and on the calc_* /
#' generate_* helpers defined below in this file.
#'
#' @param input Request payload: data reference, `params`, optional filename.
#' @return list with status/message/warnings/results/plots/trace_log/
#'   reproducible_code on success, or an error payload.
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Load data =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  p <- input$params
  variables <- p$variables # optional; NULL/empty means "all columns"
  group_var <- p$group_var # optional grouping column

  # ===== Which variables to analyse =====
  if (is.null(variables) || length(variables) == 0) {
    variables <- names(df)
    log_add("未指定变量,分析全部列")
  }

  # The grouping column itself is never summarised
  if (!is.null(group_var) && group_var %in% variables) {
    variables <- setdiff(variables, group_var)
  }

  # All requested columns must exist
  missing_vars <- setdiff(variables, names(df))
  if (length(missing_vars) > 0) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND,
                      col = paste(missing_vars, collapse = ", ")))
  }

  # Validate the grouping column and collect its non-NA levels
  groups <- NULL
  if (!is.null(group_var) && group_var != "") {
    if (!(group_var %in% names(df))) {
      return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var))
    }
    groups <- unique(df[[group_var]][!is.na(df[[group_var]])])
    log_add(glue("分组变量: {group_var}, 分组: {paste(groups, collapse=', ')}"))
  }

  # ===== Infer variable types =====
  # A numeric column with few distinct values is treated as categorical.
  # FIX: vapply instead of sapply for a guaranteed named character vector
  # (sapply's return type varies with its input).
  var_types <- vapply(variables, function(v) {
    vals <- df[[v]]
    if (is.numeric(vals)) {
      non_na_count <- sum(!is.na(vals))
      if (non_na_count == 0) {
        return("categorical") # all NA: treat as categorical
      }
      unique_count <- length(unique(vals[!is.na(vals)]))
      unique_ratio <- unique_count / non_na_count
      if (unique_ratio < 0.05 && unique_count <= 10) {
        return("categorical")
      }
      return("numeric")
    } else {
      return("categorical")
    }
  }, character(1))

  log_add(glue("数值变量: {sum(var_types == 'numeric')}, 分类变量: {sum(var_types == 'categorical')}"))

  # ===== Per-variable descriptive statistics =====
  warnings_list <- c()
  results_list <- list()

  for (v in variables) {
    var_type <- as.character(var_types[v])
    if (is.na(var_type) || length(var_type) == 0) {
      var_type <- "categorical" # defensive default
    }

    if (is.null(groups)) {
      # Ungrouped summary
      if (identical(var_type, "numeric")) {
        stats <- calc_numeric_stats(df[[v]], v)
      } else {
        stats <- calc_categorical_stats(df[[v]], v)
      }
      stats$type <- var_type
      results_list[[v]] <- stats
    } else {
      # Grouped summary
      group_stats <- list()
      for (g in groups) {
        # FIX: exclude NA group rows explicitly — `df[[group_var]] == g`
        # yields NA for those rows, and NA logical indexing injects
        # phantom all-NA rows into the subset, inflating per-group
        # missing counts.
        in_group <- !is.na(df[[group_var]]) & df[[group_var]] == g
        subset_vals <- df[in_group, v, drop = TRUE]
        if (identical(var_type, "numeric")) {
          group_stats[[as.character(g)]] <- calc_numeric_stats(subset_vals, v)
        } else {
          group_stats[[as.character(g)]] <- calc_categorical_stats(subset_vals, v)
        }
      }
      results_list[[v]] <- list(
        variable = v,
        type = var_type,
        by_group = group_stats
      )
    }
  }

  # ===== Overall summary =====
  summary_stats <- list(
    n_total = nrow(df),
    n_variables = length(variables),
    n_numeric = sum(var_types == "numeric"),
    n_categorical = sum(var_types == "categorical")
  )

  if (!is.null(groups)) {
    summary_stats$group_var <- group_var
    summary_stats$groups <- lapply(groups, function(g) {
      list(name = as.character(g), n = sum(df[[group_var]] == g, na.rm = TRUE))
    })
  }

  # ===== Plots =====
  log_add("生成描述性统计图表")
  plots <- list()

  # Only chart the first few variables to keep the payload small
  vars_to_plot <- head(variables, 4)

  for (v in vars_to_plot) {
    plot_base64 <- tryCatch({
      if (var_types[v] == "numeric") {
        generate_histogram(df, v, group_var)
      } else {
        generate_bar_chart(df, v, group_var)
      }
    }, error = function(e) {
      log_add(paste("图表生成失败:", v, e$message))
      NULL
    })

    if (!is.null(plot_base64)) {
      plots <- c(plots, list(plot_base64))
    }
  }

  # ===== Reproducible script for the user =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: 描述性统计
# 时间: {Sys.time()}
# ================================

library(ggplot2)

# 数据准备
df <- read.csv("{original_filename}")

# 数值变量描述性统计
numeric_vars <- sapply(df, is.numeric)
if (any(numeric_vars)) {{
  summary(df[, numeric_vars, drop = FALSE])
}}

# 分类变量频数表
categorical_vars <- !numeric_vars
if (any(categorical_vars)) {{
  for (v in names(df)[categorical_vars]) {{
    cat("\\n变量:", v, "\\n")
    print(table(df[[v]], useNA = "ifany"))
  }}
}}

# 可视化示例
# ggplot(df, aes(x = your_variable)) + geom_histogram()
')

  # ===== Return =====
  log_add("分析完成")

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = list(
      summary = summary_stats,
      variables = results_list
    ),
    plots = plots,
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# ===== 辅助函数 =====
|
||||
|
||||
# Numeric-column summary: n, missing count, centre / spread / range /
# skewness, plus a pre-formatted "mean ± sd" string.
# FIX: `missing` was computed AFTER NAs had already been stripped from
# `vals`, so it was always 0. The total length is now captured first.
calc_numeric_stats <- function(vals, var_name) {
  total <- length(vals) # length before dropping NAs
  vals <- vals[!is.na(vals)]
  n <- length(vals)

  if (n == 0) {
    # No usable values: report everything as missing, no stats
    return(list(
      variable = var_name,
      n = 0,
      missing = total,
      stats = NULL
    ))
  }

  list(
    variable = var_name,
    n = n,
    missing = total - n,
    mean = round(mean(vals), 3),
    sd = round(sd(vals), 3),
    median = round(median(vals), 3),
    q1 = round(quantile(vals, 0.25), 3),
    q3 = round(quantile(vals, 0.75), 3),
    iqr = round(IQR(vals), 3),
    min = round(min(vals), 3),
    max = round(max(vals), 3),
    skewness = round(calc_skewness(vals), 3), # sibling helper in this file
    formatted = paste0(round(mean(vals), 2), " ± ", round(sd(vals), 2))
  )
}
|
||||
|
||||
# Categorical-column summary: valid/missing counts plus, per observed
# level, the count, percentage of valid values, and an "n (pct%)" string.
calc_categorical_stats <- function(vals, var_name) {
  n_total <- length(vals)
  n_valid <- sum(!is.na(vals))

  # Frequency table over non-NA values only
  counts <- table(vals, useNA = "no")

  build_level <- function(lvl) {
    cnt <- as.numeric(counts[lvl])
    share <- round(cnt / n_valid * 100, 1)
    list(
      level = lvl,
      n = cnt,
      pct = share,
      formatted = paste0(cnt, " (", share, "%)")
    )
  }
  level_summaries <- lapply(names(counts), build_level)

  list(
    variable = var_name,
    n = n_valid,
    missing = n_total - n_valid,
    levels = level_summaries
  )
}
|
||||
|
||||
# Skewness: third central moment divided by s^3 (with n, not n-1, in the
# denominator of the moment). Returns NA when it is undefined.
# FIX: constant input gave 0/0 = NaN; now returns NA, matching the
# n < 3 case, so round()/serialisation handle both uniformly.
calc_skewness <- function(x) {
  n <- length(x)
  if (n < 3) return(NA)
  m <- mean(x)
  s <- sd(x)
  if (is.na(s) || s == 0) return(NA) # constant input: undefined
  sum((x - m)^3) / (n * s^3)
}
|
||||
|
||||
# Helper: histogram of one column (optionally colour-split by a grouping
# column), returned as a base64 PNG data URI.
generate_histogram <- function(df, var_name, group_var = NULL) {
  plot_df <- df[!is.na(df[[var_name]]), ]
  grouped <- !is.null(group_var) && group_var != ""

  if (grouped) {
    fig <- ggplot(plot_df, aes(x = .data[[var_name]], fill = factor(.data[[group_var]]))) +
      geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
      scale_fill_brewer(palette = "Set1", name = group_var) +
      theme_minimal()
  } else {
    fig <- ggplot(plot_df, aes(x = .data[[var_name]])) +
      geom_histogram(fill = "#3b82f6", alpha = 0.7, bins = 30) +
      theme_minimal()
  }

  fig <- fig + labs(title = paste("Distribution of", var_name), x = var_name, y = "Count")

  # Render to a temp PNG, encode, and clean up
  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 6, height = 4, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
|
||||
# Helper: frequency bar chart of one column (optionally dodged by a
# grouping column), returned as a base64 PNG data URI.
generate_bar_chart <- function(df, var_name, group_var = NULL) {
  plot_df <- df[!is.na(df[[var_name]]), ]
  grouped <- !is.null(group_var) && group_var != ""

  if (grouped) {
    fig <- ggplot(plot_df, aes(x = factor(.data[[var_name]]), fill = factor(.data[[group_var]]))) +
      geom_bar(position = "dodge") +
      scale_fill_brewer(palette = "Set1", name = group_var) +
      theme_minimal()
  } else {
    fig <- ggplot(plot_df, aes(x = factor(.data[[var_name]]))) +
      geom_bar(fill = "#3b82f6", alpha = 0.7) +
      theme_minimal()
  }

  fig <- fig +
    labs(title = paste("Frequency of", var_name), x = var_name, y = "Count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  # Render to a temp PNG, encode, and clean up
  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 6, height = 4, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
316
r-statistics-service/tools/logistic_binary.R
Normal file
316
r-statistics-service/tools/logistic_binary.R
Normal file
@@ -0,0 +1,316 @@
|
||||
#' @tool_code ST_LOGISTIC_BINARY
|
||||
#' @name 二元 Logistic 回归
|
||||
#' @version 1.0.0
|
||||
#' @description 二分类结局变量的多因素分析
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
# Run a binary logistic regression analysis.
#
# `input` carries the dataset reference plus `params` with:
#   outcome_var - name of the binary outcome column
#   predictors  - character vector of predictor column names (>= 1 required)
#   confounders - optional character vector of adjustment variables
#
# Returns a list with status/message, results (OR table with Wald 95% CI,
# model fit indices, VIF, EPV), a forest plot (base64 PNG), a trace log and
# reproducible R code. Validation failures return a make_error() payload;
# a tripped hard guardrail returns status = "blocked".
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Data loading =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  p <- input$params
  outcome_var <- p$outcome_var
  predictors <- p$predictors    # predictor variable names
  confounders <- p$confounders  # optional confounder names

  # ===== Parameter validation =====
  if (!(outcome_var %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = outcome_var))
  }

  # c() already drops NULL components; additionally drop NA / empty names.
  # (The previous `!is.null(all_vars)` was a scalar test and filtered nothing.)
  all_vars <- c(predictors, confounders)
  all_vars <- all_vars[!is.na(all_vars) & all_vars != ""]

  for (v in all_vars) {
    if (!(v %in% names(df))) {
      return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = v))
    }
  }

  if (length(predictors) == 0) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "至少需要一个预测变量"))
  }

  # ===== Data cleaning =====
  original_rows <- nrow(df)

  # Listwise deletion: drop rows missing any modelled variable.
  vars_to_check <- c(outcome_var, all_vars)
  for (v in vars_to_check) {
    df <- df[!is.na(df[[v]]), ]
  }

  removed_rows <- original_rows - nrow(df)
  if (removed_rows > 0) {
    log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)"))
  }

  # ===== Outcome variable checks =====
  outcome_values <- unique(df[[outcome_var]])
  if (length(outcome_values) != 2) {
    return(make_error(ERROR_CODES$E003_INSUFFICIENT_GROUPS,
                      col = outcome_var, expected = 2, actual = length(outcome_values)))
  }

  # Ensure the outcome is a factor (two levels / 0-1 coding).
  if (!is.factor(df[[outcome_var]])) {
    df[[outcome_var]] <- as.factor(df[[outcome_var]])
  }

  # Event counts; the rarer outcome level drives the EPV rule below.
  event_counts <- table(df[[outcome_var]])
  n_events <- min(event_counts)
  n_predictors <- length(all_vars)

  log_add(glue("结局变量分布: {paste(names(event_counts), '=', event_counts, collapse=', ')}"))
  log_add(glue("事件数: {n_events}, 预测变量数: {n_predictors}"))

  # ===== Guardrail checks =====
  guardrail_results <- list()
  warnings_list <- c()

  # Events-per-variable rule (EPV >= 10 for a stable model).
  epv <- n_events / n_predictors
  if (epv < 10) {
    warnings_list <- c(warnings_list, glue("EPV = {round(epv, 1)} < 10,模型可能不稳定"))
    log_add(glue("警告: EPV = {round(epv, 1)} < 10"))
  }

  # Sample-size guardrail (hard block below 20 rows).
  sample_check <- check_sample_size(nrow(df), min_required = 20, action = ACTION_BLOCK)
  guardrail_results <- c(guardrail_results, list(sample_check))

  guardrail_status <- run_guardrail_chain(guardrail_results)

  if (guardrail_status$status == "blocked") {
    return(list(
      status = "blocked",
      message = guardrail_status$reason,
      trace_log = logs
    ))
  }

  # ===== Build model formula =====
  formula_str <- paste(outcome_var, "~", paste(all_vars, collapse = " + "))
  formula_obj <- as.formula(formula_str)
  log_add(glue("模型公式: {formula_str}"))

  # ===== Core computation =====
  log_add("拟合 Logistic 回归模型")

  # Warnings are collected with withCallingHandlers: muffleWarning is only
  # available to calling handlers. The previous tryCatch(warning=) handler
  # was an exiting handler — it discarded the fitted model and its
  # invokeRestart("muffleWarning") itself errored, because that restart is
  # gone once tryCatch unwinds (e.g. on glm's "fitted probabilities
  # numerically 0 or 1 occurred" warning).
  model <- tryCatch({
    withCallingHandlers(
      glm(formula_obj, data = df, family = binomial(link = "logit")),
      warning = function(w) {
        warnings_list <<- c(warnings_list, w$message)
        log_add(paste("模型警告:", w$message))
        invokeRestart("muffleWarning")
      }
    )
  }, error = function(e) {
    log_add(paste("模型拟合失败:", e$message))
    NULL
  })

  if (is.null(model)) {
    return(map_r_error("模型拟合失败"))
  }

  # Convergence check (non-convergence often signals separation).
  if (!model$converged) {
    warnings_list <- c(warnings_list, "模型未完全收敛")
    log_add("警告: 模型未完全收敛")
  }

  # ===== Extract model results =====
  coef_summary <- summary(model)$coefficients

  # Odds ratios with Wald 95% CI (estimate +/- 1.96 * SE on the log scale).
  coef_table <- data.frame(
    variable = rownames(coef_summary),
    estimate = coef_summary[, "Estimate"],
    std_error = coef_summary[, "Std. Error"],
    z_value = coef_summary[, "z value"],
    p_value = coef_summary[, "Pr(>|z|)"],
    stringsAsFactors = FALSE
  )

  coef_table$OR <- exp(coef_table$estimate)
  coef_table$ci_lower <- exp(coef_table$estimate - 1.96 * coef_table$std_error)
  coef_table$ci_upper <- exp(coef_table$estimate + 1.96 * coef_table$std_error)

  # Compact list form for the API payload (raw coefficients omitted).
  coefficients_list <- lapply(seq_len(nrow(coef_table)), function(i) {
    row <- coef_table[i, ]
    list(
      variable = row$variable,
      OR = round(row$OR, 3),
      ci_lower = round(row$ci_lower, 3),
      ci_upper = round(row$ci_upper, 3),
      p_value = round(row$p_value, 4),
      p_value_fmt = format_p_value(row$p_value),
      significant = row$p_value < 0.05
    )
  })

  # ===== Model fit =====
  null_deviance <- model$null.deviance
  residual_deviance <- model$deviance
  aic <- AIC(model)

  # Nagelkerke pseudo R-squared.
  n <- nrow(df)
  r2_nagelkerke <- (1 - exp((residual_deviance - null_deviance) / n)) / (1 - exp(-null_deviance / n))

  log_add(glue("AIC = {round(aic, 2)}, Nagelkerke R² = {round(r2_nagelkerke, 3)}"))

  # ===== Collinearity diagnostics (VIF) =====
  # Optional dependency: only computed when the `car` package is installed.
  vif_results <- NULL
  if (length(all_vars) > 1) {
    tryCatch({
      if (requireNamespace("car", quietly = TRUE)) {
        vif_values <- car::vif(model)
        if (is.matrix(vif_values)) {
          # car::vif returns a GVIF matrix when factors are present.
          vif_values <- vif_values[, "GVIF"]
        }
        vif_results <- lapply(names(vif_values), function(v) {
          list(variable = v, vif = round(vif_values[v], 2))
        })

        high_vif <- names(vif_values)[vif_values > 5]
        if (length(high_vif) > 0) {
          warnings_list <- c(warnings_list, paste("VIF > 5 的变量:", paste(high_vif, collapse = ", ")))
        }
      }
    }, error = function(e) {
      log_add(paste("VIF 计算失败:", e$message))
    })
  }

  # ===== Generate chart (forest plot) =====
  log_add("生成森林图")
  plot_base64 <- tryCatch({
    generate_forest_plot(coef_table)
  }, error = function(e) {
    log_add(paste("图表生成失败:", e$message))
    NULL
  })

  # ===== Generate reproducible code =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: 二元 Logistic 回归
# 时间: {Sys.time()}
# ================================

# 数据准备
df <- read.csv("{original_filename}")

# 模型拟合
model <- glm({formula_str}, data = df, family = binomial(link = "logit"))
summary(model)

# OR 和 95% CI
coef_summary <- summary(model)$coefficients
OR <- exp(coef_summary[, "Estimate"])
CI_lower <- exp(coef_summary[, "Estimate"] - 1.96 * coef_summary[, "Std. Error"])
CI_upper <- exp(coef_summary[, "Estimate"] + 1.96 * coef_summary[, "Std. Error"])
results <- data.frame(OR = OR, CI_lower = CI_lower, CI_upper = CI_upper,
                      p_value = coef_summary[, "Pr(>|z|)"])
print(round(results, 3))

# 模型拟合度
cat("AIC:", AIC(model), "\\n")

# VIF(需要 car 包)
# library(car)
# vif(model)
')

  # ===== Return results =====
  log_add("分析完成")

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = list(
      method = "Binary Logistic Regression (glm, binomial)",
      formula = formula_str,
      n_observations = nrow(df),
      n_predictors = n_predictors,
      coefficients = coefficients_list,
      model_fit = list(
        aic = jsonlite::unbox(round(aic, 2)),
        null_deviance = jsonlite::unbox(round(null_deviance, 2)),
        residual_deviance = jsonlite::unbox(round(residual_deviance, 2)),
        r2_nagelkerke = jsonlite::unbox(round(r2_nagelkerke, 4))
      ),
      vif = vif_results,
      epv = jsonlite::unbox(round(epv, 1))
    ),
    plots = if (!is.null(plot_base64)) list(plot_base64) else list(),
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# Build a forest plot (odds ratios with 95% CI on a log scale) from the
# coefficient table; returns a base64-encoded PNG data URI, or NULL when
# the model contains no non-intercept terms.
generate_forest_plot <- function(coef_table) {
  # The intercept is not an odds ratio of interest — drop it.
  rows <- coef_table[coef_table$variable != "(Intercept)", ]

  if (nrow(rows) == 0) {
    return(NULL)
  }

  # Reverse factor levels so the first predictor is drawn at the top.
  rows$variable <- factor(rows$variable, levels = rev(rows$variable))

  fig <- ggplot(rows, aes(x = OR, y = variable)) +
    geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
    geom_point(size = 3, color = "#3b82f6") +
    geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height = 0.2, color = "#3b82f6") +
    scale_x_log10() +
    theme_minimal() +
    labs(
      title = "Forest Plot: Odds Ratios with 95% CI",
      x = "Odds Ratio (log scale)",
      y = "Variable"
    ) +
    theme(
      panel.grid.minor = element_blank(),
      axis.text.y = element_text(size = 10)
    )

  # Height scales with the number of predictors so labels never collide.
  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 8, height = max(4, nrow(rows) * 0.5 + 2), dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
235
r-statistics-service/tools/mann_whitney.R
Normal file
235
r-statistics-service/tools/mann_whitney.R
Normal file
@@ -0,0 +1,235 @@
|
||||
#' @tool_code ST_MANN_WHITNEY
|
||||
#' @name Mann-Whitney U 检验
|
||||
#' @version 1.0.0
|
||||
#' @description 两组独立样本非参数比较(Wilcoxon秩和检验)
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
# Run a Mann-Whitney U test (Wilcoxon rank-sum, normal approximation with
# continuity correction) comparing `params$value_var` across the two levels
# of `params$group_var`.
#
# Returns a list with status/message, results (U statistic, Z, p-value,
# effect size r with a verbal interpretation, per-group descriptives),
# a box plot (base64 PNG), a trace log and reproducible R code. Validation
# failures return a make_error() payload; a tripped hard guardrail returns
# status = "blocked".
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Data loading =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  p <- input$params
  group_var <- p$group_var
  value_var <- p$value_var

  # ===== Parameter validation =====
  if (!(group_var %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var))
  }
  if (!(value_var %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = value_var))
  }

  # ===== Data cleaning =====
  # Drop rows where the group label is NA or blank, or the value is NA.
  original_rows <- nrow(df)
  df <- df[!is.na(df[[group_var]]) & trimws(as.character(df[[group_var]])) != "", ]
  df <- df[!is.na(df[[value_var]]), ]

  removed_rows <- original_rows - nrow(df)
  if (removed_rows > 0) {
    log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)"))
  }

  # Group check: the test requires exactly two groups.
  groups <- unique(df[[group_var]])
  if (length(groups) != 2) {
    return(make_error(ERROR_CODES$E003_INSUFFICIENT_GROUPS,
                      col = group_var, expected = 2, actual = length(groups)))
  }

  # Extract the two independent samples.
  g1_vals <- df[df[[group_var]] == groups[1], value_var]
  g2_vals <- df[df[[group_var]] == groups[2], value_var]

  # ===== Guardrail checks =====
  guardrail_results <- list()
  warnings_list <- c()

  # Sample-size check (at least 5 observations per group; hard block).
  min_n <- min(length(g1_vals), length(g2_vals))
  sample_check <- check_sample_size(min_n, min_required = 5, action = ACTION_BLOCK)
  guardrail_results <- c(guardrail_results, list(sample_check))
  log_add(glue("样本量检查: 每组最小 {min_n}, {sample_check$reason}"))

  guardrail_status <- run_guardrail_chain(guardrail_results)

  if (guardrail_status$status == "blocked") {
    return(list(
      status = "blocked",
      message = guardrail_status$reason,
      trace_log = logs
    ))
  }

  if (length(guardrail_status$warnings) > 0) {
    warnings_list <- c(warnings_list, guardrail_status$warnings)
  }

  # ===== Core computation =====
  log_add("执行 Mann-Whitney U 检验 (Wilcoxon rank-sum test)")

  result <- tryCatch({
    wilcox.test(g1_vals, g2_vals, exact = FALSE, correct = TRUE)
  }, error = function(e) {
    log_add(paste("Mann-Whitney U 检验失败:", e$message))
    return(NULL)
  })

  if (is.null(result)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "Mann-Whitney U 检验执行失败"))
  }

  # Effect size r = Z / sqrt(N).
  n1 <- length(g1_vals)
  n2 <- length(g2_vals)
  N <- n1 + n2

  # Derive Z from the U statistic via the normal approximation
  # (mean n1*n2/2, sd sqrt(n1*n2*(n1+n2+1)/12); ignores tie correction).
  U <- result$statistic
  mu <- n1 * n2 / 2
  sigma <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12)
  z_value <- (U - mu) / sigma
  effect_r <- abs(z_value) / sqrt(N)

  # Verbal interpretation of r (thresholds 0.1 / 0.3 / 0.5).
  effect_interpretation <- if (effect_r < 0.1) "微小" else if (effect_r < 0.3) "小" else if (effect_r < 0.5) "中等" else "大"

  log_add(glue("U = {round(U, 2)}, Z = {round(z_value, 3)}, p = {round(result$p.value, 4)}, r = {round(effect_r, 3)}"))

  # ===== Generate chart =====
  log_add("生成箱线图")
  plot_base64 <- tryCatch({
    generate_boxplot(df, group_var, value_var)
  }, error = function(e) {
    log_add(paste("图表生成失败:", e$message))
    NULL
  })

  # ===== Generate reproducible code =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: Mann-Whitney U 检验
# 时间: {Sys.time()}
# ================================

library(ggplot2)

# 数据准备
df <- read.csv("{original_filename}")
group_var <- "{group_var}"
value_var <- "{value_var}"

# 数据清洗
df <- df[!is.na(df[[group_var]]) & !is.na(df[[value_var]]), ]

# Mann-Whitney U 检验
g1_vals <- df[df[[group_var]] == "{groups[1]}", value_var]
g2_vals <- df[df[[group_var]] == "{groups[2]}", value_var]
result <- wilcox.test(g1_vals, g2_vals, exact = FALSE, correct = TRUE)
print(result)

# 计算效应量 r
n1 <- length(g1_vals)
n2 <- length(g2_vals)
U <- result$statistic
mu <- n1 * n2 / 2
sigma <- sqrt(n1 * n2 * (n1 + n2 + 1) / 12)
z_value <- (U - mu) / sigma
effect_r <- abs(z_value) / sqrt(n1 + n2)
cat("Effect size r =", round(effect_r, 3), "\\n")

# 可视化
ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) +
  geom_boxplot(fill = "#8b5cf6", alpha = 0.6) +
  theme_minimal() +
  labs(title = paste("Distribution of", value_var, "by", group_var))
')

  # ===== Return results =====
  log_add("分析完成")

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = list(
      method = "Wilcoxon rank sum test with continuity correction",
      statistic_U = jsonlite::unbox(as.numeric(U)),
      z_value = jsonlite::unbox(round(z_value, 4)),
      p_value = jsonlite::unbox(as.numeric(result$p.value)),
      p_value_fmt = format_p_value(result$p.value),
      effect_size = list(
        r = jsonlite::unbox(round(effect_r, 4)),
        interpretation = effect_interpretation
      ),
      group_stats = list(
        list(
          group = as.character(groups[1]),
          n = n1,
          median = median(g1_vals),
          iqr = IQR(g1_vals),
          min = min(g1_vals),
          max = max(g1_vals)
        ),
        list(
          group = as.character(groups[2]),
          n = n2,
          median = median(g2_vals),
          iqr = IQR(g2_vals),
          min = min(g2_vals),
          max = max(g2_vals)
        )
      )
    ),
    plots = if (!is.null(plot_base64)) list(plot_base64) else list(),
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# Render a jittered box plot of `value_var` split by `group_var` and return
# it as a base64-encoded PNG data URI.
generate_boxplot <- function(df, group_var, value_var) {
  fig <- ggplot(df, aes(x = .data[[group_var]], y = .data[[value_var]])) +
    geom_boxplot(fill = "#8b5cf6", alpha = 0.6) +
    geom_jitter(width = 0.2, alpha = 0.3, size = 1) +
    theme_minimal() +
    labs(
      title = paste("Distribution of", value_var, "by", group_var),
      subtitle = "Mann-Whitney U Test"
    )

  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 6, height = 4, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
274
r-statistics-service/tools/t_test_paired.R
Normal file
274
r-statistics-service/tools/t_test_paired.R
Normal file
@@ -0,0 +1,274 @@
|
||||
#' @tool_code ST_T_TEST_PAIRED
|
||||
#' @name 配对 T 检验
|
||||
#' @version 1.0.0
|
||||
#' @description 配对设计的均值差异检验(前后对比)
|
||||
#' @author SSA-Pro Team
|
||||
|
||||
library(glue)
|
||||
library(ggplot2)
|
||||
library(base64enc)
|
||||
|
||||
# Run a paired t-test comparing `params$before_var` and `params$after_var`.
#
# When `input$guardrails$check_normality` is enabled and the normality check
# on the paired differences triggers a "switch", the analysis falls back to
# the Wilcoxon signed-rank test instead. Returns a list with status/message,
# results (test statistic, p-value, effect size with a verbal interpretation,
# before/after/difference descriptives), a paired-comparison plot (base64
# PNG), a trace log and reproducible R code. Validation failures return a
# make_error() payload; a tripped hard guardrail returns status = "blocked".
run_analysis <- function(input) {
  # ===== Initialisation =====
  logs <- c()
  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }

  on.exit({}, add = TRUE)

  # ===== Data loading =====
  log_add("开始加载输入数据")
  df <- tryCatch(
    load_input_data(input),
    error = function(e) {
      log_add(paste("数据加载失败:", e$message))
      return(NULL)
    }
  )

  if (is.null(df)) {
    return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = "数据加载失败"))
  }
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))

  p <- input$params
  guardrails_cfg <- input$guardrails

  before_var <- p$before_var
  after_var <- p$after_var

  # ===== Parameter validation =====
  if (!(before_var %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = before_var))
  }
  if (!(after_var %in% names(df))) {
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = after_var))
  }

  # ===== Data cleaning =====
  # Pairwise deletion: keep only rows with both measurements present.
  original_rows <- nrow(df)
  df <- df[!is.na(df[[before_var]]) & !is.na(df[[after_var]]), ]

  removed_rows <- original_rows - nrow(df)
  if (removed_rows > 0) {
    log_add(glue("数据清洗: 移除 {removed_rows} 行缺失值 (剩余 {nrow(df)} 行)"))
  }

  before_vals <- df[[before_var]]
  after_vals <- df[[after_var]]
  # Paired differences (after - before) drive both the normality check and
  # the effect-size computation.
  diff_vals <- after_vals - before_vals

  n <- length(diff_vals)

  # ===== Guardrail checks =====
  guardrail_results <- list()
  warnings_list <- c()
  method_used <- "t.test"
  use_wilcoxon <- FALSE

  # Sample-size check (warn below 10 pairs; does not block).
  sample_check <- check_sample_size(n, min_required = 10, action = ACTION_WARN)
  guardrail_results <- c(guardrail_results, list(sample_check))
  log_add(glue("样本量检查: N = {n}, {sample_check$reason}"))

  # Normality check on the differences (only when enabled and n >= 3);
  # a failure triggers a method switch rather than a block.
  if (isTRUE(guardrails_cfg$check_normality) && n >= 3) {
    log_add("执行差值正态性检验")
    norm_check <- check_normality(diff_vals, alpha = 0.05,
                                  action = ACTION_SWITCH,
                                  action_target = "Wilcoxon signed-rank test")
    guardrail_results <- c(guardrail_results, list(norm_check))
    log_add(glue("差值正态性: p = {round(norm_check$p_value, 4)}, {norm_check$reason}"))
  }

  guardrail_status <- run_guardrail_chain(guardrail_results)

  if (guardrail_status$status == "blocked") {
    return(list(
      status = "blocked",
      message = guardrail_status$reason,
      trace_log = logs
    ))
  }

  if (guardrail_status$status == "switch") {
    use_wilcoxon <- TRUE
    log_add(glue("触发方法切换: {guardrail_status$reason}"))
    warnings_list <- c(warnings_list, "差值不满足正态性,自动切换为 Wilcoxon 符号秩检验")
  }

  if (length(guardrail_status$warnings) > 0) {
    warnings_list <- c(warnings_list, guardrail_status$warnings)
  }

  # ===== Core computation =====
  if (use_wilcoxon) {
    log_add("执行 Wilcoxon 符号秩检验")
    result <- wilcox.test(before_vals, after_vals, paired = TRUE, exact = FALSE)
    method_used <- "Wilcoxon signed rank test"

    # Wilcoxon effect size r: Z recovered from the two-sided p-value
    # (qnorm(p/2) is negative; sign taken from the median difference).
    z_value <- qnorm(result$p.value / 2) * sign(median(diff_vals))
    effect_r <- abs(z_value) / sqrt(n)
    effect_interpretation <- if (abs(effect_r) < 0.1) "微小" else if (abs(effect_r) < 0.3) "小" else if (abs(effect_r) < 0.5) "中等" else "大"

    output_results <- list(
      method = method_used,
      statistic = jsonlite::unbox(as.numeric(result$statistic)),
      p_value = jsonlite::unbox(as.numeric(result$p.value)),
      p_value_fmt = format_p_value(result$p.value),
      effect_size = list(
        r = jsonlite::unbox(round(effect_r, 4)),
        interpretation = effect_interpretation
      )
    )
  } else {
    log_add("执行配对 T 检验")
    result <- t.test(before_vals, after_vals, paired = TRUE)
    method_used <- "Paired t-test"

    # Cohen's d for paired samples: mean difference / SD of differences
    # (thresholds 0.2 / 0.5 / 0.8 for the verbal interpretation).
    mean_diff <- mean(diff_vals)
    sd_diff <- sd(diff_vals)
    cohens_d <- mean_diff / sd_diff
    effect_interpretation <- if (abs(cohens_d) < 0.2) "微小" else if (abs(cohens_d) < 0.5) "小" else if (abs(cohens_d) < 0.8) "中等" else "大"

    log_add(glue("t = {round(result$statistic, 3)}, df = {round(result$parameter, 1)}, p = {round(result$p.value, 4)}, Cohen's d = {round(cohens_d, 3)}"))

    output_results <- list(
      method = method_used,
      statistic = jsonlite::unbox(as.numeric(result$statistic)),
      df = jsonlite::unbox(as.numeric(result$parameter)),
      p_value = jsonlite::unbox(as.numeric(result$p.value)),
      p_value_fmt = format_p_value(result$p.value),
      conf_int = as.numeric(result$conf.int),
      effect_size = list(
        cohens_d = jsonlite::unbox(round(cohens_d, 4)),
        interpretation = effect_interpretation
      )
    )
  }

  # Descriptive statistics for before / after / difference.
  output_results$descriptive <- list(
    before = list(
      variable = before_var,
      n = n,
      mean = round(mean(before_vals), 3),
      sd = round(sd(before_vals), 3),
      median = round(median(before_vals), 3)
    ),
    after = list(
      variable = after_var,
      n = n,
      mean = round(mean(after_vals), 3),
      sd = round(sd(after_vals), 3),
      median = round(median(after_vals), 3)
    ),
    difference = list(
      mean = round(mean(diff_vals), 3),
      sd = round(sd(diff_vals), 3),
      median = round(median(diff_vals), 3)
    )
  )

  # ===== Generate chart =====
  log_add("生成配对比较图")
  plot_base64 <- tryCatch({
    generate_paired_plot(df, before_var, after_var, diff_vals)
  }, error = function(e) {
    log_add(paste("图表生成失败:", e$message))
    NULL
  })

  # ===== Generate reproducible code =====
  original_filename <- if (!is.null(input$original_filename) && nchar(input$original_filename) > 0) {
    input$original_filename
  } else {
    "data.csv"
  }

  reproducible_code <- glue('
# SSA-Pro 自动生成代码
# 工具: 配对 T 检验
# 时间: {Sys.time()}
# ================================

library(ggplot2)

# 数据准备
df <- read.csv("{original_filename}")
before_var <- "{before_var}"
after_var <- "{after_var}"

# 数据清洗
df <- df[!is.na(df[[before_var]]) & !is.na(df[[after_var]]), ]

# 配对 T 检验
before_vals <- df[[before_var]]
after_vals <- df[[after_var]]
result <- t.test(before_vals, after_vals, paired = TRUE)
print(result)

# Cohen d (效应量)
diff_vals <- after_vals - before_vals
cohens_d <- mean(diff_vals) / sd(diff_vals)
cat("Cohen d =", round(cohens_d, 3), "\\n")

# 可视化
df_long <- data.frame(
  id = rep(1:nrow(df), 2),
  time = rep(c("Before", "After"), each = nrow(df)),
  value = c(before_vals, after_vals)
)
ggplot(df_long, aes(x = time, y = value, group = id)) +
  geom_line(alpha = 0.3) +
  geom_point() +
  theme_minimal() +
  labs(title = "Paired Comparison")
')

  # ===== Return results =====
  log_add("分析完成")

  return(list(
    status = "success",
    message = "分析完成",
    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
    results = output_results,
    plots = if (!is.null(plot_base64)) list(plot_base64) else list(),
    trace_log = logs,
    reproducible_code = as.character(reproducible_code)
  ))
}
|
||||
|
||||
# Draw a before/after spaghetti plot — one grey line per subject plus red
# diamonds and a red line for the group means — and return it as a
# base64-encoded PNG data URI.
generate_paired_plot <- function(df, before_var, after_var, diff_vals) {
  n <- nrow(df)

  # Long format: one row per subject per time point, ids linking the pair.
  paired_long <- data.frame(
    id = rep(1:n, 2),
    time = factor(rep(c("Before", "After"), each = n), levels = c("Before", "After")),
    value = c(df[[before_var]], df[[after_var]])
  )

  fig <- ggplot(paired_long, aes(x = time, y = value)) +
    geom_line(aes(group = id), alpha = 0.3, color = "gray60") +
    geom_point(aes(group = id), alpha = 0.5, size = 2) +
    stat_summary(fun = mean, geom = "point", size = 4, color = "#ef4444", shape = 18) +
    stat_summary(fun = mean, geom = "line", aes(group = 1), color = "#ef4444", size = 1.2) +
    theme_minimal() +
    labs(
      title = paste("Paired Comparison:", before_var, "vs", after_var),
      subtitle = paste("n =", n, ", Mean change =", round(mean(diff_vals), 2)),
      x = "Time Point",
      y = "Value"
    )

  png_path <- tempfile(fileext = ".png")
  ggsave(png_path, fig, width = 6, height = 5, dpi = 100)
  encoded <- base64encode(png_path)
  unlink(png_path)

  paste0("data:image/png;base64,", encoded)
}
|
||||
Reference in New Issue
Block a user