feat(asl): Complete Tool 4 SR Chart Generator and Tool 5 Meta Analysis Engine

Tool 4 - SR Chart Generator: - PRISMA 2020 flow diagram with Chinese/English toggle (SVG) - Baseline characteristics table (Table 1) - Dual data source: project pipeline API + Excel upload - SVG/PNG export support - Backend: ChartingService with Prisma aggregation - Frontend: PrismaFlowDiagram, BaselineTable, DataSourceSelector Tool 5 - Meta Analysis Engine: - 3 data types: HR (metagen), dichotomous (metabin), continuous (metacont) - Random and fixed effects models - Multiple effect measures: HR / OR / RR - Forest plot + funnel plot (base64 PNG from R) - Heterogeneity statistics: I2, Q, p-value, Tau2 - Data input via Excel upload or project pipeline - R Docker image updated with meta package (13 tools total) - E2E test: 36/36 passed - Key fix: exp() back-transformation for log-scale ratio measures Also includes: - IIT CRA Agent V3.0 routing and AI chat page integration - Updated ASL module status guide (v2.3) - Updated system status guide (v6.3) - Updated R statistics engine guide (v1.4) Tested: Frontend renders correctly, backend APIs functional, E2E tests passed Made-with: Cursor
2026-02-26 21:51:02 +08:00
parent 7c3cc12b2e
commit 205932bb3f
30 changed files with 3596 additions and 114 deletions
--- a/r-statistics-service/tools/meta_analysis.R
+++ b/r-statistics-service/tools/meta_analysis.R
@@ -0,0 +1,295 @@
+#' @tool_code ST_META_ANALYSIS
+#' @name Meta 分析引擎
+#' @version 1.0.0
+#' @description 支持 HR/二分类/连续型三种数据类型的 Meta 分析，生成森林图和漏斗图
+#' @author ASL Evidence Synthesis Team
+
+library(glue)
+library(ggplot2)
+library(base64enc)
+
+run_analysis <- function(input) {
+  logs <- c()
+  log_add <- function(msg) { logs <<- c(logs, paste0("[", Sys.time(), "] ", msg)) }
+  warnings_list <- c()
+
+  # ===== 依赖检查 =====
+  if (!requireNamespace("meta", quietly = TRUE)) {
+    return(make_error(ERROR_CODES$E101_PACKAGE_MISSING, package = "meta"))
+  }
+  library(meta)
+
+  # ===== 数据加载 =====
+  log_add("Loading input data")
+  df <- tryCatch(
+    load_input_data(input),
+    error = function(e) {
+      log_add(paste("Data loading failed:", e$message))
+      return(NULL)
+    }
+  )
+
+  if (is.null(df) || nrow(df) < 2) {
+    return(make_error(ERROR_CODES$E004_SAMPLE_TOO_SMALL, n = ifelse(is.null(df), 0, nrow(df)), min_required = 2))
+  }
+  log_add(glue("Data loaded: {nrow(df)} studies, {ncol(df)} columns"))
+
+  # ===== 参数提取 =====
+  p <- input$params
+  data_type <- tolower(p$data_type %||% "hr")
+  model_type <- tolower(p$model %||% "random")
+  effect_measure <- toupper(p$effect_measure %||% "")
+
+  use_random <- model_type == "random"
+  use_fixed  <- model_type == "fixed"
+
+  log_add(glue("Config: data_type={data_type}, model={model_type}"))
+
+  # ===== 执行 Meta 分析 =====
+  ma_result <- NULL
+
+  ma_result <- tryCatch(
+    withCallingHandlers(
+      {
+        if (data_type == "hr") {
+          run_hr_meta(df, use_random, use_fixed, log_add)
+        } else if (data_type == "dichotomous") {
+          sm <- if (effect_measure %in% c("OR", "RR", "RD")) effect_measure else "OR"
+          run_dichotomous_meta(df, sm, use_random, use_fixed, log_add)
+        } else if (data_type == "continuous") {
+          run_continuous_meta(df, use_random, use_fixed, log_add)
+        } else {
+          return(make_error(ERROR_CODES$E100_INTERNAL_ERROR, details = glue("Unknown data_type: {data_type}")))
+        }
+      },
+      warning = function(w) {
+        warnings_list <<- c(warnings_list, w$message)
+        invokeRestart("muffleWarning")
+      }
+    ),
+    error = function(e) {
+      log_add(glue("Meta-analysis failed: {e$message}"))
+      return(NULL)
+    }
+  )
+
+  if (is.null(ma_result)) {
+    return(list(
+      status = "error",
+      error_code = "E100",
+      message = "Meta-analysis computation failed. Check data format.",
+      user_hint = "Please verify your data columns and values.",
+      trace_log = logs
+    ))
+  }
+
+  # ===== 提取结果 =====
+  log_add("Extracting results")
+
+  is_random <- use_random
+  pooled_te <- if (is_random) ma_result$TE.random else ma_result$TE.fixed
+  pooled_lower <- if (is_random) ma_result$lower.random else ma_result$lower.fixed
+  pooled_upper <- if (is_random) ma_result$upper.random else ma_result$upper.fixed
+  pooled_pval <- if (is_random) ma_result$pval.random else ma_result$pval.fixed
+
+  i2_val <- ma_result$I2
+  tau2_val <- if (!is.null(ma_result$tau2)) ma_result$tau2 else NA
+  q_stat <- ma_result$Q
+  q_pval <- ma_result$pval.Q
+  k_studies <- ma_result$k
+
+  sm_label <- ma_result$sm
+
+  # Back-transform ratio measures from log scale (HR, OR, RR)
+  is_ratio <- sm_label %in% c("HR", "OR", "RR")
+  display_te    <- if (is_ratio) exp(pooled_te) else pooled_te
+  display_lower <- if (is_ratio) exp(pooled_lower) else pooled_lower
+  display_upper <- if (is_ratio) exp(pooled_upper) else pooled_upper
+
+  log_add(glue("Back-transform: is_ratio={is_ratio}, raw_TE={round(pooled_te,4)}, display={round(display_te,4)}"))
+
+  results_list <- list(
+    pooled_effect = jsonlite::unbox(round(display_te, 4)),
+    pooled_lower  = jsonlite::unbox(round(display_lower, 4)),
+    pooled_upper  = jsonlite::unbox(round(display_upper, 4)),
+    pooled_pvalue = jsonlite::unbox(round(pooled_pval, 6)),
+    i_squared     = jsonlite::unbox(round(i2_val * 100, 1)),
+    tau_squared   = jsonlite::unbox(round(tau2_val, 4)),
+    q_statistic   = jsonlite::unbox(round(q_stat, 2)),
+    q_pvalue      = jsonlite::unbox(round(q_pval, 6)),
+    k_studies     = jsonlite::unbox(k_studies),
+    effect_measure = jsonlite::unbox(sm_label),
+    model          = jsonlite::unbox(ifelse(is_random, "Random Effects", "Fixed Effect"))
+  )
+
+  # ===== 生成图表 =====
+  log_add("Generating forest plot")
+  forest_b64 <- generate_forest_plot(ma_result)
+
+  log_add("Generating funnel plot")
+  funnel_b64 <- generate_funnel_plot(ma_result)
+
+  # ===== 构建 report_blocks =====
+  blocks <- list()
+
+  kv_items <- list()
+  kv_items[["Effect Measure"]] <- sm_label
+  kv_items[["Model"]] <- ifelse(is_random, "Random Effects (DerSimonian-Laird)", "Fixed Effect (Mantel-Haenszel)")
+  kv_items[["Studies (k)"]] <- as.character(k_studies)
+  kv_items[["Pooled Effect"]] <- glue("{round(display_te, 3)} [{round(display_lower, 3)}, {round(display_upper, 3)}]")
+  kv_items[["P-value"]] <- format_p_value(pooled_pval)
+  kv_items[["I\u00b2 (heterogeneity)"]] <- glue("{round(i2_val * 100, 1)}%")
+  kv_items[["Q statistic"]] <- glue("{round(q_stat, 2)} (p = {format_p_value(q_pval)})")
+
+  blocks[[length(blocks) + 1]] <- make_kv_block(kv_items, title = "Meta-Analysis Summary")
+
+  if (!is.null(forest_b64)) {
+    blocks[[length(blocks) + 1]] <- make_image_block(forest_b64, title = "Forest Plot", alt = "Forest plot of meta-analysis")
+  }
+  if (!is.null(funnel_b64)) {
+    blocks[[length(blocks) + 1]] <- make_image_block(funnel_b64, title = "Funnel Plot", alt = "Funnel plot for publication bias assessment")
+  }
+
+  heterogeneity_text <- if (i2_val * 100 > 75) {
+    glue("Substantial heterogeneity observed (I\u00b2 = {round(i2_val*100,1)}%). Consider subgroup analysis or meta-regression.")
+  } else if (i2_val * 100 > 50) {
+    glue("Moderate heterogeneity (I\u00b2 = {round(i2_val*100,1)}%). Results should be interpreted with caution.")
+  } else {
+    glue("Low heterogeneity (I\u00b2 = {round(i2_val*100,1)}%). Studies appear reasonably homogeneous.")
+  }
+  blocks[[length(blocks) + 1]] <- make_markdown_block(heterogeneity_text, title = "Heterogeneity Assessment")
+
+  # ===== 可复现代码 =====
+  reproducible_code <- glue('
+# Auto-generated by AI Clinical Research Platform
+# Tool: Meta-Analysis Engine (ST_META_ANALYSIS)
+# Time: {Sys.time()}
+# ================================
+
+library(meta)
+
+# Your data:
+# df <- read.csv("meta_data.csv")
+
+# Run meta-analysis:
+# ma <- metagen(TE = log(df$hr), seTE = ..., studlab = df$study_id, sm = "{sm_label}")
+# forest(ma)
+# funnel(ma)
+')
+
+  plots_list <- list()
+  if (!is.null(forest_b64)) plots_list[[length(plots_list) + 1]] <- forest_b64
+  if (!is.null(funnel_b64)) plots_list[[length(plots_list) + 1]] <- funnel_b64
+
+  log_add("Analysis complete")
+
+  return(list(
+    status = "success",
+    message = glue("Meta-analysis completed: {k_studies} studies, model={model_type}"),
+    warnings = if (length(warnings_list) > 0) warnings_list else NULL,
+    results = results_list,
+    report_blocks = blocks,
+    plots = plots_list,
+    trace_log = logs,
+    reproducible_code = as.character(reproducible_code)
+  ))
+}
+
+# ===== Sub-functions =====
+
+`%||%` <- function(a, b) if (is.null(a)) b else a
+
+format_p_value <- function(p) {
+  if (is.na(p)) return("NA")
+  if (p < 0.001) return("p < .001")
+  return(paste0("p = ", formatC(p, format = "f", digits = 3)))
+}
+
+run_hr_meta <- function(df, use_random, use_fixed, log_add) {
+  required <- c("hr", "lower_ci", "upper_ci")
+  check_columns(df, required)
+
+  studlab <- if ("study_id" %in% names(df)) as.character(df$study_id) else paste0("Study ", seq_len(nrow(df)))
+
+  te <- log(df$hr)
+  se <- (log(df$upper_ci) - log(df$lower_ci)) / (2 * qnorm(0.975))
+
+  log_add(glue("Running metagen() with {length(te)} studies, sm=HR"))
+
+  metagen(
+    TE = te, seTE = se, studlab = studlab,
+    sm = "HR", method.tau = "DL",
+    random = use_random, fixed = use_fixed
+  )
+}
+
+run_dichotomous_meta <- function(df, sm, use_random, use_fixed, log_add) {
+  required <- c("events_e", "total_e", "events_c", "total_c")
+  check_columns(df, required)
+
+  studlab <- if ("study_id" %in% names(df)) as.character(df$study_id) else paste0("Study ", seq_len(nrow(df)))
+
+  log_add(glue("Running metabin() with {nrow(df)} studies, sm={sm}"))
+
+  metabin(
+    event.e = df$events_e, n.e = df$total_e,
+    event.c = df$events_c, n.c = df$total_c,
+    studlab = studlab, sm = sm, method.tau = "DL",
+    random = use_random, fixed = use_fixed
+  )
+}
+
+run_continuous_meta <- function(df, use_random, use_fixed, log_add) {
+  required <- c("mean_e", "sd_e", "n_e", "mean_c", "sd_c", "n_c")
+  check_columns(df, required)
+
+  studlab <- if ("study_id" %in% names(df)) as.character(df$study_id) else paste0("Study ", seq_len(nrow(df)))
+
+  log_add(glue("Running metacont() with {nrow(df)} studies, sm=MD"))
+
+  metacont(
+    n.e = df$n_e, mean.e = df$mean_e, sd.e = df$sd_e,
+    n.c = df$n_c, mean.c = df$mean_c, sd.c = df$sd_c,
+    studlab = studlab, sm = "MD", method.tau = "DL",
+    random = use_random, fixed = use_fixed
+  )
+}
+
+check_columns <- function(df, required) {
+  missing <- required[!(required %in% names(df))]
+  if (length(missing) > 0) {
+    stop(glue("Missing required columns: {paste(missing, collapse=', ')}"))
+  }
+}
+
+generate_forest_plot <- function(ma) {
+  tryCatch({
+    plot_h <- max(400, 120 + ma$k * 35)
+    tmp <- tempfile(fileext = ".png")
+    png(tmp, width = 900, height = plot_h, res = 100)
+    meta::forest(ma, sortvar = ma$TE, print.tau2 = TRUE, print.I2 = TRUE,
+                 col.diamond = "steelblue", col.square = "royalblue")
+    dev.off()
+    b64 <- base64encode(tmp)
+    unlink(tmp)
+    paste0("data:image/png;base64,", b64)
+  }, error = function(e) {
+    message(glue("[META] Forest plot error: {e$message}"))
+    NULL
+  })
+}
+
+generate_funnel_plot <- function(ma) {
+  tryCatch({
+    tmp <- tempfile(fileext = ".png")
+    png(tmp, width = 600, height = 500, res = 100)
+    meta::funnel(ma, studlab = TRUE, cex.studlab = 0.8)
+    dev.off()
+    b64 <- base64encode(tmp)
+    unlink(tmp)
+    paste0("data:image/png;base64,", b64)
+  }, error = function(e) {
+    message(glue("[META] Funnel plot error: {e$message}"))
+    NULL
+  })
+}