# AIclinicalresearch/r-statistics-service/utils/error_codes.R

# utils/error_codes.R
# 结构化错误码，便于 LLM 自愈

# Catalogue of structured error definitions, keyed by "<code>_<NAME>".
# Every entry is a list with four fields, in this order:
#   code             - short error code string
#   type             - "business" or "system"
#   message_template - message text containing {placeholder} tokens
#   user_hint        - short hint text shown alongside the message
ERROR_CODES <- local({
  # Assemble one catalogue entry with the fixed field order.
  new_entry <- function(code, type, message_template, user_hint) {
    list(
      code = code,
      type = type,
      message_template = message_template,
      user_hint = user_hint
    )
  }
  business <- function(code, message_template, user_hint) {
    new_entry(code, "business", message_template, user_hint)
  }
  system_error <- function(code, message_template, user_hint) {
    new_entry(code, "system", message_template, user_hint)
  }

  list(
    # Business errors (can be repaired by the Planner)
    E001_COLUMN_NOT_FOUND = business(
      "E001",
      "列名 '{col}' 在数据中不存在",
      "请检查变量名是否拼写正确"
    ),
    E002_TYPE_MISMATCH = business(
      "E002",
      "列 '{col}' 类型应为 {expected}，实际为 {actual}",
      "该列包含非数值数据，请检查数据格式"
    ),
    E003_INSUFFICIENT_GROUPS = business(
      "E003",
      "分组变量 '{col}' 应有 {expected} 个水平，实际有 {actual} 个",
      "分组变量的取值个数不符合要求"
    ),
    E004_SAMPLE_TOO_SMALL = business(
      "E004",
      "样本量 {n} 不足，至少需要 {min_required}",
      "数据量太少，无法进行统计分析"
    ),

    # Statistical computation errors (mapped to user-friendly hints)
    E005_SINGULAR_MATRIX = business(
      "E005",
      "矩阵计算异常: {details}",
      "变量之间可能存在多重共线性，建议移除高度相关的变量"
    ),
    E006_CONVERGENCE_FAILED = business(
      "E006",
      "模型未能收敛: {details}",
      "算法未能找到稳定解，可能需要调整参数或检查数据"
    ),
    E007_VARIANCE_ZERO = business(
      "E007",
      "变量 '{col}' 方差为零",
      "该列的所有值都相同，无法进行比较"
    ),

    # System errors (require manual intervention)
    E100_INTERNAL_ERROR = system_error(
      "E100",
      "内部错误: {details}",
      "系统繁忙，请稍后重试"
    ),
    E101_PACKAGE_MISSING = system_error(
      "E101",
      "缺少依赖包: {package}",
      "请联系管理员"
    )
  )
})

# Lookup table from raw R error text to ERROR_CODES keys.
# Names are regular-expression patterns; values are ERROR_CODES keys.
# Row order matters: the lookups in this file walk the names in order and
# stop at the first pattern that matches.
R_ERROR_MAPPING <- local({
  rules <- matrix(
    c(
      "system is computationally singular",       "E005_SINGULAR_MATRIX",
      "did not converge",                         "E006_CONVERGENCE_FAILED",
      "constant",                                 "E007_VARIANCE_ZERO",
      "object '.*' not found",                    "E001_COLUMN_NOT_FOUND",
      "undefined columns",                        "E001_COLUMN_NOT_FOUND",
      "subscript out of bounds",                  "E100_INTERNAL_ERROR",
      "cannot coerce",                            "E002_TYPE_MISMATCH",
      "non-numeric argument",                     "E002_TYPE_MISMATCH",
      "not meaningful for factors",               "E002_TYPE_MISMATCH",
      "missing value where TRUE/FALSE needed",    "E100_INTERNAL_ERROR",
      "replacement has",                          "E100_INTERNAL_ERROR",
      "could not find function",                  "E101_PACKAGE_MISSING",
      "there is no package called",               "E101_PACKAGE_MISSING",
      "cannot open the connection",               "E100_INTERNAL_ERROR",
      "singular gradient",                        "E005_SINGULAR_MATRIX",
      "rank deficien",                            "E005_SINGULAR_MATRIX",
      "contrasts can be applied only to factors", "E002_TYPE_MISMATCH",
      "need at least 2 observations",             "E004_SAMPLE_TOO_SMALL",
      "not enough observations",                  "E004_SAMPLE_TOO_SMALL",
      "sample size must be",                      "E004_SAMPLE_TOO_SMALL",
      "groups with fewer than",                   "E004_SAMPLE_TOO_SMALL"
    ),
    ncol = 2,
    byrow = TRUE
  )

  # Column 1 holds the patterns, column 2 the target keys.
  mapping <- as.list(rules[, 2])
  names(mapping) <- rules[, 1]
  mapping
})

#' Build a structured error response (including the user-friendly hint).
#'
#' @param error_def An error definition: a list with `code`, `type`,
#'   `message_template` and `user_hint` fields.
#' @param ... Named values substituted for the matching `{name}` placeholders
#'   in `message_template`. Unnamed arguments are ignored. A value with
#'   several elements is joined with ", ".
#' @return A list with `status` (always "error"), `error_code`, `error_type`,
#'   `message` (the filled-in template) and `user_hint`.
make_error <- function(error_def, ...) {
  params <- list(...)
  param_names <- names(params)
  msg <- error_def$message_template

  for (i in seq_along(params)) {
    name <- param_names[i]
    # Unnamed arguments have no placeholder to fill.
    if (is.null(name) || is.na(name) || !nzchar(name)) {
      next
    }
    value <- paste(as.character(params[[i]]), collapse = ", ")
    # fixed = TRUE: the value is inserted literally. In regex mode gsub()
    # interprets backslashes in the replacement (e.g. "\\1" or Windows
    # paths inside raw R error messages) and would corrupt the text.
    msg <- gsub(paste0("{", name, "}"), value, msg, fixed = TRUE)
  }

  list(
    status = "error",
    error_code = error_def$code,
    error_type = error_def$type,
    message = msg,
    user_hint = error_def$user_hint
  )
}

#' Structured error format for the Agent channel.
#'
#' Extracts a line number from the error message, classifies the error via
#' R_ERROR_MAPPING / ERROR_CODES and attaches a code excerpt, so that an LLM
#' can understand and repair the failure.
#'
#' @param e A condition object; its message is read with `conditionMessage()`.
#' @param code The executed source code as a character value, or NULL.
#'   Several elements are treated as consecutive lines.
#' @param warnings Warnings collected during execution. At most the first
#'   five are appended to `message`; all are returned in `warnings`.
#' @param messages Currently unused; kept for interface compatibility.
#' @return A list with `message`, `raw_error`, `error_code`, `error_type`,
#'   `error_line` (integer or NULL), `error_context` (excerpt of up to three
#'   lines either side of the error line, or NULL), `user_hint` and
#'   `warnings` (NULL when there are none).
format_agent_error <- function(e, code, warnings = list(), messages = character(0)) {
  raw_msg <- conditionMessage(e)

  # Heuristic line-number extraction: "line N", "at N:M" or ": N".
  error_line <- NULL
  line_match <- regmatches(raw_msg, regexpr("(line \\d+|at \\d+:\\d+|:( *)\\d+)", raw_msg))
  if (length(line_match) > 0 && nchar(line_match[1]) > 0) {
    nums <- regmatches(line_match[1], gregexpr("\\d+", line_match[1]))[[1]]
    # Digit runs that overflow integer range become NA; treat them as
    # "no line number" rather than failing inside the error handler.
    parsed_line <- suppressWarnings(as.integer(nums[1]))
    if (!is.na(parsed_line)) {
      error_line <- parsed_line
    }
  }

  error_context <- NULL
  if (!is.null(error_line) && !is.null(code) && length(code) > 0) {
    code_lines <- strsplit(paste(code, collapse = "\n"), "\r?\n")[[1]]
    n_lines <- length(code_lines)
    # Only build an excerpt when the line really exists in the code;
    # otherwise start:end would count downwards and index NA lines.
    if (error_line >= 1L && error_line <= n_lines) {
      shown <- seq.int(max(1L, error_line - 3L), min(n_lines, error_line + 3L))
      context_lines <- paste0(
        ifelse(shown == error_line, ">>> ", "    "),
        sprintf("%3d| ", shown),
        code_lines[shown]
      )
      error_context <- paste(context_lines, collapse = "\n")
    }
  }

  # Defaults used when no known pattern matches.
  error_code <- "E_EXEC"
  error_type <- "runtime"
  user_hint <- raw_msg

  # First matching pattern wins.
  for (pattern in names(R_ERROR_MAPPING)) {
    if (grepl(pattern, raw_msg, ignore.case = TRUE)) {
      info <- ERROR_CODES[[R_ERROR_MAPPING[[pattern]]]]
      # Keep the defaults if the mapping points at an unknown key.
      if (!is.null(info)) {
        error_code <- info$code
        error_type <- info$type
        user_hint <- paste0(info$user_hint, " | ", raw_msg)
      }
      break
    }
  }

  friendly_msg <- paste0(
    "[", error_code, "] ", raw_msg,
    if (!is.null(error_line)) paste0(" (约第 ", error_line, " 行)") else "",
    if (length(warnings) > 0) paste0("\n[Warnings] ", paste(utils::head(warnings, 5), collapse = "; ")) else ""
  )

  list(
    message = friendly_msg,
    raw_error = raw_msg,
    error_code = error_code,
    error_type = error_type,
    error_line = error_line,
    error_context = error_context,
    user_hint = user_hint,
    warnings = if (length(warnings) > 0) warnings else NULL
  )
}

#' Try to map a raw R error message to a friendly structured error.
#'
#' Walks R_ERROR_MAPPING in order and uses the first pattern that matches
#' (case-insensitively). Falls back to E100_INTERNAL_ERROR when nothing
#' matches or the mapped key is missing from ERROR_CODES.
#'
#' @param raw_error_msg The raw R error message. Non-scalar input is
#'   collapsed into a single string.
#' @return The list produced by `make_error()`. When the selected template
#'   needs placeholders other than `{details}`, `message` is the raw error
#'   text, because those values cannot be derived from the raw message alone.
map_r_error <- function(raw_error_msg) {
  # Normalise to one string so the scalar `if (grepl(...))` below is safe.
  raw_error_msg <- paste(as.character(raw_error_msg), collapse = "\n")

  error_def <- ERROR_CODES$E100_INTERNAL_ERROR
  for (pattern in names(R_ERROR_MAPPING)) {
    if (grepl(pattern, raw_error_msg, ignore.case = TRUE)) {
      mapped <- ERROR_CODES[[R_ERROR_MAPPING[[pattern]]]]
      if (!is.null(mapped)) {
        error_def <- mapped
      }
      break
    }
  }

  err <- make_error(error_def, details = raw_error_msg)

  # Only `details` is available here. A template that uses any other
  # placeholder would be returned with literal "{...}" tokens in it.
  template <- error_def$message_template
  if (length(template) == 1 && !is.na(template)) {
    placeholders <- regmatches(template, gregexpr("\\{[^{}]+\\}", template))[[1]]
    if (length(setdiff(placeholders, "{details}")) > 0) {
      err$message <- raw_error_msg
    }
  }

  err
}