feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end
Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: Block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -29,23 +29,59 @@ load_input_data <- function(input) {
|
||||
|
||||
# 调试:打印原始数据结构
|
||||
message(glue("[DataLoader] 原始数据类型: {class(raw_data)}"))
|
||||
message(glue("[DataLoader] 原始数据字段: {paste(names(raw_data), collapse=', ')}"))
|
||||
message(glue("[DataLoader] 原始数据长度: {length(raw_data)}"))
|
||||
|
||||
# 安全转换:处理不同的 JSON 解析结果
|
||||
if (is.data.frame(raw_data)) {
|
||||
# 已经是 data.frame
|
||||
df <- raw_data
|
||||
} else if (is.list(raw_data)) {
|
||||
# JSON 对象 {"col1": [...], "col2": [...]} -> data.frame
|
||||
# JSON 数组可能被解析为 list 而非 vector,需要先 unlist
|
||||
df <- data.frame(
|
||||
lapply(raw_data, function(x) {
|
||||
if (is.list(x)) unlist(x) else x
|
||||
}),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
message("[DataLoader] 数据已是 data.frame")
|
||||
|
||||
} else if (is.list(raw_data) && length(raw_data) > 0) {
|
||||
# 检查是行格式还是列格式
|
||||
first_elem <- raw_data[[1]]
|
||||
|
||||
if (is.list(first_elem) && !is.null(names(first_elem))) {
|
||||
# 行格式: [{"col1": val1, "col2": val2}, {...}, ...]
|
||||
# 每个元素是一行数据
|
||||
message("[DataLoader] 检测到行格式数据 (JSON array of objects)")
|
||||
|
||||
# 使用 jsonlite 的 bind_rows 功能
|
||||
df <- tryCatch({
|
||||
# 方法1:使用 do.call + rbind.data.frame
|
||||
df_list <- lapply(raw_data, function(row) {
|
||||
# 将每一行转为 data.frame
|
||||
as.data.frame(lapply(row, function(val) {
|
||||
if (is.null(val)) NA else val
|
||||
}), stringsAsFactors = FALSE)
|
||||
})
|
||||
do.call(rbind, df_list)
|
||||
}, error = function(e) {
|
||||
# 方法2:如果上面失败,尝试 jsonlite 转换
|
||||
message(glue("[DataLoader] rbind 失败,尝试 jsonlite 转换: {e$message}"))
|
||||
jsonlite::fromJSON(jsonlite::toJSON(raw_data), flatten = TRUE)
|
||||
})
|
||||
|
||||
} else if (!is.null(names(raw_data))) {
|
||||
# 列格式: {"col1": [...], "col2": [...]}
|
||||
message("[DataLoader] 检测到列格式数据 (JSON object with arrays)")
|
||||
df <- data.frame(
|
||||
lapply(raw_data, function(x) {
|
||||
if (is.list(x)) unlist(x) else x
|
||||
}),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
} else {
|
||||
# 未知格式
|
||||
message(glue("[DataLoader] 未知数据格式,first_elem class: {class(first_elem)}"))
|
||||
stop(make_error(ERROR_CODES$E100_INTERNAL_ERROR,
|
||||
details = "无法识别的数据格式"))
|
||||
}
|
||||
|
||||
} else {
|
||||
stop(make_error(ERROR_CODES$E100_INTERNAL_ERROR,
|
||||
details = paste("无法解析的数据类型:", class(raw_data))))
|
||||
details = paste("无法解析的数据类型:", class(raw_data), "或数据为空")))
|
||||
}
|
||||
|
||||
message(glue("[DataLoader] 转换后: {nrow(df)} 行, {ncol(df)} 列, 列名: {paste(names(df), collapse=', ')}"))
|
||||
|
||||
@@ -114,3 +114,129 @@ run_guardrail_chain <- function(guardrail_results) {
|
||||
warnings = warnings
|
||||
))
|
||||
}
|
||||
|
||||
# ========== JIT 护栏接口(Phase 2A) ==========
|
||||
# 用于 WorkflowExecutor 在执行核心工具前调用
|
||||
|
||||
#' JIT guardrails: check statistical assumptions before running a core tool
#'
#' Meant to be called by the WorkflowExecutor right before a core statistical
#' tool runs. Which checks are performed depends on `tool_code`:
#' \itemize{
#'   \item `ST_T_TEST_IND` / `ST_MANN_WHITNEY`: per-group normality, plus a
#'     homogeneity-of-variance check when exactly two groups are present.
#'   \item `ST_T_TEST_PAIRED`: normality of the paired differences.
#'   \item `ST_CORRELATION`: normality of each of the two variables.
#' }
#' Any other tool code yields an empty check list. A check is only run when
#' at least 3 non-missing observations are available for it.
#'
#' @param df Data frame holding the analysis data.
#' @param tool_code Code of the tool that is about to run.
#' @param params Tool parameters. Depending on `tool_code` this function reads
#'   `group_var`/`value_var`, `before_var`/`after_var`, or `var_x`/`var_y`.
#' @return A list with:
#'   `checks` (list of records with `check_name`, `passed`, `p_value`,
#'   `recommendation`), `suggested_tool` (equal to `tool_code` unless a failed
#'   normality check suggests an alternative), `can_proceed` (always `TRUE`;
#'   failed checks never block execution, the user/LLM decides) and
#'   `all_checks_passed` (`TRUE` when every recorded check passed, including
#'   the case where no check was run).
run_jit_guardrails <- function(df, tool_code, params) {
  checks <- list()
  suggested_tool <- tool_code

  # Builds one `checks` record from a helper result that carries
  # `passed` and `p_value`.
  make_check <- function(name, result, ok_text, fail_text) {
    list(
      check_name = name,
      passed = result$passed,
      p_value = result$p_value,
      recommendation = if (result$passed) ok_text else fail_text
    )
  }

  if (tool_code %in% c("ST_T_TEST_IND", "ST_MANN_WHITNEY")) {
    # Independent two-sample comparison: normality + variance homogeneity.
    group_var <- params$group_var
    value_var <- params$value_var

    if (!is.null(group_var) && !is.null(value_var)) {
      group_col <- df[[group_var]]
      value_col <- df[[value_var]]

      # A missing group label is not a group of its own.
      groups <- unique(group_col)
      groups <- groups[!is.na(groups)]

      # Normality check, one per group.
      for (g in groups) {
        # Guard the mask against NA so rows with a missing group label are
        # not pulled in as NA values; `[[` keeps this a plain vector even
        # when `df` is a tibble.
        in_group <- !is.na(group_col) & group_col == g
        vals <- value_col[in_group]
        vals <- vals[!is.na(vals)]

        if (length(vals) >= 3) {
          norm_result <- check_normality(vals, alpha = 0.05)
          checks <- c(checks, list(make_check(
            glue("正态性检验 (组: {g})"),
            norm_result,
            "满足正态性",
            "建议使用非参数方法"
          )))

          if (!norm_result$passed && tool_code == "ST_T_TEST_IND") {
            suggested_tool <- "ST_MANN_WHITNEY"
          }
        }
      }

      # Homogeneity of variance, only meaningful for exactly two groups.
      if (length(groups) == 2) {
        tryCatch({
          homo_result <- check_homogeneity(
            df, group_var, value_var,
            alpha = 0.05
          )
          checks <- c(checks, list(make_check(
            "方差齐性检验 (Levene)",
            homo_result,
            "方差齐性满足",
            "建议使用 Welch 校正"
          )))
        }, error = function(e) {
          # Best effort: a failing homogeneity check must not abort the
          # whole guardrail run.
          message("方差齐性检验失败: ", e$message)
        })
      }
    }
  } else if (tool_code == "ST_T_TEST_PAIRED") {
    # Paired test: normality of the after-minus-before differences.
    before_var <- params$before_var
    after_var <- params$after_var

    if (!is.null(before_var) && !is.null(after_var)) {
      diff_vals <- df[[after_var]] - df[[before_var]]
      diff_vals <- diff_vals[!is.na(diff_vals)]

      if (length(diff_vals) >= 3) {
        norm_result <- check_normality(diff_vals, alpha = 0.05)
        checks <- c(checks, list(make_check(
          "差值正态性检验",
          norm_result,
          "差值满足正态性",
          "建议使用 Wilcoxon 符号秩检验"
        )))

        if (!norm_result$passed) {
          # NOTE(review): unlike the other branches this is a display name,
          # not an ST_* tool code; kept unchanged because callers may
          # depend on the exact string.
          suggested_tool <- "Wilcoxon signed-rank test"
        }
      }
    }
  } else if (tool_code == "ST_CORRELATION") {
    # Correlation: normality of each variable, checked independently so
    # that a short variable never hides the result of the other one.
    var_x <- params$var_x
    var_y <- params$var_y

    if (!is.null(var_x) && !is.null(var_y)) {
      non_normal <- FALSE

      for (var_name in c(var_x, var_y)) {
        vals <- df[[var_name]]
        vals <- vals[!is.na(vals)]

        if (length(vals) >= 3) {
          norm_result <- check_normality(vals, alpha = 0.05)
          checks <- c(checks, list(make_check(
            glue("正态性检验 ({var_name})"),
            norm_result,
            "满足正态性",
            "建议使用 Spearman 秩相关"
          )))

          if (!norm_result$passed) {
            non_normal <- TRUE
          }
        }
      }

      if (non_normal) {
        suggested_tool <- "ST_CORRELATION (Spearman)"
      }
    }
  }

  # Summary: vapply keeps the result a logical vector even when `checks`
  # is empty, and isTRUE treats a missing/NA `passed` as "not passed".
  all_passed <- all(vapply(
    checks,
    function(chk) isTRUE(chk$passed),
    logical(1)
  ))

  list(
    checks = checks,
    suggested_tool = suggested_tool,
    # Always TRUE: failed checks are advisory, the user/LLM decides.
    can_proceed = TRUE,
    all_checks_passed = all_passed
  )
}
|
||||
|
||||
Reference in New Issue
Block a user