feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end

Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: Block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-20 23:09:27 +08:00
parent 23b422f758
commit 428a22adf2
62 changed files with 15416 additions and 299 deletions

View File

@@ -29,23 +29,59 @@ load_input_data <- function(input) {
# 调试:打印原始数据结构
message(glue("[DataLoader] 原始数据类型: {class(raw_data)}"))
message(glue("[DataLoader] 原始数据字段: {paste(names(raw_data), collapse=', ')}"))
message(glue("[DataLoader] 原始数据长度: {length(raw_data)}"))
# 安全转换:处理不同的 JSON 解析结果
if (is.data.frame(raw_data)) {
# 已经是 data.frame
df <- raw_data
} else if (is.list(raw_data)) {
# JSON 对象 {"col1": [...], "col2": [...]} -> data.frame
# JSON 数组可能被解析为 list 而非 vector需要先 unlist
df <- data.frame(
lapply(raw_data, function(x) {
if (is.list(x)) unlist(x) else x
}),
stringsAsFactors = FALSE
)
message("[DataLoader] 数据已是 data.frame")
} else if (is.list(raw_data) && length(raw_data) > 0) {
# 检查是行格式还是列格式
first_elem <- raw_data[[1]]
if (is.list(first_elem) && !is.null(names(first_elem))) {
# 行格式: [{"col1": val1, "col2": val2}, {...}, ...]
# 每个元素是一行数据
message("[DataLoader] 检测到行格式数据 (JSON array of objects)")
# 使用 jsonlite 的 bind_rows 功能
df <- tryCatch({
# 方法1使用 do.call + rbind.data.frame
df_list <- lapply(raw_data, function(row) {
# 将每一行转为 data.frame
as.data.frame(lapply(row, function(val) {
if (is.null(val)) NA else val
}), stringsAsFactors = FALSE)
})
do.call(rbind, df_list)
}, error = function(e) {
# 方法2如果上面失败尝试 jsonlite 转换
message(glue("[DataLoader] rbind 失败,尝试 jsonlite 转换: {e$message}"))
jsonlite::fromJSON(jsonlite::toJSON(raw_data), flatten = TRUE)
})
} else if (!is.null(names(raw_data))) {
# 列格式: {"col1": [...], "col2": [...]}
message("[DataLoader] 检测到列格式数据 (JSON object with arrays)")
df <- data.frame(
lapply(raw_data, function(x) {
if (is.list(x)) unlist(x) else x
}),
stringsAsFactors = FALSE
)
} else {
# 未知格式
message(glue("[DataLoader] 未知数据格式first_elem class: {class(first_elem)}"))
stop(make_error(ERROR_CODES$E100_INTERNAL_ERROR,
details = "无法识别的数据格式"))
}
} else {
stop(make_error(ERROR_CODES$E100_INTERNAL_ERROR,
details = paste("无法解析的数据类型:", class(raw_data))))
details = paste("无法解析的数据类型:", class(raw_data), "或数据为空")))
}
message(glue("[DataLoader] 转换后: {nrow(df)} 行, {ncol(df)} 列, 列名: {paste(names(df), collapse=', ')}"))

View File

@@ -114,3 +114,129 @@ run_guardrail_chain <- function(guardrail_results) {
warnings = warnings
))
}
# ========== JIT guardrail interface (Phase 2A) ==========
# Called by WorkflowExecutor right before a core statistical tool is executed.

#' JIT guardrail check: test statistical assumptions before the core analysis
#'
#' Dispatches on `tool_code` and runs the assumption checks relevant to it:
#' per-group normality plus homogeneity of variance for the independent
#' two-sample tools, normality of the paired differences for the paired
#' t-test, and per-variable normality for correlation. Any other tool code
#' yields an empty check list.
#'
#' Missing values are dropped before every normality check, and a check is
#' only run when at least 3 non-missing values remain.
#'
#' @param df Data frame holding the analysis variables.
#' @param tool_code Code of the tool about to be run (e.g. "ST_T_TEST_IND").
#' @param params Named list of tool parameters. Reads `group_var`/`value_var`,
#'   `before_var`/`after_var` or `var_x`/`var_y` depending on `tool_code`.
#' @return A list with:
#'   * `checks`: list of records (`check_name`, `passed`, `p_value`,
#'     `recommendation`), one per check that was actually run;
#'   * `suggested_tool`: `tool_code`, or an alternative when a normality
#'     check failed;
#'   * `can_proceed`: always `TRUE` (failed checks are advisory only);
#'   * `all_checks_passed`: `TRUE` when every recorded check passed
#'     (also `TRUE` when no check was run).
run_jit_guardrails <- function(df, tool_code, params) {
  checks <- list()
  suggested_tool <- tool_code
  # Failed checks never block execution; the user / LLM decides what to do.
  can_proceed <- TRUE

  # Build a single check record from a check_* result.
  new_check <- function(name, result, ok_text, fail_text) {
    list(
      check_name = name,
      passed = result$passed,
      p_value = result$p_value,
      recommendation = if (isTRUE(result$passed)) ok_text else fail_text
    )
  }

  if (tool_code %in% c("ST_T_TEST_IND", "ST_MANN_WHITNEY")) {
    # Independent-samples comparison: normality per group + equal variances.
    group_var <- params$group_var
    value_var <- params$value_var

    if (!is.null(group_var) && !is.null(value_var)) {
      group_col <- df[[group_var]]
      value_col <- df[[value_var]]

      # NA is not a real group: keeping it would add an all-NA "group" and
      # break the two-group test below.
      groups <- unique(group_col)
      groups <- groups[!is.na(groups)]

      for (g in groups) {
        # which() drops rows whose group is NA instead of turning them into
        # NA values inside every group.
        vals <- value_col[which(group_col == g)]
        vals <- vals[!is.na(vals)]
        if (length(vals) >= 3) {
          norm_result <- check_normality(vals, alpha = 0.05)
          checks[[length(checks) + 1L]] <- new_check(
            glue("正态性检验 (组: {g})"),
            norm_result,
            "满足正态性",
            "建议使用非参数方法"
          )
          if (!isTRUE(norm_result$passed) && tool_code == "ST_T_TEST_IND") {
            suggested_tool <- "ST_MANN_WHITNEY"
          }
        }
      }

      # Homogeneity of variance is only checked for exactly two groups.
      if (length(groups) == 2) {
        homo_result <- tryCatch(
          check_homogeneity(df, group_var, value_var, alpha = 0.05),
          error = function(e) {
            # Best effort: a failing variance check must not abort the run.
            message("方差齐性检验失败: ", conditionMessage(e))
            NULL
          }
        )
        if (!is.null(homo_result)) {
          checks[[length(checks) + 1L]] <- new_check(
            "方差齐性检验 (Levene)",
            homo_result,
            "方差齐性满足",
            "建议使用 Welch 校正"
          )
        }
      }
    }
  } else if (tool_code == "ST_T_TEST_PAIRED") {
    # Paired test: the differences (after - before) should be normal.
    before_var <- params$before_var
    after_var <- params$after_var

    if (!is.null(before_var) && !is.null(after_var)) {
      diff_vals <- df[[after_var]] - df[[before_var]]
      diff_vals <- diff_vals[!is.na(diff_vals)]
      if (length(diff_vals) >= 3) {
        norm_result <- check_normality(diff_vals, alpha = 0.05)
        checks[[length(checks) + 1L]] <- new_check(
          "差值正态性检验",
          norm_result,
          "差值满足正态性",
          "建议使用 Wilcoxon 符号秩检验"
        )
        if (!isTRUE(norm_result$passed)) {
          # NOTE(review): this is a display name, not an ST_* tool code like
          # the other branches use - confirm callers can handle it.
          suggested_tool <- "Wilcoxon signed-rank test"
        }
      }
    }
  } else if (tool_code == "ST_CORRELATION") {
    # Correlation: check normality of each variable separately.
    var_x <- params$var_x
    var_y <- params$var_y

    if (!is.null(var_x) && !is.null(var_y)) {
      any_failed <- FALSE
      for (var_name in c(var_x, var_y)) {
        vals <- df[[var_name]]
        vals <- vals[!is.na(vals)]
        if (length(vals) >= 3) {
          norm_result <- check_normality(vals, alpha = 0.05)
          checks[[length(checks) + 1L]] <- new_check(
            glue("正态性检验 ({var_name})"),
            norm_result,
            "满足正态性",
            "建议使用 Spearman 秩相关"
          )
          if (!isTRUE(norm_result$passed)) {
            any_failed <- TRUE
          }
        }
      }
      # Decide after both variables were looked at, based only on checks that
      # actually ran; a skipped variable (< 3 values) is never consulted.
      if (any_failed) {
        # NOTE(review): not a plain ST_* tool code - confirm callers parse it.
        suggested_tool <- "ST_CORRELATION (Spearman)"
      }
    }
  }

  # Summary: vapply keeps the result a plain logical even with zero checks,
  # and isTRUE treats a missing / NA `passed` as "not passed".
  all_passed <- all(vapply(
    checks,
    function(chk) isTRUE(chk$passed),
    logical(1)
  ))

  list(
    checks = checks,
    suggested_tool = suggested_tool,
    can_proceed = can_proceed,
    all_checks_passed = all_passed
  )
}