fix(ssa): Fix 7 integration bugs and refactor frontend unified state management
Bug fixes:
- Fix garbled error messages in chat (TypeWriter rendering issue)
- Fix R engine NA crash in descriptive.R (defensive isTRUE/is.na checks)
- Fix intent misclassification for statistical significance queries
- Fix step 2 results not being displayed (accept "warning" status alongside "success")
- Fix incomplete R code download (only step 1 was included)
- Fix multi-task state confusion (clicking an old card showed the new results)
- Add R engine and backend parameter logging for debugging

Refactor - Unified Record Architecture:
- Replace 12 global singleton fields with AnalysisRecord as the single source of truth
- Remove isWorkflowMode branching across all components
- One Analysis = One Record = N Steps paradigm
- selectRecord only sets currentRecordId; all rendering derives from currentRecord
- Fix cross-hook-instance issue: executeWorkflow falls back to the store's currentRecordId

Updated files: ssaStore, useWorkflow, useAnalysis, SSAChatPane, SSAWorkspacePane, SSACodeModal, WorkflowTimeline, QueryService, WorkflowExecutorService, descriptive.R

Tested: Manual integration test passed - multi-task switching, R code completeness

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -31,30 +31,43 @@ run_analysis <- function(input) {
|
||||
log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))
|
||||
|
||||
p <- input$params
|
||||
variables <- p$variables # 变量列表(可选,空则分析全部)
|
||||
group_var <- p$group_var # 分组变量(可选)
|
||||
|
||||
variables <- p$variables
|
||||
group_var <- p$group_var
|
||||
|
||||
# Normalize group_var: ensure it's NULL or a valid non-empty string (never NA)
|
||||
if (is.null(group_var) || length(group_var) == 0 || isTRUE(is.na(group_var)) || !nzchar(trimws(as.character(group_var[1])))) {
|
||||
group_var <- NULL
|
||||
} else {
|
||||
group_var <- as.character(group_var[1])
|
||||
}
|
||||
|
||||
log_add(glue("=== 输入参数 === variables: [{paste(variables, collapse=', ')}], group_var: {ifelse(is.null(group_var), 'NULL', group_var)}"))
|
||||
log_add(glue("=== 数据列 === [{paste(names(df), collapse=', ')}]"))
|
||||
|
||||
# ===== 确定要分析的变量 =====
|
||||
if (is.null(variables) || length(variables) == 0) {
|
||||
variables <- names(df)
|
||||
log_add("未指定变量,分析全部列")
|
||||
}
|
||||
|
||||
variables <- as.character(variables)
|
||||
|
||||
# 排除分组变量本身
|
||||
if (!is.null(group_var) && group_var %in% variables) {
|
||||
variables <- setdiff(variables, group_var)
|
||||
}
|
||||
|
||||
|
||||
# 校验变量存在性
|
||||
missing_vars <- setdiff(variables, names(df))
|
||||
if (length(missing_vars) > 0) {
|
||||
log_add(glue("缺失变量: [{paste(missing_vars, collapse=', ')}]"))
|
||||
return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND,
|
||||
col = paste(missing_vars, collapse = ", ")))
|
||||
}
|
||||
|
||||
log_add(glue("最终分析变量 ({length(variables)}): [{paste(variables, collapse=', ')}]"))
|
||||
|
||||
# 校验分组变量
|
||||
groups <- NULL
|
||||
if (!is.null(group_var) && group_var != "") {
|
||||
if (!is.null(group_var)) {
|
||||
if (!(group_var %in% names(df))) {
|
||||
return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var))
|
||||
}
|
||||
@@ -63,25 +76,32 @@ run_analysis <- function(input) {
|
||||
}
|
||||
|
||||
# ===== 变量类型推断 =====
|
||||
var_types <- sapply(variables, function(v) {
|
||||
vals <- df[[v]]
|
||||
if (is.numeric(vals)) {
|
||||
non_na_count <- sum(!is.na(vals))
|
||||
if (non_na_count == 0) {
|
||||
return("categorical") # 全是 NA,当作分类变量
|
||||
}
|
||||
unique_count <- length(unique(vals[!is.na(vals)]))
|
||||
unique_ratio <- unique_count / non_na_count
|
||||
if (unique_ratio < 0.05 && unique_count <= 10) {
|
||||
var_types <- tryCatch({
|
||||
result <- sapply(variables, function(v) {
|
||||
vals <- df[[v]]
|
||||
if (is.null(vals)) return("categorical")
|
||||
if (isTRUE(is.numeric(vals))) {
|
||||
non_na_count <- sum(!is.na(vals))
|
||||
if (non_na_count == 0) return("categorical")
|
||||
unique_count <- length(unique(vals[!is.na(vals)]))
|
||||
unique_ratio <- unique_count / non_na_count
|
||||
if (isTRUE(unique_ratio < 0.05) && isTRUE(unique_count <= 10)) {
|
||||
return("categorical")
|
||||
}
|
||||
return("numeric")
|
||||
} else {
|
||||
return("categorical")
|
||||
}
|
||||
return("numeric")
|
||||
} else {
|
||||
return("categorical")
|
||||
}
|
||||
})
|
||||
if (is.null(names(result))) names(result) <- variables
|
||||
result
|
||||
}, error = function(e) {
|
||||
log_add(paste("变量类型推断失败:", e$message))
|
||||
setNames(rep("categorical", length(variables)), variables)
|
||||
})
|
||||
|
||||
log_add(glue("数值变量: {sum(var_types == 'numeric')}, 分类变量: {sum(var_types == 'categorical')}"))
|
||||
|
||||
log_add(glue("数值变量: {sum(var_types == 'numeric', na.rm=TRUE)}, 分类变量: {sum(var_types == 'categorical', na.rm=TRUE)}"))
|
||||
log_add(glue("var_types 详情: {paste(names(var_types), '=', var_types, collapse=', ')}"))
|
||||
|
||||
# ===== 计算描述性统计 =====
|
||||
warnings_list <- c()
|
||||
@@ -106,7 +126,8 @@ run_analysis <- function(input) {
|
||||
# 有分组
|
||||
group_stats <- list()
|
||||
for (g in groups) {
|
||||
subset_vals <- df[df[[group_var]] == g, v, drop = TRUE]
|
||||
mask <- df[[group_var]] == g & !is.na(df[[group_var]])
|
||||
subset_vals <- df[mask, v, drop = TRUE]
|
||||
if (identical(var_type, "numeric")) {
|
||||
group_stats[[as.character(g)]] <- calc_numeric_stats(subset_vals, v)
|
||||
} else {
|
||||
@@ -145,7 +166,7 @@ run_analysis <- function(input) {
|
||||
|
||||
for (v in vars_to_plot) {
|
||||
plot_base64 <- tryCatch({
|
||||
if (var_types[v] == "numeric") {
|
||||
if (isTRUE(var_types[v] == "numeric")) {
|
||||
generate_histogram(df, v, group_var)
|
||||
} else {
|
||||
generate_bar_chart(df, v, group_var)
|
||||
@@ -167,6 +188,67 @@ run_analysis <- function(input) {
|
||||
"data.csv"
|
||||
}
|
||||
|
||||
# Build dynamic visualization code based on actual variables
|
||||
plot_code_section <- tryCatch({
|
||||
plot_code_lines <- c()
|
||||
for (v in vars_to_plot) {
|
||||
safe_v <- gsub('"', '\\\\"', v)
|
||||
vt <- if (is.null(var_types) || is.na(var_types[v])) "categorical" else as.character(var_types[v])
|
||||
safe_var_name <- gsub("[^a-zA-Z0-9]", "_", v)
|
||||
if (vt == "numeric") {
|
||||
if (!is.null(group_var) && group_var != "") {
|
||||
safe_g <- gsub('"', '\\\\"', group_var)
|
||||
plot_code_lines <- c(plot_code_lines, glue('
|
||||
# Histogram: {safe_v}
|
||||
p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = .data[["{safe_v}"]], fill = factor(.data[["{safe_g}"]]))) +
|
||||
geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
|
||||
scale_fill_brewer(palette = "Set1", name = "{safe_g}") +
|
||||
labs(title = "Distribution of {safe_v}", x = "{safe_v}", y = "Count") +
|
||||
theme_minimal()
|
||||
print(p_{safe_var_name})
|
||||
'))
|
||||
} else {
|
||||
plot_code_lines <- c(plot_code_lines, glue('
|
||||
# Histogram: {safe_v}
|
||||
p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = .data[["{safe_v}"]])) +
|
||||
geom_histogram(fill = "#3b82f6", alpha = 0.7, bins = 30) +
|
||||
labs(title = "Distribution of {safe_v}", x = "{safe_v}", y = "Count") +
|
||||
theme_minimal()
|
||||
print(p_{safe_var_name})
|
||||
'))
|
||||
}
|
||||
} else {
|
||||
if (!is.null(group_var) && group_var != "") {
|
||||
safe_g <- gsub('"', '\\\\"', group_var)
|
||||
plot_code_lines <- c(plot_code_lines, glue('
|
||||
# Bar chart: {safe_v}
|
||||
p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = factor(.data[["{safe_v}"]]), fill = factor(.data[["{safe_g}"]]))) +
|
||||
geom_bar(position = "dodge") +
|
||||
scale_fill_brewer(palette = "Set1", name = "{safe_g}") +
|
||||
labs(title = "Frequency of {safe_v}", x = "{safe_v}", y = "Count") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||
print(p_{safe_var_name})
|
||||
'))
|
||||
} else {
|
||||
plot_code_lines <- c(plot_code_lines, glue('
|
||||
# Bar chart: {safe_v}
|
||||
p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = factor(.data[["{safe_v}"]]))) +
|
||||
geom_bar(fill = "#3b82f6", alpha = 0.7) +
|
||||
labs(title = "Frequency of {safe_v}", x = "{safe_v}", y = "Count") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||
print(p_{safe_var_name})
|
||||
'))
|
||||
}
|
||||
}
|
||||
}
|
||||
paste(plot_code_lines, collapse = "\n")
|
||||
}, error = function(e) {
|
||||
log_add(paste("reproducible_code visualization generation failed:", e$message))
|
||||
"# ggplot(df, aes(x = your_variable)) + geom_histogram()"
|
||||
})
|
||||
|
||||
reproducible_code <- glue('
|
||||
# SSA-Pro 自动生成代码
|
||||
# 工具: 描述性统计
|
||||
@@ -181,7 +263,7 @@ df <- read.csv("{original_filename}")
|
||||
# 数值变量描述性统计
|
||||
numeric_vars <- sapply(df, is.numeric)
|
||||
if (any(numeric_vars)) {{
|
||||
summary(df[, numeric_vars, drop = FALSE])
|
||||
print(summary(df[, numeric_vars, drop = FALSE]))
|
||||
}}
|
||||
|
||||
# 分类变量频数表
|
||||
@@ -193,8 +275,8 @@ if (any(categorical_vars)) {{
|
||||
}}
|
||||
}}
|
||||
|
||||
# 可视化示例
|
||||
# ggplot(df, aes(x = your_variable)) + geom_histogram()
|
||||
# ======== 可视化 ========
|
||||
{plot_code_section}
|
||||
')
|
||||
|
||||
# ===== 返回结果 =====
|
||||
|
||||
Reference in New Issue
Block a user