fix(ssa): Fix 7 integration bugs and refactor frontend unified state management

Bug fixes:
- Fix garbled error messages in chat (TypeWriter rendering issue)
- Fix R engine NA crash in descriptive.R (defensive isTRUE/is.na checks)
- Fix intent misclassification for statistical significance queries
- Fix step 2 results not displayed (accept warning status alongside success)
- Fix incomplete R code download (only step 1 included)
- Fix multi-task state confusion (clicking old card shows new results)
- Add R engine and backend parameter logging for debugging

Refactor - Unified Record Architecture:
- Replace 12 global singleton fields with AnalysisRecord as single source of truth
- Remove isWorkflowMode branching across all components
- One Analysis = One Record = N Steps paradigm
- selectRecord only sets currentRecordId, all rendering derives from currentRecord
- Fix cross-hook-instance issue: executeWorkflow falls back to the store's currentRecordId

Updated files: ssaStore, useWorkflow, useAnalysis, SSAChatPane, SSAWorkspacePane, SSACodeModal, WorkflowTimeline, QueryService, WorkflowExecutorService, descriptive.R

Tested: Manual integration test passed - multi-task switching, R code completeness

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 22:58:59 +08:00
parent 371e1c069c
commit 11676f2840
17 changed files with 1573 additions and 1829 deletions
--- a/r-statistics-service/tools/descriptive.R
+++ b/r-statistics-service/tools/descriptive.R
@@ -31,30 +31,43 @@ run_analysis <- function(input) {
  log_add(glue("数据加载成功: {nrow(df)} 行, {ncol(df)} 列"))
  
  p <- input$params
-  variables <- p$variables  # 变量列表（可选，空则分析全部）
-  group_var <- p$group_var  # 分组变量（可选）
-  
+  variables <- p$variables
+  group_var <- p$group_var
+
+  # Normalize group_var: ensure it's NULL or a valid non-empty string (never NA)
+  if (is.null(group_var) || length(group_var) == 0 || isTRUE(is.na(group_var)) || !nzchar(trimws(as.character(group_var[1])))) {
+    group_var <- NULL
+  } else {
+    group_var <- as.character(group_var[1])
+  }
+
+  log_add(glue("=== 输入参数 === variables: [{paste(variables, collapse=', ')}], group_var: {ifelse(is.null(group_var), 'NULL', group_var)}"))
+  log_add(glue("=== 数据列 === [{paste(names(df), collapse=', ')}]"))
+
  # ===== 确定要分析的变量 =====
  if (is.null(variables) || length(variables) == 0) {
    variables <- names(df)
    log_add("未指定变量，分析全部列")
  }
-  
+  variables <- as.character(variables)
+
  # 排除分组变量本身
  if (!is.null(group_var) && group_var %in% variables) {
    variables <- setdiff(variables, group_var)
  }
-  
+
  # 校验变量存在性
  missing_vars <- setdiff(variables, names(df))
  if (length(missing_vars) > 0) {
+    log_add(glue("缺失变量: [{paste(missing_vars, collapse=', ')}]"))
    return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, 
                      col = paste(missing_vars, collapse = ", ")))
  }
-  
+  log_add(glue("最终分析变量 ({length(variables)}): [{paste(variables, collapse=', ')}]"))
+
  # 校验分组变量
  groups <- NULL
-  if (!is.null(group_var) && group_var != "") {
+  if (!is.null(group_var)) {
    if (!(group_var %in% names(df))) {
      return(make_error(ERROR_CODES$E001_COLUMN_NOT_FOUND, col = group_var))
    }
@@ -63,25 +76,32 @@ run_analysis <- function(input) {
  }
  
  # ===== 变量类型推断 =====
-  var_types <- sapply(variables, function(v) {
-    vals <- df[[v]]
-    if (is.numeric(vals)) {
-      non_na_count <- sum(!is.na(vals))
-      if (non_na_count == 0) {
-        return("categorical")  # 全是 NA，当作分类变量
-      }
-      unique_count <- length(unique(vals[!is.na(vals)]))
-      unique_ratio <- unique_count / non_na_count
-      if (unique_ratio < 0.05 && unique_count <= 10) {
+  var_types <- tryCatch({
+    result <- sapply(variables, function(v) {
+      vals <- df[[v]]
+      if (is.null(vals)) return("categorical")
+      if (isTRUE(is.numeric(vals))) {
+        non_na_count <- sum(!is.na(vals))
+        if (non_na_count == 0) return("categorical")
+        unique_count <- length(unique(vals[!is.na(vals)]))
+        unique_ratio <- unique_count / non_na_count
+        if (isTRUE(unique_ratio < 0.05) && isTRUE(unique_count <= 10)) {
+          return("categorical")
+        }
+        return("numeric")
+      } else {
        return("categorical")
      }
-      return("numeric")
-    } else {
-      return("categorical")
-    }
+    })
+    if (is.null(names(result))) names(result) <- variables
+    result
+  }, error = function(e) {
+    log_add(paste("变量类型推断失败:", e$message))
+    setNames(rep("categorical", length(variables)), variables)
  })
-  
-  log_add(glue("数值变量: {sum(var_types == 'numeric')}, 分类变量: {sum(var_types == 'categorical')}"))
+
+  log_add(glue("数值变量: {sum(var_types == 'numeric', na.rm=TRUE)}, 分类变量: {sum(var_types == 'categorical', na.rm=TRUE)}"))
+  log_add(glue("var_types 详情: {paste(names(var_types), '=', var_types, collapse=', ')}"))
  
  # ===== 计算描述性统计 =====
  warnings_list <- c()
@@ -106,7 +126,8 @@ run_analysis <- function(input) {
      # 有分组
      group_stats <- list()
      for (g in groups) {
-        subset_vals <- df[df[[group_var]] == g, v, drop = TRUE]
+        mask <- df[[group_var]] == g & !is.na(df[[group_var]])
+        subset_vals <- df[mask, v, drop = TRUE]
        if (identical(var_type, "numeric")) {
          group_stats[[as.character(g)]] <- calc_numeric_stats(subset_vals, v)
        } else {
@@ -145,7 +166,7 @@ run_analysis <- function(input) {
  
  for (v in vars_to_plot) {
    plot_base64 <- tryCatch({
-      if (var_types[v] == "numeric") {
+      if (isTRUE(var_types[v] == "numeric")) {
        generate_histogram(df, v, group_var)
      } else {
        generate_bar_chart(df, v, group_var)
@@ -167,6 +188,67 @@ run_analysis <- function(input) {
    "data.csv"
  }
  
+  # Build dynamic visualization code based on actual variables
+  plot_code_section <- tryCatch({
+    plot_code_lines <- c()
+    for (v in vars_to_plot) {
+      safe_v <- gsub('"', '\\\\"', v)
+      vt <- if (is.null(var_types) || is.na(var_types[v])) "categorical" else as.character(var_types[v])
+      safe_var_name <- gsub("[^a-zA-Z0-9]", "_", v)
+      if (vt == "numeric") {
+        if (!is.null(group_var) && group_var != "") {
+          safe_g <- gsub('"', '\\\\"', group_var)
+          plot_code_lines <- c(plot_code_lines, glue('
+# Histogram: {safe_v}
+p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = .data[["{safe_v}"]], fill = factor(.data[["{safe_g}"]]))) +
+  geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
+  scale_fill_brewer(palette = "Set1", name = "{safe_g}") +
+  labs(title = "Distribution of {safe_v}", x = "{safe_v}", y = "Count") +
+  theme_minimal()
+print(p_{safe_var_name})
+'))
+        } else {
+          plot_code_lines <- c(plot_code_lines, glue('
+# Histogram: {safe_v}
+p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = .data[["{safe_v}"]])) +
+  geom_histogram(fill = "#3b82f6", alpha = 0.7, bins = 30) +
+  labs(title = "Distribution of {safe_v}", x = "{safe_v}", y = "Count") +
+  theme_minimal()
+print(p_{safe_var_name})
+'))
+        }
+      } else {
+        if (!is.null(group_var) && group_var != "") {
+          safe_g <- gsub('"', '\\\\"', group_var)
+          plot_code_lines <- c(plot_code_lines, glue('
+# Bar chart: {safe_v}
+p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = factor(.data[["{safe_v}"]]), fill = factor(.data[["{safe_g}"]]))) +
+  geom_bar(position = "dodge") +
+  scale_fill_brewer(palette = "Set1", name = "{safe_g}") +
+  labs(title = "Frequency of {safe_v}", x = "{safe_v}", y = "Count") +
+  theme_minimal() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+print(p_{safe_var_name})
+'))
+        } else {
+          plot_code_lines <- c(plot_code_lines, glue('
+# Bar chart: {safe_v}
+p_{safe_var_name} <- ggplot(df[!is.na(df[["{safe_v}"]]), ], aes(x = factor(.data[["{safe_v}"]]))) +
+  geom_bar(fill = "#3b82f6", alpha = 0.7) +
+  labs(title = "Frequency of {safe_v}", x = "{safe_v}", y = "Count") +
+  theme_minimal() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+print(p_{safe_var_name})
+'))
+        }
+      }
+    }
+    paste(plot_code_lines, collapse = "\n")
+  }, error = function(e) {
+    log_add(paste("reproducible_code visualization generation failed:", e$message))
+    "# ggplot(df, aes(x = your_variable)) + geom_histogram()"
+  })
+
  reproducible_code <- glue('
 # SSA-Pro 自动生成代码
 # 工具: 描述性统计
@@ -181,7 +263,7 @@ df <- read.csv("{original_filename}")
 # 数值变量描述性统计
 numeric_vars <- sapply(df, is.numeric)
 if (any(numeric_vars)) {{
-  summary(df[, numeric_vars, drop = FALSE])
+  print(summary(df[, numeric_vars, drop = FALSE]))
 }}

 # 分类变量频数表
@@ -193,8 +275,8 @@ if (any(categorical_vars)) {{
  }}
 }}

-# 可视化示例
-# ggplot(df, aes(x = your_variable)) + geom_histogram()
+# ======== 可视化 ========
+{plot_code_section}
 ')
  
  # ===== 返回结果 =====