feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline: - Scatter-dispatch + Aggregator polling pattern (PgBoss) - PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs) - ExtractionSingleWorker with DeepSeek-V3 LLM extraction - PermanentExtractionError for non-retryable failures - Phantom Retry Guard (idempotent worker) - 3-step minimal frontend (Setup -> Progress -> Workbench) - 4 new DB tables (extraction_templates, project_templates, tasks, results) - 3 system templates seed (RCT, Cohort, QC) - M1 integration test suite M2 HITL Workbench: - MinerU VLM integration for high-fidelity table extraction - XML-isolated DynamicPromptBuilder with flat JSON output template - fuzzyQuoteMatch validator (3-tier confidence scoring) - SSE real-time logging via ExtractionEventBus - Schema-driven ExtractionDrawer (dynamic field rendering from template) - Excel wide-table export with flattenModuleData normalization - M2 integration test suite Critical Fixes (data normalization): - DynamicPromptBuilder: explicit flat key-value output format with example - ExtractionExcelExporter: handle both array and flat data formats - ExtractionDrawer: schema-driven rendering instead of hardcoded fields - ExtractionValidator: array-format quote verification support - SSE route: Fastify register encapsulation to bypass auth for EventSource - LLM JSON sanitizer: strip illegal control chars before JSON.parse Also includes: RVW stats verification spec, SSA expert config guide Tested: M1 pipeline test + M2 HITL test + manual frontend verification Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
76
r-statistics-service/tools/.Rhistory
Normal file
76
r-statistics-service/tools/.Rhistory
Normal file
@@ -0,0 +1,76 @@
|
||||
# ========================================
|
||||
# 步骤 1: 描述性统计
|
||||
# ========================================
|
||||
# SSA-Pro 自动生成代码
|
||||
# 工具: 描述性统计
|
||||
# 时间: 2026-02-25 07:58:34.356454
|
||||
# ================================
|
||||
library(ggplot2)
|
||||
# Data preparation: read the analysis data set from disk.
df <- read.csv("E:/test.csv")

# Descriptive statistics for the numeric variables.
# vapply() always yields a logical vector; sapply() would return an empty
# list for a data frame without columns, which is not type-stable.
numeric_vars <- vapply(df, is.numeric, logical(1))
if (any(numeric_vars)) {
  # drop = FALSE keeps a data frame even when only one column is selected.
  print(summary(df[, numeric_vars, drop = FALSE]))
}

# Frequency tables for the categorical (non-numeric) variables.
categorical_vars <- !numeric_vars
if (any(categorical_vars)) {
  for (v in names(df)[categorical_vars]) {
    cat("\n变量:", v, "\n")
    # useNA = "ifany" adds an NA count only when missing values are present.
    print(table(df[[v]], useNA = "ifany"))
  }
}
|
||||
# ======== 可视化 ========
|
||||
# Frequency bar charts for Yqol, sex and smoke.

# Build a frequency bar chart for the column named `var` of `data`.
# Rows where that column is NA are removed first, and the remaining values
# are converted to a factor so each distinct value gets its own bar.
plot_frequency_bar <- function(data, var) {
  ggplot(data[!is.na(data[[var]]), ], aes(x = factor(.data[[var]]))) +
    geom_bar(fill = "#3b82f6", alpha = 0.7) +
    labs(title = paste("Frequency of", var), x = var, y = "Count") +
    theme_minimal() +
    # Rotate the x-axis labels by 45 degrees, right-aligned to the ticks.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Bar chart: Yqol
p_Yqol <- plot_frequency_bar(df, "Yqol")
print(p_Yqol)

# Bar chart: sex
p_sex <- plot_frequency_bar(df, "sex")
print(p_sex)

# Bar chart: smoke
p_smoke <- plot_frequency_bar(df, "smoke")
print(p_smoke)
|
||||
# Histogram: age
# Start from the rows with a non-missing age, then add the layers one by one.
p_age <- ggplot(
  data = df[!is.na(df[["age"]]), ],
  mapping = aes(x = .data[["age"]])
)
p_age <- p_age + geom_histogram(bins = 30, alpha = 0.7, fill = "#3b82f6")
p_age <- p_age + labs(x = "age", y = "Count", title = "Distribution of age")
p_age <- p_age + theme_minimal()
print(p_age)
|
||||
# ========================================
|
||||
# 步骤 2: 二元Logistic回归
|
||||
# ========================================
|
||||
# SSA-Pro 自动生成代码
|
||||
# 工具: 二元 Logistic 回归
|
||||
# 时间: 2026-02-25 07:58:34.813076
|
||||
# ================================
|
||||
# Data preparation: read the data set from disk again for this step.
df <- read.csv("E:/test.csv")

# Model fitting: binary logistic regression of Yqol on the listed predictors.
model <- glm(
  Yqol ~ sex + smoke + age + bmi + mouth_open + bucal_relax + toot_morph +
    root_number + root_curve + lenspace + denseratio + Pglevel + Pgverti +
    Winter + presyp + flap + operation + time + surgage + times,
  data = df,
  family = binomial(link = "logit")
)
# Print explicitly, like every other result in this script, so the summary
# is also shown when the code is run through source() without echo.
print(summary(model))
|
||||
# Odds ratios (OR) with Wald confidence intervals.
coef_summary <- summary(model)$coefficients
# Derive the critical value from the confidence level instead of hard-coding
# the rounded constant 1.96.
conf_level <- 0.95
z_crit <- qnorm(1 - (1 - conf_level) / 2)
# Exponentiate the log-odds estimates and interval bounds to the OR scale.
OR <- exp(coef_summary[, "Estimate"])
CI_lower <- exp(coef_summary[, "Estimate"] - z_crit * coef_summary[, "Std. Error"])
CI_upper <- exp(coef_summary[, "Estimate"] + z_crit * coef_summary[, "Std. Error"])
results <- data.frame(
  OR = OR,
  CI_lower = CI_lower,
  CI_upper = CI_upper,
  p_value = coef_summary[, "Pr(>|z|)"]
)
print(round(results, 3))

# Goodness of fit
cat("AIC:", AIC(model), "\n")

# VIF (requires the car package)
# library(car)
# vif(model)
|
||||
Reference in New Issue
Block a user