# ========================================
# 步骤 1: 描述性统计
# ========================================
# SSA-Pro 自动生成代码
# 工具: 描述性统计
# 时间: 2026-02-25 07:58:34.356454
# ================================
library(ggplot2)
# Data preparation: load the analysis dataset from the CSV file.
df <- read.csv("E:/test.csv")

# Descriptive statistics for numeric variables.
# vapply() always yields a logical vector; sapply() would return an empty list
# for a data frame without columns, which breaks the negation further down.
numeric_vars <- vapply(df, is.numeric, logical(1))
if (any(numeric_vars)) {
  # drop = FALSE keeps a data frame even when only one column is numeric.
  print(summary(df[, numeric_vars, drop = FALSE]))
}

# Frequency tables for the remaining (non-numeric) variables.
categorical_vars <- !numeric_vars
if (any(categorical_vars)) {
  for (v in names(df)[categorical_vars]) {
    cat("\n变量:", v, "\n")
    # useNA = "ifany" adds an NA count only when the column has missing values.
    print(table(df[[v]], useNA = "ifany"))
  }
}
# ======== Visualization ========

# Build a bar chart of value counts for one column, treated as categorical.
#   data:     data frame that holds the column
#   var_name: name of the column, as a string
# Returns the ggplot object; rows where the column is NA are excluded.
plot_frequency_bar <- function(data, var_name) {
  # drop = FALSE keeps a data frame even if `data` has only one column.
  observed <- data[!is.na(data[[var_name]]), , drop = FALSE]
  ggplot(observed, aes(x = factor(.data[[var_name]]))) +
    geom_bar(fill = "#3b82f6", alpha = 0.7) +
    labs(
      title = paste("Frequency of", var_name),
      x = var_name,
      y = "Count"
    ) +
    theme_minimal() +
    # Tilt the category labels so long level names do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Bar chart: Yqol
p_Yqol <- plot_frequency_bar(df, "Yqol")
print(p_Yqol)

# Bar chart: sex
p_sex <- plot_frequency_bar(df, "sex")
print(p_sex)

# Bar chart: smoke
p_smoke <- plot_frequency_bar(df, "smoke")
print(p_smoke)

# Histogram: age (rows with missing age are excluded before plotting)
age_observed <- df[!is.na(df[["age"]]), , drop = FALSE]
p_age <- ggplot(age_observed, aes(x = .data[["age"]])) +
  geom_histogram(fill = "#3b82f6", alpha = 0.7, bins = 30) +
  labs(title = "Distribution of age", x = "age", y = "Count") +
  theme_minimal()
print(p_age)
# ========================================
# 步骤 2: 二元Logistic回归
# ========================================
# SSA-Pro 自动生成代码
# 工具: 二元 Logistic 回归
# 时间: 2026-02-25 07:58:34.813076
# ================================
# Data preparation: read the dataset again so this step does not depend on
# objects left behind by an earlier step.
df <- read.csv("E:/test.csv")

# Model fitting: binary logistic regression (logit link) of Yqol on the
# predictors listed in the formula.
model <- glm(
  Yqol ~ sex + smoke + age + bmi + mouth_open + bucal_relax + toot_morph +
    root_number + root_curve + lenspace + denseratio + Pglevel + Pgverti +
    Winter + presyp + flap + operation + time + surgage + times,
  data = df,
  family = binomial(link = "logit")
)

# Compute the summary once and print it explicitly: a bare top-level
# expression is only auto-printed interactively or under Rscript, not when
# the file is run through source().
model_summary <- summary(model)
print(model_summary)

# Odds ratios with Wald 95% confidence intervals (estimate +/- 1.96 * SE on
# the log-odds scale, then exponentiated).
coef_summary <- model_summary$coefficients
log_odds <- coef_summary[, "Estimate"]
wald_margin <- 1.96 * coef_summary[, "Std. Error"]
OR <- exp(log_odds)
CI_lower <- exp(log_odds - wald_margin)
CI_upper <- exp(log_odds + wald_margin)
results <- data.frame(
  OR = OR,
  CI_lower = CI_lower,
  CI_upper = CI_upper,
  p_value = coef_summary[, "Pr(>|z|)"]
)
print(round(results, 3))

# Model fit
cat("AIC:", AIC(model), "\n")

# VIF (requires the car package)
# library(car)
# vif(model)