Threshold by segmented
# 安裝並加載必要的套件
if (!require(segmented)) install.packages("segmented")
if (!require(splines)) install.packages("splines")
if (!require(ggplot2)) install.packages("ggplot2")
library(segmented)
library(splines)
library(ggplot2)
# 設定隨機種子與模擬參數
set.seed(123)
n <- 500 # 樣本數
x <- seq(0, 15, length.out = n) # 模擬風險因子範圍
# 模擬真實曲線 (二次曲線)
y_true <- 2 * x^3 +0.5 * x^2 + x + 5 # 真實曲線
y <- y_true + rnorm(n, mean = 0, sd = 1) # 添加隨機誤差
# 計算 x 的中位數
x_median <- median(x)
print(paste("x 的中位數:", round(x_median, 2)))
# 初始線性回歸
lin_mod <- lm(y ~ x)
y_linear <- predict(lin_mod, data.frame(x = x)) # 線性回歸預測值
# 分段回歸
seg_mod <- segmented(lin_mod, seg.Z = ~x, psi = c(7)) # 初始分段點設為 7
summary(seg_mod) # 顯示分段回歸結果
# 提取分段點和斜率
breakpoints <- seg_mod$psi[, "Est."]
slopes <- slope(seg_mod)$x[, "Est."]
# 輸出分段結果
cat("分段回歸結果:\n")
cat("分段點 (Breakpoints):", paste(round(breakpoints, 2), collapse = ", "), "\n")
cat("每段的斜率 (Slopes):", paste(round(slopes, 2), collapse = ", "), "\n")
# RCS 擬合
knots <- c(3, 7, 12) # 設定節點位置
rcs_mod <- lm(y ~ ns(x, knots = knots)) # 使用自然樣條擬合
y_rcs <- predict(rcs_mod, data.frame(x = x))
# 數據框整合
data <- data.frame(
x = x,
y = y,
y_true = y_true,
y_linear = y_linear,
y_rcs = y_rcs,
y_segmented = predict(seg_mod)
)
# 繪製圖形
ggplot(data, aes(x = x)) +
geom_point(aes(y = y), color = "gray", alpha = 0.5, size = 1.5) + # 原始數據
geom_line(aes(y = y_true, color = "True Curve"), size = 1.2) + # 真實曲線
geom_line(aes(y = y_linear, color = "Linear Regression Line"), size = 1.2, linetype = "dashed") + # 線性回歸線
geom_line(aes(y = y_rcs, color = "RCS Curve"), size = 1.2, linetype = "dotted") + # RCS 曲線
geom_line(aes(y = y_segmented, color = "Segmented Line"), size = 1.2, linetype = "dotdash") + # 分段回歸線
geom_vline(xintercept = breakpoints, color = "blue", linetype = "dotted", size = 1) + # 分段點
geom_vline(xintercept = x_median, color = "orange", linetype = "dashed", size = 1) + # x 的中位數
annotate("text", x = x_median, y = max(data$y), label = paste0("x Median: ", round(x_median, 2)),
color = "orange", angle = 90, vjust = -0.5, hjust = 1.2, size = 4) + # 標示中位數
labs(
title = "Comparison: True Curve, Linear Regression, RCS, and Segmented Regression",
x = "Risk Factor (x)",
y = "Outcome (y)",
color = "Curve Type"
) +
theme_minimal() +
scale_color_manual(values = c(
"True Curve" = "blue",
"Linear Regression Line" = "green",
"RCS Curve" = "red",
"Segmented Line" = "purple"
))
留言
張貼留言