Threshold by segmented

 # 安裝並加載必要的套件

if (!require(segmented)) install.packages("segmented")

if (!require(splines)) install.packages("splines")

if (!require(ggplot2)) install.packages("ggplot2")

library(segmented)

library(splines)

library(ggplot2)

# 設定隨機種子與模擬參數

set.seed(123)

n <- 500  # 樣本數

x <- seq(0, 15, length.out = n)  # 模擬風險因子範圍

# 模擬真實曲線 (二次曲線)

y_true <- 2 * x^3 +0.5 * x^2 + x + 5  # 真實曲線

y <- y_true + rnorm(n, mean = 0, sd = 1)  # 添加隨機誤差

# 計算 x 的中位數

x_median <- median(x)

print(paste("x 的中位數:", round(x_median, 2)))

# 初始線性回歸

lin_mod <- lm(y ~ x)

y_linear <- predict(lin_mod, data.frame(x = x))  # 線性回歸預測值

# 分段回歸

seg_mod <- segmented(lin_mod, seg.Z = ~x, psi = c(7))  # 初始分段點設為 7

summary(seg_mod)  # 顯示分段回歸結果

# 提取分段點和斜率

breakpoints <- seg_mod$psi[, "Est."]

slopes <- slope(seg_mod)$x[, "Est."]

# 輸出分段結果

cat("分段回歸結果:\n")

cat("分段點 (Breakpoints):", paste(round(breakpoints, 2), collapse = ", "), "\n")

cat("每段的斜率 (Slopes):", paste(round(slopes, 2), collapse = ", "), "\n")

# RCS 擬合

knots <- c(3, 7, 12)  # 設定節點位置

rcs_mod <- lm(y ~ ns(x, knots = knots))  # 使用自然樣條擬合

y_rcs <- predict(rcs_mod, data.frame(x = x))

# 數據框整合

data <- data.frame(

  x = x,

  y = y,

  y_true = y_true,

  y_linear = y_linear,

  y_rcs = y_rcs,

  y_segmented = predict(seg_mod)

)

# 繪製圖形

ggplot(data, aes(x = x)) +

  geom_point(aes(y = y), color = "gray", alpha = 0.5, size = 1.5) +  # 原始數據

  geom_line(aes(y = y_true, color = "True Curve"), size = 1.2) +  # 真實曲線

  geom_line(aes(y = y_linear, color = "Linear Regression Line"), size = 1.2, linetype = "dashed") +  # 線性回歸線

  geom_line(aes(y = y_rcs, color = "RCS Curve"), size = 1.2, linetype = "dotted") +  # RCS 曲線

  geom_line(aes(y = y_segmented, color = "Segmented Line"), size = 1.2, linetype = "dotdash") +  # 分段回歸線

  geom_vline(xintercept = breakpoints, color = "blue", linetype = "dotted", size = 1) +  # 分段點

  geom_vline(xintercept = x_median, color = "orange", linetype = "dashed", size = 1) +  # x 的中位數

  annotate("text", x = x_median, y = max(data$y), label = paste0("x Median: ", round(x_median, 2)),

           color = "orange", angle = 90, vjust = -0.5, hjust = 1.2, size = 4) +  # 標示中位數

  labs(

    title = "Comparison: True Curve, Linear Regression, RCS, and Segmented Regression",

    x = "Risk Factor (x)",

    y = "Outcome (y)",

    color = "Curve Type"

  ) +

  theme_minimal() +

  scale_color_manual(values = c(

    "True Curve" = "blue",

    "Linear Regression Line" = "green",

    "RCS Curve" = "red",

    "Segmented Line" = "purple"

  ))

留言

這個網誌中的熱門文章

可轉移性、普遍性、代表性和外部有效性

頻率學派 vs 貝氏學派

貝氏分析計算器