# A Visual Diagnostic Tool for Causal Inference

# Simulate data with a heterogeneous treatment effect, fit a misspecified
# model without the treatment-by-covariate interaction, and plot its residuals
# against the fitted values, colored and faceted by treatment. Then refit the
# correct model with the interaction and re-plot the residuals. Together, the
# plots show how this diagnostic can detect model misspecification.


library(tidyverse)

## Simulate data

# Fix the RNG seed so the simulated data set is reproducible.
set.seed(8)

n <- 1000

# Single covariate.
x <- rnorm(n)

# Binary treatment indicator; the probability of treatment is the logistic
# function of x, so assignment depends on the covariate.
t <- rbinom(n, 1, exp(x) / (1 + exp(x)))

# Potential outcomes: the slope on x is +0.5 under treatment and -0.5 under
# control, so the treatment effect varies with x.
y1 <- 0.5 * x + rnorm(n)
y0 <- -0.5 * x + rnorm(n)

# Observed outcome: y1 for treated units (t == 1), y0 for controls (t == 0).
y_obs <- t * y1 + (1 - t) * y0

# Fit every model exactly once, outside tibble(), and reuse the fit for both
# fitted values and residuals. Fitting here also makes it explicit that the
# models use the numeric 0/1 `t` defined above; inside tibble() a formula
# would silently pick up whichever `t` (numeric vector or factor column)
# exists at that point in the argument list. With default treatment contrasts
# a 0/1 numeric and a two-level factor produce the same design matrix, so the
# fitted values and residuals are unchanged.
# NOTE(review): `fit_treatment` is a linear probability model although `t` is
# generated from a logistic model, so its predictions are not constrained to
# [0, 1]. Column `p` is not used by the plots below -- confirm whether
# glm(t ~ x, family = binomial) was intended.
fit_treatment <- lm(t ~ x)
fit_naive <- lm(y_obs ~ t + x) # misspecified: omits the t:x interaction
fit_correct <- lm(y_obs ~ t * x) # includes the t:x interaction

# One row per simulated unit:
#   y          observed outcome
#   p          fitted values from regressing t on x
#   t          treatment indicator as a factor (for color / facet mappings)
#   f, r       fitted values and residuals of the model without interaction
#   f_correct, r_correct
#              fitted values and residuals of the model with interaction
d <- tibble(
  y = y_obs,
  p = predict(fit_treatment),
  t = as.factor(t),
  f = predict(fit_naive),
  r = resid(fit_naive),
  f_correct = predict(fit_correct),
  r_correct = resid(fit_correct)
)


## Plots

# Residuals (`r`) against fitted values (`f`) from `d`, all observations
# pooled, with a single black loess trend line (span = 1, no confidence band).
p_naive <- ggplot(d, aes(f, r)) +
  geom_point(alpha = 0.25) +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 1, se = FALSE, color = "black"
  ) +
  labs(x = "Fitted value", y = "Residuals") +
  theme_minimal()

# Display the plot.
p_naive


# Same residuals-vs-fitted plot (`r` against `f`), but colored by treatment
# `t`, so points and loess trend lines are drawn separately per group.
p2 <- ggplot(d, aes(f, r, color = t)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE) +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(x = "Fitted value", y = "Residuals", color = "Treatment") +
  theme_minimal()

# Display the plot.
p2

       

# Residuals (`r`) against fitted values (`f`), colored by treatment `t` and
# additionally split into one panel per treatment level.
p3 <- ggplot(d, aes(f, r, color = t)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE) +
  facet_wrap(~ t) +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(x = "Fitted value", y = "Residuals", color = "Treatment") +
  theme_minimal()

# Display the plot.
p3


# Residuals (`r_correct`) against fitted values (`f_correct`) from `d`,
# colored by treatment `t`, with one loess trend line per group.
p4 <- ggplot(d, aes(f_correct, r_correct, color = t)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE) +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(x = "Fitted value", y = "Residuals", color = "Treatment") +
  theme_minimal()

# Display the plot.
p4

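# --- Residue from the source web page (not part of the script) ---
# Comments
#
# Popular posts from this blog
#
# Transportability, generalizability, representativeness, and external validity
#
# Frequentist vs. Bayesian
#
# Bayesian analysis calculator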