# A Visual Diagnostic Tool for Causal Inference
# Workflow: (1) simulate data with a heterogeneous treatment effect; (2) fit a
# misspecified outcome model without the treatment-by-covariate interaction and
# compute its fitted values and residuals; (3) plot residuals vs. fitted values,
# first pooled and then colored/faceted by treatment; (4) refit the correct
# model with the interaction and re-plot residuals vs. fitted values. The
# comparison shows how this diagnostic plot can detect model misspecification.
library(tidyverse)
## Simulate data
# Fixed seed so the simulated data set, and therefore every plot, is
# reproducible.
set.seed(8)
n <- 1000

# Single covariate; it drives both treatment assignment and the outcomes.
x <- rnorm(n)

# Binary treatment indicator; P(t = 1) is the inverse logit of x.
t <- rbinom(n, 1, exp(x) / (1 + exp(x)))

# Potential outcomes: the slope on x is +0.5 under treatment and -0.5 under
# control, so the treatment effect (y1 - y0) varies with x.
y1 <- 0.5 * x + rnorm(n)
y0 <- -0.5 * x + rnorm(n)

# Observed outcome: y1 for treated units (t == 1), y0 for controls (t == 0).
y_obs <- t * y1 + (1 - t) * y0

# Analysis data set. Columns:
#   y         observed outcome
#   p         fitted values from a linear regression of t on x
#   t         treatment as a factor (levels "0" and "1")
#   f, r      fitted values / residuals of the additive model  y ~ t + x
#   f_correct, r_correct
#             fitted values / residuals of the interaction model  y ~ t * x
#
# Each outcome model is fitted exactly once and reused for both its fitted
# values and its residuals. The treatment factor is created explicitly instead
# of relying on tibble() evaluating its columns in order. local() keeps the
# helper objects out of the global environment.
d <- local({
  treatment <- as.factor(t)
  fit_additive <- lm(y_obs ~ treatment + x)
  fit_interaction <- lm(y_obs ~ treatment * x)

  tibble(
    y = y_obs,
    # Here `t` is still the numeric 0/1 vector (the factor column is defined
    # on the next line).
    # NOTE(review): this is a linear probability fit, so values can fall
    # outside [0, 1]; if `p` is meant to be a propensity score,
    # glm(t ~ x, family = binomial) may be intended -- confirm.
    p = predict(lm(t ~ x)),
    t = treatment,
    f = predict(fit_additive),
    r = resid(fit_additive),
    f_correct = predict(fit_interaction),
    r_correct = resid(fit_interaction)
  )
})
## Plots
# Pooled diagnostic: residuals (`r`) against fitted values (`f`) from the
# additive model stored in `d`, ignoring treatment group. A single black loess
# curve (span = 1, no confidence band) summarizes the trend.
p_naive <- ggplot(data = d, mapping = aes(x = f, y = r)) +
  geom_point(alpha = 0.25) +
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    span = 1,
    se = FALSE,
    color = "black"
  ) +
  labs(x = "Fitted value", y = "Residuals") +
  theme_minimal()
# Print the stored plot.
p_naive
# Same residuals-vs-fitted plot for the additive model, now colored by
# treatment. Because `t` is a factor mapped to color, points and the loess
# curve are drawn separately for each treatment level.
p2 <- ggplot(data = d, mapping = aes(x = f, y = r, color = t))
p2 <- p2 +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE)
# Manual palette: colors are assigned to the factor levels of `t` in order.
p2 <- p2 +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(
    x = "Fitted value",
    y = "Residuals",
    color = "Treatment"
  ) +
  theme_minimal()
# Print the stored plot.
p2
# Residuals vs. fitted values for the additive model, colored by treatment and
# additionally split into one panel per treatment level.
p3 <- ggplot(data = d, mapping = aes(x = f, y = r, color = t))
p3 <- p3 +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE)
# One facet per level of `t`.
p3 <- p3 + facet_wrap(facets = ~ t)
# Manual palette: colors are assigned to the factor levels of `t` in order.
p3 <- p3 +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(
    x = "Fitted value",
    y = "Residuals",
    color = "Treatment"
  ) +
  theme_minimal()
# Print the stored plot.
p3
# Residuals (`r_correct`) vs. fitted values (`f_correct`) from the model that
# includes the treatment-by-x interaction, colored by treatment with one loess
# curve per treatment level.
p4 <- ggplot(data = d, mapping = aes(x = f_correct, y = r_correct, color = t))
p4 <- p4 +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "loess", formula = y ~ x, span = 1, se = FALSE)
# Manual palette: colors are assigned to the factor levels of `t` in order.
p4 <- p4 +
  scale_color_manual(values = c("orange", "cornflower blue")) +
  labs(
    x = "Fitted value",
    y = "Residuals",
    color = "Treatment"
  ) +
  theme_minimal()
# Print the stored plot.
p4
# Comments
# Post a comment