# ── Helper: correlated binary covariates via Cholesky ────────────────────────
corr_binary <- function(N, D, rho) {
Sigma <- matrix(rho, D, D); diag(Sigma) <- 1
L <- chol(Sigma)
z <- matrix(rnorm(N * D), nrow = N) %*% L
(z > 0L) * 1L
}
# ── Simulation: P(max |r| < threshold) ───────────────────────────────────────
sim_orthog <- function(N, D, rho = 0, threshold = 0.10, n_sim = 1500) {
mean(replicate(n_sim, {
X <- if (rho == 0) matrix(rbinom(N * D, 1, 0.5), nrow = N)
else corr_binary(N, D, rho)
T_assign <- rbinom(N, 1, 0.5)
cors <- apply(X, 2, function(col) abs(cor(col, T_assign)))
max(cors) < threshold
}))
}
# ── Simulation: cumulative bias when any failure occurs ──────────────────────
sim_bias <- function(N = 200, D = 10, rho = 0, betas, threshold = 0.10, n_sim = 1000) {
replicate(n_sim, {
X <- if (rho == 0) matrix(rbinom(N * D, 1, 0.5), nrow = N)
else corr_binary(N, D, rho)
T_assign <- rbinom(N, 1, 0.5)
cors <- apply(X, 2, function(col) cor(col, T_assign))
sum(abs(cors[abs(cors) > threshold]) * betas[abs(cors) > threshold])
})
}
# ── Simulation: Type-1 and Type-2 error rates, conditional on orthogonality failing ──
# The key insight: under PERFECT randomization, Type-1 is always ~5% regardless of betas.
# The interesting question is: given your experiment FAILED the orthogonality check
# (as many real experiments do), how bad are your error rates?
# Large betas (Scenario B) turn small residual imbalance into large bias → inflated Type-1.
# Tiny betas (Scenario A) produce negligible bias even when imbalance exists → Type-1 near 5%.
sim_error_rates <- function(N, D, rho, betas, r_max = 0.10,
true_eff = 0.40, n_sim = 2000) {
results <- t(replicate(n_sim, {
X <- if (rho == 0) matrix(rbinom(N * D, 1, 0.5), nrow = N)
else corr_binary(N, D, rho)
T_assign <- rbinom(N, 1, 0.5)
cors <- apply(X, 2, function(x) abs(cor(x, T_assign)))
orth_ok <- max(cors) <= r_max # did this experiment pass the balance check?
Y_null <- as.vector(X %*% betas) + rnorm(N, 0, 1)
p_null <- t.test(Y_null[T_assign == 1], Y_null[T_assign == 0])$p.value
Y_alt <- true_eff * T_assign + as.vector(X %*% betas) + rnorm(N, 0, 1)
p_alt <- t.test(Y_alt[T_assign == 1], Y_alt[T_assign == 0])$p.value
c(t1 = as.integer(p_null < 0.05), t2 = as.integer(p_alt > 0.05),
orth_ok = as.integer(orth_ok))
}))
failed <- results[, "orth_ok"] == 0L
passed <- results[, "orth_ok"] == 1L
# Type-1: false-positive rate CONDITIONAL ON orthogonality FAILING (violations present)
t1_cond <- if (sum(failed) > 10) mean(results[failed, "t1"]) else NA_real_
# Type-2: miss rate CONDITIONAL ON orthogonality PASSING (no violations; p-values are valid here)
t2_valid <- if (sum(passed) > 10) mean(results[passed, "t2"]) else NA_real_
data.frame(t1_rate = t1_cond, t2_rate = t2_valid,
pct_fail = mean(failed))
}
set.seed(42)
N_vals <- c(50, 100, 200, 400, 800)
D <- 10
# Scenario betas — made more extreme to amplify the contrast
betas_helpful <- rep(0.15, D) # uniform tiny betas
betas_hurtful <- c(1.50, 1.20, 1.00, 0.90, 0.70,
0.50, 0.30, 0.20, 0.10, 0.05) # large heterogeneous betas
# P(orthogonal) across N, for three DGP types
prob_df <- bind_rows(
data.frame(N = N_vals, DGP = "Independent (rho = 0)",
prob = sapply(N_vals, sim_orthog, D = D, rho = 0.00)),
data.frame(N = N_vals, DGP = "Scenario A: Helpful (rho = 0.75, uniform small \u03b2)",
prob = sapply(N_vals, sim_orthog, D = D, rho = 0.75)),
data.frame(N = N_vals, DGP = "Scenario B: Hurtful (rho = 0.35, large varied \u03b2)",
prob = sapply(N_vals, sim_orthog, D = D, rho = 0.35))
)
scen_colors <- c(
"Independent (rho = 0)" = "#4a90d9",
"Scenario A: Helpful (rho = 0.75, uniform small \u03b2)" = "#52b788",
"Scenario B: Hurtful (rho = 0.35, large varied \u03b2)" = "#e63946"
)
p_prob <- ggplot(prob_df, aes(x = N, y = prob, color = DGP, group = DGP)) +
geom_line(linewidth = 1.3) +
geom_point(size = 3.5) +
geom_hline(yintercept = 0.95, linetype = "dashed", color = "gray40") +
annotate("text", x = 820, y = 0.957, label = "95% target",
size = 3.0, color = "gray40", hjust = 0) +
scale_x_continuous(breaks = N_vals) +
scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(0, 1)) +
scale_color_manual(values = scen_colors, name = NULL) +
labs(x = "Sample size (N)",
y = "P(max |r| < 0.10 across all 10 dimensions)",
title = "Probability of Achieving Orthogonality (D = 10)",
subtitle = paste0("Scenario A (strong clustering, tiny betas): easier — correlation reduces effective checks\n",
"Scenario B (moderate correlation, large varied betas): hardest — and most dangerous when it fails")) +
theme_minimal(base_size = 13) +
theme(panel.grid.minor = element_blank(), legend.position = "top",
legend.text = element_text(size = 9))
# ── Bias when randomization fails ─────────────────────────────────────────────
set.seed(42)
bias_indep <- sim_bias(N = 200, D = D, rho = 0.00, betas = betas_hurtful)
bias_helpful <- sim_bias(N = 200, D = D, rho = 0.75, betas = betas_helpful)
bias_hurtful <- sim_bias(N = 200, D = D, rho = 0.35, betas = betas_hurtful)
bias_df <- bind_rows(
data.frame(bias = bias_indep, DGP = "Independent (rho = 0)"),
data.frame(bias = bias_helpful, DGP = "Scenario A: Helpful (rho = 0.75, uniform small \u03b2)"),
data.frame(bias = bias_hurtful, DGP = "Scenario B: Hurtful (rho = 0.35, large varied \u03b2)")
) |>
mutate(DGP = factor(DGP, levels = c("Independent (rho = 0)",
"Scenario A: Helpful (rho = 0.75, uniform small \u03b2)",
"Scenario B: Hurtful (rho = 0.35, large varied \u03b2)")))
p_bias <- ggplot(bias_df |> filter(bias > 0),
aes(x = bias, fill = DGP)) +
geom_histogram(bins = 35, alpha = 0.80, color = "white", linewidth = 0.2) +
geom_vline(data = bias_df |> filter(bias > 0) |>
group_by(DGP) |> summarise(med = median(bias), .groups = "drop"),
aes(xintercept = med, color = DGP), linewidth = 1.1, linetype = "dashed") +
facet_wrap(~DGP, nrow = 3, scales = "free") +
scale_fill_manual(values = scen_colors, guide = "none") +
scale_color_manual(values = scen_colors, guide = "none") +
scale_x_continuous(labels = function(x) sprintf("%.2f SD", x)) +
labs(x = "Cumulative OVB when randomization fails (SD units of Y)",
y = "Frequency across 1,000 experiments",
title = "When Randomization Fails: How Large Is the Bias? (N = 200)",
subtitle = "Dashed = median cumulative bias \u00b7 Only experiments with \u2265 1 failing dimension shown") +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank(),
strip.text = element_text(face = "bold", size = 8.5))
# ── Type-1 error rates across N (Type-2 intentionally excluded — see note below) ──
set.seed(42)
err_df <- bind_rows(
lapply(N_vals, function(n) {
cbind(N = n, DGP = "Independent (rho = 0)",
sim_error_rates(n, D, rho = 0.00, betas = betas_hurtful))
}),
lapply(N_vals, function(n) {
cbind(N = n,
DGP = "Scenario A: Helpful (rho = 0.75, uniform small \u03b2)",
sim_error_rates(n, D, rho = 0.75, betas = betas_helpful))
}),
lapply(N_vals, function(n) {
cbind(N = n,
DGP = "Scenario B: Hurtful (rho = 0.35, large varied \u03b2)",
sim_error_rates(n, D, rho = 0.35, betas = betas_hurtful))
})
) |>
mutate(N = as.integer(N),
t1_rate = as.numeric(t1_rate),
pct_fail = as.numeric(pct_fail),
DGP = as.character(DGP))
# Show Type-1 error only (Type-2 is not reported — see callout below)
p_err <- ggplot(err_df |> filter(DGP != "Independent (rho = 0)"),
aes(x = N, y = t1_rate, color = DGP, group = DGP)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3.0) +
geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray40") +
annotate("text", x = max(N_vals) * 1.02, y = 0.055, label = "5% nominal",
color = "gray40", size = 3.2, hjust = 0) +
scale_x_continuous(breaks = N_vals) +
scale_y_continuous(labels = percent_format(accuracy = 1),
limits = c(0, NA)) +
scale_color_manual(values = scen_colors[names(scen_colors) != "Independent (rho = 0)"],
name = NULL) +
labs(x = "Sample size (N)",
y = "Type-1 error rate (among failed experiments)",
title = "False-Positive Rate Rises with N When Orthogonality Fails",
subtitle = paste0(
"Conditional on |r(T, X)| > 0.10 \u00b7 ",
"Dashed = 5% nominal \u00b7 ",
"Scenario B's large \u03b2s amplify residual imbalance into detectable bias")) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank(), legend.position = "top",
legend.text = element_text(size = 8.5),
plot.margin = margin(5, 50, 5, 5))
p_prob / p_bias / p_err