library(conflicted)
library(tidyverse)
── Attaching core tidyverse packages ───────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
library(tictoc)
Everyone loves a good randomised controlled trial because the mean
outcome of people who were exposed to treatment minus the mean outcome
of people who were exposed to control – a between-participant
difference – is an unbiased estimator of the the mean of
within-participant individual treatment effects.
Let’s simulate some data where we know the average treatment effect
(ATE). This is an efficacy trial, so we are only interested in the ATE
for the study participants. There is still an inferential problem since
not everyone gets treatment and not everyone gets control.
set.seed(3137942)
theN <- 500
sim_dat <- tibble(latent_magic = 50 + rnorm(theN, 0, sqrt(.6)),
y0 = round(latent_magic + rnorm(theN, 0, sqrt(.4)), 1),
y1 = round(latent_magic + 0.2 + rnorm(theN, 0, sqrt(.4)), 1),
ITS = y1 - y0) |>
select(-latent_magic)
Take a look:
sim_dat
y0 is the potential outcome under control, y1 the potential outcome
under treatment, and ITS is the individual treatment effect, y1 –
y0.
We can get the true average treatment effect of an efficacy trial by
averaging all those ITSes:
trueATE <- mean(sim_dat$ITS)
trueATE
[1] 0.2636
That’s the true answer we’re trying to recover with an RCT.
Now let’s simulate an RCT – one randomisation of participants to
treat or control. Here I’m using a coin toss.
rct_dat <- sim_dat |>
mutate(
treat = rbinom(n(), 1, .5),
y = treat * y1 + (1 - treat) * y0,
y1 = ifelse(treat == 1, y1, NA),
y0 = ifelse(treat == 0, y0, NA)
) |>
select(-ITS)
rct_dat
We only see one of the potential outcomes, depending on which was
realised, hence the missing data on y0 and y1 (NAs).
Estimate ATE and its 95% CI using linear regression:
lm_mod <- lm(y ~ treat, data = rct_dat)
cbind(ATE = coef(lm_mod), confint(lm_mod))["treat",]
ATE 2.5 % 97.5 %
0.2935164 0.1172047 0.4698281
Repeat that 2,000 times to see the average of the ATEs across
simulated trials:
sim_reses <- data.frame()
for (i in 1:2000) {
rct_dat <- sim_dat |>
mutate(treat = rbinom(n(), 1, .5),
y = treat * y1 + (1 - treat) * y0)
lm_mod <- lm(y ~ treat, data = rct_dat)
this_res <- cbind(ATE = coef(lm_mod), confint(lm_mod))["treat", ]
sim_reses <- bind_rows(sim_reses, this_res)
}
sim_reses |>
mutate(across(everything(), \(x) round(x, 2)))
The mean estimated ATE across simulated trials is really
close to the true ATE…
c(
`mean of ATE estimates` = mean(sim_reses$ATE),
`true ATE` = trueATE,
`true ATE - mean of estimates` = trueATE - mean(sim_reses$ATE)
) |> round(4)
mean of ATE estimates true ATE
0.2605 0.2636
true ATE - mean of estimates
0.0031
… but there’s quite a bit of variation around the mean answer.
sim_reses |>
ggplot(aes(x = ATE)) +
geom_histogram(bins = 40) +
xlab("estimated ATE")

Let’s look at the 95% confidence interval coverage:
sim_reses <- sim_reses |>
mutate(trueATE_in_interval = trueATE >= `2.5 %` &
trueATE <= `97.5 %`)
cover <- mean(sim_reses$trueATE_in_interval)
cover
[1] 0.9745
97% of the simulated studies’ 95% confidence intervals included the
true result.
5.79 sec elapsed
LS0tDQp0aXRsZTogIlNpbXVsYXRpbmcgZWZmaWNhY3kgdHJpYWxzIg0KYXV0aG9yOiBBbmRpIEZ1Z2FyZA0KZGF0ZTogMTIgTWF5IDIwMjQNCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIGNvZGVfZm9sZGluZzogbm9uZQ0KLS0tDQoNCg0KYGBge3J9DQpsaWJyYXJ5KGNvbmZsaWN0ZWQpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGljdG9jKQ0KYGBgDQoNCg0KYGBge3IgaW5jbHVkZT1GQUxTRX0NCnRpYygpDQpgYGANCg0KDQpFdmVyeW9uZSBsb3ZlcyBhIGdvb2QgcmFuZG9taXNlZCBjb250cm9sbGVkIHRyaWFsIGJlY2F1c2UgdGhlIG1lYW4gb3V0Y29tZSBvZiBwZW9wbGUgd2hvIHdlcmUgZXhwb3NlZCB0byB0cmVhdG1lbnQgbWludXMgdGhlIG1lYW4gb3V0Y29tZSBvZiBwZW9wbGUgd2hvIHdlcmUgZXhwb3NlZCB0byBjb250cm9sIC0tIGEgKmJldHdlZW4qLXBhcnRpY2lwYW50IGRpZmZlcmVuY2UgLS0gaXMgYW4gdW5iaWFzZWQgZXN0aW1hdG9yIG9mIHRoZSB0aGUgbWVhbiBvZiAqd2l0aGluKi1wYXJ0aWNpcGFudCBpbmRpdmlkdWFsIHRyZWF0bWVudCBlZmZlY3RzLg0KDQoNCkxldCdzIHNpbXVsYXRlIHNvbWUgZGF0YSB3aGVyZSB3ZSBrbm93IHRoZSBhdmVyYWdlIHRyZWF0bWVudCBlZmZlY3QgKEFURSkuIFRoaXMgaXMgYW4gZWZmaWNhY3kgdHJpYWwsIHNvIHdlIGFyZSBvbmx5IGludGVyZXN0ZWQgaW4gdGhlIEFURSBmb3IgdGhlIHN0dWR5IHBhcnRpY2lwYW50cy4gVGhlcmUgaXMgc3RpbGwgYW4gaW5mZXJlbnRpYWwgcHJvYmxlbSBzaW5jZSBub3QgZXZlcnlvbmUgZ2V0cyB0cmVhdG1lbnQgYW5kIG5vdCBldmVyeW9uZSBnZXRzIGNvbnRyb2wuDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMzEzNzk0MikNCnRoZU4gPC0gNTAwDQoNCnNpbV9kYXQgPC0gdGliYmxlKGxhdGVudF9tYWdpYyA9IDUwICsgcm5vcm0odGhlTiwgMCwgc3FydCguNikpLA0KICAgICAgICAgICAgICAgICAgeTAgPSByb3VuZChsYXRlbnRfbWFnaWMgKyBybm9ybSh0aGVOLCAwLCBzcXJ0KC40KSksIDEpLA0KICAgICAgICAgICAgICAgICAgeTEgPSByb3VuZChsYXRlbnRfbWFnaWMgKyAwLjIgKyBybm9ybSh0aGVOLCAwLCBzcXJ0KC40KSksIDEpLA0KICAgICAgICAgICAgICAgICAgSVRTID0geTEgLSB5MCkgfD4NCiAgc2VsZWN0KC1sYXRlbnRfbWFnaWMpDQpgYGANCg0KVGFrZSBhIGxvb2s6DQoNCmBgYHtyfQ0Kc2ltX2RhdA0KYGBgDQoNCnkwIGlzIHRoZSBwb3RlbnRpYWwgb3V0Y29tZSB1bmRlciBjb250cm9sLCB5MSB0aGUgcG90ZW50aWFsIG91dGNvbWUgdW5kZXIgdHJlYXRtZW50LCBhbmQgSVRTIGlzIHRoZSBpbmRpdmlkdWFsIHRyZWF0bWVudCBlZmZlY3QsIHkxIC0tIHkwLg0KDQpXZSBjYW4gZ2V0IHRoZSB0cnVlIGF2ZXJhZ2UgdHJlYXRtZW50IGVmZmVjdCBvZiBhbiBlZmZpY2FjeSB0cmlhbCBieSBhdmVyYWdpbmcgYWxsIHRob3NlIElUU2VzOg0KDQoNCmBgYHtyfQ0KdHJ1ZUFURSA8LSBtZWFuKHNpbV9kYXQkSVRTKQ0KdHJ1ZUFURQ0KYGBgDQoNClRoYXQncyB0aGUgdHJ1ZSBhbnN3ZXIgd2UncmUgdHJ5aW5nIHRvIHJlY292ZXIgd2l0aCBhbiBSQ1QuDQoNCk5vdyBsZXQncyBzaW11bGF0ZSBhbiBSQ1QgLS0gb25lIHJhbmRvbWlzYXRpb24gb2YgcGFydGljaXBhbnRzIHRvIHRyZWF0IG9yIGNvbnRyb2wuIEhlcmUgSSdtIHVzaW5nIGEgY29pbiB0b3NzLg0KDQpgYGB7cn0NCnJjdF9kYXQgPC0gc2ltX2RhdCB8Pg0KICBtdXRhdGUoDQogICAgdHJlYXQgPSByYmlub20obigpLCAxLCAuNSksDQogICAgeSA9IHRyZWF0ICogeTEgKyAoMSAtIHRyZWF0KSAqIHkwLA0KICAgIHkxID0gaWZlbHNlKHRyZWF0ID09IDEsIHkxLCBOQSksDQogICAgeTAgPSBpZmVsc2UodHJlYXQgPT0gMCwgeTAsIE5BKQ0KICApIHw+DQogIHNlbGVjdCgtSVRTKQ0KDQpyY3RfZGF0DQpgYGANCg0KV2Ugb25seSBzZWUgb25lIG9mIHRoZSBwb3RlbnRpYWwgb3V0Y29tZXMsIGRlcGVuZGluZyBvbiB3aGljaCB3YXMgcmVhbGlzZWQsIGhlbmNlIHRoZSBtaXNzaW5nIGRhdGEgb24geTAgYW5kIHkxIChOQXMpLg0KDQpFc3RpbWF0ZSBBVEUgYW5kIGl0cyA5NSUgQ0kgdXNpbmcgbGluZWFyIHJlZ3Jlc3Npb246DQoNCmBgYHtyfQ0KbG1fbW9kIDwtIGxtKHkgfiB0cmVhdCwgZGF0YSA9IHJjdF9kYXQpDQpjYmluZChBVEUgPSBjb2VmKGxtX21vZCksIGNvbmZpbnQobG1fbW9kKSlbInRyZWF0IixdDQpgYGANCg0KUmVwZWF0IHRoYXQgMiwwMDAgdGltZXMgdG8gc2VlIHRoZSBhdmVyYWdlIG9mIHRoZSBBVEVzIGFjcm9zcyBzaW11bGF0ZWQgdHJpYWxzOg0KDQoNCmBgYHtyfQ0Kc2ltX3Jlc2VzIDwtIGRhdGEuZnJhbWUoKQ0KDQpmb3IgKGkgaW4gMToyMDAwKSB7DQogIHJjdF9kYXQgPC0gc2ltX2RhdCB8Pg0KICAgIG11dGF0ZSh0cmVhdCA9IHJiaW5vbShuKCksIDEsIC41KSwNCiAgICAgICAgICAgeSA9IHRyZWF0ICogeTEgKyAoMSAtIHRyZWF0KSAqIHkwKQ0KICANCiAgbG1fbW9kIDwtIGxtKHkgfiB0cmVhdCwgZGF0YSA9IHJjdF9kYXQpDQogIHRoaXNfcmVzIDwtIGNiaW5kKEFURSA9IGNvZWYobG1fbW9kKSwgY29uZmludChsbV9tb2QpKVsidHJlYXQiLCBdDQogIA0KICBzaW1fcmVzZXMgPC0gYmluZF9yb3dzKHNpbV9yZXNlcywgdGhpc19yZXMpDQp9DQpgYGANCg0KDQpgYGB7cn0NCnNpbV9yZXNlcyAgfD4NCiAgbXV0YXRlKGFjcm9zcyhldmVyeXRoaW5nKCksIFwoeCkgcm91bmQoeCwgMikpKQ0KYGBgDQoNCg0KVGhlIG1lYW4gZXN0aW1hdGVkIEFURSBhY3Jvc3Mgc2ltdWxhdGVkIHRyaWFscyBpcyAqcmVhbGx5KiBjbG9zZSB0byB0aGUgdHJ1ZSBBVEUuLi4NCg0KDQpgYGB7cn0NCmMoDQogIGBtZWFuIG9mIEFURSBlc3RpbWF0ZXNgID0gbWVhbihzaW1fcmVzZXMkQVRFKSwNCiAgYHRydWUgQVRFYCA9IHRydWVBVEUsDQogIGB0cnVlIEFURSAtIG1lYW4gb2YgZXN0aW1hdGVzYCA9IHRydWVBVEUgLSBtZWFuKHNpbV9yZXNlcyRBVEUpIA0KKSB8PiByb3VuZCg0KQ0KYGBgDQoNCg0KLi4uIGJ1dCB0aGVyZSdzIHF1aXRlIGEgYml0IG9mIHZhcmlhdGlvbiBhcm91bmQgdGhlIG1lYW4gYW5zd2VyLg0KDQoNCmBgYHtyfQ0Kc2ltX3Jlc2VzIHw+DQogIGdncGxvdChhZXMoeCA9IEFURSkpICsNCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDQwKSArDQogIHhsYWIoImVzdGltYXRlZCBBVEUiKQ0KYGBgDQoNCg0KTGV0J3MgbG9vayBhdCB0aGUgOTUlIGNvbmZpZGVuY2UgaW50ZXJ2YWwgY292ZXJhZ2U6DQoNCmBgYHtyfQ0Kc2ltX3Jlc2VzIDwtIHNpbV9yZXNlcyB8Pg0KICBtdXRhdGUodHJ1ZUFURV9pbl9pbnRlcnZhbCA9IHRydWVBVEUgPj0gYDIuNSAlYCAmDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJ1ZUFURSA8PSBgOTcuNSAlYCkNCmNvdmVyIDwtIG1lYW4oc2ltX3Jlc2VzJHRydWVBVEVfaW5faW50ZXJ2YWwpDQpjb3Zlcg0KYGBgDQoNCg0KYHIgcm91bmQoY292ZXIgKiAxMDAsMClgJSBvZiB0aGUgc2ltdWxhdGVkIHN0dWRpZXMnIDk1JSBjb25maWRlbmNlIGludGVydmFscyBpbmNsdWRlZCB0aGUgdHJ1ZSByZXN1bHQuDQoNCg0KYGBge3IgZWNobz1GQUxTRX0NCnRvYygpDQpgYGANCg0KDQo=