Include packages:

library(tidyverse)
library(did)

First, set up the true scores for each individual:

set.seed(42)
true_outcomes <- data.frame(id = 1:8000) |>
  mutate(true = rnorm(n(), 0, 1))

Look at a random sample:

true_outcomes |>
  mutate(true = round(true, 2)) |>
  slice_sample(n = 10)

Now the measured outcomes, which are noisy versions of the true score:

gen_obs <- function(id, true, sd, n) {
  data.frame(time = 1:n) |>
    mutate(id = id, .before = time) |>
    mutate(true = true,
           y = true + rnorm(n, 0, sd))
}

sim_dat <- pmap_df(true_outcomes,
                   \(id, true) gen_obs(id = id, true = true, sd = 1, n = 4)) |>
  mutate(y = scale(y) |> as.numeric())

Look at a random sample:

sim_dat |>
  mutate(true = round(true, 2),
         y = round(y, 2)) |>
  slice_sample(n = 10)

We have standardised y:

mean(sim_dat$y) |> round(2)
## [1] 0
sd(sim_dat$y) |> round(2)
## [1] 1

The correlations among the true score and the four measurements:

sim_dat |>
  pivot_wider(names_from = time, values_from = y) |>
  select(-id) |>
  cor() |>
  round(1)
##      true   1   2   3   4
## true  1.0 0.7 0.7 0.7 0.7
## 1     0.7 1.0 0.5 0.5 0.5
## 2     0.7 0.5 1.0 0.5 0.5
## 3     0.7 0.5 0.5 1.0 0.5
## 4     0.7 0.5 0.5 0.5 1.0

Each measurement is correlated about .7 with the (unmeasurable) true score and the measurements are correlated around .5 with each other.
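
Those are roughly the values the simulation settings imply: with a true-score variance of 1 and a noise SD of 1, the theoretical correlation with the true score is 1/sqrt(2), about .71, and the correlation between any two measurements is .5. A quick check of the arithmetic:

var_true  <- 1    # variance of the true scores
var_noise <- 1^2  # variance of the measurement noise (sd = 1)
var_true / sqrt(var_true * (var_true + var_noise))  # cor(y, true): about 0.71
var_true / (var_true + var_noise)                   # cor between two measurements: 0.5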

Now, select people for the intervention if at least one of their scores in the periods prior to their intervention period is 1 SD or more below the population mean.

ids <- list()

ids[[3]] <- sim_dat |>
  filter(time %in% 1:2 & y < -1) |>
  pull(id) |>
  unique() |>
  sample(size = 500)

ids[[4]] <- sim_dat |>
  filter(time %in% 1:3 & y < -1) |>
  pull(id) |>
  setdiff(ids[[3]]) |>
  unique() |>
  sample(size = 500)

ids[[5]] <- setdiff(sim_dat$id, c(ids[[3]], ids[[4]])) |>
  sample(size = 500)

Build a dataset mapping ID to group. {did} treats group = 0 as never-treated.

group_ids <- list(
  data.frame(id = ids[[3]], group = 3),
  data.frame(id = ids[[4]], group = 4),
  data.frame(id = ids[[5]], group = 0)) |>
  bind_rows()
stopifnot((group_ids |> duplicated() |> sum()) == 0)
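
A quick optional check of the group sizes (500 in each group by construction):

group_ids |> count(group)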

Join onto the main dataset and drop people who were not assigned to a group (their group is NA after the join):

sim_dat <- sim_dat |>
  left_join(group_ids) |>
  na.omit()
## Joining with `by = join_by(id)`

Here are the means by group and time period, with a flag for whether each cell is post-treatment:

sim_dat |>
  group_by(group, time) |>
  summarise(mean_y = mean(y) |> round(1),
            n = n()) |>
  mutate(treat = time >= group & group != 0, .after = time)
## `summarise()` has grouped output by 'group'. You can override using the
## `.groups` argument.

A couple of plots:

curvy_plot <- sim_dat |>
  mutate(group = factor(group)) |>
  ggplot() +
  aes(x = time, y = y, colour = group) +
  geom_smooth(method = "loess", formula = y ~ x)
curvy_plot + 
  geom_jitter(alpha = .5, width = .1, size = .1)

Remove the points, which also zooms in on the smoothed means:

curvy_plot

The never-treated group (0) cruises along a little above the population mean (0 by design) with no apparent change. The means of groups 3 and 4 are much lower before treatment (as expected, since we selected them for having at least one pre-intervention score 1 SD or more below the mean), and they appear to increase once the intervention is introduced.
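
One way to see that this is regression to the mean rather than a treatment effect is to use the true column we carried along in sim_dat (a quick check, output not shown): for group 3, the mean true score is constant across time, while the mean of the noisy y is pulled down in the selection periods (1 and 2) and climbs back up afterwards, even though nobody's true score has changed.

sim_dat |>
  filter(group == 3) |>
  group_by(time) |>
  summarise(mean_true = mean(true) |> round(2),
            mean_y = mean(y) |> round(2))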

Let’s see what {did} makes of this:

did_mod <- att_gt(
  yname = "y",
  gname = "group",
  control_group = "notyettreated",
  idname = "id",
  tname = "time",
  data = sim_dat,
  est_method = "reg",
  base_period = "varying"
)
did_mod
## 
## Call:
## att_gt(yname = "y", tname = "time", idname = "id", gname = "group", 
##     data = sim_dat, control_group = "notyettreated", est_method = "reg", 
##     base_period = "varying")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##      3    2  -0.1112     0.0640       -0.2745      0.0521  
##      3    3   0.4829     0.0637        0.3204      0.6454 *
##      3    4   0.4423     0.0655        0.2751      0.6095 *
##      4    2   0.0179     0.0619       -0.1402      0.1759  
##      4    3   0.0368     0.0728       -0.1490      0.2226  
##      4    4   0.1580     0.0638       -0.0048      0.3208  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0.28651
## Control Group:  Not Yet Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
did_mod |>
  ggdid()

aggte(did_mod, type = "group") |>
  ggdid()

aggte(did_mod, type = "dynamic") |>
  ggdid()
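
If we want a single overall number, aggte() can also compute a simple weighted average of the post-treatment group-time effects:

aggte(did_mod, type = "simple")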

Would we conclude that the intervention was effective if we didn’t know how the data were generated…?

What happens if we remove the never-treated group?

sim_dat_sometimes_treated <- sim_dat |>
  filter(group != 0)
did_mod_2 <- att_gt(
  yname = "y",
  gname = "group",
  control_group = "notyettreated",
  idname = "id",
  tname = "time",
  data = sim_dat_sometimes_treated,
  est_method = "reg",
  base_period = "varying"
)

did_mod_2
## 
## Call:
## att_gt(yname = "y", tname = "time", idname = "id", gname = "group", 
##     data = sim_dat_sometimes_treated, control_group = "notyettreated", 
##     est_method = "reg", base_period = "varying")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##      3    2  -0.0861     0.0763       -0.2576      0.0854  
##      3    3   0.4645     0.0685        0.3106      0.6184 *
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0.26747
## Control Group:  Not Yet Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression
did_mod_2 |>
  ggdid()