Attempt to replicate Callaway and Sant’Anna (2021).

Get libraries loaded:

library(tidyverse)
library(did)
library(sandwich)
library(lmtest)

The example dataset supplied with {did}:

dat <- mpdta  # In case I wanted to change anything...
head(dat)

Estimate ATT(g, t) for each group g and time t:

did_mod <- att_gt(
  yname = "lemp",
  gname = "first.treat",
  control_group = "notyettreated",
  idname = "countyreal",
  tname = "year",
  data = dat,
  est_method = "reg",
  base_period = "varying"
)

did_mod

Call:
att_gt(yname = "lemp", tname = "year", idname = "countyreal", 
    gname = "first.treat", data = dat, control_group = "notyettreated", 
    est_method = "reg", base_period = "varying")

Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 

Group-Time Average Treatment Effects:
---
Signif. codes: `*' confidence band does not cover 0

P-value for pre-test of parallel trends assumption:  0.16814
Control Group:  Not Yet Treated,  Anticipation Periods:  0
Estimation Method:  Outcome Regression

Now my attempt, initially focussed on the estimates. Standard errors can come later!

What made intuitive sense:

Here’s what initially tripped me up:

ATT_group_time <- function(the_group, the_year) {
  # The first time point we choose as we go depends on whether
  # the unit is treated in this "canonical" 2×2 dataset
  comparision_year <- ifelse(the_year < the_group,
                             the_year - 1,
                             the_group - 1)
  
  this_dat <- dat |>
    mutate(treat = as.numeric(first.treat == the_group),
           time  = as.numeric(year == the_year)) |>
    filter(treat |                # ever treated (current group)
           first.treat == 0 |     # never treated (weird convention of {did})
           year < first.treat) |> # not yet treated (other groups)
    filter(year %in% c(comparision_year, the_year))
  
  mod <- lm(lemp ~ 0 + factor(countyreal) + time + time:treat,
            data = this_dat)
  summary(mod) |>
    tidy() |>
    filter(term == "time:treat") |>
    mutate(group = the_group, .before = "term") |>
    mutate(year = the_year, .before = "term") |>
    mutate(sub_n = nrow(this_dat)) |>
    rename(my_est = estimate) |>
    select(-term)
}

Now zap the whole dataset and compare with {did}’s estimates.

res <- map2(did_mod$group, did_mod$t, ATT_group_time) |>
  bind_rows() |>
  mutate(did_est = did_mod$att, .after = "my_est") |>
  mutate(correct = abs(my_est - did_est) < 1e-6, .after = "did_est")
stopifnot(all(res$correct))
res |> select(group, year, my_est, did_est, correct, sub_n)

Hurrah - it worked! I’m going to worry about the standard errors another day.

Pooling the ATT(g,t)s?

Here are {did}’s estimates, pooled by group:

did_agg_group <- aggte(did_mod, type = "group")
did_agg_group

Call:
aggte(MP = did_mod, type = "group")

Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 


Overall summary of ATT's based on group/cohort aggregation:  


Group Effects:
---
Signif. codes: `*' confidence band does not cover 0

Control Group:  Not Yet Treated,  Anticipation Periods:  0
Estimation Method:  Outcome Regression

We can replicate the estimated by just averaging the ATT(g,t) estimates across the t’s:

pool_estimates_naive <- function(ests) {
  data.frame(
    my_pooled_est = mean(ests),
    n_t = length(ests)
  )
}

agg_res <- res |>
  filter(year >= group) |>
  group_by(group) |>
  summarise(pool_estimates_naive(my_est)) |>
  mutate(did_agg_es = did_agg_group$att.egt, .after = "my_pooled_est") |>
  mutate(correct = abs(my_pooled_est - did_agg_es) < 1e-6)
stopifnot(all(agg_res$correct))

agg_res

That works.

References

Callaway, B., & Sant’Anna, P. H. C. (2021). Difference-in-Differences with multiple time periods. Journal of Econometrics, 225(2), 200–230.

LS0tDQp0aXRsZTogIkRpZmZlcmVuY2UtaW4tZGlmZmVyZW5jZXMgKGRpZmYtaW4tZGlmZnMpIGV4cGVyaW1lbnRzIg0KYXV0aG9yOiBBbmRpIEZ1Z2FyZA0KZGF0ZTogMjcgRGVjIDIwMjQNCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIGNvZGVfZm9sZGluZzogbm9uZQ0KLS0tDQoNCkF0dGVtcHQgdG8gcmVwbGljYXRlIENhbGxhd2F5IGFuZCBTYW504oCZQW5uYSAoMjAyMSkuDQoNCkdldCBsaWJyYXJpZXMgbG9hZGVkOg0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGRpZCkNCmxpYnJhcnkoc2FuZHdpY2gpDQpsaWJyYXJ5KGxtdGVzdCkNCmBgYA0KDQpUaGUgZXhhbXBsZSBkYXRhc2V0IHN1cHBsaWVkIHdpdGgge2RpZH06DQoNCmBgYHtyfQ0KZGF0IDwtIG1wZHRhICAjIEluIGNhc2UgSSB3YW50ZWQgdG8gY2hhbmdlIGFueXRoaW5nLi4uDQpoZWFkKGRhdCkNCmBgYA0KDQpFc3RpbWF0ZSBBVFQoZywgdCkgZm9yIGVhY2ggZ3JvdXAgX2dfIGFuZCB0aW1lIF90XzoNCg0KYGBge3Igcm93cy5wcmludCA9IDIwfQ0KZGlkX21vZCA8LSBhdHRfZ3QoDQogIHluYW1lID0gImxlbXAiLA0KICBnbmFtZSA9ICJmaXJzdC50cmVhdCIsDQogIGNvbnRyb2xfZ3JvdXAgPSAibm90eWV0dHJlYXRlZCIsDQogIGlkbmFtZSA9ICJjb3VudHlyZWFsIiwNCiAgdG5hbWUgPSAieWVhciIsDQogIGRhdGEgPSBkYXQsDQogIGVzdF9tZXRob2QgPSAicmVnIiwNCiAgYmFzZV9wZXJpb2QgPSAidmFyeWluZyINCikNCg0KZGlkX21vZA0KYGBgDQoNCg0KTm93IG15IGF0dGVtcHQsIGluaXRpYWxseSBmb2N1c3NlZCBvbiB0aGUgZXN0aW1hdGVzLiBTdGFuZGFyZCBlcnJvcnMgY2FuIGNvbWUgbGF0ZXIhDQoNCldoYXQgbWFkZSBpbnR1aXRpdmUgc2Vuc2U6DQoNCiogV2UncmUgc3dlZXBpbmcgYWxvbmcgdGltZXMgYW5kIGdyb3VwcyAodGhlIHllYXIgd2hlbiBhIHVuaXQgaXMgZmlyc3QgdHJlYXRlZCksIGJ1aWxkaW5nIHRoZSBjYW5vbmljYWwgMsOXMiBkYXRhc2V0cyBhbmQgZXN0aW1hdGluZyB0aGUgdXN1YWwgVFdGRSBtb2RlbCBhcyB3ZSB6b29tIGFsb25nLg0KDQpIZXJlJ3Mgd2hhdCBpbml0aWFsbHkgdHJpcHBlZCBtZSB1cDoNCg0KKiBfYXR0X2d0XyB0cmVhdHMgZ3JvdXAgPSAwIGFzIG5ldmVyIHRyZWF0ZWQuIEEgYmV0dGVyIGNob2ljZSBtaWdodCBoYXZlIGJlZW4gYW55IG51bWJlciBncmVhdGVyIHRoYW4gdGhlIG1heGltdW0gdGltZSAoZS5nLiwgeWVhcikgaW4gdGhlIGRhdGFzZXQgKGluY2x1ZGluZyBfSW5mXykuDQoqIEFzIHdlIHN3ZWVwIGFsb25nIGFuZCBidWlsZCB0aGUgMsOXMiBkYXRhc2V0cywgd2UgY29uc2lkZXIgYSB1bml0ICJ0cmVhdGVkIiBpZiBpdCdzIGV2ZXIgdHJlYXRlZCBhbmQgaW4gdGhlIHNlY29uZCB0aW1lIHBvaW50LCBpLmUuLCBzbyB3ZSBjYW4gY2hlY2sgZm9yIGJhc2VsaW5lIHBhcmFsbGVsIHRyZW5kcy4NCiogSWYgd2UgYXJlIHN0aWxsIGluIHRoZSB1bnRyZWF0ZWQgcGhhc2UsIHRoZSBjb21wYXJpc29uIHRpbWUgcG9pbnQgaXMgb25lIGJlZm9yZSB0aGUgY3VycmVudCB0aW1lIHBvaW50LiBJZiB3ZSBhcmUgaW4gdGhlIHRyZWF0ZWQgcGhhc2UsIHRoZW4gdGhlIGNvbXBhcmlzb24gdGltZSBwb2ludCBpcyB0aGUgb25lIGp1c3QgYmVmb3JlIHdoZW4gdGhlIHVuaXQgZmlyc3QgcmVjZWl2ZWQgdHJlYXRtZW50Lg0KKiBBbHNvIGEgYml0IG9mIHNpbGxpbmVzcyB3aXRoIGhvdyBJIHNldHVwIHRoZSBmaXhlZCBlZmZlY3QgbW9kZWw6IEkgdHJpZWQgdG8gaW5jbHVkZSBhIG1haW4gZWZmZWN0IG9mIHRoZSB0cmVhdG1lbnQgZ3JvdXA7IGhvd2V2ZXIsIHRoYXQncyBwZXJmZWN0bHkgY29ycmVsYXRlZCB3aXRoIHRoZSB1bml0IGZpeGVkIGVmZmVjdHMsIG9idnMuDQoNCmBgYHtyfQ0KQVRUX2dyb3VwX3RpbWUgPC0gZnVuY3Rpb24odGhlX2dyb3VwLCB0aGVfeWVhcikgew0KICAjIFRoZSBmaXJzdCB0aW1lIHBvaW50IHdlIGNob29zZSBhcyB3ZSBnbyBkZXBlbmRzIG9uIHdoZXRoZXINCiAgIyB0aGUgdW5pdCBpcyB0cmVhdGVkIGluIHRoaXMgImNhbm9uaWNhbCIgMsOXMiBkYXRhc2V0DQogIGNvbXBhcmlzaW9uX3llYXIgPC0gaWZlbHNlKHRoZV95ZWFyIDwgdGhlX2dyb3VwLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGVfeWVhciAtIDEsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRoZV9ncm91cCAtIDEpDQogIA0KICB0aGlzX2RhdCA8LSBkYXQgfD4NCiAgICBtdXRhdGUodHJlYXQgPSBhcy5udW1lcmljKGZpcnN0LnRyZWF0ID09IHRoZV9ncm91cCksDQogICAgICAgICAgIHRpbWUgID0gYXMubnVtZXJpYyh5ZWFyID09IHRoZV95ZWFyKSkgfD4NCiAgICBmaWx0ZXIodHJlYXQgfCAgICAgICAgICAgICAgICAjIGV2ZXIgdHJlYXRlZCAoY3VycmVudCBncm91cCkNCiAgICAgICAgICAgZmlyc3QudHJlYXQgPT0gMCB8ICAgICAjIG5ldmVyIHRyZWF0ZWQgKHdlaXJkIGNvbnZlbnRpb24gb2Yge2RpZH0pDQogICAgICAgICAgIHllYXIgPCBmaXJzdC50cmVhdCkgfD4gIyBub3QgeWV0IHRyZWF0ZWQgKG90aGVyIGdyb3VwcykNCiAgICBmaWx0ZXIoeWVhciAlaW4lIGMoY29tcGFyaXNpb25feWVhciwgdGhlX3llYXIpKQ0KICANCiAgbW9kIDwtIGxtKGxlbXAgfiAwICsgZmFjdG9yKGNvdW50eXJlYWwpICsgdGltZSArIHRpbWU6dHJlYXQsDQogICAgICAgICAgICBkYXRhID0gdGhpc19kYXQpDQogIHN1bW1hcnkobW9kKSB8Pg0KICAgIHRpZHkoKSB8Pg0KICAgIGZpbHRlcih0ZXJtID09ICJ0aW1lOnRyZWF0IikgfD4NCiAgICBtdXRhdGUoZ3JvdXAgPSB0aGVfZ3JvdXAsIC5iZWZvcmUgPSAidGVybSIpIHw+DQogICAgbXV0YXRlKHllYXIgPSB0aGVfeWVhciwgLmJlZm9yZSA9ICJ0ZXJtIikgfD4NCiAgICBtdXRhdGUoc3ViX24gPSBucm93KHRoaXNfZGF0KSkgfD4NCiAgICByZW5hbWUobXlfZXN0ID0gZXN0aW1hdGUpIHw+DQogICAgc2VsZWN0KC10ZXJtKQ0KfQ0KYGBgDQoNCk5vdyB6YXAgdGhlIHdob2xlIGRhdGFzZXQgYW5kIGNvbXBhcmUgd2l0aCB7ZGlkfSdzIGVzdGltYXRlcy4NCg0KYGBge3Igcm93cy5wcmludCA9IDIwfQ0KcmVzIDwtIG1hcDIoZGlkX21vZCRncm91cCwgZGlkX21vZCR0LCBBVFRfZ3JvdXBfdGltZSkgfD4NCiAgYmluZF9yb3dzKCkgfD4NCiAgbXV0YXRlKGRpZF9lc3QgPSBkaWRfbW9kJGF0dCwgLmFmdGVyID0gIm15X2VzdCIpIHw+DQogIG11dGF0ZShjb3JyZWN0ID0gYWJzKG15X2VzdCAtIGRpZF9lc3QpIDwgMWUtNiwgLmFmdGVyID0gImRpZF9lc3QiKQ0Kc3RvcGlmbm90KGFsbChyZXMkY29ycmVjdCkpDQpyZXMgfD4gc2VsZWN0KGdyb3VwLCB5ZWFyLCBteV9lc3QsIGRpZF9lc3QsIGNvcnJlY3QsIHN1Yl9uKQ0KYGBgDQoNCkh1cnJhaCAtIGl0IHdvcmtlZCEgSSdtIGdvaW5nIHRvIHdvcnJ5IGFib3V0IHRoZSBzdGFuZGFyZCBlcnJvcnMgYW5vdGhlciBkYXkuDQoNCg0KIyMjIFBvb2xpbmcgdGhlIEFUVChnLHQpcz8NCg0KSGVyZSBhcmUge2RpZH0ncyBlc3RpbWF0ZXMsIHBvb2xlZCBieSBncm91cDoNCg0KYGBge3J9DQpkaWRfYWdnX2dyb3VwIDwtIGFnZ3RlKGRpZF9tb2QsIHR5cGUgPSAiZ3JvdXAiKQ0KZGlkX2FnZ19ncm91cA0KYGBgDQoNCg0KV2UgY2FuIHJlcGxpY2F0ZSB0aGUgZXN0aW1hdGVkIGJ5IGp1c3QgYXZlcmFnaW5nIHRoZSBBVFQoZyx0KSBlc3RpbWF0ZXMgYWNyb3NzIHRoZSB0J3M6DQoNCmBgYHtyfQ0KcG9vbF9lc3RpbWF0ZXNfbmFpdmUgPC0gZnVuY3Rpb24oZXN0cykgew0KICBkYXRhLmZyYW1lKA0KICAgIG15X3Bvb2xlZF9lc3QgPSBtZWFuKGVzdHMpLA0KICAgIG5fdCA9IGxlbmd0aChlc3RzKQ0KICApDQp9DQoNCmFnZ19yZXMgPC0gcmVzIHw+DQogIGZpbHRlcih5ZWFyID49IGdyb3VwKSB8Pg0KICBncm91cF9ieShncm91cCkgfD4NCiAgc3VtbWFyaXNlKHBvb2xfZXN0aW1hdGVzX25haXZlKG15X2VzdCkpIHw+DQogIG11dGF0ZShkaWRfYWdnX2VzID0gZGlkX2FnZ19ncm91cCRhdHQuZWd0LCAuYWZ0ZXIgPSAibXlfcG9vbGVkX2VzdCIpIHw+DQogIG11dGF0ZShjb3JyZWN0ID0gYWJzKG15X3Bvb2xlZF9lc3QgLSBkaWRfYWdnX2VzKSA8IDFlLTYpDQpzdG9waWZub3QoYWxsKGFnZ19yZXMkY29ycmVjdCkpDQoNCmFnZ19yZXMNCmBgYA0KDQpUaGF0IHdvcmtzLg0KDQoNCg0KIyMjIFJlZmVyZW5jZXMNCg0KQ2FsbGF3YXksIEIuLCAmIFNhbnTigJlBbm5hLCBQLiBILiBDLiAoMjAyMSkuIFtEaWZmZXJlbmNlLWluLURpZmZlcmVuY2VzIHdpdGggbXVsdGlwbGUgdGltZSBwZXJpb2RzXShodHRwczovL2RvaS5vcmcvMTAuMTAxNi9qLmplY29ub20uMjAyMC4xMi4wMDEpLiBKb3VybmFsIG9mIEVjb25vbWV0cmljcywgMjI1KDIpLCAyMDDigJMyMzAuIA0KDQo=