library(conflicted)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom 1.0.6 ✔ rsample 1.2.1
## ✔ dials 1.3.0 ✔ tune 1.2.1
## ✔ infer 1.0.7 ✔ workflows 1.1.4
## ✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
## ✔ parsnip 1.2.1 ✔ yardstick 1.3.1
## ✔ recipes 1.1.0
This one is from https://www.linkedin.com/pulse/main-chance-stephen-senn-c0u2e/
senn_dat <- read.csv(text = "treat,sex,events,patients
Ctr,F,18,100
Int,F,5,100
Ctr,M,5,50
Int,M,4,50") |>
mutate(sex = as.factor(sex), treat = as.factor(treat))
senn_dat
no_interaction <- read.csv(text = "treat,sex,events,patients
Ctr,F,20,100
Int,F,4,100
Ctr,M,10,50
Int,M,2,50") |>
mutate(sex = as.factor(sex), treat = as.factor(treat))
no_interaction
strong_interaction <- read.csv(text = "treat,sex,events,patients
Ctr,F,20,100
Int,F,4,100
Ctr,M,2,50
Int,M,10,50") |>
mutate(sex = as.factor(sex), treat = as.factor(treat))
strong_interaction
dat <- senn_dat
Statistician A:
modA <- glm(cbind(events, patients - events) ~ treat * sex,
data = dat,
family = binomial)
summary(modA)
##
## Call:
## glm(formula = cbind(events, patients - events) ~ treat * sex,
## family = binomial, data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5163 0.2603 -5.826 5.69e-09 ***
## treatInt -1.4281 0.5275 -2.707 0.00679 **
## sexM -0.6809 0.5385 -1.264 0.20608
## treatInt:sexM 1.1830 0.8788 1.346 0.17825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9.3264e+00 on 3 degrees of freedom
## Residual deviance: 4.3077e-14 on 0 degrees of freedom
## AIC: 22.527
##
## Number of Fisher Scoring iterations: 3
Statistician B:
modB <- glm(
cbind(events, patients - events) ~ treat * relevel(sex, "M"),
data = dat,
family = binomial
)
summary(modB)
##
## Call:
## glm(formula = cbind(events, patients - events) ~ treat * relevel(sex,
## "M"), family = binomial, data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1972 0.4714 -4.661 3.15e-06 ***
## treatInt -0.2451 0.7028 -0.349 0.727
## relevel(sex, "M")F 0.6809 0.5385 1.264 0.206
## treatInt:relevel(sex, "M")F -1.1830 0.8788 -1.346 0.178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9.3264e+00 on 3 degrees of freedom
## Residual deviance: 1.2434e-14 on 0 degrees of freedom
## AIC: 22.527
##
## Number of Fisher Scoring iterations: 3
Statistician C:
dat$sexC <- dat$sex
contrasts(dat$sexC) <- c(-1, 1)
contrasts(dat$sexC)
## [,1]
## F -1
## M 1
modC <- glm(
cbind(events, patients - events) ~ treat * sexC,
data = dat,
family = binomial
)
summary(modC)
##
## Call:
## glm(formula = cbind(events, patients - events) ~ treat * sexC,
## family = binomial, data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8568 0.2692 -6.896 5.34e-12 ***
## treatInt -0.8366 0.4394 -1.904 0.0569 .
## sexC1 -0.3404 0.2692 -1.264 0.2061
## treatInt:sexC1 0.5915 0.4394 1.346 0.1782
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9.3264e+00 on 3 degrees of freedom
## Residual deviance: 2.2204e-14 on 0 degrees of freedom
## AIC: 22.527
##
## Number of Fisher Scoring iterations: 3
Statistician D:
modD_M <- glm(
cbind(events, patients - events) ~ treat,
data = dat,
family = binomial,
subset = sex == "M"
)
modD_F <- glm(
cbind(events, patients - events) ~ treat,
data = dat,
family = binomial,
subset = sex == "F"
)
res <- bind_rows(tidy(modD_M) |> mutate(sex = "M"),
tidy(modD_F) |> mutate(sex = "F")) |>
dplyr::filter(term == "treatInt") |>
mutate(var = std.error ^ 2,
weight = 1 / var) |>
dplyr::select(-c(statistic, p.value))
res
Pooled effect, SE, and z
pooled_mod <- tibble(
term = "treatInt",
estimate = sum(res$estimate * res$weight) / sum(res$weight),
std.error = sqrt(1/sum(res$weight)),
statistic = estimate / std.error,
p.value = pnorm(abs(statistic), lower.tail = FALSE) * 2
)
pooled_mod
bind_rows(
tidy(modA) |> mutate(mod = "A"),
tidy(modB) |> mutate(mod = "B"),
tidy(modC) |> mutate(mod = "C"),
pooled_mod |> mutate(mod = "D")
) |>
dplyr::filter(term == "treatInt") |>
select(mod, estimate, std.error, statistic, p.value) |>
mutate(across(where(is.numeric), \(x) round(x,3)))
Check the interaction for model A, B, or C – all give the same answer:
bind_rows(
drop1(modA, test = "Chi") |> tidy() |> mutate(mod = "A"),
drop1(modB, test = "Chi") |> tidy() |> mutate(mod = "B"),
drop1(modC, test = "Chi") |> tidy() |> mutate(mod = "C")
) |>
dplyr::filter(term != "<none>") |>
dplyr::select(mod, df, LRT, p.value)
## Warning in tidy.anova(drop1(modA, test = "Chi")): The following column names in
## ANOVA output were not recognized or transformed: LRT
## Warning in tidy.anova(drop1(modB, test = "Chi")): The following column names in
## ANOVA output were not recognized or transformed: LRT
## Warning in tidy.anova(drop1(modC, test = "Chi")): The following column names in
## ANOVA output were not recognized or transformed: LRT
When looking only at main effects of treatment, it doesn’t matter how sex is coded:
modG1 <- glm(
cbind(events, patients - events) ~ treat + relevel(sex, "F"),
data = dat,
family = binomial
)
modG2 <- glm(
cbind(events, patients - events) ~ treat + relevel(sex, "M"),
data = dat,
family = binomial
)
modG3 <- glm(
cbind(events, patients - events) ~ treat + sexC,
data = dat,
family = binomial
)
bind_rows(modG1 |> tidy() |> mutate(mod = "G1"),
modG2 |> tidy() |> mutate(mod = "G2"),
modG3 |> tidy() |> mutate(mod = "G3")) |>
mutate(term = ifelse(str_detect(term, "sex"), "sex", term)) |>
mutate(across(where(is.numeric), \(x) round(x, 3))) |>
select(mod, term, estimate, std.error) |>
arrange(term)