Load in some packages:

library(conflicted)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

Download the digits (in a zip file) from https://www.mersenne.org/primes/?press=M136279841

temp_dir  <- tempdir(check = TRUE)
temp_file <- tempfile(pattern = "primezip",
                      tmpdir = temp_dir)
download.file("https://www.mersenne.org/primes/digits/M136279841.zip",
              temp_file)

Unzip:

unzip(temp_file, files = "m136279841.txt", exdir = temp_dir)

Load it into R:

filename <- paste(temp_dir, "m136279841.txt", sep = "\\")
digits_str <- readLines(filename) |> paste(collapse = "")

Transform into a 1D vector of digits:

digits <- digits_str |>
  strsplit("") |>
  pluck(1) |>
  as.numeric()

Check the first and last three digits:

digits[1:3]
## [1] 8 8 1
digits[(length(digits) - 2):length(digits)]
## [1] 5 5 1

Count all the digits:

length(digits)
## [1] 41024320

Make a few tables and a pic:

dig_table <- table(digits) |> as.data.frame()
dig_table <- dig_table |>
  mutate(perc = 100 * Freq / sum(Freq))
dig_table
dig_table |>
  ggplot(aes(x = digits, y = Freq)) +
  geom_point() +
  labs(x = "Digit", y = "Frequency", title = "Digit frequency in M136279841") +
  theme(legend.position = "none")

dig_table |> arrange(Freq)

Are the digits all equally likely?

chisq.test(dig_table$Freq)
## 
##  Chi-squared test for given probabilities
## 
## data:  dig_table$Freq
## X-squared = 5.2656, df = 9, p-value = 0.8106
chisq.test(dig_table$Freq)$stdres
##  [1]  0.5459257  0.1014829 -0.9253154  1.6866970 -0.1790262  0.5141798
##  [7] -1.1079846  0.0166536 -0.0983603 -0.5542525

No evidence they aren’t.

How much more frequent would the 3 need to be to get a statistically significant \(\chi^2\)?

sens_check <- dig_table$Freq
the_p <- chisq.test(sens_check)$p.value
add_on <- 0

while (the_p > .05) {
  add_on <- add_on + 1
  sens_check[4] <- dig_table$Freq[4] + add_on
  the_p <- chisq.test(sens_check)$p.value
}

add_on
## [1] 4529
chisq.test(sens_check)
## 
##  Chi-squared test for given probabilities
## 
## data:  sens_check
## X-squared = 16.92, df = 9, p-value = 0.04999
chisq.test(sens_check)$stdres
##  [1]  0.3102081 -0.1342102 -1.1609518  3.8077907 -0.4147037  0.2784640
##  [7] -1.3436108 -0.2190347 -0.3340423 -0.7899093
dig_table |>
  mutate(Freq = sens_check) |>
  ggplot(aes(x = digits, y = Freq)) +
  geom_point() +
  labs(x = "Digit", y = "Frequency", title = "Mini freq for 3 needed to get a chi-square test with p < .05") +
  theme(legend.position = "none")

twogram <- function(test_vec) {
  res <- matrix(0, nrow = 10, ncol = 10)
  rownames(res) <- 0:9
  colnames(res) <- 0:9
  for (i in 1:(length(test_vec) - 1)) {
    res[test_vec[i] + 1,
        test_vec[i + 1] + 1] <- res[test_vec[i] + 1,
                                    test_vec[i + 1] + 1] + 1
  }
  res
}
two_grams <- twogram(digits)
two_grams - min(two_grams)
##      0    1    2    3    4    5    6    7    8    9
## 0 2477 1396  440 1619 2004 1535 1684 1376 1583 1317
## 1 1640  738 1958 1810 1698 1366  769 1925 1672 1000
## 2 1391 1378 1893 2165  827  123   88 1659 2009 1071
## 3 2527 1709 1791 2480 2501 2188 1238  576  733 1880
## 4  761 2450  533 1619  546  887 1029 2381 1200 2632
## 5 1516 1693 1902 1790 2541 1890 1466  766 1163  643
## 6 1216 1456  773 1036  664  987 1565 1472 1490 1594
## 7 1061  767 1716 1905 1074 2079 1348  875 2240 1349
## 8  951 2053 1598 1728  853 2858 1774 1329  659  390
## 9 1891  937    0 1471 1330 1457 1292 2055 1443 1441
chisq.test(two_grams)
## 
##  Pearson's Chi-squared test
## 
## data:  two_grams
## X-squared = 77.275, df = 81, p-value = 0.5966