I was curious if there’s a positive correlation between the total number of users on a server and how many followers I have from that server.

Include some packages…

library(rtoot)
library(stringr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ purrr     1.0.1
✔ forcats   1.0.0     ✔ readr     2.1.4
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Get {rtoot} authorised to talk to my server:

# Run the {rtoot} authorisation setup for the tech.lgbt instance ("user" type).
auth_setup(instance = "tech.lgbt", type = "user")

I’ll need my ID: who am I?

# Search for my handle, then show the identifying columns of the first match.
acc <- search_accounts("@Andi@tech.lgbt")
head(select(acc, id, acct, display_name), n = 1)

It me!

# My account ID, kept as a string and passed to the follower lookup below.
# NOTE(review): presumably copied by hand from the search result above — confirm
# it still matches if the account is ever migrated.
whoami <- "109273348690338129"

There’s a handy function in {rtoot} for getting all followers; however, it doesn’t (or didn’t, end of 2022) support auto-pagination. After reading the friendly manual, here’s a workaround:

# Fetch every page of followers for an account and return them as one table.
#
# Arguments:
#   id    Account ID passed straight through to get_account_followers().
#   sure  Safety catch: must be exactly "Yes, I know what I am doing",
#         otherwise the function stops before making any request.
#
# Returns the row-bound result of all pages fetched.
#
# Pagination: after each request the "headers" attribute of the returned page
# is inspected; if it carries a `max_id` entry, that value is used as the
# cursor for the next request. Fetching stops when no `max_id` is present, or
# when the cursor fails to advance (which would otherwise loop forever).
really_get_all_followers <- function(id, sure = "No!") {
  stopifnot(sure == "Yes, I know what I am doing")

  # Collect pages in a list and bind once at the end, rather than re-binding
  # the whole accumulated table on every iteration.
  pages <- list()
  max_id <- NULL

  repeat {
    next_lot <- get_account_followers(id, max_id = max_id)
    pages[[length(pages) + 1L]] <- next_lot

    attrs <- attr(next_lot, "headers")
    next_max_id <- if ("max_id" %in% names(attrs)) attrs$max_id else NULL

    # No cursor means this was the last page; an unchanged cursor means the
    # server is not advancing, so stop rather than refetch the same page.
    if (is.null(next_max_id) || identical(next_max_id, max_id)) {
      break
    }
    max_id <- next_max_id
  }

  bind_rows(pages)
}

Get my followers:

# Pull the full follower list for my account; the `sure` string is the
# explicit opt-in the function insists on.
my_followers <- really_get_all_followers(
  id = whoami, sure = "Yes, I know what I am doing"
)

This number is correct: it worked!

# Number of follower rows retrieved (one row per follower account).
nrow(my_followers)
[1] 494

What servers are they from?

# Extract the server part of each follower's `acct` handle.
#
# `followers` must have an `acct` column. Each handle is split at the first
# "@" into at most two pieces and the second piece is returned, so handles
# without an "@" yield "".
get_servers <- function(followers) {
  handle_parts <- str_split_fixed(followers$acct, pattern = "@", n = 2)
  handle_parts[, 2]
}
# Attach each follower's server; handles with no server part come back as ""
# and are filled in with tech.lgbt.
followers_servers <- mutate(
  my_followers,
  server = get_servers(my_followers),
  server = ifelse(server == "", "tech.lgbt", server)
)

Here are the counts:

# Followers per server, largest first.
server_count <- count(followers_servers, server, name = "n", sort = TRUE)

server_count

Check everything adds up:

# The per-server counts should add back up to the total number of followers.
sum(server_count$n)
[1] 494

So far so good.

Next up, how many users are there on each of those servers? Note the exception handling…

# Look up how many users a server reports.
#
# Arguments:
#   server  Server name passed to get_instance_general().
#
# Returns a length-one double: the `stats$user_count` field of the response
# when it is a single numeric value, otherwise NA_real_ (request failed, field
# missing, non-numeric, or not length one). The return type is always double
# so results combine cleanly into a numeric vector.
#
# Side effect: on error, the server name and the condition are printed to the
# console so failures stay visible in the knitted output.
get_user_count <- function(server) {
  # This will catch problems like missing servers
  res <- tryCatch(
    get_instance_general(server)$stats$user_count,
    error = function(e) {
      cat("***", server, "***\n", sep = "")
      print(e)
      cat("\n")
      NULL
    }
  )

  # Plain if/else: the condition is scalar, so ifelse() is not needed.
  if (is.numeric(res) && length(res) == 1) {
    as.numeric(res)
  } else {
    NA_real_
  }
}
# Query every server in turn. map() returns a list, so `server_user_n` is
# stored as a list column with one element per server.
server_count <- mutate(
  server_count,
  server_user_n = map(server, get_user_count)
)
***bbs.crumplab.com***
<simpleError in curl::curl_fetch_memory(url, handle = handle): SSL peer certificate or SSH remote key was not OK: [bbs.crumplab.com] schannel: SEC_E_UNTRUSTED_ROOT (0x80090325) - The certificate chain was issued by an authority that is not trusted.>

***bv.umbrellix.org***
<simpleError: something went wrong. Status code: 503>

***bytebuilders.uk***
<simpleError in curl::curl_fetch_memory(url, handle = handle): SSL peer certificate or SSH remote key was not OK: [bytebuilders.uk] schannel: SEC_E_UNTRUSTED_ROOT (0x80090325) - The certificate chain was issued by an authority that is not trusted.>

***calckey.social***
<simpleError: something went wrong. Status code: 500>

***fedi.astrid.tech***
<simpleError in curl::curl_fetch_memory(url, handle = handle): Timeout was reached: [fedi.astrid.tech] Connection timeout after 10012 ms>

***firefish.social***
<simpleError: something went wrong. Status code: 500>

***iscurrently.live***
<simpleError: something went wrong. Status code: 522>
No encoding supplied: defaulting to UTF-8.
***mythago.space***
<simpleError in curl::curl_fetch_memory(url, handle = handle): SSL peer certificate or SSH remote key was not OK: [mythago.space] schannel: SEC_E_UNTRUSTED_ROOT (0x80090325) - The certificate chain was issued by an authority that is not trusted.>

***social.ebusinessworkshop.co.uk***
<simpleError in curl::curl_fetch_memory(url, handle = handle): Could not resolve host: social.ebusinessworkshop.co.uk>

***toot.theresnotime.io***
<simpleError in curl::curl_fetch_memory(url, handle = handle): schannel: next InitializeSecurityContext failed: SEC_E_ILLEGAL_MESSAGE (0x80090326) - This error usually occurs when a fatal SSL/TLS alert is received (e.g. handshake failed). More detail may be available in the Windows System event log.>
# Inspect the table now that the `server_user_n` column has been added.
server_count

Hmmm, something went wrong… `server_user_n` has come out as a list column rather than a plain numeric vector. Quick fix:

# Flatten `server_user_n` into a plain numeric column: take the single value
# where an element has length one, otherwise NA. vapply() pins the result to
# a double vector (also for empty input), and the scalar test uses if/else
# rather than ifelse().
server_count$server_user_n2 <- vapply(
  server_count$server_user_n,
  \(x) if (length(x) == 1) as.numeric(x[[1]]) else NA_real_,
  numeric(1)
)
server_count

A couple of histograms:

# Stack the two measures (followers per server, users per server) into one
# long column so each gets its own histogram facet with free scales.
server_count |>
  select(n, server_user_n2) |>
  na.omit() |>
  pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value"
  ) |>
  mutate(
    nice_name = case_when(
      key == "server_user_n2" ~ "Users on server",
      key == "n" ~ "Followers"
    )
  ) |>
  ggplot(aes(x = value)) +
  geom_histogram(bins = 40) +
  facet_wrap(vars(nice_name), scales = "free") +
  labs(x = "Users", y = "Freq")

A scatterplot:

# Followers per server against server size, both on a log10 scale, with the
# home server (tech.lgbt) picked out in its own colour.
server_count |>
  na.omit() |>
  mutate(
    home = factor(
      ifelse(server == "tech.lgbt", "My home server", "Elsewhere"),
      levels = c("My home server", "Elsewhere")
    )
  ) |>
  ggplot(aes(x = log10(server_user_n2), y = log10(n), colour = home)) +
  geom_point() +
  # Colours follow the factor level order: home server first, then the rest.
  scale_colour_manual(values = c("magenta", "black")) +
  # theme_bw() +
  labs(
    title = "Follower count by server",
    x = expression(log[10]~(total~server~users)),
    y = expression(log[10]~(followers)),
    colour = ""
  )

There is indeed a correlation:

# Kendall rank correlation between followers per server (`n`) and server
# size (`server_user_n2`), via the formula interface of cor.test().
cor.test(~ n + server_user_n2, data = server_count, method = "kendall")

    Kendall's rank correlation tau

data:  n and server_user_n2
z = 6.0364, p-value = 1.576e-09
alternative hypothesis: true tau is not equal to 0
sample estimates:
      tau 
0.3970694 

Last run (or at least knitted) Mon Aug 7 21:42:09 2023.

LS0tDQp0aXRsZTogIlBsYXlpbmcgd2l0aCB7cnRvb3R9Ig0KYXV0aG9yOiAiQGFuZGlAdGVjaC5sZ2J0Ig0KZGF0ZTogMTMgTm92IDIwMjINCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIGNvZGVfZm9sZGluZzogbm9uZQ0KLS0tDQoNCkkgd2FzIGN1cmlvdXMgaWYgdGhlcmUncyBhIHBvc2l0aXZlIGNvcnJlbGF0aW9uIGJldHdlZW4gdGhlIHRvdGFsIG51bWJlciBvZiB1c2VycyBvbiBhIHNlcnZlciBhbmQgaG93IG1hbnkgZm9sbG93ZXJzIEkgaGF2ZSBmcm9tIHRoYXQgc2VydmVyLg0KDQpJbmNsdWRlIHNvbWUgcGFja2FnZXMuLi4NCg0KYGBge3J9DQpsaWJyYXJ5KHJ0b290KQ0KbGlicmFyeShzdHJpbmdyKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KDQpHZXQge3J0b290fSBhdXRob3Jpc2VkIHRvIHRhbGsgdG8gbXkgc2VydmVyOg0KDQpgYGB7ciBldmFsPUZBTFNFfQ0KYXV0aF9zZXR1cChpbnN0YW5jZSA9ICJ0ZWNoLmxnYnQiLCB0eXBlID0idXNlciIpDQpgYGANCg0KSSdsbCBuZWVkIG15IElEOiB3aG8gYW0gST8NCg0KYGBge3J9DQphY2MgPC0gc2VhcmNoX2FjY291bnRzKCJAQW5kaUB0ZWNoLmxnYnQiKQ0KYWNjIHw+DQogIHNlbGVjdChpZCwgYWNjdCwgZGlzcGxheV9uYW1lKSB8Pg0KICBoZWFkKDEpDQpgYGANCg0KSXQgbWUhDQoNCmBgYHtyfQ0Kd2hvYW1pIDwtICIxMDkyNzMzNDg2OTAzMzgxMjkiDQpgYGANCg0KDQpUaGVyZSdzIGEgaGFuZHkgZnVuY3Rpb24gaW4ge3J0b290fSBmb3IgZ2V0dGluZyBhbGwgZm9sbG93ZXJzOyBob3dldmVyLCBpdCBkb2Vzbid0IChvciBkaWRuJ3QsIGVuZCBvZiAyMDIyKSBzdXBwb3J0IGF1dG8tcGFnaW5hdGlvbi4gQWZ0ZXIgW3JlYWRpbmcgdGhlIGZyaWVuZGx5IG1hbnVhbF0oaHR0cHM6Ly9naXRodWIuY29tL3NjaG9jaGFzdGljcy9ydG9vdC93aWtpL1BhZ2luYXRpb24pLCBoZXJlJ3MgYSB3b3JrYXJvdW5kOg0KDQpgYGB7cn0NCnJlYWxseV9nZXRfYWxsX2ZvbGxvd2VycyA8LSBmdW5jdGlvbihpZCwgc3VyZSA9ICJObyEiKSB7DQogIHN0b3BpZm5vdChzdXJlID09ICJZZXMsIEkga25vdyB3aGF0IEkgYW0gZG9pbmciKQ0KICANCiAgZm9sbG93ZXJzIDwtIGMoKQ0KICBzdGlsbF93b3JraW5nIDwtIFRSVUUNCiAgbWF4X2lkIDwtIE5VTEwNCiAgDQogIHdoaWxlIChzdGlsbF93b3JraW5nKSB7DQogICAgbmV4dF9sb3QgPC0gZ2V0X2FjY291bnRfZm9sbG93ZXJzKGlkLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtYXhfaWQgPSBtYXhfaWQpDQogICAgZm9sbG93ZXJzIDwtIGJpbmRfcm93cyhmb2xsb3dlcnMsIG5leHRfbG90KQ0KICANCiAgICBhdHRycyA8LSBhdHRyKG5leHRfbG90LCAiaGVhZGVycyIpDQogICAgaWYgKCJtYXhfaWQiICVpbiUgbmFtZXMoYXR0cnMpKQ0KICAgICAgbWF4X2lkIDwtIGF0dHJzJG1heF9pZA0KICAgIGVsc2UNCiAgICAgIHN0aWxsX3dvcmtpbmcgPC0gRkFMU0UNCiAgfQ0KICANCiAgZm9sbG93ZXJzDQp9
DQpgYGANCg0KDQpHZXQgbXkgZm9sbG93ZXJzOg0KDQpgYGB7cn0NCm15X2ZvbGxvd2VycyA8LSByZWFsbHlfZ2V0X2FsbF9mb2xsb3dlcnMoDQogIHdob2FtaSwNCiAgc3VyZSA9ICJZZXMsIEkga25vdyB3aGF0IEkgYW0gZG9pbmciDQopDQpgYGANCg0KDQpUaGlzIG51bWJlciBpcyBjb3JyZWN0OiBpdCB3b3JrZWQhDQoNCmBgYHtyfQ0KbnJvdyhteV9mb2xsb3dlcnMpDQpgYGANCg0KDQpXaGF0IHNlcnZlcnMgYXJlIHRoZXkgZnJvbT8NCg0KYGBge3J9DQpnZXRfc2VydmVycyA8LSBmdW5jdGlvbihmb2xsb3dlcnMpIHsNCiAgc2VydmVycyA8LSBmb2xsb3dlcnMkYWNjdCB8PiBzdHJfc3BsaXRfZml4ZWQoIkAiLCAyKQ0KICBzZXJ2ZXJzWywyXQ0KfQ0KYGBgDQoNCg0KYGBge3J9DQpmb2xsb3dlcnNfc2VydmVycyA8LSBteV9mb2xsb3dlcnMgfD4NCiAgbXV0YXRlKHNlcnZlciA9IGdldF9zZXJ2ZXJzKG15X2ZvbGxvd2VycykpIHw+DQogIG11dGF0ZShzZXJ2ZXIgPSBpZmVsc2Uoc2VydmVyID09ICIiLCAidGVjaC5sZ2J0Iiwgc2VydmVyKSkNCmBgYA0KDQpIZXJlIGFyZSB0aGUgY291bnRzOg0KDQpgYGB7cn0NCnNlcnZlcl9jb3VudCA8LSBmb2xsb3dlcnNfc2VydmVycyB8Pg0KICBncm91cF9ieShzZXJ2ZXIpIHw+DQogIHN1bW1hcmlzZShuID0gbigpKSB8Pg0KICBhcnJhbmdlKGRlc2MobikpDQoNCnNlcnZlcl9jb3VudA0KYGBgDQoNCkNoZWNrIGV2ZXJ5dGhpbmcgYWRkcyB1cDoNCg0KYGBge3J9DQpzZXJ2ZXJfY291bnQkbiB8PiBzdW0oKQ0KYGBgDQoNClNvIGZhciBzbyBnb29kLg0KDQpOZXh0IHVwLCBob3cgbWFueSB1c2VycyBhcmUgdGhlcmUgb24gZWFjaCBvZiB0aG9zZSBzZXJ2ZXJzPyBOb3RlIHRoZSBleGNlcHRpb24gaGFuZGxpbmcuLi4NCg0KYGBge3J9DQpnZXRfdXNlcl9jb3VudCA8LSBmdW5jdGlvbihzZXJ2ZXIpIHsNCiAgcmVzIDwtIE5BDQogIA0KICAjIFRoaXMgd2lsbCBjYXRjaCBwcm9ibGVtcyBsaWtlIG1pc3Npbmcgc2VydmVycw0KICB0cnlDYXRjaCgNCiAgICByZXMgPC0gZ2V0X2luc3RhbmNlX2dlbmVyYWwoc2VydmVyKSRzdGF0cyR1c2VyX2NvdW50LA0KICAgIGVycm9yID0gZnVuY3Rpb24oZSkgew0KICAgICAgICBjYXQoIioqKiIpDQogICAgICAgIGNhdChzZXJ2ZXIpDQogICAgICAgIGNhdCgiKioqIikNCiAgICAgICAgY2F0KCJcbiIpDQogICAgICAgIHByaW50KGUpDQogICAgICAgIGNhdCgiXG4iKQ0KICAgICAgfQ0KICApDQogIGlmZWxzZShpcy5udW1lcmljKHJlcyksIGlmZWxzZShsZW5ndGgocmVzKSA9PSAxLCByZXMsIE5BKSwgTkEpDQp9DQpgYGANCg0KDQpgYGB7cn0NCnNlcnZlcl9jb3VudCRzZXJ2ZXJfdXNlcl9uIDwtIG1hcChzZXJ2ZXJfY291bnQkc2VydmVyLCBnZXRfdXNlcl9jb3VudCkNCnNlcnZlcl9jb3VudA0KYGBgDQoNCkhtbW0gc29tZXRoaW5nIHdlbnQgd3JvbmcuLi4gUXVpY2sgZml4Og0KDQpgYGB7cn0NCnNlcnZlcl9jb3VudCRzZXJ2ZXJfdXNlcl9uMiA8LQ0KICBzZXJ2
ZXJfY291bnQkc2VydmVyX3VzZXJfbiB8PiBzYXBwbHkoXCh4KSBpZmVsc2UobGVuZ3RoKHgpID09IDEsIHhbWzFdXSwgTkEpKQ0KYGBgDQoNCg0KYGBge3J9DQpzZXJ2ZXJfY291bnQNCmBgYA0KDQoNCkEgY291cGxlIG9mIGhpc3RvZ3JhbXM6DQoNCmBgYHtyfQ0Kc2VydmVyX2NvdW50IHw+DQogIHNlbGVjdChuLCBzZXJ2ZXJfdXNlcl9uMikgfD4NCiAgbmEub21pdCgpICB8Pg0KICBwaXZvdF9sb25nZXIoY29scyA9IGV2ZXJ5dGhpbmcoKSwNCiAgICAgICAgICAgICAgIG5hbWVzX3RvICA9ICJrZXkiLA0KICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gInZhbHVlIikgfD4NCiAgbXV0YXRlKG5pY2VfbmFtZSA9IGNhc2Vfd2hlbihrZXkgPT0gIm4iIH4gIkZvbGxvd2VycyIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAga2V5ID09ICJzZXJ2ZXJfdXNlcl9uMiIgfiAiVXNlcnMgb24gc2VydmVyIikpIHw+DQogIGdncGxvdChhZXModmFsdWUpKSArDQogICAgZmFjZXRfd3JhcCh+IG5pY2VfbmFtZSwgc2NhbGVzID0gImZyZWUiKSArDQogICAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDQwKSArDQogICAgbGFicyh4ID0gIlVzZXJzIiwgeSA9ICJGcmVxIikNCmBgYA0KDQoNCg0KQSBzY2F0dGVycGxvdDoNCg0KYGBge3J9DQpzZXJ2ZXJfY291bnQgfD4NCiAgbmEub21pdCgpIHw+DQogIG11dGF0ZShob21lID0gaWZlbHNlKHNlcnZlciA9PSAidGVjaC5sZ2J0IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJNeSBob21lIHNlcnZlciIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiRWxzZXdoZXJlIiksDQogICAgICAgICBob21lID0gZmFjdG9yKGhvbWUsDQogICAgICAgICAgICAgICAgICAgICAgIGMoIk15IGhvbWUgc2VydmVyIiwgIkVsc2V3aGVyZSIpKSkgfD4NCiAgZ2dwbG90KGFlcyh5ID0gbG9nKG4sIDEwKSwNCiAgICAgICAgICAgICB4ID0gbG9nKHNlcnZlcl91c2VyX24yLCAxMCksDQogICAgICAgICAgICAgY29sb3VyID0gaG9tZSkpICsNCiAgZ2VvbV9wb2ludCgpICsNCiAgc2NhbGVfY29sb3VyX21hbnVhbCh2YWx1ZXMgPSBjKCJtYWdlbnRhIiwgImJsYWNrIikpICsNCiAgI3RoZW1lX2J3KCkgKw0KICBsYWJzKHkgPSBleHByZXNzaW9uKGxvZ1sxMF1+KGZvbGxvd2VycykpLA0KICAgICAgIHggPSBleHByZXNzaW9uKGxvZ1sxMF1+KHRvdGFsfnNlcnZlcn51c2VycykpLA0KICAgICAgIHRpdGxlID0gIkZvbGxvd2VyIGNvdW50IGJ5IHNlcnZlciIsDQogICAgICAgY29sb3VyID0gIiIpDQpgYGANCg0KVGhlcmUgaXMgaW5kZWVkIGEgY29ycmVsYXRpb246DQoNCmBgYHtyfQ0KY29yLnRlc3QofiBuICsgc2VydmVyX3VzZXJfbjIsIGRhdGEgPSBzZXJ2ZXJfY291bnQsIG1ldGhvZCA9ICJrZW5kYWxsIikNCmBgYA0KDQpMYXN0IHJ1biAob3IgYXQgbGVhc3Qga25pdHRlZCkgYHIgZGF0ZSgpYC4NCg0KDQo=