ONS recently released data about sexual orientation and gender identity from Census 2021 in England and Wales

I’d like to know whether your gender predicts your sexuality. ONS hasn’t released the relevant crosstabs yet, so here’s an approximation using variation in population counts across (lower-tier) local authorities. Beware the ecological fallacy, e.g., this might show that areas with more people of a particular sexuality also have more people of a particular gender, but not necessarily that they are the same people.

library(tidyverse)
library(car)

Read in the data

sexuality <- read.csv("census2021sexuality.csv") |>
  rename(LA_code = Lower.Tier.Local.Authorities.Code,  
         LA_name = Lower.Tier.Local.Authorities,        
         sexuality_code = Sexual.orientation..9.categories..Code,
         sexuality = Sexual.orientation..9.categories.,
         n = Observation)
head(sexuality)
##     LA_code    LA_name sexuality_code                sexuality     n
## 1 E06000001 Hartlepool             -8           Does not apply     0
## 2 E06000001 Hartlepool              1 Straight or Heterosexual 68070
## 3 E06000001 Hartlepool              2           Gay or Lesbian  1121
## 4 E06000001 Hartlepool              3                 Bisexual   784
## 5 E06000001 Hartlepool              4                Pansexual   118
## 6 E06000001 Hartlepool              5                  Asexual    28
gender <- read.csv("census2021gender.csv") |>
  rename(LA_code = Lower.Tier.Local.Authorities.Code,  
         LA_name = Lower.Tier.Local.Authorities,        
         gender_code = Gender.identity..8.categories..Code,
         gender = Gender.identity..8.categories.,
         n = Observation)
head(gender)
##     LA_code    LA_name gender_code
## 1 E06000001 Hartlepool          -8
## 2 E06000001 Hartlepool           1
## 3 E06000001 Hartlepool           2
## 4 E06000001 Hartlepool           3
## 5 E06000001 Hartlepool           4
## 6 E06000001 Hartlepool           5
##                                                                                  gender
## 1                                                                        Does not apply
## 2                                   Gender identity the same as sex registered at birth
## 3 Gender identity different from sex registered at birth but no specific identity given
## 4                                                                           Trans woman
## 5                                                                             Trans man
## 6                                                                            Non-binary
##       n
## 1     0
## 2 70588
## 3   167
## 4    49
## 5    51
## 6    33

Calculate per-milles

sex_wide <- sexuality |>
  group_by(LA_code, LA_name) |>
  mutate(perc = n * 1000 / sum(n)) |>
  pivot_wider(
    id_cols = c(LA_code, LA_name),
    names_from = sexuality,
    values_from = perc
  )
gender_wide <- gender |>
  group_by(LA_code, LA_name) |>
  mutate(perc = n * 1000 / sum(n)) |>
  pivot_wider(
    id_cols = c(LA_code, LA_name),
    names_from = gender,
    values_from = perc
  )

Glue together

sex_gender <- full_join(sex_wide, gender_wide, by = c("LA_code", "LA_name")) |>
  ungroup()
head(sex_gender)
## # A tibble: 6 × 19
##   LA_code LA_name Does …¹ Strai…² Gay o…³ Bisex…⁴ Panse…⁵ Asexual  Queer All o…⁶
##   <chr>   <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
## 1 E06000… Hartle…       0    911.    15.0   10.5     1.58   0.375 0.0937  0.0402
## 2 E06000… Middle…       0    895.    15.7   12.2     2.76   0.702 0.158   0.255 
## 3 E06000… Redcar…       0    913.    13.3    8.86    1.49   0.561 0.0890  0.0712
## 4 E06000… Stockt…       0    916.    14.4    9.31    1.56   0.329 0.0949  0.0886
## 5 E06000… Darlin…       0    914.    15.1   11.2     1.85   0.533 0.0794  0.0794
## 6 E06000… Halton        0    919.    15.0    9.39    1.56   0.308 0.0385  0.0673
## # … with 9 more variables: `Not answered.x` <dbl>, `Does not apply.y` <dbl>,
## #   `Gender identity the same as sex registered at birth` <dbl>,
## #   `Gender identity different from sex registered at birth but no specific identity given` <dbl>,
## #   `Trans woman` <dbl>, `Trans man` <dbl>, `Non-binary` <dbl>,
## #   `All other gender identities` <dbl>, `Not answered.y` <dbl>, and
## #   abbreviated variable names ¹​`Does not apply.x`,
## #   ²​`Straight or Heterosexual`, ³​`Gay or Lesbian`, ⁴​Bisexual, ⁵​Pansexual, …
names(sex_gender)
##  [1] "LA_code"                                                                              
##  [2] "LA_name"                                                                              
##  [3] "Does not apply.x"                                                                     
##  [4] "Straight or Heterosexual"                                                             
##  [5] "Gay or Lesbian"                                                                       
##  [6] "Bisexual"                                                                             
##  [7] "Pansexual"                                                                            
##  [8] "Asexual"                                                                              
##  [9] "Queer"                                                                                
## [10] "All other sexual orientations"                                                        
## [11] "Not answered.x"                                                                       
## [12] "Does not apply.y"                                                                     
## [13] "Gender identity the same as sex registered at birth"                                  
## [14] "Gender identity different from sex registered at birth but no specific identity given"
## [15] "Trans woman"                                                                          
## [16] "Trans man"                                                                            
## [17] "Non-binary"                                                                           
## [18] "All other gender identities"                                                          
## [19] "Not answered.y"

OLS

lm(
  `Non-binary` ~ `Straight or Heterosexual` +
    `Gay or Lesbian` +
    `Bisexual` + `Pansexual` + Queer + Asexual,
  data = sex_gender) |>
  confint() |> round(2)
##                            2.5 % 97.5 %
## (Intercept)                -3.94  -1.30
## `Straight or Heterosexual`  0.00   0.00
## `Gay or Lesbian`            0.00   0.00
## Bisexual                    0.03   0.05
## Pansexual                  -0.02   0.03
## Queer                       0.44   0.60
## Asexual                     0.28   0.45

LAs with more queer and asexual people have more nonbinary people.

lm(
  `Trans man` ~ `Straight or Heterosexual` +
    `Gay or Lesbian` +
    `Bisexual` + `Pansexual` + Queer + Asexual,
  data = sex_gender) |>
  confint() |> round(2)
##                            2.5 % 97.5 %
## (Intercept)                 1.63   6.00
## `Straight or Heterosexual` -0.01   0.00
## `Gay or Lesbian`           -0.01   0.00
## Bisexual                   -0.02   0.01
## Pansexual                   0.42   0.51
## Queer                      -0.69  -0.43
## Asexual                    -0.36  -0.08

Areas with more pansexual people and fewer queer and asexual people have more trans men.

lm(
  `Trans woman` ~ `Straight or Heterosexual` +
    `Gay or Lesbian` +
    `Bisexual` + `Pansexual` + Queer + Asexual,
  data = sex_gender) |>
  confint() |> round(2)
##                            2.5 % 97.5 %
## (Intercept)                 3.38   7.33
## `Straight or Heterosexual` -0.01   0.00
## `Gay or Lesbian`           -0.01   0.00
## Bisexual                   -0.03   0.00
## Pansexual                   0.34   0.42
## Queer                      -0.49  -0.25
## Asexual                    -0.22   0.03

Areas with more pansexual people and fewer queer and asexual people also have more trans women.

A picture

Let’s try to get this in one diagram.

library(MPsychoR)
library(qgraph)
forcors <- sex_gender |>
  select(`Straight or Heterosexual`:`All other gender identities`) |>
  select(-c(ends_with(".x"), ends_with(".y"))) |>
  rename(
    Het = "Straight or Heterosexual",
    GL  = "Gay or Lesbian",
    Bi  = "Bisexual",
    Pan = "Pansexual",
    Ace = "Asexual",
    Q   = "Queer",
    Osx = "All other sexual orientations",
    Cis = "Gender identity the same as sex registered at birth",
    NCI = "Gender identity different from sex registered at birth but no specific identity given",
    TM  = "Trans man",
    TW  = "Trans woman",
    NBi = "Non-binary",
    Ogn = "All other gender identities"
  ) |>
  select(-c(Osx,NCI,Ogn))
names(forcors)
##  [1] "Het" "GL"  "Bi"  "Pan" "Ace" "Q"   "Cis" "TW"  "TM"  "NBi"
cors <- cor(forcors)
qgraph(
  cors,
  palette = "ggplot2",
  layout = "spring",
  graph = "glasso",
  sampleSize = nrow(sex_gender),
  threshold = TRUE
)
## Note: Network with lowest lambda selected as best network: assumption of sparsity might be violated.