ONS recently released data about sexual orientation and gender identity from Census 2021 in England and Wales
I’d like to know whether your gender predicts your sexuality. ONS hasn’t released the relevant crosstabs yet, so here’s an approximation using variation in population counts across (lower-tier) local authorities. Beware the ecological fallacy, e.g., this might show that areas with more people of a particular sexuality also have more people of a particular gender, but not necessarily that they are the same people.
library(tidyverse)
library(car)
sexuality <- read.csv("census2021sexuality.csv") |>
rename(LA_code = Lower.Tier.Local.Authorities.Code,
LA_name = Lower.Tier.Local.Authorities,
sexuality_code = Sexual.orientation..9.categories..Code,
sexuality = Sexual.orientation..9.categories.,
n = Observation)
head(sexuality)
## LA_code LA_name sexuality_code sexuality n
## 1 E06000001 Hartlepool -8 Does not apply 0
## 2 E06000001 Hartlepool 1 Straight or Heterosexual 68070
## 3 E06000001 Hartlepool 2 Gay or Lesbian 1121
## 4 E06000001 Hartlepool 3 Bisexual 784
## 5 E06000001 Hartlepool 4 Pansexual 118
## 6 E06000001 Hartlepool 5 Asexual 28
gender <- read.csv("census2021gender.csv") |>
rename(LA_code = Lower.Tier.Local.Authorities.Code,
LA_name = Lower.Tier.Local.Authorities,
gender_code = Gender.identity..8.categories..Code,
gender = Gender.identity..8.categories.,
n = Observation)
head(gender)
## LA_code LA_name gender_code
## 1 E06000001 Hartlepool -8
## 2 E06000001 Hartlepool 1
## 3 E06000001 Hartlepool 2
## 4 E06000001 Hartlepool 3
## 5 E06000001 Hartlepool 4
## 6 E06000001 Hartlepool 5
## gender
## 1 Does not apply
## 2 Gender identity the same as sex registered at birth
## 3 Gender identity different from sex registered at birth but no specific identity given
## 4 Trans woman
## 5 Trans man
## 6 Non-binary
## n
## 1 0
## 2 70588
## 3 167
## 4 49
## 5 51
## 6 33
sex_wide <- sexuality |>
group_by(LA_code, LA_name) |>
mutate(perc = n * 1000 / sum(n)) |>
pivot_wider(
id_cols = c(LA_code, LA_name),
names_from = sexuality,
values_from = perc
)
gender_wide <- gender |>
group_by(LA_code, LA_name) |>
mutate(perc = n * 1000 / sum(n)) |>
pivot_wider(
id_cols = c(LA_code, LA_name),
names_from = gender,
values_from = perc
)
sex_gender <- full_join(sex_wide, gender_wide, by = c("LA_code", "LA_name")) |>
ungroup()
head(sex_gender)
## # A tibble: 6 × 19
## LA_code LA_name Does …¹ Strai…² Gay o…³ Bisex…⁴ Panse…⁵ Asexual Queer All o…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 E06000… Hartle… 0 911. 15.0 10.5 1.58 0.375 0.0937 0.0402
## 2 E06000… Middle… 0 895. 15.7 12.2 2.76 0.702 0.158 0.255
## 3 E06000… Redcar… 0 913. 13.3 8.86 1.49 0.561 0.0890 0.0712
## 4 E06000… Stockt… 0 916. 14.4 9.31 1.56 0.329 0.0949 0.0886
## 5 E06000… Darlin… 0 914. 15.1 11.2 1.85 0.533 0.0794 0.0794
## 6 E06000… Halton 0 919. 15.0 9.39 1.56 0.308 0.0385 0.0673
## # … with 9 more variables: `Not answered.x` <dbl>, `Does not apply.y` <dbl>,
## # `Gender identity the same as sex registered at birth` <dbl>,
## # `Gender identity different from sex registered at birth but no specific identity given` <dbl>,
## # `Trans woman` <dbl>, `Trans man` <dbl>, `Non-binary` <dbl>,
## # `All other gender identities` <dbl>, `Not answered.y` <dbl>, and
## # abbreviated variable names ¹`Does not apply.x`,
## # ²`Straight or Heterosexual`, ³`Gay or Lesbian`, ⁴Bisexual, ⁵Pansexual, …
names(sex_gender)
## [1] "LA_code"
## [2] "LA_name"
## [3] "Does not apply.x"
## [4] "Straight or Heterosexual"
## [5] "Gay or Lesbian"
## [6] "Bisexual"
## [7] "Pansexual"
## [8] "Asexual"
## [9] "Queer"
## [10] "All other sexual orientations"
## [11] "Not answered.x"
## [12] "Does not apply.y"
## [13] "Gender identity the same as sex registered at birth"
## [14] "Gender identity different from sex registered at birth but no specific identity given"
## [15] "Trans woman"
## [16] "Trans man"
## [17] "Non-binary"
## [18] "All other gender identities"
## [19] "Not answered.y"
lm(
`Non-binary` ~ `Straight or Heterosexual` +
`Gay or Lesbian` +
`Bisexual` + `Pansexual` + Queer + Asexual,
data = sex_gender) |>
confint() |> round(2)
## 2.5 % 97.5 %
## (Intercept) -3.94 -1.30
## `Straight or Heterosexual` 0.00 0.00
## `Gay or Lesbian` 0.00 0.00
## Bisexual 0.03 0.05
## Pansexual -0.02 0.03
## Queer 0.44 0.60
## Asexual 0.28 0.45
LAs with more queer and asexual people have more nonbinary people.
lm(
`Trans man` ~ `Straight or Heterosexual` +
`Gay or Lesbian` +
`Bisexual` + `Pansexual` + Queer + Asexual,
data = sex_gender) |>
confint() |> round(2)
## 2.5 % 97.5 %
## (Intercept) 1.63 6.00
## `Straight or Heterosexual` -0.01 0.00
## `Gay or Lesbian` -0.01 0.00
## Bisexual -0.02 0.01
## Pansexual 0.42 0.51
## Queer -0.69 -0.43
## Asexual -0.36 -0.08
Areas with more pansexual people and fewer queer and asexual people have more trans men.
lm(
`Trans woman` ~ `Straight or Heterosexual` +
`Gay or Lesbian` +
`Bisexual` + `Pansexual` + Queer + Asexual,
data = sex_gender) |>
confint() |> round(2)
## 2.5 % 97.5 %
## (Intercept) 3.38 7.33
## `Straight or Heterosexual` -0.01 0.00
## `Gay or Lesbian` -0.01 0.00
## Bisexual -0.03 0.00
## Pansexual 0.34 0.42
## Queer -0.49 -0.25
## Asexual -0.22 0.03
Areas with more pansexual people and fewer queer and asexual people also have more trans women.
Let’s try to get this in one diagram.
library(MPsychoR)
library(qgraph)
forcors <- sex_gender |>
select(`Straight or Heterosexual`:`All other gender identities`) |>
select(-c(ends_with(".x"), ends_with(".y"))) |>
rename(
Het = "Straight or Heterosexual",
GL = "Gay or Lesbian",
Bi = "Bisexual",
Pan = "Pansexual",
Ace = "Asexual",
Q = "Queer",
Osx = "All other sexual orientations",
Cis = "Gender identity the same as sex registered at birth",
NCI = "Gender identity different from sex registered at birth but no specific identity given",
TM = "Trans man",
TW = "Trans woman",
NBi = "Non-binary",
Ogn = "All other gender identities"
) |>
select(-c(Osx,NCI,Ogn))
names(forcors)
## [1] "Het" "GL" "Bi" "Pan" "Ace" "Q" "Cis" "TW" "TM" "NBi"
cors <- cor(forcors)
qgraph(
cors,
palette = "ggplot2",
layout = "spring",
graph = "glasso",
sampleSize = nrow(sex_gender),
threshold = TRUE
)
## Note: Network with lowest lambda selected as best network: assumption of sparsity might be violated.