Warning: these analyses use data from Google – data quality unclear…

library(tidyverse)
library(magrittr)
library(lubridate)

Data grabbed from Google’s COVID-19 Community Mobility Reports. To quote the docs:

"This dataset is intended to help remediate the impact of COVID-19. It shouldn’t be used for medical diagnostic, prognostic, or treatment purposes. It also isn’t intended to be used for guidance on personal travel plans.

"The data shows how visits to places, such as grocery stores and parks, are changing in each geographic region."

The categories of location are as follows:

Location — Description
Grocery & pharmacy — Mobility trends for places like grocery markets, food warehouses, farmers markets, specialty food shops, drug stores, and pharmacies.
Parks — Mobility trends for places like local parks, national parks, public beaches, marinas, dog parks, plazas, and public gardens.
Transit stations — Mobility trends for places like public transport hubs such as subway, bus, and train stations.
Retail & recreation — Mobility trends for places like restaurants, cafes, shopping centers, theme parks, museums, libraries, and movie theaters.
Residential — Mobility trends for places of residence.

Read in the data:

dat <- read.csv("2020_GB_Region_Mobility_Report.csv")

Parse the string dates into Date objects:

dat %<>% mutate(
  # Parse the ISO "YYYY-MM-DD" strings into proper Date objects.
  day = parse_date(date, "%Y-%m-%d"),
  # Keep the labelled day-of-week for display.
  day_of_week = wday(day, label = TRUE),
  # Classify weekends numerically (week_start = 1 makes Mon = 1 ... Sun = 7)
  # rather than matching the labels "Sat"/"Sun": those labels follow the
  # session locale, so the label comparison would silently mark every day
  # "Weekday" in a non-English locale.
  weekday = if_else(wday(day, week_start = 1) >= 6, "Weekend", "Weekday")
)

Grab borough-level data for Greater London:

# Keep only borough-level rows: the Greater London summary rows have an
# empty sub_region_2 and are dropped.
london_dat <- dat %>%
  filter(sub_region_1 == "Greater London", sub_region_2 != "")

# Sanity check: one row per borough per day — 33 areas, 382 days each.
london_dat %>%
  group_by(sub_region_2) %>%
  tally()
## # A tibble: 33 x 2
##    sub_region_2                               n
##    <chr>                                  <int>
##  1 City of London                           382
##  2 City of Westminster                      382
##  3 London Borough of Barking and Dagenham   382
##  4 London Borough of Barnet                 382
##  5 London Borough of Bexley                 382
##  6 London Borough of Brent                  382
##  7 London Borough of Bromley                382
##  8 London Borough of Camden                 382
##  9 London Borough of Croydon                382
## 10 London Borough of Ealing                 382
## # ... with 23 more rows

There’s a bunch of variables giving the percentage change from baseline for each on each day and in each borough:

# The six mobility measures, one wide column per location category.
names(london_dat) %>%
  str_subset("percent_change_from_baseline$")
## [1] "retail_and_recreation_percent_change_from_baseline"
## [2] "grocery_and_pharmacy_percent_change_from_baseline" 
## [3] "parks_percent_change_from_baseline"                
## [4] "transit_stations_percent_change_from_baseline"     
## [5] "workplaces_percent_change_from_baseline"           
## [6] "residential_percent_change_from_baseline"

So, for ease of plotting later, let’s tidy that a bit…

# Reshape wide -> long: one row per borough/day/location category, then
# trim the common suffix off the category names.
tidy_london <- london_dat %>%
  pivot_longer(
    cols = ends_with("percent_change_from_baseline"),
    names_to = "location",
    values_to = "percent_change_from_baseline"
  ) %>%
  mutate(location = str_remove(location, "_percent_change_from_baseline$"))

# A random peek at the long format.
tidy_london %>%
  select(location, percent_change_from_baseline) %>%
  slice_sample(n = 10)
## # A tibble: 10 x 2
##    location              percent_change_from_baseline
##    <chr>                                        <int>
##  1 grocery_and_pharmacy                           -29
##  2 parks                                          132
##  3 retail_and_recreation                           -7
##  4 grocery_and_pharmacy                            17
##  5 transit_stations                               -77
##  6 retail_and_recreation                          -82
##  7 residential                                     19
##  8 workplaces                                     -20
##  9 grocery_and_pharmacy                           -32
## 10 parks                                           14

Now a couple of plots… First setup the basic structure:

# Shared scaffold for the time-series plots: one panel per location
# category, one coloured series per borough (legend hidden — 33 boroughs
# would swamp it).
plot_base <- tidy_london %>%
  mutate(location = str_replace_all(location, "_", " ")) %>%
  ggplot(aes(x = day, y = percent_change_from_baseline,
             color = sub_region_2)) +
  facet_wrap(vars(location), ncol = 2) +
  labs(x = "Date", y = "% change from baseline") +
  theme(legend.position = "none")

First with a messy series of lines joining points:

# Raw daily values, one line per borough.
plot_base +
  geom_line()

Now with a smoother:

# One smoother per borough (loess by default), standard-error ribbon off.
plot_base +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1537 rows containing non-finite values (stat_smooth).

Which areas have more people who still have to travel to their workplace?

# Mean workplace mobility change per borough over Jan-Feb 2021, plotted
# as a sorted dot chart (boroughs with no data drop out).
tidy_london %>%
  filter(location == "workplaces") %>%
  filter(day >= as.Date("2021-01-01"),
         day < as.Date("2021-03-01")) %>%
  group_by(sub_region_2) %>%
  summarise(mean_change = mean(percent_change_from_baseline)) %>%
  drop_na() %>%
  mutate(sub_region_2 = fct_reorder(sub_region_2, mean_change)) %>%
  ggplot(aes(x = mean_change, y = sub_region_2)) +
  geom_point() +
  labs(x = "% Change from baseline", y = NULL, title = "Visits to workplaces",
       subtitle = "Mean change from pre-Covid baseline, Jan to Feb 2021")
## `summarise()` ungrouping output (override with `.groups` argument)

# Residential change, Jan-Feb 2021, averaged separately for weekdays
# and weekends within each borough.
sum_res_weekday <- tidy_london %>%
  filter(location == "residential") %>%
  filter(day >= as.Date("2021-01-01"),
         day < as.Date("2021-03-01")) %>%
  group_by(sub_region_2, weekday) %>%
  summarise(mean_change = mean(percent_change_from_baseline)) %>%
  drop_na()
## `summarise()` regrouping output by 'sub_region_2' (override with `.groups` argument)
# Borough ordering for the plot: sort by the *weekday* mean so both
# series share one axis order.
ordered_weekdays <- sum_res_weekday %>%
  filter(weekday == "Weekday") %>%
  arrange(mean_change)
weekday_order <- ordered_weekdays %>%
  pull(sub_region_2)
# Dot chart of residential change, coloured by weekday vs weekend,
# boroughs ordered by their weekday mean.
sum_res_weekday %>%
  mutate(sub_region_2 = factor(sub_region_2, levels = weekday_order)) %>%
  ggplot(aes(x = mean_change, y = sub_region_2, colour = weekday)) +
  geom_point() +
  labs(
    x = "% Change from baseline",
    y = NULL,
    colour = NULL,
    title = "Time in places of residence",
    subtitle = "Mean change from pre-Covid baseline, Jan to Feb 2021"
  )