library(tidyverse)
library(GGally)
library(viridis)
library(quantmod)
library(patchwork)
library(smooth)
covid_hosp <- read.csv("https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&metric=covidOccupiedMVBeds&metric=hospitalCases&metric=newAdmissions&format=csv")
View(covid_hosp)
covid_dat <- read.csv("https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&metric=newAdmissions&metric=newCasesBySpecimenDate&metric=newDeaths28DaysByDeathDate&metric=newVirusTests&format=csv")
vax_dat <- read.csv("https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&metric=cumVaccinationCompleteCoverageByPublishDatePercentage&metric=cumVaccinationFirstDoseUptakeByPublishDatePercentage&format=csv")
dat <- left_join(covid_dat, vax_dat)
## Joining, by = c("areaCode", "areaName", "areaType", "date")
names(dat)
## [1] "areaCode"
## [2] "areaName"
## [3] "areaType"
## [4] "date"
## [5] "newAdmissions"
## [6] "newCasesBySpecimenDate"
## [7] "newDeaths28DaysByDeathDate"
## [8] "newVirusTests"
## [9] "cumVaccinationCompleteCoverageByPublishDatePercentage"
## [10] "cumVaccinationFirstDoseUptakeByPublishDatePercentage"
dat <- dat %>%
mutate(
vax_1st = ifelse(
is.na(cumVaccinationFirstDoseUptakeByPublishDatePercentage),
0,
cumVaccinationFirstDoseUptakeByPublishDatePercentage),
vax_2nd = ifelse(
is.na(cumVaccinationCompleteCoverageByPublishDatePercentage),
0,
cumVaccinationCompleteCoverageByPublishDatePercentage)
)
scatter_cases_admissions_1st_dose <- dat %>%
ggplot(
aes(newCasesBySpecimenDate, newAdmissions,
colour = vax_1st)) +
geom_point() +
labs(colour = "% first dose",
x = "Daily cases",
y = "Daily hospital admissions") +
scale_colour_steps()
scatter_cases_admissions_1st_dose
## Warning: Removed 56 rows containing missing values (geom_point).
scatter_cases_admissions_2nd_dose <- dat %>%
ggplot(
aes(newCasesBySpecimenDate, newAdmissions,
colour = vax_2nd)) +
geom_point() +
labs(colour = "% second dose",
x = "Daily cases",
y = "Daily hospital admissions") +
scale_colour_steps()
scatter_cases_admissions_2nd_dose
## Warning: Removed 56 rows containing missing values (geom_point).
sma_order = 12
peaks_valleys_dat <- dat %>%
select(
date,
newAdmissions,
newCasesBySpecimenDate,
newVirusTests,
newDeaths28DaysByDeathDate
) %>%
arrange(as.Date(date)) %>%
mutate(i = 1:n())
peaks_valleys_dat_temp <- peaks_valleys_dat %>%
select(i, newAdmissions) %>%
na.omit()
peaks_valleys_dat_temp$newAdmissions_smooth =
sma(peaks_valleys_dat_temp$newAdmissions,
order = sma_order)$fitted
the_peaks <- peaks_valleys_dat_temp$i[
findPeaks(peaks_valleys_dat_temp$newAdmissions_smooth,thresh = 2)]
the_valleys <- peaks_valleys_dat_temp$i[
findValleys(peaks_valleys_dat_temp$newAdmissions_smooth, thresh = 2)]
switches <- c(1, the_peaks - sma_order,
the_valleys - sma_order,
max(peaks_valleys_dat$i)) %>%
sort()
switch_threshold <- 19
select_em <- c(TRUE, switches[-1] -
switches[-length(switches)] > switch_threshold)
switches <- switches[select_em]
plot(peaks_valleys_dat$newAdmissions)
abline(v = switches)
peaks_valleys_dat <- peaks_valleys_dat %>%
mutate(date_segment = cut(i, switches, include.lowest = T))
cut_dates <- peaks_valleys_dat %>%
group_by(date_segment) %>%
summarise(start_date = date %>% as.Date %>% min())
cut_dates$start_date
## [1] "2020-01-30" "2020-03-31" "2020-07-15" "2020-11-10" "2021-01-07"
## [6] "2021-04-18"
levels(peaks_valleys_dat$date_segment) <- cut_dates$start_date
date_range <- c(min(as.Date(peaks_valleys_dat$date)), as.Date("2021-08-01"))
scatter_deaths_date_rainbow <- peaks_valleys_dat %>%
ggplot(aes(as.Date(date), newDeaths28DaysByDeathDate, colour = date_segment)) +
geom_point() +
labs(x = "Date",
y = "Daily deaths") +
theme(legend.position = "none") +
scale_x_date(
date_labels = "%b %Y",
limits = date_range)
scatter_deaths_date_rainbow
## Warning: Removed 32 rows containing missing values (geom_point).
scatter_admissions_date_rainbow <- peaks_valleys_dat %>%
ggplot(aes(as.Date(date), newAdmissions, colour = date_segment)) +
geom_point() +
labs(x = "Date",
y = "Daily hospital admissions") +
theme(legend.position = "none") +
scale_x_date(
date_labels = "%b %Y",
limits = date_range)
scatter_admissions_date_rainbow
## Warning: Removed 56 rows containing missing values (geom_point).
scatter_tests_date_rainbow <- peaks_valleys_dat %>%
ggplot(aes(as.Date(date), newVirusTests/1000, colour = date_segment)) +
geom_point() +
labs(x = "Date",
y = "Daily tests (1000s)") +
theme(legend.position = "none") +
scale_x_date(
date_labels = "%b %Y",
limits = date_range)
scatter_tests_date_rainbow
## Warning: Removed 82 rows containing missing values (geom_point).
scatter_cases_date_rainbow <- peaks_valleys_dat %>%
ggplot(aes(as.Date(date),
newCasesBySpecimenDate,
colour = date_segment)) +
geom_point() +
labs(x = "Date",
y = "Daily cases") +
theme(legend.position = "none") +
scale_x_date(
date_labels = "%b %Y",
limits = date_range)
scatter_cases_date_rainbow
scatter_admissions_cases_rainbow <- peaks_valleys_dat %>%
ggplot(
aes(newCasesBySpecimenDate, newAdmissions,
colour = date_segment)) +
geom_point() +
labs(x = "Daily cases",
y = "Daily hospital admissions") +
theme(legend.position = "none")
scatter_admissions_cases_rainbow
## Warning: Removed 56 rows containing missing values (geom_point).
View(peaks_valleys_dat)
scatter_deaths_cases_rainbow <- peaks_valleys_dat %>%
ggplot(
aes(newCasesBySpecimenDate, newDeaths28DaysByDeathDate,
colour = date_segment)) +
geom_point() +
labs(x = "Daily cases",
y = "Daily deaths") +
theme(legend.position = "none")
scatter_deaths_cases_rainbow
## Warning: Removed 32 rows containing missing values (geom_point).
scatter_admissions_deaths <- peaks_valleys_dat %>%
ggplot(
aes(newAdmissions, newDeaths28DaysByDeathDate,
colour = date_segment)) +
geom_point() +
labs(x = "Daily hospital admissions",
y = "Daily deaths") +
theme(legend.position = "none")
scatter_admissions_deaths
## Warning: Removed 56 rows containing missing values (geom_point).
(scatter_admissions_date_rainbow/
scatter_admissions_cases_rainbow) |
(scatter_cases_admissions_1st_dose/
scatter_cases_admissions_2nd_dose)
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
(scatter_cases_date_rainbow /
(scatter_admissions_deaths + geom_smooth(se = FALSE)) |
(scatter_admissions_cases_rainbow +
geom_smooth(se = FALSE)) /
(scatter_deaths_cases_rainbow +
geom_smooth(se = FALSE)))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (stat_smooth).
## Warning: Removed 56 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (stat_smooth).
## Warning: Removed 56 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).
(scatter_cases_date_rainbow /
scatter_tests_date_rainbow /
scatter_admissions_date_rainbow) |
((scatter_admissions_cases_rainbow +
geom_smooth(se = FALSE)) /
(scatter_deaths_cases_rainbow +
geom_smooth(se = FALSE))/
scatter_deaths_date_rainbow)
## Warning: Removed 82 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (stat_smooth).
## Warning: Removed 56 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 32 rows containing non-finite values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
mod_deaths <- lm(
newDeaths28DaysByDeathDate ~
newCasesBySpecimenDate:date_segment + date_segment - 1,
data = peaks_valleys_dat
)
coef(mod_deaths) %>% round(6)
## date_segment2020-01-30
## -31.054151
## date_segment2020-03-31
## -79.808026
## date_segment2020-07-15
## -12.925272
## date_segment2020-11-10
## 335.407751
## date_segment2021-01-07
## 40.771887
## date_segment2021-04-18
## 7.053699
## newCasesBySpecimenDate:date_segment2020-01-30
## 0.138144
## newCasesBySpecimenDate:date_segment2020-03-31
## 0.177010
## newCasesBySpecimenDate:date_segment2020-07-15
## 0.011682
## newCasesBySpecimenDate:date_segment2020-11-10
## 0.006293
## newCasesBySpecimenDate:date_segment2021-01-07
## 0.030236
## newCasesBySpecimenDate:date_segment2021-04-18
## 0.000917
mod_hosp <- lm(
newAdmissions ~ newCasesBySpecimenDate:date_segment + date_segment -1,
data = peaks_valleys_dat)
coef(mod_hosp) %>% round(5)
## date_segment2020-01-30
## -261.35105
## date_segment2020-03-31
## -51.24448
## date_segment2020-07-15
## 48.11997
## date_segment2020-11-10
## 913.46450
## date_segment2021-01-07
## 153.13387
## date_segment2021-04-18
## 83.80818
## newCasesBySpecimenDate:date_segment2020-01-30
## 0.81414
## newCasesBySpecimenDate:date_segment2020-03-31
## 0.44307
## newCasesBySpecimenDate:date_segment2020-07-15
## 0.05522
## newCasesBySpecimenDate:date_segment2020-11-10
## 0.03941
## newCasesBySpecimenDate:date_segment2021-01-07
## 0.09754
## newCasesBySpecimenDate:date_segment2021-04-18
## 0.01487
last_date <- peaks_valleys_dat$date_segment %>%
levels() %>%
sort() %>%
last()
last_seg <- peaks_valleys_dat %>%
filter(date_segment == last_date)
last_seg
last_seg_deaths <- lm(
newDeaths28DaysByDeathDate ~ newCasesBySpecimenDate,
data = last_seg
)
summary(last_seg_deaths)
##
## Call:
## lm(formula = newDeaths28DaysByDeathDate ~ newCasesBySpecimenDate,
## data = last_seg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.485 -5.673 -1.892 3.641 35.132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.054e+00 1.319e+00 5.349 5.71e-07 ***
## newCasesBySpecimenDate 9.168e-04 6.567e-05 13.960 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.697 on 99 degrees of freedom
## Multiple R-squared: 0.6631, Adjusted R-squared: 0.6597
## F-statistic: 194.9 on 1 and 99 DF, p-value: < 2.2e-16
coef(last_seg_deaths)
## (Intercept) newCasesBySpecimenDate
## 7.0536990035 0.0009167871
last_seg_admissions <- lm(
newAdmissions ~ newCasesBySpecimenDate,
data = last_seg
)
summary(last_seg_admissions)
##
## Call:
## lm(formula = newAdmissions ~ newCasesBySpecimenDate, data = last_seg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -214.96 -27.55 -7.89 9.27 413.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.381e+01 1.390e+01 6.03 3.04e-08 ***
## newCasesBySpecimenDate 1.487e-02 6.915e-04 21.50 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 101.5 on 96 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8281, Adjusted R-squared: 0.8263
## F-statistic: 462.5 on 1 and 96 DF, p-value: < 2.2e-16
coef(last_seg_admissions)
## (Intercept) newCasesBySpecimenDate
## 83.80818056 0.01486982
Admissions / cases (%)
(coef(last_seg_admissions)[2] * 100) %>% round(2)
## newCasesBySpecimenDate
## 1.49
Deaths / cases (%)
(coef(last_seg_deaths)[2] * 100) %>% round(2)
## newCasesBySpecimenDate
## 0.09
predict(last_seg_admissions,
newdat = data.frame(newCasesBySpecimenDate = 100e3))
## 1
## 1570.79
predict(last_seg_deaths,
newdat = data.frame(newCasesBySpecimenDate = 100e3))
## 1
## 98.73241
last_seg %>%
ggplot(aes(newCasesBySpecimenDate, newAdmissions)) +
geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).
last_seg %>%
ggplot(aes(newCasesBySpecimenDate, newDeaths28DaysByDeathDate )) +
geom_point()
last_seg %>%
ggplot(aes(newAdmissions, newDeaths28DaysByDeathDate)) +
geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).
gpairs_lower <- function(g){
g$plots <- g$plots[-(1:g$nrow)]
g$yAxisLabels <- g$yAxisLabels[-1]
g$nrow <- g$nrow -1
g$plots <- g$plots[-(seq(g$ncol, length(g$plots), by = g$ncol))]
g$xAxisLabels <- g$xAxisLabels[-g$ncol]
g$ncol <- g$ncol - 1
g
}
to_plot <- peaks_valleys_dat %>%
select(
date,
newCasesBySpecimenDate,
newAdmissions,
newDeaths28DaysByDeathDate,
date_segment
) %>%
mutate(date = as.Date(date),
newCasesBySpecimenDate = newCasesBySpecimenDate / 1000)
to_plot %>%
ggpairs(columns = 1:(ncol(to_plot) - 1),
mapping = aes(colour = date_segment),
lower = list(continuous = "points"),
upper = list(continuous = "blank"),
diag = list(continuous = "blankDiag"),
columnLabels = c("Date",
"Cases (1000s)",
"Hospitalisations",
"Deaths")) %>%
gpairs_lower()
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).