This is a collection of examples showing how to query the WsprDaemon Timescale Database using R and SQL and visualise the results using the tidyverse.
The comments are a little sparse at present – stay tuned…
library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(ggeffects)
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(World)
dt_options <- list(pageLength=7, scrollX = "400px")
The WSPR database uses PostgreSQL which is supported in R by the RPostgres
package.
Here are the settings required to connect – available over here. (Normally you shouldn’t hardcode login details like this, but there is only one login and it’s public.)
db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"
db_user <- "wdread"
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
dbname = db_name,
host = db_host,
user = db_user,
password = db_password)
Check if it worked:
dbListTables(db_con)
## [1] "spots"
Yes!
Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):
res <- dbSendQuery(db_con,
"SELECT * FROM spots
ORDER BY wd_time DESC
LIMIT 5;")
dbFetch(res) %>%
datatable(options = dt_options)
Yes – works.
dbClearResult(res)
Let’s try one day’s worth of data – reception reports of M0INF.
There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign
) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign or date use single quotes.
one_day <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE \"CallSign\" = 'M0INF'
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
This fetches the data to a data frame:
the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
datatable(options = dt_options)
Now plot:
the_dat %>%
ggplot(aes(wd_time, distance)) +
geom_point() +
geom_smooth(method = "gam",
formula = y ~ s(x, k = 20)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports of M0INF",
subtitle = "3 Jan 2021, 20m band") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
Let’s do the same again; however, now with reception reports both by and of M0INF.
one_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
The graph is a bit crowded so I put the y-axis on the \(\log_{10}\) scale.
the_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
Which of those reports are over 5000 km?
the_dat_send_rec %>%
filter(distance > 5000) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign | Reporter | distance | Spots |
---|---|---|---|
VK3MO | M0INF | 16880 | 7 |
M0INF | KD2OM | 5635 | 2 |
M0INF | PT2FHC | 8786 | 1 |
And which are under 100 km?
the_dat_send_rec %>%
filter(distance < 100) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign | Reporter | distance | Spots |
---|---|---|---|
M0INF | GB0SNB/SDR | 32 | 58 |
two_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
day_40m_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
day_40m_send_receive_dat %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "5 Jan 2021, 40m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
First, look at data where there’s more than one report.
dat_sent_many <- the_dat %>%
group_by(Reporter) %>%
summarise(spots = n()) %>%
arrange(desc(spots)) %>%
slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many %>%
kable()
Reporter | spots |
---|---|
GB0SNB/SDR | 58 |
IW2NKE | 22 |
YU/G8ZMF | 10 |
SA2KHG | 8 |
OE1WYC | 7 |
IZ7VHF/RX | 6 |
UA3245SWL | 6 |
the_dat %>%
mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
ggplot(aes(wd_time, dB, colour = ReportDist)) +
geom_smooth(span = 1, se = FALSE) +
labs(x = "Time", y = "Signal report (dB)",
title = "WSPR reports (20m) – signal strength",
colour = "Reporter") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
It’s showing something… the red curve is straightish since it’s not relying on the ionosphere. The others do use ionospheric propagation so vary as conditions change.
Try again for latest 40m data:
one_day_40m <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE \"CallSign\" = 'M0INF'
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-06T00:00:00Z';")
dat_40m <- dbFetch(one_day_40m)
dbClearResult(one_day_40m)
dat_sent_many_40 <- dat_40m %>%
group_by(Reporter) %>%
summarise(spots = n()) %>%
arrange(desc(spots)) %>%
#filter(spots >= 5)
slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many_40 %>%
kable()
Reporter | spots |
---|---|
OE9HLH | 36 |
OE9GHV | 33 |
HB9TMC | 32 |
F4GFZ | 22 |
DK8FT/A | 21 |
IW2NKE | 21 |
DK2DB | 20 |
dat_40m %>%
mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
filter(Reporter %in% unique(dat_sent_many_40$Reporter)) %>%
ggplot(aes(wd_time, dB, colour = ReportDist)) +
geom_smooth(span = 2, se = FALSE, size = 0.8) +
geom_point(size = 0.6, alpha = 0.5) +
labs(x = "Time", y = "dB",
title = "WSPR reports (40m) – signal strength",
subtitle = "A selection of reporters on 5 Jan 2021",
colour = "Reporter") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Here are the WSPR frequencies, for labeling the plot in a moment.
wspr_bands <- tibble(
MHz = c(
0.136,
0.4742,
1.8366,
3.5686,
5.2872,
5364.7,
7.0386,
10.1387,
14.0956,
18.1046,
21.0946,
24.9246,
28.1246,
50.293,
70.091,
144.489,
432.300,
1296.500
),
m = 299.792458 / MHz,
wavelength_text = ifelse(m >= 1,
paste(round(m, 0), "m"),
paste(round(m*100, 0), "cm"))
) %>%
arrange(MHz)
wspr_bands %>% kable()
MHz | m | wavelength_text |
---|---|---|
0.1360 | 2204.3563088 | 2204 m |
0.4742 | 632.2067862 | 632 m |
1.8366 | 163.2323086 | 163 m |
3.5686 | 84.0084229 | 84 m |
5.2872 | 56.7015543 | 57 m |
7.0386 | 42.5926261 | 43 m |
10.1387 | 29.5691221 | 30 m |
14.0956 | 21.2685134 | 21 m |
18.1046 | 16.5589109 | 17 m |
21.0946 | 14.2118105 | 14 m |
24.9246 | 12.0279747 | 12 m |
28.1246 | 10.6594390 | 11 m |
50.2930 | 5.9609182 | 6 m |
70.0910 | 4.2771891 | 4 m |
144.4890 | 2.0748462 | 2 m |
432.3000 | 0.6934824 | 69 cm |
1296.5000 | 0.2312321 | 23 cm |
5364.7000 | 0.0558824 | 6 cm |
Grab the data:
last_whispers <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"ReporterGrid\" LIKE 'IO91%'
OR \"Grid\" LIKE 'IO91%')
AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 3295
Group into WSPR bands. This uses the fuzzyjoin
package which matches by distance.
joined_dat <- the_dat %>%
difference_left_join(wspr_bands,
by = "MHz",
max_dist = 0.2,
distance_col = "dist_band_start") %>%
mutate(MHz.y = as.factor(MHz.y))
Check how many kHz off the match is (also look for NAs):
summary(joined_dat$dist_band_start*1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.373 1.470 1.502 4.524 1.534 79.099
Plot:
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = MHz.y)) +
geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time))) +
theme(legend.position = "none")
Try again, but this time separating the transmissions and receptions in IO91.
joined_dat <- joined_dat %>%
mutate(direction = ifelse(grepl(
pattern = "IO91",
x = Grid,
ignore.case = TRUE
), "Sent from IO91", "Received in IO91"))
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = direction)) +
geom_point(position = position_jitterdodge(jitter.width = .1,
jitter.height = 0,
dodge.width = 0.4),
alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Direction")
Let’s do the same again but also showing the direction, for transmissions from IO91 only.
tx_only <- joined_dat %>%
filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
ggplot(aes(azimuth, distance, colour = MHz.y)) +
geom_point(alpha = 0.5) +
labs(
title = "WSPR spots IO91",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Freq (MHz)",
x = NULL,
y = "Distance"
) +
scale_x_continuous(
limits = c(0, 360),
breaks = seq(0, 315, by = 45),
minor_breaks = seq(0, 360, by = 15)
) +
coord_polar()
linear_scale_polar
linear_scale_polar +
scale_y_continuous(trans = "log10")
tx_only %>%
filter(distance > 8000) %>%
select(CallSign, Reporter, distance, azimuth, MHz.x) %>%
kable()
CallSign | Reporter | distance | azimuth | MHz.x |
---|---|---|---|---|
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G1CPC | VK5EI | 16296 | 76 | 14.09703 |
G1CPC | VK2COW | 17000 | 64 | 14.09707 |
G1CPC | VK3KHZ | 16965 | 73 | 14.09707 |
G8MCD | VK5EI | 16259 | 76 | 14.09700 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G4LRP | PT2FHC | 8731 | 226 | 14.09706 |
G8MCD | VK3KHZ | 16928 | 73 | 14.09703 |
G0OQK | VK4BAP | 15745 | 49 | 10.14018 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G8MCD | VK3KHZ | 16928 | 73 | 14.09703 |
G1CPC | VK5EI | 16296 | 76 | 14.09704 |
G1CPC | VK2COW | 17000 | 64 | 14.09707 |
G1CPC | VK3KHZ | 16965 | 73 | 14.09707 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G1CPC | VK2COW | 17000 | 64 | 14.09707 |
G1CPC | VK3KHZ | 16965 | 73 | 14.09707 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G4LRP | VK2COW | 17004 | 66 | 14.09715 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G1CPC | VK2COW | 17000 | 64 | 14.09707 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G1CPC | VK2COW | 17000 | 64 | 14.09707 |
G8MCD | VK2COW | 16963 | 64 | 14.09703 |
G4LRP | VK2COW | 17004 | 66 | 14.09715 |
IO91_48hrs <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"ReporterGrid\" LIKE 'IO91%'
OR \"Grid\" LIKE 'IO91%')
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-07T00:00:00Z';")
IO91_48hrs_dat <- dbFetch(IO91_48hrs)
dbClearResult(IO91_48hrs)
nrow(IO91_48hrs_dat)
## [1] 189067
There were 189067 rows of data.
IO91_48hrs_dat_bands <- IO91_48hrs_dat %>%
difference_left_join(wspr_bands,
by = "MHz",
max_dist = 0.1,
distance_col = "dist_band_start") %>%
mutate(
MHz.y = as.factor(MHz.y),
direction = ifelse(
grepl(
pattern = "IO91",
x = Grid,
ignore.case = TRUE
),
"From IO91",
"To IO91"
)
)
Here are spots which couldn’t be classified in a band:
IO91_48hrs_dat_bands %>%
filter(is.na(MHz.y)) %>%
select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
datatable(options = dt_options)
summary(IO91_48hrs_dat_bands$dist_band_start*1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.362 1.456 1.505 2.619 1.555 79.092 2
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y)) %>%
ggplot(aes(x = distance, fill = MHz.y)) +
geom_histogram(aes(y=..density..), binwidth = 500, position = "identity", alpha = 0.5) +
facet_grid(rows = vars(direction)) +
labs(x = "Distance (km)", y = "Mass", fill = "Freq (MHz)")
date_format <- "%d %b %Y (%H:%m)"
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y)) %>%
ggplot(aes(x = distance, fill = MHz.y)) +
geom_histogram(
#aes(y = ..density..),
binwidth = 200,
position = "identity",
) +
facet_grid(cols = vars(direction), rows = vars(MHz.y)) +
labs(
x = "Distance (km)",
y = "Mass",
fill = "Freq (MHz)",
title = "WSPR reports to/from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
theme(legend.position = "none")
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y) & direction == "From IO91") %>%
ggplot(aes(azimuth, y = distance)) +
geom_point() +
facet_grid(rows = vars(MHz.y)) +
labs(
x = "Distance (km)",
y = "Mass",
fill = "Freq (MHz)",
title = "WSPR reports from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
scale_x_continuous(
limits = c(0, 360),
breaks = seq(0, 315, by = 45),
minor_breaks = seq(0, 360, by = 15)
) +
coord_polar()
This needs to be tweaked but is a start. The 4326 magic number refers to the coordinate reference system. This Wikipedia article is as good a place as any to start… The st_as_sf
function transforms longitude and lattitude coordinates to a system that the map plot can use.
IO91_48hrs_dat_bands_sf <- IO91_48hrs_dat_bands %>%
st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
crs = 4326)
The following steps takes up to two minutes on my laptop, so I have cached the result in the markdown. Timing code added here to check that it worked.
t1 <- Sys.time()
IO91_48hrs_dat_bands_sf %>%
filter(!is.na(MHz.y)) %>%
ggplot() +
geom_sf(data = World) +
geom_sf(aes(colour = MHz.y)) +
facet_grid(rows = vars(MHz.y)) +
theme_classic() +
labs(
colour = "Freq (MHz)",
title = "WSPR reports from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
theme(legend.position = "none")
t2 <- Sys.time()
t2 - t1
## Time difference of 0.109705 secs
If the caching worked then that timer should be a few seconds.
This section seemed like a good idea at the time…
library(mixtools)
## mixtools package, version 1.2.0, Released 2020-02-05
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.
First summarise the data to work out which band to play with:
IO91_48hrs_dat_bands %>%
filter(direction == "From IO91") %>%
group_by(MHz.y) %>%
summarise(spots = n(),
dist_p25 = quantile(distance,p=.25),
dist_p75 = quantile(distance,p=.75)) %>%
kable()
## `summarise()` ungrouping output (override with `.groups` argument)
MHz.y | spots | dist_p25 | dist_p75 |
---|---|---|---|
0.136 | 1 | 646.00 | 646.00 |
0.4742 | 25309 | 283.00 | 887.00 |
1.8366 | 8613 | 347.00 | 835.00 |
3.5686 | 1730 | 307.00 | 784.50 |
5.2872 | 6071 | 410.00 | 922.00 |
7.0386 | 39909 | 689.00 | 1510.00 |
10.1387 | 12591 | 891.00 | 2127.00 |
14.0956 | 13805 | 268.00 | 2966.00 |
18.1046 | 29 | 2819.00 | 5255.00 |
21.0946 | 111 | 18.00 | 18.00 |
24.9246 | 88 | 18.00 | 18.00 |
28.1246 | 43 | 18.00 | 18.00 |
50.293 | 4 | 7.00 | 15.75 |
NA | 2 | 3684.75 | 5014.25 |
I’m going to go for the 20m band since there are loads of observations and the signals carry some distance. (I should have gone for unique reports to count, but onwards…)
dat_for_clust <- IO91_48hrs_dat_bands %>%
filter(MHz.y == "14.0956" & direction == "From IO91")
nrow(dat_for_clust)
## [1] 13805
This is a bit silly since to get it work I have had to provide a lot of clues based on visual inspection… I was going to constrain the means of clusters to be multiples of the second to try to model skip. It didn’t work, but I have left the code in commented out.
cluster_n = 6
start_means <- c(0, 1500, 6000, 10000, 12000, 17000)
#means <- c(NA, paste0(1:(cluster_n-1), "n"))
sds <- c(NA, rep("b", cluster_n-1))
clust_res <- normalmixEM(dat_for_clust$distance,
mu = start_means,
#mean.constr = means,
sd.constr = sds,
k = cluster_n)
## number of iterations= 12
summary(clust_res)
## summary of normalmixEM object:
## comp 1 comp 2 comp 3 comp 4 comp 5 comp 6
## lambda 0.23763 0.545196 0.194094 1.44116e-02 8.59046e-04 7.80888e-03
## mu 27.82031 1805.103140 5900.153689 9.17730e+03 1.16025e+04 1.37087e+04
## sigma 19.64022 690.233625 690.233625 6.90234e+02 6.90234e+02 6.90234e+02
## loglik at estimate: -113114.3
plot(clust_res, which = 2, breaks = 100)
clusters <- clust_res$posterior %>%
as.data.frame() %>%
rowwise() %>%
mutate(cluster = which.max(c_across()))
dat_for_clust$cluster <- ordered(clusters$cluster)
dat_for_clust %>%
ggplot(aes(x = distance, fill = cluster)) +
geom_histogram(alpha = 0.5,
position = "identity",
binwidth = 500) +
labs(x = "Distance (km)",
y = "Count",
fill = "Cluster") +
theme(legend.position = "bottom")
clust_means <- dat_for_clust %>%
group_by(cluster) %>%
summarise(clust_mean_dist = mean(distance))
## `summarise()` ungrouping output (override with `.groups` argument)
dat_for_clust_sf <- dat_for_clust %>%
left_join(clust_means) %>%
st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
crs = 4326)
## Joining, by = "cluster"
dat_for_clust_sf %>%
ggplot() +
geom_sf(data = World) +
geom_sf(aes(colour = cluster), size = 1) +
theme_classic() +
labs(
colour = "Cluster",
title = "WSPR reports from IO91 (clusters)",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
theme(legend.position = "bottom") +
guides(colour = guide_legend(override.aes = list(size=3)))
dat_for_clust_sf %>%
ggplot(aes(x = wd_time, y = dB, colour = ordered(round(clust_mean_dist,0)))) +
#geom_smooth(span = 3, se = FALSE, size = 2)
geom_jitter(size = 1, alpha = 0.3) +
labs(colour = "Mean cluster dist (km)", x = NULL) +
theme(legend.position = "bottom") +
guides(colour = guide_legend(override.aes = list(size=3)))
Here’s the maximum distance half-way around earth (measured around the equator), to have a sense of how far signals can travel by shortest path…
half_earth_circle <- 40075/2
half_earth_circle
## [1] 20037.5
And actual distances:
IO91_48hrs_dat_bands %>%
ggplot(aes(x=distance+1)) +
geom_histogram(bins = 500) +
scale_x_continuous(trans = "log10") +
xlab("log(km + 1)")
Discretise the distances:
IO91_48hrs_dat_bands <- IO91_48hrs_dat_bands %>%
mutate(distance_band = cut(
distance,
right = FALSE,
breaks = c(0, 10 ^ (1:4), 2e5),
labels = c("[0, 10)",
"[10, 100)",
"[100, 1000)",
"[1000, 10000)",
"10000+")
))
IO91_48hrs_dat_bands %>%
group_by(distance_band) %>%
summarise(mean = mean(distance),
min = min(distance),
max = max(distance))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 4
## distance_band mean min max
## <fct> <dbl> <int> <int>
## 1 [0, 10) 7.42 0 9
## 2 [10, 100) 52.6 11 99
## 3 [100, 1000) 610. 100 999
## 4 [1000, 10000) 2703. 1000 9791
## 5 10000+ 14851. 10552 18887
Now select a few bands to plot.
IO91_48hrs_dat_bands$MHz.y %>%
levels
## [1] "0.136" "0.4742" "1.8366" "3.5686" "5.2872" "7.0386" "10.1387"
## [8] "14.0956" "18.1046" "21.0946" "24.9246" "28.1246" "50.293"
library(viridis)
## Loading required package: viridisLite
IO91_48hrs_dat_bands %>%
filter(MHz.y %in% c("7.0386", "10.1387", "14.0956")) %>%
filter(direction == "From IO91") %>%
ggplot(aes(
x = wd_time,
y = dB,
colour = ordered(distance_band)
)) +
geom_jitter(size = .5, alpha = 0.3) +
labs(colour = "Distance (km)",
x = NULL,
y = "Signal strength (dB)",
title = "Reports of WSPR signals sent from IO91") +
theme(legend.position = "bottom") +
guides(colour = guide_legend(override.aes = list(size = 7))) +
facet_wrap(vars(MHz.y), ncol = 1) +
scale_colour_viridis(option = "magma", discrete = T)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
IO91_48hrs_dat_bands$wd_time[1]
## [1] "2021-01-06 23:58:00 GMT"
IO91_48hrs_dat_bands <- IO91_48hrs_dat_bands %>%
mutate(
date = format(IO91_48hrs_dat_bands$wd_time, "%F"),
hour = format(IO91_48hrs_dat_bands$wd_time, "%H"),
hour_num = as.numeric(hour),
dist_far = cut(distance,
c(0,100,500,1000,25000),
labels = c("[0,100) km",
"[100,500) km",
"[500,1000) km",
"1000+ km"),
right = FALSE),
daylight = factor(hour_num >= 8 &
hour_num <= 16,
labels = c("Light", "Dark")),
)
dat_for_mod <- IO91_48hrs_dat_bands %>%
mutate(the_freq = factor(round(as.numeric(as.character((MHz.y)))))) %>%
filter(MHz.y %in% c("7.0386", "10.1387", "14.0956")) %>%
filter(direction == "From IO91")
IO91_48hrs_dat_bands %>%
group_by(dist_far) %>%
tally()
## # A tibble: 4 x 2
## dist_far n
## <fct> <int>
## 1 [0,100) km 11862
## 2 [100,500) km 34492
## 3 [500,1000) km 71300
## 4 1000+ km 71413
db_mod <- lmer(dB ~ date + daylight *
dist_far * the_freq +
(1 | CallSign) +
(1 | Reporter),
data = dat_for_mod)
db_mod_preds <- ggeffect(db_mod,
terms = c("the_freq", "daylight", "dist_far"),
type = "fixed")
db_mod_preds %>%
plot() +
labs(x = "Band (MHz)", colour = "Daylight") +
theme_grey() +
facet_wrap(vars(facet), ncol = 2) +
theme(legend.position = "bottom")