This is a collection of examples showing how to query the WsprDaemon Timescale Database using R and SQL and visualise the results using the tidyverse.

The comments are a little sparse at present – stay tuned…

Setup packages

library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(ggeffects)
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
data(World)
dt_options <- list(pageLength=7, scrollX = "400px")

Connect to the database

The WSPR database uses PostgreSQL which is supported in R by the RPostgres package.

Here are the settings required to connect – available over here. (Normally you shouldn’t hardcode login details like this, but there is only one login and it’s public.)

db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"  
db_user <- "wdread"  
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
                    dbname = db_name,
                    host = db_host,
                    user = db_user,
                    password = db_password)  

Check if it worked:

dbListTables(db_con) 
## [1] "spots"

Yes!

Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):

res <- dbSendQuery(db_con,
  "SELECT * FROM spots
   ORDER BY wd_time DESC
   LIMIT 5;")
dbFetch(res) %>%
  datatable(options = dt_options)

Yes – works.

dbClearResult(res)

Grab a day’s data

Transmissions

Let’s try one day’s worth of data – reception reports of M0INF.

There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign or date use single quotes.

one_day <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-03T00:00:00Z'
         AND wd_time < '2021-01-04T00:00:00Z';")

This fetches the data to a data frame:

the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
  datatable(options = dt_options)

Now plot:

the_dat %>%
  ggplot(aes(wd_time, distance)) +
  geom_point() +
  geom_smooth(method = "gam",
              formula = y ~ s(x, k = 20)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports of M0INF",
       subtitle = "3 Jan 2021, 20m band") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")

Both transmissions and reception reports

Let’s do the same again; however, now with reception reports both by and of M0INF.

one_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))

The graph is a bit crowded so I put the y-axis on the \(\log_{10}\) scale.

the_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Which of those reports are over 5000 km?

the_dat_send_rec %>%
  filter(distance > 5000) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
VK3MO M0INF 16880 7
M0INF KD2OM 5635 2
M0INF PT2FHC 8786 1

And which are under 100 km?

the_dat_send_rec %>%
  filter(distance < 100) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
M0INF GB0SNB/SDR 32 58

Try two days (now there’s more data)

two_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…

two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Again on the 40 m band

day_40m_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-05T00:00:00Z'
   AND wd_time <  '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
day_40m_send_receive_dat %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "5 Jan 2021, 40m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Track signal strength by reporter over time…

First, look at data where there’s more than one report.

dat_sent_many <- the_dat %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many %>%
  kable()
Reporter spots
GB0SNB/SDR 58
IW2NKE 22
YU/G8ZMF 10
SA2KHG 8
OE1WYC 7
IZ7VHF/RX 6
UA3245SWL 6
the_dat %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
  geom_smooth(span = 1, se = FALSE) +
  labs(x = "Time", y = "Signal report (dB)",
       title = "WSPR reports (20m) – signal strength",
       colour = "Reporter") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

It’s showing something… the red curve is straightish since it’s not relying on the ionosphere. The others do use ionospheric propagation so vary as conditions change.

Try again for latest 40m data:

one_day_40m <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-06T00:00:00Z';")
dat_40m <- dbFetch(one_day_40m)
dbClearResult(one_day_40m)
dat_sent_many_40 <- dat_40m %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  #filter(spots >= 5)
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many_40 %>%
  kable()
Reporter spots
OE9HLH 36
OE9GHV 33
HB9TMC 32
F4GFZ 22
DK8FT/A 21
IW2NKE 21
DK2DB 20
dat_40m %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many_40$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
    geom_smooth(span = 2, se = FALSE, size = 0.8) +
    geom_point(size = 0.6, alpha = 0.5) +
    labs(x = "Time", y = "dB",
         title = "WSPR reports (40m) – signal strength",
         subtitle = "A selection of reporters on 5 Jan 2021",
         colour = "Reporter") +
    scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Last hour sent/received from IO91

Jitter plot

Here are the WSPR frequencies, for labeling the plot in a moment.

wspr_bands <- tibble(
  MHz = c(
    0.136,
    0.4742,
    1.8366,
    3.5686,
    5.2872,
    5364.7,
    7.0386,
    10.1387,
    14.0956,
    18.1046,
    21.0946,
    24.9246,
    28.1246,
    50.293,
    70.091,
    144.489,
    432.300,
    1296.500
  ),
  m = 299.792458 / MHz,
  wavelength_text = ifelse(m >= 1,
                           paste(round(m, 0), "m"),
                           paste(round(m*100, 0), "cm"))
) %>%
  arrange(MHz)

wspr_bands %>% kable()
MHz m wavelength_text
0.1360 2204.3563088 2204 m
0.4742 632.2067862 632 m
1.8366 163.2323086 163 m
3.5686 84.0084229 84 m
5.2872 56.7015543 57 m
7.0386 42.5926261 43 m
10.1387 29.5691221 30 m
14.0956 21.2685134 21 m
18.1046 16.5589109 17 m
21.0946 14.2118105 14 m
24.9246 12.0279747 12 m
28.1246 10.6594390 11 m
50.2930 5.9609182 6 m
70.0910 4.2771891 4 m
144.4890 2.0748462 2 m
432.3000 0.6934824 69 cm
1296.5000 0.2312321 23 cm
5364.7000 0.0558824 6 cm

Grab the data:

last_whispers <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
   AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 3295

Group into WSPR bands. This uses the fuzzyjoin package which matches by distance.

joined_dat <- the_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.2,
                       distance_col = "dist_band_start") %>%
  mutate(MHz.y = as.factor(MHz.y))

Check how many kHz off the match is (also look for NAs):

summary(joined_dat$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.373   1.470   1.502   4.524   1.534  79.099

Plot:

joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = MHz.y)) +
  geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time))) + 
  theme(legend.position = "none")

Try again, but this time separating the transmissions and receptions in IO91.

joined_dat <- joined_dat %>%
  mutate(direction = ifelse(grepl(
    pattern = "IO91",
    x = Grid,
    ignore.case = TRUE
  ), "Sent from IO91", "Received in IO91"))
joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = direction)) +
  geom_point(position = position_jitterdodge(jitter.width = .1,
                                             jitter.height = 0,
                                             dodge.width = 0.4),
             
             alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time)),
       colour = "Direction")

Polar plot

Let’s do the same again but also showing the direction, for transmissions from IO91 only.

tx_only <- joined_dat %>%
  filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
  ggplot(aes(azimuth, distance, colour = MHz.y)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "WSPR spots IO91",
    subtitle = paste0("From ", min(the_dat$wd_time),
                      " to ", max(the_dat$wd_time)),
    colour = "Freq (MHz)",
    x = NULL,
    y = "Distance"
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

linear_scale_polar

linear_scale_polar +
  scale_y_continuous(trans = "log10")

tx_only %>%
  filter(distance > 8000) %>%
  select(CallSign, Reporter, distance, azimuth, MHz.x) %>%
  kable()
CallSign Reporter distance azimuth MHz.x
G8MCD VK2COW 16963 64 14.09703
G8MCD VK2COW 16963 64 14.09703
G1CPC VK5EI 16296 76 14.09703
G1CPC VK2COW 17000 64 14.09707
G1CPC VK3KHZ 16965 73 14.09707
G8MCD VK5EI 16259 76 14.09700
G8MCD VK2COW 16963 64 14.09703
G4LRP PT2FHC 8731 226 14.09706
G8MCD VK3KHZ 16928 73 14.09703
G0OQK VK4BAP 15745 49 10.14018
G8MCD VK2COW 16963 64 14.09703
G8MCD VK3KHZ 16928 73 14.09703
G1CPC VK5EI 16296 76 14.09704
G1CPC VK2COW 17000 64 14.09707
G1CPC VK3KHZ 16965 73 14.09707
G8MCD VK2COW 16963 64 14.09703
G8MCD VK2COW 16963 64 14.09703
G1CPC VK2COW 17000 64 14.09707
G1CPC VK3KHZ 16965 73 14.09707
G8MCD VK2COW 16963 64 14.09703
G4LRP VK2COW 17004 66 14.09715
G8MCD VK2COW 16963 64 14.09703
G1CPC VK2COW 17000 64 14.09707
G8MCD VK2COW 16963 64 14.09703
G8MCD VK2COW 16963 64 14.09703
G1CPC VK2COW 17000 64 14.09707
G8MCD VK2COW 16963 64 14.09703
G4LRP VK2COW 17004 66 14.09715

A bigger one - 48 hours of spots on all frequencies to/from IO91

IO91_48hrs <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-07T00:00:00Z';")
IO91_48hrs_dat <- dbFetch(IO91_48hrs)
dbClearResult(IO91_48hrs)
nrow(IO91_48hrs_dat)
## [1] 189067

There were 189067 rows of data.

IO91_48hrs_dat_bands <- IO91_48hrs_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.1,
                       distance_col = "dist_band_start") %>%
  mutate(
    MHz.y = as.factor(MHz.y),
    direction = ifelse(
      grepl(
        pattern = "IO91",
        x = Grid,
        ignore.case = TRUE
      ),
      "From IO91",
      "To IO91"
    )
  )

Here are spots which couldn’t be classified in a band:

IO91_48hrs_dat_bands %>%
  filter(is.na(MHz.y)) %>%
  select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
  datatable(options = dt_options)
summary(IO91_48hrs_dat_bands$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.362   1.456   1.505   2.619   1.555  79.092       2
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(aes(y=..density..), binwidth = 500, position = "identity", alpha = 0.5) +
  facet_grid(rows = vars(direction)) +
  labs(x = "Distance (km)", y = "Mass", fill = "Freq (MHz)")

date_format <- "%d %b %Y (%H:%m)"
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(
    #aes(y = ..density..),
    binwidth = 200,
    position = "identity",
  ) +
  facet_grid(cols = vars(direction), rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports to/from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  theme(legend.position = "none")

Polar plots

IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y) & direction == "From IO91") %>%
  ggplot(aes(azimuth, y = distance)) +
  geom_point() +
  facet_grid(rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

Plot on maps

This needs to be tweaked but is a start. The 4326 magic number refers to the coordinate reference system. This Wikipedia article is as good a place as any to start… The st_as_sf function transforms longitude and lattitude coordinates to a system that the map plot can use.

IO91_48hrs_dat_bands_sf <- IO91_48hrs_dat_bands %>%
  st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
           crs = 4326)

The following steps takes up to two minutes on my laptop, so I have cached the result in the markdown. Timing code added here to check that it worked.

t1 <- Sys.time()
IO91_48hrs_dat_bands_sf %>%
  filter(!is.na(MHz.y)) %>%
  ggplot() +
    geom_sf(data = World) +
    geom_sf(aes(colour = MHz.y)) +
    facet_grid(rows = vars(MHz.y)) +
    theme_classic() +
    labs(
      colour = "Freq (MHz)",
      title = "WSPR reports from IO91 by band",
      subtitle = paste0(format(
        min(IO91_48hrs_dat_bands$wd_time), date_format
      ), " to ",
      format(
        max(IO91_48hrs_dat_bands$wd_time), date_format
      ))
    ) +
    theme(legend.position = "none")

t2 <- Sys.time()
t2 - t1
## Time difference of 0.109705 secs

If the caching worked then that timer should be a few seconds.

Try fitting a mixture model

This section seemed like a good idea at the time…

library(mixtools)
## mixtools package, version 1.2.0, Released 2020-02-05
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.

First summarise the data to work out which band to play with:

IO91_48hrs_dat_bands %>%
  filter(direction == "From IO91") %>%
  group_by(MHz.y) %>%
  summarise(spots = n(),
            dist_p25 = quantile(distance,p=.25),
            dist_p75 = quantile(distance,p=.75)) %>%
  kable()
## `summarise()` ungrouping output (override with `.groups` argument)
MHz.y spots dist_p25 dist_p75
0.136 1 646.00 646.00
0.4742 25309 283.00 887.00
1.8366 8613 347.00 835.00
3.5686 1730 307.00 784.50
5.2872 6071 410.00 922.00
7.0386 39909 689.00 1510.00
10.1387 12591 891.00 2127.00
14.0956 13805 268.00 2966.00
18.1046 29 2819.00 5255.00
21.0946 111 18.00 18.00
24.9246 88 18.00 18.00
28.1246 43 18.00 18.00
50.293 4 7.00 15.75
NA 2 3684.75 5014.25

I’m going to go for the 20m band since there are loads of observations and the signals carry some distance. (I should have gone for unique reports to count, but onwards…)

dat_for_clust <- IO91_48hrs_dat_bands %>%
  filter(MHz.y == "14.0956" & direction == "From IO91")
nrow(dat_for_clust)
## [1] 13805

This is a bit silly since to get it work I have had to provide a lot of clues based on visual inspection… I was going to constrain the means of clusters to be multiples of the second to try to model skip. It didn’t work, but I have left the code in commented out.

cluster_n = 6
start_means <- c(0, 1500, 6000, 10000, 12000, 17000)
#means <- c(NA, paste0(1:(cluster_n-1), "n"))
sds   <- c(NA, rep("b", cluster_n-1))


clust_res <- normalmixEM(dat_for_clust$distance,
                         mu = start_means,
                         #mean.constr = means,
                         sd.constr = sds,
                         k = cluster_n)
## number of iterations= 12
summary(clust_res)
## summary of normalmixEM object:
##          comp 1      comp 2      comp 3      comp 4      comp 5      comp 6
## lambda  0.23763    0.545196    0.194094 1.44116e-02 8.59046e-04 7.80888e-03
## mu     27.82031 1805.103140 5900.153689 9.17730e+03 1.16025e+04 1.37087e+04
## sigma  19.64022  690.233625  690.233625 6.90234e+02 6.90234e+02 6.90234e+02
## loglik at estimate:  -113114.3
plot(clust_res, which = 2, breaks = 100)

clusters <- clust_res$posterior %>%
  as.data.frame() %>%
  rowwise() %>%
  mutate(cluster = which.max(c_across()))

dat_for_clust$cluster <- ordered(clusters$cluster)

dat_for_clust %>%
  ggplot(aes(x = distance, fill = cluster)) +
  geom_histogram(alpha = 0.5,
                 position = "identity",
                 binwidth = 500) +
  labs(x = "Distance (km)",
       y = "Count",
       fill = "Cluster") +
  theme(legend.position = "bottom")

clust_means <- dat_for_clust %>%
  group_by(cluster) %>%
  summarise(clust_mean_dist = mean(distance))
## `summarise()` ungrouping output (override with `.groups` argument)
dat_for_clust_sf <- dat_for_clust %>%
  left_join(clust_means) %>%
  st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
           crs = 4326)
## Joining, by = "cluster"
dat_for_clust_sf %>%
  ggplot() +
    geom_sf(data = World) +
    geom_sf(aes(colour = cluster), size = 1) +
    theme_classic() +
    labs(
      colour = "Cluster",
      title = "WSPR reports from IO91 (clusters)",
      subtitle = paste0(format(
        min(IO91_48hrs_dat_bands$wd_time), date_format
      ), " to ",
      format(
        max(IO91_48hrs_dat_bands$wd_time), date_format
      ))
    ) +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(override.aes = list(size=3)))

dat_for_clust_sf %>%
  ggplot(aes(x = wd_time, y = dB, colour = ordered(round(clust_mean_dist,0)))) +
  #geom_smooth(span = 3, se = FALSE, size = 2) 
  geom_jitter(size = 1, alpha = 0.3) +
  labs(colour = "Mean cluster dist (km)", x = NULL) +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(override.aes = list(size=3)))

db plots again - the easy way…

Here’s the maximum distance half-way around earth (measured around the equator), to have a sense of how far signals can travel by shortest path…

half_earth_circle <- 40075/2
half_earth_circle
## [1] 20037.5

And actual distances:

IO91_48hrs_dat_bands %>%
  ggplot(aes(x=distance+1)) +
  geom_histogram(bins = 500) +
  scale_x_continuous(trans = "log10") +
  xlab("log(km + 1)")

Discretise the distances:

IO91_48hrs_dat_bands <- IO91_48hrs_dat_bands %>%
  mutate(distance_band = cut(
    distance,
    right = FALSE,
    breaks = c(0, 10 ^ (1:4), 2e5),
    labels = c("[0, 10)",
               "[10, 100)",
               "[100, 1000)",
               "[1000, 10000)",
               "10000+")
  ))

IO91_48hrs_dat_bands %>%
  group_by(distance_band) %>%
  summarise(mean = mean(distance),
          min  = min(distance),
          max  = max(distance))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 4
##   distance_band     mean   min   max
##   <fct>            <dbl> <int> <int>
## 1 [0, 10)           7.42     0     9
## 2 [10, 100)        52.6     11    99
## 3 [100, 1000)     610.     100   999
## 4 [1000, 10000)  2703.    1000  9791
## 5 10000+        14851.   10552 18887

Now select a few bands to plot.

IO91_48hrs_dat_bands$MHz.y %>%
  levels
##  [1] "0.136"   "0.4742"  "1.8366"  "3.5686"  "5.2872"  "7.0386"  "10.1387"
##  [8] "14.0956" "18.1046" "21.0946" "24.9246" "28.1246" "50.293"
library(viridis)
## Loading required package: viridisLite
IO91_48hrs_dat_bands %>%
  filter(MHz.y %in% c("7.0386", "10.1387", "14.0956")) %>%
  filter(direction == "From IO91") %>%
  ggplot(aes(
    x = wd_time,
    y = dB,
    colour = ordered(distance_band)
  )) +
  geom_jitter(size = .5, alpha = 0.3) +
  labs(colour = "Distance (km)",
       x = NULL,
       y = "Signal strength (dB)",
       title = "Reports of WSPR signals sent from IO91") +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(override.aes = list(size = 7))) +
  facet_wrap(vars(MHz.y), ncol = 1) +
  scale_colour_viridis(option = "magma", discrete = T)

Modelling

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
IO91_48hrs_dat_bands$wd_time[1]
## [1] "2021-01-06 23:58:00 GMT"
IO91_48hrs_dat_bands <- IO91_48hrs_dat_bands %>%
  mutate(
    date = format(IO91_48hrs_dat_bands$wd_time, "%F"),
    hour = format(IO91_48hrs_dat_bands$wd_time, "%H"),
    hour_num = as.numeric(hour),
    dist_far = cut(distance,
                   c(0,100,500,1000,25000),
                   labels = c("[0,100) km",
                              "[100,500) km",
                              "[500,1000) km",
                              "1000+ km"),
                   right = FALSE),
    daylight = factor(hour_num >= 8 &
                        hour_num <= 16,
                      labels = c("Light", "Dark")),
  )

dat_for_mod <- IO91_48hrs_dat_bands %>%
  mutate(the_freq = factor(round(as.numeric(as.character((MHz.y)))))) %>%
  filter(MHz.y %in% c("7.0386", "10.1387", "14.0956")) %>%
  filter(direction == "From IO91")
IO91_48hrs_dat_bands %>%
  group_by(dist_far) %>%
  tally()
## # A tibble: 4 x 2
##   dist_far          n
##   <fct>         <int>
## 1 [0,100) km    11862
## 2 [100,500) km  34492
## 3 [500,1000) km 71300
## 4 1000+ km      71413
db_mod <- lmer(dB ~ date + daylight *
                 dist_far * the_freq +
                 (1 | CallSign) +
                 (1 | Reporter),
               data = dat_for_mod)
db_mod_preds <- ggeffect(db_mod,
         terms = c("the_freq", "daylight", "dist_far"),
         type = "fixed")
db_mod_preds %>%
  plot() +
  labs(x = "Band (MHz)", colour = "Daylight") +
  theme_grey() +
  facet_wrap(vars(facet), ncol = 2) +
  theme(legend.position = "bottom")