Wavelet analysis

Data processing

Load data

Load measles epicurve data for SEA countries
suppressMessages(library(tidyverse))
library(janitor)
library(plotly)
library(patchwork)

world_epi_curve <- readxl::read_excel(
  "data/404-table-web-epi-curve-data.xlsx",
  sheet = "WEB")

# --- preprocess ----- 
world_epi_curve <- world_epi_curve %>% 
  clean_names() %>% 
  mutate(
    date = ym(str_c(year, month)),
    measles_clinical = as.integer(measles_clinical)
  ) 

filtered_countries <- c(
  "Viet Nam", "Thailand", "Cambodia", "Malaysia", "Singapore", "Myanmar", "Philippines","Brunei Darussalam", "Timor-Leste", "Indonesia",
  "Lao People's Democratic Republic")

# get epi curve of South East Asia countries
sea_epi_curve <- world_epi_curve %>% 
  clean_names() %>% 
  mutate(
    date = ym(str_c(year, month)),
    measles_suspect = as.integer(measles_suspect),
    measles_clinical = as.integer(measles_clinical),
    measles_epi_linked = as.integer(measles_epi_linked),
    measles_lab_confirmed = as.integer(measles_lab_confirmed),
    # NOTE: recompute measles total since some epi-linked cases are negative (e.g. record for Indonesia on 2016-10-01)
    # definition: t sum of clinically-compatible, epidemiologically linked and laboratory-confirmed cases
    measles_total = abs(measles_clinical) + abs(measles_epi_linked) + abs(measles_lab_confirmed)
  ) %>% 
  filter(
    country %in% filtered_countries
  ) 

Clean data

  • The column used for incidence data is measles total (sum of clinical compatible cases, epi-linked cases, laboratory confirmed cases)

  • Make sure the the time series is even-spaced

  • Check % of missing data from each of the 11 SEA countries

Generate evenly spaced time series for all countries
date_range <- range(sea_epi_curve$date)
sea_epi_curve <- sea_epi_curve %>% 
  group_by(country) %>%
  rename(
    incidence = measles_total
  ) %>% 
  complete(
    date = seq(date_range[1], date_range[2], by = "month"),
    fill = list(
      incidence = 0
    )
  ) %>% 
  ungroup()

View the percentage of data points where incidence==0

Get country with most incidence report being 0
percent_zero <- sea_epi_curve %>% 
  group_by(country) %>% 
  summarize(
    percent_zero = sum(incidence == 0)/n()
  ) %>% 
  arrange(
    desc(percent_zero)
  ) 

# filtered out countries where percent of data points where incidence = 0 > 50%
high_pct_zero <- percent_zero %>% filter(percent_zero > 0.5) %>% pull(country)

Plot incidence

Plor raw incidence
sea_epicurve_plot <- sea_epi_curve %>% 
  filter(!(country %in% high_pct_zero)) %>% 
  ggplot() +
  geom_line(
    aes(
      x = date,
      y = incidence,
      color = country
    )
  ) 

ggplotly(sea_epicurve_plot)

Preprocess data

Follow these steps

  • Log transform

  • Normalized

  • Remove latest observation of each country (most likely under-reported)

Preprocess data
filtered_sea_epicurve <- sea_epi_curve %>% 
  filter(!(country %in% high_pct_zero)) 

log_transformed <- filtered_sea_epicurve %>% 
  group_by(country) %>% 
  mutate(
    # log transform
    incidence = log10(incidence + 1),
    # normalized
    incidence = (incidence - mean(incidence))/sd(incidence)
  ) %>% 
  filter(
    date != max(date)
  ) %>% 
  ungroup()
Plot preprocessed data
log_epicurve_plot <- log_transformed %>% 
  ggplot() +
  geom_line(
    aes(
      x = date,
      y = incidence,
      color = country
    )
  ) 

ggplotly(log_epicurve_plot)

Wavelet analysis

library(WaveletComp)

Follow these steps of wavelet analysis

  • Inspect the power spectrum across time-period domain: identify the period that is consistently high over time, which indicates recurring cycles in the time series.

  • Inspect the global power (time-averaged power, analogue of the Fourier transform): identify the overall dominant periodicities.

  • Inspect the period-specific reconstructed component: extract period-specific oscillations to assess their contributions to the time series.

  • Inspect the period-specific phase: visualize the timing of cycles of different time series.

General setup

  • 1 time unit is 1 year. Since the data is collected every month, dt = 1/12

  • The analyzed range of period is [1/6, 8] (i.e., 2 months to 8 years)

Analyze on Vietnam data only

1. Check power spectrum across time-period

  • Consistently high power at period=5 suggests a cycle of major epidemic every 5-years, which can be observed from indicence data.

  • The plot also suggests (roughly) biennial cycle.

  • Annual cycle (seasonality) is only weakly detected before 2017 (maybe due to under-reporting).

Code
capture.output(
  wavelet <- log_transformed %>% 
    filter(
      country == "Viet Nam"
    ) %>% 
    analyze.wavelet(
      my.series = "incidence",
      dt = 1/12,
      lowerPeriod = 1/6,
      upperPeriod = 8,
      verbose = FALSE
    )
) %>% invisible()

wt.image(wavelet, 
         # n.levels = 250, 
         legend.params = list(lab = "Power"),
         periodlab = "Period(Year)",
         show.date=TRUE)

2. View global power (i.e., mean power across time)

wt.avg(wavelet)

3. Check the period specific reconstructed component

reconstructed component for 5-year period

Code
reconstruct(
  wavelet,
  sel.period = 5,
  plot.rec = TRUE,
  show.date = TRUE,
  verbose = FALSE
)

Reconstructed component for 2-year period (biennial)

Code
reconstruct(
  wavelet,
  sel.period = 2,
  plot.rec = TRUE,
  show.date = TRUE,
  verbose = FALSE
)

Reconstructed component for 1-year period (seasonal)

Code
reconstruct(
  wavelet,
  sel.period = 1,
  plot.rec = TRUE,
  show.date = TRUE,
  verbose = FALSE
)

4. Visualize phase angle (in radians)

Plot phase
# get the column for the 5-year period
row_5year <- which.min(abs(wavelet$Period - 5))

data.frame(
  date = wavelet$series$date,
  phase = wavelet$Phase[row_5year,]
) %>% 
  ggplot() +
  geom_line(
    aes(
      x = date,
      y = phase
    ), color = "cornflowerblue"
  )

Plot phase
# get the column for the 2-year period
row_2year <- which.min(abs(wavelet$Period - 2))

data.frame(
  date = wavelet$series$date,
  phase = wavelet$Phase[row_2year,]
) %>% 
  ggplot() +
  geom_line(
    aes(
      x = date,
      y = phase
    ), color = "cornflowerblue"
  )

Perform analysis on all data

Exclusion criteria:

  • Percentage of records where incidence = 0 greater than 50% (Brunei, Timer-Leste, Laos, Cambodia)

  • Low case count across time series (Singapore)

Perform wavelet data analysis on SEA countries
analyze_countries <- c("Thailand", "Myanmar", "Philippines", "Viet Nam", 
                       "Malaysia", "Indonesia")

countries_wavelets <- log_transformed %>% 
  filter(country %in% analyze_countries) %>%
  group_by(country) %>% 
  nest() %>% 
  mutate(
    wavelet = map(data, \(dat){
      capture.output(
        out <- dat %>% 
        analyze.wavelet(
          my.series = "incidence",
          dt = 1/12, # 1 time unit = 1 year 
          lowerPeriod = 1/6, # 2 months
          upperPeriod = 8, #  years
          verbose = FALSE
        )
      ) %>% invisible()
        
      out
    })
  ) %>% 
  select(country, wavelet) %>%
  deframe()

Power spectrum

These plots suggest that most analyzed SEA countries experience measles epidemic every 4-5 years

Function to plot power spectrum of all countries
plot_power <- function(wavelets){
  # return heatmap of power and also global power
  imap(wavelets, \(wavelet, country){
    wt.image(
        wavelet,
        main = paste0(country, " power spectrum"),
        legend.params = list(lab = "Power"),
        periodlab = "Period(Year)",
        show.date = TRUE
      )
    spectrum <- recordPlot()
    
    wt.avg(
        wavelet,
        main = paste0(country, " global power")
      )
    global_power <-  recordPlot()
    
    list(
      "spectrum" = spectrum,
      "global_power" = global_power
    )
  })
}
plot_power(countries_wavelets)

$Indonesia
$Indonesia$spectrum

$Indonesia$global_power


$Malaysia
$Malaysia$spectrum

$Malaysia$global_power


$Myanmar
$Myanmar$spectrum

$Myanmar$global_power


$Philippines
$Philippines$spectrum

$Philippines$global_power


$Thailand
$Thailand$spectrum

$Thailand$global_power


$`Viet Nam`
$`Viet Nam`$spectrum

$`Viet Nam`$global_power

Reconstructed component

Function to plot period specific component
reconstruct_ts <- function(wavelets, period = 5){
  df <- map2_dfr(wavelets, names(wavelets), \(wavelet, country){
    rec_period <- reconstruct(
      wavelet,
      sel.period = period,
      plot.rec = FALSE,
      verbose = FALSE
    )
    
    rec_period$series %>%
      mutate(country = country, .before = 1)
  })
  
  plot <- df %>% 
    ggplot() +
      geom_line(
        aes(x = date, y = incidence.r, color = country)
      ) +
      labs(
        y = paste0(round(mean(period), digits = 2), "-year period component"),
        x = "Date"
      )
  
  # return data.frame and plot
  list(
    df = df,
    plot = plot
  )
}

Take the average of the components of period ranging from 4-5.

Clear oscillations can be observed across all the countries, which further confirm the observations from previous section.

ggplotly(
  reconstruct_ts(countries_wavelets, period = c(4,5))$plot
)

Roughly biennial component. Take the average of the components of period ranging from 1.9-2.5.

Relatively consistent oscillation can be observed for Vietnam and Thailand which suggests biennial cycle exists for these 2 countries.

ggplotly(
  reconstruct_ts(countries_wavelets, period = c(1.9, 2.5))$plot
)

The plot suggests no SEA country exhibits consistent annual measles cycles

ggplotly(
  reconstruct_ts(countries_wavelets, period = 1)$plot
)

Phase

Function to plot period-specific phase
compute_phase <- function(wavelets, period = 5){
  df <- map2_dfr(wavelets, names(wavelets), \(wavelet, country){
    
    # if only one period is given -> select the closest period
    # else select all period within the range
    row_period <- if(length(period) == 1) which.min(abs(wavelet$Period - period)) else
      which( (wavelet$Period >= min(period)) & (wavelet$Period <= max(period)) )
    
    data.frame(
      date = wavelet$series$date,
      # compute mean phase across all selected periods
      phase = if(length(row_period) > 1) colMeans(wavelet$Phase[row_period,]) else wavelet$Phase[row_period,]
    ) %>% 
      mutate(
        country = country,
        .before = 1
      )
  })
  
  plot <- df %>% 
    ggplot() +
      geom_line(
        aes(x = date, y = phase, color = country)
      ) +
      labs(
        y = "Phase",
        x = "Date"
      )
  
  # return data.frame and plot
  list(
    df = df,
    plot = plot
  )
}

Major epidemics cycle (roughly 5 years)

countries_phase <- compute_phase(countries_wavelets, period = 5)

ggplotly(countries_phase$plot)

Roughly biennial epidemic

countries_phase <- compute_phase(countries_wavelets, period = 2.5)
ggplotly(countries_phase$plot)

Biwavelet analysis

Follow these steps of cross-wavelet analysis:

  • Inspect the cross-wavelet power spectrum: identify the period that consistently recurring for both pair of time series.

  • Inspect the coherence spectrum: identify the period where the pair of time series consistently correlates.

  • Inspect the period-specific phase difference: check the relative timing of the 2 time series (which series lead the other).

  • Compute and inspect the period-specific synchrony: check how closely the 2 time series align.

Function to generate biwavelet analyses for multiple pairs
library(janitor)

# Function that takes data instead of wavelets
# wavelets, period = 5, ref_country = "Viet Nam", constraint = TRUE, diff_to_time = TRUE
# when ref_country is not found, generate all unique pairs 
# when repeated = TRUE, included permutations of pairs intead of combinations
generate_biwavelet_analysis <- function(data, 
                                        ref_country = "Viet Nam",
                                        repeated = FALSE,
                                        verbose = FALSE){
  countries <- unique(data$country)
  
  # generate all unique pairs
  pairs <- if(!repeated) combn(countries, 2, simplify = FALSE) else
    expand.grid(countries, countries) %>% filter(Var1 != Var2) %>% 
    apply(1, \(row){as.character(row)}, simplify = FALSE)
  
  # if ref country is provided, keep pairs with the ref_country
  # also make sure ref country is the first time series
  if (!is.na(ref_country)){
    pairs <- keep(pairs, \(p) ref_country %in% p)
    pairs <- map(pairs, \(p) {
      if (p[1] == ref_country) p else rev(p)
    })
  }
  
  coherence <- map(pairs, \(curr_pair){
    capture.output(
      out <- data %>% 
      filter(
        country %in% curr_pair
      ) %>% 
      select(country, date, incidence) %>% 
      pivot_wider(
        names_from = country,
        values_from = incidence
      ) %>%
      as.data.frame() %>% 
      analyze.coherency(
        my.pair = curr_pair,
        dt = 1/12,
        lowerPeriod = 1/6,
        upperPeriod = 8,
        verbose = verbose
      )
    ) %>% invisible()
    
    out
  })
  
  names(coherence) <- map_chr(pairs, \(pair) paste(pair, collapse = " - "))

  coherence
}
Function to generate cross-wavelet power spectrum plot
plot_cwavelet_power <- function(coherence_list){
  # return heatmap of power and also global power
  imap(coherence_list, \(coherence, label){
    wc.image(
        coherence,
        # set significance level for drawing contour (white line around an area)
        siglvl.contour = 0.1, 
        # set significance level for drawing arrows
        siglvl.arrow = 0.05, which.arrow.sig = "wt",
        main = paste0(label, " power spectrum"),
        legend.params = list(lab = "cross-wavelet power"),
        periodlab = "Period(Year)",
        show.date = TRUE
      )
    
    spectrum <- recordPlot()
    
    wc.avg(
        coherence,
        main = paste0(label, " global power")
      )
    
    global_power <- recordPlot()
    
    list(
      "spectrum" = spectrum,
      "global_power" = global_power
    )
  })
}
capture.output(
  countries_coherence <- log_transformed %>% 
    filter(country %in% analyze_countries) %>% 
    generate_biwavelet_analysis()
) %>% invisible()

Cross-wavelet power spectrum

Interpretation of the arrows:

  • horizontal arrows pointing to the right indicate in-phase

  • horizontal arrows pointing to the left indicate anti-phase

Some notable observations at the 4-5 period (major epidemics cycle):

  • the measles cycle of Vietnam - Philippines started out in-phase but slowly turns out of phase at the end

  • the measles cycle of Vietnam - Thailand started out out of phase phase but slowly out turns in-phase (around the 2021 year mark)

plot_cwavelet_power(countries_coherence) %>% invisible()

Coherence

Plot coherence over time-period

Function to plot coherence
plot_coherence <- function(coherence_list){
  out <- map2(names(coherence_list), coherence_list, \(label, analysis){
      wc.image(
        analysis,
        which.image = "wc",
        # set significance level for drawing contour (white line around an area)
        siglvl.contour = 0.1, 
        # set significance level for drawing arrows
        siglvl.arrow = 0.05, which.arrow.sig = "wt",
        main = paste0(label, " coherence plot"),
        legend.params = list(lab = "wavelet coherence level"),
        periodlab = "Period(Year)",
        show.date = TRUE
      )
  })
  
  names(out) <- names(coherence_list)
  out
}
wc_plots <- plot_coherence(countries_coherence)

Plot average coherence at each period across time

Function to plot average coherence
plot_avg_coherence <- function(coherence_list){
  map2(names(coherence_list), coherence_list, \(label, analysis){
    # compute data for plotting
    df <- data.frame(
      period = analysis$Period,
      avg_coherence = analysis$Coherence.avg,
      p_val = analysis$Coherence.avg.pval
    ) %>% 
      mutate(
        p_val_label = case_when(
          p_val <= 0.05 ~ "<= 0.05",
            p_val <= 0.1 ~ "<= 0.1",
            .default = ">0.1"
        )
      )
    
    ggplot() +
      geom_line(
        aes(
          x = period, y = avg_coherence
        ),
        data = df
      ) +
      geom_point(
        aes(
          x = period, y = avg_coherence, color = p_val_label
        ), 
        data = df %>% filter(p_val_label != ">0.1")
      ) + 
      labs(
        title = paste0("Average coherence of ", label),
        x = "Period",
        y = "Coherence",
        color = "p-value"
      )
  })
}
plot_avg_coherence(countries_coherence)
[[1]]


[[2]]


[[3]]


[[4]]


[[5]]

Phase difference

Function to compute phase difference
# alternative way to get phase difference
compute_phase_diff <- function(coherence_list, period = 5,
                                   diff_to_time = TRUE,
                                   time_unit = "year"){
  df <- map2_dfr(coherence_list, names(coherence_list), \(coherence, label) {
    # if only one period is given -> select the closest period
    # else select all period within the range
    row_period <- if (length(period) == 1)
      which.min(abs(coherence$Period - period)) else
      which((coherence$Period >= min(period)) &
              (coherence$Period <= max(period)))
    
    data.frame(
      date = coherence$series$date,
      phase_diff = if (length(row_period) > 1)
        colMeans(coherence$Angle[row_period, ]) else
        coherence$Angle[row_period, ]
    ) %>% 
      mutate(
        phase_diff = if(diff_to_time) phase_diff*mean(period)/(2*pi) else phase_diff,
        label = label
      )
  })
  
  plot <- df %>% 
    ggplot() +
    geom_line(
      aes(x = date, y = phase_diff, color = label)
    ) +
    labs(
      x = "Date",
      y = paste0("Phase difference (", if(diff_to_time) time_unit else "radian",")" )
    ) +
    geom_hline(
      aes(yintercept = 0), linetype = "dashed", color = "black"
    )
  
  list(
    df = df, 
    plot = plot
  )
}
compute_phase_diff(countries_coherence, period = 5)$plot

compute_phase_diff(countries_coherence, period = 2.5, diff_to_time = TRUE)$plot

Synchrony

Synchrony is computed using the following formula

\[ \text{coherence} \cdot |1 - \frac{ \theta_{diff}}{\pi}| \]

Where \(\theta_{diff}\) is the phase difference (in radians)

Such that high coherence with small phase difference is closer to 1 (i.e. more closely aligned).

Function to compute synchrony
# Methodology proposed in the paper 10.1126/scitranslmed.adq4326
# Synchrony closer to 1 the better
compute_synchrony <- function(coherence_list, period = 5){
  df <- map2_dfr(coherence_list, names(coherence_list), \(coherence, label) {
    # if only one period is given -> select the closest period
    # else select all period within the range
    row_period <- if (length(period) == 1)
      which.min(abs(coherence$Period - period)) else
      which((coherence$Period >= min(period)) &
              (coherence$Period <= max(period)))
    
    data.frame(
      date = coherence$series$date,
      phase_diff = if (length(row_period) > 1)
        colMeans(coherence$Angle[row_period, ]) else
        coherence$Angle[row_period, ],
      coherence = if (length(row_period) > 1)
        colMeans(coherence$Coherence[row_period, ]) else
        coherence$Coherence[row_period, ]
    ) %>% 
      mutate(
        synchrony = abs(1 - phase_diff/pi) * coherence,
        label = label
      )
  })
  
  plot <- df %>% 
    ggplot() +
    geom_line(
      aes(x = date, y = synchrony, color = label)
    ) +
    labs(
      x = "Date",
      y = "Synchrony"
    ) +
    geom_hline(
      aes(yintercept = 1), linetype = "dashed", color = "black"
    )
  
  list(
    df = df, 
    plot = plot
  )
}

5-year period synchrony

multiannual_synch <- compute_synchrony(countries_coherence, period = 5)
multiannual_synch$plot

Roughly biennial synchrony

biennial_synch <- compute_synchrony(countries_coherence, period = 2.5)
biennial_synch$plot

SEA map

Get map data of SEA countries
library(geodata)
library(countrycode)
library(sf)
library(CoordinateCleaner)

# convert contry name as ISO vec
sea_iso <- countrycode(filtered_countries, origin = "country.name", destination = "iso3c")
analyzed_iso <- countrycode(names(countries_wavelets), origin = "country.name", destination = "iso3c")

# get gadm map 
map_data <- map_dfr(sea_iso, \(iso){
  data <- gadm(iso, level = 0, path = "data/gadm/")
  
  data.frame(
    iso = iso,
    sf = st_as_sf(data),
    status = ifelse(iso %in% analyzed_iso, "analyzed", "excluded")
  )
})

Get SEA map

generate SEA map
sea_plot <- map_data %>% 
  mutate(
    country = if_else(status == "analyzed", 
                      countrycode(iso, origin = "iso3c", destination = "country.name"), 
                      NA)
  ) %>% 
  ggplot() +
    geom_sf(aes(geometry = sf.geometry, fill = country)) +
    scale_fill_discrete(
      na.value = "grey80",
      labels = \(x) if_else(is.na(x), "Excluded", x)
    )

Prepare data for countries pairs

Generate data to plot arrow
# expected input to be the output from function compute_synchrony
prepare_plot_data <- function(synchrony_out, centroid_df = iso_centroid){
  synchrony_out$df %>% 
    # ---- compute mean coherence-weighted phase difference
    group_by(label) %>% 
    summarize(
      avg_phase_diff = sum(coherence * phase_diff)/sum(coherence)
    ) %>% 
    separate(label, into = c("from", "to"), sep = " - ") %>% 
    mutate(
      from = countrycode(from, origin = "country.name", destination = "iso3c"),
      to = countrycode(to, origin = "country.name", destination = "iso3c")
    ) %>% 
    # ---- swap from and to countries if the avg_phase_diff is negative (i.e. the "to" is actually the leading time series)
    mutate(
      tmp = if_else(avg_phase_diff < 0, from, to),
      from = if_else(avg_phase_diff < 0, to, from),
      to = tmp,
      # since we already use the sign for determine the direction, keep absolute value only
      avg_phase_diff = abs(avg_phase_diff)
    ) %>% select(-tmp) %>% 
    # ---- get the coordinate of the centroid of each country
    left_join(
      # get coordinate of centroid of origin country 
      centroid_df %>% rename(from_x = x, from_y = y),
      by = join_by(from == iso)
    ) %>% 
    left_join(
      # get coordinate of centroid of destination country
      centroid_df %>% rename(to_x = x, to_y = y),
      by = join_by(to == iso)
    )
}

# get data.frame of iso and coordinate of centroid of that country
# iso_centroid <- map_data %>% 
#   as_tibble() %>% 
#   mutate(
#     centroid = st_coordinates(st_point_on_surface(sf.geometry)),
#     x = centroid[,"X"],
#     y = centroid[,"Y"]
#   ) %>% 
#   select(iso, x, y)

# try centroid from CoordinateCleaner package
iso_centroid <- countryref %>% 
  filter(
    iso3 %in% sea_iso,
    type == "country"
  ) %>% 
  # for each country, only keep the first record
  group_by(iso3) %>% 
  slice(1) %>% 
  ungroup() %>% 
  rename(
    iso = iso3,
    x = centroid.lon,
    y = centroid.lat
  ) %>% 
  select(iso, x, y)

Vietnam measles wave

Visualize the direction of measles wave relative to Vietnam.

General interpretation of the plot:

  • Arrow indicates which time series leads (to) the other (i.e., pointing from the leading one to the lagging one).

  • Arrow boldness is determined by the absolute coherence-weighted average weight difference. Bolder arrow indicates that the lagging time series is more closely aligned with the leading time series.

The plot suggests that major measles cycle in Vietnam (roughly every 5-year) usually happen after Thailand and/or Philippines.

Code
multiannual_direction <- prepare_plot_data(multiannual_synch)
sea_plot +
  geom_segment(
    aes(x = from_x, y = from_y, xend = to_x, yend = to_y, alpha = avg_phase_diff),
    color = "black",
    arrow = grid::arrow(length = unit(0.2,"cm")),
    data = multiannual_direction
  ) + 
  scale_alpha(range = c(1, 0.3)) +
  labs(
    x = "", y = "",
    fill = "country",
    alpha = "|avg. phase diff|"
  )

The plot suggests that the (roughly) biennial measles cycle in Thailand and/or Philippines usually happen shortly after Vietnam.

Code
biennual_direction <- prepare_plot_data(biennial_synch)
sea_plot +
  geom_segment(
    aes(x = from_x, y = from_y, xend = to_x, yend = to_y, alpha = avg_phase_diff),
    color = "black",
    arrow = grid::arrow(length = unit(0.2,"cm")),
    data = biennual_direction
  ) +
  scale_alpha(range = c(1, 0.3)) +
  labs(
    x = "", y = "",
    fill = "country",
    alpha = "|avg. phase diff|"
  )

SEA countries

Visualize the flow of measles between SEA countries, following these steps:

  • Perform bi-wavelet analysis on all pairs of countries

  • Compute coherence-weighted average phase difference (CAPD), the result is in radian.

  • Generate the arrows:

    • For each origin country, create an arrow to the destination country with lowest absolute CAPD value.

    • Alpha value for the arrow is also determined by absolute CAPD value, where bolder color indicates lower absolute CAPD.

Compute coherence of all country pairs
all_countries_coherence <- log_transformed %>% 
    filter(country %in% analyze_countries) %>% 
    generate_biwavelet_analysis(ref_country = NA)
Compute synchrony
all_countries_synch <- compute_synchrony(all_countries_coherence, period = 5)
Compute measles direction between SEA countries
all_countries_direction <- prepare_plot_data(all_countries_synch)
Get the data for plotting direction of measles
nearest_flow <- all_countries_direction %>% 
      mutate(
        # update code for origin country 
        from = countrycode(from, origin = "iso3c", destination = "country.name")
      ) %>% 
      # for each origin country, keep the lowest absolute average phase difference
      group_by(from) %>% 
      filter(abs(avg_phase_diff) == min(abs(avg_phase_diff))) %>% 
      ungroup()

sea_plot +
  geom_segment(
    aes(
      x = from_x, y = from_y, xend = to_x, yend = to_y, 
      alpha = avg_phase_diff),
    color = "black",
    arrow = grid::arrow(length = unit(0.2,"cm"), angle = 20),
    data = nearest_flow
  ) +
  scale_alpha(range = c(1, 0.3)) +
  labs(
    x = "", y = "",
    fill = "country",
    alpha = "Absolute avg. phase diff"
  )

Correlation to distance

Distance between 2 countries are defined as the distance between centroids.

Only 5-year period is visualized here

Prepare data for plotting
countries_avg <- all_countries_synch$df %>% 
  group_by(label) %>% 
  summarize(
    avg_synchrony = mean(synchrony),
    avg_coherence = mean(coherence),
    # convert phase diff from radians to years
    avg_abs_phasediff = mean(abs(phase_diff))*5/(2*pi)
  ) %>% 
  ungroup() %>% 
  # get centroid of each country pairs
  separate(label, into = c("from", "to"), sep = " - ") %>% 
  mutate(
    from = countrycode(from, origin = "country.name", destination = "iso3c"),
    to = countrycode(to, origin = "country.name", destination = "iso3c")
  ) %>% 
  left_join(
    # get coordinate of centroid of origin country 
    iso_centroid %>% rename(from_x = x, from_y = y),
      by = join_by(from == iso)
  ) %>% 
  left_join(
    # get coordinate of centroid of destination country
    iso_centroid %>% rename(to_x = x, to_y = y),
    by = join_by(to == iso)
  )


# compute distance
from_pts <- st_as_sf(countries_avg, coords = c("from_x", "from_y"), crs = 4326)
to_pts   <- st_as_sf(countries_avg, coords = c("to_x", "to_y"), crs = 4326)
distances <- st_distance(from_pts, to_pts, by_element = TRUE)

countries_avg <- countries_avg %>% 
  mutate(
    # convert to km
    distances = as.numeric(distances)/1000
  )

Synchrony

Code
countries_avg %>% 
  ggplot(aes(x = distances, y = avg_synchrony)) +
    geom_point() +
    geom_smooth(
      color = "cornflowerblue",
      fill = "cornflowerblue",
      method = "loess"
    ) + 
    labs(
      x = "Distance (km)",
      y = "Average Synchrony"
    )
`geom_smooth()` using formula = 'y ~ x'

Coherence

Code
countries_avg %>% 
  ggplot(aes(x = distances, y = avg_coherence)) +
    geom_point() +
    geom_smooth(
      method = "loess",
      color = "cornflowerblue",
      fill = "cornflowerblue",
    ) + 
    labs(
      x = "Distance (km)",
      y = "Average Coherence"
    )
`geom_smooth()` using formula = 'y ~ x'

Phase difference

Visualize the average lag between 2 time series as distance grow

Code
countries_avg %>% 
  ggplot(aes(x = distances, y = avg_abs_phasediff)) +
    geom_point() +
    geom_smooth(
      method = "loess",
      color = "cornflowerblue",
      fill = "cornflowerblue",
    ) + 
    labs(
      x = "Distance (km)",
      y = "Average absolute phase difference (year)"
    )
`geom_smooth()` using formula = 'y ~ x'

6x6 grid

Arrange wavelet and biwavelet spectra into a 6x6 grid

Prepare data
permu_coherence <- generate_biwavelet_analysis(log_transformed, ref_country = NA, repeated = TRUE)
biwavelet_plots <- plot_cwavelet_power(permu_coherence) 
Prepare data
# only keep the spectrum plots
biwavelet_plots <- biwavelet_plots %>% lapply(\(plot){plot$spectrum})

wavelet_plots <- plot_power(countries_wavelets)
Prepare data
wavelet_plots <- wavelet_plots %>% lapply(\(plot){plot$spectrum})
Function to generate grid plot
library(grid)
library(gridBase)
library(cowplot)

# hacky way to turn recorded plot to grob
record_to_grob <- function(rec_plot, width = 8, height = 6, res = 200, zoom_w=0.9, zoom_h=0.8) {
  tf <- tempfile(fileext = ".png")
  png(tf, width=width, height = height,units="in", res=res)
  replayPlot(rec_plot)
  dev.off()
  
  g <- rasterGrob(png::readPNG(tf))
  # crop image
  grid::editGrob(
    g,
    vp = grid::viewport(x = 0.5, y = 0.5,
                        width = zoom_w, height = zoom_h,
                        just = c("center", "center"))
  )
}

generate_grid_plots <- function(wavelet_plots, cwavelet_plots){
  countries <- names(wavelet_plots)
  n <- length(countries)
  
  plot_rows <- lapply(seq_len(n+1), \(row){
    curr_row <- lapply(seq_len(n+1), \(col){
      curr_plt <- NULL
      if(row==1){
        curr_plt <- if(col == 1) NULL else textGrob(countries[col-1], 
                                                    gp=gpar(fontsize=8))
      }else if(col == 1){
        curr_plt <- textGrob(countries[row-1], gp=gpar(fontsize=8))
      }else{
        if(row==col) {
          curr_plt <- record_to_grob(wavelet_plots[[countries[row-1]]],
                                     zoom_w=1.08, zoom_h=1.4)
        }else{
          # nm <- intersect(
          #   c(paste(countries[row-1], "-", countries[col-1]),
          #     paste(countries[col-1], "-", countries[row-1])
          #   ),
          #   names(cwavelet_plots)
          # )
          
          nm <- paste(countries[row-1], "-", countries[col-1])
          curr_plt <- record_to_grob(cwavelet_plots[[nm]], zoom_w=1.08, zoom_h=1.4)
        }
      }
      curr_plt
    })
    
    plot_grid(plotlist = curr_row, 
              align="h", ncol = n+1, nrow=1,
              rel_widths = c(1, rep(2, n)),
              greedy = TRUE
              )
  })
  
  plot_grid(plotlist = plot_rows, align="v", nrow = n+1, ncol=1,
            rel_heights = c(1, rep(2, n)),
            greedy = TRUE
            )
}
Output
grid_out <- generate_grid_plots(wavelet_plots, biwavelet_plots)
grid_out

Output
# save output
# ggsave("out_figs/grid_spectrum.png", plot = grid_out, 
#        height = 1600, width = 2800, units = "px",
#        dpi = 700)