This document follows the modelling pipeline to produce a list of introduced species ranked by emerging status.

1 Setup

Load libraries:

library(tidyverse) # To do data science
library(tidylog) # To provide feedback on dplyr functions
library(here) # To find files

2 Get data

2.1 GAM outputs

We read the emerging status based on GAM, saved as one of the outputs of modelling pipeline. We do it for Belgium, its regions (Flanders, Wallonia and Brussels) and protected areas and for both occurrences and observed occupancy (number of occupied cells):

regions <- list(
  "Belgium",
  "Flanders",
  "Wallonia",
  "Brussels",
  "natura2000"
)
names(regions) <- regions
indicators <- c("occs", "occupancy")
names(indicators) <- indicators

# Create all combinations regions/indicators
combinations <- expand_grid(region = regions, indicator = indicators)

# Apply a function to read files for each combination
em_gam_list <- pmap(combinations, function(region, indicator) {
  paste("Reading", indicator, "for", region)
  readr::read_tsv(
    file = here::here(
      "data",
      "output",
      "GAM_outputs",
      paste0("summary_GAM_", indicator, "_", region, ".tsv")
    ),
    na = ""
  ) %>%
    tidylog::mutate(
      indicator = paste(indicator, region, sep = "_"),
      model = "GAM"
    )
})

# Name the results
names(em_gam_list) <- paste("gam", combinations$indicator,
  combinations$region,
  sep = "_"
)

Preview GAM ouput for modelling the number of occurrences in Belgium (100 rows):

em_gam_list$gam_occs_Belgium %>% head(100)

Preview GAM ouput for modelling occupancy (number of 1x1km cells) in Belgium (100 rows):

em_gam_list$gam_occupancy_Belgium %>% head(100)

The emerging status (column em_status) score is encoded as follows:

  • 3: emerging
  • 2: potentially emerging
  • 1: unclear
  • 0: not emerging

As you can see in preview above, GAM could not be applied to all taxa (em_status = NA). Number of taxon/year combinations without emerging status:

em_gam_na <- purrr::map(
  em_gam_list,
  function(df) {
    nas <- df %>%
      dplyr::filter(is.na(em_status)) %>%
      nrow()
    total <- nrow(df)
    list("NAs" = nas, "Total" = total)
  }
)
purrr::iwalk(
  em_gam_na,
  function(x, y) {
    message(paste0("Number of NAs (", y, "): ", x$NAs, "/", x$Total))
  }
)

In these cases we will use the emerging status as assessed by applying decision rules.

2.2 Decision rules outputs

We read the emerging status based on decision rules, saved as one of the outputs of modelling pipeline:

# Apply a function to read files for each combination
em_decision_rules_list <- pmap(combinations, function(region, indicator) {
  paste("Reading", indicator, "for", region)
  readr::read_tsv(
    file = here::here(
      "data",
      "output",
      "decision_rules_outputs",
      paste0("output_decision_rules_", indicator, "_", region, ".tsv")
    ),
    na = ""
  ) %>%
    tidylog::mutate(
      indicator = paste(indicator, region, sep = "_"),
      model = "decision_rules"
    )
})

# Name the results
names(em_decision_rules_list) <- paste(
  "decision_rules",
  combinations$indicator,
  combinations$region,
  sep = "_"
)

Previewing output of decision rules for occupancy in Natura2000 protected areas (100 rows):

em_decision_rules_list$decision_rules_occupancy_natura2000 %>% head(100)

2.3 Scientific names

Read list of scientific names:

taxa_names <- readr::read_tsv(
  file = here::here(
    "data",
    "interim",
    "timeseries_taxonomic_info.tsv"
  ),
  na = ""
)

These names will be added to ranking for better readability.

3 Preprocessing

Merge results from GAM and decision rules:

em_list <- append(em_gam_list, em_decision_rules_list)

We have now to make a distinction between Belgium and its provinces. Assessment at Belgian level takes into account the emerging status of the protected areas as well, while at regional level this is not (yet) done.

However, We can define a general function to spread columns for ranking. We spread the data frames by model (GAM or decision rules), year and indicator (occurrences or occupancy). We then assign a unique emerging score (if GAM is NA, we choose decision rules). We spread by years and indicators and add scientific names.

spread_em_dfs <- function(em_list) {
  # Make a data.frame binding all dfs by row
  em_df <-
    dplyr::bind_rows(em_list) %>%
    tidylog::select(-c(
      dplyr::starts_with("growth"),
      dplyr::starts_with("dr")
    )) %>%
    # Spread by model
    tidylog::pivot_wider(
      names_from = model,
      id_cols = c(taxonKey, year, indicator),
      values_from = em_status
    ) %>%
    # Assign unique emerging score (if GAM is NA, choose decision rules)
    tidylog::mutate(
      em = ifelse(is.na(GAM), decision_rules, GAM),
      method = ifelse(is.na(GAM), "decision_rules", "GAM")
    ) %>%
    # Spread by years
    tidylog::pivot_wider(
      names_from = year,
      values_from = em,
      id_cols = c(taxonKey, indicator),
      names_prefix = paste0("year_")
    ) %>%
    # Spread by indicator
    tidylog::pivot_wider(
      names_from = indicator,
      id_cols = taxonKey,
      values_from = starts_with("year"),
      names_prefix = "em_status_"
    ) %>%
    # Add scientific names
    tidylog::left_join(taxa_names, by = "taxonKey")
  em_df
}

3.1 Belgium

Select only list elements containing Belgium or natura2000 in the name:

em_list_be <- em_list[grep("Belgium|natura2000", names(em_list))]

Apply the function spread_em_dfs() to Belgium:

em_df_be <- spread_em_dfs(em_list_be)

To rank taxa equally placed, we use the average of the minimal guaranteed growth of number of occurrences in all Belgium over all the evaluation years.

# Add mean growth to em_df
em_df_be <-
  em_df_be %>%
  tidylog::left_join(em_gam_list$gam_occs_Belgium %>%
    tidylog::group_by(taxonKey) %>%
    tidylog::summarize(mean_growth = mean(growth, na.rm = TRUE))) %>%
  tidylog::relocate(.data$mean_growth, .before = .data$canonicalName)

A preview (100 rows):

em_df_be %>%
  tidylog::filter(!is.na(mean_growth)) %>%
  tidylog::slice_head(n = 100)

3.2 Regions

Select only list elements containing the region in the name:

regions <- list("Flanders", "Wallonia", "Brussels")
names(regions) <- regions
em_list_regions <- purrr::map(
  regions,
  function(r) em_list[grep(r, names(em_list))]
)

Apply the function spread_em_dfs() to each region:

em_df_regions <- purrr::map(em_list_regions, spread_em_dfs)

To rank taxa equally placed, we use the average of the minimal guaranteed growth of number of occurrences over all the evaluation years.

# Add mean growth to em_df
em_df_regions <-
  em_df_regions %>% purrr::imap(
    function(df, region) {
      occs_column <- paste0("gam_occs_", region)
      df %>%
        tidylog::left_join(em_gam_list[[occs_column]] %>%
          tidylog::group_by(taxonKey) %>%
          tidylog::summarize(mean_growth = mean(growth, na.rm = TRUE))) %>%
        tidylog::relocate(.data$mean_growth, .before = .data$canonicalName)
    }
  )

A preview (100 rows) for each region:

em_df_regions %>%
  purrr::map(function(df) {
    df %>%
      tidylog::filter(!is.na(mean_growth)) %>%
      tidylog::slice_head(n = 100)
  })
## $Flanders
## # A tibble: 100 × 13
##    taxonKey year_2020_em_status_…¹ year_2020_em_status_…² year_2021_em_status_…³
##       <dbl>                  <dbl>                  <dbl>                  <dbl>
##  1  1031394                      0                      1                      0
##  2  1031742                      1                      1                      1
##  3  1047536                      3                      3                      3
##  4  1309600                      2                      2                      3
##  5  1421091                      3                      0                      3
##  6  1652150                      1                      1                      1
##  7  1690429                      0                      1                      1
##  8  1718308                      0                      1                      0
##  9  1749449                      3                      2                      3
## 10  1749580                      0                      2                      0
## # ℹ 90 more rows
## # ℹ abbreviated names: ¹​year_2020_em_status_occs_Flanders,
## #   ²​year_2020_em_status_occupancy_Flanders, ³​year_2021_em_status_occs_Flanders
## # ℹ 9 more variables: year_2021_em_status_occupancy_Flanders <dbl>,
## #   year_2022_em_status_occs_Flanders <dbl>,
## #   year_2022_em_status_occupancy_Flanders <dbl>, mean_growth <dbl>,
## #   canonicalName <chr>, kingdomKey <dbl>, classKey <dbl>, class <chr>, …
## 
## $Wallonia
## # A tibble: 100 × 13
##    taxonKey year_2020_em_status_…¹ year_2020_em_status_…² year_2021_em_status_…³
##       <dbl>                  <dbl>                  <dbl>                  <dbl>
##  1  1047536                      3                      0                      3
##  2  1690429                      0                      0                      0
##  3  1718308                      1                      3                      1
##  4  2020655                      0                      2                      0
##  5  2041682                      1                      3                      1
##  6  2226990                      3                      3                      3
##  7  2337607                      1                      0                      1
##  8  2362868                      1                      1                      1
##  9  2379089                      0                      0                      0
## 10  2379245                      0                      1                      0
## # ℹ 90 more rows
## # ℹ abbreviated names: ¹​year_2020_em_status_occs_Wallonia,
## #   ²​year_2020_em_status_occupancy_Wallonia, ³​year_2021_em_status_occs_Wallonia
## # ℹ 9 more variables: year_2021_em_status_occupancy_Wallonia <dbl>,
## #   year_2022_em_status_occs_Wallonia <dbl>,
## #   year_2022_em_status_occupancy_Wallonia <dbl>, mean_growth <dbl>,
## #   canonicalName <chr>, kingdomKey <dbl>, classKey <dbl>, class <chr>, …
## 
## $Brussels
## # A tibble: 80 × 13
##    taxonKey year_2020_em_status_…¹ year_2020_em_status_…² year_2021_em_status_…³
##       <dbl>                  <dbl>                  <dbl>                  <dbl>
##  1  1047536                      3                      3                      3
##  2  1690429                      1                      2                      1
##  3  2020655                      1                      2                      1
##  4  2037925                      3                      3                      3
##  5  2041682                      3                      3                      3
##  6  2078852                      3                      1                      1
##  7  2436940                      1                      2                      1
##  8  2437450                      0                      2                      0
##  9  2439261                      2                      2                      2
## 10  2479226                      3                      1                      3
## # ℹ 70 more rows
## # ℹ abbreviated names: ¹​year_2020_em_status_occs_Brussels,
## #   ²​year_2020_em_status_occupancy_Brussels, ³​year_2021_em_status_occs_Brussels
## # ℹ 9 more variables: year_2021_em_status_occupancy_Brussels <dbl>,
## #   year_2022_em_status_occs_Brussels <dbl>,
## #   year_2022_em_status_occupancy_Brussels <dbl>, mean_growth <dbl>,
## #   canonicalName <chr>, kingdomKey <dbl>, classKey <dbl>, class <chr>, …

4 Ranking by emerging status

Now we are ready to rank the alien species based on the emerging status.

4.1 Hierarchical ranking

4.1.1 Belgium

The ranking is based on the highest emerging status. The following priority rules are applied, in order of importance:

  1. the more recent, the higher priority is
  2. emerging statuses in protected areas are more important than the ones defined over entire Belgium
  3. emerging statuses from occupancy are more important than the ones from occurrences
  4. the higher average minimal guaranteed growth (#occs/year), the higher priority is

We first define a function to rank the emerging status of species based on the priority rules above. The function takes as input the years, indicators, regions and the data frame with emerging status scores. It returns a data frame with the ranking of species based on the priority rules. The function is called hierarchical_ranking() and is defined as follows:

hierarchical_ranking <- function(years, indicators, regions, df) {
  # Generate the column names dynamically
  cols_to_sort <- expand.grid(indicator = indicators, region = regions, year = years) %>%
    mutate(col_name = paste("year", year, "em_status", indicator, region, sep = "_")) %>%
    pull(col_name)

  # Add mean_growth to the end of the sorting columns
  cols_to_sort <- c(cols_to_sort, "mean_growth")

  # Apply the sorting
  ranking <- df %>%
    arrange(across(all_of(cols_to_sort), desc)) %>%
    tidylog::select(
      taxonKey, canonicalName, kingdom, class,
      cols_to_sort,
      dplyr::everything()
    )
  ranking
}

We apply the function to Belgium and its Natura2000 protected areas:

years <- sort(
  unique(
    as.numeric(gsub("^year_(\\d+)_em_status_.*", "\\1", names(em_df_be)))
  ),
  decreasing = TRUE
)
indicators <- c("occupancy", "occs")
regions <- c("natura2000", "Belgium")

ranking_be <- hierarchical_ranking(years, indicators, regions, em_df_be)
ranking_be

4.1.2 Regions

We have not done (yet) an analysis for protected areas at regional level. So, the ranking for regions will be simplified. It means that the priority rule 2 described above will not be applied. The priority rules will then look as follows:

  1. the more recent, the higher priority is
  2. emerging statuses from occupancy indicators have priority above the ones from occurrence indicator
  3. the higher average minimal guaranteed growth (#occs/year), the higher priority is

We apply the function hierarchical_ranking() to Flanders, Wallonia and Brussels:

years <- sort(
  unique(
    as.numeric(gsub("^year_(\\d+)_em_status_.*", "\\1", names(em_df_be)))
  ),
  decreasing = TRUE
)
indicators <- c("occupancy", "occs")
regions <- c("Flanders", "Wallonia", "Brussels")

ranking_regions <- purrr::imap(
  em_df_regions,
  function(em_df, region) hierarchical_ranking(years, indicators, region, em_df)
)
ranking_regions
## $Flanders
## # A tibble: 1,655 × 13
##    taxonKey canonicalName              kingdom  class     year_2022_em_status_…¹
##       <dbl> <chr>                      <chr>    <chr>                      <dbl>
##  1  6098843 Ctenolepisma longicaudatum Animalia Insecta                        3
##  2  2225772 Hemigrapsus sanguineus     Animalia Malacost…                      3
##  3  1047536 Leptinotarsa decemlineata  Animalia Insecta                        3
##  4  2498344 Cygnus atratus             Animalia Aves                           3
##  5  3098912 Cosmos bipinnatus          Plantae  Magnolio…                      3
##  6  5232437 Branta canadensis          Animalia Aves                           3
##  7  2479226 Psittacula krameri         Animalia Aves                           3
##  8  2495494 Geopelia cuneata           Animalia Aves                           3
##  9  2650827 Cyrtomium fortunei         Plantae  Polypodi…                      3
## 10  6096602 Lophocolea semiteres       Plantae  Jungerma…                      3
## # ℹ 1,645 more rows
## # ℹ abbreviated name: ¹​year_2022_em_status_occupancy_Flanders
## # ℹ 8 more variables: year_2022_em_status_occs_Flanders <dbl>,
## #   year_2021_em_status_occupancy_Flanders <dbl>,
## #   year_2021_em_status_occs_Flanders <dbl>,
## #   year_2020_em_status_occupancy_Flanders <dbl>,
## #   year_2020_em_status_occs_Flanders <dbl>, mean_growth <dbl>, …
## 
## $Wallonia
## # A tibble: 866 × 13
##    taxonKey canonicalName            kingdom  class       year_2022_em_status_…¹
##       <dbl> <chr>                    <chr>    <chr>                        <dbl>
##  1  3033242 Anemone blanda           Plantae  Magnoliops…                      3
##  2  3098912 Cosmos bipinnatus        Plantae  Magnoliops…                      3
##  3  3152583 Hibiscus syriacus        Plantae  Magnoliops…                      3
##  4  3012089 Sorbaria sorbifolia      Plantae  Magnoliops…                      3
##  5  2888439 Papaver somniferum       Plantae  Magnoliops…                      3
##  6  5362054 Crassula helmsii         Plantae  Magnoliops…                      3
##  7  3084022 Phytolacca acinosa       Plantae  Magnoliops…                      3
##  8  5281901 Campylopus introflexus   Plantae  Bryopsida                        3
##  9  2226990 Pacifastacus leniusculus Animalia Malacostra…                      3
## 10  5285637 Pinus sylvestris         Plantae  Pinopsida                        3
## # ℹ 856 more rows
## # ℹ abbreviated name: ¹​year_2022_em_status_occupancy_Wallonia
## # ℹ 8 more variables: year_2022_em_status_occs_Wallonia <dbl>,
## #   year_2021_em_status_occupancy_Wallonia <dbl>,
## #   year_2021_em_status_occs_Wallonia <dbl>,
## #   year_2020_em_status_occupancy_Wallonia <dbl>,
## #   year_2020_em_status_occs_Wallonia <dbl>, mean_growth <dbl>, …
## 
## $Brussels
## # A tibble: 505 × 13
##    taxonKey canonicalName         kingdom  class         year_2022_em_status_o…¹
##       <dbl> <chr>                 <chr>    <chr>                           <dbl>
##  1  2037925 Orientus ishidae      Animalia Insecta                             3
##  2  1311477 Vespa velutina        Animalia Insecta                             3
##  3  2153287 Zoropsis spinimana    Animalia Arachnida                           3
##  4  3012089 Sorbaria sorbifolia   Plantae  Magnoliopsida                       3
##  5  3054117 Aubrieta deltoidea    Plantae  Magnoliopsida                       3
##  6  7127810 Cyclamen hederifolium Plantae  Magnoliopsida                       3
##  7  8313153 Quercus palustris     Plantae  Magnoliopsida                       3
##  8  3067119 Euphorbia maculata    Plantae  Magnoliopsida                       3
##  9  2891783 Impatiens balfourii   Plantae  Magnoliopsida                       3
## 10  3084015 Phytolacca americana  Plantae  Magnoliopsida                       3
## # ℹ 495 more rows
## # ℹ abbreviated name: ¹​year_2022_em_status_occupancy_Brussels
## # ℹ 8 more variables: year_2022_em_status_occs_Brussels <dbl>,
## #   year_2021_em_status_occupancy_Brussels <dbl>,
## #   year_2021_em_status_occs_Brussels <dbl>,
## #   year_2020_em_status_occupancy_Brussels <dbl>,
## #   year_2020_em_status_occs_Brussels <dbl>, mean_growth <dbl>, …

4.2 Point strategy

We can also rank using a point strategy reflecting the same ranking strategy as before. To do so we apply gain factors to all emerging status scores using the number of observations in 2020 in all Belgium/region as reference (gain factor = 1).

The following gain factors are applied for Belgium:

indicator year region gain factor
occupancy 2022 protected areas 3
observations 2022 protected areas 2.5
occupancy 2022 Belgium 2.5
observations 2022 Belgium 2
occupancy 2021 protected areas 2.5
observations 2021 protected areas 2
occupancy 2021 Belgium 2
observations 2021 Belgium 1.5
occupancy 2020 protected areas 2
observations 2020 protected areas 1.5
occupancy 2020 Belgium 1.5
observations 2020 Belgium 1

For regions, the gain factors are the same, but the ones for protected areas are not applied.

The following gain factors are applied for each region:

indicator year gain factor
occupancy 2022 2.5
observations 2022 2
occupancy 2021 2
observations 2021 1.5
occupancy 2020 1.5
observations 2020 1

4.2.1 Belgium

ranking_pts_be <-
  em_df_be %>%
  tidylog::mutate(em_pts = year_2022_em_status_occupancy_natura2000 * 3 +
    year_2022_em_status_occs_natura2000 * 2.5 +
    year_2022_em_status_occupancy_Belgium * 2.5 +
    year_2022_em_status_occs_Belgium * 2 +
    year_2021_em_status_occupancy_natura2000 * 2.5 +
    year_2021_em_status_occs_natura2000 * 2 +
    year_2021_em_status_occupancy_Belgium * 2 +
    year_2021_em_status_occs_Belgium * 1.5 +
    year_2020_em_status_occupancy_natura2000 * 2 +
    year_2020_em_status_occs_natura2000 * 1.5 +
    year_2020_em_status_occupancy_Belgium * 1.5 +
    year_2020_em_status_occs_Belgium) %>%
  dplyr::arrange(
    desc(em_pts),
    desc(mean_growth)
  ) %>%
  tidylog::select(
    taxonKey, canonicalName, kingdom, class,
    em_pts, mean_growth,
    year_2022_em_status_occupancy_natura2000,
    year_2022_em_status_occs_natura2000,
    year_2022_em_status_occupancy_Belgium,
    year_2022_em_status_occs_Belgium,
    year_2021_em_status_occupancy_natura2000,
    year_2021_em_status_occs_natura2000,
    year_2021_em_status_occupancy_Belgium,
    year_2021_em_status_occs_Belgium,
    year_2020_em_status_occupancy_natura2000,
    year_2020_em_status_occs_natura2000,
    year_2020_em_status_occupancy_Belgium,
    year_2020_em_status_occs_Belgium,
    dplyr::everything()
  )
ranking_pts_be

4.2.2 Regions

ranking_pts_regions <- purrr::imap(
  em_df_regions,
  function(em_df, region) {
    em_df %>%
      # Remove the region from the column names for generalization
      tidylog::rename_with(~ gsub(paste0("_", region, "$"), "", .x)) %>%
      tidylog::mutate(em_pts = year_2022_em_status_occupancy * 2.5 +
        year_2022_em_status_occs * 2 +
        year_2021_em_status_occupancy * 2 +
        year_2021_em_status_occs * 1.5 +
        year_2020_em_status_occupancy * 1.5 +
        year_2020_em_status_occs) %>%
      dplyr::arrange(
        desc(em_pts),
        desc(mean_growth)
      ) %>%
      tidylog::select(
        taxonKey, canonicalName, kingdom, class,
        em_pts, mean_growth,
        year_2022_em_status_occupancy,
        year_2022_em_status_occs,
        year_2021_em_status_occupancy,
        year_2021_em_status_occs,
        year_2020_em_status_occupancy,
        year_2020_em_status_occs,
        dplyr::everything()
      ) %>%
      # Add region back to the column names starting with `year`
      tidylog::rename_with(~ paste0(.x, "_", region), starts_with("year"))
  }
)
ranking_pts_regions
## $Flanders
## # A tibble: 1,655 × 14
##    taxonKey canonicalName              kingdom  class         em_pts mean_growth
##       <dbl> <chr>                      <chr>    <chr>          <dbl>       <dbl>
##  1  6098843 Ctenolepisma longicaudatum Animalia Insecta         31.5        1.95
##  2  2225772 Hemigrapsus sanguineus     Animalia Malacostraca    31.5        1.52
##  3  1047536 Leptinotarsa decemlineata  Animalia Insecta         31.5        1.39
##  4  2498344 Cygnus atratus             Animalia Aves            31.5        1.27
##  5  3098912 Cosmos bipinnatus          Plantae  Magnoliopsida   31.5        1.23
##  6  5232437 Branta canadensis          Animalia Aves            31.5        1.22
##  7  2479226 Psittacula krameri         Animalia Aves            31.5        1.21
##  8  2495494 Geopelia cuneata           Animalia Aves            31.5        1.20
##  9  2650827 Cyrtomium fortunei         Plantae  Polypodiopsi…   31.5        1.18
## 10  6096602 Lophocolea semiteres       Plantae  Jungermannio…   31.5        1.17
## # ℹ 1,645 more rows
## # ℹ 8 more variables: year_2022_em_status_occupancy_Flanders <dbl>,
## #   year_2022_em_status_occs_Flanders <dbl>,
## #   year_2021_em_status_occupancy_Flanders <dbl>,
## #   year_2021_em_status_occs_Flanders <dbl>,
## #   year_2020_em_status_occupancy_Flanders <dbl>,
## #   year_2020_em_status_occs_Flanders <dbl>, kingdomKey <dbl>, classKey <dbl>
## 
## $Wallonia
## # A tibble: 866 × 14
##    taxonKey canonicalName            kingdom  class         em_pts mean_growth
##       <dbl> <chr>                    <chr>    <chr>          <dbl>       <dbl>
##  1  3033242 Anemone blanda           Plantae  Magnoliopsida   31.5        1.47
##  2  3098912 Cosmos bipinnatus        Plantae  Magnoliopsida   31.5        1.46
##  3  3152583 Hibiscus syriacus        Plantae  Magnoliopsida   31.5        1.41
##  4  3012089 Sorbaria sorbifolia      Plantae  Magnoliopsida   31.5        1.35
##  5  2888439 Papaver somniferum       Plantae  Magnoliopsida   31.5        1.19
##  6  5362054 Crassula helmsii         Plantae  Magnoliopsida   31.5        1.16
##  7  3084022 Phytolacca acinosa       Plantae  Magnoliopsida   31.5        1.13
##  8  5281901 Campylopus introflexus   Plantae  Bryopsida       31.5        1.07
##  9  2226990 Pacifastacus leniusculus Animalia Malacostraca    31.5        1.03
## 10  5285637 Pinus sylvestris         Plantae  Pinopsida       31.5        1.02
## # ℹ 856 more rows
## # ℹ 8 more variables: year_2022_em_status_occupancy_Wallonia <dbl>,
## #   year_2022_em_status_occs_Wallonia <dbl>,
## #   year_2021_em_status_occupancy_Wallonia <dbl>,
## #   year_2021_em_status_occs_Wallonia <dbl>,
## #   year_2020_em_status_occupancy_Wallonia <dbl>,
## #   year_2020_em_status_occs_Wallonia <dbl>, kingdomKey <dbl>, classKey <dbl>
## 
## $Brussels
## # A tibble: 505 × 14
##    taxonKey canonicalName          kingdom  class         em_pts mean_growth
##       <dbl> <chr>                  <chr>    <chr>          <dbl>       <dbl>
##  1  2037925 Orientus ishidae       Animalia Insecta         31.5        1.16
##  2  1311477 Vespa velutina         Animalia Insecta         31.5      NaN   
##  3  2153287 Zoropsis spinimana     Animalia Arachnida       31.5      NaN   
##  4  3012089 Sorbaria sorbifolia    Plantae  Magnoliopsida   31.5      NaN   
##  5  3054117 Aubrieta deltoidea     Plantae  Magnoliopsida   31.5      NaN   
##  6  7127810 Cyclamen hederifolium  Plantae  Magnoliopsida   31.5      NaN   
##  7  8313153 Quercus palustris      Plantae  Magnoliopsida   31.5      NaN   
##  8  2164198 Cheiracanthium mildei  Animalia Arachnida       30        NaN   
##  9  3147168 Erigeron karvinskianus Plantae  Magnoliopsida   30        NaN   
## 10  5341201 Anchusa officinalis    Plantae  Magnoliopsida   29.5      NaN   
## # ℹ 495 more rows
## # ℹ 8 more variables: year_2022_em_status_occupancy_Brussels <dbl>,
## #   year_2022_em_status_occs_Brussels <dbl>,
## #   year_2021_em_status_occupancy_Brussels <dbl>,
## #   year_2021_em_status_occs_Brussels <dbl>,
## #   year_2020_em_status_occupancy_Brussels <dbl>,
## #   year_2020_em_status_occs_Brussels <dbl>, kingdomKey <dbl>, classKey <dbl>

5 Save results

5.1 Final emerging status

We save the final emerging status for Belgium, em_dfbe:

readr::write_tsv(em_df_be,
  file = here::here("data", "output", "emerging_status_Belgium.tsv"),
  na = ""
)

and the final emerging status for regions, em_df_regions:

purrr::iwalk(
  em_df_regions,
  function(df, region) {
    readr::write_tsv(df,
      file = here::here("data", "output", paste0("emerging_status_", region, ".tsv")),
      na = ""
    )
  }
)

5.2 Rankings

We save both the hierarchical ranking and the ranking based on points strategy for Belgium and its regions.

5.2.1 Hierarchical ranking

The ranking for Belgium based on hierarchical strategy is saved in data/output/ranking_emerging_status_hierarchical_strategy_Belgium.tsv:

readr::write_tsv(ranking_be,
  file = here::here(
    "data",
    "output",
    "ranking_emerging_status_hierarchical_strategy_Belgium.tsv"
  ),
  na = ""
)

The ranking for the regions based on hierarchical strategy are saved in data/output/ranking_emerging_status_hierarchical_strategy_*.tsv where * is the region:

purrr::iwalk(
  ranking_regions,
  function(df, region) {
    readr::write_tsv(df,
      file = here::here(
        "data",
        "output",
        paste0("ranking_emerging_status_hierarchical_strategy_", region, ".tsv")
      ),
      na = ""
    )
  }
)

5.2.2 Points strategy

Ranking for Belgium based on points strategy is saved in data/output/ranking_emerging_status_points_strategy_Belgium.tsv:

readr::write_tsv(ranking_pts_be,
  file = here::here(
    "data",
    "output",
    "ranking_emerging_status_points_strategy_Belgium.tsv"
  ),
  na = ""
)

Ranking for the regions based on points strategy are saved in data/output/ranking_emerging_status_points_strategy_*.tsv where * is the region:

purrr::iwalk(
  ranking_pts_regions,
  function(df, region) {
    readr::write_tsv(df,
      file = here::here(
        "data",
        "output",
        paste0("ranking_emerging_status_points_strategy_", region, ".tsv")
      ),
      na = ""
    )
  }
)