1 Goal

In this document we calculate the area of occupancy (AOO), the coverage and the number of observations in Belgian natura2000 protected areas. Data are aggregated per year, species and at a resolution of 1 km x 1km as we use the EEA reference grid for Belgium.

2 Setup

library(tidyverse) # To do datascience
library(tidylog) # To provide feedback on dplyr functions
library(here) # To find files
library(inborutils) # To download data from zenodo
library(rgbif) # To interface with GBIF

3 Read input data

3.1 Occurrence cube at species level

Read the occurrence cubes at species level for Belgium as published on Zenodo (https://zenodo.org/records/10528506):

be_species_cube <- read_csv(
  file = "https://zenodo.org/records/10528506/files/be_species_cube.csv?download=1",
  na = ""
)
be_species_info <- read_csv(
  file = "https://zenodo.org/records/10528506/files/be_species_info.csv?download=1",
  na = ""
) %>%
  tidylog::select(-kingdom)

Preview occurrence cube:

be_species_cube %>% tidylog::slice_head(n = 10)

Preview taxonomic metadata of occurrence cube:

be_species_info %>% tidylog::slice_head(n = 10)

3.2 Occurrence cube of alien taxa

Read the occurrence cubes of alien taxa as published on Zenodo (https://zenodo.org/records/10527772). It is based on GBIF occurrence data of taxa published in the Global Register of Introduced and Invasive Species - Belgium, shortly called the unified checklist:

be_alientaxa_cube <- read_csv(
  file = "https://zenodo.org/records/10527772/files/be_alientaxa_cube.csv?download=1",
  na = ""
)

be_alientaxa_info <- read_csv(
  file = "https://zenodo.org/records/10527772/files/be_alientaxa_info.csv?download=1",
  na = ""
) %>%
  tidylog::select(-kingdom) # as we still have to retrieve taxonomic info from GBIF

Preview occurrence cube:

be_alientaxa_cube %>% tidylog::slice_head(n = 10)

Preview taxonomic metadata of occurrence cube with alien taxa:

be_alientaxa_info %>% tidylog::slice_head(n = 10)

Number of taxa per rank:

be_alientaxa_info %>%
  tidylog::distinct(rank, taxonKey) %>%
  tidylog::group_by(rank) %>%
  tidylog::count()

Infraspecific alien taxa:

infraspecific_alien <-
  be_alientaxa_info %>%
  tidylog::distinct(rank, taxonKey) %>%
  tidylog::filter(rank %in% c(
    "SUBSPECIES",
    "VARIETY",
    "FORM"
  ))
infraspecific_alien <-
  purrr::map_dfr(
    infraspecific_alien$taxonKey,
    function(x) {
      rgbif::name_usage(x, "data")$data
    }
  )
infraspecific_alien <-
  infraspecific_alien %>%
  tidylog::mutate(
    species = dplyr::if_else(is.na(species),
      parent,
      species
    ),
    speciesKey = dplyr::if_else(is.na(speciesKey),
      parentKey,
      speciesKey
    )
  ) %>%
  tidylog::select(
    taxonKey = key,
    kingdom,
    phylum,
    order,
    class,
    family,
    genusKey,
    genus,
    speciesKey,
    species
  )
infraspecific_alien <-
  infraspecific_alien %>%
  tidylog::left_join(be_alientaxa_info, by = c("taxonKey"))
infraspecific_alien

3.3 Assessment alien species with alien infraspecific taxa

The unified checklist contains some infraspecific alien taxa belonging to alien species as well. The assessment whether the species is alien or not is at the moment based on a reference file maintained by TrIAS experts. This file should be revised each time the unified checklist is updated. We read it:

infraspecific_alien_status_species <-
  readr::read_tsv(
    here::here(
      "reference",
      "species_of_infraspecific_alien_taxa.tsv"
    ),
    na = ""
  )
infraspecific_alien_status_species

where key is the key of the taxon as published in the unified checklist, nubKey is the key of the corresponding taxon in the GBIF Backbone. Also the columns speciesKey, species, scientificName and kingdom contain information coming from the GBIF Backbone.

3.4 EEA cells containing the Natura2000 protected areas

We read also the file containing the intersection of protected areas and the EEA grid as produced in :

cells_of_prot_areas <- readr::read_tsv(
  file = here::here(
    "data",
    "interim",
    "EEA_ref_grid_cells_covering_protected_areas.tsv"
  ),
  na = ""
)

Preview:

cells_of_prot_areas %>% tidylog::slice_head(n = 10)

Number of cells per protected area:

n_cells_prot_areas <-
  cells_of_prot_areas %>%
  tidylog::group_by(SITECODE) %>%
  tidylog::summarize(n_cells = n())

Preview:

n_cells_prot_areas %>% tidylog::slice_head(n = 10)

3.5 Protected areas metadata

Metadata about all Belgian Natura2000 protected areas have been already saved in other pipeline. We read them:

protected_areas_metadata <- readr::read_tsv(
  here::here(
    "data",
    "interim",
    "protected_areas_metadata.tsv"
  ),
  na = ""
)

4 Calculate number of observations, AOO and coverage of Belgian protected areas

Get values from occurrence cube related to cells covering the protected areas:

be_prot_areas_cube <-
  cells_of_prot_areas %>%
  tidylog::inner_join(be_species_cube,
    by = c("CELLCODE" = "eea_cell_code")
  )

Preview:

be_prot_areas_cube %>% tidylog::slice_head(n = 10)

For each taxon, year and protected area, calculate the number of observations, the area of occupancy and the minimum coordinate uncertainty:

occs_prot_areas <-
  be_prot_areas_cube %>%
  tidylog::group_by(SITECODE, SITETYPE, year, speciesKey) %>%
  tidylog::summarize(
    aoo = n(),
    n = sum(n),
    min_coord_uncertainty = min(min_coord_uncertainty)
  ) %>%
  tidylog::ungroup()

Preview with the 10 highest areas of occupancy:

occs_prot_areas %>%
  arrange(desc(aoo)) %>%
  tidylog::slice_head(n = 10)

4.1 Add coverage of protected areas

To add the coverage of each protected area, we divide AOO by the area of each protected area defined as the number of cells totally covering the protected area:

occs_prot_areas <-
  occs_prot_areas %>%
  tidylog::left_join(n_cells_prot_areas,
    by = c("SITECODE")
  ) %>%
  tidylog::mutate(coverage = aoo / n_cells) %>%
  tidylog::select(-n_cells)

The coverage is a number between 0 (species not present) and 1 (species present in the entire protected area). Species/year/area with highest coverage value (1):

occs_prot_areas %>%
  arrange(desc(coverage)) %>%
  tidylog::filter(coverage == max(coverage))

4.2 Retrieve taxonomic metadata from GBIF

Not all taxa in Belgian occurrence cube are present in protected areas. We remove them from the taxonomic metadata of the occurrence cube:

prot_areas_species_info <-
  be_species_info %>%
  tidylog::filter(speciesKey %in% occs_prot_areas$speciesKey)

We retrieve the taxonomic tree from GBIF. This step can take long.

prot_areas_species_taxon_info <-
  purrr::map_dfr(
    prot_areas_species_info$speciesKey,
    function(x) {
      rgbif::name_usage(key = x)$data
    }
  )
prot_areas_species_taxon_info <-
  prot_areas_species_taxon_info %>%
  tidylog::select(
    speciesKey,
    kingdom,
    phylum,
    order,
    class,
    family,
    genusKey,
    genus,
    species
  )
prot_areas_species_info <-
  prot_areas_species_info %>%
  tidylog::left_join(prot_areas_species_taxon_info,
    by = "speciesKey"
  )

Preview:

prot_areas_species_taxon_info %>% tidylog::slice_head(n = 10)

4.3 Add alien status

We would also like to indicate whether the species is alien. We add a boolean column indicating whether the species is alien or not based on the unified checklist:

# alien species based on alien infraspecific taxa in unified
infraspecific_alien_and_species <-
  infraspecific_alien_status_species %>%
  tidylog::filter(species_is_alien == TRUE)

infraspecific_alien_species_unknown <-
  infraspecific_alien_status_species %>%
  tidylog::filter(is.na(species_is_alien))

occs_prot_areas <-
  occs_prot_areas %>%
  tidylog::mutate(is_alien = case_when(
    speciesKey %in% be_alientaxa_info$taxonKey |
      speciesKey %in% infraspecific_alien_and_species$speciesKey ~ TRUE,
    speciesKey %in%
      infraspecific_alien_species_unknown$speciesKey ~ NA,
    TRUE ~ FALSE
  ))

As the Belgian list of alien taxa contains infraspecific taxa, we add a remark for the corresponding species:

species_infraspecific_alien <-
  infraspecific_alien %>%
  tidylog::select(speciesKey, scientificName, taxonKey) %>%
  tidylog::filter(speciesKey %in% occs_prot_areas$speciesKey) %>%
  tidylog::group_by(speciesKey) %>%
  tidylog::summarize(remarks = paste0(
    "Infraspecific alien taxa present. ",
    paste(taxonKey,
      scientificName,
      sep = ":",
      collapse = ","
    )
  ))
occs_prot_areas <-
  occs_prot_areas %>%
  tidylog::left_join(species_infraspecific_alien,
    by = "speciesKey"
  )

Species with alien infraspecific taxa. Notice they can be considered alien as well:

occs_prot_areas %>%
  tidylog::filter(!is.na(remarks)) %>%
  tidylog::distinct(speciesKey, is_alien, remarks)

Finally we add a column, taxonKey, containing the key of all taxa in GBIF Backbone. It is by definition equal to speciesKey.

occs_prot_areas <-
  occs_prot_areas %>%
  tidylog::mutate(taxonKey = speciesKey)
prot_areas_species_info <-
  prot_areas_species_info %>%
  tidylog::mutate(taxonKey = speciesKey)

4.4 Include infraspecific alien taxa

To better study the spread of alien taxa we also add information about infraspecific alien taxa:

# Select cells with occurrences of infraspecific alien taxa
be_prot_areas_cube_infraspecific <-
  cells_of_prot_areas %>%
  tidylog::inner_join(
    be_alientaxa_cube %>%
      tidylog::filter(taxonKey %in% infraspecific_alien$taxonKey),
    by = c("CELLCODE" = "eea_cell_code")
  )
# Calculate n observations, aoo, min_coord_uncertainty
occs_prot_areas_infraspecific <-
  be_prot_areas_cube_infraspecific %>%
  tidylog::group_by(SITECODE, SITETYPE, year, taxonKey) %>%
  tidylog::summarize(
    aoo = n(),
    n = sum(n),
    min_coord_uncertainty = min(min_coord_uncertainty)
  ) %>%
  tidylog::ungroup()
# Add coverage and is_alien
occs_prot_areas_infraspecific <-
  occs_prot_areas_infraspecific %>%
  tidylog::left_join(n_cells_prot_areas,
    by = c("SITECODE")
  ) %>%
  tidylog::mutate(
    coverage = aoo / n_cells,
    is_alien = TRUE
  ) %>%
  tidylog::select(-n_cells)
# Remove taxa which are not present in protected areas
prot_areas_infraspecific_taxon_info <-
  infraspecific_alien %>%
  tidylog::filter(taxonKey %in% occs_prot_areas_infraspecific$taxonKey)
# Add speciesKey
occs_prot_areas_infraspecific <-
  occs_prot_areas_infraspecific %>%
  tidylog::left_join(
    infraspecific_alien %>%
      tidylog::select(taxonKey, speciesKey),
    by = "taxonKey"
  )
# Merge data
occs_prot_areas <-
  occs_prot_areas %>%
  dplyr::bind_rows(occs_prot_areas_infraspecific)
# Merge metadata
prot_areas_taxon_info <-
  prot_areas_species_info %>%
  dplyr::bind_rows(prot_areas_infraspecific_taxon_info)

5 Save data and metadata

5.1 Save data about protected area

We finalize the data.frame containing the information about number of observations, occupancy, coverage and alien status in Belgian protected areas.

Select column of interest and set columns order:

occs_prot_areas <-
  occs_prot_areas %>%
  dplyr::relocate(
    SITECODE,
    SITETYPE,
    year,
    taxonKey,
    speciesKey,
    n,
    aoo,
    coverage,
    min_coord_uncertainty,
    is_alien,
    remarks
  )

Preview:

occs_prot_areas %>% tidylog::slice_head(n = 10)

Save data:

occs_prot_areas %>%
  readr::write_csv(
    path = here::here(
      "data",
      "output",
      "protected_areas_species_occurrence.csv"
    ),
    na = ""
  )

5.2 Save metadata

5.2.1 Taxonomic metadata

Select taxonomic metadata we are interested to:

taxonomic_metadata <-
  prot_areas_taxon_info %>%
  tidylog::select(
    taxonKey,
    speciesKey,
    scientificName,
    kingdom,
    phylum,
    order,
    class,
    genus,
    family,
    species,
    rank,
    includes
  )

Preview:

taxonomic_metadata %>% tidylog::slice_head(n = 10)

Save taxonomic metadata:

taxonomic_metadata %>%
  readr::write_csv(
    here::here(
      "data",
      "output",
      "protected_areas_species_info.csv"
    ),
    na = ""
  )

5.2.2 Protected areas metadata

We remove metadata of areas without observations, if any:

protected_areas_metadata <-
  protected_areas_metadata %>%
  tidylog::filter(SITECODE %in% occs_prot_areas$SITECODE)

Preview:

protected_areas_metadata %>% tidylog::slice_head(n = 10)

We save the metadata as output:

protected_areas_metadata %>%
  readr::write_csv(
    file = here::here(
      "data",
      "output",
      "protected_areas_metadata.csv"
    ),
    na = ""
  )