In this document we calculate the area of occupancy (AOO), the coverage and the number of observations in Belgian natura2000 protected areas. Data are aggregated per year, species and at a resolution of 1 km x 1km as we use the EEA reference grid for Belgium.
library(tidyverse) # To do datascience
library(tidylog) # To provide feedback on dplyr functions
library(here) # To find files
library(inborutils) # To download data from zenodo
library(rgbif) # To interface with GBIF
Read the occurrence cubes at species level for Belgium as published on Zenodo (https://zenodo.org/records/10528506):
be_species_cube <- read_csv(
file = "https://zenodo.org/records/10528506/files/be_species_cube.csv?download=1",
na = ""
)
be_species_info <- read_csv(
file = "https://zenodo.org/records/10528506/files/be_species_info.csv?download=1",
na = ""
) %>%
tidylog::select(-kingdom)
Preview occurrence cube:
be_species_cube %>% tidylog::slice_head(n = 10)
Preview taxonomic metadata of occurrence cube:
be_species_info %>% tidylog::slice_head(n = 10)
Read the occurrence cubes of alien taxa as published on Zenodo (https://zenodo.org/records/10527772). It is based on GBIF occurrence data of taxa published in the Global Register of Introduced and Invasive Species - Belgium, shortly called the unified checklist:
be_alientaxa_cube <- read_csv(
file = "https://zenodo.org/records/10527772/files/be_alientaxa_cube.csv?download=1",
na = ""
)
be_alientaxa_info <- read_csv(
file = "https://zenodo.org/records/10527772/files/be_alientaxa_info.csv?download=1",
na = ""
) %>%
tidylog::select(-kingdom) # as we still have to retrieve taxonomic info from GBIF
Preview occurrence cube:
be_alientaxa_cube %>% tidylog::slice_head(n = 10)
Preview taxonomic metadata of occurrence cube with alien taxa:
be_alientaxa_info %>% tidylog::slice_head(n = 10)
Number of taxa per rank:
be_alientaxa_info %>%
tidylog::distinct(rank, taxonKey) %>%
tidylog::group_by(rank) %>%
tidylog::count()
Infraspecific alien taxa:
infraspecific_alien <-
be_alientaxa_info %>%
tidylog::distinct(rank, taxonKey) %>%
tidylog::filter(rank %in% c(
"SUBSPECIES",
"VARIETY",
"FORM"
))
infraspecific_alien <-
purrr::map_dfr(
infraspecific_alien$taxonKey,
function(x) {
rgbif::name_usage(x, "data")$data
}
)
infraspecific_alien <-
infraspecific_alien %>%
tidylog::mutate(
species = dplyr::if_else(is.na(species),
parent,
species
),
speciesKey = dplyr::if_else(is.na(speciesKey),
parentKey,
speciesKey
)
) %>%
tidylog::select(
taxonKey = key,
kingdom,
phylum,
order,
class,
family,
genusKey,
genus,
speciesKey,
species
)
infraspecific_alien <-
infraspecific_alien %>%
tidylog::left_join(be_alientaxa_info, by = c("taxonKey"))
infraspecific_alien
The unified checklist contains some infraspecific alien taxa belonging to alien species as well. The assessment whether the species is alien or not is at the moment based on a reference file maintained by TrIAS experts. This file should be revised each time the unified checklist is updated. We read it:
infraspecific_alien_status_species <-
readr::read_tsv(
here::here(
"reference",
"species_of_infraspecific_alien_taxa.tsv"
),
na = ""
)
infraspecific_alien_status_species
where key
is the key of the taxon as published in the
unified checklist, nubKey
is the key of the corresponding
taxon in the GBIF Backbone. Also the columns speciesKey
,
species
, scientificName
and
kingdom
contain information coming from the GBIF
Backbone.
We read also the file containing the intersection of protected areas and the EEA grid as produced in :
cells_of_prot_areas <- readr::read_tsv(
file = here::here(
"data",
"interim",
"EEA_ref_grid_cells_covering_protected_areas.tsv"
),
na = ""
)
Preview:
cells_of_prot_areas %>% tidylog::slice_head(n = 10)
Number of cells per protected area:
n_cells_prot_areas <-
cells_of_prot_areas %>%
tidylog::group_by(SITECODE) %>%
tidylog::summarize(n_cells = n())
Preview:
n_cells_prot_areas %>% tidylog::slice_head(n = 10)
Metadata about all Belgian Natura2000 protected areas have been already saved in other pipeline. We read them:
protected_areas_metadata <- readr::read_tsv(
here::here(
"data",
"interim",
"protected_areas_metadata.tsv"
),
na = ""
)
Get values from occurrence cube related to cells covering the protected areas:
be_prot_areas_cube <-
cells_of_prot_areas %>%
tidylog::inner_join(be_species_cube,
by = c("CELLCODE" = "eea_cell_code")
)
Preview:
be_prot_areas_cube %>% tidylog::slice_head(n = 10)
For each taxon, year and protected area, calculate the number of observations, the area of occupancy and the minimum coordinate uncertainty:
occs_prot_areas <-
be_prot_areas_cube %>%
tidylog::group_by(SITECODE, SITETYPE, year, speciesKey) %>%
tidylog::summarize(
aoo = n(),
n = sum(n),
min_coord_uncertainty = min(min_coord_uncertainty)
) %>%
tidylog::ungroup()
Preview with the 10 highest areas of occupancy:
occs_prot_areas %>%
arrange(desc(aoo)) %>%
tidylog::slice_head(n = 10)
To add the coverage of each protected area, we divide AOO by the area of each protected area defined as the number of cells totally covering the protected area:
occs_prot_areas <-
occs_prot_areas %>%
tidylog::left_join(n_cells_prot_areas,
by = c("SITECODE")
) %>%
tidylog::mutate(coverage = aoo / n_cells) %>%
tidylog::select(-n_cells)
The coverage is a number between 0 (species not present) and 1 (species present in the entire protected area). Species/year/area with highest coverage value (1):
occs_prot_areas %>%
arrange(desc(coverage)) %>%
tidylog::filter(coverage == max(coverage))
Not all taxa in Belgian occurrence cube are present in protected areas. We remove them from the taxonomic metadata of the occurrence cube:
prot_areas_species_info <-
be_species_info %>%
tidylog::filter(speciesKey %in% occs_prot_areas$speciesKey)
We retrieve the taxonomic tree from GBIF. This step can take long.
prot_areas_species_taxon_info <-
purrr::map_dfr(
prot_areas_species_info$speciesKey,
function(x) {
rgbif::name_usage(key = x)$data
}
)
prot_areas_species_taxon_info <-
prot_areas_species_taxon_info %>%
tidylog::select(
speciesKey,
kingdom,
phylum,
order,
class,
family,
genusKey,
genus,
species
)
prot_areas_species_info <-
prot_areas_species_info %>%
tidylog::left_join(prot_areas_species_taxon_info,
by = "speciesKey"
)
Preview:
prot_areas_species_taxon_info %>% tidylog::slice_head(n = 10)
We would also like to indicate whether the species is alien. We add a boolean column indicating whether the species is alien or not based on the unified checklist:
# alien species based on alien infraspecific taxa in unified
infraspecific_alien_and_species <-
infraspecific_alien_status_species %>%
tidylog::filter(species_is_alien == TRUE)
infraspecific_alien_species_unknown <-
infraspecific_alien_status_species %>%
tidylog::filter(is.na(species_is_alien))
occs_prot_areas <-
occs_prot_areas %>%
tidylog::mutate(is_alien = case_when(
speciesKey %in% be_alientaxa_info$taxonKey |
speciesKey %in% infraspecific_alien_and_species$speciesKey ~ TRUE,
speciesKey %in%
infraspecific_alien_species_unknown$speciesKey ~ NA,
TRUE ~ FALSE
))
As the Belgian list of alien taxa contains infraspecific taxa, we add a remark for the corresponding species:
species_infraspecific_alien <-
infraspecific_alien %>%
tidylog::select(speciesKey, scientificName, taxonKey) %>%
tidylog::filter(speciesKey %in% occs_prot_areas$speciesKey) %>%
tidylog::group_by(speciesKey) %>%
tidylog::summarize(remarks = paste0(
"Infraspecific alien taxa present. ",
paste(taxonKey,
scientificName,
sep = ":",
collapse = ","
)
))
occs_prot_areas <-
occs_prot_areas %>%
tidylog::left_join(species_infraspecific_alien,
by = "speciesKey"
)
Species with alien infraspecific taxa. Notice they can be considered alien as well:
occs_prot_areas %>%
tidylog::filter(!is.na(remarks)) %>%
tidylog::distinct(speciesKey, is_alien, remarks)
Finally we add a column, taxonKey
, containing the key of
all taxa in GBIF Backbone. It is by definition equal to
speciesKey
.
occs_prot_areas <-
occs_prot_areas %>%
tidylog::mutate(taxonKey = speciesKey)
prot_areas_species_info <-
prot_areas_species_info %>%
tidylog::mutate(taxonKey = speciesKey)
To better study the spread of alien taxa we also add information about infraspecific alien taxa:
# Select cells with occurrences of infraspecific alien taxa
be_prot_areas_cube_infraspecific <-
cells_of_prot_areas %>%
tidylog::inner_join(
be_alientaxa_cube %>%
tidylog::filter(taxonKey %in% infraspecific_alien$taxonKey),
by = c("CELLCODE" = "eea_cell_code")
)
# Calculate n observations, aoo, min_coord_uncertainty
occs_prot_areas_infraspecific <-
be_prot_areas_cube_infraspecific %>%
tidylog::group_by(SITECODE, SITETYPE, year, taxonKey) %>%
tidylog::summarize(
aoo = n(),
n = sum(n),
min_coord_uncertainty = min(min_coord_uncertainty)
) %>%
tidylog::ungroup()
# Add coverage and is_alien
occs_prot_areas_infraspecific <-
occs_prot_areas_infraspecific %>%
tidylog::left_join(n_cells_prot_areas,
by = c("SITECODE")
) %>%
tidylog::mutate(
coverage = aoo / n_cells,
is_alien = TRUE
) %>%
tidylog::select(-n_cells)
# Remove taxa which are not present in protected areas
prot_areas_infraspecific_taxon_info <-
infraspecific_alien %>%
tidylog::filter(taxonKey %in% occs_prot_areas_infraspecific$taxonKey)
# Add speciesKey
occs_prot_areas_infraspecific <-
occs_prot_areas_infraspecific %>%
tidylog::left_join(
infraspecific_alien %>%
tidylog::select(taxonKey, speciesKey),
by = "taxonKey"
)
# Merge data
occs_prot_areas <-
occs_prot_areas %>%
dplyr::bind_rows(occs_prot_areas_infraspecific)
# Merge metadata
prot_areas_taxon_info <-
prot_areas_species_info %>%
dplyr::bind_rows(prot_areas_infraspecific_taxon_info)
We finalize the data.frame containing the information about number of observations, occupancy, coverage and alien status in Belgian protected areas.
Select column of interest and set columns order:
occs_prot_areas <-
occs_prot_areas %>%
dplyr::relocate(
SITECODE,
SITETYPE,
year,
taxonKey,
speciesKey,
n,
aoo,
coverage,
min_coord_uncertainty,
is_alien,
remarks
)
Preview:
occs_prot_areas %>% tidylog::slice_head(n = 10)
Save data:
occs_prot_areas %>%
readr::write_csv(
path = here::here(
"data",
"output",
"protected_areas_species_occurrence.csv"
),
na = ""
)
Select taxonomic metadata we are interested to:
taxonomic_metadata <-
prot_areas_taxon_info %>%
tidylog::select(
taxonKey,
speciesKey,
scientificName,
kingdom,
phylum,
order,
class,
genus,
family,
species,
rank,
includes
)
Preview:
taxonomic_metadata %>% tidylog::slice_head(n = 10)
Save taxonomic metadata:
taxonomic_metadata %>%
readr::write_csv(
here::here(
"data",
"output",
"protected_areas_species_info.csv"
),
na = ""
)
We remove metadata of areas without observations, if any:
protected_areas_metadata <-
protected_areas_metadata %>%
tidylog::filter(SITECODE %in% occs_prot_areas$SITECODE)
Preview:
protected_areas_metadata %>% tidylog::slice_head(n = 10)
We save the metadata as output:
protected_areas_metadata %>%
readr::write_csv(
file = here::here(
"data",
"output",
"protected_areas_metadata.csv"
),
na = ""
)