---
title: "Merge Models"
subtitle: "Merge species distribution models and produce tabular summaries for score calculation"
format:
html:
code-fold: true
code-tools: true
editor_options:
chunk_output_type: console
---
## Overview
Merge species distribution models from multiple datasets, flag valid taxa
(`is_ok`), and produce tabular summaries as input to `calc_scores.qmd`.
Data preparation steps (taxonomic ID resolution, ad-hoc species fixes,
dataset ingestion) are in `merge_models_prep.qmd`.
```{r}
#| label: setup
librarian::shelf(
DBI,
dplyr,
DT,
duckdb,
glue,
here,
knitr,
readr,
stringr,
tibble,
tidyr,
quiet = T
)
options(readr.show_col_types = F)
source(here("libs/paths.R"))
# database connections
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = F))
# section eval flags (set to TRUE to re-run) ----
do_merge <- F # create ms_merge merged models
do_is_ok <- T # flag valid taxa
```
## Add Merged Dataset (`ms_merge`)
### Insert ms_merge dataset row
```{r}
#| label: insert_dataset_merged
#| eval: !expr do_merge
ds_key <- "ms_merge"
row_dataset <- tibble(
ds_key = !!ds_key,
name_short = glue("Marine Sensitivity Merged Model, {Sys.Date()}"),
name_original = "Marine Sensitivity merged model from multiple inputs",
description = "This dataset is derived from others. When a given taxon has multiple distributions, the maximum value is taken. If an IUCN range exists, then that and any critical habitat (NOAA or FWS) are used to mask the other inputs so that only areas within the IUCN range are considered.",
citation = "",
source_broad = "BOEM",
source_detail = "https://marinesensitivity.org/docs",
regions = "USA",
response_type = "mixed",
taxa_groups = "all taxa",
year_pub = 2025,
date_obs_beg = NA,
date_obs_end = NA,
date_env_beg = NA,
date_env_end = NA,
link_info = "https://github.com/MarineSensitivity",
link_download = NA,
link_metadata = NA,
links_other = NA,
spatial_res_deg = 0.05,
temporal_res = "static"
)
if (dbExistsTable(con_sdm, "dataset")) {
dbExecute(con_sdm, glue("DELETE FROM dataset WHERE ds_key = '{ds_key}'"))
}
dbWriteTable(con_sdm, "dataset", row_dataset, append = TRUE)
# tbl(con_sdm, "dataset")
```
### Dataset metadata
```{r}
#| label: dataset_metadata
#| eval: !expr do_merge
# add metadata columns to dataset table ----
for (col_def in list(
c("name_display", "VARCHAR"),
c("value_info", "VARCHAR"),
c("is_mask", "BOOLEAN"),
c("sort_order", "INTEGER")
)) {
tryCatch(
dbExecute(
con_sdm,
glue(
"ALTER TABLE dataset ADD COLUMN IF NOT EXISTS {col_def[1]} {col_def[2]}"
)
),
error = function(e) {
if (!grepl("already exists", e$message, ignore.case = TRUE)) {
stop(e)
}
}
)
}
# populate metadata for each dataset
metadata <- tribble(
~ds_key , ~name_display , ~value_info , ~is_mask , ~sort_order ,
"am_0.05" , "AquaMaps SDM" , NA , FALSE , 1L ,
"ca_nmfs" , "NMFS Core Area" , "Core: 100" , TRUE , 2L ,
"ch_nmfs" , "NMFS Critical Habitat" , "EN:100, TN:50" , TRUE , 3L ,
"ch_fws" , "FWS Critical Habitat" , "EN:100, TN:50" , TRUE , 4L ,
"rng_fws" , "FWS Range" , "EN:100, TN:50, LC:1" , TRUE , 5L ,
"bl" , "BirdLife Range" , "CR:50, EN:25, VU:5, NT:2, LC:1, DD:1" , TRUE , 6L ,
"rng_iucn" , "IUCN Range" , "CR:50, EN:25, VU:5, NT:2, LC:1, DD:1" , TRUE , 7L ,
"ms_merge" , "Merged Model" , NA , FALSE , 0L
)
for (i in 1:nrow(metadata)) {
m <- metadata[i, ]
vi <- ifelse(is.na(m$value_info), "NULL", glue("'{m$value_info}'"))
dbExecute(
con_sdm,
glue(
"UPDATE dataset
SET name_display = '{m$name_display}',
value_info = {vi},
is_mask = {tolower(m$is_mask)},
sort_order = {m$sort_order}
WHERE ds_key = '{m$ds_key}'"
)
)
}
tbl(con_sdm, "dataset") |>
select(ds_key, name_display, is_mask, sort_order) |>
collect()
```
### Iterate merge across taxa
```{r}
#| label: iterate_merge_ds_mdl
#| eval: !expr do_merge
ds_key <- "ms_merge"
# cell_tif <- glue("{dir_data}/derived/r_bio-oracle_planarea.tif")
# r_cell <- rast(cell_tif)
# ext(r_cell) <- round(ext(r_cell), 3)
d_x <- tbl(con_sdm, "taxon") |>
filter(is_ok) |>
arrange(desc(n_ds), taxon_id) |>
collect()
# removed: quick fix for turtle sp_cat — now handled by reclassify_reptiles
# chunk after taxon table creation (bind_birds_notbirds_ds)
# table(d_x$sp_cat, useNA = "ifany")
# OLD:
# bird coral fish invertebrate mammal reptile
# 49 10 12 1 12 7
# NEW (rng_iucn):
# coral fish invertebrate mammal other turtle
# 375 927 148 58 2 6
ds_keys <- tbl(con_sdm, "dataset") |>
pull(ds_key) |>
setdiff("ms_merge")
# datasets for extracting max value SDM
# ds_keys_sdm <- setdiff(ds_keys, "rng_iucn") # OLD: excluding IUCN range map except for masking
ds_keys_sdm <- ds_keys # NEW: include IUCN range map for max value SDM
# datasets that form the combined mask (IUCN range + critical habitats + national range), only when rng_iucn exists for species
ds_keys_mask <- tbl(con_sdm, "dataset") |>
filter(is_mask) |>
pull(ds_key)
for (i in 1:nrow(d_x)) {
# i = 1 # which(str_detect(d_x$scientific_name, ".*ricei")) # i = 1512
d_sp <- d_x |> slice(i)
# d_sp |> glimpse()
# Rows: 1
# Columns: 18
# $ taxon_id <dbl> 127186
# $ taxon_authority <chr> "worms"
# $ n_ds <int> 5
# $ am_0.05 <int> 7466
# $ ch_nmfs <int> 18230
# $ ch_fws <int> 18309
# $ rng_fws <int> 18401
# $ sp_cat <chr> "fish"
# $ bl <int> NA
# $ mdl_seq <int> NA
# $ scientific_name <chr> "Salmo salar"
# $ common_name <chr> "silver salmon"
# $ redlist_code <chr> "EN"
# $ worms_is_marine <lgl> TRUE
# $ worms_is_extinct <lgl> NA
# $ worms_id <dbl> 127186
# $ is_ok <lgl> TRUE
# $ rng_iucn <int> 19445
# list species models by dataset from taxon_model junction table
d_sp_l <- tbl(con_sdm, "taxon_model") |>
filter(taxon_id == !!d_sp$taxon_id) |>
collect() |>
mutate(
taxon_authority = d_sp$taxon_authority
)
# taxon_id taxon_authority ds_key mdl_seq
# <dbl> <chr> <chr> <int>
# 1 127186 worms am_0.05 7466
# 2 127186 worms ch_fws 18309
# 3 127186 worms ch_nmfs 18230
# 4 127186 worms rng_fws 18401
# 5 127186 worms rng_iucn 19445
# NEW ds_key: "ms_merge"; mdl_seq: 20030
d_r_ds <- tbl(con_sdm, "model_cell") |>
filter(
mdl_seq %in% d_sp_l$mdl_seq
) |>
left_join(
tbl(con_sdm, "model") |>
select(mdl_seq, ds_key) |>
filter(mdl_seq %in% d_sp_l$mdl_seq),
by = "mdl_seq"
) |>
group_by(ds_key) |>
summarize(
n_cell = n(),
v_min = min(value, na.rm = T),
v_max = max(value, na.rm = T),
.groups = "drop"
) |>
collect() |>
mutate(
ds_str = glue("{ds_key} ({n_cell} cells, {v_min} - {v_max})")
)
# ds_key n_cell v_min v_max ds_str
# <chr> <dbl> <int> <int> <glue>
# 1 rng_iucn 6245 50 50 rng_iucn (6245 cells, 50 - 50)
# 2 ch_fws 29 90 90 ch_fws (29 cells, 90 - 90)
# 3 am_0.05 11739 1 100 am_0.05 (11739 cells, 1 - 100)
# 4 ch_nmfs 3 90 90 ch_nmfs (3 cells, 90 - 90)
# 5 rng_fws 264 50 90 rng_fws (264 cells, 50 - 90)
# datasets used for SDM
ds_sdm_sp <- intersect(d_sp_l$ds_key, ds_keys_sdm)
sdm_mdl_seqs <- d_sp_l |>
filter(ds_key %in% ds_sdm_sp) |>
pull(mdl_seq)
# query of SDM with max value across all datasets
q_sdm <- tbl(con_sdm, "model_cell") |>
filter(
mdl_seq %in% sdm_mdl_seqs
) |>
group_by(cell_id) |>
summarize(value = max(value, na.rm = T), .groups = "drop")
# check if IUCN range exists for this taxon (via taxon_model)
has_iucn <- "rng_iucn" %in% d_sp_l$ds_key
if (has_iucn) {
# get datasets used for mask
ds_mask_sp <- intersect(d_sp_l$ds_key, ds_keys_mask)
# get mdl_seqs for masking datasets
mask_mdl_seqs <- d_sp_l |>
filter(ds_key %in% ds_mask_sp) |>
pull(mdl_seq)
q_mask <- tbl(con_sdm, "model_cell") |>
filter(mdl_seq %in% mask_mdl_seqs) |>
distinct(cell_id) |>
mutate(value = 1)
# get SDM cells with mask applied
d_r_sp <- q_sdm |>
semi_join(
q_mask,
by = join_by(cell_id)
) |>
collect()
# visual checks ----
# # r_sdm_mask
# r <- init(r_cell[[1]], NA)
# r[d_r_sp$cell_id] <- d_r_sp$value
# r <- trim(r) |> rotate()
# plet(r)
#
# # r_mask
# d_mask <- q_mask |> collect()
# r <- init(r_cell[[1]], NA)
# r[d_mask$cell_id] <- d_mask$value
# names(r) <- "mask"
# r_mask <- trim(r) |> rotate()
# plet(r_mask)
#
# # r_sdm
# d_sdm <- q_sdm |> collect()
# r <- init(r_cell[[1]], NA)
# r[d_sdm$cell_id] <- d_sdm$value
# names(r) <- "sdm"
# r_sdm <- trim(r) |> rotate()
# plet(r_sdm)
} else {
# no IUCN range - use original logic (no masking), just max value of all SDMs
d_r_sp <- collect(q_sdm)
}
# apply MMPA/MBTA spatial minimum floors ----
if (isTRUE(d_sp$is_mmpa)) {
d_r_sp <- d_r_sp |>
mutate(value = pmax(value, 20L))
}
if (isTRUE(d_sp$is_mbta)) {
d_r_sp <- d_r_sp |>
mutate(value = pmax(value, 10L))
}
sp_sci <- d_sp$scientific_name
sp_cmn <- d_sp$common_name
sp_cat <- d_sp$sp_cat
sp_key <- glue("{ds_key}:{d_sp$taxon_id}")
message(glue(
"Processing {i}/{nrow(d_x)} [{sp_key}] ({sp_cat}): {sp_cmn} (_{sp_sci}_)"
))
n_cells <- nrow(d_r_sp)
if (n_cells == 0) {
message(glue("No cells for {sp_sci} ({i}/{nrow(d_x)})"))
next()
}
# delete: existing ----
mdl_seqs <- tbl(con_sdm, "model") |>
filter(ds_key == !!ds_key, taxa == !!sp_key) |>
pull(mdl_seq)
if (length(mdl_seqs) > 0) {
dbExecute(
con_sdm,
glue(
"DELETE FROM taxon_model WHERE ds_key = '{ds_key}' AND taxon_id = {d_sp$taxon_id}"
)
)
dbExecute(
con_sdm,
glue("DELETE FROM model WHERE ds_key = '{ds_key}' AND taxa = '{sp_key}'")
)
dbExecute(
con_sdm,
glue(
"DELETE FROM species WHERE ds_key = '{ds_key}' AND taxa = '{sp_key}'"
)
)
dbExecute(
con_sdm,
glue(
"DELETE FROM model_cell WHERE mdl_seq IN ({paste(mdl_seqs, collapse = ',')})"
)
)
dbExecute(
con_sdm,
glue(
"UPDATE taxon SET mdl_seq = NULL WHERE mdl_seq IN ({paste(mdl_seqs, collapse = ',')})"
)
)
}
# append: model ----
has_iucn_str <- if (has_iucn) {
glue(
"; Mask applied (because IUCN range map present) from combining datasets: {paste(ds_mask_sp, collapse = ', ')}"
)
} else {
""
}
d_model <- tibble(
ds_key = ds_key,
taxa = sp_key,
time_period = "2025",
region = "USA",
mdl_type = "mixed",
description = glue(
"Marine Sensitivity merged model with max values from datasets: {paste(ds_sdm_sp, collapse = ', ')}{has_iucn_str}"
)
)
dbWriteTable(con_sdm, "model", d_model, append = TRUE)
# tbl(con_sdm, "model") |> collect() |> tail()
# get the mdl_seq that was just created
mdl_seq <- dbGetQuery(
con_sdm,
glue(
"
SELECT mdl_seq FROM model
WHERE
ds_key = '{ds_key}' AND
taxa = '{sp_key}'
ORDER BY mdl_seq DESC LIMIT 1 "
)
)$mdl_seq
# d_sp |> glimpse()
# Rows: 1
# Columns: 15
# $ taxon_id <dbl> 127186
# $ taxon_authority <chr> "worms"
# $ n_ds <int> 4
# $ am_0.05 <int> 7466
# $ ch_nmfs <int> 18230
# $ ch_fws <int> 18309
# $ rng_fws <int> 18401
# $ sp_cat <chr> "fish"
# $ bl <int> NA
# $ mdl_seq <int> NA
# $ scientific_name <chr> "Salmo salar"
# $ common_name <chr> "silver salmon"
# $ redlist_code <chr> "EN"
# $ worms_is_marine <lgl> TRUE
# $ worms_is_extinct <lgl> NA
# append: species ----
d_species <- tibble(
ds_key = ds_key,
taxa = sp_key,
sp_key = sp_key,
worms_id = ifelse(
d_sp$taxon_authority == "worms",
d_sp$taxon_id,
NA_integer_
),
botw_id = ifelse(
d_sp$taxon_authority == "botw",
d_sp$taxon_id,
NA_integer_
),
gbif_id = NA_integer_,
itis_id = NA_integer_,
iucn_id = NA_integer_,
scientific_name_dataset = sp_sci,
common_name_dataset = sp_cmn,
scientific_name_accepted = sp_sci,
common_name_accepted = sp_cmn,
redlist_code = d_sp$redlist_code,
redlist_year = NA_integer_,
sp_cat = sp_cat,
worms_is_marine = d_sp$worms_is_marine,
worms_is_extinct = d_sp$worms_is_extinct
)
# d_species |> glimpse()
stopifnot(
length(setdiff(names(d_species), dbListFields(con_sdm, "species"))) == 0
)
stopifnot(
setdiff(dbListFields(con_sdm, "species"), names(d_species)) == "sp_seq"
)
dbWriteTable(con_sdm, "species", d_species, append = T)
# tbl(con_sdm, "species") |> collect() |> tail()
# tbl(con_sdm, "model") |> collect() |> tail()
stopifnot(
tbl(con_sdm, "species") |> filter(is.na(sp_seq)) |> collect() |> nrow() == 0
)
# append: model_cell ----
d_mdl_cell <- d_r_sp |>
mutate(
mdl_seq = mdl_seq
) |>
arrange(cell_id)
dbWriteTable(con_sdm, "model_cell", d_mdl_cell, append = T)
# update taxon with mdl_seq
dbExecute(
con_sdm,
glue(
"
UPDATE taxon
SET mdl_seq = {mdl_seq}
WHERE taxon_id = {d_sp$taxon_id}"
)
)
# append ms_merge row to taxon_model junction table
dbWriteTable(
con_sdm,
"taxon_model",
tibble(
taxon_id = d_sp$taxon_id,
ds_key = "ms_merge",
mdl_seq = as.integer(mdl_seq)
),
append = TRUE
)
}
# Salmo salar silver salmon
```
## Set taxon.is_ok
Add `is_ok`: a simple logical field for flagging valid taxa, ie (so far):
- birds:
- [x] add worms_id (if available)
- `redlist_code` != "EX"
- has a `botw_id`
- if has worms_id:
- `worms_is_marine` != F
- `worms_is_extinct` != T
- no model cells overlap with Program Areas
- category is "reptile" but not "turtle" (handled by reclassify_reptiles)
- not birds:
- has a `worms_id`
- `worms_is_marine` != F
- `worms_is_extinct` != T
### Flag valid taxa
```{r}
#| label: taxon_is_ok
#| eval: !expr do_is_ok
d <- tbl(con_sdm, "taxon") |>
collect() # 17,561 × 16
# birds ----
# d |>
# filter(
# taxon_authority == "botw") |>
# pull(redlist_code) |>
# table(useNA = "ifany")
# CR DD EN LC NT TN VU
# 3 1 58 452 45 14 41
#
# d |>
# filter(
# taxon_authority == "botw") |>
# select(worms_is_marine, worms_is_extinct) |>
# table(useNA = "ifany")
# worms_is_extinct
# worms_is_marine <NA>
# FALSE 118
# TRUE 221
# <NA> 275
d_b <- d |>
filter(
taxon_authority == "botw"
) |>
mutate(
is_ok = case_when(
is.na(taxon_id) ~ F,
is.na(mdl_seq) ~ F,
!is.na(redlist_code) & redlist_code == "EX" ~ F, # 0
!is.na(worms_id) & worms_is_marine == F ~ F, # 118
!is.na(worms_id) & worms_is_extinct == T ~ F, # 0
.default = T
)
)
# d_b$is_ok |> table(useNA = "ifany")
# FALSE TRUE
# 118 496
# worms ----
# d |>
# filter(
# taxon_authority == "worms") |>
# pull(redlist_code) |>
# table(useNA = "ifany")
# CR DD EN EX LC NT TN VU <NA>
# 47 340 235 1 5574 107 11 154 10478
#
# d |>
# filter(
# taxon_authority == "worms") |>
# select(worms_is_marine, worms_is_extinct) |>
# table(useNA = "ifany")
# worms_is_extinct
# worms_is_marine FALSE TRUE <NA>
# FALSE 19 0 24
# TRUE 4171 20 12708
# <NA> 0 0 5
d_w <- d |>
filter(
taxon_authority == "worms"
) |>
mutate(
is_ok = case_when(
is.na(taxon_id) ~ F,
is.na(mdl_seq) ~ F,
!is.na(redlist_code) & redlist_code == "EX" ~ F,
!is.na(worms_id) & worms_is_marine == F ~ F,
!is.na(worms_id) & worms_is_extinct == T ~ F,
!is.na(worms_taxonomic_status) &
!worms_taxonomic_status %in%
c("accepted", "alternative representation") ~ F,
sp_cat == "reptile" ~ F,
sp_cat == "turtle" ~ T,
.default = T
)
)
# d_w$is_ok |> table(useNA = "ifany")
# FALSE TRUE
# 64 16,883
d2 <- bind_rows(
d_b,
d_w
) |>
select(taxon_id, is_ok)
# flag taxa with no distribution inside program areas ----
# (mirrors r_mask approach in apps_2026/mapsp/app.R but with DB queries)
taxa_in_pra <- tbl(con_sdm, "taxon_model") |>
inner_join(
tbl(con_sdm, "model_cell") |> select(mdl_seq, cell_id),
by = "mdl_seq"
) |>
inner_join(
tbl(con_sdm, "zone_cell") |> select(zone_seq, cell_id),
by = "cell_id"
) |>
inner_join(
tbl(con_sdm, "zone") |>
filter(
tbl == !!glue("ply_programareas_2026{v_sfx}"),
fld == "programarea_key"
) |>
select(zone_seq),
by = "zone_seq"
) |>
distinct(taxon_id) |>
pull(taxon_id)
n_outside <- sum(d2$is_ok & !(d2$taxon_id %in% taxa_in_pra))
message(glue(
"{n_outside} taxa flagged is_ok=F (no distribution in program areas)"
))
d2 <- d2 |>
mutate(
is_ok = is_ok & taxon_id %in% taxa_in_pra
)
stopifnot(sum(duplicated(d2$taxon_id)) == 0)
dbExecute(
con_sdm,
"ALTER TABLE taxon ADD COLUMN IF NOT EXISTS is_ok BOOLEAN"
)
duckdb_register(con_sdm, "d2", d2)
dbExecute(
con_sdm,
"UPDATE taxon
SET is_ok = d2.is_ok
FROM d2
WHERE taxon.taxon_id = d2.taxon_id"
) # 17,561
duckdb_unregister(con_sdm, "d2")
```
## Taxon Summary
### Export taxon summary
```{r}
#| label: taxon_summary
taxon_csv <- here("data/taxon.csv")
# dbListFields(con_sdm, "taxon") |> paste(collapse = ", ") |> cat()
d_taxon <- tbl(con_sdm, "taxon") |>
select(
is_ok,
component = sp_cat,
common_name,
scientific_name,
redlist_code_max = redlist_code,
extrisk_code,
er_score,
is_mmpa,
is_mbta,
is_bcc,
worms_is_marine,
worms_is_extinct,
n_datasets = n_ds,
taxon_authority,
taxon_id,
model_id = mdl_seq
) |>
arrange(desc(is_ok), component, common_name) |>
collect()
write_csv(d_taxon, taxon_csv)
```
## Dataset Summary
```{r}
#| label: dataset_overview
tbl(con_sdm, "dataset") |>
select(sort_order, ds_key, name_display, value_info, is_mask) |>
arrange(sort_order) |>
collect() |>
DT::datatable()
```
```{r}
#| label: model_counts_by_ds
tbl(con_sdm, "model") |>
group_by(ds_key) |>
summarize(n_models = n()) |>
arrange(ds_key) |>
collect() |>
DT::datatable()
```
## Species Summary
### Species counts by sp_cat and ds_key
```{r}
#| label: spp_counts_by_cat_ds
tbl(con_sdm, "species") |>
group_by(sp_cat, ds_key) |>
summarize(n = n(), .groups = "drop") |>
arrange(sp_cat, ds_key) |>
collect() |>
DT::datatable()
```
### Taxonomic authority coverage
```{r}
#| label: auth_coverage
tbl(con_sdm, "species") |>
group_by(sp_cat, ds_key) |>
summarize(
n = n(),
n_worms = sum(!is.na(worms_id), na.rm = T),
n_itis = sum(!is.na(itis_id), na.rm = T),
n_gbif = sum(!is.na(gbif_id), na.rm = T),
n_allna = sum(
is.na(worms_id) & is.na(itis_id) & is.na(gbif_id),
na.rm = T
),
.groups = "drop"
) |>
mutate(
pct_worms = round(n_worms / n * 100, 1),
pct_itis = round(n_itis / n * 100, 1),
pct_gbif = round(n_gbif / n * 100, 1),
pct_allna = round(n_allna / n * 100, 1)
) |>
arrange(sp_cat, ds_key) |>
collect() |>
DT::datatable()
```
### WoRMS marine/extinct percentages
```{r}
#| label: worms_marine_extinct
tbl(con_sdm, "species") |>
group_by(sp_cat) |>
summarize(
n = n(),
n_worms_marine = sum(worms_is_marine, na.rm = T),
n_worms_extinct = sum(worms_is_extinct, na.rm = T)
) |>
mutate(
pct_worms_marine = round(n_worms_marine / n * 100, 1),
pct_worms_extinct = round(n_worms_extinct / n * 100, 1)
) |>
collect() |>
DT::datatable()
```
## Taxon Table Summary
### Taxon counts by sp_cat and is_ok
```{r}
#| label: taxon_counts_by_cat_ok
tbl(con_sdm, "taxon") |>
group_by(sp_cat, is_ok) |>
summarize(n = n(), .groups = "drop") |>
collect() |>
pivot_wider(
names_from = is_ok,
values_from = n,
names_prefix = "is_ok_"
) |>
DT::datatable()
```
### Redlist code distribution
```{r}
#| label: redlist_distribution
tbl(con_sdm, "taxon") |>
filter(is_ok) |>
group_by(sp_cat, redlist_code) |>
summarize(n = n(), .groups = "drop") |>
collect() |>
pivot_wider(
names_from = redlist_code,
values_from = n,
values_fill = 0
) |>
DT::datatable()
```
### Extinction risk authority summary
```{r}
#| label: extrisk_authority
tbl(con_sdm, "taxon") |>
filter(is_ok) |>
mutate(
authority = case_when(
str_starts(extrisk_code, "NMFS") ~ "NMFS",
str_starts(extrisk_code, "FWS") ~ "FWS",
str_starts(extrisk_code, "IUCN") ~ "IUCN",
TRUE ~ "none"
)
) |>
count(authority) |>
collect() |>
DT::datatable()
```
### Number of datasets per taxon
```{r}
#| label: n_ds_distribution
tbl(con_sdm, "taxon") |>
filter(is_ok) |>
count(n_ds) |>
collect() |>
DT::datatable()
```
## Taxon x Dataset Matrix
```{r}
#| label: taxon_dataset_matrix
tbl(con_sdm, "taxon_model") |>
inner_join(
tbl(con_sdm, "taxon") |>
filter(is_ok) |>
select(taxon_id, sp_cat),
by = "taxon_id"
) |>
group_by(sp_cat, ds_key) |>
summarize(n = n(), .groups = "drop") |>
collect() |>
pivot_wider(
names_from = ds_key,
values_from = n,
values_fill = 0
) |>
DT::datatable()
```
## Taxon Detail Table
```{r}
#| label: taxon_detail
tbl(con_sdm, "taxon") |>
filter(is_ok) |>
select(
sp_cat,
scientific_name,
common_name,
redlist_code,
extrisk_code,
er_score,
n_ds,
taxon_id,
taxon_authority
) |>
arrange(sp_cat, scientific_name) |>
collect() |>
DT::datatable(
filter = "top",
options = list(pageLength = 25)
)
```