Merge species distribution models and produce tabular summaries for score calculation

Published

2026-02-26 16:44:52

1 Overview

Merge species distribution models from multiple datasets, flag valid taxa (is_ok), and produce tabular summaries as input to calc_scores.qmd.

Data preparation steps (taxonomic ID resolution, ad-hoc species fixes, dataset ingestion) are in merge_models_prep.qmd.

Code
librarian::shelf(
  DBI,
  dplyr,
  DT,
  duckdb,
  glue,
  here,
  knitr,
  readr,
  stringr,
  tibble,
  tidyr,
  quiet = T
)
options(readr.show_col_types = F)

source(here("libs/paths.R"))

# database connections
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = F))

# section eval flags (set to TRUE to re-run) ----
do_merge <- F # create ms_merge merged models
do_is_ok <- T # flag valid taxa

2 Add Merged Dataset (ms_merge)

2.1 Insert ms_merge dataset row

Code
ds_key <- "ms_merge"
row_dataset <- tibble(
  ds_key = !!ds_key,
  name_short = glue("Marine Sensitivity Merged Model, {Sys.Date()}"),
  name_original = "Marine Sensitivity merged model from multiple inputs",
  description = "This dataset is derived from others. When a given taxon has multiple distributions, the maximum value is taken. If an IUCN range exists, then that and any critical habitat (NOAA or FWS) are used to mask the other inputs so that only areas within the IUCN range are considered.",
  citation = "",
  source_broad = "BOEM",
  source_detail = "https://marinesensitivity.org/docs",
  regions = "USA",
  response_type = "mixed",
  taxa_groups = "all taxa",
  year_pub = 2025,
  date_obs_beg = NA,
  date_obs_end = NA,
  date_env_beg = NA,
  date_env_end = NA,
  link_info = "https://github.com/MarineSensitivity",
  link_download = NA,
  link_metadata = NA,
  links_other = NA,
  spatial_res_deg = 0.05,
  temporal_res = "static"
)

if (dbExistsTable(con_sdm, "dataset")) {
  dbExecute(con_sdm, glue("DELETE FROM dataset WHERE ds_key = '{ds_key}'"))
}

dbWriteTable(con_sdm, "dataset", row_dataset, append = TRUE)
# tbl(con_sdm, "dataset")

2.2 Dataset metadata

Code
# add metadata columns to dataset table ----
for (col_def in list(
  c("name_display", "VARCHAR"),
  c("value_info", "VARCHAR"),
  c("is_mask", "BOOLEAN"),
  c("sort_order", "INTEGER")
)) {
  tryCatch(
    dbExecute(
      con_sdm,
      glue(
        "ALTER TABLE dataset ADD COLUMN IF NOT EXISTS {col_def[1]} {col_def[2]}"
      )
    ),
    error = function(e) {
      if (!grepl("already exists", e$message, ignore.case = TRUE)) {
        stop(e)
      }
    }
  )
}

# populate metadata for each dataset
metadata <- tribble(
  ~ds_key    , ~name_display           , ~value_info                            , ~is_mask , ~sort_order ,
  "am_0.05"  , "AquaMaps SDM"          , NA                                     , FALSE    , 1L          ,
  "ca_nmfs"  , "NMFS Core Area"        , "Core: 100"                            , TRUE     , 2L          ,
  "ch_nmfs"  , "NMFS Critical Habitat" , "EN:100, TN:50"                        , TRUE     , 3L          ,
  "ch_fws"   , "FWS Critical Habitat"  , "EN:100, TN:50"                        , TRUE     , 4L          ,
  "rng_fws"  , "FWS Range"             , "EN:100, TN:50, LC:1"                  , TRUE     , 5L          ,
  "bl"       , "BirdLife Range"        , "CR:50, EN:25, VU:5, NT:2, LC:1, DD:1" , TRUE     , 6L          ,
  "rng_iucn" , "IUCN Range"            , "CR:50, EN:25, VU:5, NT:2, LC:1, DD:1" , TRUE     , 7L          ,
  "ms_merge" , "Merged Model"          , NA                                     , FALSE    , 0L
)

for (i in 1:nrow(metadata)) {
  m <- metadata[i, ]
  vi <- ifelse(is.na(m$value_info), "NULL", glue("'{m$value_info}'"))
  dbExecute(
    con_sdm,
    glue(
      "UPDATE dataset
     SET name_display = '{m$name_display}',
         value_info   = {vi},
         is_mask      = {tolower(m$is_mask)},
         sort_order   = {m$sort_order}
     WHERE ds_key = '{m$ds_key}'"
    )
  )
}

tbl(con_sdm, "dataset") |>
  select(ds_key, name_display, is_mask, sort_order) |>
  collect()

2.3 Iterate merge across taxa

Code
ds_key <- "ms_merge"

# cell_tif <- glue("{dir_data}/derived/r_bio-oracle_planarea.tif")
# r_cell <- rast(cell_tif)
# ext(r_cell) <- round(ext(r_cell), 3)

d_x <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  arrange(desc(n_ds), taxon_id) |>
  collect()

# removed: quick fix for turtle sp_cat — now handled by reclassify_reptiles
# chunk after taxon table creation (bind_birds_notbirds_ds)

# table(d_x$sp_cat, useNA = "ifany")
# OLD:
# bird        coral         fish invertebrate       mammal      reptile
#   49           10           12            1           12            7
# NEW (rng_iucn):
# coral         fish invertebrate       mammal        other       turtle
#   375          927          148           58            2            6

ds_keys <- tbl(con_sdm, "dataset") |>
  pull(ds_key) |>
  setdiff("ms_merge")

# datasets for extracting max value SDM
# ds_keys_sdm  <- setdiff(ds_keys, "rng_iucn")   # OLD: excluding IUCN range map except for masking
ds_keys_sdm <- ds_keys # NEW: include IUCN range map for max value SDM
# datasets that form the combined mask (IUCN range + critical habitats + national range), only when rng_iucn exists for species
ds_keys_mask <- tbl(con_sdm, "dataset") |>
  filter(is_mask) |>
  pull(ds_key)

for (i in 1:nrow(d_x)) {
  # i = 1  # which(str_detect(d_x$scientific_name, ".*ricei")) # i = 1512

  d_sp <- d_x |> slice(i)
  # d_sp |> glimpse()
  # Rows: 1
  # Columns: 18
  # $ taxon_id         <dbl> 127186
  # $ taxon_authority  <chr> "worms"
  # $ n_ds             <int> 5
  # $ am_0.05          <int> 7466
  # $ ch_nmfs          <int> 18230
  # $ ch_fws           <int> 18309
  # $ rng_fws          <int> 18401
  # $ sp_cat           <chr> "fish"
  # $ bl               <int> NA
  # $ mdl_seq          <int> NA
  # $ scientific_name  <chr> "Salmo salar"
  # $ common_name      <chr> "silver salmon"
  # $ redlist_code     <chr> "EN"
  # $ worms_is_marine  <lgl> TRUE
  # $ worms_is_extinct <lgl> NA
  # $ worms_id         <dbl> 127186
  # $ is_ok            <lgl> TRUE
  # $ rng_iucn         <int> 19445

  # list species models by dataset from taxon_model junction table
  d_sp_l <- tbl(con_sdm, "taxon_model") |>
    filter(taxon_id == !!d_sp$taxon_id) |>
    collect() |>
    mutate(
      taxon_authority = d_sp$taxon_authority
    )
  #   taxon_id taxon_authority ds_key   mdl_seq
  #      <dbl> <chr>           <chr>      <int>
  # 1   127186 worms           am_0.05     7466
  # 2   127186 worms           ch_fws     18309
  # 3   127186 worms           ch_nmfs    18230
  # 4   127186 worms           rng_fws    18401
  # 5   127186 worms           rng_iucn   19445
  # NEW ds_key: "ms_merge"; mdl_seq: 20030

  d_r_ds <- tbl(con_sdm, "model_cell") |>
    filter(
      mdl_seq %in% d_sp_l$mdl_seq
    ) |>
    left_join(
      tbl(con_sdm, "model") |>
        select(mdl_seq, ds_key) |>
        filter(mdl_seq %in% d_sp_l$mdl_seq),
      by = "mdl_seq"
    ) |>
    group_by(ds_key) |>
    summarize(
      n_cell = n(),
      v_min = min(value, na.rm = T),
      v_max = max(value, na.rm = T),
      .groups = "drop"
    ) |>
    collect() |>
    mutate(
      ds_str = glue("{ds_key} ({n_cell} cells, {v_min} - {v_max})")
    )
  #   ds_key   n_cell v_min v_max ds_str
  #   <chr>     <dbl> <int> <int> <glue>
  # 1 rng_iucn   6245    50    50 rng_iucn (6245 cells, 50 - 50)
  # 2 ch_fws       29    90    90 ch_fws (29 cells, 90 - 90)
  # 3 am_0.05   11739     1   100 am_0.05 (11739 cells, 1 - 100)
  # 4 ch_nmfs       3    90    90 ch_nmfs (3 cells, 90 - 90)
  # 5 rng_fws     264    50    90 rng_fws (264 cells, 50 - 90)

  # datasets used for SDM
  ds_sdm_sp <- intersect(d_sp_l$ds_key, ds_keys_sdm)

  sdm_mdl_seqs <- d_sp_l |>
    filter(ds_key %in% ds_sdm_sp) |>
    pull(mdl_seq)

  # query of SDM with max value across all datasets
  q_sdm <- tbl(con_sdm, "model_cell") |>
    filter(
      mdl_seq %in% sdm_mdl_seqs
    ) |>
    group_by(cell_id) |>
    summarize(value = max(value, na.rm = T), .groups = "drop")

  # check if IUCN range exists for this taxon (via taxon_model)
  has_iucn <- "rng_iucn" %in% d_sp_l$ds_key
  if (has_iucn) {
    # get datasets used for mask
    ds_mask_sp <- intersect(d_sp_l$ds_key, ds_keys_mask)

    # get mdl_seqs for masking datasets
    mask_mdl_seqs <- d_sp_l |>
      filter(ds_key %in% ds_mask_sp) |>
      pull(mdl_seq)

    q_mask <- tbl(con_sdm, "model_cell") |>
      filter(mdl_seq %in% mask_mdl_seqs) |>
      distinct(cell_id) |>
      mutate(value = 1)

    # get SDM cells with mask applied
    d_r_sp <- q_sdm |>
      semi_join(
        q_mask,
        by = join_by(cell_id)
      ) |>
      collect()

    # visual checks ----
    # # r_sdm_mask
    # r <- init(r_cell[[1]], NA)
    # r[d_r_sp$cell_id] <- d_r_sp$value
    # r <- trim(r) |> rotate()
    # plet(r)
    #
    # # r_mask
    # d_mask <- q_mask |> collect()
    # r <- init(r_cell[[1]], NA)
    # r[d_mask$cell_id] <- d_mask$value
    # names(r) <- "mask"
    # r_mask <- trim(r) |> rotate()
    # plet(r_mask)
    #
    # # r_sdm
    # d_sdm <- q_sdm |> collect()
    # r <- init(r_cell[[1]], NA)
    # r[d_sdm$cell_id] <- d_sdm$value
    # names(r) <- "sdm"
    # r_sdm <- trim(r) |> rotate()
    # plet(r_sdm)
  } else {
    # no IUCN range - use original logic (no masking), just max value of all SDMs
    d_r_sp <- collect(q_sdm)
  }

  # apply MMPA/MBTA spatial minimum floors ----
  if (isTRUE(d_sp$is_mmpa)) {
    d_r_sp <- d_r_sp |>
      mutate(value = pmax(value, 20L))
  }
  if (isTRUE(d_sp$is_mbta)) {
    d_r_sp <- d_r_sp |>
      mutate(value = pmax(value, 10L))
  }

  sp_sci <- d_sp$scientific_name
  sp_cmn <- d_sp$common_name
  sp_cat <- d_sp$sp_cat
  sp_key <- glue("{ds_key}:{d_sp$taxon_id}")
  message(glue(
    "Processing {i}/{nrow(d_x)} [{sp_key}] ({sp_cat}): {sp_cmn} (_{sp_sci}_)"
  ))

  n_cells <- nrow(d_r_sp)
  if (n_cells == 0) {
    message(glue("No cells for {sp_sci} ({i}/{nrow(d_x)})"))
    next()
  }

  # delete: existing ----
  mdl_seqs <- tbl(con_sdm, "model") |>
    filter(ds_key == !!ds_key, taxa == !!sp_key) |>
    pull(mdl_seq)
  if (length(mdl_seqs) > 0) {
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM taxon_model WHERE ds_key = '{ds_key}' AND taxon_id = {d_sp$taxon_id}"
      )
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM model WHERE ds_key = '{ds_key}' AND taxa = '{sp_key}'")
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM species WHERE ds_key = '{ds_key}' AND taxa = '{sp_key}'"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM model_cell WHERE mdl_seq IN ({paste(mdl_seqs, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "UPDATE taxon SET mdl_seq = NULL WHERE mdl_seq IN ({paste(mdl_seqs, collapse = ',')})"
      )
    )
  }

  # append: model ----
  has_iucn_str <- if (has_iucn) {
    glue(
      "; Mask applied (because IUCN range map present) from combining datasets: {paste(ds_mask_sp, collapse = ', ')}"
    )
  } else {
    ""
  }
  d_model <- tibble(
    ds_key = ds_key,
    taxa = sp_key,
    time_period = "2025",
    region = "USA",
    mdl_type = "mixed",
    description = glue(
      "Marine Sensitivity merged model with max values from datasets: {paste(ds_sdm_sp, collapse = ', ')}{has_iucn_str}"
    )
  )
  dbWriteTable(con_sdm, "model", d_model, append = TRUE)
  # tbl(con_sdm, "model") |> collect() |> tail()

  # get the mdl_seq that was just created
  mdl_seq <- dbGetQuery(
    con_sdm,
    glue(
      "
    SELECT mdl_seq FROM model
    WHERE
      ds_key = '{ds_key}' AND
      taxa   = '{sp_key}'
    ORDER BY mdl_seq DESC LIMIT 1 "
    )
  )$mdl_seq

  # d_sp |> glimpse()
  # Rows: 1
  # Columns: 15
  # $ taxon_id         <dbl> 127186
  # $ taxon_authority  <chr> "worms"
  # $ n_ds             <int> 4
  # $ am_0.05          <int> 7466
  # $ ch_nmfs          <int> 18230
  # $ ch_fws           <int> 18309
  # $ rng_fws          <int> 18401
  # $ sp_cat           <chr> "fish"
  # $ bl               <int> NA
  # $ mdl_seq          <int> NA
  # $ scientific_name  <chr> "Salmo salar"
  # $ common_name      <chr> "silver salmon"
  # $ redlist_code     <chr> "EN"
  # $ worms_is_marine  <lgl> TRUE
  # $ worms_is_extinct <lgl> NA

  # append: species ----
  d_species <- tibble(
    ds_key = ds_key,
    taxa = sp_key,
    sp_key = sp_key,
    worms_id = ifelse(
      d_sp$taxon_authority == "worms",
      d_sp$taxon_id,
      NA_integer_
    ),
    botw_id = ifelse(
      d_sp$taxon_authority == "botw",
      d_sp$taxon_id,
      NA_integer_
    ),
    gbif_id = NA_integer_,
    itis_id = NA_integer_,
    iucn_id = NA_integer_,
    scientific_name_dataset = sp_sci,
    common_name_dataset = sp_cmn,
    scientific_name_accepted = sp_sci,
    common_name_accepted = sp_cmn,
    redlist_code = d_sp$redlist_code,
    redlist_year = NA_integer_,
    sp_cat = sp_cat,
    worms_is_marine = d_sp$worms_is_marine,
    worms_is_extinct = d_sp$worms_is_extinct
  )
  # d_species |> glimpse()

  stopifnot(
    length(setdiff(names(d_species), dbListFields(con_sdm, "species"))) == 0
  )
  stopifnot(
    setdiff(dbListFields(con_sdm, "species"), names(d_species)) == "sp_seq"
  )
  dbWriteTable(con_sdm, "species", d_species, append = T)
  # tbl(con_sdm, "species") |> collect() |> tail()
  # tbl(con_sdm, "model")   |> collect() |> tail()
  stopifnot(
    tbl(con_sdm, "species") |> filter(is.na(sp_seq)) |> collect() |> nrow() == 0
  )

  # append: model_cell ----
  d_mdl_cell <- d_r_sp |>
    mutate(
      mdl_seq = mdl_seq
    ) |>
    arrange(cell_id)
  dbWriteTable(con_sdm, "model_cell", d_mdl_cell, append = T)

  # update taxon with mdl_seq
  dbExecute(
    con_sdm,
    glue(
      "
    UPDATE taxon
    SET   mdl_seq  = {mdl_seq}
    WHERE taxon_id = {d_sp$taxon_id}"
    )
  )

  # append ms_merge row to taxon_model junction table
  dbWriteTable(
    con_sdm,
    "taxon_model",
    tibble(
      taxon_id = d_sp$taxon_id,
      ds_key = "ms_merge",
      mdl_seq = as.integer(mdl_seq)
    ),
    append = TRUE
  )
}
# Salmo salar            silver salmon

3 Set taxon.is_ok

Add is_ok: a simple logical field for flagging valid taxa, ie (so far):

  • birds:
    • redlist_code != “EX”
    • has a botw_id
    • if has worms_id:
      • worms_is_marine != F
      • worms_is_extinct != T
  • no model cells overlap with Program Areas
  • category is “reptile” but not “turtle” (handled by reclassify_reptiles)
  • not birds:
    • has a worms_id
    • worms_is_marine != F
    • worms_is_extinct != T

3.1 Flag valid taxa

Code
d <- tbl(con_sdm, "taxon") |>
  collect() # 17,561 × 16

# birds ----
# d |>
#   filter(
#     taxon_authority == "botw") |>
#   pull(redlist_code) |>
#   table(useNA = "ifany")
#  CR  DD  EN  LC  NT  TN  VU
#   3   1  58 452  45  14  41
#
# d |>
#   filter(
#     taxon_authority == "botw") |>
#   select(worms_is_marine, worms_is_extinct) |>
#   table(useNA = "ifany")
#                worms_is_extinct
# worms_is_marine <NA>
#           FALSE  118
#           TRUE   221
#           <NA>   275

d_b <- d |>
  filter(
    taxon_authority == "botw"
  ) |>
  mutate(
    is_ok = case_when(
      is.na(taxon_id) ~ F,
      is.na(mdl_seq) ~ F,
      !is.na(redlist_code) & redlist_code == "EX" ~ F, # 0
      !is.na(worms_id) & worms_is_marine == F ~ F, # 118
      !is.na(worms_id) & worms_is_extinct == T ~ F, # 0
      .default = T
    )
  )
# d_b$is_ok |> table(useNA = "ifany")
# FALSE  TRUE
#   118   496

# worms ----

# d |>
#   filter(
#     taxon_authority == "worms") |>
#   pull(redlist_code) |>
#   table(useNA = "ifany")
# CR    DD    EN    EX    LC    NT    TN    VU  <NA>
# 47   340   235     1  5574   107    11   154 10478
#
# d |>
#   filter(
#     taxon_authority == "worms") |>
#   select(worms_is_marine, worms_is_extinct) |>
#   table(useNA = "ifany")
#                worms_is_extinct
# worms_is_marine FALSE  TRUE  <NA>
#           FALSE    19     0    24
#           TRUE   4171    20 12708
#           <NA>      0     0     5

d_w <- d |>
  filter(
    taxon_authority == "worms"
  ) |>
  mutate(
    is_ok = case_when(
      is.na(taxon_id) ~ F,
      is.na(mdl_seq) ~ F,
      !is.na(redlist_code) & redlist_code == "EX" ~ F,
      !is.na(worms_id) & worms_is_marine == F ~ F,
      !is.na(worms_id) & worms_is_extinct == T ~ F,
      !is.na(worms_taxonomic_status) &
        !worms_taxonomic_status %in%
          c("accepted", "alternative representation") ~ F,
      sp_cat == "reptile" ~ F,
      sp_cat == "turtle" ~ T,
      .default = T
    )
  )
# d_w$is_ok |> table(useNA = "ifany")
# FALSE   TRUE
#    64 16,883

d2 <- bind_rows(
  d_b,
  d_w
) |>
  select(taxon_id, is_ok)

# flag taxa with no distribution inside program areas ----
# (mirrors r_mask approach in apps_2026/mapsp/app.R but with DB queries)
taxa_in_pra <- tbl(con_sdm, "taxon_model") |>
  inner_join(
    tbl(con_sdm, "model_cell") |> select(mdl_seq, cell_id),
    by = "mdl_seq"
  ) |>
  inner_join(
    tbl(con_sdm, "zone_cell") |> select(zone_seq, cell_id),
    by = "cell_id"
  ) |>
  inner_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!glue("ply_programareas_2026{v_sfx}"),
        fld == "programarea_key"
      ) |>
      select(zone_seq),
    by = "zone_seq"
  ) |>
  distinct(taxon_id) |>
  pull(taxon_id)

n_outside <- sum(d2$is_ok & !(d2$taxon_id %in% taxa_in_pra))
message(glue(
  "{n_outside} taxa flagged is_ok=F (no distribution in program areas)"
))
6734 taxa flagged is_ok=F (no distribution in program areas)
Code
d2 <- d2 |>
  mutate(
    is_ok = is_ok & taxon_id %in% taxa_in_pra
  )

stopifnot(sum(duplicated(d2$taxon_id)) == 0)

dbExecute(
  con_sdm,
  "ALTER TABLE taxon ADD COLUMN IF NOT EXISTS is_ok BOOLEAN"
)
[1] 0
Code
duckdb_register(con_sdm, "d2", d2)
dbExecute(
  con_sdm,
  "UPDATE taxon
    SET is_ok  = d2.is_ok
    FROM d2
    WHERE taxon.taxon_id = d2.taxon_id"
) # 17,561
[1] 17561
Code
duckdb_unregister(con_sdm, "d2")

4 Taxon Summary

4.1 Export taxon summary

Code
taxon_csv <- here("data/taxon.csv")

# dbListFields(con_sdm, "taxon") |> paste(collapse = ", ") |> cat()
d_taxon <- tbl(con_sdm, "taxon") |>
  select(
    is_ok,
    component = sp_cat,
    common_name,
    scientific_name,
    redlist_code_max = redlist_code,
    extrisk_code,
    er_score,
    is_mmpa,
    is_mbta,
    is_bcc,
    worms_is_marine,
    worms_is_extinct,
    n_datasets = n_ds,
    taxon_authority,
    taxon_id,
    model_id = mdl_seq
  ) |>
  arrange(desc(is_ok), component, common_name) |>
  collect()

write_csv(d_taxon, taxon_csv)

5 Dataset Summary

Code
tbl(con_sdm, "dataset") |>
  select(sort_order, ds_key, name_display, value_info, is_mask) |>
  arrange(sort_order) |>
  collect() |>
  DT::datatable()
Code
tbl(con_sdm, "model") |>
  group_by(ds_key) |>
  summarize(n_models = n()) |>
  arrange(ds_key) |>
  collect() |>
  DT::datatable()

6 Species Summary

6.1 Species counts by sp_cat and ds_key

Code
tbl(con_sdm, "species") |>
  group_by(sp_cat, ds_key) |>
  summarize(n = n(), .groups = "drop") |>
  arrange(sp_cat, ds_key) |>
  collect() |>
  DT::datatable()

6.2 Taxonomic authority coverage

Code
tbl(con_sdm, "species") |>
  group_by(sp_cat, ds_key) |>
  summarize(
    n = n(),
    n_worms = sum(!is.na(worms_id), na.rm = T),
    n_itis = sum(!is.na(itis_id), na.rm = T),
    n_gbif = sum(!is.na(gbif_id), na.rm = T),
    n_allna = sum(
      is.na(worms_id) & is.na(itis_id) & is.na(gbif_id),
      na.rm = T
    ),
    .groups = "drop"
  ) |>
  mutate(
    pct_worms = round(n_worms / n * 100, 1),
    pct_itis = round(n_itis / n * 100, 1),
    pct_gbif = round(n_gbif / n * 100, 1),
    pct_allna = round(n_allna / n * 100, 1)
  ) |>
  arrange(sp_cat, ds_key) |>
  collect() |>
  DT::datatable()

6.3 WoRMS marine/extinct percentages

Code
tbl(con_sdm, "species") |>
  group_by(sp_cat) |>
  summarize(
    n = n(),
    n_worms_marine = sum(worms_is_marine, na.rm = T),
    n_worms_extinct = sum(worms_is_extinct, na.rm = T)
  ) |>
  mutate(
    pct_worms_marine = round(n_worms_marine / n * 100, 1),
    pct_worms_extinct = round(n_worms_extinct / n * 100, 1)
  ) |>
  collect() |>
  DT::datatable()

7 Taxon Table Summary

7.1 Taxon counts by sp_cat and is_ok

Code
tbl(con_sdm, "taxon") |>
  group_by(sp_cat, is_ok) |>
  summarize(n = n(), .groups = "drop") |>
  collect() |>
  pivot_wider(
    names_from = is_ok,
    values_from = n,
    names_prefix = "is_ok_"
  ) |>
  DT::datatable()

7.2 Redlist code distribution

Code
tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  group_by(sp_cat, redlist_code) |>
  summarize(n = n(), .groups = "drop") |>
  collect() |>
  pivot_wider(
    names_from = redlist_code,
    values_from = n,
    values_fill = 0
  ) |>
  DT::datatable()

7.3 Extinction risk authority summary

Code
tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  mutate(
    authority = case_when(
      str_starts(extrisk_code, "NMFS") ~ "NMFS",
      str_starts(extrisk_code, "FWS") ~ "FWS",
      str_starts(extrisk_code, "IUCN") ~ "IUCN",
      TRUE ~ "none"
    )
  ) |>
  count(authority) |>
  collect() |>
  DT::datatable()

7.4 Number of datasets per taxon

Code
tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  count(n_ds) |>
  collect() |>
  DT::datatable()

8 Taxon x Dataset Matrix

Code
tbl(con_sdm, "taxon_model") |>
  inner_join(
    tbl(con_sdm, "taxon") |>
      filter(is_ok) |>
      select(taxon_id, sp_cat),
    by = "taxon_id"
  ) |>
  group_by(sp_cat, ds_key) |>
  summarize(n = n(), .groups = "drop") |>
  collect() |>
  pivot_wider(
    names_from = ds_key,
    values_from = n,
    values_fill = 0
  ) |>
  DT::datatable()

9 Taxon Detail Table

Code
tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  select(
    sp_cat,
    scientific_name,
    common_name,
    redlist_code,
    extrisk_code,
    er_score,
    n_ds,
    taxon_id,
    taxon_authority
  ) |>
  arrange(sp_cat, scientific_name) |>
  collect() |>
  DT::datatable(
    filter = "top",
    options = list(pageLength = 25)
  )
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html