Calculate scores from models loaded into sdm db and taxon table

Published

2026-03-17 17:37:06

1 Overview

This notebook calculates scores from models loaded into the sdm database and taxon table, including:

  • Extinction risk metrics by class (e.g., extrisk_bird, extrisk_fish; see the sketch after this list)
  • Other metrics TBD
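
For orientation, each per-cell extinction-risk metric is a sum over the species in a category of the species' extinction-risk score weighted by its modeled suitability in that cell (see the SQL in section 4.1). A minimal sketch of the arithmetic with made-up numbers:

Code
# toy example: two bird species overlapping one cell (values assumed for illustration)
er_score <- c(sp1 = 80, sp2 = 20)   # extinction-risk scores [0, 100]
sdm_val  <- c(sp1 = 0.9, sp2 = 0.5) # modeled suitability values in the cell
round(sum(er_score * sdm_val) / 100, 2) # extrisk_bird for the cell: 0.82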

Source:

Process:

  • Before:
    1. ingest_taxon.qmd
       • Added con_spp.botw (Birds of the World) as another taxonomic authority
    2. merge_models.qmd
       • Built con_sdm.taxon table as the authoritative unique-taxa reference from multiple entries in species
  • After:
    1. Summary data/figs/README sections are now integrated into this notebook (merged from msens-summary_programareas.qmd)

To render this notebook on Ben’s laptop:

cd ~/Github/MarineSensitivity/workflows
quarto render calc_scores.qmd

2 TODO

  • include more complex models (see the hypothetical sketch after this list):
    • unique by: ds_key, taxa_id, time_period
    • pkey: mdl_seq
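
A hypothetical sketch of what de-duplicating to one model per (ds_key, taxa_id, time_period) could look like; the table and column names here are illustrative, not the implemented schema:

Code
# hypothetical: keep the lowest mdl_seq per (ds_key, taxa_id, time_period)
mdl_keep <- tbl(con_sdm, "model") |>
  distinct(ds_key, taxa_id, time_period, mdl_seq) |>
  group_by(ds_key, taxa_id, time_period) |>
  summarize(mdl_seq = min(mdl_seq, na.rm = TRUE), .groups = "drop") |>
  pull(mdl_seq)
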
Code
librarian::shelf(
  DBI,
  dbplyr,
  dplyr,
  duckdb,
  DT,
  flextable,
  fs,
  ftExtra,
  ggiraph,
  ggplot2,
  glue,
  gt,
  here,
  htmlwidgets,
  kableExtra,
  knitr,
  leaflet,
  logger,
  mapview,
  writexl,
  purrr,
  quarto,
  RColorBrewer,
  readr,
  scales,
  sf,
  shiny,
  stringr,
  terra,
  tibble,
  tidyr,
  units,
  webshot2,
  quiet = T
)
options(readr.show_col_types = F)

stopifnot(packageVersion("terra") >= "1.8.60")
# [rotate() issue](https://github.com/rspatial/terra/issues/1876)

source(here("libs/sdm_functions.R"))
source(here("libs/paths.R"))
dir_create(dir_v)
dir_create(dir_big_v)

# copy sdm.duckdb from prior version if missing
sdm_db_prev <- glue("{dir_big}/{ver_prev}/sdm.duckdb")
if (!file_exists(sdm_db) && file_exists(sdm_db_prev)) {
  message(glue("Copying sdm.duckdb from {ver_prev} to {ver}..."))
  file_copy(sdm_db_prev, sdm_db)
  message(glue("Copied sdm.duckdb ({file_size(sdm_db)})"))
}

# inputs (shared, unversioned)
pra_raw_gpkg <- glue("{dir_raw}/boem.gov/boem_program_areas_2025-11.gpkg")
cell_tif <- glue("{dir_derived}/r_bio-oracle_planarea.tif")
stopifnot(all(file_exists(c(cell_tif, pra_raw_gpkg))))

# version-independent input polygons (find from prior versions)
# note: search stable prior versions first; current dir_v files on Google Drive
#   can be evicted during long renders
find_input <- function(fname, search_dirs) {
  for (d in search_dirs) {
    f <- glue("{d}/{fname}")
    if (file_exists(f)) return(f)
  }
  stop(glue(
    "Input not found: {fname} (searched {paste(search_dirs, collapse=', ')})"
  ))
}
prev_dirs <- glue("{dir_derived}/{c(ver_prev, 'v2', 'v1')}")
er_raw_gpkg <- find_input("ply_ecoregions_2025.gpkg", prev_dirs)
pa_gpkg <- find_input("ply_planareas_2025.gpkg", prev_dirs)

# flags, typical
do_zones <- F # no, except if new spatial zones added
do_metrics <- T # YES
do_zone_taxon <- F # slow in past
do_postgis <- F # NO, since migrated to pmtiles
do_downloads <- T # YES
do_summary_data <- T # ecoregion x PRA intersections, label points, score tables
do_summary_figs <- T # maps, flower plots, primprod charts, Word/Excel tables
do_readme <- T # programmatic README.md + README.html
do_map <- F # no, but useful for visual checks during development
do_parquet <- T # no, but produces smaller faster duckdb
do_pmtiles <- F # generate PMTiles archives for mapgl/mapsp apps
do_pra_primprod <- T # NO, unless zones or primary prod ∆; zonal primary productivity for program areas
do_deploy <- T # sync derived data + app code to server

stopifnot(!do_pmtiles || do_downloads) # pmtiles are generated from the downloads

# spatial outputs (year reflects input data vintage, version suffix model version of derived metric columns)
pra_gpkg <- glue("{dir_v}/ply_programareas_2026_{ver}.gpkg")
sr_gpkg <- glue("{dir_v}/ply_subregions_2026_{ver}.gpkg")

# computed outputs (version suffix only — no obvious input year)
sr_pra_csv <- glue("{dir_v}/subregion_programareas.csv")
metrics_tif <- glue("{dir_v}/r_metrics_{ver}.tif")
metrics_pra_tif <- glue("{dir_v}/r_metrics_programareas_{ver}.tif")
zone_taxon_csv <- glue("{dir_v}/zone_taxon_{ver}.csv")
lyrs_csv <- glue("{dir_v}/layers_{ver}.csv")
# file_exists(c(sr_pra_csv, metrics_tif, metrics_pra_tif, zone_taxon_csv, lyrs_csv))

# summary outputs (merged from msens-summary_programareas.qmd)
er_pra_gpkg <- glue("{dir_v}/ply_ecoregion_programarea_{ver}.gpkg")
er_pra_csv <- glue("{dir_v}/ply_ecoregion_programarea_{ver}.csv")
er_pra_n_csv <- glue("{dir_v}/tbl_er_pra_scores_{ver}.csv")
pra_n_csv <- glue("{dir_v}/tbl_pra_scores_{ver}.csv")
lbls_csv <- glue("{dir_v}/ply_label_placement_pra.csv")
dir_figs <- here(glue("figs/msens-summary_programareas_{ver}"))
tbl_pra <- glue("ply_programareas_2026_{ver}")
tbl_er <- glue("ply_ecoregions_2025")
pp_csv <- glue("{dir_v}/programarea_primprod.csv")
pp_png <- glue("{dir_v}/programarea_primprod.png")
pp_pdf <- glue("{dir_v}/programarea_primprod.pdf")

# redo flags for summary sections
redo_summary_data <- T
redo_summary_figs <- T

# parquet (big files)
dir_pq <- glue("{dir_big_v}/sdm_parquet")

# helper functions ----
get_rast <- function(m_key) {
  d <- dbGetQuery(
    con_sdm,
    glue(
      "
        SELECT
          cm.cell_id,
          cm.value
        FROM cell_metric cm
        WHERE cm.metric_seq = (
          SELECT metric_seq
          FROM metric
          WHERE metric_key = '{m_key}' )"
    )
  )
  stopifnot(sum(duplicated(d$cell_id)) == 0)

  r <- init(r_cell[[1]], NA)
  r[d$cell_id] <- d$value

  r
}
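
A quick usage sketch of get_rast() for a visual spot-check, mirroring the commented checks later in this notebook (assumes the extrisk_bird metric from section 4.1 already exists in the db):

Code
r_bird <- get_rast("extrisk_bird")
plet(rotate(r_bird)) # rotate [0,360] -> [-180,180] before previewing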

plot_flower <- function(
  data,
  fld_category,
  fld_height,
  fld_width,
  colors,
  tooltip_expr = NULL,
  score = NULL,
  title = NULL,
  interactive = T
) {
  stopifnot(is.numeric(data |> pull({{ fld_height }})))
  stopifnot(is.numeric(data |> pull({{ fld_width }})))

  if (is.null(score)) {
    score <- data |>
      mutate(
        "{{fld_height}}" := as.double({{ fld_height }}),
        "{{fld_width}}" := as.double({{ fld_width }})
      ) |>
      summarize(
        score = weighted.mean({{ fld_height }}, {{ fld_width }}, na.rm = T)
      ) |>
      pull(score)
  }

  d <- data |>
    arrange({{ fld_category }}) |>
    mutate(across(!where(is.character), as.double)) |>
    mutate(
      ymax = cumsum({{ fld_width }}),
      ymin = lag(ymax, default = 0),
      xmax = {{ fld_height }},
      xmin = 0
    )

  sym_category <- ensym(fld_category)
  sym_height <- ensym(fld_height)
  sym_width <- ensym(fld_width)

  if (!is.null(tooltip_expr)) {
    d <- d |>
      mutate(tooltip = glue(tooltip_expr))
  } else {
    d <- d |>
      mutate(tooltip = as.character(!!sym_category))
  }

  g <- ggplot(d) +
    geom_rect_interactive(
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = {{ fld_category }},
        color = "white",
        data_id = {{ fld_category }},
        tooltip = tooltip
      ),
      color = "white",
      alpha = 0.5
    ) +
    coord_polar(theta = "y") +
    xlim(c(-10, max(data |> pull({{ fld_height }})))) +
    annotate(
      "text",
      x = -10,
      y = 0,
      label = round(score),
      size = 8,
      fontface = "bold"
    ) +
    scale_fill_manual(values = colors) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      plot.margin = unit(c(20, 20, 20, 20), "pt")
    )

  if (!is.null(title)) {
    g <- g + ggtitle(title)
  }

  if (interactive) {
    g <- girafe(
      ggobj = g,
      options = list(
        opts_sizing(rescale = TRUE, width = 1),
        opts_tooltip(
          css = "background-color:white;color:black;padding:5px;border-radius:3px;"
        )
      )
    )
  }

  g
}
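
A minimal usage sketch of plot_flower() with made-up values (column names and colors are illustrative):

Code
d_demo <- tibble(
  taxon = c("Bird", "Fish", "Mammal"),
  risk  = c(40, 25, 60), # petal height
  wt    = c(1, 1, 1)     # petal width (equal weights)
)
plot_flower(
  d_demo, taxon, risk, wt,
  colors      = c(Bird = "#66c2a5", Fish = "#fc8d62", Mammal = "#8da0cb"),
  title       = "Demo flower plot",
  interactive = FALSE
)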

# conditional summary setup ----
if (do_summary_figs) {
  mapbox_tkn_txt <- glue("{dir_private}/mapbox_token_bdbest.txt")
  Sys.setenv(MAPBOX_PUBLIC_TOKEN = readLines(mapbox_tkn_txt))
  librarian::shelf(mapgl, quiet = T)
  dir_create(dir_figs)
}

con_sdm <- dbConnect(duckdb(), dbdir = sdm_db, read_only = F)
# dbDisconnect(con_sdm, shutdown = T); duckdb_shutdown(duckdb()); rm(con_sdm)
if (do_postgis) {
  source(here("libs/db.R"))
} # generates Postgres `con`; fails if ssh-msens not connected

# clean up prior version zone references in DuckDB ----
zone_tbls_db <- tbl(con_sdm, "zone") |> distinct(tbl) |> pull(tbl)

# rename version-independent zones (ecoregions, planning areas) to remove old suffix
for (tbl_base in c("ply_ecoregions_2025", "ply_planareas_2025")) {
  old_tbl <- glue("{tbl_base}_{ver_prev}")
  if (old_tbl %in% zone_tbls_db) {
    dbExecute(
      con_sdm,
      glue("UPDATE zone SET tbl = '{tbl_base}' WHERE tbl = '{old_tbl}'")
    )
    message(glue("Renamed zone tbl: {old_tbl} -> {tbl_base}"))
  }
}

# rename versioned zones (program areas, subregions) from ver_prev to ver
for (old_tbl in zone_tbls_db[str_detect(zone_tbls_db, glue("_{ver_prev}$"))]) {
  new_tbl <- str_replace(old_tbl, glue("_{ver_prev}$"), glue("_{ver}"))
  dbExecute(
    con_sdm,
    glue("UPDATE zone SET tbl = '{new_tbl}' WHERE tbl = '{old_tbl}'")
  )
  # zone_taxon uses zone_tbl column
  dbExecute(
    con_sdm,
    glue(
      "UPDATE zone_taxon SET zone_tbl = '{new_tbl}' WHERE zone_tbl = '{old_tbl}'"
    )
  )
  message(glue("Renamed zone tbl: {old_tbl} -> {new_tbl}"))
}

sp_cats <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  distinct(sp_cat) |>
  arrange(sp_cat) |>
  pull(sp_cat)

# for visual checks
r_cell <- rast(cell_tif)
ext(r_cell) <- round(ext(r_cell), 3) # TODO: culprit with walrus wonk?
Code
old_csv <- glue("{dir_v}/tbl_pra_scores_v3_old-pre-mmpa-mbta-floor.csv")
out_csv <- glue("{dir_v}/tbl_pra_scores_v3_new-vs-old-pre-mmpa-mbta-floor.csv")
new_csv <- pra_n_csv

d_old <- read_csv(old_csv)
d_new <- read_csv(new_csv)
d_comp <- d_old |>
  full_join(
    d_new,
    by = join_by(`Program Area`),
    suffix = c("_old", "_new")
  ) |>
  mutate(
    Score_diff = Score_new - Score_old
  ) |>
  select(`Program Area`, Score_old, Score_new, Score_diff)
write_csv(d_comp, out_csv)

Flags for evaluating R chunks (see the sketch after this list):

  • do_zones: FALSE
  • do_metrics: TRUE
  • do_zone_taxon: FALSE
  • do_postgis: FALSE
  • do_downloads: TRUE
  • do_summary_data: TRUE
  • do_summary_figs: TRUE
  • do_readme: TRUE
  • do_map: FALSE
  • do_parquet: TRUE
  • do_deploy: TRUE
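
A minimal sketch of how such a flag might gate a chunk in the .qmd source (chunk label and options here are assumed, for illustration only):

Code
#| label: calc-metrics
#| eval: !expr do_metrics

# ...chunk body runs only when do_metrics is TRUE...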

3 Add Zones

3.1 Add cell table to DuckDB if missing

Code
if (!"cell" %in% dbListTables(con_sdm)) {
  if (!exists("r_cell")) {
    r_cell <- rast(cell_tif)
  }

  d_cell <- values(r_cell, dataframe = T, na.rm = T) |>
    tibble() |>
    mutate(
      cell_id = as.integer(cell_id)
    )

  # dbRemoveTable(con_sdm, "cell")
  dbWriteTable(con_sdm, "cell", d_cell, append = F)
  dbExecute(con_sdm, "ALTER TABLE cell ADD PRIMARY KEY (cell_id);")
}

3.2 Add Program Areas to SDM zones

Add BOEM Program Areas (11th National Draft Proposed Program, 2025-11) as a new zone type. Program Areas are derived from Planning Areas with additional GOA subdivisions.

Code
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

if (!tbl_pra %in% zone_tbls) {
  source(here("libs/sdm_functions.R"))
  # TODO: move these into msens R
  # - set_geom_ctr_area()
  # - register_zones()
  # - insert_zone_cell_data()

  # load planning areas for field join
  pa <- read_sf(pa_gpkg) |>
    st_drop_geometry() |>
    select(planarea_name, region_key, planarea_key)

  # load and process program areas with field renaming
  pra <- read_sf(pra_raw_gpkg) |>
    rename(
      region_name = BOEM_OCS_REGION,
      planarea_name = PLANNING_AREA,
      programarea_name = Label,
      programarea_id = OBJECTID
    ) |>
    # left join to get region_key and planarea_key from planning areas
    left_join(pa, by = "planarea_name") |>
    # programarea_key defaults to planarea_key
    mutate(
      programarea_key = planarea_key
    ) |>
    # handle NA values for GOA Program Areas (not in planning areas)
    mutate(
      programarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "GAA",
        programarea_name == "GOA Program Area B" ~ "GAB",
        TRUE ~ programarea_key
      ),
      planarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "WGA,CGA,EGA",
        programarea_name == "GOA Program Area B" ~ "CGA,EGA",
        TRUE ~ planarea_key
      ),
      region_key = case_when(
        str_detect(programarea_name, "GOA") ~ "GA",
        TRUE ~ region_key
      )
    ) |>
    select(
      programarea_id,
      region_key,
      region_name,
      planarea_key,
      planarea_name,
      programarea_key,
      programarea_name
    ) |>
    st_transform(st_crs(4326))

  # add centroid and area
  pra <- pra |> set_geom_ctr_area()

  # write derived gpkg
  write_sf(pra, pra_gpkg)
  message(glue("Wrote {nrow(pra)} program areas to {pra_gpkg}"))
  # Wrote 20 program areas to ~/My Drive/projects/msens/data/derived/ply_programareas_2026.gpkg

  # shift lon [-180, 180] to [0, 360]
  pra_sl <- st_shift_longitude(pra)

  # load cell raster template
  r_cell <- rast(cell_tif, lyrs = "cell_id")

  # get percent each feature covers each cell
  r_pra <- rasterize(
    pra_sl,
    r_cell,
    fun = "max",
    touches = T,
    cover = T,
    by = "programarea_key"
  )

  # convert to data frame
  d_pra <- terra::as.data.frame(
    r_pra,
    cells = T,
    na.rm = F,
    wide = F
  ) |>
    filter(!is.na(values)) |>
    tibble()

  message(glue("Program areas cover {length(unique(d_pra$cell))} cells"))

  # register zones and insert zone data into con_sdm
  register_zones(pra, tbl_pra, "programarea_key")
  insert_zone_cell_data(d_pra, tbl_pra, "programarea_key")

  # verify registration
  tbl(con_sdm, "zone") |>
    group_by(tbl) |>
    summarise(n_zones = n()) |>
    collect() |>
    print()
}

3.3 Create ply_programareas_2026 in PostGIS database (deprecated)

Deprecated: migrated to PMTiles. Kept for reference.

Code
# devtools::install_local("../msens")
librarian::shelf(
  here,
  leaflet,
  leaflet.extras, # mapview,
  marinesensitivity / msens,
  sf,
  tibble,
  quiet = T
)

source(here("libs/db.R")) # define: con

# load from gpkg if not already in memory (eg zones already existed)
if (!exists("pra")) {
  pra <- read_sf(pra_gpkg)
}

pra |>
  st_write(
    dsn = con,
    layer = tbl_pra,
    driver = "PostgreSQL",
    layer_options = "OVERWRITE=true"
  )

# TODO: need to convert 1st column to integer for mapgl viewing?

# enforce SRID so shows up in tile.marinesensitivity.org
dbExecute(
  con,
  glue(
    "SELECT UpdateGeometrySRID('{tbl_pra}','geom',4326);"
  )
)

# fix any invalid geometries
dbExecute(
  con,
  glue(
    "UPDATE {tbl_pra} SET geom = ST_MakeValid(geom) WHERE NOT ST_IsValid(geom)"
  )
) # 6 rows fixed

# add spatial index for faster queries
dbExecute(
  con,
  glue(
    "CREATE INDEX IF NOT EXISTS {tbl_pra}_geom_idx ON {tbl_pra} USING gist(geom);"
  )
)

3.4 Create Subregion-ProgramArea Lookup

Create lookup table for which program areas belong to which subregions.

Used in dropdown of mapgl app.
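
A hedged sketch of how the lookup CSV could drive that dropdown; the filtering shown is illustrative, not the app's actual code:

Code
# illustrative only: program-area choices for a selected subregion
d_sr_pra <- read_csv(sr_pra_csv)
pra_choices <- d_sr_pra |>
  filter(subregion_key == "AK") |>
  pull(programarea_key) |>
  sort()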

Code
# calculate intersections between subregions and program areas using zone_cell
d_sr_pra <- tbl(con_sdm, "zone_cell") |>
  inner_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_sr),
    by = "zone_seq"
  ) |>
  rename(subregion_key = value) |>
  select(cell_id, subregion_key) |>
  inner_join(
    tbl(con_sdm, "zone_cell") |>
      inner_join(
        tbl(con_sdm, "zone") |>
          filter(tbl == !!tbl_pra),
        by = "zone_seq"
      ) |>
      select(cell_id, programarea_key = value),
    by = "cell_id"
  ) |>
  distinct(subregion_key, programarea_key) |>
  collect()


get_sr_bbox <- function(sr_key) {
  pra_sr <- d_sr_pra |>
    filter(subregion_key == !!sr_key) |>
    pull(programarea_key)
  # assumes r_metrics (multi-layer metrics raster) with a "programarea_key" layer
  r_sr <- r_metrics[["programarea_key"]] %in% pra_sr
  r_sr <- ifel(r_sr, 1, NA) # FALSE -> NA so trim() drops cells outside the subregion
  trim(r_sr) |> st_bbox() |> as.numeric()
}
# TODO: pre-calc bboxes for all subregions

write_csv(d_sr_pra, sr_pra_csv)
message(glue(
  "Wrote {nrow(d_sr_pra)} subregion-programarea mappings to {sr_pra_csv}"
))
# Wrote 60 subregion-programarea mappings to ~/My Drive/projects/msens/data/derived/subregion_programareas.csv

3.5 Add SubRegions

Original:

  • USA: USA mainland, Alaska, Hawaii and territories
  • AK: Alaska
  • L48: USA mainland
  • AKL48: USA mainland & Alaska

Conceived for later:

  • HI: Hawaii
  • L48Pac: Pacific USA mainland
  • L48Atl: Atlantic USA mainland
  • L48Goa: Gulf of America USA mainland
  • PAC: Pacific USA (Hawaii, Pacific mainland and USA Territories)

version for 2026:

  • USA: USA mainland and Alaska
  • AK: Alaska
  • L48Pac: Pacific USA mainland
  • L48Atl: Atlantic USA mainland
  • L48Goa: Gulf of America USA mainland

v3:

  • USA: All USA (Alaska, Gulf of America and Pacific)
  • AK: Alaska
  • GA: Gulf of America
  • PA: Pacific
Code
zones <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

# delete existing tbl_sr
if (tbl_sr %in% zones) {
  dbExecute(con_sdm, glue("DELETE FROM zone WHERE tbl = '{tbl_sr}'"))
  # dbExecute(con_sdm, "DELETE FROM zone_cell WHERE zone_seq NOT IN (SELECT zone_seq FROM zone)")
}

d_sr <- tribble(
  ~subregion_key , ~subregion_name   ,
  "AK"           , "Alaska"          ,
  "GA"           , "Gulf of America" ,
  "PA"           , "Pacific"         ,
  "USA"          , "All USA"
) # note: subregion_name not used (yet)
register_zones(d_sr, tbl_sr, "subregion_key")

# load pra from gpkg if not already in memory
if (!exists("pra")) {
  if (file_exists(pra_gpkg)) {
    pra <- read_sf(pra_gpkg)
  } else {
    pra_prev <- glue(
      "{dir_derived}/{ver_prev}/ply_programareas_2026_{ver_prev}.gpkg"
    )
    pra <- read_sf(pra_prev)
  }
}

# temporarily push tmp_pra into duckdb for later joining
tmp_pra <- pra |>
  st_drop_geometry() |>
  select(programarea_key, region_key)
copy_to(con_sdm, tmp_pra, "tmp_pra", temporary = T, overwrite = T)

df_z_sr_notusa <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value != "USA"
  ) |>
  select(region_key = value, zone_seq_sr = zone_seq) |>
  left_join(
    tbl(con_sdm, "tmp_pra"),
    by = "region_key"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_pra) |>
      select(
        programarea_key = value,
        zone_seq
      ),
    by = "programarea_key"
  ) |>
  left_join(
    tbl(con_sdm, "zone_cell"),
    by = "zone_seq"
  ) |>
  group_by(zone_seq_sr, cell_id) |>
  summarize(
    pct_covered = sum(pct_covered, na.rm = T),
    .groups = "drop"
  ) |>
  mutate(
    pct_covered = pmin(pct_covered, 100, na.rm = T)
  ) |>
  select(
    zone_seq = zone_seq_sr,
    cell_id,
    pct_covered
  ) |>
  collect() # 349,139 × 3
# summary(df_z_sr_notusa)

# duplicated(df_z_sr_notusa$cell_id) |> any() # FALSE
# TODO: check why no overlapping cells across subregions?

# zone_seqs_sr <- tbl(con_sdm, "zone") |>
#   filter(
#     tbl == !!tbl_sr) |>
#   pull(zone_seq)
# dbExecute(con_sdm, glue("
#   DELETE FROM zone_cell
#   WHERE zone_seq IN ({paste(zone_seqs_sr, collapse = ',')})"))

# append USA, ie all cells in other subregions
zone_seq_USA <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value == "USA"
  ) |>
  pull(zone_seq) # 217

df_z_sr_usa <- df_z_sr_notusa |>
  group_by(cell_id) |>
  summarize(
    pct_covered = sum(pct_covered, na.rm = T),
    .groups = "drop"
  ) |>
  mutate(
    zone_seq = !!zone_seq_USA,
    pct_covered = pmin(pct_covered, 100, na.rm = T)
  ) # 349,139 × 3
# summary(df_z_sr_usa)

df_z_sr <- bind_rows(
  df_z_sr_notusa,
  df_z_sr_usa
)

# TODO: ....
copy_to(
  dest = con_sdm,
  df = df_z_sr,
  name = "zone_cell",
  append = T
)

tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_sr) |>
  select(zone_seq, subregion_key = value) |>
  left_join(
    tbl(con_sdm, "zone_cell"),
    by = "zone_seq"
  ) |>
  group_by(subregion_key) |>
  summarize(n_cell = n()) |>
  arrange(subregion_key)
#   subregion_key n_cell
#   <chr>          <dbl>
# 1 AK            307,445
# 2 GA             17,274
# 3 PA             24,420
# 4 USA           349,139
Code
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

if (!"ply_subregions_2025" %in% zone_tbls) {
  source(here("libs/sdm_functions.R"))
  # - set_geom_ctr_area()
  # - register_zones()
  # - insert_zone_cell_data()

  # spatial sources
  er <- read_sf(er_raw_gpkg)
  r_cell <- rast(cell_tif, lyrs = "cell_id")

  # USA: USA mainland, Alaska, Hawaii and territories
  sr_usa <- er |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "USA",
      subregion_name = "USA mainland, Alaska, Hawaii and territories"
    )

  # AK: Alaska
  sr_ak <- er |>
    filter(region_key == "AK") |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "AK",
      subregion_name = "Alaska"
    )

  # L48: USA mainland
  sr_l48 <- er |>
    filter(
      ecoregion_key %in%
        c(
          "CAC",
          "WAOR",
          "SECS",
          "WCGOA",
          "EGOA",
          "NECS"
        )
    ) |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "L48",
      subregion_name = "USA mainland"
    )

  # AKL48: USA mainland & Alaska
  sr_akl48 <- bind_rows(sr_ak, sr_l48) |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "AKL48",
      subregion_name = "USA mainland & Alaska"
    )

  # combine
  sr <- bind_rows(
    sr_ak,
    sr_l48,
    sr_akl48,
    sr_usa
  ) |>
    set_geom_ctr_area()

  # write gpkg
  write_sf(sr, sr_gpkg)

  # shift lon [-180, 180] to [0, 360]
  sr_sl <- st_shift_longitude(sr)

  # get percent each feature covers each cell
  r_sr <- rasterize(
    sr_sl,
    r_cell,
    fun = "max",
    touches = T,
    cover = T,
    by = "subregion_key"
  )

  # convert to data frame
  d_sr <- terra::as.data.frame(
    r_sr,
    cells = T,
    na.rm = F,
    wide = F
  ) |>
    filter(
      !is.na(values)
    ) |> # 839,534 × 3
    tibble()

  # d_sr |>
  #   group_by(layer) |>
  #   summarise(n_cells  = n()) |>
  #   collect() |>
  #   arrange(desc(n_cells))
  #
  #   layer  n_cells
  #   <chr>    <int>
  # 1 USA    630,806
  # 2 AKL48  419,767
  # 3 AK     315,369
  # 4 L48    104,398

  # register zones and insert zone data into con_sdm
  register_zones(sr, "ply_subregions_2025", "subregion_key")
  insert_zone_cell_data(d_sr, "ply_subregions_2025", "subregion_key")

  # tbl(con_sdm, "zone") |>
  #   group_by(tbl) |>
  #   summarise(n_zones = n()) |>
  #   collect()
  #
  #   tbl                 n_zones
  #   <chr>                 <dbl>
  # 1 ply_ecoregions_2025      12
  # 2 ply_planareas_2025       36
  # 3 ply_subregions_2025       4

  # TODO: load ply_subregions_2025 into PostGIS
}

4 Calculate Metrics

4.1 Calculate Extinction Risk Metrics by Taxonomic Component

Code
# Start fresh with all extrisk_* metrics
existing_seqs <- tbl(con_sdm, "metric") |>
  filter(
    str_starts(metric_key, "extrisk_")
  ) |>
  pull(metric_seq)
if (length(existing_seqs) > 0) {
  metric_sql <- paste(existing_seqs, collapse = ", ")
  dbExecute(
    con_sdm,
    glue("DELETE FROM cell_metric WHERE metric_seq IN ({metric_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM zone_metric WHERE metric_seq IN ({metric_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM metric WHERE metric_seq IN ({metric_sql})")
  )
}
[1] 28
Code
d_taxon_ok <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  collect()

for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]

  message(glue("Calculating extinction risk metrics for {sp_cat}..."))

  metric_key <- glue("extrisk_{str_replace(sp_cat, ' ', '_')}")
  description <- glue("Extinction risk for {sp_cat}")
  # TODO: rename to m_key, m_description for consistency with later chunks

  # Get metric_seq first, then delete/insert
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{metric_key}'"
    )
  )

  if (nrow(existing_seq) > 0) {
    metric_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {metric_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {metric_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric WHERE metric_seq = {metric_seq}")
    )
  }

  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO metric (metric_key, description)
    VALUES ('{metric_key}', '{description}')"
    )
  )

  metric_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT max(metric_seq) AS metric_seq FROM metric WHERE metric_key = '{metric_key}'"
    )
  ) |>
    pull(metric_seq)

  # use er_score from taxon (pre-computed in ingest_nmfs-fws-mmpa-mbta-listings.qmd
  # and merged in merge_models.qmd)
  # for is_er_spatial species (turtles), ER is already baked into cell values
  # so set er_score = 100 to avoid double-counting (100 * value / 100 = value)
  d_er <- tbl(con_sdm, "taxon") |>
    filter(
      is_ok,
      if (!!sp_cat == "all") T else sp_cat == !!sp_cat
    ) |>
    select(mdl_seq, er_score, is_er_spatial) |>
    collect() |>
    mutate(
      er_score = ifelse(is_er_spatial, 100, er_score)
    ) |>
    select(mdl_seq, er_score)

  duckdb_register(con_sdm, "tmp_er", d_er |> select(mdl_seq, er_score))
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO cell_metric (cell_id, metric_seq, value)
    SELECT
      mc.cell_id,
      {metric_seq} AS metric_seq,
      ROUND(SUM(er.er_score * mc.value) / 100.0, 2) AS value
    FROM model_cell mc
    JOIN tmp_er er ON mc.mdl_seq = er.mdl_seq
    GROUP BY mc.cell_id"
    )
  ) # 6,662,075
  duckdb_unregister(con_sdm, "tmp_er")

  # visual check ----
  # d <- tbl(con_sdm, "cell_metric") |>
  #   filter(metric_seq == !!metric_seq) |>
  #   select(cell_id, value) |>
  #   collect()
  # r <- init(r_cell[[1]], NA)
  # r[d$cell_id] <- d$value
  # r_r <- rotate(r)
  # plet(r_r)
}
Calculating extinction risk metrics for bird...
Calculating extinction risk metrics for coral...
Calculating extinction risk metrics for fish...
Calculating extinction risk metrics for invertebrate...
Calculating extinction risk metrics for mammal...
Calculating extinction risk metrics for other...
Calculating extinction risk metrics for turtle...
Code
tbl(con_sdm, "metric") |>
  filter(str_starts(metric_key, "extrisk_")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)
# A tibble: 7 × 5
  metric_seq metric_key           description         metric_title metric_abbrev
       <int> <chr>                <chr>               <chr>        <chr>        
1        708 extrisk_bird         Extinction risk fo… <NA>         <NA>         
2        709 extrisk_coral        Extinction risk fo… <NA>         <NA>         
3        710 extrisk_fish         Extinction risk fo… <NA>         <NA>         
4        711 extrisk_invertebrate Extinction risk fo… <NA>         <NA>         
5        712 extrisk_mammal       Extinction risk fo… <NA>         <NA>         
6        713 extrisk_other        Extinction risk fo… <NA>         <NA>         
7        714 extrisk_turtle       Extinction risk fo… <NA>         <NA>         

4.2 Calculate ExtinctionRisk min/max per SpeciesCategory and Ecoregion

Code
for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]
  for (fxn in c("min", "max")) {
    # fxn = "min"

    m_key <- glue("extrisk_{sp_cat}_ecoregion_{fxn}")
    m_description <- glue("Extinction risk for {sp_cat} Ecoregional {fxn}")
    message(glue("m_key: {m_key}"))

    # Get metric_seq first, then delete/insert
    existing_seq <- dbGetQuery(
      con_sdm,
      glue(
        "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
      )
    ) |>
      pull(metric_seq)
    if (length(existing_seq) > 0) {
      dbExecute(
        con_sdm,
        glue("DELETE FROM metric      WHERE metric_seq = {existing_seq}")
      )
      dbExecute(
        con_sdm,
        glue("DELETE FROM cell_metric WHERE metric_seq = {existing_seq}")
      )
      dbExecute(
        con_sdm,
        glue("DELETE FROM zone_metric WHERE metric_seq = {existing_seq}")
      )
    }

    dbExecute(
      con_sdm,
      glue(
        "INSERT INTO metric (metric_key, description)
      VALUES ('{m_key}', '{m_description}')"
      )
    )

    # get metric ID
    m_seq <- dbGetQuery(
      con_sdm,
      glue(
        "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
      )
    ) |>
      pull(metric_seq)

    # calculate min/max extinction risk per ecoregion
    dbExecute(
      con_sdm,
      glue(
        "INSERT INTO zone_metric (zone_seq, metric_seq, value)
      SELECT 
        z.zone_seq,
        -- z.tbl, z.fld, z.value AS tbl_fld_val,
        {m_seq} as metric_seq,
        {fxn}(cm.value) as value
      FROM zone z
      LEFT JOIN zone_cell zc ON z.zone_seq = zc.zone_seq
      LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
      WHERE
        z.fld = 'ecoregion_key'
        AND cm.metric_seq IN (
          SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
      -- GROUP BY z.zone_seq, z.tbl, z.fld, z.value
      GROUP BY z.zone_seq "
      )
    )
  }
}
m_key: extrisk_bird_ecoregion_min
m_key: extrisk_bird_ecoregion_max
m_key: extrisk_coral_ecoregion_min
m_key: extrisk_coral_ecoregion_max
m_key: extrisk_fish_ecoregion_min
m_key: extrisk_fish_ecoregion_max
m_key: extrisk_invertebrate_ecoregion_min
m_key: extrisk_invertebrate_ecoregion_max
m_key: extrisk_mammal_ecoregion_min
m_key: extrisk_mammal_ecoregion_max
m_key: extrisk_other_ecoregion_min
m_key: extrisk_other_ecoregion_max
m_key: extrisk_turtle_ecoregion_min
m_key: extrisk_turtle_ecoregion_max
Code
tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^extrisk_.*_ecoregion_(min|max)$")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)
# A tibble: 14 × 5
   metric_seq metric_key                  description metric_title metric_abbrev
        <int> <chr>                       <chr>       <chr>        <chr>        
 1        715 extrisk_bird_ecoregion_min  Extinction… <NA>         <NA>         
 2        716 extrisk_bird_ecoregion_max  Extinction… <NA>         <NA>         
 3        717 extrisk_coral_ecoregion_min Extinction… <NA>         <NA>         
 4        718 extrisk_coral_ecoregion_max Extinction… <NA>         <NA>         
 5        719 extrisk_fish_ecoregion_min  Extinction… <NA>         <NA>         
 6        720 extrisk_fish_ecoregion_max  Extinction… <NA>         <NA>         
 7        721 extrisk_invertebrate_ecore… Extinction… <NA>         <NA>         
 8        722 extrisk_invertebrate_ecore… Extinction… <NA>         <NA>         
 9        723 extrisk_mammal_ecoregion_m… Extinction… <NA>         <NA>         
10        724 extrisk_mammal_ecoregion_m… Extinction… <NA>         <NA>         
11        725 extrisk_other_ecoregion_min Extinction… <NA>         <NA>         
12        726 extrisk_other_ecoregion_max Extinction… <NA>         <NA>         
13        727 extrisk_turtle_ecoregion_m… Extinction… <NA>         <NA>         
14        728 extrisk_turtle_ecoregion_m… Extinction… <NA>         <NA>         

4.3 Calculate rescaled ExtinctionRisk per cell based on EcoregionMinMax per SpeciesCategory
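
The SQL below rescales each cell's raw extinction-risk value to [0,100] within its ecoregion's min/max, weighting by normalized ecoregion coverage when a cell spans ecoregions. A toy numeric check with assumed values:

Code
# cell 60% in ecoregion A (min 2, max 10) and 40% in ecoregion B (min 5, max 25)
x  <- 6                   # raw extrisk value for the cell
w  <- c(A = 0.6, B = 0.4) # normalized coverage (sums to 1)
mn <- c(A = 2, B = 5)
mx <- c(A = 10, B = 25)
sum((x - mn) / (mx - mn) * w) * 100 # rescaled value: 32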

Code
for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]

  m_key <- glue("extrisk_{sp_cat}_ecoregion_rescaled")
  m_description <- glue(
    "Extinction risk for {sp_cat}, rescaled to [0,100] based on Ecoregional min/max values"
  )

  # Get metric_seq first, then delete/insert
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  )
  if (nrow(existing_seq) > 0) {
    m_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric      WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "
      INSERT INTO metric (metric_key, description)
      VALUES ('{m_key}', '{m_description}')"
    )
  )
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "
      SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  dbExecute(
    con_sdm,
    glue(
      "
      WITH
      -- get ecoregion_minmax from zone_metric
      ecoregion_minmax AS (
        SELECT 
          z.zone_seq,
          MIN(zm.value) as min_value,
          MAX(zm.value) as max_value
        FROM zone z
        JOIN zone_metric zm USING (zone_seq)
        WHERE 
          z.fld = 'ecoregion_key' AND 
          zm.metric_seq IN (
            SELECT metric_seq FROM metric WHERE metric_key IN (
              'extrisk_{sp_cat}_ecoregion_min',
              'extrisk_{sp_cat}_ecoregion_max') )
        GROUP BY z.zone_seq
      ),
      -- get cell_ecoregion from zone_cell with pct_covered
      cell_ecoregion_raw AS (
        SELECT 
          cell_id, 
          z.zone_seq, 
          zone_cell.pct_covered
        FROM zone_cell
        JOIN zone z ON zone_cell.zone_seq = z.zone_seq
        WHERE z.fld = 'ecoregion_key'
      ),
      -- calculate total coverage per cell for normalization
      cell_coverage_total AS (
        SELECT 
          cell_id, 
          SUM(pct_covered) as total_coverage
        FROM cell_ecoregion_raw
        GROUP BY cell_id
      ),
      -- normalize coverage percentages
      -- for cells with only one ecoregion, use 100% regardless of actual coverage
      -- for cells with multiple ecoregions, rescale to ensure percentages sum to 100
      cell_ecoregion AS (
        SELECT 
          ce.cell_id, 
          ce.zone_seq,
          CASE 
            WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
            ELSE ce.pct_covered * 100.0 / ct.total_coverage
          END as normalized_pct
        FROM cell_ecoregion_raw ce
        JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
      )
      -- insert rescaled extinction risk values
      INSERT INTO cell_metric (cell_id, metric_seq, value)
      SELECT 
        cm.cell_id,
        {m_seq} as metric_seq,
        SUM(
          (cm.value     - er.min_value) / 
          (er.max_value - er.min_value) * 
          (ce.normalized_pct / 100.0) ) * 100 AS value  -- scale to 0-100 range
      FROM cell_metric      cm
      JOIN cell_ecoregion   ce ON cm.cell_id  = ce.cell_id
      JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
      WHERE cm.metric_seq IN (
        SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
      AND cm.value IS NOT NULL
      AND er.min_value IS NOT NULL
      AND er.max_value IS NOT NULL
      AND er.min_value < er.max_value  -- avoid division by zero
      GROUP BY cm.cell_id
    "
    )
  )
}

tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^extrisk_.*_ecoregion_rescaled$")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)
# A tibble: 7 × 5
  metric_seq metric_key                   description metric_title metric_abbrev
       <int> <chr>                        <chr>       <chr>        <chr>        
1        729 extrisk_bird_ecoregion_resc… Extinction… <NA>         <NA>         
2        730 extrisk_coral_ecoregion_res… Extinction… <NA>         <NA>         
3        731 extrisk_fish_ecoregion_resc… Extinction… <NA>         <NA>         
4        732 extrisk_invertebrate_ecoreg… Extinction… <NA>         <NA>         
5        733 extrisk_mammal_ecoregion_re… Extinction… <NA>         <NA>         
6        734 extrisk_other_ecoregion_res… Extinction… <NA>         <NA>         
7        735 extrisk_turtle_ecoregion_re… Extinction… <NA>         <NA>         

4.4 Transfer raw PrimProd into cell_metric

Code
# Create metric for primary productivity
m_key <- "primprod"
m_description <- "Primary productivity: Oregon State Vertically Generalized Production Model (VGPM) from Visible Infrared Imaging Radiometer Suite (VIIRS) satellite data (mg C / m^2 / day) from daily averages available as monthly averaged to annual and averaged to overall for the most recently available full years of data 2014 to 2023"
# TODO: consider log rescaling PrimaryProductivity

# workflow: ingest_productivity.qmd
dir_vgpm <- glue("{dir_data}/raw/oregonstate.edu")
vgpm_tif <- glue(
  "{dir_vgpm}/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd.tif"
)

r_cell <- rast(cell_tif) |> subset("cell_id")
ext(r_cell) <- round(ext(r_cell), 3)
# r_cell_r <- rotate(r_cell)
# ext(r_cell_r) <- round(ext(r_cell_r), 3)

r_vgpm <- rast(vgpm_tif) |> subset("npp_avg")
ext(r_vgpm) <- round(ext(r_vgpm), 3)
# r_vgpm_rc <- rotate(r_vgpm) |>
#   crop(r_cell) |>
#   mask(r_cell)

r_v_c <- resample(
  r_vgpm,
  r_cell,
  method = "bilinear"
) |>
  mask(r_cell) |>
  crop(r_cell)
# plot(r_v_c)
r_cv <- c(r_cell, r_v_c)

# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
)
if (nrow(existing_seq) > 0) {
  m_seq <- existing_seq$metric_seq
  dbExecute(con_sdm, glue("DELETE FROM metric      WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}"))
}
[1] 32
Code
# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
    INSERT INTO metric (metric_key, description)
    VALUES ('{m_key}', '{m_description}')"
  )
)
[1] 1
Code
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
    SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)

d_cv <- values(r_cv, dataframe = T, na.rm = T) |>
  tibble() |>
  rename(
    value = npp_avg
  ) |>
  mutate(
    cell_id = as.integer(cell_id),
    metric_seq = !!m_seq
  )

dbWriteTable(con_sdm, "cell_metric", d_cv, append = T)

tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^primprod")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)
# A tibble: 6 × 5
  metric_seq metric_key                  description  metric_title metric_abbrev
       <int> <chr>                       <chr>        <chr>        <chr>        
1        702 primprod_ecoregion_min      Primary pro… <NA>         <NA>         
2        703 primprod_ecoregion_max      Primary pro… <NA>         <NA>         
3        704 primprod_avg                Primary pro… <NA>         <NA>         
4        705 primprod_stddev             Primary pro… <NA>         <NA>         
5        706 primprod_ecoregion_rescaled Primary pro… Primary Pro… Prim. Prod.  
6        736 primprod                    Primary pro… <NA>         <NA>         
Code
#   metric_seq metric_key description
#        <int> <chr>      <chr>
# 1        160 primprod   Primary productivity: Oregon State Vertically Generali…

# visual check ----
# d <- tbl(con_sdm, "cell_metric") |>
#   filter(metric_seq == m_seq) |>
#   select(cell_id, value) |>
#   collect()
# r <- init(r_cell[[1]], NA)
# r[d$cell_id] <- d$value
# r_r <- rotate(r)
# plet(r_r)

4.5 Calculate PrimProd min/max by Ecoregion

Code
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

for (fxn in c("min", "max")) {
  # fxn = "min"

  # get existing primprod ecoregion min/max metric_seq
  m_key <- glue("primprod_ecoregion_{fxn}")
  m_description <- glue("Primary productivity Ecoregional {fxn}")

  # Check if metric exists and delete if it does
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  )
  if (nrow(existing_seq) > 0) {
    m_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric      WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
    )
  )

  # get metric_seq
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  # Calculate min/max primary productivity per ecoregion
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT 
      z.zone_seq,
      -- z.tbl, z.fld, z.value AS tbl_fld_val,
      {m_seq} as metric_seq,
      {fxn}(cm.value) as value
    FROM      zone        z
    LEFT JOIN zone_cell   zc ON z.zone_seq = zc.zone_seq
    LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      z.fld         = 'ecoregion_key' AND
      cm.metric_seq = {pp_seq}
    GROUP BY z.zone_seq"
    )
  )
}
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_min"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  35.15  135.08  267.47  257.26  366.07  454.43 
Code
# OLD:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 65.61  130.50  237.31  247.02  358.66  435.62
# NEW:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 35.15  135.08  267.47  257.26  366.07  454.43
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_max"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  750.8  3208.6  4229.2  4185.3  5347.1  7913.8 

4.6 Calculate PrimProd avg/sd by Planning Area

Code
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

for (fxn in c("avg", "stddev")) {
  # fxn = "avg"

  # get existing primprod avg/stddev metric_seq
  m_key <- glue("primprod_{fxn}")
  m_description <- glue("Primary productivity {fxn}")

  # Check if metric exists and delete if it does
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)
  if (length(existing_seq) > 0) {
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM metric      WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM cell_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM zone_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
    )
  )

  # get metric_seq
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  stopifnot(length(m_seq) == 1)

  # calculate avg/stddev primary productivity per planning area
  # dbGetQuery(con_sdm, glue(
  #   "SELECT
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO zone_metric (zone_seq, metric_seq, value)
     SELECT
      z.zone_seq,
      {m_seq} as metric_seq,
      {fxn}(cm.value) as value
    FROM      zone        z
    LEFT JOIN zone_cell   zc ON z.zone_seq = zc.zone_seq
    LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      z.fld         = 'planarea_key' AND
      cm.metric_seq = {pp_seq}
    GROUP BY z.zone_seq"
    )
  )
}

# tbl(con_sdm, "cell_metric") |>
#   filter(
#     metric_seq == !!pp_seq) |>
#   pull(value) |>
#   summary()

tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_avg"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  124.4   369.6   602.2   652.7   817.8  1930.0 

4.7 Calculate rescaled PrimProd per cell based on EcoregionMinMax

Code
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

# Create new metric for rescaled primary productivity
m_key <- "primprod_ecoregion_rescaled"
m_description <- "Primary productivity rescaled to [0,100] based on Ecoregional min/max values"
# TODO: consider log rescaling PrimaryProductivity

# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(existing_seq) > 0) {
  dbExecute(
    con_sdm,
    glue("DELETE FROM metric      WHERE metric_seq = {existing_seq}")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM cell_metric WHERE metric_seq = {existing_seq}")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM zone_metric WHERE metric_seq = {existing_seq}")
  )
}
[1] 32
Code
# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
    INSERT INTO metric (metric_key, description)
    VALUES ('{m_key}', '{m_description}')"
  )
)
[1] 1
Code
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
    SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)

# Rescale primary productivity by ecoregion
# dbGetQuery(con_sdm, glue("
dbExecute(
  con_sdm,
  glue(
    "
    WITH
    -- get ecoregion_minmax from cell_metric for primary productivity
    ecoregion_minmax AS (
      SELECT 
        z.zone_seq,
        MIN(zm.value) as min_value,
        MAX(zm.value) as max_value
      FROM zone z
      JOIN zone_metric zm USING (zone_seq)
      WHERE 
        z.fld = 'ecoregion_key' AND 
        zm.metric_seq IN (
          SELECT metric_seq FROM metric WHERE metric_key IN (
            'primprod_ecoregion_min',
            'primprod_ecoregion_max') )
      GROUP BY z.zone_seq ),
    -- get cell_ecoregion from zone_cell with pct_covered
    cell_ecoregion_raw AS (
      SELECT 
        cell_id, 
        z.zone_seq, 
        zone_cell.pct_covered
      FROM zone_cell
      JOIN zone z ON zone_cell.zone_seq = z.zone_seq
      WHERE z.fld = 'ecoregion_key'
    ),
    -- calculate total coverage per cell for normalization
    cell_coverage_total AS (
      SELECT 
        cell_id, 
        SUM(pct_covered) as total_coverage
      FROM cell_ecoregion_raw
      GROUP BY cell_id
    ),
    -- normalize coverage percentages
    cell_ecoregion AS (
      SELECT 
        ce.cell_id, 
        ce.zone_seq,
        CASE 
          WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
          ELSE ce.pct_covered * 100.0 / ct.total_coverage
        END as normalized_pct
      FROM cell_ecoregion_raw ce
      JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
    )
    -- insert rescaled primary productivity values
    INSERT INTO cell_metric (cell_id, metric_seq, value)
    SELECT 
      cm.cell_id,
      {m_seq} as metric_seq,
      SUM(
        -- (c.prim_prod_mean - er.min_value) / 
        (cm.value     - er.min_value) / 
        (er.max_value - er.min_value) * 
        (ce.normalized_pct / 100.0) ) * 100 AS value  -- scale to 0-100 range
    -- FROM cell c
    FROM cell_metric cm
    JOIN cell_ecoregion   ce ON cm.cell_id  = ce.cell_id
    JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
    WHERE 
    --c.prim_prod_mean IS NOT NULL AND 
    cm.metric_seq = {pp_seq} AND
    cm.value IS NOT NULL AND 
    ce.zone_seq IS NOT NULL AND
    er.min_value IS NOT NULL AND 
    er.max_value IS NOT NULL AND 
    er.min_value < er.max_value  -- avoid division by zero
    GROUP BY cm.cell_id
"
  )
)
[1] 623920
Code
# tbl(con_sdm, "cell_metric") |>
#   filter(
#     metric_seq == !!m_seq)

tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_rescaled"
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   2.619   5.730  10.379  15.007 100.000 

4.8 Calculate ProgramArea metrics contributing to score based on cell metrics

Code
# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  pull(zone_seq)

message(glue("Calculating metrics for {length(z_seqs)} program areas..."))
Calculating metrics for 20 program areas...
Code
# Calculating metrics for 20 program areas...

metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)

for (m_key in metrics) {
  # m_key = metrics[1]

  # get existing metric_seq
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)

  if (length(m_seq) != 1) {
    message(glue("  Skipping {m_key} - metric not found"))
    next
  }

  # delete existing zone_metric values for this metric and program areas
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
    WHERE metric_seq = {m_seq}
      AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )

  # stop if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(metric_seq == !!m_seq) |>
    tally() |>
    pull(n)

  if (n_cm == 0) {
    message(glue("  Skipping {m_key} - no cell_metric values"))
    next
  }

  # calculate average score per program area weighted by cell coverage
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT
      zc.zone_seq,
      {m_seq} as metric_seq,
      SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
    FROM zone_cell zc
    JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      cm.metric_seq = {m_seq}
      AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
      AND cm.value IS NOT NULL
    GROUP BY zc.zone_seq"
    )
  )

  message(glue("  Calculated {m_key}"))
}
  Calculated extrisk_bird
  Calculated extrisk_coral
  Calculated extrisk_fish
  Calculated extrisk_invertebrate
  Calculated extrisk_mammal
  Calculated extrisk_other
  Calculated extrisk_turtle
  Calculated extrisk_bird_ecoregion_rescaled
  Calculated extrisk_coral_ecoregion_rescaled
  Calculated extrisk_fish_ecoregion_rescaled
  Calculated extrisk_invertebrate_ecoregion_rescaled
  Calculated extrisk_mammal_ecoregion_rescaled
  Calculated extrisk_other_ecoregion_rescaled
  Calculated extrisk_turtle_ecoregion_rescaled
  Calculated primprod
  Calculated primprod_ecoregion_rescaled
Code
# verify
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(metric_key %in% metrics) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_pra) |>
      select(zone_seq, pra_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pra_key)) |>
  collect()

d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.8816  9.5945 19.9248 28.6261 45.5329 98.9915 
Code
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 1.195  11.186  21.902  27.317  41.150  91.748

4.9 Calculate Ecoregion metrics contributing to score based on cell metrics

Code
# get zone_seqs for ecoregions
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_er
  ) |>
  pull(zone_seq)
stopifnot(
  `No ecoregion zones found in zone table — check tbl_er value matches zone$tbl` = length(
    z_seqs
  ) >
    0
)

# tbl(con_sdm, "metric") |> pull(metric_key) |>  sort()

metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)

for (m_key in metrics) {
  # m_key = metrics[1]

  # get existing metric_seq
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)
  stopifnot(length(m_seq) == 1)

  # delete existing zone_metric values for this metric and ecoregions
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
    WHERE metric_seq = {m_seq}
      AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )

  # stop if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(
      metric_seq == !!m_seq
    ) |>
    tally() |>
    pull(n)
  stopifnot(n_cm > 0)

  # calculate average score per ecoregion weighted by cell coverage
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT 
      zc.zone_seq,
      {m_seq} as metric_seq,
      SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
    FROM zone_cell zc
    JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE 
      cm.metric_seq = {m_seq} 
      AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
      AND cm.value IS NOT NULL
    GROUP BY zc.zone_seq"
    )
  )
}

d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key %in% metrics
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_er
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()

d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.8816 13.4333 21.2185 29.0290 39.9820 90.1758 
Code
# OLD:
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.1106 12.9840 24.9402 28.6058 43.1917 83.1341
# NEW:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 2.365  13.926  21.852  26.010  35.288  80.208
# NEWEST:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 2.365  13.923  22.475  27.350  38.309  80.039

4.10 Calculate cell_metric score: ExtinctionRisk + PrimProd (already EcoregionRescaled), equal weight

Code
# Create combined metric for extinction risk and primary productivity
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_description <- "Combined score of extinction risk per species category and primary productivity, equally weighted (and each previously rescaled [0,100] based on Ecoregional min/max values)"

# Check if metric exists and delete if it does
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(m_seq) > 0) {
  dbExecute(con_sdm, glue("DELETE FROM metric      WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}"))
}
[1] 20
Code
# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
  INSERT INTO metric (metric_key, description)
  VALUES ('{m_key}', '{m_description}')"
  )
)
[1] 1
Code
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
  SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)

# Get all metric_seqs for extinction risk metrics and primary productivity
metric_keys <- c(
  sapply(
    sp_cats,
    \(sc) glue("extrisk_{str_replace(sc, ' ', '_')}_ecoregion_rescaled"),
    USE.NAMES = F
  ),
  "primprod_ecoregion_rescaled"
)

# Create SQL to join all metrics with equal weights
dbExecute(
  con_sdm,
  glue(
    "
  WITH metric_values AS (
    SELECT 
      cm.cell_id,
      m.metric_key,
      cm.value
    FROM cell_metric cm
    JOIN metric m ON cm.metric_seq = m.metric_seq
    WHERE m.metric_key IN ({paste(shQuote(metric_keys), collapse=', ')}) )
  INSERT INTO cell_metric (cell_id, metric_seq, value)
  SELECT 
    cell_id,
    {m_seq} as metric_seq,
    ROUND(AVG(value)) AS value
  FROM metric_values
  GROUP BY cell_id"
  )
)
[1] 629771
Code
# Check the new metric
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  collect()

d_ck |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00   17.00   23.00   24.62   29.00   95.00 
Code
# OLD:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.00   10.00   16.00   21.28   31.00   89.00
# NEW:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.00   12.00   17.00   21.29   26.00   89.00
# NEWEST:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.0    12.0    18.0    22.1    28.0    89.0

4.11 Calculate Score per ProgramArea from ProgramArea metrics

Code
# metric: calculate score per program area
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)

metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)
stopifnot(length(metric_input_keys) == length(m_input_seqs))

# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_pra
  ) |>
  pull(zone_seq)

# DEBUG check
# tbl(con_sdm, "zone_metric") |>
#   filter(
#     metric_seq == !!m_seq,
#     zone_seq %in% z_seqs) |>
#   arrange(desc(value)) |>
#   pull(value) |>
#   range()
# 5.704735 61.050130
#   View()
#   nrow() -> n_existing_zm
# z_seq == 134 # (ALA)

# delete existing zone_metric values for this metric and planning areas
dbExecute(
  con_sdm,
  glue(
    "DELETE FROM zone_metric 
  WHERE metric_seq = {m_seq} 
    AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
  )
)
[1] 0
Code
# calculate average score per program area
dbExecute(
  con_sdm,
  glue(
    "
  INSERT INTO zone_metric (zone_seq, metric_seq, value)
  SELECT 
    zone_seq,
    {m_seq} as metric_seq,
    SUM(value) / COUNT(value) as value
  FROM zone_metric zm
  WHERE 
    metric_seq IN ({paste(m_input_seqs, collapse = ',')}) 
    AND zone_seq IN ({paste(z_seqs, collapse = ',')})
    AND value IS NOT NULL
  GROUP BY zone_seq"
  )
) # 20
[1] 20
Code
# check the new metric
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_pra
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()

d_ck |>
  pull(value) |>
  summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  11.14   21.49   27.55   28.34   32.39   52.62 
Code
# PlanAreas:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  7.16   17.10   22.86   25.42   31.59   60.07
# NEW ProgramAreas:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 7.642  17.372  24.131  26.439  32.972  60.068

4.12 Metric labels

Add display label columns to the metric table as the single source of truth for display names used in score tables, Word/Excel output, and flower plots.

Code
# add display label columns to metric table if missing
for (col in c("metric_title", "metric_abbrev")) {
  cols <- dbGetQuery(
    con_sdm,
    "SELECT column_name FROM information_schema.columns WHERE table_name = 'metric'"
  )
  if (!col %in% cols$column_name) {
    dbExecute(con_sdm, glue("ALTER TABLE metric ADD COLUMN {col} VARCHAR"))
  }
}

# populate labels for score-contributing metrics
metric_labels <- tribble(
  ~metric_key                                                  , ~metric_title        , ~metric_abbrev ,
  "score_extriskspcat_primprod_ecoregionrescaled_equalweights" , "Score"              , "Score"        ,
  "extrisk_bird_ecoregion_rescaled"                            , "Bird"               , "Bird"         ,
  "extrisk_coral_ecoregion_rescaled"                           , "Coral"              , "Coral"        ,
  "extrisk_fish_ecoregion_rescaled"                            , "Fish"               , "Fish"         ,
  "extrisk_invertebrate_ecoregion_rescaled"                    , "Invertebrate"       , "Invertebrate" ,
  "extrisk_mammal_ecoregion_rescaled"                          , "Mammal"             , "Mammal"       ,
  "extrisk_other_ecoregion_rescaled"                           , "Other"              , "Other"        ,
  "extrisk_turtle_ecoregion_rescaled"                          , "Turtle"             , "Turtle"       ,
  "primprod_ecoregion_rescaled"                                , "Primary Production" , "Prim. Prod."
)

duckdb_register(con_sdm, "tmp_labels", metric_labels)
dbExecute(
  con_sdm,
  "
  UPDATE metric
  SET metric_title  = t.metric_title,
      metric_abbrev = t.metric_abbrev
  FROM tmp_labels t
  WHERE metric.metric_key = t.metric_key"
)
[1] 9
Code
duckdb_unregister(con_sdm, "tmp_labels")

5 Add/update zone_taxon

For showing species composition per zone (subregion, program area) in the mapgl app.

Code
tbl_taxon <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  select(
    sp_cat,
    sp_common = common_name,
    sp_scientific = scientific_name,
    taxon_id,
    taxon_authority,
    rl_code = extrisk_code,
    er_score,
    is_er_spatial,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source,
    mdl_seq
  )

d <- tbl(con_sdm, "zone") |>
  filter(
    tbl %in% c(tbl_sr, tbl_pra)
  ) |>
  select(zone_seq, zone_tbl = tbl, zone_fld = fld, zone_value = value) |> # zone_tbl = tbl,
  inner_join(
    tbl(con_sdm, "zone_cell") |>
      select(zone_seq, cell_id),
    by = join_by(zone_seq)
  ) |>
  inner_join(
    tbl(con_sdm, "model_cell") |>
      select(mdl_seq, cell_id, value) |>
      inner_join(
        tbl_taxon,
        by = join_by(mdl_seq)
      ),
    by = join_by(cell_id)
  ) |>
  inner_join(
    tbl(con_sdm, "cell") |>
      select(cell_id, area_km2),
    by = join_by(cell_id)
  ) |>
  group_by(
    zone_tbl,
    zone_fld,
    zone_value, # zone_seq,
    mdl_seq,
    sp_cat,
    sp_common,
    sp_scientific,
    taxon_id,
    taxon_authority,
    rl_code,
    er_score,
    is_er_spatial,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source
  ) |>
  summarize(
    area_km2 = sum(area_km2, na.rm = T),
    avg_suit = mean(value, na.rm = T) / 100,
    .groups = "drop"
  ) |>
  collect()


d <- d |>
  mutate(
    # for is_er_spatial (turtles), ER is already baked into cell values
    suit_rl = ifelse(is_er_spatial, avg_suit, avg_suit * er_score / 100),
    suit_rl_area = suit_rl * area_km2
  ) |>
  group_by(zone_fld, zone_value, sp_cat) |>
  mutate(
    cat_suit_rl_area = sum(suit_rl_area, na.rm = T)
  ) |>
  ungroup() |>
  mutate(
    pct_cat = suit_rl_area / cat_suit_rl_area
  )

write_csv(d, zone_taxon_csv)
dbWriteTable(con_sdm, "zone_taxon", d, overwrite = T)
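
Once written, per-zone species composition can be pulled straight from zone_taxon; a quick sanity-check sketch (the programarea_key value "CHU" here is hypothetical):

Code
# top taxa by category share for one program area (hypothetical key)
tbl(con_sdm, "zone_taxon") |>
  filter(zone_fld == "programarea_key", zone_value == "CHU") |>
  arrange(desc(pct_cat)) |>
  head(10) |>
  collect()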

6 Update PostGIS layers with metrics from DuckDB

These layers are used in the mapgl app (via pg_tileserv) and need to be updated with the new metrics calculated in DuckDB. The update_table_with_clear function adds new metric columns if they don’t exist, clears existing values for those metrics, and then updates with new values from DuckDB.
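
A minimal sketch of the intended call pattern (the function is defined in 6.1 and the real call with program area scores is in 6.2; the toy data frame and its values here are hypothetical):

Code
# hypothetical wide data frame: one row per program area, one column per metric
d_example <- tibble(
  programarea_key = c("ALA", "CHU"),
  score_extriskspcat_primprod_ecoregionrescaled_equalweights = c(42.1, 37.5)
)
# update_table_with_clear(con, d_example, tbl_pra, key_col = "programarea_key")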

6.1 Update Zone Tables in PostGIS with Metrics

Code
update_table_with_clear <- function(con, df, tbl, key_col = "programarea_key") {
  # df = d_pa_metrics; tbl = "ply_planareas_2025"; key_col = "planarea_key"

  stopifnot(dbExistsTable(con, tbl))
  existing_cols <- dbListFields(con, tbl)
  stopifnot(key_col %in% existing_cols)
  stopifnot(key_col %in% names(df))
  source_cols <- setdiff(names(df), key_col)

  # Add missing columns
  for (col in source_cols) {
    if (!col %in% existing_cols) {
      add_col_query <- glue(
        "ALTER TABLE {tbl} ADD COLUMN {col} DOUBLE PRECISION"
      )
      dbExecute(con, add_col_query)
      message("Added column: ", col)
    }
  }

  # Clear existing values for columns we're updating
  existing_source_cols <- intersect(source_cols, existing_cols)
  if (length(existing_source_cols) > 0) {
    clear_clauses <- paste0(existing_source_cols, " = NULL", collapse = ", ")
    clear_query <- glue("UPDATE {tbl} SET {clear_clauses}")
    dbExecute(con, clear_query)
    message(
      "Cleared existing values for: ",
      paste(existing_source_cols, collapse = ", "),
      "\n"
    )
  }

  # Now update with new values
  dbWriteTable(
    con,
    "temp_metrics_update",
    df,
    temporary = TRUE,
    overwrite = TRUE
  )

  set_clauses <- paste0(source_cols, " = temp.", source_cols, collapse = ", ")
  update_query <- glue(
    "
    UPDATE {tbl} 
    SET {set_clauses}
    FROM temp_metrics_update temp
    WHERE {tbl}.{key_col} = temp.{key_col}
  "
  )

  rows_affected <- dbExecute(con, update_query)
  message("Updated ", rows_affected, " rows")

  dbRemoveTable(con, "temp_metrics_update", force = TRUE)

  return(rows_affected)
}

# update zone.tbl to include version suffix ----
zone_tbls <- tbl(con_sdm, "zone") |> distinct(tbl) |> pull(tbl)
for (zt in zone_tbls) {
  zt_v <- if (!grepl("_v\\d+$", zt)) paste0(zt, v_sfx) else zt
  if (zt != zt_v) {
    # dbExecute(
    #   con_sdm,
    #   glue(
    #     "UPDATE zone SET tbl = '{zt_v}' WHERE tbl = '{zt}'"
    #   )
    # )
    cat(glue("zone.tbl: {zt} -> {zt_v} ~ {Sys.time()}"))
  }
}
Code
# create versioned PostGIS tables from existing tables ----
pg_zone_tbls <- c(
  tbl_er,
  tbl_pra
)
for (pg_tbl in pg_zone_tbls) {
  # pg_tbl already includes v_sfx (e.g. _v3) from tbl_er, tbl_pra
  pg_tbl_base <- str_remove(pg_tbl, v_sfx)
  if (!dbExistsTable(con, pg_tbl)) {
    dbExecute(con, glue("CREATE TABLE {pg_tbl} AS SELECT * FROM {pg_tbl_base}"))
    message(glue("Created {pg_tbl} from {pg_tbl_base}"))
  } else {
    message(glue("{pg_tbl} already exists"))
  }
}

6.2 Load Program Area Scores to PostGIS

Code
# delete zone_metric rows without a metric_key
dbExecute(
  con_sdm,
  "
  DELETE FROM zone_metric 
  WHERE metric_seq NOT IN (SELECT metric_seq FROM metric)"
) # 0

# check if table exists in PostGIS
if (!dbExistsTable(con, tbl_pra)) {
  stop("Run create_pg_v3_tables R chunk above")
}

# get program area metrics from DuckDB
d_pra_metrics <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric") |>
      mutate(value = round(value, 1)),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  filter(
    !is.na(programarea_key),
    !is.na(metric_key),
    !is.na(value)
  ) |>
  pivot_wider(
    names_from = metric_key,
    values_from = value
  ) |>
  collect()
message(glue("Loaded {nrow(d_pra_metrics)} program area metrics"))

# For complete refresh (clears existing values first)
update_table_with_clear(
  con,
  d_pra_metrics,
  tbl_pra,
  "programarea_key"
)
Code
flds_pra_df <- names(d_pra_metrics)
flds_pra_db <- dbListFields(con, tbl_pra)
flds_pra_rm <- setdiff(flds_pra_db, flds_pra_df) |>
  str_subset("(extrisk_)|(^na$)")

d_rm <-
  tibble(
    tbl = tbl_pra,
    fld = flds_pra_rm
  )

for (i in seq_len(nrow(d_rm))) {
  # i = 1
  tbl <- d_rm$tbl[i]
  fld <- d_rm$fld[i]

  a <- dbExecute(con, glue('ALTER TABLE {tbl} DROP COLUMN "{fld}"'))
  message("Removed column: ", fld, " from ", tbl)
}

6.3 Add index for vector tiles

Per the pg_tileserv "Feature id" documentation, each published table needs an integer id column (ideally the primary key) to serve as the vector tile feature id.

Code
# ensure integer feature id columns: cast programarea_id to integer; add serial primary key to ecoregions
dbExecute(
  con,
  glue(
    "ALTER TABLE public.{tbl_pra} ALTER COLUMN programarea_id TYPE INTEGER;"
  )
)
dbExecute(
  con,
  glue(
    "ALTER TABLE public.{tbl_er} ADD COLUMN IF NOT EXISTS ecoregion_id SERIAL PRIMARY KEY;"
  )
)

7 Generate downloads

For sharing with BOEM and external partners.

Code
redo_dl <- T

# ecoregion gpkg destination (input copy, no metrics — just copy to dir_v)
er_gpkg_dst <- glue("{dir_v}/ply_ecoregions_2025.gpkg")

# redo_dl: delete existing files
if (redo_dl) {
  for (f in c(
    pra_gpkg,
    lyrs_csv,
    metrics_tif,
    metrics_pra_tif,
    er_gpkg_dst
  )) {
    if (file_exists(f)) {
      file_delete(f)
    }
  }
}

# * pra_gpkg (with metrics from DuckDB) ----
if (!file.exists(pra_gpkg)) {
  message("Generating Program Areas geopackage with metrics...")

  # get metrics from DuckDB
  d_pra_metrics <- tbl(con_sdm, "zone") |>
    filter(tbl == !!tbl_pra) |>
    select(programarea_key = value, zone_seq) |>
    left_join(
      tbl(con_sdm, "zone_metric") |>
        mutate(value = round(value, 1)),
      by = "zone_seq"
    ) |>
    left_join(
      tbl(con_sdm, "metric") |>
        select(metric_seq, metric_key),
      by = "metric_seq"
    ) |>
    select(programarea_key, metric_key, value) |>
    filter(!is.na(metric_key), !is.na(value)) |>
    pivot_wider(names_from = metric_key, values_from = value) |>
    collect()

  # read base geometry (spatial columns only, no metrics)
  base_cols <- c(
    "programarea_id",
    "region_key",
    "region_name",
    "planarea_key",
    "planarea_name",
    "programarea_key",
    "programarea_name",
    "ctr_lon",
    "ctr_lat",
    "area_km2"
  )
  pra_prev_gpkg <- glue(
    "{dir_derived}/{ver_prev}/ply_programareas_2026_{ver_prev}.gpkg"
  )
  pra_base <- read_sf(pra_prev_gpkg) |>
    select(any_of(base_cols))

  # join metrics to geometry
  pra_out <- pra_base |>
    left_join(d_pra_metrics, by = "programarea_key")

  st_write(pra_out, pra_gpkg, delete_dsn = T, quiet = T)
}
Generating Program Areas geopackage with metrics...
Code
# * lyrs_csv ----
if (!file.exists(lyrs_csv)) {
  message("Generating layers CSV...")

  d_lyrs <- bind_rows(
    tibble(
      order = 1,
      category = "Overall",
      layer = "score",
      lyr = "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
    ),
    tibble(
      order = 2,
      category = "Species, rescaled by Ecoregion",
      layer = glue("{sp_cats}: ext. risk, ecorgn"),
      lyr = glue("extrisk_{sp_cats}_ecoregion_rescaled")
    ),
    tibble(
      order = 3,
      category = "Primary Productivity, rescaled by Ecoregion",
      layer = glue("prim prod, ecorgn"),
      lyr = glue("primprod_ecoregion_rescaled")
    ),
    tibble(
      order = 4,
      category = "Species, raw Extinction Risk",
      layer = glue("{sp_cats}: ext. risk"),
      lyr = glue("extrisk_{sp_cats}")
    ),
    tibble(
      order = 5,
      category = "Primary Productivity, raw Phytoplankton",
      layer = "prim prod, 2014-2023 avg (mg C/m^2/day)",
      lyr = "primprod"
    )
  )

  write_csv(d_lyrs, lyrs_csv)
}
Generating layers CSV...
Code
# * metrics_tif ----
if (!file.exists(metrics_tif)) {
  message("Generating Metrics raster...")

  d_lyrs <- read_csv(lyrs_csv)

  # add zones
  lst <- list()
  for (zone_fld in c("ecoregion_key", "programarea_key")) {
    # zone_fld = "ecoregion_key"
    # zone_fld = "programarea_key"

    d <- tbl(con_sdm, "zone") |>
      filter(fld == !!zone_fld) |>
      left_join(
        tbl(con_sdm, "zone_cell"),
        by = "zone_seq"
      ) |>
      group_by(cell_id) |>
      window_order(pct_covered) |>
      summarize(
        value = last(value),
        .groups = "drop"
      ) |>
      collect() |>
      mutate(
        "{zone_fld}" := as.factor(value)
      ) |>
      select(all_of(c("cell_id", zone_fld)))

    lvls <- levels(d[[zone_fld]])
    if (length(lvls) == 0) {
      message(glue("Skipping {zone_fld}: no zone data found"))
      next
    }

    r <- init(r_cell[[1]], NA) # |> as.factor() #sort(unique(d$value)))
    # r$chr <- NA_character_ # ensure chr type
    r[d$cell_id] <- d[[zone_fld]]
    varnames(r) <- zone_fld
    names(r) <- zone_fld
    levels(r) <- tibble(
      id = seq_along(lvls),
      "{zone_fld}" := lvls
    )
    lst[[zone_fld]] <- r
  }
  r_zones <- do.call(c, lst |> unname())

  # add layers
  lst <- list()
  for (i in 1:nrow(d_lyrs)) {
    # i = 2
    lyr <- d_lyrs$lyr[i]
    layer <- d_lyrs$layer[i]

    r <- get_rast(lyr) # plot(r)
    names(r) <- lyr
    varnames(r) <- layer
    lst[[i]] <- r
  }
  r_metrics <- do.call(c, lst)

  # add log of primprod
  r_metrics[["primprod_log"]] <- log(r_metrics[["primprod"]])

  # show layer names
  names(r_metrics) |> sort() |> print()

  r_metrics <- c(r_zones, r_metrics)
  writeRaster(r_metrics, metrics_tif, overwrite = T)
}
Generating Metrics raster...
 [1] "extrisk_bird"                                              
 [2] "extrisk_bird_ecoregion_rescaled"                           
 [3] "extrisk_coral"                                             
 [4] "extrisk_coral_ecoregion_rescaled"                          
 [5] "extrisk_fish"                                              
 [6] "extrisk_fish_ecoregion_rescaled"                           
 [7] "extrisk_invertebrate"                                      
 [8] "extrisk_invertebrate_ecoregion_rescaled"                   
 [9] "extrisk_mammal"                                            
[10] "extrisk_mammal_ecoregion_rescaled"                         
[11] "extrisk_other"                                             
[12] "extrisk_other_ecoregion_rescaled"                          
[13] "extrisk_turtle"                                            
[14] "extrisk_turtle_ecoregion_rescaled"                         
[15] "primprod"                                                  
[16] "primprod_ecoregion_rescaled"                               
[17] "primprod_log"                                              
[18] "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
Code
# * metrics_pra_tif ----
if (!file.exists(metrics_pra_tif)) {
  message("Generating Metrics raster masked to Program Areas...")
  # mask to study area of Program Areas

  # subregion_keys in sr_pra_csv: USA (all 20), AK (15), PA (3), GA (2)
  # AK + L48 = USA (all program areas)
  sr <- "USA"
  pra_sr <- read_csv(sr_pra_csv) |>
    filter(subregion_key == !!sr) |>
    pull(programarea_key)

  r_metrics <- rast(metrics_tif)
  r_mask <- subset(r_metrics, "programarea_key")

  r_metrics_pra <- r_metrics |>
    mask(r_mask) |>
    trim()

  writeRaster(r_metrics_pra, metrics_pra_tif, overwrite = T)
}
Generating Metrics raster masked to Program Areas...

Code
# * er_gpkg_dst (input, no metrics — copy to dir_v for downloads) ----
if (!file.exists(er_gpkg_dst) && er_raw_gpkg != er_gpkg_dst) {
  message("Copying ecoregion gpkg to dir_v...")
  file_copy(er_raw_gpkg, er_gpkg_dst)
}
Copying ecoregion gpkg to dir_v...
Code
pa_metrics_geo <- glue(
  "{dir_derived}/OCS_Proposed_Planning_Area_Polygon_Env_Sensitivity_Metrics.geojson"
)

# * pa_metrics_geo ----
if (!file.exists(pa_metrics_geo)) {
  pa_boem_geo <- glue("{dir_data}/raw/boem.gov/OCS_Area_Polygons.geojson")

  pa <- read_sf(pa_gpkg)

  pa_boem <- read_sf(pa_boem_geo)

  (pa_miss <- pa_boem |>
    st_drop_geometry() |>
    anti_join(
      pa |>
        st_drop_geometry() |>
        select(planarea_key, planarea_name),
      by = join_by(PLANNING_AREA == planarea_key)
    ) |>
    pull(PLANNING_AREA))
  # [1] "CG" "EG" "WG"

  pa_boem |>
    filter(PLANNING_AREA %in% pa_miss) |>
    mapview::mapView(zcol = "PLANNING_AREA")

  pa |>
    st_drop_geometry() |>
    filter(
      region_name == "Gulf of America"
    ) |>
    select(planarea_key, planarea_name)
  #   planarea_key planarea_name
  #   <chr>        <chr>
  # 1 CGA          Central Gulf of America
  # 2 EGA          Eastern Gulf of America
  # 3 WGA          Western Gulf of America

  pa_metrics <- pa_boem |>
    left_join(
      pa |>
        st_drop_geometry() |>
        mutate(
          PLANNING_AREA = case_match(
            planarea_key,
            "CGA" ~ "CG",
            "EGA" ~ "EG",
            "WGA" ~ "WG",
            .default = planarea_key
          )
        ),
      by = join_by(PLANNING_AREA)
    )

  write_sf(pa_metrics, pa_metrics_geo)
}

8 Generate PMTiles

Static PMTiles archives for mapgl/mapsp apps, replacing PostGIS/pg_tileserv tile pipeline.

Code
# check / install tippecanoe and pmtiles CLI ----
check_brew_bin <- function(bin) {
  if (Sys.which(bin) == "") {
    if (Sys.which("brew") == "") {
      stop(glue("{bin} not found and homebrew not available"))
    }
    message(glue("installing {bin} via homebrew..."))
    system2("brew", c("install", bin))
    stopifnot(Sys.which(bin) != "")
  }
  message(glue("{bin}: {Sys.which(bin)}"))
}
check_brew_bin("tippecanoe") # tippecanoe: /opt/homebrew/bin/tippecanoe
check_brew_bin("pmtiles") # pmtiles:       /opt/homebrew/bin/pmtiles

dir_create(dir_pmtiles)

# helper: sf → pmtiles via tippecanoe ----
sf_to_pmtiles <- function(sf_obj, layer_name, out_dir = dir_pmtiles) {
  tmp_geojson <- tempfile(fileext = ".geojson")
  on.exit(unlink(tmp_geojson), add = TRUE)

  out_pmtiles <- glue("{out_dir}/{layer_name}.pmtiles")

  # wrap polygons crossing the antimeridian so GeoJSON [-180,180] doesn't clip
  #sf_obj <- st_wrap_dateline(sf_obj, options = c("WRAPDATELINE=YES"))

  st_write(sf_obj, tmp_geojson, driver = "GeoJSON") #, delete_dsn = TRUE)

  # tippecanoe: full detail for small polygon layers
  system2(
    "tippecanoe",
    c(
      "-z12",
      "-Z0",
      "--no-feature-limit",
      "--no-tile-size-limit",
      "-l",
      layer_name,
      "-o",
      out_pmtiles,
      "--force",
      tmp_geojson
    )
  )
  stopifnot(file_exists(out_pmtiles))
  message(glue("wrote {out_pmtiles} ({file_size(out_pmtiles)})"))
  out_pmtiles
}

# program areas WITH metrics ----
d_pra_metrics <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric") |>
      mutate(value = round(value, 1)),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  filter(!is.na(programarea_key), !is.na(metric_key), !is.na(value)) |>
  pivot_wider(names_from = metric_key, values_from = value) |>
  collect()

# load geometry from memory or prior version (pra_gpkg may not exist yet)
pra_geom <- if (exists("pra")) {
  pra
} else if (file_exists(pra_gpkg)) {
  read_sf(pra_gpkg)
} else {
  pra_prev <- glue(
    "{dir_derived}/{ver_prev}/ply_programareas_2026_{ver_prev}.gpkg"
  )
  read_sf(pra_prev)
}

# strip any existing metric columns before joining fresh ones
base_cols <- c(
  "programarea_id",
  "region_key",
  "region_name",
  "planarea_key",
  "planarea_name",
  "programarea_key",
  "programarea_name",
  "ctr_lon",
  "ctr_lat",
  "area_km2"
)
pra_sf <- pra_geom |>
  select(any_of(base_cols)) |>
  left_join(d_pra_metrics, by = "programarea_key")

# mapView(pra_sf, zcol = "score_extriskspcat_primprod_ecoregionrescaled_equalweights")

# program areas for pmtiles: base columns only (scores joined at render time)
pra_pm_sf <- pra_geom |> select(any_of(base_cols))
sf_to_pmtiles(pra_pm_sf, tbl_pra_pm)

# plot(pra_sf["score_extriskspcat_primprod_ecoregionrescaled_equalweights"])

# ecoregions WITHOUT metrics (outlines only) ----
er_sf <- read_sf(er_raw_gpkg)
sf_to_pmtiles(er_sf, tbl_er_pm)
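
As an optional check of the generated archives (assuming the go-pmtiles CLI installed above supports `show`):

Code
# print header/metadata for each generated archive
for (pm in dir_ls(dir_pmtiles, glob = "*.pmtiles")) {
  system2("pmtiles", c("show", pm))
}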

9 Summary data products

Merged from msens-summary_programareas.qmd: ecoregion × program area intersections, label placement points, and score tables.

Code
# delete existing summary data files if redo (except lbls_csv — manually adjusted)
if (redo_summary_data) {
  for (f in c(er_pra_gpkg, er_pra_csv, er_pra_n_csv, pra_n_csv)) {
    if (file_exists(f)) {
      file_delete(f)
    }
  }
}

9.1 Ecoregion × Program Area intersections

Code
er <- read_sf(er_raw_gpkg)
pra <- read_sf(pra_gpkg)

if (!file.exists(er_pra_gpkg)) {
  er_pra <- st_intersection(
    er |>
      select(ecoregion_key, ecoregion_name),
    pra |>
      select(region_key, region_name, programarea_key, programarea_name)
  ) |>
    st_make_valid() |>
    mutate(
      area_km2 = st_area(geom) |> set_units(km^2) |> drop_units(),
      ctr_lon = st_coordinates(st_centroid(geom))[, 1],
      ctr_lat = st_coordinates(st_centroid(geom))[, 2]
    ) |>
    filter(area_km2 > 1) |>
    arrange(programarea_key, ecoregion_key) |>
    group_by(programarea_key) |>
    mutate(
      pct_programarea_in_ecoregion = round(
        area_km2 / sum(area_km2) * 100,
        2
      ),
      pct_programarea_in_ecoregion = na_if(
        pct_programarea_in_ecoregion,
        100.0
      )
    ) |>
    ungroup()

  er_pra <- er_pra |>
    select(
      region_key,
      region_name,
      ecoregion_key,
      ecoregion_name,
      programarea_key,
      programarea_name,
      pct_programarea_in_ecoregion,
      area_km2,
      ctr_lon,
      ctr_lat
    ) |>
    arrange(region_key, ecoregion_key, programarea_key) |>
    mutate(
      area_km2 = round(area_km2, 2),
      ctr_lon = round(ctr_lon, 4),
      ctr_lat = round(ctr_lat, 4)
    )

  er_pra |>
    st_drop_geometry() |>
    write_csv(er_pra_csv, na = "")

  write_sf(er_pra, er_pra_gpkg, delete_dsn = T)
}
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
Code
er_pra <- read_sf(er_pra_gpkg)
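
A quick worked example of pct_programarea_in_ecoregion (hypothetical areas): a program area split 30,000 / 10,000 km² across two ecoregions gets 75% / 25%, while a program area entirely within one ecoregion computes 100% and is blanked to NA by na_if() above.

Code
# hypothetical split of one program area across two ecoregions
area_km2 <- c(30000, 10000)
round(area_km2 / sum(area_km2) * 100, 2) # 75 25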

9.2 Label placement points

Code
er_pts <- er |>
  st_drop_geometry() |>
  select(
    ecoregion_key,
    ecoregion_name,
    region_name,
    region_key,
    ctr_lon,
    ctr_lat
  ) |>
  st_as_sf(coords = c("ctr_lon", "ctr_lat"), crs = 4326)
pra_pts <- pra |>
  st_drop_geometry() |>
  select(
    programarea_key,
    programarea_name,
    region_name,
    region_key,
    ctr_lon,
    ctr_lat
  ) |>
  st_as_sf(coords = c("ctr_lon", "ctr_lat"), crs = 4326)

# keep file-exists guard — label placement is manually adjusted
if (!file.exists(lbls_csv)) {
  bind_rows(
    er_pts |> st_drop_geometry() |> select(ecoregion_key),
    pra_pts |> st_drop_geometry() |> select(programarea_key)
  ) |>
    arrange(ecoregion_key, programarea_key) |>
    mutate(
      text_anchor = ifelse(!is.na(programarea_key), "center", "top"),
      text_justify = "center",
      text_offset_right = 0,
      text_offset_down = 0
    ) |>
    write_csv(lbls_csv, na = "")
}

9.3 Score tables

Code
d_pra <- read_sf(pra_gpkg) |>
  st_drop_geometry()

# get field-to-label mapping from metric table (single source of truth)
d_metric_labels <- tbl(con_sdm, "metric") |>
  filter(!is.na(metric_title)) |>
  select(metric_key, metric_title, metric_abbrev) |>
  collect()

# fields present in program area data that have labels
flds <- intersect(names(d_pra), d_metric_labels$metric_key)

# named vector: metric_key -> short name for column renaming
flds_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(
    short = metric_key |>
      str_replace(
        "score_extriskspcat_primprod_ecoregionrescaled_equalweights",
        "score"
      ) |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("extrisk_", "")
  ) |>
  pull(short, name = metric_key)

# ecoregion header rows with NA for all metric columns
na_cols <- setNames(
  rep(list(NA_real_), length(flds_rn) + 1),
  c("area_km2", unname(flds_rn))
)
d_er_n <- read_csv(er_pra_csv) |>
  distinct(
    region_key,
    region_name,
    ecoregion_key,
    ecoregion_name
  ) |>
  bind_cols(as_tibble(na_cols))

d_pra_n <- read_csv(er_pra_csv) |>
  select(
    region_name,
    ecoregion_name,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion
  ) |>
  left_join(
    d_pra |>
      select(all_of(c("programarea_key", "area_km2", flds))) |>
      rename_with(~ flds_rn[.x], all_of(flds)),
    by = "programarea_key"
  )

d_er_pra_n <- bind_rows(
  d_er_n |> mutate(row_type = "ecoregion"),
  d_pra_n |> mutate(row_type = "programarea")
) |>
  relocate(
    row_type,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion,
    .after = ecoregion_name
  ) |>
  arrange(region_name, ecoregion_name, row_type, programarea_name) |>
  mutate(
    pct = ifelse(
      !is.na(pct_programarea_in_ecoregion),
      glue(" [{round(pct_programarea_in_ecoregion, 1)}%]", .trim = F),
      ""
    ),
    row_lbl = case_match(
      row_type,
      "ecoregion" ~ glue("{region_name}: {ecoregion_name} ({ecoregion_key})"),
      "programarea" ~ glue("· {programarea_name} ({programarea_key}){pct}")
    )
  ) |>
  relocate(row_lbl)
Code
d_er_pra_n |>
  select(
    row_type,
    region_name,
    ecoregion_name,
    ecoregion_key,
    programarea_name,
    programarea_key,
    pct_programarea_in_ecoregion,
    row_lbl,
    score,
    primprod,
    any_of(sp_cats),
    area_km2
  ) |>
  write_excel_csv(er_pra_n_csv)

10 Summary figures and tables

Merged from msens-summary_programareas.qmd: maps, flower plots, primary productivity charts, and Word/Excel tables.

Code
dir_create(dir_figs)

# delete figs if redo requested
if (redo_summary_figs) {
  fig_files <- dir_ls(dir_figs)
  if (length(fig_files) > 0) {
    file_delete(fig_files)
  }
}

10.1 Program Area boundary maps

Code
var_pra <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
d_lbls <- read_csv(lbls_csv)

for (is_ak in c(TRUE, FALSE)) {
  if (is_ak) {
    pra_area <- pra |>
      filter(region_key == "AK") |>
      st_shift_longitude()
    er_area <- er |>
      filter(region_key == "AK") |>
      st_shift_longitude()

    er_pts_area <- er_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )

    lgnd_pos <- "top-left"
  } else {
    box_l48 <- pra |>
      filter(region_key != "AK") |>
      st_bbox() |>
      st_as_sfc()
    pra_area <- pra |>
      st_filter(box_l48, .predicate = st_intersects)
    er_area <- er |>
      st_filter(box_l48, .predicate = st_intersects)

    er_pts_area <- er_pts |>
      filter(ecoregion_key %in% er_area$ecoregion_key) |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(programarea_key %in% pra_area$programarea_key) |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )

    lgnd_pos <- "bottom-left"
  }

  m <- mapboxgl(
    style = mapbox_style("dark"),
    projection = "globe"
  ) |>
    fit_bounds(bbox = er_area) |>
    msens::add_pmfill(
      url = glue("{pmtiles_base_url}/{tbl_pra_pm}.pmtiles"),
      source_layer = tbl_pra_pm,
      col_key = "programarea_key",
      d = pra_area |> sf::st_drop_geometry(),
      col_value = var_pra,
      filter_keys = pra_area$programarea_key
    )

  legend_meta <- attr(m, "legend_meta")

  m <- m |>
    msens::add_pmline(list(
      list(
        url = glue("{pmtiles_base_url}/{tbl_er_pm}.pmtiles"),
        source_layer = tbl_er_pm,
        line_color = "lightgray",
        line_width = 5,
        filter = c("in", "ecoregion_key", er_area$ecoregion_key)
      )
    )) |>
    msens::add_pmlabel(list(
      list(source = pra_pts_area, text_field = "programarea_key"),
      list(
        source = er_pts_area,
        text_field = "ecoregion_name",
        text_color = "black",
        text_halo_color = "lightgray",
        text_halo_width = 2,
        text_halo_blur = 2,
        text_line_height = 1.5,
        text_size = 20
      )
    )) |>
    mapgl::add_legend(
      "Score",
      values = legend_meta$rng,
      colors = legend_meta$colors,
      position = lgnd_pos
    ) |>
    mapgl::add_scale_control(position = "bottom-right")

  b <- glue("{dir_figs}/map_pra_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}")
  b_html <- glue("{b}.html")
  b_files <- glue("{b}_files")
  b_png <- glue("{b}.png")
  saveWidget(m, b_html, selfcontained = FALSE, background = "transparent")
  webshot(
    b_html,
    b_png,
    vwidth = 1200,
    vheight = 800,
    zoom = 2,
    selector = "#htmlwidget_container",
    delay = 2,
    quiet = TRUE
  )
  file_delete(b_html)
  dir_delete(b_files)
}

10.2 Cell score & NPP maps

Code
vgpm_tif <- glue(
  "{dir_data}/raw/oregonstate.edu/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd_planareas.tif"
)

r_metrics <- rast(metrics_tif)
r_vgpm <- rast(vgpm_tif)
d_lbls <- read_csv(lbls_csv)

lyr_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
lyr_npp <- "npp_avg"

# mask to program areas to exclude Atlantic coast
ply_mask <- vect(pra |> st_shift_longitude())
r_score <- r_metrics[[lyr_score]] |> mask(ply_mask)
r_npp <- r_vgpm |> log() |> mask(ply_mask)

n_cols <- 11
rng_r_score <- range(values(r_score, na.rm = T))
cols_r_score <- rev(RColorBrewer::brewer.pal(n_cols, "Spectral"))
brks_r_score <- seq(rng_r_score[1], rng_r_score[2], length.out = n_cols)

rng_r_npp <- range(values(r_npp, na.rm = T))
cols_r_npp <- pal_viridis()(n_cols)
brks_r_npp <- seq(rng_r_npp[1], rng_r_npp[2], length.out = n_cols)

for (is_vgpm in c(T, F)) {
  # npp: is_vgpm = T # score: is_vgpm = F
  if (is_vgpm) {
    r <- r_npp
    rng_r <- round(rng_r_npp, 2)
    cols_r <- cols_r_npp
    brks_r <- brks_r_npp
    lyr <- lyr_npp
    title <- "Primary Productivity (log(NPP))"
  } else {
    r <- r_score
    rng_r <- rng_r_score
    cols_r <- cols_r_score
    brks_r <- brks_r_score
    lyr <- lyr_score
    title <- "Score"
  }

  for (is_ak in c(T, F)) {
    # is_ak = T
    if (is_ak) {
      rgn_mask <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        vect()

      r_area <- r |>
        mask(rgn_mask) |>
        trim()

      er_area <- er |>
        filter(region_key == "AK") |>
        st_shift_longitude()

      pra_area <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude()

      er_pts_area <- er_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )

      lgnd_pos <- "top-left"
    } else {
      rgn_mask <- pra |>
        filter(region_key != "AK") |>
        st_shift_longitude() |>
        vect()

      r_area <- r |>
        mask(rgn_mask) |>
        trim()

      box_lower48 <- pra |>
        filter(region_key != "AK") |>
        st_bbox() |>
        st_as_sfc()

      er_area <- er |>
        st_filter(box_lower48, .predicate = st_intersects)
      pra_area <- pra |>
        st_filter(box_lower48, .predicate = st_intersects)

      er_pts_area <- er_pts |>
        filter(ecoregion_key %in% er_area$ecoregion_key) |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(programarea_key %in% pra_area$programarea_key) |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )

      lgnd_pos <- "bottom-left"
    }
    scale_pos <- "bottom-right"

    er_filter <- c("in", "ecoregion_key", er_area$ecoregion_key)
    pra_filter <- c("in", "programarea_key", pra_area$programarea_key)

    m <- msens::map_cells(
      r = r_area,
      colors = cols_r,
      bounds = er_area,
      raster_opacity = 0.9,
      legend_title = title,
      legend_position = lgnd_pos,
      legend_values = rng_r,
      pmtiles_outlines = list(
        list(
          url = glue("{pmtiles_base_url}/{tbl_er_pm}.pmtiles"),
          source_layer = tbl_er_pm,
          line_color = "lightgray",
          line_width = 5,
          filter = er_filter
        ),
        list(
          url = glue("{pmtiles_base_url}/{tbl_pra_pm}.pmtiles"),
          source_layer = tbl_pra_pm,
          line_color = "black",
          line_width = 1,
          filter = pra_filter
        )
      ),
      labels = list(
        list(source = pra_pts_area, text_field = "programarea_key"),
        list(
          source = er_pts_area,
          text_field = "ecoregion_name",
          text_color = "black",
          text_halo_color = "lightgray",
          text_halo_width = 2,
          text_halo_blur = 2,
          text_line_height = 1.5,
          text_size = 20
        )
      )
    )

    b <- glue(
      "{dir_figs}/map_cell_{ifelse(is_vgpm, 'npp', 'score')}_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}"
    )
    b_html <- glue("{b}.html")
    b_files <- glue("{b}_files")
    b_png <- glue("{b}.png")
    saveWidget(m, b_html, selfcontained = F, background = "transparent")
    webshot(
      b_html,
      b_png,
      vwidth = 1200,
      vheight = 800,
      zoom = 2,
      selector = "#htmlwidget_container",
      delay = 2,
      quiet = T
    )
    file_delete(b_html)
    dir_delete(b_files)
  }
}

10.3 Ecoregion × Program Area table (docx)

Code
tbl_docx <- glue("{dir_figs}/table_ecoregion_programarea_scores{v_sfx}.docx")

set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)

# build rename vector from metric table: metric_abbrev -> short_name
abbrev_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_abbrev)

d_er_pra_n |>
  mutate(
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    row_lbl = ifelse(
      row_type == "ecoregion",
      glue("**{row_lbl}**"),
      row_lbl
    )
  ) |>
  select(
    Area = row_lbl,
    Score = score,
    all_of(abbrev_rn),
    `Area (1,000 km^2^)` = area_kkm2
  ) |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |>
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)

# browseURL(tbl_docx)

10.4 Program Area table (docx + xlsx)

Code
tbl_docx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.docx")
tbl_xlsx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.xlsx")

set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)

# build rename vector: metric_title -> short_name
title_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_title)

d_pra_only <- d_er_pra_n |>
  filter(row_type == "programarea") |>
  arrange(desc(score)) |>
  select(
    -row_type,
    -ecoregion_name,
    -ecoregion_key,
    -pct_programarea_in_ecoregion
  ) |>
  mutate(
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    row_lbl = glue("{programarea_name} ({programarea_key})")
  ) |>
  select(
    row_lbl,
    score,
    all_of(unname(title_rn)),
    area_kkm2
  ) |>
  distinct() |>
  select(
    `Program Area` = row_lbl,
    Score = score,
    all_of(title_rn),
    `Area (1,000 km^2^)` = area_kkm2
  )

write_excel_csv(d_pra_only, pra_n_csv, na = "")

d_pra_only |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |>
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)

# xlsx version (plain column names without superscript)
d_pra_only |>
  rename(`Area (1,000 km2)` = `Area (1,000 km^2^)`) |>
  openxlsx::write.xlsx(tbl_xlsx)

# browseURL(tbl_docx)

10.5 Compare scores with previous version

Code
# compare pra scores between two versions ----
# returns list with long (d_comp), wide diff (d_comp_wide), and
# wide prev/curr/diff (d_comp_pvt) data frames; NULL if files missing
compare_scores <- function(ver_curr, ver_prev) {
  csv_curr <- glue("{dir_derived}/{ver_curr}/tbl_pra_scores_{ver_curr}.csv")
  csv_prev <- glue("{dir_derived}/{ver_prev}/tbl_pra_scores_{ver_prev}.csv")

  if (!file_exists(csv_prev) || !file_exists(csv_curr)) {
    message(glue(
      "Skipping {ver_curr} vs {ver_prev}: ",
      "prev={file_exists(csv_prev)}, curr={file_exists(csv_curr)}"
    ))
    return(NULL)
  }

  d_prev <- read_csv(csv_prev, show_col_types = FALSE) |>
    rename(`Area (1,000 km2)` = `Area (1,000 km^2^)`)
  d_curr <- read_csv(csv_curr, show_col_types = FALSE) |>
    rename(`Area (1,000 km2)` = `Area (1,000 km^2^)`)

  # component columns (exclude Program Area and Area)
  comp_cols <- intersect(
    setdiff(names(d_prev), c("Program Area", "Area (1,000 km2)")),
    setdiff(names(d_curr), c("Program Area", "Area (1,000 km2)"))
  )

  # use version names for value columns (e.g., Score_v4b, Score_v4)
  v_c <- ver_curr
  v_p <- ver_prev

  d_comp <- d_prev |>
    select(`Program Area`, all_of(comp_cols)) |>
    pivot_longer(
      cols = all_of(comp_cols),
      names_to = "Component",
      values_to = v_p
    ) |>
    full_join(
      d_curr |>
        select(`Program Area`, all_of(comp_cols)) |>
        pivot_longer(
          cols = all_of(comp_cols),
          names_to = "Component",
          values_to = v_c
        ),
      by = c("Program Area", "Component")
    ) |>
    mutate(
      diff = .data[[v_c]] - .data[[v_p]]
    ) |>
    arrange(Component, `Program Area`)

  comp_csv <- glue("{dir_v}/tbl_pra_scores_{ver_curr}_vs_{ver_prev}.csv")
  write_csv(d_comp, comp_csv)
  message(glue("Wrote score comparison to {comp_csv}"))

  # wide diff table
  d_comp_wide <- d_comp |>
    select(`Program Area`, Component, diff) |>
    pivot_wider(
      names_from = Component,
      values_from = diff
    ) |>
    arrange(`Program Area`)

  # wide prev/curr/diff pivot (like Excel comparison) ----
  # only rows where Score differs; columns: Score_v4b, Score_v4, Score_diff, ...
  pra_diff <- d_comp |>
    filter(Component == "Score", diff != 0) |>
    pull(`Program Area`)

  score_diff_col <- "Score_diff"

  if (length(pra_diff) > 0) {
    d_comp_pvt <- d_comp |>
      filter(`Program Area` %in% pra_diff) |>
      pivot_wider(
        names_from = Component,
        values_from = c(all_of(v_p), all_of(v_c), diff),
        names_glue = "{Component}_{.value}"
      ) |>
      # names_glue above already yields columns like Score_v4, Score_v4b, Score_diff
      select(
        `Program Area`,
        starts_with("Score_"),
        !starts_with("Score_")
      ) |>
      arrange(desc(.data[[score_diff_col]]))

    pvt_xlsx <- glue(
      "{dir_v}/tbl_pra_scores_{ver_curr}_vs_{ver_prev}_pivot.xlsx"
    )
    write_xlsx(d_comp_pvt, pvt_xlsx)
    message(glue("Wrote pivot comparison to {pvt_xlsx}"))
  } else {
    d_comp_pvt <- NULL
    message(glue("No score differences between {ver_curr} and {ver_prev}"))
  }

  list(
    d_comp = d_comp,
    d_comp_wide = d_comp_wide,
    d_comp_pvt = d_comp_pvt,
    comp_cols = comp_cols,
    ver_curr = ver_curr,
    ver_prev = ver_prev
  )
}

# compare ver vs ver_prev (v4b vs v4) ----
res <- compare_scores(ver, ver_prev)
Wrote score comparison to ~/My Drive/projects/msens/data/derived/v4b/tbl_pra_scores_v4b_vs_v4.csv
Wrote pivot comparison to ~/My Drive/projects/msens/data/derived/v4b/tbl_pra_scores_v4b_vs_v4_pivot.xlsx
Code
if (!is.null(res)) {
  res$d_comp_wide |>
    DT::datatable(
      caption = glue("Score differences ({res$ver_curr} - {res$ver_prev})"),
      options = list(pageLength = 25, dom = "t")
    ) |>
    DT::formatRound(columns = res$comp_cols, digits = 0)
}
Code
# pivot table: rows where scores differ (ver vs ver_prev) ----
if (!is.null(res) && !is.null(res$d_comp_pvt)) {
  res$d_comp_pvt |>
    DT::datatable(
      caption = glue(
        "Score pivot where different ({res$ver_curr} vs {res$ver_prev})"
      ),
      options = list(pageLength = 25, dom = "t", scrollX = TRUE)
    ) |>
    DT::formatRound(
      columns = setdiff(names(res$d_comp_pvt), "Program Area"),
      digits = 0
    )
}
Code
# compare ver vs v3 ----
res_v3 <- compare_scores(ver, "v3")
Wrote score comparison to ~/My Drive/projects/msens/data/derived/v4b/tbl_pra_scores_v4b_vs_v3.csv
Wrote pivot comparison to ~/My Drive/projects/msens/data/derived/v4b/tbl_pra_scores_v4b_vs_v3_pivot.xlsx
Code
if (!is.null(res_v3)) {
  res_v3$d_comp_wide |>
    DT::datatable(
      caption = glue(
        "Score differences ({res_v3$ver_curr} - {res_v3$ver_prev})"
      ),
      options = list(pageLength = 25, dom = "t")
    ) |>
    DT::formatRound(columns = res_v3$comp_cols, digits = 0)
}
Code
# pivot table: rows where scores differ (ver vs v3) ----
if (!is.null(res_v3) && !is.null(res_v3$d_comp_pvt)) {
  res_v3$d_comp_pvt |>
    DT::datatable(
      caption = glue(
        "Score pivot where different ({res_v3$ver_curr} vs {res_v3$ver_prev})"
      ),
      options = list(pageLength = 25, dom = "t", scrollX = TRUE)
    ) |>
    DT::formatRound(
      columns = setdiff(names(res_v3$d_comp_pvt), "Program Area"),
      digits = 0
    )
}

10.6 Program Area flower plots

Code
# pra list from gpkg (previously from PostGIS)
pra_all <- read_sf(pra_gpkg) |>
  st_drop_geometry() |>
  select(programarea_key, programarea_name)

# components
metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)

# build components from metric table
components <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  pull(metric_title)
component_cols <- setNames(
  hue_pal()(length(components)),
  components
)

# d_scores
m_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_key_score <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_score) |>
  pull(metric_seq)

d_scores <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_pra,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric") |>
      filter(metric_seq == !!m_key_score),
    by = join_by(zone_seq)
  ) |>
  select(programarea_key, score = value) |>
  mutate(score = round(score)) |>
  collect() |>
  arrange(desc(score), programarea_key) |>
  left_join(
    pra_all,
    by = "programarea_key"
  )

d_scores$programarea_lbl <- factor(
  str_wrap(d_scores$programarea_name, width = 20),
  levels = str_wrap(d_scores$programarea_name, width = 20),
  ordered = T
)

# d_fl
d_fl <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_pra,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric"),
    by = "zone_seq"
  ) |>
  inner_join(
    tbl(con_sdm, "metric") |>
      filter(metric_seq %in% m_input_seqs) |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  arrange(programarea_key, metric_key) |>
  collect() |>
  mutate(
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  select(programarea_key, component, value) |>
  arrange(programarea_key, component, value)

# add missing components with value 0
d_0 <- expand_grid(
  programarea_key = d_scores$programarea_key,
  component = sort(unique(d_fl$component)),
  value = 0
) |>
  anti_join(
    d_fl,
    by = join_by(programarea_key, component)
  )

d_fl <- d_fl |>
  bind_rows(d_0) |>
  arrange(programarea_key, component) |>
  mutate(even = 1)

# build component display name lookup from metric table
component_labels <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  mutate(
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  pull(metric_title, name = component)

d_fl <- d_fl |>
  left_join(
    d_scores |> select(programarea_key, programarea_lbl),
    by = "programarea_key"
  ) |>
  mutate(
    component = component_labels[component]
  ) |>
  arrange(programarea_lbl, component)

# all flowers
d <- d_fl |>
  arrange(programarea_lbl, component) |>
  group_by(programarea_lbl) |>
  mutate(across(!where(is.character), as.double)) |>
  mutate(
    ymax = cumsum(even),
    ymin = lag(ymax, default = 0),
    xmax = value,
    xmin = 0
  )

for (theme_name in c("minimal", "linedraw", "min_line", "dark", "void")) {
  theme_fxn <- switch(
    theme_name,
    "minimal" = theme_minimal,
    "linedraw" = theme_linedraw,
    "min_line" = function(...) {
      theme_minimal(...) %+replace%
        theme(
          panel.grid = element_line(colour = "black"),
          panel.grid.major = element_line(linewidth = rel(0.1)),
          panel.grid.minor = element_line(linewidth = rel(0.05))
        )
    },
    "dark" = theme_dark,
    "void" = theme_void
  )

  g <- d |>
    ggplot() +
    geom_rect_interactive(
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = component,
        data_id = component
      ),
      color = "white",
      alpha = 1
    ) +
    scale_fill_manual(
      values = component_cols,
      name = "Component"
    ) +
    facet_wrap(~programarea_lbl) +
    coord_polar(theta = "y") +
    xlim(c(-20, max(d_scores$score, d$xmax))) +
    annotate(
      "rect",
      xmin = -Inf,
      xmax = 0,
      ymin = -Inf,
      ymax = Inf,
      fill = ifelse(theme_name == "dark", "grey20", "white"),
      color = NA
    ) +
    geom_text(
      aes(x, y, label = score),
      data = d_scores |> mutate(x = -20, y = 0),
      size = 2.5,
      fontface = "bold",
      color = ifelse(theme_name == "dark", "white", "black")
    ) +
    theme_fxn() +
    theme(
      legend.position = "bottom",
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 6),
      plot.margin = unit(c(1, 1, 1, 1), "pt"),
      strip.text.x = element_text(size = 6),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )

  pra_fl_png <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.png"
  )
  pra_fl_pdf <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.pdf"
  )
  png(pra_fl_png, width = 1200, height = 800, res = 150)
  print(g)
  dev.off()
  pdf(pra_fl_pdf, width = 7, height = 5)
  print(g)
  dev.off()
}

10.7 Primary productivity by Program Area

Code
# zonal NPP from VGPM rasters, adapted from ingest_productivity.qmd pa_zonal_plot
dir_vgpm_yrs <- glue(
  "{dir_data}/raw/oregonstate.edu/vgpm.r2022.v.chl.v.sst.2160x4320"
)

pra <- read_sf(pra_gpkg)

r_yrs <- dir_ls(dir_vgpm_yrs, glob = "*.tif") |>
  rast()
names(r_yrs) <- glue("npp_{time(r_yrs)}")

d_pra_npp <- r_yrs |>
  zonal(
    pra |>
      select(programarea_key, programarea_name) |>
      vect(),
    fun = "mean",
    na.rm = T,
    as.polygons = T
  ) |>
  st_as_sf() |>
  st_drop_geometry() |>
  tibble() |>
  pivot_longer(
    cols = starts_with("npp_"),
    names_to = "year",
    values_to = "npp"
  ) |>
  filter(!is.na(npp)) |>
  mutate(
    year = str_replace(year, "npp_", "") |> as.integer(),
    npp = set_units(npp, mg / m^2 / day) |>
      set_units(t / km^2 / yr)
  ) |>
  group_by(programarea_key, programarea_name) |>
  summarize(
    npp_avg = mean(npp, na.rm = T),
    npp_sd = sd(npp, na.rm = T),
    .groups = "drop"
  ) |>
  arrange(desc(npp_avg)) |>
  mutate(
    npp_avg = npp_avg |> drop_units(),
    npp_sd = npp_sd
  ) |>
  mutate(
    programarea_name = factor(programarea_name, levels = programarea_name)
  )

write_csv(d_pra_npp, pp_csv)

g <- d_pra_npp |>
  ggplot(aes(x = programarea_name, y = npp_avg)) +
  geom_bar(
    position = position_dodge(),
    stat = "identity",
    fill = "darkgreen"
  ) +
  geom_errorbar(
    aes(ymin = npp_avg - npp_sd, ymax = npp_avg + npp_sd),
    width = .4
  ) +
  labs(
    title = NULL,
    x = NULL,
    y = expression("Areal NPP (metric tons C km"^-2 ~ "yr"^-1 * ")")
  ) +
  scale_y_continuous(expand = c(0, 20)) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 1,
      hjust = 1
    )
  )

png(pp_png, width = 1200, height = 800, res = 150)
print(g)
dev.off()
quartz_off_screen 
                2 
Code
pdf(pp_pdf, width = 7, height = 5)
print(g)
dev.off()
quartz_off_screen 
                2 

10.8 Copy figures to dir_v

Code
# data files are already written directly to dir_v;
# copy rendered figures from local figs/ into dir_v
fig_files <- dir_ls(dir_figs, regexp = "\\.(png|pdf|docx|xlsx|csv)$")
for (f in fig_files) {
  file_copy(f, glue("{dir_v}/{path_file(f)}"), overwrite = T)
}

message(glue("Copied {length(fig_files)} figure files to {dir_v}"))
Copied 19 figure files to ~/My Drive/projects/msens/data/derived/v4b

11 Generate README

Programmatically generate README.md and README.html for the versioned output directory. Uses data/derived_products.csv for file descriptions.

Code
dp_csv <- here("data/derived_products.csv")
stopifnot(file_exists(dp_csv))

# read and expand {ver} placeholders in output_file and dependencies
d_dp <- read_csv(dp_csv) |>
  rowwise() |>
  mutate(
    output_file = glue(output_file),
    dependencies = glue(dependencies)
  ) |>
  ungroup()

# list actual files in dir_v (non-recursive, exclude README itself)
v_files <- dir_ls(dir_v, recurse = F) |>
  path_file() |>
  str_subset("^README", negate = T) |>
  # exclude ~$ temp files from Word/Excel
  str_subset("^~\\$", negate = T)

# match files to descriptions
d_files <- tibble(file = v_files) |>
  left_join(
    d_dp |> select(file = output_file, description),
    by = "file"
  ) |>
  mutate(
    description = ifelse(is.na(description), "", description)
  ) |>
  arrange(file)

lns_files <- d_files |>
  knitr::kable(format = "pipe") |>
  paste(collapse = '\n')

# build README.md content
readme_lines <- c(
  glue(
    "
    ---
    format:
      html:
        embed-resources: true
    ---
    
    # Derived Products — {ver}
     
    Version: `{v_sfx}` | Generated: {Sys.Date()}
    
    ## File listing

    {lns_files}

    ## Naming conventions

    - `ply_*`: polygon spatial data (GeoPackage)
    - `r_*`: raster data (GeoTIFF)
    - `tbl_*`: tabular summary data (CSV) 
    - `layers_*`: layer metadata for metrics and species
    - `zone_taxon_*`: zone × taxon matrix
    - `*_{ver}`: version {ver} model outputs
    
    ## Large files
    
    Some files exceed GitHub size limits and are stored in the 
    `big-files/{ver}/` directory (not tracked by git). 
    See `sdm_parquet/` for parquet exports.
    
    _Auto-generated by `calc_scores.qmd` on {Sys.time()}_"
  )
)

readme_md <- glue("{dir_v}/README.md")
writeLines(readme_lines, readme_md)
message(glue("Wrote {readme_md}"))
Wrote ~/My Drive/projects/msens/data/derived/v4b/README.md
Code
# render to HTML
quarto_render(readme_md |> path_real())
pandoc 
  to: html
  output-file: README.html
  standalone: true
  embed-resources: true
  section-divs: true
  html-math-method: mathjax
  wrap: none
  default-image-extension: png
  variables: {}
  
metadata
  document-css: false
  link-citations: true
  date-format: long
  lang: en
  
Output created: README.html
Code
message(glue("Rendered {readme_md} to HTML"))
Rendered ~/My Drive/projects/msens/data/derived/v4b/README.md to HTML

12 Map score cells

Code
librarian::shelf(mapgl)

# load global raster template
# r_global <- readRDS(glue("{dir_data}/derived/r_global_0.05.rds"))
cell_tif <- glue("{dir_data}/derived/r_bio-oracle_planarea.tif")

r_cell <- rast(cell_tif)

# d_metric <- tbl(con_sdm, "metric") |> collect()
# View(d_metric)
# TODO: add sequences for date_created post-export-parquet

# get species richness metric ID
# m_key <- "extrisk_all_ecoregion_rescaled"
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)

# query species richness for a region
d_m <- dbGetQuery(
  con_sdm,
  glue(
    "
  SELECT
    cm.cell_id,
    cm.value
  FROM cell_metric cm
  WHERE cm.metric_seq = (
    SELECT metric_seq
    FROM metric
    WHERE metric_key = '{m_key}' )"
  )
) |>
  tibble()
stopifnot(sum(duplicated(d_m$cell_id)) == 0)

# convert to raster
r_m <- create_raster_from_df(d_m, r_cell, value_col = "value")

# display with mapgl
# TODO: make into function in msens R package
mapgl_rast <- function(
  r,
  cols_r = rev(RColorBrewer::brewer.pal(11, "Spectral")),
  layer_name = "SDM"
) {
  # r = r_m; cols_r = cols_r; layer_name = m_key

  rng_r <- minmax(r) |> as.numeric() |> signif(digits = 2)

  mapboxgl(
    style = mapbox_style("dark"),
    zoom = 3.5,
    center = c(-106, 40.1)
  ) |> # ply_pa centroid
    add_image_source(
      id = "r_src",
      data = r,
      colors = cols_r
    ) |>
    add_raster_layer(
      id = "r_lyr",
      source = "r_src",
      raster_resampling = "nearest"
    ) |>
    mapgl::add_legend(
      layer_name,
      values = rng_r,
      colors = cols_r,
      position = "bottom-right"
    ) |>
    add_fullscreen_control(position = "top-left") |>
    add_navigation_control() |>
    add_scale_control()
}

# visualize
mapgl_rast(r_m, layer_name = m_key)

13 Parquet export/import

This cleans up the DuckDB database by exporting tables to Parquet files (with sort order for faster reads) and then re-importing them to a new DuckDB database. This also allows us to remove indexes from the DuckDB database, which are not needed for the mapgl app and can slow down writes.
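
A minimal sketch of the re-import side, assuming a fresh database path (sdm_new.duckdb is hypothetical) and the `tbl_keys` / `dir_pq` objects defined in the export chunk below; PER_THREAD_OUTPUT writes each table as a directory of parquet parts, hence the glob:

Code
# rebuild a fresh DuckDB from the sorted parquet exports (no indexes re-created)
con_new <- dbConnect(duckdb(), dbdir = glue("{dir_big_v}/sdm_new.duckdb"))
for (tbl in names(tbl_keys)) {
  dbExecute(con_new, glue(
    "CREATE TABLE {tbl} AS SELECT * FROM read_parquet('{dir_pq}/{tbl}.parquet/*.parquet')"
  ))
}
dbDisconnect(con_new, shutdown = TRUE)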

13.1 Export to Parquet

Code
# TODO: model_cell: consider PARTITION_BY (mdl_seq) on export; see https://duckdb.org/docs/stable/data/parquet/overview

source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(), dbdir = sdm_db, read_only = F) |>
  load_duckdb_extensions()

# dbListTables(con_sdm)
dbExecute(con_sdm, "SET threads = 7;")
[1] 0
Code
dbExecute(con_sdm, "SET memory_limit = '20GB';")
[1] 0
Code
dir_pq <- normalizePath(dir_pq)
if (dir_exists(dir_pq)) {
  dir_delete(dir_pq)
}
dir_create(dir_pq)

# OLD: add primary keys, indexes
# NEW: skip indexes, sort on export, per [performance](https://duckdb.org/docs/stable/guides/performance/indexing.html)

# dbRemoveTable(con_sdm, "cell_model", force = T)
tbl_keys <- list(
  cell = "cell_id",
  cell_metric = c("cell_id", "metric_seq"),
  # cell_model  = c("cell_id", "mdl_seq"),   # source: model_cell; alternative key sorting order
  dataset = "ds_key",
  listing = "spp_id",
  metric = "metric_seq",
  model = "mdl_seq",
  model_cell = c("mdl_seq", "cell_id"), # *big* one
  species = "sp_seq",
  taxon = "taxon_id",
  taxon_model = c("taxon_id", "ds_key"),
  zone = "zone_seq",
  zone_cell = c("zone_seq", "cell_id"),
  zone_metric = c("zone_seq", "metric_seq"),
  zone_taxon = c("zone_fld", "zone_value", "mdl_seq", "taxon_id")
)
stopifnot(length(setdiff(names(tbl_keys), dbListTables(con_sdm))) == 0)
stopifnot(length(setdiff(dbListTables(con_sdm), names(tbl_keys))) == 0)

# export parquet files with sort order
for (tbl in names(tbl_keys)) {
  # tbl = names(tbl_keys)[1]  # tbl = "cell_model"
  keys <- tbl_keys[[tbl]]
  keys_str <- paste(keys, collapse = ', ')
  pq_file <- glue("{dir_pq}/{tbl}.parquet")

  message(glue(
    "Exporting {tbl} (ORDER BY {keys_str}) to parquet  ~  {Sys.time()}"
  ))

  fro_tbl <- tbl
  if (tbl == "cell_model") {
    fro_tbl <- "model_cell"
  }
  stopifnot(all(keys %in% dbListFields(con_sdm, fro_tbl)))

  if (tbl %in% c("cell_model", "model_cell")) {
    flds <- dbListFields(con_sdm, fro_tbl) |>
      setdiff(keys)
    flds_str <- paste(flds, collapse = ", ") # join non-key fields for SELECT
    sql <- glue(
      "COPY (
      SELECT {keys_str}, {flds_str} FROM {fro_tbl} 
      ORDER BY {keys_str}) 
      TO '{pq_file}' 
    (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  } else {
    sql <- glue(
      "COPY (
      SELECT * FROM {fro_tbl} 
      ORDER BY {keys_str}) 
      TO '{pq_file}' 
    (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  }
  message(sql)
  dbExecute(con_sdm, sql)
}
Exporting cell (ORDER BY cell_id) to parquet  ~  2026-03-17 17:21:19.672908
COPY (
  SELECT * FROM cell 
  ORDER BY cell_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/cell.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting cell_metric (ORDER BY cell_id, metric_seq) to parquet  ~  2026-03-17 17:21:19.841112
COPY (
  SELECT * FROM cell_metric 
  ORDER BY cell_id, metric_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/cell_metric.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting dataset (ORDER BY ds_key) to parquet  ~  2026-03-17 17:21:20.567691
COPY (
  SELECT * FROM dataset 
  ORDER BY ds_key) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/dataset.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting listing (ORDER BY spp_id) to parquet  ~  2026-03-17 17:21:20.574428
COPY (
  SELECT * FROM listing 
  ORDER BY spp_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/listing.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting metric (ORDER BY metric_seq) to parquet  ~  2026-03-17 17:21:20.578599
COPY (
  SELECT * FROM metric 
  ORDER BY metric_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/metric.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting model (ORDER BY mdl_seq) to parquet  ~  2026-03-17 17:21:20.58136
COPY (
  SELECT * FROM model 
  ORDER BY mdl_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/model.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting model_cell (ORDER BY mdl_seq, cell_id) to parquet  ~  2026-03-17 17:21:20.599319
COPY (
  SELECT mdl_seq, cell_id, value FROM model_cell 
  ORDER BY mdl_seq, cell_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/model_cell.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting species (ORDER BY sp_seq) to parquet  ~  2026-03-17 17:22:52.945571
COPY (
  SELECT * FROM species 
  ORDER BY sp_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/species.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting taxon (ORDER BY taxon_id) to parquet  ~  2026-03-17 17:22:53.085109
COPY (
  SELECT * FROM taxon 
  ORDER BY taxon_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/taxon.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting taxon_model (ORDER BY taxon_id, ds_key) to parquet  ~  2026-03-17 17:22:53.127844
COPY (
  SELECT * FROM taxon_model 
  ORDER BY taxon_id, ds_key) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/taxon_model.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting zone (ORDER BY zone_seq) to parquet  ~  2026-03-17 17:22:53.151243
COPY (
  SELECT * FROM zone 
  ORDER BY zone_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/zone.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting zone_cell (ORDER BY zone_seq, cell_id) to parquet  ~  2026-03-17 17:22:53.166535
COPY (
  SELECT * FROM zone_cell 
  ORDER BY zone_seq, cell_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/zone_cell.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting zone_metric (ORDER BY zone_seq, metric_seq) to parquet  ~  2026-03-17 17:22:53.277427
COPY (
  SELECT * FROM zone_metric 
  ORDER BY zone_seq, metric_seq) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/zone_metric.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Exporting zone_taxon (ORDER BY zone_fld, zone_value, mdl_seq, taxon_id) to parquet  ~  2026-03-17 17:22:53.291588
COPY (
  SELECT * FROM zone_taxon 
  ORDER BY zone_fld, zone_value, mdl_seq, taxon_id) 
  TO '/Users/bbest/_big/msens/derived/v4b/sdm_parquet/zone_taxon.parquet' 
(FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) 
Code
message(glue("Exporting finished  ~  {Sys.time()}")) # ~ 1 min
Exporting finished  ~  2026-03-17 17:22:53.414962
Code
dbDisconnect(con_sdm, shutdown = T)
rm(con_sdm)
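
Before rebuilding the database it can be worth confirming the export is complete. A minimal sketch (not part of the render) that counts rows in each exported Parquet directory with a throwaway in-memory DuckDB connection:

Code
# sketch: row counts per exported parquet directory (PER_THREAD_OUTPUT writes
# a directory of parquet files per table)
con_chk <- dbConnect(duckdb())
for (pq in dir_ls(dir_pq, glob = "*.parquet")) {
  n <- dbGetQuery(con_chk, glue("SELECT COUNT(*) AS n FROM '{pq}/*.parquet'"))$n
  message(glue("{path_file(pq)}: {format(n, big.mark = ',')} rows"))
}
dbDisconnect(con_chk, shutdown = TRUE)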

13.2 Import from Parquet

Code
# dir_pq and sdm_db defined in setup chunk

# close any existing connection and force GC to clean up orphaned drivers
if (exists("con_sdm") && dbIsValid(con_sdm)) {
  dbDisconnect(con_sdm, shutdown = T)
}
if (exists("con_sdm")) {
  rm(con_sdm)
}
gc() # flush orphaned duckdb driver instances from earlier chunks
           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
Ncells  5255844 280.7   10237178 546.8         NA 10237178 546.8
Vcells 14935211 114.0   44663527 340.8      24576 69733884 532.1
Code
if (file_exists(sdm_db)) {
  file_delete(sdm_db)
}
# also remove stale WAL file
sdm_wal <- paste0(sdm_db, ".wal")
if (file_exists(sdm_wal)) {
  file_delete(sdm_wal)
}

source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(), dbdir = sdm_db, read_only = F) |>
  load_duckdb_extensions()

# dbExecute(con_sdm, "SET threads = 8;")
# dbExecute(con_sdm, "SET memory_limit = '20GB';")

# import parquet ----
for (pq in dir_ls(dir_pq, glob = "*.parquet")) {
  # pq = dir_ls(dir_pq, glob = "*.parquet")[2]
  tbl <- path_file(pq) |> str_remove("\\.parquet$")
  if (!dbExistsTable(con_sdm, tbl)) {
    message(glue("Importing {tbl} from {basename(pq)}/*.parquet"))
    dbExecute(
      con_sdm,
      glue("CREATE TABLE {tbl} AS SELECT * FROM '{pq}/*.parquet'")
    )
  }
}
Importing cell from cell.parquet/*.parquet
Importing cell_metric from cell_metric.parquet/*.parquet
Importing dataset from dataset.parquet/*.parquet
Importing listing from listing.parquet/*.parquet
Importing metric from metric.parquet/*.parquet
Importing model from model.parquet/*.parquet
Importing model_cell from model_cell.parquet/*.parquet
Importing species from species.parquet/*.parquet
Importing taxon from taxon.parquet/*.parquet
Importing taxon_model from taxon_model.parquet/*.parquet
Importing zone from zone.parquet/*.parquet
Importing zone_cell from zone_cell.parquet/*.parquet
Importing zone_metric from zone_metric.parquet/*.parquet
Importing zone_taxon from zone_taxon.parquet/*.parquet
Code
# create index on 2nd column of multi-column index from parquet export
# tbl_idx <- list(
#   cell_metric = "metric_seq",
#   model_cell  = "mdl_seq",
#   zone_cell   = "cell_id",
#   zone_metric = "metric_seq")
# for (tbl in names(tbl_idx)){      # tbl = "model_cell"
#   fld <- tbl_idx[[tbl]]
#   idx <- glue("idx_{tbl}_{fld}")  # idx_model_cell_mdl_seq
#   stopifnot(fld %in% dbListFields(con_sdm, tbl))
#
#   sql <- glue("CREATE INDEX IF NOT EXISTS {idx} ON {tbl} ({fld});")
#   message(sql)
#   dbExecute(con_sdm, sql)
# }

# sequences ----
# for auto-incrementing primary keys
tbl_seq <- list(
  metric = "metric_seq",
  model = "mdl_seq",
  species = "sp_seq",
  zone = "zone_seq"
)
stopifnot(all(names(tbl_seq) %in% dbListTables(con_sdm)))

for (tbl in names(tbl_seq)) {
  # tbl = "metric"
  fld <- tbl_seq[[tbl]]
  id <- glue("seq_{tbl}")

  nxt <- (dbGetQuery(con_sdm, glue("SELECT MAX({fld}) FROM {tbl}")) |>
    pull(1)) + 1

  dbExecute(con_sdm, glue("DROP SEQUENCE IF EXISTS {id}"))
  sql <- glue("CREATE SEQUENCE {id} START {nxt}")
  message(sql)
  dbExecute(con_sdm, sql)
  sql <- glue(
    "ALTER TABLE {tbl} ALTER COLUMN {fld} SET DEFAULT nextval('{id}')"
  )
  message(sql)
  dbExecute(con_sdm, sql)
}
CREATE SEQUENCE seq_metric START 743
ALTER TABLE metric ALTER COLUMN metric_seq SET DEFAULT nextval('seq_metric')
CREATE SEQUENCE seq_model START 51844
ALTER TABLE model ALTER COLUMN mdl_seq SET DEFAULT nextval('seq_model')
CREATE SEQUENCE seq_species START 51839
ALTER TABLE species ALTER COLUMN sp_seq SET DEFAULT nextval('seq_species')
CREATE SEQUENCE seq_zone START 250
ALTER TABLE zone ALTER COLUMN zone_seq SET DEFAULT nextval('seq_zone')
Code
# date_created ----
# default to current_date
for (tbl in dbListTables(con_sdm)) {
  # tbl = "dataset"
  fld <- dbListFields(con_sdm, tbl) |>
    str_subset("date_created")
  if (length(fld) == 1) {
    sql <- glue(
      "ALTER TABLE {tbl} ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;"
    )
    message(sql)
    a <- dbExecute(con_sdm, sql)
  }
}
ALTER TABLE dataset ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;
ALTER TABLE metric ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;
ALTER TABLE model ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;
ALTER TABLE zone ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;
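
With the sequences and `date_created` defaults restored, new rows can be inserted without supplying either column. A minimal sketch, wrapped in a rolled-back transaction so the database is not modified, assuming `metric_key` is the only required column on `metric` (other NOT NULL columns, if any, would also need values):

Code
# sketch: metric_seq and date_created are filled in by their new defaults
dbBegin(con_sdm)
dbExecute(
  con_sdm,
  "INSERT INTO metric (metric_key) VALUES ('example_hypothetical_metric')"
)
dbGetQuery(
  con_sdm,
  "SELECT metric_seq, metric_key, date_created
   FROM metric WHERE metric_key = 'example_hypothetical_metric'"
)
dbRollback(con_sdm) # discard the test row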
Code
# checkpoint WAL to main DB file, then shut down driver cleanly
dbExecute(con_sdm, "CHECKPOINT")
[1] 0
Code
dbDisconnect(con_sdm, shutdown = T)
rm(con_sdm)
gc() # flush duckdb driver finalizer so file handles are fully released
           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
Ncells  5256801 280.8   10237178 546.8         NA 10237178 546.8
Vcells 14936919 114.0   44663527 340.8      24576 69733884 532.1
Code
message(glue(
  "sdm.duckdb exists: {file_exists(sdm_db)}  size: {file_size(sdm_db)}"
))
sdm.duckdb exists: TRUE  size: 3.27G
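
A quick read-only sanity check of the rebuilt file can confirm every expected table survived the round trip. A minimal sketch (not part of the render), reusing `tbl_keys` from the export step:

Code
# sketch: confirm the rebuilt duckdb contains all expected tables
con_ro <- dbConnect(duckdb(), dbdir = sdm_db, read_only = TRUE)
stopifnot(length(setdiff(names(tbl_keys), dbListTables(con_ro))) == 0)
message(glue("{length(dbListTables(con_ro))} tables present in {path_file(sdm_db)}"))
dbDisconnect(con_ro, shutdown = TRUE)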

14 Deploy to server

Sync derived data and app code to the msens server.

Code
deploy_sh <- here("libs/deploy_to_server.sh")
stopifnot(file_exists(deploy_sh))

# duckdb shutdown may need a moment for file handles to release
if (!file_exists(sdm_db)) {
  gc() # flush any orphaned duckdb driver finalizers
  Sys.sleep(2)
}
stopifnot(file_exists(sdm_db))
system2("bash", args = c(deploy_sh, ver), wait = T)

15 Close Connection

Code
if (exists("con_sdm") && dbIsValid(con_sdm)) {
  dbDisconnect(con_sdm, shutdown = T)
}