Calculate scores from models loaded into sdm db and taxon table

Published

2026-02-17 10:36:00

1 Overview

This notebook calculates scores from models loaded into sdm db and taxon table, including:

  • Extinction risk metrics by class (e.g., extrisk_bird, extrisk_fish, etc.)
  • Other metrics TBD

Source:

Process:

  • Before:
    1. ingest_taxon.qmd
    • Added con_spp.botw (Birds of the World) as another taxonomic authority with ingest_taxon.qmd
    2. merge_models.qmd
    • Built con_sdm.taxon table as authoritative unique taxa reference from multiple entries in species with merge_models.qmd
  • After:
    1. Summary data/figs/README sections are now integrated into this notebook (merged from msens-summary_programareas.qmd)

To render this notebook on Ben’s laptop:

cd ~/Github/MarineSensitivity/workflows
quarto render calc_scores.qmd

2 TODO

  • include more complex models:
    • unique by: ds_key, taxa_id, time_period
    • pkey: mdl_seq
Code
# Load (installing if needed) all packages used by this workflow in one call.
librarian::shelf(
  DBI,
  dbplyr,
  dplyr,
  duckdb,
  DT,
  flextable,
  fs,
  ftExtra,
  ggiraph,
  ggplot2,
  glue,
  gt,
  here,
  htmlwidgets,
  kableExtra,
  knitr,
  leaflet,
  logger,
  mapview,
  openxlsx,
  purrr,
  quarto,
  RColorBrewer,
  readr,
  scales,
  sf,
  shiny,
  stringr,
  terra,
  tibble,
  tidyr,
  units,
  webshot2,
  quiet = TRUE # TRUE/FALSE, not T/F (reassignable)
)
# suppress readr column-type messages globally for cleaner render output
options(readr.show_col_types = FALSE)

# require terra with the rotate() fix; fail fast otherwise
stopifnot(packageVersion("terra") >= "1.8.60")
# [rotate() issue](https://github.com/rspatial/terra/issues/1876)

# project helpers and path variables (dir_raw, dir_derived, dir_v, dir_big_v, v_sfx, ...)
source(here("libs/sdm_functions.R"))
source(here("libs/paths.R"))
dir_create(dir_v)
dir_create(dir_big_v)

# inputs (shared, unversioned)
pra_raw_gpkg <- glue("{dir_raw}/boem.gov/boem_program_areas_2025-11.gpkg")
cell_tif <- glue("{dir_derived}/r_bio-oracle_planarea.tif")
er_raw_gpkg <- glue("{dir_v}/../v2/ply_ecoregions_2025.gpkg")
# fail fast if any required input file is missing
stopifnot(all(file_exists(c(cell_tif, pra_raw_gpkg, er_raw_gpkg))))

# input polygon (from prior step)
pa_gpkg <- glue("{dir_v}/ply_planareas_2025{v_sfx}.gpkg")

# flags, typical
# do_zones <- F # no, except if new spatial
# do_metrics <- T # YES
# do_zone_taxon <- T # slow in past
# do_postgis <- T # YES
# do_downloads <- T # YES
# do_summary_data <- T # ecoregion x PRA intersections, label points, score tables
# do_summary_figs <- T # maps, flower plots, primprod charts, Word/Excel tables
# do_readme <- T # programmatic README.md + README.html
# do_map <- F # no, but useful for visual checks during development
# do_parquet <- F # no, but produces smaller faster duckdb
# do_deploy <- T # sync derived data + app code to server
# flags, fix: show all species categories in score tables + add metric labels
# (TRUE/FALSE rather than T/F, which are reassignable)
do_zones <- FALSE # no change
do_metrics <- FALSE # metrics already calculated correctly in DuckDB
do_zone_taxon <- FALSE # not affected
do_postgis <- FALSE # PostGIS tables already have v3 columns
do_downloads <- FALSE # pra_gpkg already has correct columns
do_summary_data <- TRUE # YES — score tables need regeneration
do_summary_figs <- TRUE # YES — DOCX/XLSX/flower plots need regeneration
do_readme <- FALSE # not affected
do_map <- FALSE # not affected
do_parquet <- FALSE # not affected
do_deploy <- FALSE # can deploy separately

# spatial outputs (year reflects input data vintage, version suffix model version of derived metric columns)
pra_gpkg <- glue("{dir_v}/ply_programareas_2026{v_sfx}.gpkg")
sr_gpkg <- glue("{dir_v}/ply_subregions_2026{v_sfx}.gpkg")

# computed outputs (version suffix only — no obvious input year)
sr_pra_csv <- glue("{dir_v}/subregion_programareas.csv")
metrics_tif <- glue("{dir_v}/r_metrics{v_sfx}.tif")
metrics_pra_tif <- glue("{dir_v}/r_metrics_programareas{v_sfx}.tif")
zone_taxon_csv <- glue("{dir_v}/zone_taxon{v_sfx}.csv")
lyrs_csv <- glue("{dir_v}/layers{v_sfx}.csv")
# file_exists(c(sr_pra_csv, metrics_tif, metrics_pra_tif, zone_taxon_csv, lyrs_csv))

# summary outputs (merged from msens-summary_programareas.qmd)
er_pra_gpkg <- glue("{dir_v}/ply_ecoregion_programarea{v_sfx}.gpkg")
er_pra_csv <- glue("{dir_v}/ply_ecoregion_programarea{v_sfx}.csv")
er_pra_n_csv <- glue("{dir_v}/tbl_er_pra_scores{v_sfx}.csv")
pra_n_csv <- glue("{dir_v}/tbl_pra_scores{v_sfx}.csv")
lbls_csv <- glue("{dir_v}/ply_label_placement_pra.csv")
dir_figs <- here(glue("figs/msens-summary_programareas{v_sfx}"))

# redo flags for summary sections
redo_summary_data <- TRUE
redo_summary_figs <- TRUE

# parquet (big files)
dir_pq <- glue("{dir_big_v}/sdm_parquet")

# helper functions ----

# Build a SpatRaster of one metric's cell values on the r_cell template.
#
# m_key : metric_key in the `metric` table identifying the metric to pull.
# Returns a single-layer SpatRaster with values from cell_metric at the
# matching cell_ids, NA elsewhere.
# Relies on globals: con_sdm (DuckDB connection), r_cell (template raster).
get_rast <- function(m_key) {
  sql <- glue(
    "
        SELECT
          cm.cell_id,
          cm.value
        FROM cell_metric cm
        WHERE cm.metric_seq = (
          SELECT metric_seq
          FROM metric
          WHERE metric_key = '{m_key}' )"
  )
  d <- dbGetQuery(con_sdm, sql)

  # each cell_id must appear at most once before assigning into the raster
  stopifnot(sum(duplicated(d$cell_id)) == 0)

  out <- init(r_cell[[1]], NA)
  out[d$cell_id] <- d$value

  out
}

# Flower (polar donut) plot of category scores.
#
# data         : data frame with one row per category
# fld_category : column for petal category (fill / tooltip / data_id)
# fld_height   : numeric column for petal height (radial extent)
# fld_width    : numeric column for petal width (angular extent)
# colors       : vector of fill colors passed to scale_fill_manual()
# tooltip_expr : optional glue template evaluated per row for tooltips
# score        : optional overall score printed at the center; defaults to
#                the width-weighted mean of heights
# title        : optional plot title
# interactive  : if TRUE, return a ggiraph::girafe htmlwidget; else a ggplot
plot_flower <- function(
  data,
  fld_category,
  fld_height,
  fld_width,
  colors,
  tooltip_expr = NULL,
  score = NULL,
  title = NULL,
  interactive = TRUE
) {
  stopifnot(is.numeric(data |> pull({{ fld_height }})))
  stopifnot(is.numeric(data |> pull({{ fld_width }})))

  # default score: width-weighted mean of petal heights
  if (is.null(score)) {
    score <- data |>
      mutate(
        "{{fld_height}}" := as.double({{ fld_height }}),
        "{{fld_width}}" := as.double({{ fld_width }})
      ) |>
      summarize(
        score = weighted.mean({{ fld_height }}, {{ fld_width }}, na.rm = TRUE)
      ) |>
      pull(score)
  }

  # cumulative widths define each petal's angular span [ymin, ymax)
  d <- data |>
    arrange({{ fld_category }}) |>
    mutate(across(!where(is.character), as.double)) |>
    mutate(
      ymax = cumsum({{ fld_width }}),
      ymin = lag(ymax, default = 0),
      xmax = {{ fld_height }},
      xmin = 0
    )

  if (!is.null(tooltip_expr)) {
    d <- d |>
      mutate(tooltip = glue(tooltip_expr))
  } else {
    # fix: was glue("{!!fld_category}") — rlang `!!` inside a glue string
    # is not injection and errors/misbehaves; use the category value
    d <- d |>
      mutate(tooltip = as.character({{ fld_category }}))
  }

  g <- ggplot(d) +
    geom_rect_interactive(
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = {{ fld_category }},
        data_id = {{ fld_category }},
        tooltip = tooltip
      ),
      # fix: color was ALSO mapped to the constant "white" inside aes(),
      # creating a spurious mapping; set it once as a fixed parameter
      color = "white",
      alpha = 0.5
    ) +
    coord_polar(theta = "y") +
    # -10 reserves a center hole for the score label below
    xlim(c(-10, max(data |> pull({{ fld_height }})))) +
    annotate(
      "text",
      x = -10,
      y = 0,
      label = round(score),
      size = 8,
      fontface = "bold"
    ) +
    scale_fill_manual(values = colors) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      plot.margin = unit(c(20, 20, 20, 20), "pt")
    )

  if (!is.null(title)) {
    g <- g + ggtitle(title)
  }

  if (interactive) {
    g <- girafe(
      ggobj = g,
      options = list(
        opts_sizing(rescale = TRUE, width = 1),
        opts_tooltip(
          css = "background-color:white;color:black;padding:5px;border-radius:3px;"
        )
      )
    )
  }

  g
}

# conditional summary setup ----
# mapgl (Mapbox GL maps) is only needed for figure sections and requires a
# Mapbox token read from the private config directory
if (do_summary_figs) {
  mapbox_tkn_txt <- glue("{dir_private}/mapbox_token_bdbest.txt")
  Sys.setenv(MAPBOX_PUBLIC_TOKEN = readLines(mapbox_tkn_txt))
  librarian::shelf(mapgl, quiet = T)
  dir_create(dir_figs)
}

# database connections ----
# DuckDB opened read-write since metric tables are modified below
con_sdm <- dbConnect(duckdb(), dbdir = sdm_db, read_only = F)
# dbDisconnect(con_sdm, shutdown = T); duckdb_shutdown(duckdb()); rm(con_sdm)
source(here("libs/db.R")) # generates Postgres database connection `con`; fails if ssh-msens not connected
# dbListTables(con_sdm) # dbListTables(con)

# distinct species categories among valid taxa; drives the metric loops below
sp_cats <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  distinct(sp_cat) |>
  arrange(sp_cat) |>
  pull(sp_cat)

# for visual checks
r_cell <- rast(cell_tif)
ext(r_cell) <- round(ext(r_cell), 3) # TODO: culprit with walrus wonk? verify extent rounding vs raster alignment

Flags for evaluating R chunks:

  • do_zones: FALSE
  • do_metrics: FALSE
  • do_zone_taxon: FALSE
  • do_postgis: FALSE
  • do_downloads: FALSE
  • do_summary_data: TRUE
  • do_summary_figs: TRUE
  • do_readme: FALSE
  • do_map: FALSE
  • do_parquet: FALSE
  • do_deploy: FALSE

3 Add Zones

3.1 Add cell table to DuckDB if missing

Code
# Add the `cell` lookup table to DuckDB once; cell_id is the raster cell
# index shared by all cell_* tables.
if (!"cell" %in% dbListTables(con_sdm)) {
  if (!exists("r_cell")) {
    r_cell <- rast(cell_tif)
  }

  # raster values -> data frame of non-NA cells
  d_cell <- values(r_cell, dataframe = T, na.rm = T) |>
    tibble() |>
    mutate(
      cell_id = as.integer(cell_id)
    )

  # dbRemoveTable(con_sdm, "cell")
  dbWriteTable(con_sdm, "cell", d_cell, append = F)
  # primary key enforces one row per raster cell
  dbExecute(con_sdm, "ALTER TABLE cell ADD PRIMARY KEY (cell_id);")
}

3.2 Add Program Areas to SDM zones

Add BOEM Program Areas (11th National Draft Proposed Program, 2025-11) as a new zone type. Program Areas are derived from Planning Areas with additional GOA subdivisions.

Code
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

# Register Program Areas only if NOT already present as a zone type.
# fix: condition was inverted (`tbl_pra %in% zone_tbls`), which skipped
# first-time registration and re-ran (duplicating zones) on every later
# render; mirrors the `!"ply_subregions_2025" %in% zone_tbls` guard used
# for subregions below.
if (!tbl_pra %in% zone_tbls) {
  source(here("libs/sdm_functions.R"))
  # TODO: move these into msens R
  # - set_geom_ctr_area()
  # - register_zones()
  # - insert_zone_cell_data()

  # load planning areas for field join
  pa <- read_sf(pa_gpkg) |>
    st_drop_geometry() |>
    select(planarea_name, region_key, planarea_key)

  # load and process program areas with field renaming
  pra <- read_sf(pra_raw_gpkg) |>
    rename(
      region_name = BOEM_OCS_REGION,
      planarea_name = PLANNING_AREA,
      programarea_name = Label,
      programarea_id = OBJECTID
    ) |>
    # left join to get region_key and planarea_key from planning areas
    left_join(pa, by = "planarea_name") |>
    # programarea_key defaults to planarea_key
    mutate(
      programarea_key = planarea_key
    ) |>
    # handle NA values for GOA Program Areas (not in planning areas)
    mutate(
      programarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "GAA",
        programarea_name == "GOA Program Area B" ~ "GAB",
        TRUE ~ programarea_key
      ),
      planarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "WGA,CGA,EGA",
        programarea_name == "GOA Program Area B" ~ "CGA,EGA",
        TRUE ~ planarea_key
      ),
      region_key = case_when(
        str_detect(programarea_name, "GOA") ~ "GA",
        TRUE ~ region_key
      )
    ) |>
    select(
      programarea_id,
      region_key,
      region_name,
      planarea_key,
      planarea_name,
      programarea_key,
      programarea_name
    ) |>
    st_transform(st_crs(4326))

  # add centroid and area
  pra <- pra |> set_geom_ctr_area()

  # write derived gpkg
  write_sf(pra, pra_gpkg)
  message(glue("Wrote {nrow(pra)} program areas to {pra_gpkg}"))
  # Wrote 20 program areas to ~/My Drive/projects/msens/data/derived/ply_programareas_2026.gpkg

  # shift lon [-180, 180] to [0, 360]
  pra_sl <- st_shift_longitude(pra)

  # load cell raster template
  r_cell <- rast(cell_tif, lyrs = "cell_id")

  # get percent each feature covers each cell
  r_pra <- rasterize(
    pra_sl,
    r_cell,
    fun = "max",
    touches = T,
    cover = T,
    by = "programarea_key"
  )

  # convert to data frame
  d_pra <- terra::as.data.frame(
    r_pra,
    cells = T,
    na.rm = F,
    wide = F
  ) |>
    filter(!is.na(values)) |>
    tibble()

  message(glue("Program areas cover {length(unique(d_pra$cell))} cells"))

  # register zones and insert zone data into con_sdm
  register_zones(pra, tbl_pra, "programarea_key")
  insert_zone_cell_data(d_pra, tbl_pra, "programarea_key")

  # verify registration
  tbl(con_sdm, "zone") |>
    group_by(tbl) |>
    summarise(n_zones = n()) |>
    collect() |>
    print()
}

3.3 Create ply_programareas_2026 in PostGIS database

For viewing in mapgl and mapsp apps.

Code
# devtools::install_local("../msens")
librarian::shelf(
  here,
  leaflet,
  leaflet.extras, # mapview,
  marinesensitivity / msens,
  sf,
  tibble,
  quiet = T
)

source(here("libs/db.R")) # define: con

# write program areas to PostGIS for the mapgl / mapsp apps
pra |>
  st_write(
    dsn = con,
    layer = tbl_pra,
    driver = "PostgreSQL",
    layer_options = "OVERWRITE=true"
  )

# TODO: need to convert the 1st column to integer for mapgl viewing?

# enforce SRID so shows up in tile.marinesensitivity.org
dbExecute(
  con,
  glue(
    "SELECT UpdateGeometrySRID('{tbl_pra}','geom',4326);"
  )
)

# fix any invalid geometries
dbExecute(
  con,
  glue(
    "UPDATE {tbl_pra} SET geom = ST_MakeValid(geom) WHERE NOT ST_IsValid(geom)"
  )
) # 6 rows fixed

# add spatial index for faster queries
dbExecute(
  con,
  glue(
    "CREATE INDEX IF NOT EXISTS {tbl_pra}_geom_idx ON {tbl_pra} USING gist(geom);"
  )
)

3.4 Create Subregion-ProgramArea Lookup

Create lookup table for which program areas belong to which subregions.

Used in dropdown of mapgl app.

Code
# calculate intersections between subregions and program areas using zone_cell
# (cells belonging to both a subregion zone and a program-area zone imply overlap)
d_sr_pra <- tbl(con_sdm, "zone_cell") |>
  inner_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_sr),
    by = "zone_seq"
  ) |>
  rename(subregion_key = value) |>
  select(cell_id, subregion_key) |>
  inner_join(
    # cell -> program area side of the join
    tbl(con_sdm, "zone_cell") |>
      inner_join(
        tbl(con_sdm, "zone") |>
          filter(tbl == !!tbl_pra),
        by = "zone_seq"
      ) |>
      select(cell_id, programarea_key = value),
    by = "cell_id"
  ) |>
  distinct(subregion_key, programarea_key) |>
  collect()

# Bounding box (xmin, ymin, xmax, ymax) of the program areas within a
# subregion, for zooming maps.
#
# sr_key : subregion_key (e.g. "AK")
# Relies on globals: d_sr_pra and r_metrics — NOTE(review): r_metrics must
# have a "programarea_key" layer; confirm it exists before calling.
get_sr_bbox <- function(sr_key) {
  pra_sr <- d_sr_pra |>
    filter(subregion_key == !!sr_key) |>
    pull(programarea_key)
  # fix: removed unused local `r <- init(r_cell[[1]], NA)`
  r_sr <- r_metrics[["programarea_key"]] %in% pra_sr
  trim(r_sr) |> st_bbox() |> as.numeric()
}
# TODO: pre-calc bboxes for all subregions

# persist the lookup used by the mapgl app dropdown
write_csv(d_sr_pra, sr_pra_csv)
message(glue(
  "Wrote {nrow(d_sr_pra)} subregion-programarea mappings to {sr_pra_csv}"
))
# Wrote 60 subregion-programarea mappings to ~/My Drive/projects/msens/data/derived/subregion_programareas.csv

3.5 Add SubRegions

Original:

  • USA: USA mainland, Alaska, Hawaii and territories
  • AK: Alaska
  • L48: USA mainland
  • AKL48: USA mainland & Alaska

Conceived for later:

  • HI: Hawaii
  • L48Pac: Pacific USA mainland
  • L48Atl: Atlantic USA mainland
  • L48Goa: Gulf of America USA mainland
  • PAC: Pacific USA (Hawaii, Pacific mainland and USA Territories)

version for 2026:

  • USA: USA mainland and Alaska
  • AK: Alaska
  • L48Pac: Pacific USA mainland
  • L48Atl: Atlantic USA mainland
  • L48Goa: Gulf of America USA mainland

v3:

  • USA: All USA (Alaska, Gulf of America and Pacific)
  • AK: Alaska
  • GA: Gulf of America
  • PA: Pacific
Code
zones <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

# delete existing tbl_sr rows so v3 subregions can be re-registered cleanly
if (tbl_sr %in% zones) {
  dbExecute(con_sdm, glue("DELETE FROM zone WHERE tbl = '{tbl_sr}'"))
  # dbExecute(con_sdm, "DELETE FROM zone_cell WHERE zone_seq NOT IN (SELECT zone_seq FROM zone)")
}

# v3 subregions (see list in the prose above)
d_sr <- tribble(
  ~subregion_key , ~subregion_name   ,
  "AK"           , "Alaska"          ,
  "GA"           , "Gulf of America" ,
  "PA"           , "Pacific"         ,
  "USA"          , "All USA"
) # note: subregion_name not used (yet)
register_zones(d_sr, tbl_sr, "subregion_key")

# program area -> region lookup (non-spatial)
tmp_pra <- pra |>
  st_drop_geometry() |>
  select(programarea_key, region_key)
# temporarily push tmp_pra into duckdb for later joining
copy_to(con_sdm, tmp_pra, "tmp_pra", temporary = T, overwrite = T)

# zone_cell rows for non-USA subregions: subregion -> region -> program
# areas -> cells, summing pct_covered across program areas per cell
df_z_sr_notusa <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value != "USA"
  ) |>
  select(region_key = value, zone_seq_sr = zone_seq) |>
  left_join(
    tbl(con_sdm, "tmp_pra"),
    by = "region_key"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_pra) |>
      select(
        programarea_key = value,
        zone_seq
      ),
    by = "programarea_key"
  ) |>
  left_join(
    tbl(con_sdm, "zone_cell"),
    by = "zone_seq"
  ) |>
  group_by(zone_seq_sr, cell_id) |>
  summarize(
    pct_covered = sum(pct_covered, na.rm = T),
    .groups = "drop"
  ) |>
  mutate(
    # cap at 100% where overlapping program areas sum above it
    pct_covered = pmin(pct_covered, 100, na.rm = T)
  ) |>
  select(
    zone_seq = zone_seq_sr,
    cell_id,
    pct_covered
  ) |>
  collect() # 349,139 × 3
# summary(df_z_sr_notusa)

# duplicated(df_z_sr_notusa$cell_id) |> any() # FALSE
# TODO: check why no overlapping cells across subregions?

# zone_seqs_sr <- tbl(con_sdm, "zone") |>
#   filter(
#     tbl == !!tbl_sr) |>
#   pull(zone_seq)
# dbExecute(con_sdm, glue("
#   DELETE FROM zone_cell
#   WHERE zone_seq IN ({paste(zone_seqs_sr, collapse = ',')})"))

# append USA, ie all cells in other subregions
zone_seq_USA <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value == "USA"
  ) |>
  pull(zone_seq) # 217

# USA rows: union of all non-USA subregion cells, capped at 100%
df_z_sr_usa <- df_z_sr_notusa |>
  group_by(cell_id) |>
  summarize(
    pct_covered = sum(pct_covered, na.rm = T),
    .groups = "drop"
  ) |>
  mutate(
    zone_seq = !!zone_seq_USA,
    pct_covered = pmin(pct_covered, 100, na.rm = T)
  ) # 349,139 × 3
# summary(df_z_sr_usa)

df_z_sr <- bind_rows(
  df_z_sr_notusa,
  df_z_sr_usa
)

# append subregion rows into the shared zone_cell table
# TODO: ....
copy_to(
  dest = con_sdm,
  df = df_z_sr,
  name = "zone_cell",
  append = T
)

# sanity check: cells per subregion
tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_sr) |>
  select(zone_seq, subregion_key = value) |>
  left_join(
    tbl(con_sdm, "zone_cell"),
    by = "zone_seq"
  ) |>
  group_by(subregion_key) |>
  summarize(n_cell = n()) |>
  arrange(subregion_key)
#   subregion_key n_cell
#   <chr>          <dbl>
# 1 AK            307,445
# 2 GA             17,274
# 3 PA             24,420
# 4 USA           349,139
Code
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)

# original (2025) subregions built from ecoregions; only runs if not yet registered
if (!"ply_subregions_2025" %in% zone_tbls) {
  source(here("libs/sdm_functions.R"))
  # - set_geom_ctr_area()
  # - register_zones()
  # - insert_zone_cell_data()

  # spatial sources
  er <- read_sf(er_raw_gpkg)
  r_cell <- rast(cell_tif, lyrs = "cell_id")

  # USA: USA mainland, Alaska, Hawaii and territories
  sr_usa <- er |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "USA",
      subregion_name = "USA mainland, Alaska, Hawaii and territories"
    )

  # AK: Alaska
  sr_ak <- er |>
    filter(region_key == "AK") |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "AK",
      subregion_name = "Alaska"
    )

  # L48: USA mainland (selected ecoregions)
  sr_l48 <- er |>
    filter(
      ecoregion_key %in%
        c(
          "CAC",
          "WAOR",
          "SECS",
          "WCGOA",
          "EGOA",
          "NECS"
        )
    ) |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "L48",
      subregion_name = "USA mainland"
    )

  # AKL48: USA mainland & Alaska
  sr_akl48 <- bind_rows(sr_ak, sr_l48) |>
    st_union() |>
    st_as_sf() |>
    mutate(
      subregion_key = "AKL48",
      subregion_name = "USA mainland & Alaska"
    )

  # combine
  sr <- bind_rows(
    sr_ak,
    sr_l48,
    sr_akl48,
    sr_usa
  ) |>
    set_geom_ctr_area()

  # write gpkg
  write_sf(sr, sr_gpkg)

  # shift lon [-180, 180] to [0, 360]
  sr_sl <- st_shift_longitude(sr)

  # get percent each feature covers each cell
  r_sr <- rasterize(
    sr_sl,
    r_cell,
    fun = "max",
    touches = T,
    cover = T,
    by = "subregion_key"
  )

  # convert to data frame
  d_sr <- terra::as.data.frame(
    r_sr,
    cells = T,
    na.rm = F,
    wide = F
  ) |>
    filter(
      !is.na(values)
    ) |> # 839,534 × 3
    tibble()

  # d_sr |>
  #   group_by(layer) |>
  #   summarise(n_cells  = n()) |>
  #   collect() |>
  #   arrange(desc(n_cells))
  #
  #   layer  n_cells
  #   <chr>    <int>
  # 1 USA    630,806
  # 2 AKL48  419,767
  # 3 AK     315,369
  # 4 L48    104,398

  # register zones and insert zone data into con_sdm
  register_zones(sr, "ply_subregions_2025", "subregion_key")
  insert_zone_cell_data(d_sr, "ply_subregions_2025", "subregion_key")

  # tbl(con_sdm, "zone") |>
  #   group_by(tbl) |>
  #   summarise(n_zones = n()) |>
  #   collect()
  #
  #   tbl                 n_zones
  #   <chr>                 <dbl>
  # 1 ply_ecoregions_2025      12
  # 2 ply_planareas_2025       36
  # 3 ply_subregions_2025       4
  # TODO: load ply_subregions_2025 into PostGIS
}

4 Calculate Metrics

4.1 Calculate Extinction Risk Metrics by Taxonomic Component

Code
# Start fresh with all extrisk_* metrics
existing_seqs <- tbl(con_sdm, "metric") |>
  filter(
    str_starts(metric_key, "extrisk_")
  ) |>
  pull(metric_seq)
# delete dependent rows (cell_metric, zone_metric) before the metric rows
if (length(existing_seqs) > 0) {
  metric_sql <- paste(existing_seqs, collapse = ", ")
  dbExecute(
    con_sdm,
    glue("DELETE FROM cell_metric WHERE metric_seq IN ({metric_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM zone_metric WHERE metric_seq IN ({metric_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM metric WHERE metric_seq IN ({metric_sql})")
  )
}
Code
# valid taxa snapshot (kept for interactive inspection; not used directly below)
d_taxon_ok <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  collect()

# one extrisk_* cell metric per species category
for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]

  message(glue("Calculating extinction risk metrics for {sp_cat}..."))

  metric_key <- glue("extrisk_{str_replace(sp_cat, ' ', '_')}")
  description <- glue("Extinction risk for {sp_cat}")
  # TODO: revisit metric_key / description field naming

  # Get metric_seq first, then delete/insert
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{metric_key}'"
    )
  )

  # delete dependent rows before the metric row itself
  if (nrow(existing_seq) > 0) {
    metric_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {metric_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {metric_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric WHERE metric_seq = {metric_seq}")
    )
  }

  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO metric (metric_key, description)
    VALUES ('{metric_key}', '{description}')"
    )
  )

  # fetch the metric_seq just assigned by the insert
  metric_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT max(metric_seq) AS metric_seq FROM metric WHERE metric_key = '{metric_key}'"
    )
  ) |>
    pull(metric_seq)

  # use er_score from taxon (pre-computed in ingest_nmfs-fws-mmpa-mbta-listings.qmd
  # and merged in merge_models.qmd)
  # NOTE(review): `sp_cat == !!sp_cat` compares taxon.sp_cat (column) to the
  # loop value; the "all" branch keeps every category — confirm "all" occurs
  # as a value of sp_cats
  d_er <- tbl(con_sdm, "taxon") |>
    filter(
      is_ok,
      if (!!sp_cat == "all") T else sp_cat == !!sp_cat
    ) |>
    select(mdl_seq, er_score) |>
    collect()

  # register the in-memory lookup so the SQL join below can see it
  duckdb_register(con_sdm, "tmp_er", d_er |> select(mdl_seq, er_score))
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO cell_metric (cell_id, metric_seq, value)
    SELECT
      mc.cell_id,
      {metric_seq} AS metric_seq,
      ROUND(SUM(er.er_score * mc.value) / 100.0, 2) AS value
    FROM model_cell mc
    JOIN tmp_er er ON mc.mdl_seq = er.mdl_seq
    GROUP BY mc.cell_id"
    )
  ) # 6,662,075
  duckdb_unregister(con_sdm, "tmp_er")

  # visual check ----
  # d <- tbl(con_sdm, "cell_metric") |>
  #   filter(metric_seq == metric_seq) |>
  #   select(cell_id, value) |>
  #   collect()
  # r <- init(r_cell[[1]], NA)
  # r[d$cell_id] <- d$value
  # r_r <- rotate(r)
  # plet(r_r)
}

# list resulting extrisk_* metrics
tbl(con_sdm, "metric") |>
  filter(str_starts(metric_key, "extrisk_")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)

4.2 Calculate ExtinctionRisk min/max per SpeciesCategory and Ecoregion

Code
# per-category ecoregional min/max of the extrisk metric (zone_metric rows)
for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]
  for (fxn in c("min", "max")) {
    # fxn = "min"

    m_key <- glue("extrisk_{sp_cat}_ecoregion_{fxn}")
    m_description <- glue("Extinction risk for {sp_cat} Ecoregional {fxn}")
    message(glue("m_key: {m_key}"))

    # Get metric_seq first, then delete/insert
    # (seq is captured before deletion, so delete order is safe here)
    existing_seq <- dbGetQuery(
      con_sdm,
      glue(
        "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
      )
    ) |>
      pull(metric_seq)
    if (length(existing_seq) > 0) {
      dbExecute(
        con_sdm,
        glue("DELETE FROM metric      WHERE metric_seq = {existing_seq}")
      )
      dbExecute(
        con_sdm,
        glue("DELETE FROM cell_metric WHERE metric_seq = {existing_seq}")
      )
      dbExecute(
        con_sdm,
        glue("DELETE FROM zone_metric WHERE metric_seq = {existing_seq}")
      )
    }

    dbExecute(
      con_sdm,
      glue(
        "INSERT INTO metric (metric_key, description)
      VALUES ('{m_key}', '{m_description}')"
      )
    )

    # get metric ID
    m_seq <- dbGetQuery(
      con_sdm,
      glue(
        "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
      )
    ) |>
      pull(metric_seq)

    # calculate min/max extinction risk per ecoregion
    dbExecute(
      con_sdm,
      glue(
        "INSERT INTO zone_metric (zone_seq, metric_seq, value)
      SELECT 
        z.zone_seq,
        -- z.tbl, z.fld, z.value AS tbl_fld_val,
        {m_seq} as metric_seq,
        {fxn}(cm.value) as value
      FROM zone z
      LEFT JOIN zone_cell zc ON z.zone_seq = zc.zone_seq
      LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
      WHERE
        z.fld = 'ecoregion_key'
        AND cm.metric_seq IN (
          SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
      -- GROUP BY z.zone_seq, z.tbl, z.fld, z.value
      GROUP BY z.zone_seq "
      )
    )
  }
}

# list resulting extrisk_*_ecoregion_min/max metrics
tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^extrisk_.*_ecoregion_(min|max)$")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)

4.3 Calculate rescaled ExtinctionRisk per cell based on EcoregionMinMax per SpeciesCategory

Code
# rescale each category's extrisk metric per cell to [0,100] using the
# ecoregional min/max, weighting by ecoregion coverage of the cell
for (sp_cat in sp_cats) {
  # sp_cat = sp_cats[1]

  m_key <- glue("extrisk_{sp_cat}_ecoregion_rescaled")
  m_description <- glue(
    "Extinction risk for {sp_cat}, rescaled to [0,100] based on Ecoregional min/max values"
  )

  # Get metric_seq first, then delete/insert
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  )
  # delete existing metric + dependent rows before re-inserting
  if (nrow(existing_seq) > 0) {
    m_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric      WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "
      INSERT INTO metric (metric_key, description)
      VALUES ('{m_key}', '{m_description}')"
    )
  )
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "
      SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  # rescaled value = sum over ecoregions of
  #   (cell value - er min) / (er max - er min) * coverage weight, * 100
  dbExecute(
    con_sdm,
    glue(
      "
      WITH
      -- get ecoregion_minmax from zone_metric
      ecoregion_minmax AS (
        SELECT 
          z.zone_seq,
          MIN(zm.value) as min_value,
          MAX(zm.value) as max_value
        FROM zone z
        JOIN zone_metric zm USING (zone_seq)
        WHERE 
          z.fld = 'ecoregion_key' AND 
          zm.metric_seq IN (
            SELECT metric_seq FROM metric WHERE metric_key IN (
              'extrisk_{sp_cat}_ecoregion_min',
              'extrisk_{sp_cat}_ecoregion_max') )
        GROUP BY z.zone_seq
      ),
      -- get cell_ecoregion from zone_cell with pct_covered
      cell_ecoregion_raw AS (
        SELECT 
          cell_id, 
          z.zone_seq, 
          zone_cell.pct_covered
        FROM zone_cell
        JOIN zone z ON zone_cell.zone_seq = z.zone_seq
        WHERE z.fld = 'ecoregion_key'
      ),
      -- calculate total coverage per cell for normalization
      cell_coverage_total AS (
        SELECT 
          cell_id, 
          SUM(pct_covered) as total_coverage
        FROM cell_ecoregion_raw
        GROUP BY cell_id
      ),
      -- normalize coverage percentages
      -- for cells with only one ecoregion, use 100% regardless of actual coverage
      -- for cells with multiple ecoregions, rescale to ensure percentages sum to 100
      cell_ecoregion AS (
        SELECT 
          ce.cell_id, 
          ce.zone_seq,
          CASE 
            WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
            ELSE ce.pct_covered * 100.0 / ct.total_coverage
          END as normalized_pct
        FROM cell_ecoregion_raw ce
        JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
      )
      -- insert rescaled extinction risk values
      INSERT INTO cell_metric (cell_id, metric_seq, value)
      SELECT 
        cm.cell_id,
        {m_seq} as metric_seq,
        SUM(
          (cm.value     - er.min_value) / 
          (er.max_value - er.min_value) * 
          (ce.normalized_pct / 100.0) ) * 100 AS value  -- scale to 0-100 range
      FROM cell_metric      cm
      JOIN cell_ecoregion   ce ON cm.cell_id  = ce.cell_id
      JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
      WHERE cm.metric_seq IN (
        SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
      AND cm.value IS NOT NULL
      AND er.min_value IS NOT NULL
      AND er.max_value IS NOT NULL
      AND er.min_value < er.max_value  -- avoid division by zero
      GROUP BY cm.cell_id
    "
    )
  )
}

# list resulting rescaled metrics
tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^extrisk_.*_ecoregion_rescaled$")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)

4.4 Transfer raw PrimProd into cell_metric

Code
# Create metric for primary productivity
m_key <- "primprod"
m_description <- "Primary productivity: Oregon State Vertically Generalized Production Model (VGPM) from Visible Infrared Imaging Radiometer Suite (VIIRS) satellite data (mg C / m^2 / day) from daily averages available as monthly averaged to annual and averaged to overall for the most recently available full years of data 2014 to 2023"
# TODO: consider log rescaling PrimaryProductivity

# workflow: ingest_productivity.qmd
dir_vgpm <- glue("{dir_data}/raw/oregonstate.edu")
vgpm_tif <- glue(
  "{dir_vgpm}/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd.tif"
)

# template raster; extents rounded so resample/mask/crop align exactly
r_cell <- rast(cell_tif) |> subset("cell_id")
ext(r_cell) <- round(ext(r_cell), 3)
# r_cell_r <- rotate(r_cell)
# ext(r_cell_r) <- round(ext(r_cell_r), 3)

r_vgpm <- rast(vgpm_tif) |> subset("npp_avg")
ext(r_vgpm) <- round(ext(r_vgpm), 3)
# r_vgpm_rc <- rotate(r_vgpm) |>
#   crop(r_cell) |>
#   mask(r_cell)

# resample VGPM onto the cell grid, then mask/crop to cells
r_v_c <- resample(
  r_vgpm,
  r_cell,
  method = "bilinear"
) |>
  mask(r_cell) |>
  crop(r_cell)
# plot(r_v_c)
# stack cell_id with the productivity values for extraction below
r_cv <- c(r_cell, r_v_c)

# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
)
if (nrow(existing_seq) > 0) {
  m_seq <- existing_seq$metric_seq
  dbExecute(con_sdm, glue("DELETE FROM metric      WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}"))
  dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}"))
}

# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
    INSERT INTO metric (metric_key, description)
    VALUES ('{m_key}', '{m_description}')"
  )
)
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
    SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)

# raster -> data frame of (cell_id, value) rows for cell_metric
d_cv <- values(r_cv, dataframe = T, na.rm = T) |>
  tibble() |>
  rename(
    value = npp_avg
  ) |>
  mutate(
    cell_id = as.integer(cell_id),
    metric_seq = !!m_seq
  )

dbWriteTable(con_sdm, "cell_metric", d_cv, append = T)

# verify the primprod metric registered
tbl(con_sdm, "metric") |>
  filter(str_detect(metric_key, "^primprod")) |>
  arrange(metric_seq) |>
  select(-date_created) |>
  collect() |>
  print(n = Inf)
#   metric_seq metric_key description
#        <int> <chr>      <chr>
# 1        160 primprod   Primary productivity: Oregon State Vertically Generali…

# visual check ----
# d <- tbl(con_sdm, "cell_metric") |>
#   filter(metric_seq == m_seq) |>
#   select(cell_id, value) |>
#   collect()
# r <- init(r_cell[[1]], NA)
# r[d$cell_id] <- d$value
# r_r <- rotate(r)
# plet(r_r)

4.5 Calculate PrimProd min/max by Ecoregion

Code
# look up the primprod metric_seq created above
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

# ecoregional min/max of primary productivity (zone_metric rows)
for (fxn in c("min", "max")) {
  # fxn = "min"

  # get existing primprod ecoregion min/max metric_seq
  m_key <- glue("primprod_ecoregion_{fxn}")
  m_description <- glue("Primary productivity Ecoregional {fxn}")

  # Check if metric exists and delete if it does
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  )
  if (nrow(existing_seq) > 0) {
    m_seq <- existing_seq$metric_seq
    dbExecute(
      con_sdm,
      glue("DELETE FROM metric      WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
    )
    dbExecute(
      con_sdm,
      glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
    )
  )

  # get metric_seq
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  # Calculate min/max primary productivity per ecoregion
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT 
      z.zone_seq,
      -- z.tbl, z.fld, z.value AS tbl_fld_val,
      {m_seq} as metric_seq,
      {fxn}(cm.value) as value
    FROM      zone        z
    LEFT JOIN zone_cell   zc ON z.zone_seq = zc.zone_seq
    LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      z.fld         = 'ecoregion_key' AND
      cm.metric_seq = {pp_seq}
    GROUP BY z.zone_seq"
    )
  )
}
# spot-check distribution of ecoregional minimums
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_min"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
# OLD:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 65.61  130.50  237.31  247.02  358.66  435.62
# NEW:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 35.15  135.08  267.47  257.26  366.07  454.43
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_max"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()

4.6 Calculate PrimProd avg/sd by Planning Area

Code
# Look up metric_seq of the per-cell "primprod" metric (input values)
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

# One pass per SQL aggregate: write a zone_metric row per planning-area
# zone holding the AVG / STDDEV of cell-level primprod.
for (fxn in c("avg", "stddev")) {
  # fxn = "avg"

  # get existing primprod avg/stddev metric_seq
  m_key <- glue("primprod_{fxn}")
  m_description <- glue("Primary productivity {fxn}")

  # Check if metric exists and delete if it does
  # (vector-safe: IN clause handles >1 matching metric_seq)
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)
  if (length(existing_seq) > 0) {
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM metric      WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM cell_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM zone_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
  }

  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
    )
  )

  # get metric_seq (auto-assigned on insert above)
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)

  # m_seq is interpolated into SQL below, so it must be a single value
  stopifnot(length(m_seq) == 1)

  # calculate avg/stddev primary productivity per planning area
  # (zone -> zone_cell -> cell_metric, restricted to planarea zones)
  # dbGetQuery(con_sdm, glue(
  #   "SELECT
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO zone_metric (zone_seq, metric_seq, value)
     SELECT
      z.zone_seq,
      {m_seq} as metric_seq,
      {fxn}(cm.value) as value
    FROM      zone        z
    LEFT JOIN zone_cell   zc ON z.zone_seq = zc.zone_seq
    LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      z.fld         = 'planarea_key' AND
      cm.metric_seq = {pp_seq}
    GROUP BY z.zone_seq"
    )
  )
}

# tbl(con_sdm, "cell_metric") |>
#   filter(
#     metric_seq == !!pp_seq) |>
#   pull(value) |>
#   summary()

# sanity check: distribution of planning-area averages
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_avg"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()

4.7 Calculate rescaled PrimProd per cell based on EcoregionMinMax

Code
# Look up metric_seq of the per-cell "primprod" metric (input values)
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)

# Create new metric for rescaled primary productivity
m_key <- "primprod_ecoregion_rescaled"
m_description <- "Primary productivity rescaled to [0,100] based on Ecoregional min/max values"
# TODO: consider log rescaling PrimaryProductivity

# Check if metric exists and delete if it does.
# Vector-safe: delete with IN so a multi-row existing_seq does not make
# glue() vectorize `= {existing_seq}` into multiple statements.
existing_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(existing_seq) > 0) {
  seqs_sql <- paste(existing_seq, collapse = ",")
  dbExecute(
    con_sdm,
    glue("DELETE FROM metric      WHERE metric_seq IN ({seqs_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM cell_metric WHERE metric_seq IN ({seqs_sql})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM zone_metric WHERE metric_seq IN ({seqs_sql})")
  )
}

# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
    INSERT INTO metric (metric_key, description)
    VALUES ('{m_key}', '{m_description}')"
  )
)
# fetch the auto-assigned metric_seq; must be a single value since it is
# interpolated into the INSERT below
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
    SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
stopifnot(length(m_seq) == 1)

# Rescale primary productivity by ecoregion:
# for each cell, (value - er_min) / (er_max - er_min) * 100, averaged over
# the ecoregions overlapping that cell, weighted by normalized pct_covered.
# dbGetQuery(con_sdm, glue("
dbExecute(
  con_sdm,
  glue(
    "
    WITH
    -- get ecoregion_minmax from cell_metric for primary productivity
    ecoregion_minmax AS (
      SELECT 
        z.zone_seq,
        MIN(zm.value) as min_value,
        MAX(zm.value) as max_value
      FROM zone z
      JOIN zone_metric zm USING (zone_seq)
      WHERE 
        z.fld = 'ecoregion_key' AND 
        zm.metric_seq IN (
          SELECT metric_seq FROM metric WHERE metric_key IN (
            'primprod_ecoregion_min',
            'primprod_ecoregion_max') )
      GROUP BY z.zone_seq ),
    -- get cell_ecoregion from zone_cell with pct_covered
    cell_ecoregion_raw AS (
      SELECT 
        cell_id, 
        z.zone_seq, 
        zone_cell.pct_covered
      FROM zone_cell
      JOIN zone z ON zone_cell.zone_seq = z.zone_seq
      WHERE z.fld = 'ecoregion_key'
    ),
    -- calculate total coverage per cell for normalization
    cell_coverage_total AS (
      SELECT 
        cell_id, 
        SUM(pct_covered) as total_coverage
      FROM cell_ecoregion_raw
      GROUP BY cell_id
    ),
    -- normalize coverage percentages
    cell_ecoregion AS (
      SELECT 
        ce.cell_id, 
        ce.zone_seq,
        CASE 
          WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
          ELSE ce.pct_covered * 100.0 / ct.total_coverage
        END as normalized_pct
      FROM cell_ecoregion_raw ce
      JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
    )
    -- insert rescaled primary productivity values
    INSERT INTO cell_metric (cell_id, metric_seq, value)
    SELECT 
      cm.cell_id,
      {m_seq} as metric_seq,
      SUM(
        -- (c.prim_prod_mean - er.min_value) / 
        (cm.value     - er.min_value) / 
        (er.max_value - er.min_value) * 
        (ce.normalized_pct / 100.0) ) * 100 AS value  -- scale to 0-100 range
    -- FROM cell c
    FROM cell_metric cm
    JOIN cell_ecoregion   ce ON cm.cell_id  = ce.cell_id
    JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
    WHERE 
    --c.prim_prod_mean IS NOT NULL AND 
    cm.metric_seq = {pp_seq} AND
    cm.value IS NOT NULL AND 
    ce.zone_seq IS NOT NULL AND
    er.min_value IS NOT NULL AND 
    er.max_value IS NOT NULL AND 
    er.min_value < er.max_value  -- avoid division by zero
    GROUP BY cm.cell_id
"
  )
)

# tbl(con_sdm, "cell_metric") |>
#   filter(
#     metric_seq == !!m_seq)

# sanity check: rescaled values should lie within roughly [0, 100]
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_rescaled"
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()

4.8 Calculate ProgramArea metrics contributing to score based on cell metrics

Code
# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  pull(zone_seq)

message(glue("Calculating metrics for {length(z_seqs)} program areas..."))
# Calculating metrics for 20 program areas...

# metrics to aggregate from cell level up to program-area level:
# raw + rescaled extinction risk per species category, raw + rescaled primprod
metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)

for (m_key in metrics) {
  # m_key = metrics[1]

  # get existing metric_seq; skip metrics not yet loaded rather than fail
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)

  if (length(m_seq) != 1) {
    message(glue("  Skipping {m_key} - metric not found"))
    next
  }

  # delete existing zone_metric values for this metric and program areas
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
    WHERE metric_seq = {m_seq}
      AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )

  # stop if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(metric_seq == !!m_seq) |>
    tally() |>
    pull(n)

  if (n_cm == 0) {
    message(glue("  Skipping {m_key} - no cell_metric values"))
    next
  }

  # calculate average score per program area weighted by cell coverage
  # (weighted mean: sum(value * pct_covered) / sum(pct_covered))
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT
      zc.zone_seq,
      {m_seq} as metric_seq,
      SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
    FROM zone_cell zc
    JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE
      cm.metric_seq = {m_seq}
      AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
      AND cm.value IS NOT NULL
    GROUP BY zc.zone_seq"
    )
  )

  message(glue("  Calculated {m_key}"))
}

# verify: one value per (metric, program area), joined back to zone keys
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(metric_key %in% metrics) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_pra) |>
      select(zone_seq, pra_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pra_key)) |>
  collect()

# rescaled metrics should fall within [0, 100]
d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 1.195  11.186  21.902  27.317  41.150  91.748

4.9 Calculate Ecoregion metrics contributing to score based on cell metrics

Code
# get zone_seqs for ecoregions
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_er
  ) |>
  pull(zone_seq)

# tbl(con_sdm, "metric") |> pull(metric_key) |>  sort()

# same metric set as the program-area aggregation, now per ecoregion
metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)

for (m_key in metrics) {
  # m_key = metrics[1]

  # get existing metric_seq; unlike the program-area loop, all metrics are
  # required here, so a missing one halts the run
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)
  stopifnot(length(m_seq) == 1)

  # delete existing zone_metric values for this metric and ecoregions
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
    WHERE metric_seq = {m_seq}
      AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )

  # stop if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(
      metric_seq == !!m_seq
    ) |>
    tally() |>
    pull(n)
  stopifnot(n_cm > 0)

  # calculate average score per ecoregion weighted by cell coverage
  # (weighted mean: sum(value * pct_covered) / sum(pct_covered))
  dbExecute(
    con_sdm,
    glue(
      "
    INSERT INTO zone_metric (zone_seq, metric_seq, value)
    SELECT 
      zc.zone_seq,
      {m_seq} as metric_seq,
      SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
    FROM zone_cell zc
    JOIN cell_metric cm ON zc.cell_id = cm.cell_id
    WHERE 
      cm.metric_seq = {m_seq} 
      AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
      AND cm.value IS NOT NULL
    GROUP BY zc.zone_seq"
    )
  )
}

# verify: one value per (metric, ecoregion), joined back to zone keys
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key %in% metrics
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_er
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()

# rescaled metrics should fall within [0, 100]
d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
# OLD:
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.1106 12.9840 24.9402 28.6058 43.1917 83.1341
# NEW:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 2.365  13.926  21.852  26.010  35.288  80.208
# NEWEST:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 2.365  13.923  22.475  27.350  38.309  80.039

4.10 Calculate cell_metric score: ExtinctionRisk + PrimProd (already EcoregionRescaled), equal weight

Code
# Create combined metric for extinction risk and primary productivity:
# per-cell mean of the ecoregion-rescaled extinction-risk metrics (one per
# species category) and rescaled primprod, all weighted equally.
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_description <- "Combined score of extinction risk per species category and primary productivity, equally weighted (and each previously rescaled [0,100] based on Ecoregional min/max values)"

# Check if metric exists and delete if it does.
# Vector-safe: delete with IN so a multi-row m_seq does not make glue()
# vectorize `= {m_seq}` into multiple statements for a single dbExecute.
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(m_seq) > 0) {
  seqs_sql <- paste(m_seq, collapse = ",")
  dbExecute(con_sdm, glue("DELETE FROM metric      WHERE metric_seq IN ({seqs_sql})"))
  dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq IN ({seqs_sql})"))
  dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq IN ({seqs_sql})"))
}

# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
  INSERT INTO metric (metric_key, description)
  VALUES ('{m_key}', '{m_description}')"
  )
)
# fetch the auto-assigned metric_seq; single value required for the INSERT
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
  SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
stopifnot(length(m_seq) == 1)

# Get all metric keys for extinction risk metrics and primary productivity
# (vapply guarantees a character vector regardless of sp_cats length)
metric_keys <- c(
  vapply(
    sp_cats,
    \(sc) as.character(glue("extrisk_{str_replace(sc, ' ', '_')}_ecoregion_rescaled")),
    character(1),
    USE.NAMES = FALSE
  ),
  "primprod_ecoregion_rescaled"
)

# Create SQL to join all metrics with equal weights: per-cell mean of the
# component metric values, rounded to the nearest integer
dbExecute(
  con_sdm,
  glue(
    "
  WITH metric_values AS (
    SELECT 
      cm.cell_id,
      m.metric_key,
      cm.value
    FROM cell_metric cm
    JOIN metric m ON cm.metric_seq = m.metric_seq
    WHERE m.metric_key IN ({paste(shQuote(metric_keys), collapse=', ')}) )
  INSERT INTO cell_metric (cell_id, metric_seq, value)
  SELECT 
    cell_id,
    {m_seq} as metric_seq,
    ROUND(AVG(value)) AS value
  FROM metric_values
  GROUP BY cell_id"
  )
)

# Check the new metric
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  collect()

d_ck |>
  pull(value) |>
  summary()
# OLD:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.00   10.00   16.00   21.28   31.00   89.00
# NEW:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.00   12.00   17.00   21.29   26.00   89.00
# NEWEST:
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.0    12.0    18.0    22.1    28.0    89.0

4.11 Calculate Score per ProgramArea from ProgramArea metrics

Code
# metric: calculate score per program area from the zone-level component
# metrics (unweighted mean across the input metrics per zone)
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)
# m_seq is interpolated into SQL below; require exactly one match
# (consistent with the guards used in the sections above)
stopifnot(length(m_seq) == 1)

# component metrics already aggregated to zone level in prior sections
metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)
stopifnot(length(metric_input_keys) == length(m_input_seqs))

# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_pra
  ) |>
  pull(zone_seq)

# DEBUG check
# tbl(con_sdm, "zone_metric") |>
#   filter(
#     metric_seq == !!m_seq,
#     zone_seq %in% z_seqs) |>
#   arrange(desc(value)) |>
#   pull(value) |>
#   range()
# 5.704735 61.050130
#   View()
#   nrow() -> n_existing_zm
# z_seq == 134 # (ALA)

# delete existing zone_metric values for this metric and planning areas
dbExecute(
  con_sdm,
  glue(
    "DELETE FROM zone_metric 
  WHERE metric_seq = {m_seq} 
    AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
  )
)

# calculate average score per program area
# (SUM/COUNT over non-null values, i.e. AVG of the component metrics)
dbExecute(
  con_sdm,
  glue(
    "
  INSERT INTO zone_metric (zone_seq, metric_seq, value)
  SELECT 
    zone_seq,
    {m_seq} as metric_seq,
    SUM(value) / COUNT(value) as value
  FROM zone_metric zm
  WHERE 
    metric_seq IN ({paste(m_input_seqs, collapse = ',')}) 
    AND zone_seq IN ({paste(z_seqs, collapse = ',')})
    AND value IS NOT NULL
  GROUP BY zone_seq"
  )
) # 20

# check the new metric: one score per program area
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_pra
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()

d_ck |>
  pull(value) |>
  summary()
# PlanAreas:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  7.16   17.10   22.86   25.42   31.59   60.07
# NEW ProgramAreas:
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 7.642  17.372  24.131  26.439  32.972  60.068

4.12 Metric labels

Add display label columns to metric table as single source of truth for display names used in score tables, Word/Excel output, and flower plots.

Code
# add display label columns to metric table if missing
for (col in c("metric_title", "metric_abbrev")) {
  cols <- dbGetQuery(
    con_sdm,
    "SELECT column_name FROM information_schema.columns WHERE table_name = 'metric'"
  )
  if (!col %in% cols$column_name) {
    dbExecute(con_sdm, glue("ALTER TABLE metric ADD COLUMN {col} VARCHAR"))
  }
}

# populate labels for score-contributing metrics
metric_labels <- tribble(
  ~metric_key                                                  , ~metric_title        , ~metric_abbrev ,
  "score_extriskspcat_primprod_ecoregionrescaled_equalweights" , "Score"              , "Score"        ,
  "extrisk_bird_ecoregion_rescaled"                            , "Bird"               , "Bird"         ,
  "extrisk_coral_ecoregion_rescaled"                           , "Coral"              , "Coral"        ,
  "extrisk_fish_ecoregion_rescaled"                            , "Fish"               , "Fish"         ,
  "extrisk_invertebrate_ecoregion_rescaled"                    , "Invertebrate"       , "Invertebrate" ,
  "extrisk_mammal_ecoregion_rescaled"                          , "Mammal"             , "Mammal"       ,
  "extrisk_other_ecoregion_rescaled"                           , "Other"              , "Other"        ,
  "extrisk_turtle_ecoregion_rescaled"                          , "Turtle"             , "Turtle"       ,
  "primprod_ecoregion_rescaled"                                , "Primary Production" , "Prim. Prod."
)

duckdb_register(con_sdm, "tmp_labels", metric_labels)
dbExecute(
  con_sdm,
  "
  UPDATE metric
  SET metric_title  = t.metric_title,
      metric_abbrev = t.metric_abbrev
  FROM tmp_labels t
  WHERE metric.metric_key = t.metric_key"
)
[1] 9
Code
# drop the temporary "tmp_labels" view registration from the DuckDB session
duckdb_unregister(con_sdm, "tmp_labels")

5 Add/update zone_taxon

For showing species composition per zone (e.g., subregion, planning area, program area) in the mapgl app.

Code
# SLOW: joins model_cell (per-model, per-cell suitability) for all USA cells

# lazy reference to vetted taxa (is_ok) with display names, extinction-risk
# code/score, and regulatory flags (MMPA, MBTA, BCC, ESA);
# mdl_seq links each taxon to its rows in model_cell
tbl_taxon <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  select(
    sp_cat,
    sp_common = common_name,
    sp_scientific = scientific_name,
    taxon_id,
    taxon_authority,
    rl_code = extrisk_code,
    er_score,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source,
    mdl_seq
  )

# per (USA subregion zone x taxon): total overlapped cell area (km2) and
# mean habitat suitability (model_cell.value assumed 0-100, scaled to 0-1)
d <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value == "USA"
  ) |>
  select(zone_seq, zone_tbl = tbl, zone_fld = fld, zone_value = value) |> # zone_tbl = tbl,
  inner_join(
    tbl(con_sdm, "zone_cell") |>
      select(zone_seq, cell_id),
    by = join_by(zone_seq)
  ) |>
  inner_join(
    tbl(con_sdm, "model_cell") |>
      select(mdl_seq, cell_id, value) |>
      inner_join(
        tbl_taxon,
        by = join_by(mdl_seq)
      ),
    by = join_by(cell_id)
  ) |>
  inner_join(
    tbl(con_sdm, "cell") |>
      select(cell_id, area_km2),
    by = join_by(cell_id)
  ) |>
  group_by(
    zone_tbl,
    zone_fld,
    zone_value, # zone_seq,
    mdl_seq,
    sp_cat,
    sp_common,
    sp_scientific,
    taxon_id,
    taxon_authority,
    rl_code,
    er_score,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source
  ) |>
  summarize(
    area_km2 = sum(area_km2, na.rm = T),
    avg_suit = mean(value, na.rm = T) / 100,
    .groups = "drop"
  ) |>
  collect() # 220,534 × 12

# taxon_usa <- d |>
#   filter(
#     zone_fld   == "subregion_key",
#     zone_value == "USA") |>
#   pull(taxon_id)
#
# tbl_taxon |>
#   filter(!taxon_id %in% taxon_usa) |>
#   select(sp_cat, sp_common, sp_scientific, taxon_id, rl_code, mdl_seq) |>
#   arrange(sp_cat, sp_common, sp_scientific, taxon_id, rl_code, mdl_seq) |>
#   collect() |>
#   print(n = Inf)
#
# A tibble: 46 × 6
#    sp_cat       sp_common                  sp_scientific taxon_id rl_code mdl_seq
#    <chr>        <chr>                      <chr>            <dbl> <chr>     <int>
#  1 bird         African Collared-dove      Streptopelia…   2.27e7 LC        18033
#  2 bird         Aplomado Falcon            Falco femora…   2.27e7 EN        18359
#  3 bird         Bay-breasted Warbler       Setophaga ca…   2.27e7 LC        18156
#  4 bird         Blue-and-yellow Macaw      Ara ararauna    2.27e7 LC        17601
#  5 bird         Brown-throated Parakeet    Eupsittula p…   2.27e7 LC        18136
#  6 bird         Cerulean Warbler           Setophaga ce…   2.27e7 NT        17999
#  7 bird         Common Greenshank          Tringa nebul…   2.27e7 LC        18060
#  8 bird         Common Ringed Plover       Charadrius h…   2.27e7 LC        17677
#  9 bird         Common Vermilion Flycatch… Pyrocephalus…   1.04e8 LC        18275
# 10 bird         Dark-eyed Junco            Junco hyemal…   2.27e7 LC        18140
# 11 bird         Ferruginous Pygmy-owl      Glaucidium b…   2.27e7 TN        18366
# 12 bird         House Finch                Haemorhous m…   2.27e7 LC        17781
# 13 bird         Island Canary              Serinus cana…   2.27e7 LC        17995
# 14 bird         Lesser Antillean Pewee     Contopus lat…   2.27e7 LC        17700
# 15 bird         Long-eared Owl             Asio otus       2.27e7 LC        17617
# 16 bird         Middendorff's Grasshopper… Helopsaltes …   2.27e7 LC        18138
# 17 bird         Nanday Parakeet            Aratinga nen…   2.27e7 LC        17602
# 18 bird         Orange-fronted Parakeet    Eupsittula c…   2.27e7 VU        17740
# 19 bird         Pin-tailed Whydah          Vidua macrou…   2.27e7 LC        18078
# 20 bird         Pine Warbler               Setophaga pi…   2.27e7 LC        18157
# 21 bird         Puerto Rican Emerald       Riccordia ma…   2.27e7 LC        17987
# 22 bird         Purple Sandpiper           Calidris mar…   2.27e7 LC        18133
# 23 bird         Red Siskin                 Spinus cucul…   2.27e7 EN        18021
# 24 bird         Russet-backed Thrush       Catharus ust…   1.04e8 LC        18252
# 25 bird         Say's Phoebe               Sayornis saya   2.27e7 LC        17992
# 26 bird         Siberian Accentor          Prunella mon…   2.27e7 LC        17939
# 27 bird         Spot-breasted Oriole       Icterus pect…   2.27e7 LC        17803
# 28 bird         Spruce Grouse              Canachites c…   6.12e7 LC        17663
# 29 bird         Swainson's Thrush          Catharus swa…   1.04e8 LC        18251
# 30 bird         Two-barred Crossbill       Loxia leucop…   2.27e7 LC        18146
# 31 bird         Venezuelan Troupial        Icterus icte…   2.27e7 LC        17802
# 32 bird         Western Meadowlark         Sturnella ne…   2.27e7 LC        18161
# 33 bird         White-tailed Sea-eagle     Haliaeetus a…   2.27e7 LC        17782
# 34 bird         Yellow-crowned Amazon      Amazona ochr…   2.27e7 LC        17573
# 35 bird         Yellow-headed Amazon       Amazona orat…   2.27e7 EN        17574
# 36 bird         Yellow-throated Vireo      Vireo flavif…   2.27e7 LC        18080
# 37 fish         Arctic telescope           Protomyctoph…   1.59e5 LC        17531
# 38 fish         Cortez sea chub            Kyphosus ele…   2.74e5 LC        17509
# 39 fish         Gulf grouper               Mycteroperca…   2.74e5 EN        17506
# 40 fish         daggertooth pike conger    Muraenesox c…   1.26e5 LC        17503
# 41 fish         prowspine cusk-eel         Lepophidium …   2.76e5 LC        17513
# 42 fish         speckledtail flounder      Engyophrys s…   2.76e5 LC        17523
# 43 invertebrate waved whelk                Buccinum und…   1.39e5 NA        17224
# 44 invertebrate NA                         Paroediceros…   1.03e5 NA        17127
# 45 invertebrate NA                         Quadrimaera …   4.22e5 NA         8245
# 46 invertebrate NA                         Scabrotropho…   1.47e5 NA        17229

# Derive per-species sensitivity weights:
#   suit_rl          = avg suitability x extinction-risk score (er_score/100)
#   suit_rl_area     = suit_rl weighted by the species' occupied area (km2)
#   cat_suit_rl_area = total suit_rl_area within (zone, species category)
#   pct_cat          = species' share of its category total within the zone
d <- d |>
  mutate(
    suit_rl = avg_suit * er_score / 100,
    suit_rl_area = avg_suit * er_score / 100 * area_km2
  ) |>
  group_by(zone_fld, zone_value, sp_cat) |>
  mutate(
    cat_suit_rl_area = sum(suit_rl_area, na.rm = T)
  ) |>
  ungroup() |>
  mutate(
    pct_cat = suit_rl_area / cat_suit_rl_area
  )

# d_akl48 <- d |>
#   filter(
#     zone_fld   == "subregion_key",
#     zone_value == "AKL48")
# # nrow(d_akl48) # 10,377 species
#
# d_akl48_sum <- d_akl48 |>
#   inner_join(
#     tbl(con_sdm, "taxon") |>
#       filter(
#         taxon_id %in% d_akl48$taxon_id) |>
#       select(taxon_id, n_ds) |>
#       collect(),
#     by = join_by(taxon_id)) |>
#   group_by(sp_cat) |>
#   summarize(
#     n_species = n(),
#     n_models  = sum(n_ds))
# d_akl48_sum |>
#   summarize(
#     total_species = sum(n_species),
#     total_models  = sum(n_models))
# #   total_species total_models
# #           <int>        <int>
# # 1         10377        10461
# #
# #   sp_cat        n_species  n_models
# #   <chr>             <int>     <int>
# # 1 bird                247       280
# # 2 coral               342       349
# # 3 fish              3,299     3,314
# # 4 invertebrate      5,452     5,453
# # 5 mammal               72        84
# # 6 other               953       953
# # 7 reptile              12        28
# #   Total            10,377    10,461
#
# write_csv(d_akl48, zone_taxon_akl48_csv)

# persist zone x taxon summary: CSV snapshot plus replace the DuckDB table
# (plain string literal; glue() around a constant was a no-op)
write_csv(d, zone_taxon_csv)
dbWriteTable(con_sdm, "zone_taxon", d, overwrite = TRUE)

6 Update PostGIS layers with metrics from DuckDB

These layers are used in the mapgl app (via pg_tileserv) and need to be updated with the new metrics calculated in DuckDB. The update_table_with_clear function adds new metric columns if they don’t exist, clears existing values for those metrics, and then updates with new values from DuckDB.

6.1 Update Zone Tables in PostGIS with Metrics

Code
update_table_with_clear <- function(con, df, tbl, key_col = "programarea_key") {
  # Refresh metric columns on a PostGIS zone table from a wide data frame.
  #
  # Adds any columns of `df` missing from `tbl` (as DOUBLE PRECISION),
  # clears current values of the columns being refreshed, then updates rows
  # by joining a temporary staging table on `key_col`.
  #
  # @param con     DBI connection to PostGIS
  # @param df      data frame with `key_col` plus one column per metric
  # @param tbl     name of the existing target table
  # @param key_col join key present in both `df` and `tbl`
  # @return number of rows updated in `tbl`
  # df = d_pa_metrics; tbl = "ply_planareas_2025"; key_col = "planarea_key"

  stopifnot(dbExistsTable(con, tbl))
  existing_cols <- dbListFields(con, tbl)
  stopifnot(key_col %in% existing_cols)
  stopifnot(key_col %in% names(df))
  source_cols <- setdiff(names(df), key_col)

  # Add missing columns
  for (col in source_cols) {
    if (!col %in% existing_cols) {
      add_col_query <- glue(
        "ALTER TABLE {tbl} ADD COLUMN {col} DOUBLE PRECISION"
      )
      dbExecute(con, add_col_query)
      message("Added column: ", col)
    }
  }

  # Clear existing values for columns we're updating
  # (columns just added above are all NULL already, so only the
  # pre-existing intersection needs clearing)
  existing_source_cols <- intersect(source_cols, existing_cols)
  if (length(existing_source_cols) > 0) {
    clear_clauses <- paste0(existing_source_cols, " = NULL", collapse = ", ")
    clear_query <- glue("UPDATE {tbl} SET {clear_clauses}")
    dbExecute(con, clear_query)
    message(
      "Cleared existing values for: ",
      paste(existing_source_cols, collapse = ", "),
      "\n"
    )
  }

  # Stage new values in a temp table; on.exit guarantees cleanup even if
  # the UPDATE below fails (the original leaked the temp table on error)
  dbWriteTable(
    con,
    "temp_metrics_update",
    df,
    temporary = TRUE,
    overwrite = TRUE
  )
  on.exit(dbRemoveTable(con, "temp_metrics_update", force = TRUE), add = TRUE)

  set_clauses <- paste0(source_cols, " = temp.", source_cols, collapse = ", ")
  update_query <- glue(
    "
    UPDATE {tbl} 
    SET {set_clauses}
    FROM temp_metrics_update temp
    WHERE {tbl}.{key_col} = temp.{key_col}
  "
  )

  rows_affected <- dbExecute(con, update_query)
  message("Updated ", rows_affected, " rows")

  return(rows_affected)
}

# update zone.tbl to include version suffix ----
# Dry run: the UPDATE is left commented out; only the intended rename is
# reported for each zone table name that lacks a "_v<digits>" suffix.
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)
for (zt in zone_tbls) {
  # already-versioned names pass through unchanged
  zt_v <- if (grepl("_v\\d+$", zt)) zt else paste0(zt, v_sfx)
  if (zt != zt_v) {
    # dbExecute(
    #   con_sdm,
    #   glue(
    #     "UPDATE zone SET tbl = '{zt_v}' WHERE tbl = '{zt}'"
    #   )
    # )
    cat(glue("zone.tbl: {zt} -> {zt_v} ~ {Sys.time()}"))
  }
}
Code
# create versioned PostGIS tables from existing tables ----
# Clone each unversioned zone table into its versioned name; no-op when the
# versioned table is already present.
for (pg_tbl in c(tbl_er, tbl_pra)) {
  # pg_tbl carries v_sfx (e.g. "_v3" via tbl_er/tbl_pra); strip it to get
  # the unversioned source table name
  pg_tbl_base <- str_remove(pg_tbl, v_sfx)
  if (dbExistsTable(con, pg_tbl)) {
    message(glue("{pg_tbl} already exists"))
  } else {
    dbExecute(con, glue("CREATE TABLE {pg_tbl} AS SELECT * FROM {pg_tbl_base}"))
    message(glue("Created {pg_tbl} from {pg_tbl_base}"))
  }
}

6.2 Load Program Area Scores to PostGIS

Code
# delete zone_metric rows without a metric_key
# (orphans left behind by metric delete/re-insert cycles above)
dbExecute(
  con_sdm,
  "
  DELETE FROM zone_metric 
  WHERE metric_seq NOT IN (SELECT metric_seq FROM metric)"
) # 0

# check if table exists in PostGIS
if (!dbExistsTable(con, tbl_pra)) {
  stop("Run create_pg_v3_tables R chunk above")
}

# get program area metrics from DuckDB:
# one row per program area, one column per metric_key (values rounded to 0.1)
d_pra_metrics <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric") |>
      mutate(value = round(value, 1)),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  filter(
    !is.na(programarea_key),
    !is.na(metric_key),
    !is.na(value)
  ) |>
  pivot_wider(
    names_from = metric_key,
    values_from = value
  ) |>
  collect()
message(glue("Loaded {nrow(d_pra_metrics)} program area metrics"))

# For complete refresh (clears existing values first)
update_table_with_clear(
  con,
  d_pra_metrics,
  tbl_pra,
  "programarea_key"
)
Code
# Drop PostGIS columns no longer present in the refreshed metrics:
# limited to extinction-risk leftovers and a stray "na" column.
# (fixes the "fdls_" typo; removes the unused tibble, indexed loop, and
# dead `a <-` assignment of the original)
flds_pra_df <- names(d_pra_metrics)
flds_pra_db <- dbListFields(con, tbl_pra)
flds_pra_rm <- setdiff(flds_pra_db, flds_pra_df) |>
  str_subset("(extrisk_)|(^na$)")

tbl <- tbl_pra
for (fld in flds_pra_rm) {
  dbExecute(con, glue('ALTER TABLE {tbl} DROP COLUMN "{fld}"'))
  message("Removed column: ", fld, " from ", tbl)
}

6.3 Add index for vector tiles

per Feature id | pg_tileserv

Code
# add integer primary key to postgres programareas table
# NOTE(review): this first statement only retypes programarea_id to INTEGER;
# it does NOT add a PRIMARY KEY constraint — confirm pg_tileserv only needs
# an integer feature-id column here, or add the constraint explicitly
dbExecute(
  con,
  glue(
    "ALTER TABLE public.ply_programareas_2026{v_sfx} ALTER COLUMN programarea_id TYPE INTEGER;"
  )
)
# ecoregions: add a serial integer primary key (no-op if already present)
dbExecute(
  con,
  glue(
    "ALTER TABLE public.ply_ecoregions_2025{v_sfx} ADD COLUMN IF NOT EXISTS ecoregion_id SERIAL PRIMARY KEY;"
  )
)

7 Generate downloads

For sharing with BOEM and external partners.

Code
# force regeneration of all download products on this run
redo_dl <- T

# ecoregion gpkg destination (input copy, no metrics — just copy to dir_v)
er_gpkg_dst <- glue("{dir_v}/ply_ecoregions_2025.gpkg")

# redo_dl: remove previously generated outputs so the file-exists
# guards below rebuild each product from scratch
if (redo_dl) {
  dl_outputs <- c(
    pra_gpkg,
    lyrs_csv,
    metrics_tif,
    metrics_pra_tif,
    er_gpkg_dst
  )
  file_delete(dl_outputs[file_exists(dl_outputs)])
}

# * pra_gpkg ----
# export program-area polygons (with metric columns) from PostGIS to a
# geopackage for sharing; fs::file_exists for consistency with the
# redo_dl block above
if (!file_exists(pra_gpkg)) {
  message("Generating Program Areas geopackage...")
  st_read(con, tbl_pra) |>
    st_write(pra_gpkg, delete_dsn = T, quiet = T)
}

# * lyrs_csv ----
# lookup of raster layer machine names (lyr) to display names (layer),
# with a presentation order and category per group; sp_cats vectors are
# recycled within each tibble
if (!file_exists(lyrs_csv)) {
  message("Generating layers CSV...")

  d_lyrs <- bind_rows(
    tibble(
      order = 1,
      category = "Overall",
      layer = "score",
      lyr = "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
    ),
    tibble(
      order = 2,
      category = "Species, rescaled by Ecoregion",
      layer = glue("{sp_cats}: ext. risk, ecorgn"),
      lyr = glue("extrisk_{sp_cats}_ecoregion_rescaled")
    ),
    tibble(
      order = 3,
      category = "Primary Productivity, rescaled by Ecoregion",
      layer = glue("prim prod, ecorgn"),
      lyr = glue("primprod_ecoregion_rescaled")
    ),
    tibble(
      order = 4,
      category = "Species, raw Extinction Risk",
      layer = glue("{sp_cats}: ext. risk"),
      lyr = glue("extrisk_{sp_cats}")
    ),
    # layer/lyr reordered to match the other tibbles (bind_rows matches
    # by name, so output is unchanged)
    tibble(
      order = 5,
      category = "Primary Productivity, raw Phytoplankton",
      layer = "prim prod, 2014-2023 avg (mg C/m^2/day)",
      lyr = "primprod"
    )
  )

  write_csv(d_lyrs, lyrs_csv)
}

# * metrics_tif ----
# build a multi-layer GeoTIFF: two categorical zone layers
# (ecoregion_key, programarea_key) plus one layer per metric in lyrs_csv
if (!file_exists(metrics_tif)) {
  message("Generating Metrics raster...")

  d_lyrs <- read_csv(lyrs_csv)

  # add zones: for each cell, take the zone with the greatest coverage
  # (last value after ordering by pct_covered)
  lst <- list()
  for (zone_fld in c("ecoregion_key", "programarea_key")) {
    # zone_fld = "ecoregion_key"
    # zone_fld = "programarea_key"

    d <- tbl(con_sdm, "zone") |>
      filter(fld == !!zone_fld) |>
      left_join(
        tbl(con_sdm, "zone_cell"),
        by = "zone_seq"
      ) |>
      group_by(cell_id) |>
      window_order(pct_covered) |>
      summarize(
        value = last(value),
        .groups = "drop"
      ) |>
      collect() |>
      mutate(
        "{zone_fld}" := as.factor(value)
      ) |>
      select(all_of(c("cell_id", zone_fld)))

    # blank raster on the cell grid; assigning a factor stores its
    # integer codes, with the level table attached below
    r <- init(r_cell[[1]], NA) # |> as.factor() #sort(unique(d$value)))
    # r$chr <- NA_character_ # ensure chr type
    r[d$cell_id] <- d[[zone_fld]]
    varnames(r) <- zone_fld
    names(r) <- zone_fld
    # raster attribute table: code id -> zone key string
    levels(r) <- tibble(
      id = seq_along(levels(d[[zone_fld]])),
      "{zone_fld}" := levels(d[[zone_fld]])
    )
    lst[[zone_fld]] <- r
  }
  r_zones <- do.call(c, lst |> unname())

  # add layers
  lst <- list()
  for (i in seq_len(nrow(d_lyrs))) {
    # i = 2
    lyr <- d_lyrs$lyr[i]
    layer <- d_lyrs$layer[i]

    # NOTE(review): names gets the machine name (lyr) and varnames the
    # display label (layer) — opposite of the zone loop above; confirm
    # this is intended by downstream consumers of the tif
    r <- get_rast(lyr) # plot(r)
    names(r) <- lyr
    varnames(r) <- layer
    lst[[i]] <- r
  }
  r_metrics <- do.call(c, lst)

  # add log of primprod (heavily right-skewed, log is more mappable)
  r_metrics[["primprod_log"]] <- log(r_metrics[["primprod"]])

  # show layer names
  names(r_metrics) |> sort() |> print()

  r_metrics <- c(r_zones, r_metrics)
  writeRaster(r_metrics, metrics_tif, overwrite = T)
}

# * metrics_pra_tif ----
if (!file.exists(metrics_pra_tif)) {
  message("Generating Metrics raster masked to Program Areas...")
  # mask to study area of Program Areas

  # subregion_keys in sr_pra_csv: USA (all 20), AK (15), PA (3), GA (2)
  # AK + L48 = USA (all program areas)
  sr <- "USA"
  # NOTE(review): pra_sr is computed but never applied below — the mask
  # uses every program area. Harmless while sr == "USA" (all areas), but
  # any other subregion would be silently ignored; confirm intent.
  pra_sr <- read_csv(sr_pra_csv) |>
    filter(subregion_key == !!sr) |>
    pull(programarea_key)

  r_metrics <- rast(metrics_tif)
  # programarea_key zone layer doubles as the mask (NA outside any area)
  r_mask <- subset(r_metrics, "programarea_key")

  r_metrics_pra <- r_metrics |>
    mask(r_mask) |>
    trim()

  writeRaster(r_metrics_pra, metrics_pra_tif, overwrite = T)
}

# * er_gpkg_dst (input, no metrics — just copy to dir_v) ----
# fs::file_exists for consistency with the redo_dl block above
if (!file_exists(er_gpkg_dst)) {
  message("Copying ecoregion gpkg to dir_v...")
  file_copy(er_raw_gpkg, er_gpkg_dst)
}
Code
# GeoJSON of BOEM's official planning-area polygons joined to our
# sensitivity metrics (attribute join on planning-area key)
pa_metrics_geo <- glue(
  "{dir_derived}/OCS_Proposed_Planning_Area_Polygon_Env_Sensitivity_Metrics.geojson"
)

# * pa_metrics_geo ----
if (!file.exists(pa_metrics_geo)) {
  pa_boem_geo <- glue("{dir_data}/raw/boem.gov/OCS_Area_Polygons.geojson")

  pa <- read_sf(pa_gpkg)

  pa_boem <- read_sf(pa_boem_geo)

  # BOEM planning areas with no match in our pa layer (key mismatch)
  # NOTE(review): inside if{}, the wrapping parens do not auto-print;
  # result is only visible interactively
  (pa_miss <- pa_boem |>
    st_drop_geometry() |>
    anti_join(
      pa |>
        st_drop_geometry() |>
        select(planarea_key, planarea_name),
      by = join_by(PLANNING_AREA == planarea_key)
    ) |>
    pull(PLANNING_AREA))
  # [1] "CG" "EG" "WG"

  # exploratory map of the unmatched areas (interactive only; not
  # rendered from inside this if block)
  pa_boem |>
    filter(PLANNING_AREA %in% pa_miss) |>
    mapview::mapView(zcol = "PLANNING_AREA")

  # the mismatches are the Gulf areas, renamed CG/EG/WG -> CGA/EGA/WGA
  pa |>
    st_drop_geometry() |>
    filter(
      region_name == "Gulf of America"
    ) |>
    select(planarea_key, planarea_name)
  #   planarea_key planarea_name
  #   <chr>        <chr>
  # 1 CGA          Central Gulf of America
  # 2 EGA          Eastern Gulf of America
  # 3 WGA          Western Gulf of America

  # join metrics onto BOEM geometry, translating our Gulf keys back to
  # BOEM's two-letter codes
  pa_metrics <- pa_boem |>
    left_join(
      pa |>
        st_drop_geometry() |>
        mutate(
          PLANNING_AREA = case_match(
            planarea_key,
            "CGA" ~ "CG",
            "EGA" ~ "EG",
            "WGA" ~ "WG",
            .default = planarea_key
          )
        ),
      by = join_by(PLANNING_AREA)
    )

  write_sf(pa_metrics, pa_metrics_geo)
}

8 Summary data products

Merged from msens-summary_programareas.qmd: ecoregion × program area intersections, label placement points, and score tables.

Code
# delete existing summary data files if redo (except lbls_csv — manually adjusted)
if (redo_summary_data) {
  summary_files <- c(er_pra_gpkg, er_pra_csv, er_pra_n_csv, pra_n_csv)
  file_delete(summary_files[file_exists(summary_files)])
}

8.1 Ecoregion × Program Area intersections

Code
# read input polygons: ecoregions (authoritative input) and program areas
er <- read_sf(er_raw_gpkg)
pra <- read_sf(pra_gpkg)

# intersect ecoregions x program areas, computing area, centroid, and
# the percent of each program area falling in each ecoregion
if (!file_exists(er_pra_gpkg)) {
  er_pra <- st_intersection(
    er |>
      select(ecoregion_key, ecoregion_name),
    pra |>
      select(region_key, region_name, programarea_key, programarea_name)
  ) |>
    st_make_valid() |>
    mutate(
      area_km2 = st_area(geom) |> set_units(km^2) |> drop_units(),
      # compute the centroid once (was computed twice, once per coord)
      ctr_xy = st_coordinates(st_centroid(geom)),
      ctr_lon = ctr_xy[, 1],
      ctr_lat = ctr_xy[, 2]
    ) |>
    # drop sliver polygons from boundary-precision artifacts
    filter(area_km2 > 1) |>
    arrange(programarea_key, ecoregion_key) |>
    group_by(programarea_key) |>
    mutate(
      pct_programarea_in_ecoregion = round(
        area_km2 / sum(area_km2) * 100,
        2
      ),
      # a program area fully inside one ecoregion gets NA (no split)
      pct_programarea_in_ecoregion = na_if(
        pct_programarea_in_ecoregion,
        100.0
      )
    ) |>
    ungroup()

  # final column order (ctr_xy matrix column dropped here)
  er_pra <- er_pra |>
    select(
      region_key,
      region_name,
      ecoregion_key,
      ecoregion_name,
      programarea_key,
      programarea_name,
      pct_programarea_in_ecoregion,
      area_km2,
      ctr_lon,
      ctr_lat
    ) |>
    arrange(region_key, ecoregion_key, programarea_key) |>
    mutate(
      area_km2 = round(area_km2, 2),
      ctr_lon = round(ctr_lon, 4),
      ctr_lat = round(ctr_lat, 4)
    )

  # attribute table as CSV (no geometry) plus the full geopackage
  er_pra |>
    st_drop_geometry() |>
    write_csv(er_pra_csv, na = "")

  write_sf(er_pra, er_pra_gpkg, delete_dsn = T)
}
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
Code
# re-read the intersection layer (works for both fresh and cached runs)
er_pra <- read_sf(er_pra_gpkg)

8.2 Label placement points

Code
# build label-point layers from the centroid coordinates stored on each
# polygon layer's attribute table
pts_from_ctr <- function(ply, key_flds) {
  # one point per feature at (ctr_lon, ctr_lat), keeping key + region fields
  ply |>
    st_drop_geometry() |>
    select(
      all_of(key_flds),
      region_name,
      region_key,
      ctr_lon,
      ctr_lat
    ) |>
    st_as_sf(coords = c("ctr_lon", "ctr_lat"), crs = 4326)
}
er_pts <- pts_from_ctr(er, c("ecoregion_key", "ecoregion_name"))
pra_pts <- pts_from_ctr(pra, c("programarea_key", "programarea_name"))

# keep file-exists guard — label placement is manually adjusted
if (!file.exists(lbls_csv)) {
  bind_rows(
    er_pts |> st_drop_geometry() |> select(ecoregion_key),
    pra_pts |> st_drop_geometry() |> select(programarea_key)
  ) |>
    arrange(ecoregion_key, programarea_key) |>
    mutate(
      text_anchor = ifelse(!is.na(programarea_key), "center", "top"),
      text_justify = "center",
      text_offset_right = 0,
      text_offset_down = 0
    ) |>
    write_csv(lbls_csv, na = "")
}

8.3 Score tables

Code
# program-area attributes (metrics as columns), geometry dropped
d_pra <- read_sf(pra_gpkg) |>
  st_drop_geometry()

# field-to-label mapping from the metric table (single source of truth)
d_metric_labels <- tbl(con_sdm, "metric") |>
  filter(!is.na(metric_title)) |>
  select(metric_key, metric_title, metric_abbrev) |>
  collect()

# metric fields actually present in the program-area data
flds <- intersect(names(d_pra), d_metric_labels$metric_key)

# collapse a full metric_key into its short display name
shorten_key <- function(x) {
  x |>
    str_replace(
      "score_extriskspcat_primprod_ecoregionrescaled_equalweights",
      "score"
    ) |>
    str_replace("_ecoregion_rescaled", "") |>
    str_replace("extrisk_", "")
}

# named vector: metric_key -> short name for column renaming
flds_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = shorten_key(metric_key)) |>
  pull(short, name = metric_key)

# ecoregion header rows: one row per ecoregion with NA in every metric
# column (so they stack cleanly above program-area rows)
na_cols <- rep(list(NA_real_), length(flds_rn) + 1) |>
  setNames(c("area_km2", unname(flds_rn)))

d_er_n <- read_csv(er_pra_csv) |>
  distinct(
    region_key,
    region_name,
    ecoregion_key,
    ecoregion_name
  ) |>
  bind_cols(as_tibble(na_cols))

# program-area rows: intersection percentages joined to metric values,
# metric columns renamed to their short names
d_pra_n <- read_csv(er_pra_csv) |>
  select(
    region_name,
    ecoregion_name,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion
  ) |>
  left_join(
    d_pra |>
      select(all_of(c("programarea_key", "area_km2", flds))) |>
      rename_with(~ flds_rn[.x], all_of(flds)),
    by = "programarea_key"
  )

# stack ecoregion header rows above their program-area rows, then build
# a display label per row for the docx tables below
d_er_pra_n <- bind_rows(
  d_er_n |> mutate(row_type = "ecoregion"),
  d_pra_n |> mutate(row_type = "programarea")
) |>
  relocate(
    row_type,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion,
    .after = ecoregion_name
  ) |>
  # "ecoregion" sorts before "programarea", so each header row precedes
  # its program areas
  arrange(region_name, ecoregion_name, row_type, programarea_name) |>
  mutate(
    # " [xx.x%]" suffix only when the program area spans >1 ecoregion
    # (pct is NA for fully-contained areas; see na_if upstream)
    pct = ifelse(
      !is.na(pct_programarea_in_ecoregion),
      glue(" [{round(pct_programarea_in_ecoregion, 1)}%]", .trim = F),
      ""
    ),
    # NOTE(review): rendered output reports a case_match() deprecation
    # warning here — confirm dplyr version before swapping a replacement
    row_lbl = case_match(
      row_type,
      "ecoregion" ~ glue("{region_name}: {ecoregion_name} ({ecoregion_key})"),
      "programarea" ~ glue("· {programarea_name} ({programarea_key}){pct}")
    )
  ) |>
  relocate(row_lbl)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `row_lbl = case_match(...)`.
Caused by warning:
! `case_match()` was deprecated in dplyr 1.2.0.
ℹ Please use `recode_values()` instead.
Code
# write the combined ecoregion / program-area score table as an
# Excel-safe CSV; column order fixed, species categories included only
# where present
d_er_pra_n |>
  select(
    all_of(c(
      "row_type",
      "region_name",
      "ecoregion_name",
      "ecoregion_key",
      "programarea_name",
      "programarea_key",
      "pct_programarea_in_ecoregion",
      "row_lbl",
      "score",
      "primprod"
    )),
    any_of(sp_cats),
    all_of("area_km2")
  ) |>
  write_excel_csv(er_pra_n_csv)

9 Summary figures and tables

Merged from msens-summary_programareas.qmd: maps, flower plots, primary productivity charts, and Word/Excel tables.

Code
# ensure the figure output directory exists
dir_create(dir_figs)

# redo: clear any previously rendered figures so everything regenerates
# (fs::file_delete is a no-op on an empty vector)
if (redo_summary_figs) {
  dir_ls(dir_figs) |>
    file_delete()
}

9.1 Program Area boundary maps

Code
# manually adjusted label-placement parameters
d_lbls <- read_csv(lbls_csv)

# overall score metric used for program-area fill color
var_pra <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
n_cols <- 11
rng_pra <- pra |>
  pull({{ var_pra }}) |> # var_pra is a string; injected as the column name
  range()
# reversed Spectral: red = high score, blue = low
cols_pra <- rev(RColorBrewer::brewer.pal(n_cols, "Spectral"))
# n_cols evenly spaced color stops across the observed score range
brks_pra <- seq(rng_pra[1], rng_pra[2], length.out = n_cols)

# render two static boundary maps — Alaska (longitudes shifted across the
# antimeridian) and the Lower 48 — each with score-filled program areas,
# ecoregion outlines, and labels; screenshot each widget to PNG
for (is_ak in c(T, F)) {
  if (is_ak) {
    # alaska: shift to 0-360 longitudes so polygons don't wrap at 180°
    er_area <- er |>
      filter(region_key == "AK") |>
      st_shift_longitude()
    pra_area <- pra |>
      filter(region_key == "AK") |>
      st_shift_longitude()

    # label points joined to manual placement adjustments (d_lbls)
    er_pts_area <- er_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )

    lgnd_pos <- "top-left"
  } else {
    # lower 48: select features intersecting the bbox of non-AK areas
    box_lower48 <- pra |>
      filter(region_key != "AK") |>
      st_bbox() |>
      st_as_sfc()

    er_area <- er |>
      st_filter(box_lower48, .predicate = st_intersects)
    pra_area <- pra |>
      st_filter(box_lower48, .predicate = st_intersects)

    er_pts_area <- er_pts |>
      filter(ecoregion_key %in% er_area$ecoregion_key) |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(programarea_key %in% pra_area$programarea_key) |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )

    lgnd_pos <- "bottom-left"
  }
  scale_pos <- "bottom-right"

  # vector-tile sources served by pg_tileserv via the msens API
  er_url <- glue(
    "https://api.marinesensitivity.org/tilejson?table=public.ply_ecoregions_2025{v_sfx}&use_cache=FALSE"
  )
  pra_url <- glue(
    "https://api.marinesensitivity.org/tilejson?table=public.ply_programareas_2026{v_sfx}&use_cache=FALSE"
  )
  # mapbox "in" filters restrict rendering to this region's features
  er_filter <- c("in", "ecoregion_key", er_area$ecoregion_key)
  pra_filter <- c("in", "programarea_key", pra_area$programarea_key)

  # compose the map: fill layer (score), outlines, then label symbols
  m <- mapboxgl(
    style = mapbox_style("dark"),
    projection = "globe"
  ) |>
    fit_bounds(bbox = er_area) |>
    add_vector_source(
      id = "pra_src",
      url = pra_url
    ) |>
    add_vector_source(
      id = "er_src",
      url = er_url
    ) |>
    add_fill_layer(
      id = "pra_fill",
      source = "pra_src",
      source_layer = glue("public.ply_programareas_2026{v_sfx}"),
      filter = pra_filter,
      # continuous interpolation of score -> Spectral ramp
      fill_color = mapgl::interpolate(
        column = var_pra,
        values = brks_pra,
        stops = cols_pra
      )
    ) |>
    add_line_layer(
      id = "er_ln",
      source = "er_src",
      source_layer = glue("public.ply_ecoregions_2025{v_sfx}"),
      filter = er_filter,
      line_color = "lightgray",
      line_opacity = 1,
      line_width = 5
    ) |>
    add_line_layer(
      id = "pra_ln",
      source = "pra_src",
      source_layer = glue("public.ply_programareas_2026{v_sfx}"),
      filter = pra_filter,
      line_color = "black",
      line_opacity = 1,
      line_width = 1
    ) |>
    add_symbol_layer(
      id = "pra_symbol",
      source = pra_pts_area,
      filter = pra_filter,
      text_field = get_column("programarea_key"),
      text_allow_overlap = T,
      text_anchor = get_column("text_anchor"),
      text_justify = get_column("text_justify"),
      text_offset = list(
        get_column("text_offset_right"),
        get_column("text_offset_down")
      )
    ) |>
    add_symbol_layer(
      id = "er_symbol",
      source = er_pts_area,
      filter = er_filter,
      text_field = get_column("ecoregion_name"),
      text_color = "black",
      text_allow_overlap = T,
      text_anchor = get_column("text_anchor"),
      text_justify = get_column("text_justify"),
      text_offset = list(
        get_column("text_offset_right"),
        get_column("text_offset_down")
      ),
      text_halo_color = "lightgray",
      text_halo_width = 2,
      text_halo_blur = 2,
      text_line_height = 1.5,
      text_size = 20
    ) |>
    mapgl::add_legend(
      "Score",
      values = rng_pra,
      colors = cols_pra,
      position = lgnd_pos
    ) |>
    add_scale_control(position = scale_pos)

  # screenshot the widget to PNG, then remove the intermediate html
  b <- glue("{dir_figs}/map_pra_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}")
  b_html <- glue("{b}.html")
  b_files <- glue("{b}_files")
  b_png <- glue("{b}.png")
  saveWidget(m, b_html, selfcontained = F, background = "transparent")
  webshot(
    b_html,
    b_png,
    vwidth = 1200,
    vheight = 800,
    zoom = 2,
    selector = "#htmlwidget_container",
    delay = 2, # allow tiles to load before capture
    quiet = T
  )
  file_delete(b_html)
  dir_delete(b_files)
}

9.2 Cell score & NPP maps

Code
# VGPM net primary productivity raster (2014-2023 average & sd),
# pre-clipped to planning areas
vgpm_tif <- glue(
  "{dir_data}/raw/oregonstate.edu/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd_planareas.tif"
)

r_metrics <- rast(metrics_tif)
r_vgpm <- rast(vgpm_tif)
d_lbls <- read_csv(lbls_csv)

lyr_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
lyr_npp <- "npp_avg"

# mask to program areas to exclude Atlantic coast
ply_mask <- vect(pra |> st_shift_longitude())
r_score <- r_metrics[[lyr_score]] |> mask(ply_mask)
# log-transform NPP for display (right-skewed)
r_npp <- r_vgpm |> log() |> mask(ply_mask)

# color ramps: Spectral (reversed) for score, viridis for NPP
n_cols <- 11
rng_r_score <- range(values(r_score, na.rm = T))
cols_r_score <- rev(RColorBrewer::brewer.pal(n_cols, "Spectral"))
brks_r_score <- seq(rng_r_score[1], rng_r_score[2], length.out = n_cols)

rng_r_npp <- range(values(r_npp, na.rm = T))
cols_r_npp <- pal_viridis()(n_cols)
brks_r_npp <- seq(rng_r_npp[1], rng_r_npp[2], length.out = n_cols)

# render four cell-level raster maps: {NPP, score} x {Alaska, Lower 48},
# each overlaid with ecoregion / program-area outlines and labels
for (is_vgpm in c(T, F)) {
  # npp: is_vgpm = T # score: is_vgpm = F
  if (is_vgpm) {
    r <- r_npp
    rng_r <- round(rng_r_npp, 2)
    cols_r <- cols_r_npp
    # NOTE(review): brks_r and lyr are set here but never used below
    # (the image source and legend only use cols_r / rng_r) — confirm
    brks_r <- brks_r_npp
    lyr <- lyr_npp
    title <- "Primary Productivity (log(NPP))"
  } else {
    r <- r_score
    rng_r <- rng_r_score
    cols_r <- cols_r_score
    brks_r <- brks_r_score
    lyr <- lyr_score
    title <- "Score"
  }

  for (is_ak in c(T, F)) {
    # is_ak = T
    if (is_ak) {
      # alaska: mask + trim the raster to AK program areas, shifted
      # across the antimeridian
      rgn_mask <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        vect()

      r_area <- r |>
        mask(rgn_mask) |>
        trim()

      er_area <- er |>
        filter(region_key == "AK") |>
        st_shift_longitude()

      pra_area <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude()

      # label points joined to manual placement adjustments (d_lbls)
      er_pts_area <- er_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )

      lgnd_pos <- "top-left"
    } else {
      # lower 48: mask to non-AK areas, bbox-filter the vector layers
      rgn_mask <- pra |>
        filter(region_key != "AK") |>
        st_shift_longitude() |>
        vect()

      r_area <- r |>
        mask(rgn_mask) |>
        trim()

      box_lower48 <- pra |>
        filter(region_key != "AK") |>
        st_bbox() |>
        st_as_sfc()

      er_area <- er |>
        st_filter(box_lower48, .predicate = st_intersects)
      pra_area <- pra |>
        st_filter(box_lower48, .predicate = st_intersects)

      er_pts_area <- er_pts |>
        filter(ecoregion_key %in% er_area$ecoregion_key) |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(programarea_key %in% pra_area$programarea_key) |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )

      lgnd_pos <- "bottom-left"
    }
    scale_pos <- "bottom-right"

    # vector-tile sources served by pg_tileserv via the msens API
    er_url <- glue(
      "https://api.marinesensitivity.org/tilejson?table=public.ply_ecoregions_2025{v_sfx}&use_cache=FALSE"
    )
    pra_url <- glue(
      "https://api.marinesensitivity.org/tilejson?table=public.ply_programareas_2026{v_sfx}&use_cache=FALSE"
    )
    er_filter <- c("in", "ecoregion_key", er_area$ecoregion_key)
    pra_filter <- c("in", "programarea_key", pra_area$programarea_key)

    # compose the map: raster image, outlines, then label symbols
    m <- mapboxgl(
      style = mapbox_style("dark"),
      projection = "globe"
    ) |>
      fit_bounds(bbox = er_area) |>
      add_image_source(
        id = "r_src",
        data = r_area,
        colors = cols_r
      ) |>
      add_vector_source(
        id = "pra_src",
        url = pra_url
      ) |>
      add_vector_source(
        id = "er_src",
        url = er_url
      ) |>
      add_raster_layer(
        id = "r_lyr",
        source = "r_src",
        raster_opacity = 0.9,
        raster_resampling = "nearest"
      ) |>
      add_line_layer(
        id = "er_ln",
        source = "er_src",
        source_layer = glue("public.ply_ecoregions_2025{v_sfx}"),
        filter = er_filter,
        line_color = "lightgray",
        line_opacity = 1,
        line_width = 5
      ) |>
      add_line_layer(
        id = "pra_ln",
        source = "pra_src",
        source_layer = glue("public.ply_programareas_2026{v_sfx}"),
        filter = pra_filter,
        line_color = "black",
        line_opacity = 1,
        line_width = 1
      ) |>
      add_symbol_layer(
        id = "pra_symbol",
        source = pra_pts_area,
        filter = pra_filter,
        text_field = get_column("programarea_key"),
        text_allow_overlap = T,
        text_anchor = get_column("text_anchor"),
        text_justify = get_column("text_justify"),
        text_offset = list(
          get_column("text_offset_right"),
          get_column("text_offset_down")
        )
      ) |>
      add_symbol_layer(
        id = "er_symbol",
        source = er_pts_area,
        filter = er_filter,
        text_field = get_column("ecoregion_name"),
        text_color = "black",
        text_allow_overlap = T,
        text_anchor = get_column("text_anchor"),
        text_justify = get_column("text_justify"),
        text_offset = list(
          get_column("text_offset_right"),
          get_column("text_offset_down")
        ),
        text_halo_color = "lightgray",
        text_halo_width = 2,
        text_halo_blur = 2,
        text_line_height = 1.5,
        text_size = 20
      ) |>
      mapgl::add_legend(
        title,
        values = rng_r,
        colors = cols_r,
        position = lgnd_pos
      ) |>
      add_scale_control(position = scale_pos)

    # screenshot the widget to PNG, then remove the intermediate html
    b <- glue(
      "{dir_figs}/map_cell_{ifelse(is_vgpm, 'npp', 'score')}_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}"
    )
    b_html <- glue("{b}.html")
    b_files <- glue("{b}_files")
    b_png <- glue("{b}.png")
    saveWidget(m, b_html, selfcontained = F, background = "transparent")
    webshot(
      b_html,
      b_png,
      vwidth = 1200,
      vheight = 800,
      zoom = 2,
      selector = "#htmlwidget_container",
      delay = 2, # allow tiles to load before capture
      quiet = T
    )
    file_delete(b_html)
    dir_delete(b_files)
  }
}

9.3 Ecoregion × Program Area table (docx)

Code
# Word table of scores, ecoregion header rows in bold above their
# program-area rows
tbl_docx <- glue("{dir_figs}/table_ecoregion_programarea_scores{v_sfx}.docx")

set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)

# build rename vector from metric table: metric_abbrev -> short_name
# (score excluded; it gets its own column)
abbrev_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_abbrev)

d_er_pra_n |>
  mutate(
    # report area in thousands of km2
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    # markdown bold for ecoregion header rows (rendered by colformat_md)
    row_lbl = ifelse(
      row_type == "ecoregion",
      glue("**{row_lbl}**"),
      row_lbl
    )
  ) |>
  select(
    Area = row_lbl,
    Score = score,
    all_of(abbrev_rn),
    `Area (1,000 km^2^)` = area_kkm2
  ) |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |> # render **bold** and ^sup^
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)

# browseURL(tbl_docx)

9.4 Program Area table (docx + xlsx)

Code
# program-area-only score table, sorted by descending score, written as
# docx, xlsx, and CSV
tbl_docx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.docx")
tbl_xlsx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.xlsx")

set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)

# build rename vector: metric_title -> short_name (score excluded)
title_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_title)

d_pra_only <- d_er_pra_n |>
  filter(row_type == "programarea") |>
  arrange(desc(score)) |>
  select(
    -row_type,
    -ecoregion_name,
    -ecoregion_key,
    -pct_programarea_in_ecoregion
  ) |>
  mutate(
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    row_lbl = glue("{programarea_name} ({programarea_key})")
  ) |>
  select(
    row_lbl,
    score,
    all_of(unname(title_rn)),
    area_kkm2
  ) |>
  # NOTE(review): distinct() presumably collapses the duplicates created
  # when a program area spans multiple ecoregions — confirm
  distinct() |>
  select(
    `Program Area` = row_lbl,
    Score = score,
    all_of(title_rn),
    `Area (1,000 km^2^)` = area_kkm2
  )

write_excel_csv(d_pra_only, pra_n_csv, na = "")

d_pra_only |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |>
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)

# xlsx version (plain column names without superscript)
d_pra_only |>
  rename(`Area (1,000 km2)` = `Area (1,000 km^2^)`) |>
  openxlsx::write.xlsx(tbl_xlsx)

# browseURL(tbl_docx)

9.5 Program Area flower plots

Code
# pra list from PostGIS (keys + names of all program areas)
pra_all <- tbl(con, glue("ply_programareas_2026{v_sfx}")) |>
  select(programarea_key, programarea_name) |>
  collect()

# component metrics (flower petals): per-category extinction risk plus
# primary productivity, all ecoregion-rescaled
metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)

# build components from metric table (display titles, excluding Score)
components <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  pull(metric_title)
# one stable hue per component
component_cols <- setNames(
  hue_pal()(length(components)),
  components
)

# d_scores: overall score metric lookup
pra_tbl <- glue("ply_programareas_2026{v_sfx}")
m_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_key_score <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_score) |>
  pull(metric_seq)

# overall score per program area, sorted descending (drives facet order)
d_scores <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric") |>
      filter(metric_seq == !!m_key_score),
    by = join_by(zone_seq)
  ) |>
  select(programarea_key, score = value) |>
  mutate(score = round(score)) |>
  collect() |>
  arrange(desc(score), programarea_key)

# ordered factor so facets plot highest-scoring program areas first
d_scores$programarea_lbl <- factor(
  d_scores$programarea_key,
  levels = d_scores$programarea_key,
  ordered = T
)

# d_fl: long table of component values per program area (flower petals)
d_fl <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric"),
    by = "zone_seq"
  ) |>
  inner_join(
    tbl(con_sdm, "metric") |>
      filter(metric_seq %in% m_input_seqs) |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  arrange(programarea_key, metric_key) |>
  collect() |>
  mutate(
    # strip metric_key affixes down to a bare component name
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  select(programarea_key, component, value) |>
  arrange(programarea_key, component, value)

# add missing components with value 0 so every flower has the same
# number of petals
d_0 <- expand_grid(
  programarea_key = d_scores$programarea_key,
  component = sort(unique(d_fl$component)),
  value = 0
) |>
  anti_join(
    d_fl,
    by = join_by(programarea_key, component)
  )

d_fl <- d_fl |>
  bind_rows(d_0) |>
  arrange(programarea_key, component) |>
  mutate(even = 1) # equal angular weight per petal

# build component display name lookup from metric table (same affix
# stripping as above, so names line up)
component_labels <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  mutate(
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  pull(metric_title, name = component)

# facet order by descending score; components get display titles
d_fl <- d_fl |>
  mutate(
    programarea_lbl = factor(
      programarea_key,
      levels = d_scores$programarea_key,
      ordered = T
    ),
    component = component_labels[component]
  ) |>
  arrange(programarea_lbl, component)

# all flowers: per program area, stack one angular petal per component;
# radial extent of each petal is the component value
# NOTE(review): d stays grouped by programarea_lbl after this — ggplot
# below ignores grouping, but downstream reuse of d should be aware
d <- d_fl |>
  arrange(programarea_lbl, component) |>
  group_by(programarea_lbl) |>
  mutate(across(!where(is.character), as.double)) |>
  mutate(
    ymax = cumsum(even), # equal angular width per petal
    ymin = lag(ymax, default = 0),
    xmax = value, # petal length = component value
    xmin = 0
  )

# render the flower plot in several themes, saving PNG + PDF per theme.
# The redundant constant colour mapping inside aes() was removed: the
# layer-level `color = "white"` parameter overrides a mapped aesthetic
# entirely in ggplot2, so the rendered output is unchanged.
for (theme_name in c("minimal", "linedraw", "min_line", "dark", "void")) {
  # resolve theme function from its short name
  theme_fxn <- switch(
    theme_name,
    "minimal" = theme_minimal,
    "linedraw" = theme_linedraw,
    "min_line" = function(...) {
      # minimal theme with hairline black grid lines
      theme_minimal(...) %+replace%
        theme(
          panel.grid = element_line(colour = "black"),
          panel.grid.major = element_line(linewidth = rel(0.1)),
          panel.grid.minor = element_line(linewidth = rel(0.05))
        )
    },
    "dark" = theme_dark,
    "void" = theme_void
  )

  g <- d |>
    ggplot() +
    geom_rect_interactive(
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = component,
        data_id = component
      ),
      color = "white",
      alpha = 1
    ) +
    scale_fill_manual(
      values = component_cols,
      name = "Component"
    ) +
    facet_wrap(~programarea_lbl) +
    # polar theta on y: petals fan around the circle
    coord_polar(theta = "y") +
    # negative inner radius leaves a hole for the score label
    xlim(c(-20, max(d_scores$score, d$xmax))) +
    annotate(
      "rect",
      xmin = -20,
      xmax = 0,
      ymin = 0,
      ymax = max(d$ymax),
      fill = "white",
      color = NA
    ) +
    # overall score printed at the flower center
    geom_text(
      aes(x, y, label = score),
      data = d_scores |> mutate(x = -20, y = 0),
      size = 1.5,
      fontface = "bold",
      color = ifelse(theme_name == "dark", "white", "black")
    ) +
    theme_fxn() +
    theme(
      legend.position = "bottom",
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 6),
      plot.margin = unit(c(1, 1, 1, 1), "pt"),
      strip.text.x = element_text(size = 6),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )

  pra_fl_png <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.png"
  )
  pra_fl_pdf <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.pdf"
  )
  png(pra_fl_png, width = 1200, height = 800, res = 150)
  print(g)
  dev.off()
  pdf(pra_fl_pdf, width = 7, height = 5)
  print(g)
  dev.off()
}

9.6 Primary productivity by Program Area

Code
# per-program-area primary productivity (mean ± sd), as CSV + bar chart
metric_pp_keys <- glue("primprod_{c('avg', 'stddev')}")

pra_tbl <- glue("ply_programareas_2026{v_sfx}")
d_pp_raw <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key"
  ) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      filter(metric_key %in% metric_pp_keys),
    by = "metric_seq"
  ) |>
  filter(
    !is.na(programarea_key),
    !is.na(metric_key),
    !is.na(value)
  ) |>
  select(programarea_key, metric_key, value) |>
  arrange(programarea_key, metric_key) |>
  collect()

# skip gracefully when the primprod metrics haven't been loaded yet
if (nrow(d_pp_raw) > 0) {
  d_pp <- d_pp_raw |>
    left_join(
      tbl(con, glue("ply_programareas_2026{v_sfx}")) |>
        select(programarea_key, programarea_name) |>
        collect(),
      by = "programarea_key"
    ) |>
    filter(!is.na(programarea_name)) |>
    tidyr::pivot_wider(
      names_from = metric_key,
      values_from = value
    ) |>
    rename(
      avg = primprod_avg,
      sd = primprod_stddev
    ) |>
    mutate(
      # convert mg C/m^2/day -> metric tons C/km^2/yr via units
      across(
        where(is.numeric),
        ~ set_units(.x, mg / m^2 / day) |>
          set_units(t / km^2 / yr)
      )
    )

  pp_csv <- glue("{dir_figs}/programarea_primprod{v_sfx}.csv")
  d_pp |>
    mutate(across(where(is.numeric), drop_units)) |>
    write_csv(pp_csv)

  # bar chart sorted by descending mean NPP with ±1 sd error bars
  g <- d_pp |>
    arrange(desc(avg)) |>
    mutate(across(where(is.numeric), drop_units)) |>
    mutate(
      programarea_name = factor(programarea_name, levels = programarea_name)
    ) |>
    ggplot(aes(x = programarea_name, y = avg)) +
    geom_bar(
      position = position_dodge(),
      stat = "identity",
      fill = "darkgreen"
    ) +
    geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), width = .4) +
    labs(
      title = NULL,
      x = NULL,
      y = expression("Areal NPP (metric tons C km"^-2 ~ "yr"^-1 * ")")
    ) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(
      axis.text.x = element_text(
        angle = 45,
        vjust = 1,
        hjust = 1
      )
    )

  pp_png <- glue("{dir_figs}/programarea_primprod{v_sfx}.png")
  pp_pdf <- glue("{dir_figs}/programarea_primprod{v_sfx}.pdf")
  png(pp_png, width = 1200, height = 800, res = 150)
  print(g)
  dev.off()
  pdf(pp_pdf, width = 7, height = 5)
  print(g)
  dev.off()
} else {
  message(
    "No primprod_avg/primprod_stddev zone_metric data found; skipping primprod plot."
  )
}
No primprod_avg/primprod_stddev zone_metric data found; skipping primprod plot.

9.7 Copy figures to dir_v

Code
# data files are already written directly to dir_v;
# copy rendered figures from local figs/ into dir_v
fig_files <- dir_ls(dir_figs, regexp = "\\.(png|pdf|docx|xlsx|csv)$")

# fs::file_copy() is vectorized, so no loop is needed; guard against an
# empty match and spell out TRUE (T is reassignable and discouraged)
if (length(fig_files) > 0) {
  file_copy(
    fig_files,
    glue("{dir_v}/{path_file(fig_files)}"),
    overwrite = TRUE
  )
}

message(glue("Copied {length(fig_files)} figure files to {dir_v}"))
Copied 19 figure files to ~/My Drive/projects/msens/data/derived/v3

10 Generate README

Programmatically generate README.md and README.html for the versioned output directory. Uses data/derived_products.csv for file descriptions.

Code
# file descriptions keyed by output filename (data/derived_products.csv)
dp_csv <- here("data/derived_products.csv")
stopifnot(file_exists(dp_csv))

d_dp <- read_csv(dp_csv)

# list actual files in dir_v (non-recursive, exclude README itself)
v_files <- dir_ls(dir_v, recurse = FALSE) |>
  path_file() |>
  str_subset("^README", negate = TRUE) |>
  # exclude ~$ temp files from Word/Excel
  str_subset("^~\\$", negate = TRUE)

# match files to descriptions; unmatched files get an empty description
d_files <- tibble(file = v_files) |>
  left_join(
    d_dp |> select(file = output_file, description),
    by = "file"
  ) |>
  mutate(
    # coalesce() is the idiomatic NA -> "" replacement (vs ifelse(is.na(...)))
    description = coalesce(description, "")
  ) |>
  arrange(file)

# pipe-table markdown of the file listing, collapsed to one string
lns_files <- d_files |>
  knitr::kable(format = "pipe") |>
  paste(collapse = '\n')

# build README.md content (glue strips the common leading indentation)
readme_lines <- c(
  glue(
    "
    ---
    format:
      html:
        embed-resources: true
    ---
    
    # Derived Products — {ver}
     
    Version: `{v_sfx}` | Generated: {Sys.Date()}
    
    ## File listing

    {lns_files}

    ## Naming conventions

    - `ply_*`: polygon spatial data (GeoPackage)
    - `r_*`: raster data (GeoTIFF)
    - `tbl_*`: tabular summary data (CSV) 
    - `layers_*`: layer metadata for metrics and species
    - `zone_taxon_*`: zone × taxon matrix
    - `*_v3`: version 3 model outputs
    
    ## Large files
    
    Some files exceed GitHub size limits and are stored in the 
    `big-files/{ver}/` directory (not tracked by git). 
    See `sdm_parquet/` for parquet exports.
    
    _Auto-generated by `calc_scores.qmd` on {Sys.time()}_"
  )
)

readme_md <- glue("{dir_v}/README.md")
writeLines(readme_lines, readme_md)
message(glue("Wrote {readme_md}"))

# render to HTML (path_real resolves symlinks so quarto gets a real path)
quarto_render(readme_md |> path_real())
message(glue("Rendered {readme_md} to HTML"))

11 Map score cells

Code
librarian::shelf(mapgl)

# raster template defining the cell grid (cell_id -> pixel position)
# r_global <- readRDS(glue("{dir_data}/derived/r_global_0.05.rds"))
cell_tif <- glue("{dir_data}/derived/r_bio-oracle_planarea.tif")

r_cell <- rast(cell_tif)

# d_metric <- tbl(con_sdm, "metric") |> collect()
# View(d_metric)
# TODO: add sequences for date_created post-export-parquet

# metric to map
# m_key <- "extrisk_all_ecoregion_rescaled"
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)
# fail fast if the metric key is missing or matches more than one row
stopifnot(length(m_seq) == 1)

# fetch cell values for the metric; m_seq is interpolated directly rather
# than re-running the metric_key subquery inside the SQL (it was previously
# computed but never used)
d_m <- dbGetQuery(
  con_sdm,
  glue(
    "
  SELECT
    cm.cell_id,
    cm.value
  FROM cell_metric cm
  WHERE cm.metric_seq = {m_seq}"
  )
) |>
  tibble()
# one value per cell is required before rasterizing
stopifnot(sum(duplicated(d_m$cell_id)) == 0)

# convert to raster
r_m <- create_raster_from_df(d_m, r_cell, value_col = "value")

# display with mapgl
# TODO: make into function in msens R package

# Render a single raster layer on a dark Mapbox basemap with a legend and
# basic map controls; returns the mapboxgl htmlwidget.
mapgl_rast <- function(
  r,
  cols_r = rev(RColorBrewer::brewer.pal(11, "Spectral")),
  layer_name = "SDM"
) {
  # legend endpoints: raster min/max rounded to 2 significant digits
  rng_r <- r |>
    minmax() |>
    as.numeric() |>
    signif(digits = 2)

  # base map centered on the ply_pa centroid
  m <- mapboxgl(
    style = mapbox_style("dark"),
    zoom = 3.5,
    center = c(-106, 40.1)
  )

  m |>
    add_image_source(
      id = "r_src",
      data = r,
      colors = cols_r
    ) |>
    add_raster_layer(
      id = "r_lyr",
      source = "r_src",
      raster_resampling = "nearest"
    ) |>
    mapgl::add_legend(
      layer_name,
      values = rng_r,
      colors = cols_r,
      position = "bottom-right"
    ) |>
    add_fullscreen_control(position = "top-left") |>
    add_navigation_control() |>
    add_scale_control()
}

# visualize
mapgl_rast(r_m, layer_name = m_key)

12 Parquet export/import

This cleans up the DuckDB database by exporting tables to Parquet files (with sort order for faster reads) and then re-importing them to a new DuckDB database. This also allows us to remove indexes from the DuckDB database, which are not needed for the mapgl app and can slow down writes.

12.1 Export to Parquet

Code
# TODO: model_cell: PARTITION BY (mdl_seq);") # https://duckdb.org/docs/stable/data/parquet/overview

# reconnect read-write for the export
source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = FALSE)) |>
  load_duckdb_extensions()

# dbListTables(con_sdm)
dbExecute(con_sdm, "SET threads = 7;")
dbExecute(con_sdm, "SET memory_limit = '20GB';")

# mustWork = FALSE avoids a warning when dir_pq does not yet exist
dir_pq <- normalizePath(dir_pq, mustWork = FALSE)
if (dir_exists(dir_pq)) {
  dir_delete(dir_pq)
}
dir_create(dir_pq)

# OLD: add primary keys, indexes
# NEW: skip indexes, sort on export, per [performance](https://duckdb.org/docs/stable/guides/performance/indexing.html)

# dbRemoveTable(con_sdm, "cell_model", force = T)
# Sort keys per exported table: each parquet export below is ORDER BY these
# columns, so readers benefit from row-group min/max pruning on the keys.
tbl_keys <- list(
  cell = "cell_id",
  cell_metric = c("cell_id", "metric_seq"),
  # cell_model  = c("cell_id", "mdl_seq"),   # source: model_cell; alternative key sorting order
  dataset = "ds_key",
  listing = "spp_id",
  metric = "metric_seq",
  model = "mdl_seq",
  model_cell = c("mdl_seq", "cell_id"), # *big* one
  species = "sp_seq",
  taxon = "taxon_id",
  taxon_model = c("taxon_id", "ds_key"),
  zone = "zone_seq",
  zone_cell = c("zone_seq", "cell_id"),
  zone_metric = c("zone_seq", "metric_seq"),
  zone_taxon = c("zone_fld", "zone_value", "mdl_seq", "taxon_id")
)
# tables in the db and in tbl_keys must match one-to-one (both directions)
stopifnot(length(setdiff(names(tbl_keys), dbListTables(con_sdm))) == 0)
stopifnot(length(setdiff(dbListTables(con_sdm), names(tbl_keys))) == 0)

# export parquet files with sort order
for (tbl in names(tbl_keys)) {
  # tbl = names(tbl_keys)[1]  # tbl = "cell_model"
  keys <- tbl_keys[[tbl]]
  keys_str <- paste(keys, collapse = ', ')
  pq_file <- glue("{dir_pq}/{tbl}.parquet")

  message(glue(
    "Exporting {tbl} (ORDER BY {keys_str}) to parquet  ~  {Sys.time()}"
  ))

  # cell_model is exported from model_cell, just with a different key order
  fro_tbl <- tbl
  if (tbl == "cell_model") {
    fro_tbl <- "model_cell"
  }
  stopifnot(all(keys %in% dbListFields(con_sdm, fro_tbl)))

  if (tbl %in% c("cell_model", "model_cell")) {
    # list the sort keys first, then the remaining fields
    flds <- dbListFields(con_sdm, fro_tbl) |>
      setdiff(keys)
    # BUG FIX: each non-key field must be individually double-quoted; the
    # old collapse = '", "' left the first and last names unquoted, which
    # produced invalid SQL whenever more than one non-key field remained
    flds_str <- paste(paste0('"', flds, '"'), collapse = ", ")
    sql <- glue(
      "COPY (
      SELECT {keys_str}, {flds_str} FROM {fro_tbl} 
      ORDER BY {keys_str}) 
      TO '{pq_file}' 
    (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  } else {
    sql <- glue(
      "COPY (
      SELECT * FROM {fro_tbl} 
      ORDER BY {keys_str}) 
      TO '{pq_file}' 
    (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  }
  message(sql)
  dbExecute(con_sdm, sql)
}
message(glue("Exporting finished  ~  {Sys.time()}")) # ~ 1 min

12.2 Import from Parquet

Code
# dir_pq and sdm_db defined in setup chunk

# Tear down any live connection before deleting the database file:
# disconnect + rm, then shut down the driver so the file handle is released.
if (exists("con_sdm")) {
  dbDisconnect(con_sdm, shutdown = T)
  rm(con_sdm)
}
duckdb_shutdown(duckdb())

# start from a clean database; tables are rebuilt from parquet below
if (file_exists(sdm_db)) {
  file_delete(sdm_db)
}

source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = F)) |>
  load_duckdb_extensions()

# dbExecute(con_sdm, "SET threads = 8;")
# dbExecute(con_sdm, "SET memory_limit = '20GB';")

# import parquet ----
# Each *.parquet entry is a PER_THREAD_OUTPUT directory of parquet parts;
# rebuild one table per directory, skipping tables that already exist.
for (pq_dir in dir_ls(dir_pq, glob = "*.parquet")) {
  # pq_dir = dir_ls(dir_pq, glob = "*.parquet")[2]
  tbl <- pq_dir |>
    path_file() |>
    str_remove("\\.parquet$")
  if (dbExistsTable(con_sdm, tbl)) {
    next
  }
  message(glue("Importing {tbl} from {basename(pq_dir)}/*.parquet"))
  dbExecute(
    con_sdm,
    glue("CREATE TABLE {tbl} AS SELECT * FROM '{pq_dir}/*.parquet'")
  )
}

# create index on 2nd column of multi-column index from parquet export
# tbl_idx <- list(
#   cell_metric = "metric_seq",
#   model_cell  = "mdl_seq",
#   zone_cell   = "cell_id",
#   zone_metric = "metric_seq")
# for (tbl in names(tbl_idx)){      # tbl = "model_cell"
#   fld <- tbl_idx[[tbl]]
#   idx <- glue("idx_{tbl}_{fld}")  # idx_model_cell_mdl_seq
#   stopifnot(fld %in% dbListFields(con_sdm, tbl))
#
#   sql <- glue("CREATE INDEX IF NOT EXISTS {idx} ON {tbl} ({fld});")
#   message(sql)
#   dbExecute(con_sdm, sql)
# }

# sequences ----
# for auto-incrementing primary keys
tbl_seq <- list(
  metric = "metric_seq",
  model = "mdl_seq",
  species = "sp_seq",
  zone = "zone_seq"
)
stopifnot(all(names(tbl_seq) %in% dbListTables(con_sdm)))

for (tbl in names(tbl_seq)) {
  # tbl = "metric"
  fld <- tbl_seq[[tbl]]
  id <- glue("seq_{tbl}")

  # next sequence value = max existing key + 1; an empty table yields
  # MAX() = NA (previously produced "CREATE SEQUENCE ... START NA"),
  # so fall back to starting at 1
  mx <- dbGetQuery(con_sdm, glue("SELECT MAX({fld}) FROM {tbl}")) |>
    pull(1)
  nxt <- if (is.na(mx)) 1 else mx + 1

  sql <- glue("CREATE SEQUENCE {id} START {nxt}")
  message(sql)
  dbExecute(con_sdm, sql)
  # wire the sequence in as the column default for future inserts
  sql <- glue(
    "ALTER TABLE {tbl} ALTER COLUMN {fld} SET DEFAULT nextval('{id}')"
  )
  message(sql)
  dbExecute(con_sdm, sql)
}

# date_created ----
# default to current_date
for (tbl in dbListTables(con_sdm)) {
  # tbl = "dataset"
  # only alter tables that actually have a date_created column
  fld <- dbListFields(con_sdm, tbl) |>
    str_subset("date_created")
  if (length(fld) == 1) {
    sql <- glue(
      "ALTER TABLE {tbl} ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;"
    )
    message(sql)
    # return value (affected row count) intentionally discarded;
    # previous assignment to unused local `a` removed
    dbExecute(con_sdm, sql)
  }
}

dbDisconnect(con_sdm, shutdown = TRUE)

13 Deploy to server

Sync derived data and app code to the msens server.

Code
# sync derived data and app code to the msens server via shell script
deploy_sh <- here("libs/deploy_to_server.sh")
stopifnot(file_exists(deploy_sh))

# system2() returns the exit status when wait = TRUE; previously the status
# was discarded, so a failed deploy would go unnoticed — fail loudly instead
status <- system2("bash", args = c(deploy_sh, ver), wait = TRUE)
if (status != 0) {
  stop(
    glue("deploy_to_server.sh failed with exit status {status}"),
    call. = FALSE
  )
}

14 Close Connection

Code
# NOTE(review): con_sdm was already disconnected at the end of the
# parquet-import chunk above; when chunks run in order this second
# dbDisconnect() operates on an invalid connection — confirm intended
# (harmless no-op vs error depends on duckdb version).
dbDisconnect(con_sdm, shutdown = T)