---
title: "Calculate Scores"
subtitle: "Calculate scores from models loaded into sdm db and taxon table"
format:
html:
code-fold: true
code-tools: true
editor_options:
chunk_output_type: console
---
## Overview
This notebook calculates scores from models loaded into sdm db and taxon table, including:
- Extinction risk metrics by class (eg `extrisk_bird`, `extrisk_fish`, etc)
- Other metrics TBD
**Source**:
- Copied this notebook from [ingest_aquamaps_to_sdm_duckdb.qmd](https://github.com/MarineSensitivity/workflows/blob/3880f9edf3a204d8ec9768e7be1a1aa51f70d9f0/ingest_aquamaps_to_sdm_duckdb.qmd) to work up only calculation component from models already loaded
**Process**:
- Before:
1. `ingest_taxon.qmd`
- Added `con_spp.botw` (Birds of the World) as another taxonomic authority with [ingest_taxon.qmd](https://github.com/MarineSensitivity/workflows/blob/3880f9edf3a204d8ec9768e7be1a1aa51f70d9f0/ingest_taxon.qmd)
1. `merge_models.qmd`
- Built `con_sdm.taxon` table as authoritative unique taxa reference from multiple entries in `species` with [merge_models.qmd](https://github.com/MarineSensitivity/workflows/blob/3880f9edf3a204d8ec9768e7be1a1aa51f70d9f0/merge_models.qmd)
- After:
1. Summary data/figs/README sections are now integrated into this notebook (merged from `msens-summary_programareas.qmd`)
To render this notebook on Ben's laptop:
```bash
cd ~/Github/MarineSensitivity/workflows
quarto render calc_scores.qmd
```
## TODO
- [x] fix "reptile" in flower plots (addressed by merge_models.qmd reclassification)
- [ ] Add functions here and in `libs/sdm_functions.R` to `msens` R package
- [ ] All R chunks under `## Add Zones` should be re-evaluated to check existence and creation with the version suffix (`{v_sfx} = "_v3"`), eg `ply_programareas_2026_v3`, `ply_subregions_2026_v3`, etc. For now evaluation is turned off (`eval = F`).
- [ ] Delete orphans, like zone_cell without zone, zone_metric without zone & metric,... Reference [DuckDB Raster Data Metric Sequence Insertion - Claude](https://claude.ai/chat/748f00aa-d2f6-46a5-91d9-fb56b27e3ebe)
- [ ] Add relationships and primary keys to sdm db tables (eg taxon_id in taxon, ds_seq in species, etc) for data integrity, and schema viz (ERD) for query understanding
- [ ] Show tabular results of R chunks before / after execution with `DT::datatable()`
- [ ] Migrate from PostGIS & pg_tileserv to [creating PMTiles](https://docs.protomaps.com/pmtiles/create) and map with `mapgl::add_pmtiles_source()`
- [ ] Redo parquet export into dir outside Google Drive (sync issues)
- include more complex models:
- unique by: ds_key, taxa_id, time_period
- pkey: mdl_seq
- [ ] ∆ taxa -> taxa_id (unique worms_id, botw_id)
- [ ] ∆ time_period -> time_type == `2019` (AquaMaps) -> "climatic annual"; need "climatic monthly" for GoMMAPS, plus annualized version
- [ ] ∆ time_period == "month:January"? ISO?
- [ ] + year_published: `2019` (AquaMaps)
- [ ] ∆ ds_key -> ds_seq; eg "am_0.05" -> 1; allows for later changing name, year version, etc
- [ ] rename table `species` -> `dataset_taxon`
- [ ] `taxon`: all unique taxa cross-listed with identifiers
```{r}
#| label: setup
# Load (installing if needed) every package used across this notebook.
librarian::shelf(
DBI,
dbplyr,
dplyr,
duckdb,
DT,
flextable,
fs,
ftExtra,
ggiraph,
ggplot2,
glue,
gt,
here,
htmlwidgets,
kableExtra,
knitr,
leaflet,
logger,
mapview,
openxlsx,
purrr,
quarto,
RColorBrewer,
readr,
scales,
sf,
shiny,
stringr,
terra,
tibble,
tidyr,
units,
webshot2,
quiet = T
)
options(readr.show_col_types = F)
# terra >= 1.8.60 required for the rotate() fix:
stopifnot(packageVersion("terra") >= "1.8.60")
# [rotate() issue](https://github.com/rspatial/terra/issues/1876)
# project helpers: path variables (dir_v, dir_big_v, v_sfx, ...) and SDM fxns
source(here("libs/sdm_functions.R"))
source(here("libs/paths.R"))
dir_create(dir_v)
dir_create(dir_big_v)
# inputs (shared, unversioned)
pra_raw_gpkg <- glue("{dir_raw}/boem.gov/boem_program_areas_2025-11.gpkg")
cell_tif <- glue("{dir_derived}/r_bio-oracle_planarea.tif")
er_raw_gpkg <- glue("{dir_v}/../v2/ply_ecoregions_2025.gpkg")
stopifnot(all(file_exists(c(cell_tif, pra_raw_gpkg, er_raw_gpkg))))
# input polygon (from prior step)
pa_gpkg <- glue("{dir_v}/ply_planareas_2025{v_sfx}.gpkg")
# flags, typical
# do_zones <- F # no, except if new spatial
# do_metrics <- T # YES
# do_zone_taxon <- T # slow in past
# do_postgis <- T # YES
# do_downloads <- T # YES
# do_summary_data <- T # ecoregion x PRA intersections, label points, score tables
# do_summary_figs <- T # maps, flower plots, primprod charts, Word/Excel tables
# do_readme <- T # programmatic README.md + README.html
# do_map <- F # no, but useful for visual checks during development
# do_parquet <- F # no, but produces smaller faster duckdb
# do_deploy <- T # sync derived data + app code to server
# flags, fix: show all species categories in score tables + add metric labels
do_zones <- F # no change
do_metrics <- F # metrics already calculated correctly in DuckDB
do_zone_taxon <- F # not affected
do_postgis <- F # PostGIS tables already have v3 columns
do_downloads <- F # pra_gpkg already has correct columns
do_summary_data <- T # YES — score tables need regeneration
do_summary_figs <- T # YES — DOCX/XLSX/flower plots need regeneration
do_readme <- F # not affected
do_map <- F # not affected
do_parquet <- F # not affected
do_deploy <- F # can deploy separately
# spatial outputs (year reflects input data vintage, version suffix model version of derived metric columns)
pra_gpkg <- glue("{dir_v}/ply_programareas_2026{v_sfx}.gpkg")
sr_gpkg <- glue("{dir_v}/ply_subregions_2026{v_sfx}.gpkg")
# computed outputs (version suffix only — no obvious input year)
sr_pra_csv <- glue("{dir_v}/subregion_programareas.csv")
metrics_tif <- glue("{dir_v}/r_metrics{v_sfx}.tif")
metrics_pra_tif <- glue("{dir_v}/r_metrics_programareas{v_sfx}.tif")
zone_taxon_csv <- glue("{dir_v}/zone_taxon{v_sfx}.csv")
lyrs_csv <- glue("{dir_v}/layers{v_sfx}.csv")
# file_exists(c(sr_pra_csv, metrics_tif, metrics_pra_tif, zone_taxon_csv, lyrs_csv))
# summary outputs (merged from msens-summary_programareas.qmd)
er_pra_gpkg <- glue("{dir_v}/ply_ecoregion_programarea{v_sfx}.gpkg")
er_pra_csv <- glue("{dir_v}/ply_ecoregion_programarea{v_sfx}.csv")
er_pra_n_csv <- glue("{dir_v}/tbl_er_pra_scores{v_sfx}.csv")
pra_n_csv <- glue("{dir_v}/tbl_pra_scores{v_sfx}.csv")
lbls_csv <- glue("{dir_v}/ply_label_placement_pra.csv")
dir_figs <- here(glue("figs/msens-summary_programareas{v_sfx}"))
# redo flags for summary sections
redo_summary_data <- T
redo_summary_figs <- T
# parquet (big files)
dir_pq <- glue("{dir_big_v}/sdm_parquet")
# helper functions ----
get_rast <- function(m_key) {
  # Rasterize one metric: look up its metric_seq by key, fetch the per-cell
  # values from cell_metric, and paint them onto a copy of the cell template
  # raster (`r_cell` and `con_sdm` come from the setup chunk's environment).
  sql <- glue(
    "
SELECT
cm.cell_id,
cm.value
FROM cell_metric cm
WHERE cm.metric_seq = (
SELECT metric_seq
FROM metric
WHERE metric_key = '{m_key}' )"
  )
  vals <- dbGetQuery(con_sdm, sql)
  # each cell must appear at most once, else the assignment below is ambiguous
  stopifnot(anyDuplicated(vals$cell_id) == 0)
  out <- init(r_cell[[1]], NA) # all-NA template on the global cell grid
  out[vals$cell_id] <- vals$value
  out
}
plot_flower <- function(
  data,
  fld_category,
  fld_height,
  fld_width,
  colors,
  tooltip_expr = NULL,
  score = NULL,
  title = NULL,
  interactive = T
) {
  # Draw a "flower" plot: stacked rectangles in polar coordinates, one petal
  # per category, petal height = fld_height, petal width = fld_width, with an
  # overall (width-weighted mean) score printed in the center hole.
  #
  # @param data         data frame with one row per category
  # @param fld_category bare column name for petal categories (fill + data_id)
  # @param fld_height   bare numeric column for petal height
  # @param fld_width    bare numeric column for petal width (weight)
  # @param colors       fill values passed to scale_fill_manual()
  # @param tooltip_expr optional glue template evaluated per row for tooltips;
  #                     default tooltip is the category value itself
  # @param score        optional precomputed center score; default is the
  #                     fld_width-weighted mean of fld_height
  # @param title        optional plot title
  # @param interactive  if TRUE, return a ggiraph::girafe htmlwidget,
  #                     else a ggplot object
  stopifnot(is.numeric(data |> pull({{ fld_height }})))
  stopifnot(is.numeric(data |> pull({{ fld_width }})))
  if (is.null(score)) {
    score <- data |>
      mutate(
        "{{fld_height}}" := as.double({{ fld_height }}),
        "{{fld_width}}" := as.double({{ fld_width }})
      ) |>
      summarize(
        score = weighted.mean({{ fld_height }}, {{ fld_width }}, na.rm = T)
      ) |>
      pull(score)
  }
  # cumulative widths give each petal its angular [ymin, ymax) slot
  d <- data |>
    arrange({{ fld_category }}) |>
    mutate(across(!where(is.character), as.double)) |>
    mutate(
      ymax = cumsum({{ fld_width }}),
      ymin = lag(ymax, default = 0),
      xmax = {{ fld_height }},
      xmin = 0
    )
  if (!is.null(tooltip_expr)) {
    d <- d |>
      mutate(tooltip = glue(tooltip_expr))
  } else {
    # FIX: was glue("{!!fld_category}") — `!!` inside a string literal is not
    # tidy-eval unquoting, so glue evaluated `!(!fld_category)` and failed;
    # the default tooltip is simply the category value as text.
    d <- d |>
      mutate(tooltip = as.character({{ fld_category }}))
  }
  g <- ggplot(d) +
    geom_rect_interactive(
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = {{ fld_category }},
        data_id = {{ fld_category }},
        tooltip = tooltip
      ),
      # FIX: removed dead `color = "white"` from inside aes(); a constant
      # mapped in aes() was overridden by this layer-level setting anyway
      color = "white",
      alpha = 0.5
    ) +
    coord_polar(theta = "y") +
    # negative xlim carves out the center hole where the score is printed
    xlim(c(-10, max(data |> pull({{ fld_height }})))) +
    annotate(
      "text",
      x = -10,
      y = 0,
      label = round(score),
      size = 8,
      fontface = "bold"
    ) +
    scale_fill_manual(values = colors) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      plot.margin = unit(c(20, 20, 20, 20), "pt")
    )
  if (!is.null(title)) {
    g <- g + ggtitle(title)
  }
  if (interactive) {
    g <- girafe(
      ggobj = g,
      options = list(
        opts_sizing(rescale = TRUE, width = 1),
        opts_tooltip(
          css = "background-color:white;color:black;padding:5px;border-radius:3px;"
        )
      )
    )
  }
  g
}
# conditional summary setup ----
# mapgl (Mapbox) is only needed for the summary figures
if (do_summary_figs) {
mapbox_tkn_txt <- glue("{dir_private}/mapbox_token_bdbest.txt")
Sys.setenv(MAPBOX_PUBLIC_TOKEN = readLines(mapbox_tkn_txt))
librarian::shelf(mapgl, quiet = T)
dir_create(dir_figs)
}
# open the SDM DuckDB read-write (sdm_db path comes from libs/paths.R)
con_sdm <- dbConnect(duckdb(), dbdir = sdm_db, read_only = F)
# dbDisconnect(con_sdm, shutdown = T); duckdb_shutdown(duckdb()); rm(con_sdm)
source(here("libs/db.R")) # generates Postgres database connection `con`; fails if ssh-msens not connected
# dbListTables(con_sdm) # dbListTables(con)
# species categories (sp_cat) of all accepted taxa; drives the metric loops below
sp_cats <- tbl(con_sdm, "taxon") |>
filter(is_ok) |>
distinct(sp_cat) |>
arrange(sp_cat) |>
pull(sp_cat)
# for visual checks
r_cell <- rast(cell_tif)
ext(r_cell) <- round(ext(r_cell), 3) # TODO: culprit with walrus wonk?
```
Flags for evaluating R chunks:
- `do_zones`: `r do_zones`
- `do_metrics`: `r do_metrics`
- `do_zone_taxon`: `r do_zone_taxon`
- `do_postgis`: `r do_postgis`
- `do_downloads`: `r do_downloads`
- `do_summary_data`: `r do_summary_data`
- `do_summary_figs`: `r do_summary_figs`
- `do_readme`: `r do_readme`
- `do_map`: `r do_map`
- `do_parquet`: `r do_parquet`
- `do_deploy`: `r do_deploy`
## Add Zones
### Add `cell` table to DuckDB if missing
```{r}
#| label: re-add_con_sdm
#| eval: !expr do_zones
# One-time (re)creation of the `cell` table from the cell raster template;
# skipped when the table already exists.
if (!"cell" %in% dbListTables(con_sdm)) {
if (!exists("r_cell")) {
r_cell <- rast(cell_tif)
}
# all non-NA raster cells as a data frame; cell_id doubles as the raster index
d_cell <- values(r_cell, dataframe = T, na.rm = T) |>
tibble() |>
mutate(
cell_id = as.integer(cell_id)
)
# dbRemoveTable(con_sdm, "cell")
dbWriteTable(con_sdm, "cell", d_cell, append = F)
dbExecute(con_sdm, "ALTER TABLE cell ADD PRIMARY KEY (cell_id);")
}
```
### Add Program Areas to SDM zones
Add BOEM Program Areas (11th National Draft Proposed Program, 2025-11) as a new zone type.
Program Areas are derived from Planning Areas with additional GOA subdivisions.
```{r}
#| label: add_programareas
#| eval: !expr do_zones
# Register BOEM Program Areas as a new zone type in the SDM DuckDB:
# read the raw gpkg, attach region/planarea keys, write the derived gpkg,
# rasterize onto the cell grid, and insert zone + zone_cell rows.
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)
# FIX: guard was inverted (`tbl_pra %in% zone_tbls`), which skipped creation
# on a fresh database and re-inserted duplicate zone rows on re-runs; add
# only if missing, matching the ply_subregions_2025 pattern below.
# NOTE(review): later chunks reference `pra`, so on runs where this guard is
# FALSE, `pra` must be rebuilt (e.g. read_sf(pra_gpkg)) — confirm workflow.
if (!tbl_pra %in% zone_tbls) {
  source(here("libs/sdm_functions.R"))
  # TODO: move these into msens R
  # - set_geom_ctr_area()
  # - register_zones()
  # - insert_zone_cell_data()
  # load planning areas for field join
  pa <- read_sf(pa_gpkg) |>
    st_drop_geometry() |>
    select(planarea_name, region_key, planarea_key)
  # load and process program areas with field renaming
  pra <- read_sf(pra_raw_gpkg) |>
    rename(
      region_name = BOEM_OCS_REGION,
      planarea_name = PLANNING_AREA,
      programarea_name = Label,
      programarea_id = OBJECTID
    ) |>
    # left join to get region_key and planarea_key from planning areas
    left_join(pa, by = "planarea_name") |>
    # programarea_key defaults to planarea_key
    mutate(
      programarea_key = planarea_key
    ) |>
    # handle NA values for GOA Program Areas (not in planning areas)
    mutate(
      programarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "GAA",
        programarea_name == "GOA Program Area B" ~ "GAB",
        TRUE ~ programarea_key
      ),
      planarea_key = case_when(
        programarea_name == "GOA Program Area A" ~ "WGA,CGA,EGA",
        programarea_name == "GOA Program Area B" ~ "CGA,EGA",
        TRUE ~ planarea_key
      ),
      region_key = case_when(
        str_detect(programarea_name, "GOA") ~ "GA",
        TRUE ~ region_key
      )
    ) |>
    select(
      programarea_id,
      region_key,
      region_name,
      planarea_key,
      planarea_name,
      programarea_key,
      programarea_name
    ) |>
    st_transform(st_crs(4326))
  # add centroid and area
  pra <- pra |> set_geom_ctr_area()
  # write derived gpkg
  write_sf(pra, pra_gpkg)
  message(glue("Wrote {nrow(pra)} program areas to {pra_gpkg}"))
  # Wrote 20 program areas to ~/My Drive/projects/msens/data/derived/ply_programareas_2026.gpkg
  # shift lon [-180, 180] to [0, 360] to match the cell raster's extent
  pra_sl <- st_shift_longitude(pra)
  # load cell raster template
  r_cell <- rast(cell_tif, lyrs = "cell_id")
  # get percent each feature covers each cell
  r_pra <- rasterize(
    pra_sl,
    r_cell,
    fun = "max",
    touches = T,
    cover = T,
    by = "programarea_key"
  )
  # convert to data frame (long format: cell, layer, values)
  d_pra <- terra::as.data.frame(
    r_pra,
    cells = T,
    na.rm = F,
    wide = F
  ) |>
    filter(!is.na(values)) |>
    tibble()
  message(glue("Program areas cover {length(unique(d_pra$cell))} cells"))
  # register zones and insert zone data into con_sdm
  register_zones(pra, tbl_pra, "programarea_key")
  insert_zone_cell_data(d_pra, tbl_pra, "programarea_key")
  # verify registration
  tbl(con_sdm, "zone") |>
    group_by(tbl) |>
    summarise(n_zones = n()) |>
    collect() |>
    print()
}
```
### Create ply_programareas_2026 in PostGIS database
For viewing in mapgl and mapsp apps.
```{r}
#| label: create_pg_ply_programareas_2026
#| eval: !expr do_zones
# Push the program-area polygons into PostGIS for the mapgl/mapsp apps.
# NOTE(review): relies on `pra` built by the add_programareas chunk in the
# same session — confirm before enabling.
# devtools::install_local("../msens")
librarian::shelf(
here,
leaflet,
leaflet.extras, # mapview,
marinesensitivity / msens,
sf,
tibble,
quiet = T
)
source(here("libs/db.R")) # define: con
pra |>
st_write(
dsn = con,
layer = tbl_pra,
driver = "PostgreSQL",
layer_options = "OVERWRITE=true"
)
# TODO: need to convert to 1st column to integer for mapgl viewing?
# enforce SRID so shows up in tile.marinesensitivity.org
dbExecute(
con,
glue(
"SELECT UpdateGeometrySRID('{tbl_pra}','geom',4326);"
)
)
# fix any invalid geometries
dbExecute(
con,
glue(
"UPDATE {tbl_pra} SET geom = ST_MakeValid(geom) WHERE NOT ST_IsValid(geom)"
)
) # 6 rows fixed
# add spatial index for faster queries
dbExecute(
con,
glue(
"CREATE INDEX IF NOT EXISTS {tbl_pra}_geom_idx ON {tbl_pra} USING gist(geom);"
)
)
```
### Create Subregion-ProgramArea Lookup
Create lookup table for which program areas belong to which subregions.
Used in dropdown of mapgl app.
```{r}
#| label: subregion_programareas_lookup
#| eval: !expr do_zones
# calculate intersections between subregions and program areas using zone_cell:
# a program area belongs to a subregion if the two zone types share any cell
d_sr_pra <- tbl(con_sdm, "zone_cell") |>
inner_join(
tbl(con_sdm, "zone") |>
filter(tbl == !!tbl_sr),
by = "zone_seq"
) |>
rename(subregion_key = value) |>
select(cell_id, subregion_key) |>
inner_join(
tbl(con_sdm, "zone_cell") |>
inner_join(
tbl(con_sdm, "zone") |>
filter(tbl == !!tbl_pra),
by = "zone_seq"
) |>
select(cell_id, programarea_key = value),
by = "cell_id"
) |>
distinct(subregion_key, programarea_key) |>
collect()
get_sr_bbox <- function(sr_key) {
  # Bounding box (xmin, ymin, xmax, ymax) of all program areas within one
  # subregion, from the "programarea_key" raster layer.
  # NOTE(review): depends on `r_metrics` and `d_sr_pra` existing in the
  # calling environment; `r_metrics` is not defined in this notebook's
  # visible chunks — confirm before calling.
  pra_sr <- d_sr_pra |>
    filter(subregion_key == !!sr_key) |>
    pull(programarea_key)
  # FIX: removed dead assignment `r <- init(r_cell[[1]], NA)` — `r` was
  # never used.
  r_sr <- r_metrics[["programarea_key"]] %in% pra_sr
  trim(r_sr) |> st_bbox() |> as.numeric()
}
# TODO: pre-calc bboxes for all subregions
# persist the subregion -> program-area lookup for the mapgl app dropdown
write_csv(d_sr_pra, sr_pra_csv)
message(glue(
"Wrote {nrow(d_sr_pra)} subregion-programarea mappings to {sr_pra_csv}"
))
# Wrote 60 subregion-programarea mappings to ~/My Drive/projects/msens/data/derived/subregion_programareas.csv
```
### Add SubRegions
Original:
- `USA`: USA mainland, Alaska, Hawaii and territories
- `AK`: Alaska
- `L48`: USA mainland
- `AKL48`: USA mainland & Alaska
Conceived for later:
- `HI`: Hawaii
- `L48Pac`: Pacific USA mainland
- `L48Atl`: Atlantic USA mainland
- `L48Goa`: Gulf of America USA mainland
- `PAC`: Pacific USA (Hawaii, Pacific mainland and USA Territories)
version for 2026:
- `USA`: USA mainland and Alaska
- `AK`: Alaska
- `L48Pac`: Pacific USA mainland
- `L48Atl`: Atlantic USA mainland
- `L48Goa`: Gulf of America USA mainland
v3:
- `USA`: All USA (Alaska, Gulf of America and Pacific)
- `AK`: Alaska
- `GA`: Gulf of America
- `PA`: Pacific
```{r}
#| label: add_subregions_2026
#| eval: !expr do_zones
# Rebuild the v3 subregions (AK, GA, PA, USA) as zones whose cells are the
# union of their program-area cells, rather than rasterizing new polygons.
zones <- tbl(con_sdm, "zone") |>
distinct(tbl) |>
pull(tbl)
# delete existing tbl_sr
# NOTE(review): zone_cell rows for the deleted zones are left orphaned
# (see commented DELETE below and the orphan-cleanup TODO at top)
if (tbl_sr %in% zones) {
dbExecute(con_sdm, glue("DELETE FROM zone WHERE tbl = '{tbl_sr}'"))
# dbExecute(con_sdm, "DELETE FROM zone_cell WHERE zone_seq NOT IN (SELECT zone_seq FROM zone)")
}
d_sr <- tribble(
~subregion_key , ~subregion_name ,
"AK" , "Alaska" ,
"GA" , "Gulf of America" ,
"PA" , "Pacific" ,
"USA" , "All USA"
) # note: subregion_name not used (yet)
register_zones(d_sr, tbl_sr, "subregion_key")
# program area -> region lookup used to assign program-area cells to subregions
tmp_pra <- pra |>
st_drop_geometry() |>
select(programarea_key, region_key)
# temporarily push tmp_pra into duckdb for later joining
copy_to(con_sdm, tmp_pra, "tmp_pra", temporary = T, overwrite = T)
# per-cell coverage for each non-USA subregion: subregion zone -> region_key
# -> program areas -> program-area cells, summing coverage and capping at 100
df_z_sr_notusa <- tbl(con_sdm, "zone") |>
filter(
tbl == !!tbl_sr,
value != "USA"
) |>
select(region_key = value, zone_seq_sr = zone_seq) |>
left_join(
tbl(con_sdm, "tmp_pra"),
by = "region_key"
) |>
left_join(
tbl(con_sdm, "zone") |>
filter(tbl == !!tbl_pra) |>
select(
programarea_key = value,
zone_seq
),
by = "programarea_key"
) |>
left_join(
tbl(con_sdm, "zone_cell"),
by = "zone_seq"
) |>
group_by(zone_seq_sr, cell_id) |>
summarize(
pct_covered = sum(pct_covered, na.rm = T),
.groups = "drop"
) |>
mutate(
pct_covered = pmin(pct_covered, 100, na.rm = T)
) |>
select(
zone_seq = zone_seq_sr,
cell_id,
pct_covered
) |>
collect() # 349,139 × 3
# summary(df_z_sr_notusa)
# duplicated(df_z_sr_notusa$cell_id) |> any() # FALSE
# TODO: check why no overlapping cells across subregions?
# zone_seqs_sr <- tbl(con_sdm, "zone") |>
# filter(
# tbl == !!tbl_sr) |>
# pull(zone_seq)
# dbExecute(con_sdm, glue("
# DELETE FROM zone_cell
# WHERE zone_seq IN ({paste(zone_seqs_sr, collapse = ',')})"))
# append USA, ie all cells in other subregions
zone_seq_USA <- tbl(con_sdm, "zone") |>
filter(
tbl == !!tbl_sr,
value == "USA"
) |>
pull(zone_seq) # 217
df_z_sr_usa <- df_z_sr_notusa |>
group_by(cell_id) |>
summarize(
pct_covered = sum(pct_covered, na.rm = T),
.groups = "drop"
) |>
mutate(
zone_seq = !!zone_seq_USA,
pct_covered = pmin(pct_covered, 100, na.rm = T)
) # 349,139 × 3
# summary(df_z_sr_usa)
df_z_sr <- bind_rows(
df_z_sr_notusa,
df_z_sr_usa
)
# TODO: ....
# append subregion cell coverage into zone_cell
copy_to(
dest = con_sdm,
df = df_z_sr,
name = "zone_cell",
append = T
)
# sanity check: cells per subregion
tbl(con_sdm, "zone") |>
filter(tbl == !!tbl_sr) |>
select(zone_seq, subregion_key = value) |>
left_join(
tbl(con_sdm, "zone_cell"),
by = "zone_seq"
) |>
group_by(subregion_key) |>
summarize(n_cell = n()) |>
arrange(subregion_key)
# subregion_key n_cell
# <chr> <dbl>
# 1 AK 307,445
# 2 GA 17,274
# 3 PA 24,420
# 4 USA 349,139
```
```{r}
#| label: old_add_subregions_2025
#| eval: false
# Superseded 2025 subregion build (USA/AK/L48/AKL48 from ecoregion polygons);
# kept for reference only — never evaluated (eval: false).
zone_tbls <- tbl(con_sdm, "zone") |>
distinct(tbl) |>
pull(tbl)
if (!"ply_subregions_2025" %in% zone_tbls) {
source(here("libs/sdm_functions.R"))
# - set_geom_ctr_area()
# - register_zones()
# - insert_zone_cell_data()
# spatial sources
er <- read_sf(er_raw_gpkg)
r_cell <- rast(cell_tif, lyrs = "cell_id")
# USA: USA mainland, Alaska, Hawaii and territories
sr_usa <- er |>
st_union() |>
st_as_sf() |>
mutate(
subregion_key = "USA",
subregion_name = "USA mainland, Alaska, Hawaii and territories"
)
# AK: Alaska
sr_ak <- er |>
filter(region_key == "AK") |>
st_union() |>
st_as_sf() |>
mutate(
subregion_key = "AK",
subregion_name = "Alaska"
)
# L48: USA mainland
sr_l48 <- er |>
filter(
ecoregion_key %in%
c(
"CAC",
"WAOR",
"SECS",
"WCGOA",
"EGOA",
"NECS"
)
) |>
st_union() |>
st_as_sf() |>
mutate(
subregion_key = "L48",
subregion_name = "USA mainland"
)
# AKL48: USA mainland & Alaska
sr_akl48 <- bind_rows(sr_ak, sr_l48) |>
st_union() |>
st_as_sf() |>
mutate(
subregion_key = "AKL48",
subregion_name = "USA mainland & Alaska"
)
# combine
sr <- bind_rows(
sr_ak,
sr_l48,
sr_akl48,
sr_usa
) |>
set_geom_ctr_area()
# write gpkg
write_sf(sr, sr_gpkg)
# shift lon [-180, 180] to [0, 360]
sr_sl <- st_shift_longitude(sr)
# get percent each feature covers each cell
r_sr <- rasterize(
sr_sl,
r_cell,
fun = "max",
touches = T,
cover = T,
by = "subregion_key"
)
# convert to data frame
d_sr <- terra::as.data.frame(
r_sr,
cells = T,
na.rm = F,
wide = F
) |>
filter(
!is.na(values)
) |> # 839,534 × 3
tibble()
# d_sr |>
# group_by(layer) |>
# summarise(n_cells = n()) |>
# collect() |>
# arrange(desc(n_cells))
#
# layer n_cells
# <chr> <int>
# 1 USA 630,806
# 2 AKL48 419,767
# 3 AK 315,369
# 4 L48 104,398
# register zones and insert zone data into con_sdm
register_zones(sr, "ply_subregions_2025", "subregion_key")
insert_zone_cell_data(d_sr, "ply_subregions_2025", "subregion_key")
# tbl(con_sdm, "zone") |>
# group_by(tbl) |>
# summarise(n_zones = n()) |>
# collect()
#
# tbl n_zones
# <chr> <dbl>
# 1 ply_ecoregions_2025 12
# 2 ply_planareas_2025 36
# 3 ply_subregions_2025 4
# TODO: load ply_subregions_2025 into PostGIS
}
```
## Calculate Metrics
### Calculate Extinction Risk Metrics by Taxonomic Component
```{r}
#| label: delete_extrisk_metrics
#| eval: !expr do_metrics
# Start fresh: remove every extrisk_* metric and its dependent rows.
existing_seqs <- tbl(con_sdm, "metric") |>
  filter(str_starts(metric_key, "extrisk_")) |>
  pull(metric_seq)
if (length(existing_seqs) > 0) {
  seq_list <- paste(existing_seqs, collapse = ", ")
  # child tables first, then the metric rows themselves
  for (tbl_del in c("cell_metric", "zone_metric", "metric")) {
    dbExecute(
      con_sdm,
      glue("DELETE FROM {tbl_del} WHERE metric_seq IN ({seq_list})")
    )
  }
}
```
```{r}
#| label: calc_cell_metric_redlist
#| eval: !expr do_metrics
# For each species category (plus "all"), build an extrisk_* metric whose
# per-cell value is the sum over models of er_score * model suitability / 100.
# NOTE(review): d_taxon_ok appears unused in this chunk — candidate for removal
d_taxon_ok <- tbl(con_sdm, "taxon") |>
filter(is_ok) |>
collect()
for (sp_cat in sp_cats) {
# sp_cat = sp_cats[1]
message(glue("Calculating extinction risk metrics for {sp_cat}..."))
metric_key <- glue("extrisk_{str_replace(sp_cat, ' ', '_')}")
description <- glue("Extinction risk for {sp_cat}")
# TODO: rename metric_key -> metric_key, description
# Get metric_seq first, then delete/insert
existing_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{metric_key}'"
)
)
# NOTE(review): if metric_key ever matched multiple rows, metric_seq below
# would be a vector and glue() would emit multiple statements — assumes
# metric_key is unique in metric
if (nrow(existing_seq) > 0) {
metric_seq <- existing_seq$metric_seq
dbExecute(
con_sdm,
glue("DELETE FROM cell_metric WHERE metric_seq = {metric_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM zone_metric WHERE metric_seq = {metric_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM metric WHERE metric_seq = {metric_seq}")
)
}
dbExecute(
con_sdm,
glue(
"
INSERT INTO metric (metric_key, description)
VALUES ('{metric_key}', '{description}')"
)
)
# recover the auto-assigned metric_seq of the row just inserted
metric_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT max(metric_seq) AS metric_seq FROM metric WHERE metric_key = '{metric_key}'"
)
) |>
pull(metric_seq)
# use er_score from taxon (pre-computed in ingest_nmfs-fws-mmpa-mbta-listings.qmd
# and merged in merge_models.qmd)
# `!!sp_cat` is the loop value; when it is "all" no category filter applies,
# otherwise the taxon.sp_cat column is matched against the loop value
d_er <- tbl(con_sdm, "taxon") |>
filter(
is_ok,
if (!!sp_cat == "all") T else sp_cat == !!sp_cat
) |>
select(mdl_seq, er_score) |>
collect()
# expose the in-memory lookup to DuckDB for the INSERT ... SELECT join
duckdb_register(con_sdm, "tmp_er", d_er |> select(mdl_seq, er_score))
dbExecute(
con_sdm,
glue(
"
INSERT INTO cell_metric (cell_id, metric_seq, value)
SELECT
mc.cell_id,
{metric_seq} AS metric_seq,
ROUND(SUM(er.er_score * mc.value) / 100.0, 2) AS value
FROM model_cell mc
JOIN tmp_er er ON mc.mdl_seq = er.mdl_seq
GROUP BY mc.cell_id"
)
) # 6,662,075
duckdb_unregister(con_sdm, "tmp_er")
# visual check ----
# d <- tbl(con_sdm, "cell_metric") |>
# filter(metric_seq == metric_seq) |>
# select(cell_id, value) |>
# collect()
# r <- init(r_cell[[1]], NA)
# r[d$cell_id] <- d$value
# r_r <- rotate(r)
# plet(r_r)
}
tbl(con_sdm, "metric") |>
filter(str_starts(metric_key, "extrisk_")) |>
arrange(metric_seq) |>
select(-date_created) |>
collect() |>
print(n = Inf)
```
### Calculate ExtinctionRisk min/max per SpeciesCategory and Ecoregion
```{r}
#| label: calc_ecoregion_minmax
#| eval: !expr do_metrics
# For each species category and each of min/max, store the ecoregional
# extremum of the extrisk_{sp_cat} cell values as a zone_metric.
for (sp_cat in sp_cats) {
# sp_cat = sp_cats[1]
for (fxn in c("min", "max")) {
# fxn = "min"
# NOTE(review): key is built from sp_cat without str_replace(' ', '_'),
# unlike calc_cell_metric_redlist above — a sp_cat containing a space
# would not match 'extrisk_{sp_cat}' in the SQL below; confirm sp_cats
# are space-free
m_key <- glue("extrisk_{sp_cat}_ecoregion_{fxn}")
m_description <- glue("Extinction risk for {sp_cat} Ecoregional {fxn}")
message(glue("m_key: {m_key}"))
# Get metric_seq first, then delete/insert
existing_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
) |>
pull(metric_seq)
if (length(existing_seq) > 0) {
dbExecute(
con_sdm,
glue("DELETE FROM metric WHERE metric_seq = {existing_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM cell_metric WHERE metric_seq = {existing_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM zone_metric WHERE metric_seq = {existing_seq}")
)
}
dbExecute(
con_sdm,
glue(
"INSERT INTO metric (metric_key, description)
VALUES ('{m_key}', '{m_description}')"
)
)
# get metric ID
m_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
) |>
pull(metric_seq)
# calculate min/max extinction risk per ecoregion
# (the WHERE on cm.metric_seq effectively makes the LEFT JOINs inner joins)
dbExecute(
con_sdm,
glue(
"INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
z.zone_seq,
-- z.tbl, z.fld, z.value AS tbl_fld_val,
{m_seq} as metric_seq,
{fxn}(cm.value) as value
FROM zone z
LEFT JOIN zone_cell zc ON z.zone_seq = zc.zone_seq
LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
WHERE
z.fld = 'ecoregion_key'
AND cm.metric_seq IN (
SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
-- GROUP BY z.zone_seq, z.tbl, z.fld, z.value
GROUP BY z.zone_seq "
)
)
}
}
tbl(con_sdm, "metric") |>
filter(str_detect(metric_key, "^extrisk_.*_ecoregion_(min|max)$")) |>
arrange(metric_seq) |>
select(-date_created) |>
collect() |>
print(n = Inf)
```
### Calculate rescaled ExtinctionRisk per cell based on EcoregionMinMax per SpeciesCategory
```{r}
#| label: extrisk_spcat_ecoregion_rescaled_cell
#| eval: !expr do_metrics
# Rescale each cell's extrisk_{sp_cat} value to [0,100] relative to the
# min/max of its ecoregion(s), weighting by normalized ecoregion coverage
# for cells that straddle multiple ecoregions.
for (sp_cat in sp_cats) {
# sp_cat = sp_cats[1]
m_key <- glue("extrisk_{sp_cat}_ecoregion_rescaled")
m_description <- glue(
"Extinction risk for {sp_cat}, rescaled to [0,100] based on Ecoregional min/max values"
)
# Get metric_seq first, then delete/insert
existing_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
)
if (nrow(existing_seq) > 0) {
m_seq <- existing_seq$metric_seq
dbExecute(
con_sdm,
glue("DELETE FROM metric WHERE metric_seq = {m_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
)
}
# insert new metric
dbExecute(
con_sdm,
glue(
"
INSERT INTO metric (metric_key, description)
VALUES ('{m_key}', '{m_description}')"
)
)
# get the newly assigned metric_seq
m_seq <- dbGetQuery(
con_sdm,
glue(
"
SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
) |>
pull(metric_seq)
# rescale in one INSERT ... SELECT: CTEs gather ecoregion min/max, the
# cell<->ecoregion coverage, and normalize coverage to sum to 100 per cell
dbExecute(
con_sdm,
glue(
"
WITH
-- get ecoregion_minmax from zone_metric
ecoregion_minmax AS (
SELECT
z.zone_seq,
MIN(zm.value) as min_value,
MAX(zm.value) as max_value
FROM zone z
JOIN zone_metric zm USING (zone_seq)
WHERE
z.fld = 'ecoregion_key' AND
zm.metric_seq IN (
SELECT metric_seq FROM metric WHERE metric_key IN (
'extrisk_{sp_cat}_ecoregion_min',
'extrisk_{sp_cat}_ecoregion_max') )
GROUP BY z.zone_seq
),
-- get cell_ecoregion from zone_cell with pct_covered
cell_ecoregion_raw AS (
SELECT
cell_id,
z.zone_seq,
zone_cell.pct_covered
FROM zone_cell
JOIN zone z ON zone_cell.zone_seq = z.zone_seq
WHERE z.fld = 'ecoregion_key'
),
-- calculate total coverage per cell for normalization
cell_coverage_total AS (
SELECT
cell_id,
SUM(pct_covered) as total_coverage
FROM cell_ecoregion_raw
GROUP BY cell_id
),
-- normalize coverage percentages
-- for cells with only one ecoregion, use 100% regardless of actual coverage
-- for cells with multiple ecoregions, rescale to ensure percentages sum to 100
cell_ecoregion AS (
SELECT
ce.cell_id,
ce.zone_seq,
CASE
WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
ELSE ce.pct_covered * 100.0 / ct.total_coverage
END as normalized_pct
FROM cell_ecoregion_raw ce
JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
)
-- insert rescaled extinction risk values
INSERT INTO cell_metric (cell_id, metric_seq, value)
SELECT
cm.cell_id,
{m_seq} as metric_seq,
SUM(
(cm.value - er.min_value) /
(er.max_value - er.min_value) *
(ce.normalized_pct / 100.0) ) * 100 AS value -- scale to 0-100 range
FROM cell_metric cm
JOIN cell_ecoregion ce ON cm.cell_id = ce.cell_id
JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
WHERE cm.metric_seq IN (
SELECT metric_seq FROM metric WHERE metric_key = 'extrisk_{sp_cat}')
AND cm.value IS NOT NULL
AND er.min_value IS NOT NULL
AND er.max_value IS NOT NULL
AND er.min_value < er.max_value -- avoid division by zero
GROUP BY cm.cell_id
"
)
)
}
tbl(con_sdm, "metric") |>
filter(str_detect(metric_key, "^extrisk_.*_ecoregion_rescaled$")) |>
arrange(metric_seq) |>
select(-date_created) |>
collect() |>
print(n = Inf)
```
### Transfer raw PrimProd into cell_metric
```{r}
#| label: primprod_cell_metric
#| eval: !expr do_metrics
# Create metric for primary productivity
m_key <- "primprod"
m_description <- "Primary productivity: Oregon State Vertically Generalized Production Model (VGPM) from Visible Infrared Imaging Radiometer Suite (VIIRS) satellite data (mg C / m^2 / day) from daily averages available as monthly averaged to annual and averaged to overall for the most recently available full years of data 2014 to 2023"
# TODO: consider log rescaling PrimaryProductivity
# workflow: ingest_productivity.qmd
dir_vgpm <- glue("{dir_data}/raw/oregonstate.edu")
vgpm_tif <- glue(
"{dir_vgpm}/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd.tif"
)
# cell template and VGPM raster; round extents to avoid float mismatch
r_cell <- rast(cell_tif) |> subset("cell_id")
ext(r_cell) <- round(ext(r_cell), 3)
# r_cell_r <- rotate(r_cell)
# ext(r_cell_r) <- round(ext(r_cell_r), 3)
r_vgpm <- rast(vgpm_tif) |> subset("npp_avg")
ext(r_vgpm) <- round(ext(r_vgpm), 3)
# r_vgpm_rc <- rotate(r_vgpm) |>
# crop(r_cell) |>
# mask(r_cell)
# resample VGPM onto the cell grid, keeping only cells in the template
r_v_c <- resample(
r_vgpm,
r_cell,
method = "bilinear"
) |>
mask(r_cell) |>
crop(r_cell)
# plot(r_v_c)
r_cv <- c(r_cell, r_v_c)
# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
)
# NOTE(review): metric is deleted before its cell/zone rows here, the
# reverse of the order used elsewhere — fine without FKs, but inconsistent
if (nrow(existing_seq) > 0) {
m_seq <- existing_seq$metric_seq
dbExecute(con_sdm, glue("DELETE FROM metric WHERE metric_seq = {m_seq}"))
dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}"))
dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}"))
}
# Create new metric
dbExecute(
con_sdm,
glue(
"
INSERT INTO metric (metric_key, description)
VALUES ('{m_key}', '{m_description}')"
)
)
m_seq <- dbGetQuery(
con_sdm,
glue(
"
SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
) |>
pull(metric_seq)
# per-cell npp_avg values -> cell_metric rows for the new metric
d_cv <- values(r_cv, dataframe = T, na.rm = T) |>
tibble() |>
rename(
value = npp_avg
) |>
mutate(
cell_id = as.integer(cell_id),
metric_seq = !!m_seq
)
dbWriteTable(con_sdm, "cell_metric", d_cv, append = T)
tbl(con_sdm, "metric") |>
filter(str_detect(metric_key, "^primprod")) |>
arrange(metric_seq) |>
select(-date_created) |>
collect() |>
print(n = Inf)
# metric_seq metric_key description
# <int> <chr> <chr>
# 1 160 primprod Primary productivity: Oregon State Vertically Generali…
# visual check ----
# d <- tbl(con_sdm, "cell_metric") |>
# filter(metric_seq == m_seq) |>
# select(cell_id, value) |>
# collect()
# r <- init(r_cell[[1]], NA)
# r[d$cell_id] <- d$value
# r_r <- rotate(r)
# plet(r_r)
```
### Calculate PrimProd min/max by Ecoregion
```{r}
#| label: primprod_ecoregion_minmax
#| eval: !expr do_metrics
# Store the ecoregional min and max of the primprod cell metric as
# zone_metric rows (primprod_ecoregion_min / primprod_ecoregion_max).
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
)
) |>
pull(metric_seq)
for (fxn in c("min", "max")) {
# fxn = "min"
# get existing primprod ecoregion min/max metric_seq
m_key <- glue("primprod_ecoregion_{fxn}")
m_description <- glue("Primary productivity Ecoregional {fxn}")
# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
)
if (nrow(existing_seq) > 0) {
m_seq <- existing_seq$metric_seq
dbExecute(
con_sdm,
glue("DELETE FROM metric WHERE metric_seq = {m_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM cell_metric WHERE metric_seq = {m_seq}")
)
dbExecute(
con_sdm,
glue("DELETE FROM zone_metric WHERE metric_seq = {m_seq}")
)
}
# insert new metric
dbExecute(
con_sdm,
glue(
"INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
)
)
# get metric_seq
m_seq <- dbGetQuery(
con_sdm,
glue(
"SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
)
) |>
pull(metric_seq)
# Calculate min/max primary productivity per ecoregion
# (the WHERE on cm.metric_seq effectively makes the LEFT JOINs inner joins)
dbExecute(
con_sdm,
glue(
"INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
z.zone_seq,
-- z.tbl, z.fld, z.value AS tbl_fld_val,
{m_seq} as metric_seq,
{fxn}(cm.value) as value
FROM zone z
LEFT JOIN zone_cell zc ON z.zone_seq = zc.zone_seq
LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
WHERE
z.fld = 'ecoregion_key' AND
cm.metric_seq = {pp_seq}
GROUP BY z.zone_seq"
)
)
}
# sanity summaries of the stored per-ecoregion extremes
tbl(con_sdm, "metric") |>
filter(
metric_key == "primprod_ecoregion_min"
) |>
left_join(
tbl(con_sdm, "zone_metric"),
by = "metric_seq"
) |>
pull(value) |>
summary()
# OLD:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 65.61 130.50 237.31 247.02 358.66 435.62
# NEW:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 35.15 135.08 267.47 257.26 366.07 454.43
tbl(con_sdm, "metric") |>
filter(
metric_key == "primprod_ecoregion_max"
) |>
left_join(
tbl(con_sdm, "zone_metric"),
by = "metric_seq"
) |>
pull(value) |>
summary()
```
### Calculate PrimProd avg/sd by Planning Area
```{r}
#| label: primprod_ecoregion_avg_stddev
#| eval: !expr do_metrics
# metric_seq of the raw primary productivity cell metric
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)
# zone-level avg and stddev of primary productivity; despite the chunk label,
# zones are filtered to fld = 'planarea_key' (planning areas, per the heading)
for (fxn in c("avg", "stddev")) {
  # fxn = "avg"
  # get existing primprod avg/stddev metric_seq
  m_key <- glue("primprod_{fxn}")
  m_description <- glue("Primary productivity {fxn}")
  # Check if metric exists and delete if it does
  existing_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)
  # IN (...) handles possible multiple stale rows in one statement
  if (length(existing_seq) > 0) {
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM cell_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
    dbExecute(
      con_sdm,
      glue(
        "DELETE FROM zone_metric WHERE metric_seq IN ({paste(existing_seq, collapse = ',')})"
      )
    )
  }
  # insert new metric
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO metric (metric_key, description) VALUES ('{m_key}', '{m_description}')"
    )
  )
  # get metric_seq
  m_seq <- dbGetQuery(
    con_sdm,
    glue(
      "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
    )
  ) |>
    pull(metric_seq)
  stopifnot(length(m_seq) == 1)
  # calculate avg/stddev primary productivity per planning area
  # dbGetQuery(con_sdm, glue(
  # "SELECT
  dbExecute(
    con_sdm,
    glue(
      "INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
z.zone_seq,
{m_seq} as metric_seq,
{fxn}(cm.value) as value
FROM zone z
LEFT JOIN zone_cell zc ON z.zone_seq = zc.zone_seq
LEFT JOIN cell_metric cm ON zc.cell_id = cm.cell_id
WHERE
z.fld = 'planarea_key' AND
cm.metric_seq = {pp_seq}
GROUP BY z.zone_seq"
    )
  )
}
# tbl(con_sdm, "cell_metric") |>
# filter(
# metric_seq == !!pp_seq) |>
# pull(value) |>
# summary()
# verify: distribution of primprod_avg across planning areas
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_avg"
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
```
### Calculate rescaled PrimProd per cell based on EcoregionMinMax
```{r}
#| label: primprod_ecoregion_rescaled
#| eval: !expr do_metrics
# metric_seq of the raw primary productivity cell metric
pp_metric <- "primprod"
pp_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{pp_metric}'"
  )
) |>
  pull(metric_seq)
# Create new metric for rescaled primary productivity
m_key <- "primprod_ecoregion_rescaled"
m_description <- "Primary productivity rescaled to [0,100] based on Ecoregional min/max values"
# TODO: consider log rescaling PrimaryProductivity
# Check if metric exists and delete if it does
existing_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(existing_seq) > 0) {
  # use IN (...) so multiple stale rows are removed with a single statement;
  # glue() on a vector would otherwise produce a vector of SQL strings, which
  # dbExecute() cannot take
  seq_in <- paste(existing_seq, collapse = ",")
  dbExecute(
    con_sdm,
    glue("DELETE FROM metric WHERE metric_seq IN ({seq_in})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM cell_metric WHERE metric_seq IN ({seq_in})")
  )
  dbExecute(
    con_sdm,
    glue("DELETE FROM zone_metric WHERE metric_seq IN ({seq_in})")
  )
}
# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
INSERT INTO metric (metric_key, description)
VALUES ('{m_key}', '{m_description}')"
  )
)
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
stopifnot(length(m_seq) == 1)
# Rescale primary productivity by ecoregion: each cell's value is scaled into
# its ecoregion's [min, max] range; boundary cells spanning multiple
# ecoregions take a coverage-weighted average of the per-ecoregion rescalings
dbExecute(
  con_sdm,
  glue(
    "
WITH
-- get ecoregion_minmax from cell_metric for primary productivity
ecoregion_minmax AS (
SELECT
z.zone_seq,
MIN(zm.value) as min_value,
MAX(zm.value) as max_value
FROM zone z
JOIN zone_metric zm USING (zone_seq)
WHERE
z.fld = 'ecoregion_key' AND
zm.metric_seq IN (
SELECT metric_seq FROM metric WHERE metric_key IN (
'primprod_ecoregion_min',
'primprod_ecoregion_max') )
GROUP BY z.zone_seq ),
-- get cell_ecoregion from zone_cell with pct_covered
cell_ecoregion_raw AS (
SELECT
cell_id,
z.zone_seq,
zone_cell.pct_covered
FROM zone_cell
JOIN zone z ON zone_cell.zone_seq = z.zone_seq
WHERE z.fld = 'ecoregion_key'
),
-- calculate total coverage per cell for normalization
cell_coverage_total AS (
SELECT
cell_id,
SUM(pct_covered) as total_coverage
FROM cell_ecoregion_raw
GROUP BY cell_id
),
-- normalize coverage percentages
cell_ecoregion AS (
SELECT
ce.cell_id,
ce.zone_seq,
CASE
WHEN COUNT(*) OVER (PARTITION BY ce.cell_id) = 1 THEN 100
ELSE ce.pct_covered * 100.0 / ct.total_coverage
END as normalized_pct
FROM cell_ecoregion_raw ce
JOIN cell_coverage_total ct ON ce.cell_id = ct.cell_id
)
-- insert rescaled primary productivity values
INSERT INTO cell_metric (cell_id, metric_seq, value)
SELECT
cm.cell_id,
{m_seq} as metric_seq,
SUM(
-- (c.prim_prod_mean - er.min_value) /
(cm.value - er.min_value) /
(er.max_value - er.min_value) *
(ce.normalized_pct / 100.0) ) * 100 AS value -- scale to 0-100 range
-- FROM cell c
FROM cell_metric cm
JOIN cell_ecoregion ce ON cm.cell_id = ce.cell_id
JOIN ecoregion_minmax er ON ce.zone_seq = er.zone_seq
WHERE
--c.prim_prod_mean IS NOT NULL AND
cm.metric_seq = {pp_seq} AND
cm.value IS NOT NULL AND
ce.zone_seq IS NOT NULL AND
er.min_value IS NOT NULL AND
er.max_value IS NOT NULL AND
er.min_value < er.max_value -- avoid division by zero
GROUP BY cm.cell_id
"
  )
)
# verify: distribution of the rescaled cell metric
tbl(con_sdm, "metric") |>
  filter(
    metric_key == "primprod_ecoregion_rescaled"
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  pull(value) |>
  summary()
```
### Calculate ProgramArea metrics contributing to score based on cell metrics
```{r}
#| label: cell_metrics_to_programarea_metrics
#| eval: !expr do_metrics
# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  pull(zone_seq)
message(glue("Calculating metrics for {length(z_seqs)} program areas..."))
# Calculating metrics for 20 program areas...
# cell metrics to roll up to program-area zone metrics
metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)
for (m_key in metrics) {
  # m_key = metrics[1]
  # get existing metric_seq
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)
  # skip (rather than fail) when a metric is missing or ambiguous
  if (length(m_seq) != 1) {
    message(glue(" Skipping {m_key} - metric not found"))
    next
  }
  # delete existing zone_metric values for this metric and program areas
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
WHERE metric_seq = {m_seq}
AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )
  # skip if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(metric_seq == !!m_seq) |>
    tally() |>
    pull(n)
  if (n_cm == 0) {
    message(glue(" Skipping {m_key} - no cell_metric values"))
    next
  }
  # calculate average score per program area weighted by cell coverage
  dbExecute(
    con_sdm,
    glue(
      "
INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
zc.zone_seq,
{m_seq} as metric_seq,
SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
FROM zone_cell zc
JOIN cell_metric cm ON zc.cell_id = cm.cell_id
WHERE
cm.metric_seq = {m_seq}
AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
AND cm.value IS NOT NULL
GROUP BY zc.zone_seq"
    )
  )
  message(glue(" Calculated {m_key}"))
}
# verify: join metrics back to program-area zones
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(metric_key %in% metrics) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(tbl == !!tbl_pra) |>
      select(zone_seq, pra_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pra_key)) |>
  collect()
d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1.195 11.186 21.902 27.317 41.150 91.748
```
### Calculate Ecoregion metrics contributing to score based on cell metrics
```{r}
#| label: cell_metrics_to_ecoregion_metrics
#| eval: !expr do_metrics
# get zone_seqs for ecoregions
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_er
  ) |>
  pull(zone_seq)
# tbl(con_sdm, "metric") |> pull(metric_key) |> sort()
# cell metrics to roll up to ecoregion zone metrics
metrics <- c(
  glue("extrisk_{sp_cats}"),
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod",
  "primprod_ecoregion_rescaled"
)
for (m_key in metrics) {
  # m_key = metrics[1]
  # get existing metric_seq; unlike the program-area chunk, missing metrics
  # abort here rather than being skipped
  m_seq <- tbl(con_sdm, "metric") |>
    filter(metric_key == !!m_key) |>
    pull(metric_seq)
  stopifnot(length(m_seq) == 1)
  # delete existing zone_metric values for this metric and ecoregions
  dbExecute(
    con_sdm,
    glue(
      "DELETE FROM zone_metric
WHERE metric_seq = {m_seq}
AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
    )
  )
  # stop if no cell_metric found
  n_cm <- tbl(con_sdm, "cell_metric") |>
    filter(
      metric_seq == !!m_seq
    ) |>
    tally() |>
    pull(n)
  stopifnot(n_cm > 0)
  # calculate average score per ecoregion weighted by cell coverage
  dbExecute(
    con_sdm,
    glue(
      "
INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
zc.zone_seq,
{m_seq} as metric_seq,
SUM(cm.value * zc.pct_covered) / SUM(zc.pct_covered) as value
FROM zone_cell zc
JOIN cell_metric cm ON zc.cell_id = cm.cell_id
WHERE
cm.metric_seq = {m_seq}
AND zc.zone_seq IN ({paste(z_seqs, collapse = ',')})
AND cm.value IS NOT NULL
GROUP BY zc.zone_seq"
    )
  )
}
# verify: per-ecoregion values for the rescaled metrics
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key %in% metrics
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_er
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()
d_ck |>
  filter(str_detect(metric_key, "_ecoregion_rescaled")) |>
  pull(value) |>
  summary()
# OLD:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.1106 12.9840 24.9402 28.6058 43.1917 83.1341
# NEW:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 2.365 13.926 21.852 26.010 35.288 80.208
# NEWEST:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 2.365 13.923 22.475 27.350 38.309 80.039
```
### Calculate cell_metric score: ExtinctionRisk + PrimProd (already EcoregionRescaled), equal weight
```{r}
#| label: cell_metric_score
#| eval: !expr do_metrics
# Create combined metric for extinction risk and primary productivity
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_description <- "Combined score of extinction risk per species category and primary productivity, equally weighted (and each previously rescaled [0,100] based on Ecoregional min/max values)"
# Check if metric exists and delete if it does
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
if (length(m_seq) > 0) {
  # use IN (...) so multiple stale rows are removed with one statement;
  # glue() on a vector m_seq would otherwise yield multiple SQL strings,
  # which dbExecute() cannot take
  seq_in <- paste(m_seq, collapse = ",")
  dbExecute(con_sdm, glue("DELETE FROM metric WHERE metric_seq IN ({seq_in})"))
  dbExecute(con_sdm, glue("DELETE FROM cell_metric WHERE metric_seq IN ({seq_in})"))
  dbExecute(con_sdm, glue("DELETE FROM zone_metric WHERE metric_seq IN ({seq_in})"))
}
# Create new metric
dbExecute(
  con_sdm,
  glue(
    "
INSERT INTO metric (metric_key, description)
VALUES ('{m_key}', '{m_description}')"
  )
)
m_seq <- dbGetQuery(
  con_sdm,
  glue(
    "
SELECT metric_seq FROM metric WHERE metric_key = '{m_key}'"
  )
) |>
  pull(metric_seq)
# Get all metric_keys for extinction risk metrics and primary productivity
metric_keys <- c(
  sapply(
    sp_cats,
    # str_replace_all (not str_replace) so category names containing more
    # than one space are fully converted to underscores
    \(sc) glue("extrisk_{str_replace_all(sc, ' ', '_')}_ecoregion_rescaled"),
    USE.NAMES = FALSE
  ),
  "primprod_ecoregion_rescaled"
)
# Equal-weight combination: per-cell average of the contributing metrics
dbExecute(
  con_sdm,
  glue(
    "
WITH metric_values AS (
SELECT
cm.cell_id,
m.metric_key,
cm.value
FROM cell_metric cm
JOIN metric m ON cm.metric_seq = m.metric_seq
WHERE m.metric_key IN ({paste(shQuote(metric_keys), collapse=', ')}) )
INSERT INTO cell_metric (cell_id, metric_seq, value)
SELECT
cell_id,
{m_seq} as metric_seq,
ROUND(AVG(value)) AS value
FROM metric_values
GROUP BY cell_id"
  )
)
# Check the new metric
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "cell_metric"),
    by = "metric_seq"
  ) |>
  collect()
d_ck |>
  pull(value) |>
  summary()
# OLD:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 10.00 16.00 21.28 31.00 89.00
# NEW:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 12.00 17.00 21.29 26.00 89.00
# NEWEST:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.0 12.0 18.0 22.1 28.0 89.0
```
### Calculate Score per ProgramArea from ProgramArea metrics
```{r}
#| label: calc_programarea_score_from_programarea_metrics
#| eval: !expr do_metrics
# metric: calculate score per program area
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)
# zone metrics contributing to the combined score
metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)
stopifnot(length(metric_input_keys) == length(m_input_seqs))
# get zone_seqs for program areas
z_seqs <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_pra
  ) |>
  pull(zone_seq)
# DEBUG check
# tbl(con_sdm, "zone_metric") |>
# filter(
# metric_seq == !!m_seq,
# zone_seq %in% z_seqs) |>
# arrange(desc(value)) |>
# pull(value) |>
# range()
# 5.704735 61.050130
# View()
# nrow() -> n_existing_zm
# z_seq == 134 # (ALA)
# delete existing zone_metric values for this metric and program areas
dbExecute(
  con_sdm,
  glue(
    "DELETE FROM zone_metric
WHERE metric_seq = {m_seq}
AND zone_seq IN ({paste(z_seqs, collapse = ',')})"
  )
)
# calculate average score per program area
# (SUM/COUNT over the non-null contributing zone metrics, i.e. their mean)
dbExecute(
  con_sdm,
  glue(
    "
INSERT INTO zone_metric (zone_seq, metric_seq, value)
SELECT
zone_seq,
{m_seq} as metric_seq,
SUM(value) / COUNT(value) as value
FROM zone_metric zm
WHERE
metric_seq IN ({paste(m_input_seqs, collapse = ',')})
AND zone_seq IN ({paste(z_seqs, collapse = ',')})
AND value IS NOT NULL
GROUP BY zone_seq"
  )
) # 20
# check the new metric
d_ck <- tbl(con_sdm, "metric") |>
  select(metric_seq, metric_key) |>
  filter(
    metric_key == !!m_key
  ) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "metric_seq"
  ) |>
  left_join(
    tbl(con_sdm, "zone") |>
      filter(
        tbl == !!tbl_pra
      ) |>
      select(zone_seq, pa_key = value),
    by = "zone_seq"
  ) |>
  filter(!is.na(pa_key)) |>
  collect()
d_ck |>
  pull(value) |>
  summary()
# PlanAreas:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 7.16 17.10 22.86 25.42 31.59 60.07
# NEW ProgramAreas:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 7.642 17.372 24.131 26.439 32.972 60.068
```
### Metric labels
Add display label columns to the metric table as the single source of truth for display names used in score tables, Word/Excel output, and flower plots.
```{r}
#| label: metric_labels
#| eval: true
# add display label columns to metric table if missing;
# the schema query is loop-invariant, so fetch the column list once
cols <- dbGetQuery(
  con_sdm,
  "SELECT column_name FROM information_schema.columns WHERE table_name = 'metric'"
)
for (col in c("metric_title", "metric_abbrev")) {
  if (!col %in% cols$column_name) {
    dbExecute(con_sdm, glue("ALTER TABLE metric ADD COLUMN {col} VARCHAR"))
  }
}
# populate labels for score-contributing metrics
metric_labels <- tribble(
  ~metric_key, ~metric_title, ~metric_abbrev,
  "score_extriskspcat_primprod_ecoregionrescaled_equalweights", "Score", "Score",
  "extrisk_bird_ecoregion_rescaled", "Bird", "Bird",
  "extrisk_coral_ecoregion_rescaled", "Coral", "Coral",
  "extrisk_fish_ecoregion_rescaled", "Fish", "Fish",
  "extrisk_invertebrate_ecoregion_rescaled", "Invertebrate", "Invertebrate",
  "extrisk_mammal_ecoregion_rescaled", "Mammal", "Mammal",
  "extrisk_other_ecoregion_rescaled", "Other", "Other",
  "extrisk_turtle_ecoregion_rescaled", "Turtle", "Turtle",
  "primprod_ecoregion_rescaled", "Primary Production", "Prim. Prod."
)
# expose the tibble to duckdb as a view, update by metric_key, then clean up
duckdb_register(con_sdm, "tmp_labels", metric_labels)
dbExecute(
  con_sdm,
  "
UPDATE metric
SET metric_title = t.metric_title,
metric_abbrev = t.metric_abbrev
FROM tmp_labels t
WHERE metric.metric_key = t.metric_key"
)
duckdb_unregister(con_sdm, "tmp_labels")
```
## Add/update `zone_taxon`
For showing species composition per zone (e.g. subregion, planning area, program area) in the mapgl app.
```{r}
#| label: zone_taxon
#| eval: !expr do_zone_taxon
# SLOW: collects a large zone × model × taxon join from the sdm database
# reference table of accepted taxa with display names and status fields
tbl_taxon <- tbl(con_sdm, "taxon") |>
  filter(is_ok) |>
  select(
    sp_cat,
    sp_common = common_name,
    sp_scientific = scientific_name,
    taxon_id,
    taxon_authority,
    rl_code = extrisk_code,
    er_score,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source,
    mdl_seq
  )
# for the USA subregion: per-taxon total area and average habitat suitability
# NOTE(review): avg_suit assumes model_cell.value is on a 0-100 scale — confirm
d <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!tbl_sr,
    value == "USA"
  ) |>
  select(zone_seq, zone_tbl = tbl, zone_fld = fld, zone_value = value) |>
  inner_join(
    tbl(con_sdm, "zone_cell") |>
      select(zone_seq, cell_id),
    by = join_by(zone_seq)
  ) |>
  inner_join(
    tbl(con_sdm, "model_cell") |>
      select(mdl_seq, cell_id, value) |>
      inner_join(
        tbl_taxon,
        by = join_by(mdl_seq)
      ),
    by = join_by(cell_id)
  ) |>
  inner_join(
    tbl(con_sdm, "cell") |>
      select(cell_id, area_km2),
    by = join_by(cell_id)
  ) |>
  group_by(
    zone_tbl,
    zone_fld,
    zone_value, # zone_seq,
    mdl_seq,
    sp_cat,
    sp_common,
    sp_scientific,
    taxon_id,
    taxon_authority,
    rl_code,
    er_score,
    is_mmpa,
    is_mbta,
    is_bcc,
    esa_code,
    esa_source
  ) |>
  summarize(
    area_km2 = sum(area_km2, na.rm = T),
    avg_suit = mean(value, na.rm = T) / 100,
    .groups = "drop"
  ) |>
  collect() # 220,534 × 12
# taxon_usa <- d |>
# filter(
# zone_fld == "subregion_key",
# zone_value == "USA") |>
# pull(taxon_id)
#
# tbl_taxon |>
# filter(!taxon_id %in% taxon_usa) |>
# select(sp_cat, sp_common, sp_scientific, taxon_id, rl_code, mdl_seq) |>
# arrange(sp_cat, sp_common, sp_scientific, taxon_id, rl_code, mdl_seq) |>
# collect() |>
# print(n = Inf)
#
# A tibble: 46 × 6
# sp_cat sp_common sp_scientific taxon_id rl_code mdl_seq
# <chr> <chr> <chr> <dbl> <chr> <int>
# 1 bird African Collared-dove Streptopelia… 2.27e7 LC 18033
# 2 bird Aplomado Falcon Falco femora… 2.27e7 EN 18359
# 3 bird Bay-breasted Warbler Setophaga ca… 2.27e7 LC 18156
# 4 bird Blue-and-yellow Macaw Ara ararauna 2.27e7 LC 17601
# 5 bird Brown-throated Parakeet Eupsittula p… 2.27e7 LC 18136
# 6 bird Cerulean Warbler Setophaga ce… 2.27e7 NT 17999
# 7 bird Common Greenshank Tringa nebul… 2.27e7 LC 18060
# 8 bird Common Ringed Plover Charadrius h… 2.27e7 LC 17677
# 9 bird Common Vermilion Flycatch… Pyrocephalus… 1.04e8 LC 18275
# 10 bird Dark-eyed Junco Junco hyemal… 2.27e7 LC 18140
# 11 bird Ferruginous Pygmy-owl Glaucidium b… 2.27e7 TN 18366
# 12 bird House Finch Haemorhous m… 2.27e7 LC 17781
# 13 bird Island Canary Serinus cana… 2.27e7 LC 17995
# 14 bird Lesser Antillean Pewee Contopus lat… 2.27e7 LC 17700
# 15 bird Long-eared Owl Asio otus 2.27e7 LC 17617
# 16 bird Middendorff's Grasshopper… Helopsaltes … 2.27e7 LC 18138
# 17 bird Nanday Parakeet Aratinga nen… 2.27e7 LC 17602
# 18 bird Orange-fronted Parakeet Eupsittula c… 2.27e7 VU 17740
# 19 bird Pin-tailed Whydah Vidua macrou… 2.27e7 LC 18078
# 20 bird Pine Warbler Setophaga pi… 2.27e7 LC 18157
# 21 bird Puerto Rican Emerald Riccordia ma… 2.27e7 LC 17987
# 22 bird Purple Sandpiper Calidris mar… 2.27e7 LC 18133
# 23 bird Red Siskin Spinus cucul… 2.27e7 EN 18021
# 24 bird Russet-backed Thrush Catharus ust… 1.04e8 LC 18252
# 25 bird Say's Phoebe Sayornis saya 2.27e7 LC 17992
# 26 bird Siberian Accentor Prunella mon… 2.27e7 LC 17939
# 27 bird Spot-breasted Oriole Icterus pect… 2.27e7 LC 17803
# 28 bird Spruce Grouse Canachites c… 6.12e7 LC 17663
# 29 bird Swainson's Thrush Catharus swa… 1.04e8 LC 18251
# 30 bird Two-barred Crossbill Loxia leucop… 2.27e7 LC 18146
# 31 bird Venezuelan Troupial Icterus icte… 2.27e7 LC 17802
# 32 bird Western Meadowlark Sturnella ne… 2.27e7 LC 18161
# 33 bird White-tailed Sea-eagle Haliaeetus a… 2.27e7 LC 17782
# 34 bird Yellow-crowned Amazon Amazona ochr… 2.27e7 LC 17573
# 35 bird Yellow-headed Amazon Amazona orat… 2.27e7 EN 17574
# 36 bird Yellow-throated Vireo Vireo flavif… 2.27e7 LC 18080
# 37 fish Arctic telescope Protomyctoph… 1.59e5 LC 17531
# 38 fish Cortez sea chub Kyphosus ele… 2.74e5 LC 17509
# 39 fish Gulf grouper Mycteroperca… 2.74e5 EN 17506
# 40 fish daggertooth pike conger Muraenesox c… 1.26e5 LC 17503
# 41 fish prowspine cusk-eel Lepophidium … 2.76e5 LC 17513
# 42 fish speckledtail flounder Engyophrys s… 2.76e5 LC 17523
# 43 invertebrate waved whelk Buccinum und… 1.39e5 NA 17224
# 44 invertebrate NA Paroediceros… 1.03e5 NA 17127
# 45 invertebrate NA Quadrimaera … 4.22e5 NA 8245
# 46 invertebrate NA Scabrotropho… 1.47e5 NA 17229
# suitability × extinction-risk weighting, absolute and area-weighted
d <- d |>
  mutate(
    suit_rl = avg_suit * er_score / 100,
    suit_rl_area = avg_suit * er_score / 100 * area_km2
  ) |>
  # total weighted area per zone × species category, then each taxon's share
  group_by(zone_fld, zone_value, sp_cat) |>
  mutate(
    cat_suit_rl_area = sum(suit_rl_area, na.rm = T)
  ) |>
  ungroup() |>
  mutate(
    pct_cat = suit_rl_area / cat_suit_rl_area
  )
# d_akl48 <- d |>
# filter(
# zone_fld == "subregion_key",
# zone_value == "AKL48")
# # nrow(d_akl48) # 10,377 species
#
# d_akl48_sum <- d_akl48 |>
# inner_join(
# tbl(con_sdm, "taxon") |>
# filter(
# taxon_id %in% d_akl48$taxon_id) |>
# select(taxon_id, n_ds) |>
# collect(),
# by = join_by(taxon_id)) |>
# group_by(sp_cat) |>
# summarize(
# n_species = n(),
# n_models = sum(n_ds))
# d_akl48_sum |>
# summarize(
# total_species = sum(n_species),
# total_models = sum(n_models))
# # total_species total_models
# # <int> <int>
# # 1 10377 10461
# #
# # sp_cat n_species n_models
# # <chr> <int> <int>
# # 1 bird 247 280
# # 2 coral 342 349
# # 3 fish 3,299 3,314
# # 4 invertebrate 5,452 5,453
# # 5 mammal 72 84
# # 6 other 953 953
# # 7 reptile 12 28
# # Total 10,377 10,461
#
# write_csv(d_akl48, zone_taxon_akl48_csv)
# persist zone × taxon summary to CSV and as a table in the sdm database
write_csv(d, zone_taxon_csv)
# table name is a constant, so no glue() needed; TRUE over the T alias
dbWriteTable(con_sdm, "zone_taxon", d, overwrite = TRUE)
```
## Update PostGIS layers with metrics from DuckDB
These layers are used in the mapgl app (via pg_tileserv) and need to be updated with the new metrics calculated in DuckDB. The `update_table_with_clear` function adds new metric columns if they don't exist, clears existing values for those metrics, and then updates with new values from DuckDB.
### Update Zone Tables in PostGIS with Metrics
```{r}
#| label: setup_update_pgtable_with_clear
#| eval: !expr do_postgis
#' Refresh metric columns in a PostGIS table from a data frame.
#'
#' Matches rows on `key_col`; adds any missing metric columns as
#' DOUBLE PRECISION, clears existing values for the columns being refreshed,
#' then updates from a temporary staging table.
#'
#' @param con DBI connection to PostGIS.
#' @param df data frame with `key_col` plus one column per metric.
#' @param tbl name of the existing PostGIS table to update.
#' @param key_col join key present in both `df` and `tbl`.
#' @return number of rows updated (integer).
update_table_with_clear <- function(con, df, tbl, key_col = "programarea_key") {
  # df = d_pa_metrics; tbl = "ply_planareas_2025"; key_col = "planarea_key"
  stopifnot(dbExistsTable(con, tbl))
  existing_cols <- dbListFields(con, tbl)
  stopifnot(key_col %in% existing_cols)
  stopifnot(key_col %in% names(df))
  source_cols <- setdiff(names(df), key_col)
  # Add missing columns
  # NOTE: column names are interpolated unescaped; assumes metric_keys are
  # valid SQL identifiers (lowercase, no spaces)
  for (col in source_cols) {
    if (!col %in% existing_cols) {
      add_col_query <- glue(
        "ALTER TABLE {tbl} ADD COLUMN {col} DOUBLE PRECISION"
      )
      dbExecute(con, add_col_query)
      message("Added column: ", col)
    }
  }
  # Clear existing values only for pre-existing columns (new ones are NULL)
  existing_source_cols <- intersect(source_cols, existing_cols)
  if (length(existing_source_cols) > 0) {
    clear_clauses <- paste0(existing_source_cols, " = NULL", collapse = ", ")
    clear_query <- glue("UPDATE {tbl} SET {clear_clauses}")
    dbExecute(con, clear_query)
    message(
      "Cleared existing values for: ",
      paste(existing_source_cols, collapse = ", "),
      "\n"
    )
  }
  # Stage new values in a temp table, then update by key
  dbWriteTable(
    con,
    "temp_metrics_update",
    df,
    temporary = TRUE,
    overwrite = TRUE
  )
  # ensure the staging table is dropped even if the UPDATE below fails
  on.exit(dbRemoveTable(con, "temp_metrics_update", force = TRUE), add = TRUE)
  set_clauses <- paste0(source_cols, " = temp.", source_cols, collapse = ", ")
  update_query <- glue(
    "
UPDATE {tbl}
SET {set_clauses}
FROM temp_metrics_update temp
WHERE {tbl}.{key_col} = temp.{key_col}
"
  )
  rows_affected <- dbExecute(con, update_query)
  message("Updated ", rows_affected, " rows")
  return(rows_affected)
}
# update zone.tbl to include version suffix ----
# report which zone.tbl values would be renamed to carry the version suffix
zone_tbls <- tbl(con_sdm, "zone") |>
  distinct(tbl) |>
  pull(tbl)
for (zt in zone_tbls) {
  # append v_sfx only when the name does not already end in _v<digits>
  already_versioned <- grepl("_v\\d+$", zt)
  zt_v <- if (already_versioned) zt else paste0(zt, v_sfx)
  if (zt != zt_v) {
    # the actual UPDATE is intentionally disabled; only log the rename
    # dbExecute(
    #   con_sdm,
    #   glue(
    #     "UPDATE zone SET tbl = '{zt_v}' WHERE tbl = '{zt}'"
    #   )
    # )
    cat(glue("zone.tbl: {zt} -> {zt_v} ~ {Sys.time()}"))
  }
}
```
```{r}
#| label: create_pg_v3_tables
#| eval: !expr do_postgis
# create versioned PostGIS tables from existing tables ----
# zone tables to version; tbl_er and tbl_pra already carry v_sfx (e.g. "_v3")
pg_zone_tbls <- c(
  tbl_er,
  tbl_pra
)
for (pg_tbl in pg_zone_tbls) {
  # pg_tbl already includes v_sfx (e.g. _v3) from tbl_er, tbl_pra
  pg_tbl_base <- str_remove(pg_tbl, v_sfx)
  if (!dbExistsTable(con, pg_tbl)) {
    # clone the contents of the unversioned table into the versioned one
    dbExecute(con, glue("CREATE TABLE {pg_tbl} AS SELECT * FROM {pg_tbl_base}"))
    message(glue("Created {pg_tbl} from {pg_tbl_base}"))
  } else {
    message(glue("{pg_tbl} already exists"))
  }
}
```
### Load Program Area Scores to PostGIS
```{r}
#| label: pg_update_programarea_metrics
#| eval: !expr do_postgis
# delete zone_metric rows without a metric_key (orphans from earlier deletes)
dbExecute(
  con_sdm,
  "
DELETE FROM zone_metric
WHERE metric_seq NOT IN (SELECT metric_seq FROM metric)"
) # 0
# check if table exists in PostGIS
if (!dbExistsTable(con, tbl_pra)) {
  stop("Run create_pg_v3_tables R chunk above")
}
# get program area metrics from DuckDB, pivoted to one column per metric_key
d_pra_metrics <- tbl(con_sdm, "zone") |>
  filter(tbl == !!tbl_pra) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric") |>
      mutate(value = round(value, 1)),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  filter(
    !is.na(programarea_key),
    !is.na(metric_key),
    !is.na(value)
  ) |>
  pivot_wider(
    names_from = metric_key,
    values_from = value
  ) |>
  collect()
message(glue("Loaded {nrow(d_pra_metrics)} program area metrics"))
# For complete refresh (clears existing values first)
update_table_with_clear(
  con,
  d_pra_metrics,
  tbl_pra,
  "programarea_key"
)
```
```{r}
#| label: pg_rm_old_metrics
#| eval: !expr do_postgis
# drop metric columns from PostGIS that are no longer produced from DuckDB:
# columns present in the db table but absent from d_pra_metrics, restricted
# to extrisk_* columns or a stray "na" column
flds_pra_df <- names(d_pra_metrics)
flds_pra_db <- dbListFields(con, tbl_pra)
flds_pra_rm <- setdiff(flds_pra_db, flds_pra_df) |> # fixed typo: fdls -> flds
  str_subset("(extrisk_)|(^na$)")
d_rm <-
  tibble(
    tbl = tbl_pra,
    fld = flds_pra_rm
  )
for (i in seq_len(nrow(d_rm))) {
  # i = 1
  tbl <- d_rm$tbl[i]
  fld <- d_rm$fld[i]
  dbExecute(con, glue('ALTER TABLE {tbl} DROP COLUMN "{fld}"'))
  message("Removed column: ", fld, " from ", tbl)
}
```
### Add index for vector tiles
per [Feature id | pg_tileserv](https://github.com/CrunchyData/pg_tileserv?tab=readme-ov-file#feature-id)
```{r}
#| label: add_index_for_vector_tiles
#| eval: !expr do_postgis
# add integer primary key to postgres programareas table
# (pg_tileserv requires an integer feature id column)
dbExecute(
  con,
  glue(
    "ALTER TABLE public.ply_programareas_2026{v_sfx} ALTER COLUMN programarea_id TYPE INTEGER;"
  )
)
# ecoregions table gets a serial primary key added if not already present
dbExecute(
  con,
  glue(
    "ALTER TABLE public.ply_ecoregions_2025{v_sfx} ADD COLUMN IF NOT EXISTS ecoregion_id SERIAL PRIMARY KEY;"
  )
)
```
## Generate downloads
For sharing with BOEM and external partners.
```{r}
#| label: downloads
#| eval: !expr do_downloads
# force regeneration of all download products below
redo_dl <- TRUE # TRUE over the reassignable T alias
# ecoregion gpkg destination (input copy, no metrics — just copy to dir_v)
er_gpkg_dst <- glue("{dir_v}/ply_ecoregions_2025.gpkg")
# redo_dl: delete existing files so the generators below re-run
if (redo_dl) {
  for (f in c(
    pra_gpkg,
    lyrs_csv,
    metrics_tif,
    metrics_pra_tif,
    er_gpkg_dst
  )) {
    if (file_exists(f)) {
      file_delete(f)
    }
  }
}
# * pra_gpkg ----
# export Program Areas from PostGIS to a GeoPackage
if (!file.exists(pra_gpkg)) {
  message("Generating Program Areas geopackage...")
  st_read(con, tbl_pra) |>
    st_write(pra_gpkg, delete_dsn = TRUE, quiet = TRUE) # TRUE over T alias
}
# * lyrs_csv ----
# catalog of raster layers: lyr = metric_key, layer = display label
if (!file.exists(lyrs_csv)) {
  message("Generating layers CSV...")
  d_lyrs <- bind_rows(
    tibble(
      order = 1,
      category = "Overall",
      layer = "score",
      lyr = "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
    ),
    tibble(
      order = 2,
      category = "Species, rescaled by Ecoregion",
      layer = glue("{sp_cats}: ext. risk, ecorgn"),
      lyr = glue("extrisk_{sp_cats}_ecoregion_rescaled")
    ),
    tibble(
      order = 3,
      category = "Primary Productivity, rescaled by Ecoregion",
      # no interpolation needed, so plain strings instead of glue()
      layer = "prim prod, ecorgn",
      lyr = "primprod_ecoregion_rescaled"
    ),
    tibble(
      order = 4,
      category = "Species, raw Extinction Risk",
      layer = glue("{sp_cats}: ext. risk"),
      lyr = glue("extrisk_{sp_cats}")
    ),
    tibble(
      order = 5,
      category = "Primary Productivity, raw Phytoplankton",
      # layer/lyr ordered consistently with the tibbles above
      layer = "prim prod, 2014-2023 avg (mg C/m^2/day)",
      lyr = "primprod"
    )
  )
  write_csv(d_lyrs, lyrs_csv)
}
# * metrics_tif ----
# build a multi-layer GeoTIFF: categorical zone layers + one layer per metric
if (!file.exists(metrics_tif)) {
  message("Generating Metrics raster...")
  d_lyrs <- read_csv(lyrs_csv)
  # add zones: one categorical layer per zone field; where a cell overlaps
  # multiple zones, keep the zone with the greatest pct_covered
  lst <- list()
  for (zone_fld in c("ecoregion_key", "programarea_key")) {
    # zone_fld = "ecoregion_key"
    # zone_fld = "programarea_key"
    d <- tbl(con_sdm, "zone") |>
      filter(fld == !!zone_fld) |>
      left_join(
        tbl(con_sdm, "zone_cell"),
        by = "zone_seq"
      ) |>
      group_by(cell_id) |>
      window_order(pct_covered) |>
      summarize(
        value = last(value),
        .groups = "drop"
      ) |>
      collect() |>
      mutate(
        "{zone_fld}" := as.factor(value)
      ) |>
      select(all_of(c("cell_id", zone_fld)))
    r <- init(r_cell[[1]], NA)
    # factor assignment stores the integer codes; levels attached below
    r[d$cell_id] <- d[[zone_fld]]
    varnames(r) <- zone_fld
    names(r) <- zone_fld
    levels(r) <- tibble(
      # seq_along (not 1:length) is safe when there are zero levels
      id = seq_along(levels(d[[zone_fld]])),
      "{zone_fld}" := levels(d[[zone_fld]])
    )
    lst[[zone_fld]] <- r
  }
  r_zones <- do.call(c, lst |> unname())
  # add layers: one raster per cataloged metric
  lst <- list()
  for (i in seq_len(nrow(d_lyrs))) {
    # i = 2
    lyr <- d_lyrs$lyr[i]
    layer <- d_lyrs$layer[i]
    r <- get_rast(lyr) # plot(r)
    names(r) <- lyr
    varnames(r) <- layer
    lst[[i]] <- r
  }
  r_metrics <- do.call(c, lst)
  # add log of primprod
  r_metrics[["primprod_log"]] <- log(r_metrics[["primprod"]])
  # show layer names
  names(r_metrics) |> sort() |> print()
  r_metrics <- c(r_zones, r_metrics)
  writeRaster(r_metrics, metrics_tif, overwrite = TRUE)
}
# * metrics_pra_tif ----
if (!file.exists(metrics_pra_tif)) {
  message("Generating Metrics raster masked to Program Areas...")
  # mask to study area of Program Areas
  # subregion_keys in sr_pra_csv: USA (all 20), AK (15), PA (3), GA (2)
  # AK + L48 = USA (all program areas)
  sr <- "USA"
  # NOTE(review): pra_sr is computed but not used below — confirm intended
  pra_sr <- read_csv(sr_pra_csv) |>
    filter(subregion_key == !!sr) |>
    pull(programarea_key)
  r_metrics <- rast(metrics_tif)
  # mask every layer by the programarea_key layer, then trim NA margins
  r_mask <- subset(r_metrics, "programarea_key")
  r_metrics_pra <- r_metrics |>
    mask(r_mask) |>
    trim()
  writeRaster(r_metrics_pra, metrics_pra_tif, overwrite = T)
}
# * er_gpkg_dst (input, no metrics — just copy to dir_v) ----
# ship the raw ecoregions gpkg alongside the derived products
if (!file.exists(er_gpkg_dst)) {
  message("Copying ecoregion gpkg to dir_v...")
  file_copy(er_raw_gpkg, er_gpkg_dst)
}
```
```{r}
#| label: old_pa_metrics_geo
#| eval: false
# NOTE(review): legacy planning-area export; this chunk is disabled (eval: false)
pa_metrics_geo <- glue(
  "{dir_derived}/OCS_Proposed_Planning_Area_Polygon_Env_Sensitivity_Metrics.geojson"
)
# * pa_metrics_geo ----
if (!file.exists(pa_metrics_geo)) {
  pa_boem_geo <- glue("{dir_data}/raw/boem.gov/OCS_Area_Polygons.geojson")
  pa <- read_sf(pa_gpkg)
  pa_boem <- read_sf(pa_boem_geo)
  # BOEM planning areas with no key match in pa (Gulf keys differ: CG/EG/WG)
  (pa_miss <- pa_boem |>
    st_drop_geometry() |>
    anti_join(
      pa |>
        st_drop_geometry() |>
        select(planarea_key, planarea_name),
      by = join_by(PLANNING_AREA == planarea_key)
    ) |>
    pull(PLANNING_AREA))
  # [1] "CG" "EG" "WG"
  pa_boem |>
    filter(PLANNING_AREA %in% pa_miss) |>
    mapview::mapView(zcol = "PLANNING_AREA")
  pa |>
    st_drop_geometry() |>
    filter(
      region_name == "Gulf of America"
    ) |>
    select(planarea_key, planarea_name)
  # planarea_key planarea_name
  # <chr> <chr>
  # 1 CGA Central Gulf of America
  # 2 EGA Eastern Gulf of America
  # 3 WGA Western Gulf of America
  # join metrics onto BOEM polygons, remapping Gulf keys to BOEM's codes
  pa_metrics <- pa_boem |>
    left_join(
      pa |>
        st_drop_geometry() |>
        mutate(
          PLANNING_AREA = case_match(
            planarea_key,
            "CGA" ~ "CG",
            "EGA" ~ "EG",
            "WGA" ~ "WG",
            .default = planarea_key
          )
        ),
      by = join_by(PLANNING_AREA)
    )
  write_sf(pa_metrics, pa_metrics_geo)
}
```
## Summary data products
Merged from `msens-summary_programareas.qmd`: ecoregion × program area intersections, label placement points, and score tables.
```{r}
#| label: summary_data_redo
#| eval: !expr do_summary_data
# When a redo of the summary data is requested, remove the previously
# generated outputs so the chunks below regenerate them from scratch.
# lbls_csv is intentionally excluded: its label placements are hand-tuned.
if (redo_summary_data) {
  stale_files <- c(er_pra_gpkg, er_pra_csv, er_pra_n_csv, pra_n_csv)
  stale_files <- stale_files[file_exists(stale_files)]
  if (length(stale_files) > 0) {
    file_delete(stale_files)
  }
}
```
### Ecoregion × Program Area intersections
```{r}
#| label: read_plys
#| eval: !expr do_summary_data
# Load ecoregion and program-area polygons (paths defined in setup chunk);
# both objects are reused by the figure chunks below.
er <- read_sf(er_raw_gpkg)
pra <- read_sf(pra_gpkg)
# Build the ecoregion × program-area intersection layer once; subsequent
# runs read the cached GeoPackage instead.
if (!file.exists(er_pra_gpkg)) {
  er_pra <- st_intersection(
    er |>
      select(ecoregion_key, ecoregion_name),
    pra |>
      select(region_key, region_name, programarea_key, programarea_name)
  ) |>
    st_make_valid() |>
    mutate(
      # NOTE(review): assumes the geometry column is named `geom` — confirm
      # against the GeoPackage layer definitions
      area_km2 = st_area(geom) |> set_units(km^2) |> drop_units(),
      ctr_lon = st_coordinates(st_centroid(geom))[, 1],
      ctr_lat = st_coordinates(st_centroid(geom))[, 2]
    ) |>
    # drop sliver intersections (< 1 km2)
    filter(area_km2 > 1) |>
    arrange(programarea_key, ecoregion_key) |>
    group_by(programarea_key) |>
    mutate(
      # percent of each program area's intersected area per ecoregion
      pct_programarea_in_ecoregion = round(
        area_km2 / sum(area_km2) * 100,
        2
      ),
      # blank out 100% (program area falls entirely within one ecoregion)
      pct_programarea_in_ecoregion = na_if(
        pct_programarea_in_ecoregion,
        100.0
      )
    ) |>
    ungroup()
  # order columns/rows and round numeric outputs for the published files
  er_pra <- er_pra |>
    select(
      region_key,
      region_name,
      ecoregion_key,
      ecoregion_name,
      programarea_key,
      programarea_name,
      pct_programarea_in_ecoregion,
      area_km2,
      ctr_lon,
      ctr_lat
    ) |>
    arrange(region_key, ecoregion_key, programarea_key) |>
    mutate(
      area_km2 = round(area_km2, 2),
      ctr_lon = round(ctr_lon, 4),
      ctr_lat = round(ctr_lat, 4)
    )
  # write attribute-only CSV alongside the spatial GeoPackage
  er_pra |>
    st_drop_geometry() |>
    write_csv(er_pra_csv, na = "")
  write_sf(er_pra, er_pra_gpkg, delete_dsn = T)
}
er_pra <- read_sf(er_pra_gpkg)
```
### Label placement points
```{r}
#| label: ply_lbl_pts
#| eval: !expr do_summary_data
# Point layers for map label placement, built from the centroid columns
# (ctr_lon/ctr_lat) carried on the polygon attribute tables.
er_pts <- er |>
  st_drop_geometry() |>
  select(
    ecoregion_key,
    ecoregion_name,
    region_name,
    region_key,
    ctr_lon,
    ctr_lat
  ) |>
  st_as_sf(coords = c("ctr_lon", "ctr_lat"), crs = 4326)
pra_pts <- pra |>
  st_drop_geometry() |>
  select(
    programarea_key,
    programarea_name,
    region_name,
    region_key,
    ctr_lon,
    ctr_lat
  ) |>
  st_as_sf(coords = c("ctr_lon", "ctr_lat"), crs = 4326)
# keep file-exists guard — label placement is manually adjusted
if (!file.exists(lbls_csv)) {
  # seed default label placement (anchor/justify/offset) for every
  # ecoregion and program area; the CSV is then hand-edited
  bind_rows(
    er_pts |> st_drop_geometry() |> select(ecoregion_key),
    pra_pts |> st_drop_geometry() |> select(programarea_key)
  ) |>
    arrange(ecoregion_key, programarea_key) |>
    mutate(
      # program-area labels center on the point; ecoregion labels sit above
      text_anchor = ifelse(!is.na(programarea_key), "center", "top"),
      text_justify = "center",
      text_offset_right = 0,
      text_offset_down = 0
    ) |>
    write_csv(lbls_csv, na = "")
}
```
### Score tables
```{r}
#| label: pra_score_table
#| eval: !expr do_summary_data
# Build the ecoregion × program-area score table and write er_pra_n_csv.
# Side effects: d_metric_labels, flds, flds_rn and d_er_pra_n are reused
# by the flextable and flower-plot chunks below.
d_pra <- read_sf(pra_gpkg) |>
  st_drop_geometry()
# get field-to-label mapping from metric table (single source of truth)
d_metric_labels <- tbl(con_sdm, "metric") |>
  filter(!is.na(metric_title)) |>
  select(metric_key, metric_title, metric_abbrev) |>
  collect()
# fields present in program area data that have labels
flds <- intersect(names(d_pra), d_metric_labels$metric_key)
# named vector: metric_key -> short name for column renaming
flds_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(
    short = metric_key |>
      str_replace(
        "score_extriskspcat_primprod_ecoregionrescaled_equalweights",
        "score"
      ) |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("extrisk_", "")
  ) |>
  pull(short, name = metric_key)
# ecoregion header rows with NA for all metric columns
na_cols <- setNames(
  rep(list(NA_real_), length(flds_rn) + 1),
  c("area_km2", unname(flds_rn))
)
d_er_n <- read_csv(er_pra_csv) |>
  distinct(
    region_key,
    region_name,
    ecoregion_key,
    ecoregion_name
  ) |>
  bind_cols(as_tibble(na_cols))
# program-area detail rows with per-area metric values joined on
d_pra_n <- read_csv(er_pra_csv) |>
  select(
    region_name,
    ecoregion_name,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion
  ) |>
  left_join(
    d_pra |>
      select(all_of(c("programarea_key", "area_km2", flds))) |>
      rename_with(~ flds_rn[.x], all_of(flds)),
    by = "programarea_key"
  )
# interleave ecoregion header rows with program-area detail rows;
# "ecoregion" sorts before "programarea", so each header precedes its rows
d_er_pra_n <- bind_rows(
  d_er_n |> mutate(row_type = "ecoregion"),
  d_pra_n |> mutate(row_type = "programarea")
) |>
  relocate(
    row_type,
    programarea_key,
    programarea_name,
    pct_programarea_in_ecoregion,
    .after = ecoregion_name
  ) |>
  arrange(region_name, ecoregion_name, row_type, programarea_name) |>
  mutate(
    # " [x%]" suffix for program areas only partially within an ecoregion
    # (.trim = F keeps the leading space)
    pct = ifelse(
      !is.na(pct_programarea_in_ecoregion),
      glue(" [{round(pct_programarea_in_ecoregion, 1)}%]", .trim = F),
      ""
    ),
    row_lbl = case_match(
      row_type,
      "ecoregion" ~ glue("{region_name}: {ecoregion_name} ({ecoregion_key})"),
      "programarea" ~ glue("· {programarea_name} ({programarea_key}){pct}")
    )
  ) |>
  relocate(row_lbl)
d_er_pra_n |>
  select(
    row_type,
    region_name,
    ecoregion_name,
    ecoregion_key,
    programarea_name,
    programarea_key,
    pct_programarea_in_ecoregion,
    row_lbl,
    score,
    primprod,
    any_of(sp_cats),
    area_km2
  ) |>
  write_excel_csv(er_pra_n_csv)
```
## Summary figures and tables
Merged from `msens-summary_programareas.qmd`: maps, flower plots, primary productivity charts, and Word/Excel tables.
```{r}
#| label: summary_figs_redo
#| eval: !expr do_summary_figs
# Ensure the figure output directory exists before any figures are written.
dir_create(dir_figs)
# On redo, clear previously rendered figures so the chunks below produce
# a clean, complete set.
if (redo_summary_figs) {
  existing_figs <- dir_ls(dir_figs)
  if (length(existing_figs) > 0) {
    file_delete(existing_figs)
  }
}
```
### Program Area boundary maps
```{r}
#| label: map_imgs_pra_ply_l48_ak
#| eval: !expr do_summary_figs
# Render static PNG maps of program-area scores for Alaska and the
# Lower 48, with ecoregion outlines and manually-placed labels.
d_lbls <- read_csv(lbls_csv)
# score metric symbolized as the polygon fill color
var_pra <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
n_cols <- 11
# color ramp + breaks spanning the full range of program-area scores
rng_pra <- pra |>
  pull({{ var_pra }}) |>
  range()
cols_pra <- rev(RColorBrewer::brewer.pal(n_cols, "Spectral"))
brks_pra <- seq(rng_pra[1], rng_pra[2], length.out = n_cols)
for (is_ak in c(T, F)) {
  if (is_ak) {
    # alaska
    # shift longitudes to 0–360 so features spanning the antimeridian
    # render contiguously
    er_area <- er |>
      filter(region_key == "AK") |>
      st_shift_longitude()
    pra_area <- pra |>
      filter(region_key == "AK") |>
      st_shift_longitude()
    er_pts_area <- er_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(region_key == "AK") |>
      st_shift_longitude() |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )
    lgnd_pos <- "top-left"
  } else {
    # lower 48: select features intersecting the non-AK bounding box
    box_lower48 <- pra |>
      filter(region_key != "AK") |>
      st_bbox() |>
      st_as_sfc()
    er_area <- er |>
      st_filter(box_lower48, .predicate = st_intersects)
    pra_area <- pra |>
      st_filter(box_lower48, .predicate = st_intersects)
    er_pts_area <- er_pts |>
      filter(ecoregion_key %in% er_area$ecoregion_key) |>
      left_join(
        d_lbls |> select(-programarea_key),
        by = "ecoregion_key"
      )
    pra_pts_area <- pra_pts |>
      filter(programarea_key %in% pra_area$programarea_key) |>
      left_join(
        d_lbls |> select(-ecoregion_key),
        by = "programarea_key"
      )
    lgnd_pos <- "bottom-left"
  }
  scale_pos <- "bottom-right"
  # vector tile sources served by the msens API
  er_url <- glue(
    "https://api.marinesensitivity.org/tilejson?table=public.ply_ecoregions_2025{v_sfx}&use_cache=FALSE"
  )
  pra_url <- glue(
    "https://api.marinesensitivity.org/tilejson?table=public.ply_programareas_2026{v_sfx}&use_cache=FALSE"
  )
  # mapbox-gl "in" filters restricting tile features to the current region
  er_filter <- c("in", "ecoregion_key", er_area$ecoregion_key)
  pra_filter <- c("in", "programarea_key", pra_area$programarea_key)
  m <- mapboxgl(
    style = mapbox_style("dark"),
    projection = "globe"
  ) |>
    fit_bounds(bbox = er_area) |>
    add_vector_source(
      id = "pra_src",
      url = pra_url
    ) |>
    add_vector_source(
      id = "er_src",
      url = er_url
    ) |>
    # program areas filled by interpolated score color
    add_fill_layer(
      id = "pra_fill",
      source = "pra_src",
      source_layer = glue("public.ply_programareas_2026{v_sfx}"),
      filter = pra_filter,
      fill_color = mapgl::interpolate(
        column = var_pra,
        values = brks_pra,
        stops = cols_pra
      )
    ) |>
    # thick light-gray ecoregion outlines under thin black program-area outlines
    add_line_layer(
      id = "er_ln",
      source = "er_src",
      source_layer = glue("public.ply_ecoregions_2025{v_sfx}"),
      filter = er_filter,
      line_color = "lightgray",
      line_opacity = 1,
      line_width = 5
    ) |>
    add_line_layer(
      id = "pra_ln",
      source = "pra_src",
      source_layer = glue("public.ply_programareas_2026{v_sfx}"),
      filter = pra_filter,
      line_color = "black",
      line_opacity = 1,
      line_width = 1
    ) |>
    # label anchors/offsets come from the hand-tuned lbls_csv
    add_symbol_layer(
      id = "pra_symbol",
      source = pra_pts_area,
      filter = pra_filter,
      text_field = get_column("programarea_key"),
      text_allow_overlap = T,
      text_anchor = get_column("text_anchor"),
      text_justify = get_column("text_justify"),
      text_offset = list(
        get_column("text_offset_right"),
        get_column("text_offset_down")
      )
    ) |>
    add_symbol_layer(
      id = "er_symbol",
      source = er_pts_area,
      filter = er_filter,
      text_field = get_column("ecoregion_name"),
      text_color = "black",
      text_allow_overlap = T,
      text_anchor = get_column("text_anchor"),
      text_justify = get_column("text_justify"),
      text_offset = list(
        get_column("text_offset_right"),
        get_column("text_offset_down")
      ),
      text_halo_color = "lightgray",
      text_halo_width = 2,
      text_halo_blur = 2,
      text_line_height = 1.5,
      text_size = 20
    ) |>
    mapgl::add_legend(
      "Score",
      values = rng_pra,
      colors = cols_pra,
      position = lgnd_pos
    ) |>
    add_scale_control(position = scale_pos)
  # save widget to temp html, screenshot to png, then remove the html
  b <- glue("{dir_figs}/map_pra_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}")
  b_html <- glue("{b}.html")
  b_files <- glue("{b}_files")
  b_png <- glue("{b}.png")
  saveWidget(m, b_html, selfcontained = F, background = "transparent")
  webshot(
    b_html,
    b_png,
    vwidth = 1200,
    vheight = 800,
    zoom = 2,
    selector = "#htmlwidget_container",
    delay = 2,
    quiet = T
  )
  file_delete(b_html)
  dir_delete(b_files)
}
```
### Cell score & NPP maps
```{r}
#| label: map_imgs_cell_l48_ak
#| eval: !expr do_summary_figs
# Render static PNG maps of the cell-level Score and NPP rasters for
# Alaska and the Lower 48 (2 variables × 2 regions = 4 maps).
vgpm_tif <- glue(
  "{dir_data}/raw/oregonstate.edu/vgpm.r2022.v.chl.v.sst.2160x4320_2014-2023.avg.sd_planareas.tif"
)
r_metrics <- rast(metrics_tif)
r_vgpm <- rast(vgpm_tif)
d_lbls <- read_csv(lbls_csv)
lyr_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
lyr_npp <- "npp_avg"
# mask to program areas to exclude Atlantic coast
ply_mask <- vect(pra |> st_shift_longitude())
r_score <- r_metrics[[lyr_score]] |> mask(ply_mask)
# log-transform NPP for display
# NOTE(review): `lyr`/`lyr_npp` is assigned below but not used to subset —
# r_vgpm may carry both avg and sd layers (per the tif name); confirm that
# the intended npp_avg layer is what renders in add_image_source
r_npp <- r_vgpm |> log() |> mask(ply_mask)
n_cols <- 11
# color ramps + breaks: reversed Spectral for score, viridis for NPP
rng_r_score <- range(values(r_score, na.rm = T))
cols_r_score <- rev(RColorBrewer::brewer.pal(n_cols, "Spectral"))
brks_r_score <- seq(rng_r_score[1], rng_r_score[2], length.out = n_cols)
rng_r_npp <- range(values(r_npp, na.rm = T))
cols_r_npp <- pal_viridis()(n_cols)
brks_r_npp <- seq(rng_r_npp[1], rng_r_npp[2], length.out = n_cols)
for (is_vgpm in c(T, F)) {
  # npp: is_vgpm = T # score: is_vgpm = F
  if (is_vgpm) {
    r <- r_npp
    rng_r <- round(rng_r_npp, 2)
    cols_r <- cols_r_npp
    brks_r <- brks_r_npp
    lyr <- lyr_npp
    title <- "Primary Productivity (log(NPP))"
  } else {
    r <- r_score
    rng_r <- rng_r_score
    cols_r <- cols_r_score
    brks_r <- brks_r_score
    lyr <- lyr_score
    title <- "Score"
  }
  for (is_ak in c(T, F)) {
    # is_ak = T
    if (is_ak) {
      # alaska: shift longitudes to 0–360 across the antimeridian
      rgn_mask <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        vect()
      r_area <- r |>
        mask(rgn_mask) |>
        trim()
      er_area <- er |>
        filter(region_key == "AK") |>
        st_shift_longitude()
      pra_area <- pra |>
        filter(region_key == "AK") |>
        st_shift_longitude()
      er_pts_area <- er_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(region_key == "AK") |>
        st_shift_longitude() |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )
      lgnd_pos <- "top-left"
    } else {
      # lower 48: mask raster to non-AK program areas and select
      # intersecting polygons by bounding box
      rgn_mask <- pra |>
        filter(region_key != "AK") |>
        st_shift_longitude() |>
        vect()
      r_area <- r |>
        mask(rgn_mask) |>
        trim()
      box_lower48 <- pra |>
        filter(region_key != "AK") |>
        st_bbox() |>
        st_as_sfc()
      er_area <- er |>
        st_filter(box_lower48, .predicate = st_intersects)
      pra_area <- pra |>
        st_filter(box_lower48, .predicate = st_intersects)
      er_pts_area <- er_pts |>
        filter(ecoregion_key %in% er_area$ecoregion_key) |>
        left_join(
          d_lbls |> select(-programarea_key),
          by = "ecoregion_key"
        )
      pra_pts_area <- pra_pts |>
        filter(programarea_key %in% pra_area$programarea_key) |>
        left_join(
          d_lbls |> select(-ecoregion_key),
          by = "programarea_key"
        )
      lgnd_pos <- "bottom-left"
    }
    scale_pos <- "bottom-right"
    # vector tile sources served by the msens API
    er_url <- glue(
      "https://api.marinesensitivity.org/tilejson?table=public.ply_ecoregions_2025{v_sfx}&use_cache=FALSE"
    )
    pra_url <- glue(
      "https://api.marinesensitivity.org/tilejson?table=public.ply_programareas_2026{v_sfx}&use_cache=FALSE"
    )
    # mapbox-gl "in" filters restricting tile features to the current region
    er_filter <- c("in", "ecoregion_key", er_area$ecoregion_key)
    pra_filter <- c("in", "programarea_key", pra_area$programarea_key)
    m <- mapboxgl(
      style = mapbox_style("dark"),
      projection = "globe"
    ) |>
      fit_bounds(bbox = er_area) |>
      # cell-level raster drawn under the polygon outlines
      add_image_source(
        id = "r_src",
        data = r_area,
        colors = cols_r
      ) |>
      add_vector_source(
        id = "pra_src",
        url = pra_url
      ) |>
      add_vector_source(
        id = "er_src",
        url = er_url
      ) |>
      add_raster_layer(
        id = "r_lyr",
        source = "r_src",
        raster_opacity = 0.9,
        raster_resampling = "nearest"
      ) |>
      # thick gray ecoregion outlines under thin black program-area outlines
      add_line_layer(
        id = "er_ln",
        source = "er_src",
        source_layer = glue("public.ply_ecoregions_2025{v_sfx}"),
        filter = er_filter,
        line_color = "lightgray",
        line_opacity = 1,
        line_width = 5
      ) |>
      add_line_layer(
        id = "pra_ln",
        source = "pra_src",
        source_layer = glue("public.ply_programareas_2026{v_sfx}"),
        filter = pra_filter,
        line_color = "black",
        line_opacity = 1,
        line_width = 1
      ) |>
      # label anchors/offsets come from the hand-tuned lbls_csv
      add_symbol_layer(
        id = "pra_symbol",
        source = pra_pts_area,
        filter = pra_filter,
        text_field = get_column("programarea_key"),
        text_allow_overlap = T,
        text_anchor = get_column("text_anchor"),
        text_justify = get_column("text_justify"),
        text_offset = list(
          get_column("text_offset_right"),
          get_column("text_offset_down")
        )
      ) |>
      add_symbol_layer(
        id = "er_symbol",
        source = er_pts_area,
        filter = er_filter,
        text_field = get_column("ecoregion_name"),
        text_color = "black",
        text_allow_overlap = T,
        text_anchor = get_column("text_anchor"),
        text_justify = get_column("text_justify"),
        text_offset = list(
          get_column("text_offset_right"),
          get_column("text_offset_down")
        ),
        text_halo_color = "lightgray",
        text_halo_width = 2,
        text_halo_blur = 2,
        text_line_height = 1.5,
        text_size = 20
      ) |>
      mapgl::add_legend(
        title,
        values = rng_r,
        colors = cols_r,
        position = lgnd_pos
      ) |>
      add_scale_control(position = scale_pos)
    # save widget to temp html, screenshot to png, then remove the html
    b <- glue(
      "{dir_figs}/map_cell_{ifelse(is_vgpm, 'npp', 'score')}_{ifelse(is_ak, 'ak', 'l48')}{v_sfx}"
    )
    b_html <- glue("{b}.html")
    b_files <- glue("{b}_files")
    b_png <- glue("{b}.png")
    saveWidget(m, b_html, selfcontained = F, background = "transparent")
    webshot(
      b_html,
      b_png,
      vwidth = 1200,
      vheight = 800,
      zoom = 2,
      selector = "#htmlwidget_container",
      delay = 2,
      quiet = T
    )
    file_delete(b_html)
    dir_delete(b_files)
  }
}
```
### Ecoregion × Program Area table (docx)
```{r}
#| label: flextable_er_pra
#| eval: !expr do_summary_figs
# Word (docx) table of the interleaved ecoregion / program-area score rows
# (d_er_pra_n, flds, flds_rn, d_metric_labels from the score-table chunk).
tbl_docx <- glue("{dir_figs}/table_ecoregion_programarea_scores{v_sfx}.docx")
set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)
# build rename vector from metric table: metric_abbrev -> short_name
abbrev_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_abbrev)
d_er_pra_n |>
  mutate(
    # express area in thousands of km2 for readability
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    # bold the ecoregion header rows (markdown rendered by colformat_md)
    row_lbl = ifelse(
      row_type == "ecoregion",
      glue("**{row_lbl}**"),
      row_lbl
    )
  ) |>
  select(
    Area = row_lbl,
    Score = score,
    all_of(abbrev_rn),
    `Area (1,000 km^2^)` = area_kkm2
  ) |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |>
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)
# browseURL(tbl_docx)
```
### Program Area table (docx + xlsx)
```{r}
#| label: flextable_pra
#| eval: !expr do_summary_figs
# Program-area-only score table, sorted by descending score, written as
# docx (flextable), xlsx and CSV (pra_n_csv).
tbl_docx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.docx")
tbl_xlsx <- glue("{dir_figs}/table_programarea_scores{v_sfx}.xlsx")
set_flextable_defaults(
  font.family = "Arial",
  font.size = 10,
  border.color = "gray",
  big.mark = ","
)
# build rename vector: metric_title -> short_name
title_rn <- d_metric_labels |>
  filter(metric_key %in% flds) |>
  mutate(short = flds_rn[metric_key]) |>
  filter(short != "score") |>
  pull(short, name = metric_title)
d_pra_only <- d_er_pra_n |>
  # keep only the program-area detail rows (drop ecoregion headers)
  filter(row_type == "programarea") |>
  arrange(desc(score)) |>
  select(
    -row_type,
    -ecoregion_name,
    -ecoregion_key,
    -pct_programarea_in_ecoregion
  ) |>
  mutate(
    # express area in thousands of km2 for readability
    area_kkm2 = area_km2 / 1000
  ) |>
  mutate(across(where(is.numeric), round)) |>
  mutate(
    row_lbl = glue("{programarea_name} ({programarea_key})")
  ) |>
  select(
    row_lbl,
    score,
    all_of(unname(title_rn)),
    area_kkm2
  ) |>
  # program areas spanning multiple ecoregions appear once per ecoregion
  # in d_er_pra_n; collapse to one row each
  distinct() |>
  select(
    `Program Area` = row_lbl,
    Score = score,
    all_of(title_rn),
    `Area (1,000 km^2^)` = area_kkm2
  )
write_excel_csv(d_pra_only, pra_n_csv, na = "")
d_pra_only |>
  flextable() |>
  ftExtra::colformat_md(part = "all") |>
  theme_vanilla() |>
  save_as_docx(path = tbl_docx)
# xlsx version (plain column names without superscript)
d_pra_only |>
  rename(`Area (1,000 km2)` = `Area (1,000 km^2^)`) |>
  openxlsx::write.xlsx(tbl_xlsx)
# browseURL(tbl_docx)
```
### Program Area flower plots
```{r}
#| label: plot_programarea_flowers
#| eval: !expr do_summary_figs
# Flower (polar bar) plots of score components per program area, one
# faceted figure per ggplot theme variant (png + pdf each).
# pra list from PostGIS
pra_all <- tbl(con, glue("ply_programareas_2026{v_sfx}")) |>
  select(programarea_key, programarea_name) |>
  collect()
# components: per-category extinction-risk metrics plus primary productivity
metric_input_keys <- c(
  glue("extrisk_{sp_cats}_ecoregion_rescaled"),
  "primprod_ecoregion_rescaled"
)
m_input_seqs <- tbl(con_sdm, "metric") |>
  filter(metric_key %in% metric_input_keys) |>
  pull(metric_seq)
# build components from metric table
components <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  pull(metric_title)
# one fixed hue per component, stable across all plots
component_cols <- setNames(
  hue_pal()(length(components)),
  components
)
# d_scores: overall score per program area; drives the facet order and
# the bold number in each flower's center
pra_tbl <- glue("ply_programareas_2026{v_sfx}")
m_score <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_key_score <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_score) |>
  pull(metric_seq)
d_scores <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric") |>
      filter(metric_seq == !!m_key_score),
    by = join_by(zone_seq)
  ) |>
  select(programarea_key, score = value) |>
  mutate(score = round(score)) |>
  collect() |>
  arrange(desc(score), programarea_key)
d_scores$programarea_lbl <- factor(
  d_scores$programarea_key,
  levels = d_scores$programarea_key,
  ordered = T
)
# d_fl: component metric values per program area
d_fl <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key",
    value %in% pra_all$programarea_key
  ) |>
  select(programarea_key = value, zone_seq) |>
  inner_join(
    tbl(con_sdm, "zone_metric"),
    by = "zone_seq"
  ) |>
  inner_join(
    tbl(con_sdm, "metric") |>
      filter(metric_seq %in% m_input_seqs) |>
      select(metric_seq, metric_key),
    by = "metric_seq"
  ) |>
  select(programarea_key, metric_key, value) |>
  arrange(programarea_key, metric_key) |>
  collect() |>
  mutate(
    # strip metric_key prefix/suffix down to a bare component name
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  select(programarea_key, component, value) |>
  arrange(programarea_key, component, value)
# add missing components with value 0
d_0 <- expand_grid(
  programarea_key = d_scores$programarea_key,
  component = sort(unique(d_fl$component)),
  value = 0
) |>
  anti_join(
    d_fl,
    by = join_by(programarea_key, component)
  )
d_fl <- d_fl |>
  bind_rows(d_0) |>
  arrange(programarea_key, component) |>
  # `even` = 1 per petal: after cumsum below, every petal gets equal
  # angular width
  mutate(even = 1)
# build component display name lookup from metric table
component_labels <- d_metric_labels |>
  filter(metric_key %in% flds, metric_title != "Score") |>
  mutate(
    component = metric_key |>
      str_replace("extrisk_", "") |>
      str_replace("_ecoregion_rescaled", "") |>
      str_replace("_", " ")
  ) |>
  pull(metric_title, name = component)
d_fl <- d_fl |>
  mutate(
    # facet order follows descending overall score (d_scores)
    programarea_lbl = factor(
      programarea_key,
      levels = d_scores$programarea_key,
      ordered = T
    ),
    component = component_labels[component]
  ) |>
  arrange(programarea_lbl, component)
# all flowers: petal rectangles (x = radial value, y = angular slot)
d <- d_fl |>
  arrange(programarea_lbl, component) |>
  group_by(programarea_lbl) |>
  mutate(across(!where(is.character), as.double)) |>
  mutate(
    ymax = cumsum(even),
    ymin = lag(ymax, default = 0),
    xmax = value,
    xmin = 0
  )
for (theme_name in c("minimal", "linedraw", "min_line", "dark", "void")) {
  theme_fxn <- switch(
    theme_name,
    "minimal" = theme_minimal,
    "linedraw" = theme_linedraw,
    # minimal theme with faint black grid lines
    "min_line" = function(...) {
      theme_minimal(...) %+replace%
        theme(
          panel.grid = element_line(colour = "black"),
          panel.grid.major = element_line(linewidth = rel(0.1)),
          panel.grid.minor = element_line(linewidth = rel(0.05))
        )
    },
    "dark" = theme_dark,
    "void" = theme_void
  )
  g <- d |>
    ggplot() +
    geom_rect_interactive(
      # NOTE(review): `color = "white"` inside aes() maps a constant as an
      # aesthetic; it is overridden by the fixed `color = "white"` argument
      # below and looks redundant — confirm and remove
      aes(
        xmin = xmin,
        xmax = xmax,
        ymin = ymin,
        ymax = ymax,
        fill = component,
        color = "white",
        data_id = component
      ),
      color = "white",
      alpha = 1
    ) +
    scale_fill_manual(
      values = component_cols,
      name = "Component"
    ) +
    facet_wrap(~programarea_lbl) +
    coord_polar(theta = "y") +
    # negative x extent creates the donut hole for the center score label
    xlim(c(-20, max(d_scores$score, d$xmax))) +
    annotate(
      "rect",
      xmin = -20,
      xmax = 0,
      ymin = 0,
      ymax = max(d$ymax),
      fill = "white",
      color = NA
    ) +
    # overall score printed in the flower center
    geom_text(
      aes(x, y, label = score),
      data = d_scores |> mutate(x = -20, y = 0),
      size = 1.5,
      fontface = "bold",
      color = ifelse(theme_name == "dark", "white", "black")
    ) +
    theme_fxn() +
    theme(
      legend.position = "bottom",
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 6),
      plot.margin = unit(c(1, 1, 1, 1), "pt"),
      strip.text.x = element_text(size = 6),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    )
  pra_fl_png <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.png"
  )
  pra_fl_pdf <- glue(
    "{dir_figs}/programarea_flower-plot_scores_{theme_name}{v_sfx}.pdf"
  )
  png(pra_fl_png, width = 1200, height = 800, res = 150)
  print(g)
  dev.off()
  pdf(pra_fl_pdf, width = 7, height = 5)
  print(g)
  dev.off()
}
```
### Primary productivity by Program Area
```{r}
#| label: plot_primprod
#| eval: !expr do_summary_figs
# Bar chart (+ CSV) of mean primary productivity per program area, with
# ± 1 standard deviation error bars.
metric_pp_keys <- glue("primprod_{c('avg', 'stddev')}")
pra_tbl <- glue("ply_programareas_2026{v_sfx}")
# pull primprod avg/stddev zone metrics for program-area zones
d_pp_raw <- tbl(con_sdm, "zone") |>
  filter(
    tbl == !!pra_tbl,
    fld == "programarea_key"
  ) |>
  select(programarea_key = value, zone_seq) |>
  left_join(
    tbl(con_sdm, "zone_metric"),
    by = "zone_seq"
  ) |>
  left_join(
    tbl(con_sdm, "metric") |>
      filter(metric_key %in% metric_pp_keys),
    by = "metric_seq"
  ) |>
  filter(
    !is.na(programarea_key),
    !is.na(metric_key),
    !is.na(value)
  ) |>
  select(programarea_key, metric_key, value) |>
  arrange(programarea_key, metric_key) |>
  collect()
if (nrow(d_pp_raw) > 0) {
  d_pp <- d_pp_raw |>
    # attach display names from PostGIS program-area table
    left_join(
      tbl(con, glue("ply_programareas_2026{v_sfx}")) |>
        select(programarea_key, programarea_name) |>
        collect(),
      by = "programarea_key"
    ) |>
    filter(!is.na(programarea_name)) |>
    # one row per program area with avg/sd columns
    tidyr::pivot_wider(
      names_from = metric_key,
      values_from = value
    ) |>
    rename(
      avg = primprod_avg,
      sd = primprod_stddev
    ) |>
    mutate(
      # convert units: mg m-2 day-1 -> t km-2 yr-1 (see y-axis label)
      across(
        where(is.numeric),
        ~ set_units(.x, mg / m^2 / day) |>
          set_units(t / km^2 / yr)
      )
    )
  pp_csv <- glue("{dir_figs}/programarea_primprod{v_sfx}.csv")
  d_pp |>
    mutate(across(where(is.numeric), drop_units)) |>
    write_csv(pp_csv)
  g <- d_pp |>
    arrange(desc(avg)) |>
    mutate(across(where(is.numeric), drop_units)) |>
    mutate(
      # freeze factor order so bars plot from highest to lowest avg NPP
      programarea_name = factor(programarea_name, levels = programarea_name)
    ) |>
    ggplot(aes(x = programarea_name, y = avg)) +
    geom_bar(
      position = position_dodge(),
      stat = "identity",
      fill = "darkgreen"
    ) +
    geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), width = .4) +
    labs(
      title = NULL,
      x = NULL,
      y = expression("Areal NPP (metric tons C km"^-2 ~ "yr"^-1 * ")")
    ) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(
      # angled x labels to fit long program-area names
      axis.text.x = element_text(
        angle = 45,
        vjust = 1,
        hjust = 1
      )
    )
  pp_png <- glue("{dir_figs}/programarea_primprod{v_sfx}.png")
  pp_pdf <- glue("{dir_figs}/programarea_primprod{v_sfx}.pdf")
  png(pp_png, width = 1200, height = 800, res = 150)
  print(g)
  dev.off()
  pdf(pp_pdf, width = 7, height = 5)
  print(g)
  dev.off()
} else {
  message(
    "No primprod_avg/primprod_stddev zone_metric data found; skipping primprod plot."
  )
}
```
### Copy figures to `dir_v`
```{r}
#| label: copy_figs_to_dir_v
#| eval: !expr do_summary_figs
# Data files are written directly to dir_v by earlier chunks; here we copy
# the locally rendered figure/table outputs from figs/ into dir_v.
fig_files <- dir_ls(dir_figs, regexp = "\\.(png|pdf|docx|xlsx|csv)$")
if (length(fig_files) > 0) {
  # fs::file_copy and glue are both vectorized, so no explicit loop needed
  file_copy(
    fig_files,
    glue("{dir_v}/{path_file(fig_files)}"),
    overwrite = TRUE
  )
}
message(glue("Copied {length(fig_files)} figure files to {dir_v}"))
```
## Generate README
Programmatically generate `README.md` and `README.html` for the versioned output directory. Uses `data/derived_products.csv` for file descriptions.
```{r}
#| label: generate_readme
#| eval: !expr do_readme
# Generate README.md (and a rendered HTML) for the versioned output
# directory, listing actual files with descriptions from the catalog CSV.
dp_csv <- here("data/derived_products.csv")
stopifnot(file_exists(dp_csv))
d_dp <- read_csv(dp_csv)
# list actual files in dir_v (non-recursive, exclude README itself)
v_files <- dir_ls(dir_v, recurse = F) |>
  path_file() |>
  str_subset("^README", negate = T) |>
  # exclude ~$ temp files from Word/Excel
  str_subset("^~\\$", negate = T)
# match files to descriptions
d_files <- tibble(file = v_files) |>
  left_join(
    d_dp |> select(file = output_file, description),
    by = "file"
  ) |>
  mutate(
    # files without a catalog entry get an empty description
    description = ifelse(is.na(description), "", description)
  ) |>
  arrange(file)
# markdown pipe table of the file listing
lns_files <- d_files |>
  knitr::kable(format = "pipe") |>
  paste(collapse = '\n')
# build README.md content
readme_lines <- c(
  glue(
    "
---
format:
html:
embed-resources: true
---
# Derived Products — {ver}
Version: `{v_sfx}` | Generated: {Sys.Date()}
## File listing
{lns_files}
## Naming conventions
- `ply_*`: polygon spatial data (GeoPackage)
- `r_*`: raster data (GeoTIFF)
- `tbl_*`: tabular summary data (CSV)
- `layers_*`: layer metadata for metrics and species
- `zone_taxon_*`: zone × taxon matrix
- `*_v3`: version 3 model outputs
## Large files
Some files exceed GitHub size limits and are stored in the
`big-files/{ver}/` directory (not tracked by git).
See `sdm_parquet/` for parquet exports.
_Auto-generated by `calc_scores.qmd` on {Sys.time()}_"
  )
)
readme_md <- glue("{dir_v}/README.md")
writeLines(readme_lines, readme_md)
message(glue("Wrote {readme_md}"))
# render to HTML
quarto_render(readme_md |> path_real())
message(glue("Rendered {readme_md} to HTML"))
```
## Map score cells
```{r}
#| label: map_raster
#| eval: !expr do_map
# Interactive map of cell-level score values on a Mapbox GL basemap.
librarian::shelf(mapgl)
# load global raster template
# r_global <- readRDS(glue("{dir_data}/derived/r_global_0.05.rds"))
cell_tif <- glue("{dir_data}/derived/r_bio-oracle_planarea.tif")
r_cell <- rast(cell_tif)
# d_metric <- tbl(con_sdm, "metric") |> collect()
# View(d_metric)
# TODO: add sequences for date_created post-export-parquet
# get species richness metric ID
# m_key <- "extrisk_all_ecoregion_rescaled"
m_key <- "score_extriskspcat_primprod_ecoregionrescaled_equalweights"
m_seq <- tbl(con_sdm, "metric") |>
  filter(metric_key == !!m_key) |>
  pull(metric_seq)
# NOTE(review): m_seq is unused below — the SQL re-resolves metric_seq via
# a subquery on metric_key; confirm whether m_seq can be dropped
# query species richness for a region
d_m <- dbGetQuery(
  con_sdm,
  glue(
    "
SELECT
cm.cell_id,
cm.value
FROM cell_metric cm
WHERE cm.metric_seq = (
SELECT metric_seq
FROM metric
WHERE metric_key = '{m_key}' )"
  )
) |>
  tibble()
# each cell should carry at most one value for a single metric
stopifnot(sum(duplicated(d_m$cell_id)) == 0)
# convert to raster
r_m <- create_raster_from_df(d_m, r_cell, value_col = "value")
# display with mapgl
# TODO: make into function in msens R package

# Display a single raster on a dark Mapbox GL basemap with legend,
# fullscreen/navigation/scale controls.
#   r          : SpatRaster to display
#   cols_r     : color ramp low -> high (default: reversed Spectral)
#   layer_name : legend title
# Returns a mapboxgl htmlwidget.
mapgl_rast <- function(
  r,
  cols_r = rev(RColorBrewer::brewer.pal(11, "Spectral")),
  layer_name = "SDM"
) {
  # r = r_m; cols_r = cols_r; layer_name = m_key
  # legend range rounded to 2 significant digits
  rng_r <- minmax(r) |> as.numeric() |> signif(digits = 2)
  mapboxgl(
    style = mapbox_style("dark"),
    zoom = 3.5,
    center = c(-106, 40.1)
  ) |> # ply_pa centroid
    add_image_source(
      id = "r_src",
      data = r,
      colors = cols_r
    ) |>
    add_raster_layer(
      id = "r_lyr",
      source = "r_src",
      raster_resampling = "nearest"
    ) |>
    mapgl::add_legend(
      layer_name,
      values = rng_r,
      colors = cols_r,
      position = "bottom-right"
    ) |>
    add_fullscreen_control(position = "top-left") |>
    add_navigation_control() |>
    add_scale_control()
}
# visualize
mapgl_rast(r_m, layer_name = m_key)
```
## Parquet export/import
This cleans up the DuckDB database by exporting tables to Parquet files (with sort order for faster reads) and then re-importing them to a new DuckDB database. This also allows us to remove indexes from the DuckDB database, which are not needed for the mapgl app and can slow down writes.
### Export to Parquet
```{r}
#| label: export_parquet
#| eval: !expr do_parquet
# TODO: model_cell: PARTITION BY (mdl_seq);") # https://duckdb.org/docs/stable/data/parquet/overview
# Export all sdm DuckDB tables to sorted Parquet files.
#
# Sorting on the key columns at export time replaces index maintenance:
# DuckDB reads sorted parquet efficiently (zonemaps), and skipping indexes
# keeps the rebuilt database small and fast to write.
source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = FALSE)) |>
  load_duckdb_extensions()
# dbListTables(con_sdm)
dbExecute(con_sdm, "SET threads = 7;")
dbExecute(con_sdm, "SET memory_limit = '20GB';")
# start from a clean parquet output directory
dir_pq <- normalizePath(dir_pq)
if (dir_exists(dir_pq)) {
  dir_delete(dir_pq)
}
dir_create(dir_pq)
# OLD: add primary keys, indexes
# NEW: skip indexes, sort on export, per [performance](https://duckdb.org/docs/stable/guides/performance/indexing.html)
# dbRemoveTable(con_sdm, "cell_model", force = TRUE)
# table -> key column(s) defining the parquet sort order
tbl_keys <- list(
  cell = "cell_id",
  cell_metric = c("cell_id", "metric_seq"),
  # cell_model = c("cell_id", "mdl_seq"), # source: model_cell; alternative key sorting order
  dataset = "ds_key",
  listing = "spp_id",
  metric = "metric_seq",
  model = "mdl_seq",
  model_cell = c("mdl_seq", "cell_id"), # *big* one
  species = "sp_seq",
  taxon = "taxon_id",
  taxon_model = c("taxon_id", "ds_key"),
  zone = "zone_seq",
  zone_cell = c("zone_seq", "cell_id"),
  zone_metric = c("zone_seq", "metric_seq"),
  zone_taxon = c("zone_fld", "zone_value", "mdl_seq", "taxon_id")
)
# require exact 1:1 correspondence between tbl_keys and database tables
stopifnot(length(setdiff(names(tbl_keys), dbListTables(con_sdm))) == 0)
stopifnot(length(setdiff(dbListTables(con_sdm), names(tbl_keys))) == 0)
# export parquet files with sort order
for (tbl in names(tbl_keys)) {
  # tbl = names(tbl_keys)[1] # tbl = "cell_model"
  keys <- tbl_keys[[tbl]]
  keys_str <- paste(keys, collapse = ", ")
  pq_file <- glue("{dir_pq}/{tbl}.parquet")
  message(glue(
    "Exporting {tbl} (ORDER BY {keys_str}) to parquet ~ {Sys.time()}"
  ))
  # "cell_model" is a re-keyed export of the model_cell table
  fro_tbl <- tbl
  if (tbl == "cell_model") {
    fro_tbl <- "model_cell"
  }
  stopifnot(all(keys %in% dbListFields(con_sdm, fro_tbl)))
  if (tbl %in% c("cell_model", "model_cell")) {
    # list key columns first, then remaining columns explicitly
    flds <- dbListFields(con_sdm, fro_tbl) |>
      setdiff(keys)
    # FIX: quote each non-key identifier individually; the previous
    # collapse = '", "' left the first and last identifiers unquoted,
    # producing invalid SQL whenever >= 2 non-key columns exist
    flds_str <- paste0('"', flds, '"', collapse = ", ")
    sql <- glue(
      "COPY (
      SELECT {keys_str}, {flds_str} FROM {fro_tbl}
      ORDER BY {keys_str})
      TO '{pq_file}'
      (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  } else {
    sql <- glue(
      "COPY (
      SELECT * FROM {fro_tbl}
      ORDER BY {keys_str})
      TO '{pq_file}'
      (FORMAT parquet, PER_THREAD_OUTPUT, COMPRESSION zstd) "
    )
  }
  message(sql)
  dbExecute(con_sdm, sql)
}
message(glue("Exporting finished ~ {Sys.time()}")) # ~ 1 min
```
### Import from Parquet
```{r}
#| label: import_parquet
#| eval: !expr do_parquet
# dir_pq and sdm_db defined in setup chunk
if (exists("con_sdm")) {
dbDisconnect(con_sdm, shutdown = T)
rm(con_sdm)
}
duckdb_shutdown(duckdb())
if (file_exists(sdm_db)) {
file_delete(sdm_db)
}
source(here("libs/sdm_functions.R"))
con_sdm <- dbConnect(duckdb(dbdir = sdm_db, read_only = F)) |>
load_duckdb_extensions()
# dbExecute(con_sdm, "SET threads = 8;")
# dbExecute(con_sdm, "SET memory_limit = '20GB';")
# import parquet ----
for (pq in dir_ls(dir_pq, glob = "*.parquet")) {
# pq = dir_ls(dir_pq, glob = "*.parquet")[2]
tbl <- path_file(pq) |> str_remove("\\.parquet$")
if (!dbExistsTable(con_sdm, tbl)) {
message(glue("Importing {tbl} from {basename(pq)}/*.parquet"))
dbExecute(
con_sdm,
glue("CREATE TABLE {tbl} AS SELECT * FROM '{pq}/*.parquet'")
)
}
}
# create index on 2nd column of multi-column index from parquet export
# tbl_idx <- list(
# cell_metric = "metric_seq",
# model_cell = "mdl_seq",
# zone_cell = "cell_id",
# zone_metric = "metric_seq")
# for (tbl in names(tbl_idx)){ # tbl = "model_cell"
# fld <- tbl_idx[[tbl]]
# idx <- glue("idx_{tbl}_{fld}") # idx_model_cell_mdl_seq
# stopifnot(fld %in% dbListFields(con_sdm, tbl))
#
# sql <- glue("CREATE INDEX IF NOT EXISTS {idx} ON {tbl} ({fld});")
# message(sql)
# dbExecute(con_sdm, sql)
# }
# sequences ----
# for auto-incrementing primary keys
tbl_seq <- list(
metric = "metric_seq",
model = "mdl_seq",
species = "sp_seq",
zone = "zone_seq"
)
stopifnot(all(names(tbl_seq) %in% dbListTables(con_sdm)))
for (tbl in names(tbl_seq)) {
# tbl = "metric"
fld <- tbl_seq[[tbl]]
id <- glue("seq_{tbl}")
nxt <- dbGetQuery(con_sdm, glue("SELECT MAX({fld}) FROM {tbl}")) |>
pull(1) +
1
sql <- glue("CREATE SEQUENCE {id} START {nxt}")
message(sql)
dbExecute(con_sdm, sql)
sql <- glue(
"ALTER TABLE {tbl} ALTER COLUMN {fld} SET DEFAULT nextval('{id}')"
)
message(sql)
dbExecute(con_sdm, sql)
}
# date_created ----
# default to current_date
for (tbl in dbListTables(con_sdm)) {
# tbl = "dataset"
fld <- dbListFields(con_sdm, tbl) |>
str_subset("date_created")
if (length(fld) == 1) {
sql <- glue(
"ALTER TABLE {tbl} ALTER COLUMN date_created SET DEFAULT CURRENT_DATE;"
)
message(sql)
a <- dbExecute(con_sdm, sql)
}
}
dbDisconnect(con_sdm, shutdown = T)
```
## Deploy to server
Sync derived data and app code to the msens server.
```{r}
#| label: deploy
#| eval: !expr do_deploy
# Run the deployment script (syncs derived data and app code to the msens
# server), blocking until it completes; `ver` is passed as its argument.
deploy_script <- here("libs/deploy_to_server.sh")
stopifnot(file_exists(deploy_script))
system2("bash", args = c(deploy_script, ver), wait = TRUE)
```
## Close Connection
```{r}
#| label: cleanup
# Close the DuckDB connection; shutdown = T releases the database file lock
# so other processes can open it.
dbDisconnect(con_sdm, shutdown = T)
```