GoMex cetacean & sea turtle SDMs

Cetacean and sea turtle spatial density model outputs from visual observations using line-transect survey methods aboard NOAA vessel and aircraft platforms in the Gulf of Mexico from 2003-06-12 to 2019-07-31 (NCEI Accession 0256800)

Code
# packages
librarian::shelf(
  devtools, dplyr, DT, fs, glue, here, janitor, knitr, mapview, purrr, readr, 
  readxl, sf, stringr, tibble, tidyr, units,
  quiet = T)
options(readr.show_col_types = F)

source(here("libs/db.R")) # con

create_index <- function(con, tbl, flds, schema = "public", geom=F, unique=F, overwrite=F, show=F, exec=T){
  # tbl = "taxa"; flds = c("tbl_orig", "aphia_id"); unique = T; geom=F
  stopifnot(!(geom == T & length(flds) != 1))
  sfx <- ifelse(
    geom,
    glue::glue(" USING GIST ({flds})"),
    glue::glue("({paste(flds, collapse=', ')})"))
  idx <- ifelse(
    unique,
    glue("{tbl}_unique_idx"),
    glue("{tbl}_{paste(flds, collapse='_')}_idx"))

  if (overwrite)
    dbExecute(con, glue("DROP INDEX IF EXISTS {schema}.{idx}"))

  sql <- glue::glue(
    "CREATE {ifelse(unique, 'UNIQUE','')} INDEX IF NOT EXISTS {idx} ON {schema}.{tbl}{sfx}")
  if (show)
    message(sql)
  if (exec)
    dbExecute(con, sql)
}
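
For illustration only (not run in this document), create_index() can print the SQL it would issue without executing it, e.g. for a spatial index on the geometries table loaded below:

Code
create_index(con, "sdm_geometries", "geom", geom = TRUE, show = TRUE, exec = FALSE)
# prints the CREATE INDEX ... ON public.sdm_geometries USING GIST (geom) statement
# as a message, without touching the database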
Code
schema <- "public"

d_datasets <- tibble(
  ds_key      = "gm",
  name_short  = "NOAA GoMex Cetacean & Sea Turtle Densities", # short name in form of {source_broad} {regions} {taxa_groups} {response_type}
  name_full   = "Cetacean and sea turtle spatial density model outputs from visual observations using line-transect survey methods aboard NOAA vessel and aircraft platforms in the Gulf of Mexico from 2003-06-12 to 2019-07-31 (NCEI Accession 0256800)",
  link        = "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:256800",
  citation    = "Litz, Jenny; Aichinger Dias, Laura; Rappucci, Gina; Martinez, Anthony; Soldevilla, Melissa; Garrison, Lance; Mullin, Keith; Barry, Kevin; Foster, Marjorie (2022). Cetacean and sea turtle spatial density model outputs from visual observations using line-transect survey methods aboard NOAA vessel and aircraft platforms in the Gulf of Mexico from 2003-06-12 to 2019-07-31 (NCEI Accession 0256800). [indicate subset used]. NOAA National Centers for Environmental Information. Dataset. https://doi.org/10.25921/efv4-9z56. Accessed July 28, 2022.",
  description = "Based on ship-based and aerial line-transect surveys conducted in the U.S. waters of the Gulf of Mexico between 2003 and 2019, the NOAA Southeast Fisheries Science Center (SEFSC) developed spatial density models (SDMs) for cetacean and sea turtle species for the entire Gulf of Mexico. SDMs were developed using a generalized additive modeling (GAM) framework to determine the relationship between species abundance and environmental variables (monthly averaged oceanographic conditions during 2015 - 2019). Models were extrapolated beyond the U.S. Gulf of Mexico to provide insight into potential high density areas throughout the Gulf of Mexico. However, extrapolations of this type should be interpreted with caution. This dataset includes 19 shapefiles for the SDMs for each cetacean and sea turtle species or species group.",
  response_type  = "density",             
  # response_type: one of: occurrence, range, suitability, probability or density
  taxa_groups     = "cetacean, sea turtle", 
  # taxa_groups: one or more of: fish, invertebrates, marine mammals, cetaceans, sea turtles, seabirds, etc.
  year_pub       = 2022,
  date_obs_beg   = "2003-06-12", 
  date_obs_end   = "2019-08-01",            # up to, but not including *_end
  date_env_beg   = "2015-01-01", 
  date_env_end   = "2020-01-01",            # up to, but not including *_end
  regions        = list("Gulf of Mexico"))  # array
# names(d_datasets) |> paste(collapse = ", ") |> cat()

t_datasets <- Id(schema = schema, table = "sdm_datasets")

row_exists <- tbl(con, t_datasets) |> filter(ds_key == "gm") |> collect() |> nrow() == 1
if (!row_exists){
  
  # append, except array columns: regions
  dbWriteTable(
    con, t_datasets, append = T,
    d_datasets |> 
      select(-regions) )
  
  # append array column: regions
  # build a Postgres text[] literal, quoting each region element separately
  regions_arr <- d_datasets$regions |> unlist() |> paste(collapse = "', '")
  regions_sql <- glue("ARRAY['{regions_arr}']")
  dbExecute(
    con, 
    glue(
      "UPDATE sdm_datasets 
      SET   regions = {regions_sql}
      WHERE ds_key  = 'gm'") )
}

tbl(con, t_datasets) |> 
  filter(ds_key == "gm") |> 
  collect() |>
  glimpse()
Rows: 1
Columns: 17
$ ds_key        <chr> "gm"
$ name_short    <chr> "NOAA GoMex Cetacean & Sea Turtle Densities"
$ name_full     <chr> "Cetacean and sea turtle spatial density model outputs f…
$ link          <chr> "https://www.ncei.noaa.gov/access/metadata/landing-page/…
$ links_other   <pq__vrch> NA
$ source_broad  <chr> NA
$ source_detail <chr> NA
$ regions       <pq__vrch> {"Gulf of Mexico"}
$ description   <chr> "Based on ship-based and aerial line-transect surveys co…
$ citation      <chr> "Litz, Jenny; Aichinger Dias, Laura; Rappucci, Gina; Mar…
$ response_type <chr> "density"
$ taxa_groups   <chr> "cetacean, sea turtle"
$ year_pub      <int> 2022
$ date_obs_beg  <date> 2003-06-12
$ date_obs_end  <date> 2019-08-01
$ date_env_beg  <date> 2015-01-01
$ date_env_end  <date> 2020-01-01
Code
# helper functions ----
get_annual_density <- function(d_sf, i=0){
  # i = 12; d_sf <- d$sf[[i]]
  message(glue("{i} of {nrow(d_sf)} in get_annual_density() ~ {Sys.time()}"))

  # get geometries
  geoms <- d_sf %>%
    select(hexid = HEXID, geom = geometry)

  # get *_n abundance (#/40km2) per month and calculate annual average density (#/km2)
  mos_n <- glue("{month.abb}_n")
  attrs <- d_sf %>%
    st_drop_geometry() %>%
    select(hexid = HEXID, any_of(mos_n)) %>%
    pivot_longer(-hexid, names_to = "mo_n", values_to = "n") %>%
    filter(n != -9999) %>%
    group_by(hexid) %>%
    summarize(
      n = mean(n),
      .groups = "drop") %>%
    mutate(
      density = n / 40) %>%
    select(hexid, density) # individuals / km2

  # return geometries joined with summarized attributes
  d_sum <- geoms %>%
    left_join(
      attrs, by = "hexid")
  d_sum
}
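
# e.g. (hypothetical numbers): a hexagon with Jan_n = 2 and Jul_n = 4 individuals
# per 40 km2, and -9999 sentinels for the remaining months, gets an annual
# average density of mean(c(2, 4)) / 40 = 0.075 individuals / km2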

# read layers ----
dir_shp   <- "/Users/bbest/My Drive/projects/offhab/data/raw/ncei.noaa.gov - GoMex cetacean & sea turtle SDMs/0256800/2.2/data/0-data/NOAA_SEFSC_Cetacean_SeaTurtle_SDM_shapefiles"
taxa_xls <- glue("{dir_shp}/../spp_gmx.xlsx")

d_taxa <- read_excel(taxa_xls)

t_geoms <- Id(schema = schema, table = "sdm_geometries")

rows_exist <- (
  tbl(con, t_geoms) |> 
    filter(ds_key == "gm") |> 
    collect() |> 
    nrow() ) > 0
if (!rows_exist){
  d <- tibble(
    path_shp = list.files(dir_shp, "shp$", full.names=T)) |> 
    mutate(
      base_shp = basename(path_shp) |> path_ext_remove(),
      taxa_shp = str_replace(base_shp, "(.*)_Monthly_.*", "\\1")) |> 
    left_join(
      d_taxa  |> 
        select(taxa_shp, taxa_sci, taxa_doc),
      by = "taxa_shp")  |> 
    mutate(
      population = map_chr(
        base_shp, ~{
          if (str_detect(.x, "^(Oceanic|Shelf)_")) {
            return(str_replace(.x, "^(Oceanic|Shelf)_(.*)", "\\1"))
          } else {
            return(NA_character_)
          } }),
      sf         = map(path_shp, read_sf),
      d_density  = imap(sf, get_annual_density),
      months_lst = map(sf, ~{
        intersect(
          colnames(.),
          glue("{month.abb}_n")) %>%
          str_replace("_n", "")  }),
      n_months = map_int(months_lst, length),
      months   = map_chr(months_lst, paste, collapse=";") )
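
  # d has one row per shapefile: paths, taxa, population, the sf layer,
  # annual densities (d_density), and which months have predictions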
  
  # get unique geometries
  d_geoms <- d |> 
    select(sf) |> 
    mutate(
      sf = map(sf, \(x){ x |> select(gid = HEXID) } ) ) |> 
    unnest(cols = c(sf)) |> 
    distinct() |> 
    arrange(gid) |> 
    st_as_sf() |> 
    # exclude "POLYGON" whole hexagon duplicates
    filter(st_geometry_type(geometry) == "MULTIPOLYGON") |> 
    rename(geom_id = gid) |> 
    mutate(
      geom_id = as.integer(geom_id),
      ds_key  = "gm") |> 
    relocate(ds_key, .before = geom_id) |> 
    st_transform(4326) |> # from: USA_Contiguous_Lambert_Conformal_Conic
    # mapView(d_geoms)
    st_set_geometry("geom")
  
  st_write(d_geoms, con, t_geoms, append = T)
}

q <- "SELECT * FROM sdm_geometries WHERE ds_key = 'gm'"
# st_read(con, query = q) |> 
#   mapView()
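
A quick count of the loaded hexagon geometries serves as a sanity check (the exact number depends on the shapefiles read above):

Code
dbGetQuery(con, "SELECT count(*) AS n_geoms FROM sdm_geometries WHERE ds_key = 'gm'")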
Code
t_vals <- Id(schema = schema, table = "sdm_values")

rows_exist <- tbl(con, t_vals) |> filter(ds_key == "gm") |> summarize(n()) |> pull() > 0
if (!rows_exist){
  
  # get unique values
  d_vals <- d |> 
    select(sp_key = taxa_sci, population, data = sf) |> 
    mutate(
      data = map(data, \(x){
        x |> 
          st_drop_geometry() |>
          rename(geom_id = HEXID) |> 
          pivot_longer(
            cols      = -geom_id,
            names_to  = "mo_var",
            values_to = "val") |>
          filter(val != -9999) |> 
          separate(
            col  = mo_var,
            into = c("mo", "var"),
            sep  = "_")
        } ) ) |> 
    unnest(cols = c(data)) |> 
    mutate(
      ds_key = "gm") |> 
    relocate(ds_key, .before = sp_key)
  # A tibble: 15,445,842 × 7
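  # e.g. a wide column "Jan_n" becomes a long row with mo = "Jan", var = "n"
  # (and "Jan_cv", if present in a layer, becomes mo = "Jan", var = "cv")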
  
  yr <- 2019 # use the last year of obs/env so the interval is a concrete ISO 8601 date
  d_vals <- d_vals |> 
    mutate(
     mm            = setNames(1:12, month.abb)[mo],
     mm_str        = stringr::str_pad(mm, 2, pad = "0"),
     # time interval notation using ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601)
     time_interval = glue("{yr}-{mm_str}/P1M")) |> 
    select(
      -mo, -mm, -mm_str)
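  # e.g. mo = "Jan" -> time_interval = "2019-01/P1M": January 2019 for a period of 1 month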
  
  # summarize d_vals to d_models
  d_mdls <- d_vals |> 
    group_by(
      ds_key, sp_key, population, time_interval, var) |> 
    summarize(.groups = "drop") |> 
    rowid_to_column("mdl_id") |>
    relocate(ds_key, .before = mdl_id)
  
  d_vals <- d_vals |> 
    left_join(
      d_mdls, 
      by = c("ds_key", "sp_key", "population", "time_interval", "var")) |> 
    select(
      ds_key, mdl_id, geom_id, val)
  
  d_spp <- d_taxa |> 
    group_by(taxa_sci) |>
    summarize(
      taxa_common = first(taxa_common) |> str_replace_all("Oceanic ", "") ) |> 
    mutate(
      ds_key = "gm") |>
    select(ds_key, sp_key = taxa_sci, sp_common = taxa_common) |> 
    mutate(
      sp_scientific = sp_key)
  
  t_spp <- Id(schema = schema, table = "sdm_species")
  
  rows_exist <- tbl(con, t_spp) |> filter(ds_key == "gm") |> summarize(n()) |> pull() > 0
  if (!rows_exist)
    dbWriteTable(con, t_spp, d_spp, append = T)
  
  t_mdls <- Id(schema = schema, table = "sdm_models")
  
  rows_exist <- tbl(con, t_mdls) |> filter(ds_key == "gm") |> summarize(n()) |> pull() > 0
  if (!rows_exist)
    dbWriteTable(con, t_mdls, d_mdls, append = T)
  
  # check: d_vals |> select(mo, time_interval) |> table()
  dbWriteTable(con, t_vals, d_vals, append = T)
}

t_mdls <- Id(schema = schema, table = "sdm_models")
tbl(con, t_mdls) |> 
  select(-description) |> 
  collect() |> 
  datatable()
Code
tbl(con, t_vals) |> 
  head() |> 
  collect() |> 
  kable()
ds_key   mdl_id   geom_id         val
gm          638    139788   0.0006198
gm          639    139788   0.0011117
gm          637    139788   1.7935747
gm          641    139788   0.0005822
gm          642    139788   0.0010737
gm          640    139788   1.8442154

The interactive map output is too large to embed here, but the following query and map render in RStudio.

Code
# build query
# tbl(con, t_mdls) |>
#   filter(
#     ds_key        == "gm",
#     # sp_key        == "Stenella frontalis",
#     # population    == "Oceanic",
#     sp_key        == "Balaenoptera ricei",
#     time_interval == "2019-01/P1M",
#     var           == "n") |>
#   select(ds_key, mdl_id) |> 
#   left_join(
#     tbl(con, t_vals),
#     by = c("ds_key", "mdl_id") ) |>
#   left_join(
#     tbl(con, t_geoms),
#     by = c("ds_key", "geom_id")) |> 
#   select(val, geom) |> 
#   show_query() # explain()

q <- "
SELECT v.val, g.geom
FROM (
  SELECT ds_key, mdl_id
  FROM public.sdm_models
  WHERE
    ds_key        = 'gm'                 AND
 -- sp_key        = 'Stenella frontalis' AND
 -- population    = 'Oceanic'            AND
    sp_key        = 'Balaenoptera ricei' AND
    time_interval = '2019-01/P1M'        AND
    var           = 'n' ) AS m
LEFT JOIN public.sdm_values AS v ON (
  m.ds_key = v.ds_key AND
  m.mdl_id = v.mdl_id )
LEFT JOIN public.sdm_geometries AS g ON (
  m.ds_key  = g.ds_key AND
  v.geom_id = g.geom_id )"
p <- st_read(con, query = q)
mapView(p)

Screenshot of running the above query in RStudio.
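
The same query can be parameterized for other species or months with glue::glue_sql(), which quotes the injected values for the connection. A sketch (the species and interval are just the example values used above):

Code
sp <- "Balaenoptera ricei"
ti <- "2019-01/P1M"
q_sp <- glue::glue_sql("
  SELECT v.val, g.geom
  FROM public.sdm_models AS m
  JOIN public.sdm_values AS v
    ON (m.ds_key = v.ds_key AND m.mdl_id = v.mdl_id)
  JOIN public.sdm_geometries AS g
    ON (m.ds_key = g.ds_key AND v.geom_id = g.geom_id)
  WHERE
    m.ds_key        = 'gm' AND
    m.sp_key        = {sp} AND
    m.time_interval = {ti} AND
    m.var           = 'n'", .con = con)
p <- st_read(con, query = as.character(q_sp))
# mapView(p)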
Code
gm_models <- d %>%
  select(mdl_id, taxa_sci, taxa_doc, months, n_months)

mdl_hex <- d %>%
  select(
    mdl_id, d_density) %>%
  unnest(d_density)

gm_model_hexagons <- mdl_hex %>%
  group_by(hexid, geom) %>%
  summarize(.groups="drop") %>%
  st_as_sf()

gm_model_hexagon_densities <- mdl_hex %>%
  st_drop_geometry() %>%
  select(mdl_id, hexid, density) %>%
  filter(!is.na(density))

# add aphia_id ----
gm_models <- wm_add_aphia_id(gm_models, taxa_sci)

# TODO?: combine Oceanic_* & Shelf_* for AtlanticSpotted_Dolphin + CommonBottlenose_Dolphin

# write to database ----
con <- oh_pg_con()
dbWriteTable(con, "gm_models", gm_models, overwrite=T)
st_write(gm_model_hexagons, con, "gm_model_hexagons", delete_layer=T)
dbWriteTable(con, "gm_model_hexagon_densities", gm_model_hexagon_densities, overwrite=T)

create_index(con, "gm_models", "mdl_id", unique = T)
create_index(con, "gm_model_hexagons", "hexid", unique = T)
create_index(con, "gm_model_hexagon_densities", c("mdl_id", "hexid"), unique = T)

# hexid x cell_id ----
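# spatial join: tag each analysis cell (oh_cells) with the hexagon (ply_id)
# whose polygon covers it, so hexagon densities can later be pulled onto cells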
tbl(con, "gm_model_hexagons")
oh_cells_ply <- dbGetQuery(
  con,
  "WITH
  c  AS (
    SELECT
      cell_id, geom
    FROM oh_cells),
  d AS (
    SELECT
      hexid AS ply_id, ST_Transform(geom, 4326) AS geom
    FROM gm_model_hexagons)
  SELECT DISTINCT ON (tbl, ply_id, cell_id)
    'gm_model_hexagons' AS tbl,
    d.ply_id,
    c.cell_id
  FROM c JOIN
    d ON ST_Covers(d.geom, c.geom)") %>%
  tibble()
oh_cells_ply
dbExecute(con, "DELETE FROM oh_cells_ply WHERE tbl = 'gm_model_hexagons'")
dbAppendTable(con, "oh_cells_ply", oh_cells_ply)

# test model output ----
con <- oh_pg_con()
r_d <- tbl(con, "gm_models") %>%
  filter(taxa_sci == "Ziphius") %>%
  left_join(
    tbl(con, "gm_model_hexagon_densities"),
    by = "mdl_id") %>%
  left_join(
    tbl(con, "oh_cells_ply") %>%
      filter(tbl == "gm_model_hexagons"),
    by = c("hexid" = "ply_id")) %>%
  select(cell_id, density) %>%
  filter(            # 1,977,010 -> 1,959,979
    !is.na(cell_id),
    !is.na(density)) %>%
  collect()

library(terra)
r <- oh_rast()
r[r_d$cell_id] <- r_d$density
tmp_tif <- tempfile(fileext = ".tif")
r <- trim(r)
plot(r)
mapView(r, maxpixels=ncell(r))


# redo rast ----
librarian::shelf(
  here, readr, terra)
options(readr.show_col_types = F)

dir_lyrs_tif <- "/Users/bbest/My Drive/projects/offhab/data/derived/lyrs_tif"
lyrs_csv     <- here("data-raw/layers.csv")
ds_key       <- "gm"

# match with aphia_id
D <- d |>
  left_join(
    tbl(con, "taxa") |> # distinct(tbl) |> pull(tbl)
      filter(tbl == "gm_models") |>
      select(
        taxa_sci = taxa,
        aphia_id) |>
      collect(),
    by = "taxa_sci")

# combine:
#           Stenella frontalis | Tursiops truncatus
#  oceanic |                 8 |                  9
#    shelf |                15 |                 16
d_sf <- D |>
  filter(
    mdl_id == 8) |>
  select(aphia_id, d_density) |>
  unnest(d_density) |>
  rename(density_oceanic = density) |>
  left_join(
    D |>
      filter(
        mdl_id == 15) |>
      select(mdl_id, d_density) |>
      unnest(d_density) |>
      select(
        hexid, density_shelf = density),
    by = "hexid") |>
  mutate(
    density = map2_dbl(
      density_oceanic, density_shelf, sum, na.rm = T)) |>
  select(-density_oceanic, -density_shelf) |>
  nest(
    d_density = c(hexid, geom, density))
d_tt <- D |>
  filter(
    mdl_id == 9) |>
  select(aphia_id, d_density) |>
  unnest(d_density) |>
  rename(density_oceanic = density) |>
  left_join(
    D |>
      filter(
        mdl_id == 16) |>
      select(mdl_id, d_density) |>
      unnest(d_density) |>
      select(
        hexid, density_shelf = density),
    by = "hexid") |>
  mutate(
    density = map2_dbl(
      density_oceanic, density_shelf, sum, na.rm = T)) |>
  select(-density_oceanic, -density_shelf) |>
  nest(
    d_density = c(hexid, geom, density))
D <- D |>
  filter(!mdl_id %in% c(8,9,15,16)) |>
  select(aphia_id, d_density) |>
  rbind(
    d_sf,
    d_tt)
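
# D now has one row per output layer: aphia_id plus nested d_density, with the
# Oceanic_* and Shelf_* models combined for the two dolphin species above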

# iterate over rasters
r_na <- oh_rast("NA")
# for (i in 1:nrow(D)){ # i = 1
for (i in 16:nrow(D)){ # i = 16

  aphia_id <- D$aphia_id[i]
  lyr_key <- glue("{ds_key}_{aphia_id}")
  r_tif <- glue("{dir_lyrs_tif}/{lyr_key}.tif")
  message(glue("{i} of {nrow(D)}: {basename(r_tif)} ~ {Sys.time()}"))

  # get density, geom
  d <- D |>
    slice(i) |>
    pull(d_density) %>%
    .[[1]] |>
    st_as_sf()
  # mapView(d, zcol = "density")

  # convert to raster
  r <- d |>
    st_transform(3857) |>
    rasterize(
      r_na, "density")
  # plet(r, tiles="Esri.NatGeoWorldMap")

  # rescale 0 to 100; set 0 -> NA
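  # note: rescale() maps the original minimum to 0, so the cell(s) at the
  # minimum density are dropped by the r[r==0] <- NA step below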
  (vr <- range(values(r), na.rm = TRUE))
  r <- setValues(
    r,
    scales::rescale(
      values(r),
      to = c(0, 100)) )
  r[r==0] <- NA
  # plet(r, tiles="Esri.NatGeoWorldMap")

  # write raster
  write_rast(r, r_tif)

  # write min, max to layers.csv
  d <- read_csv(lyrs_csv) |>
    filter(
      lyr_key != !!lyr_key) |>
    bind_rows(
      tibble(
        lyr_key     = lyr_key,
        ds_key      = ds_key,
        aphia_id    = aphia_id,
        val_min     = vr[1],
        val_max     = vr[2],
        rescale_min = 0,
        rescale_max = 100) )
  # message(glue("  nrow(lyrs_csv): {nrow(d)}"))
  write_csv(d, lyrs_csv)
}