Ingest SDMs: NCCOS Atlantic & Pacific Seabird Densities

NCCOS Atlantic & Pacific Seabird Densities (Winship et al. 2018; Leirness et al. 2021)

NCCOS Assessments: Modeling At-Sea Density of Marine Birds for Atlantic and Pacific

/Users/bbest/My Drive/projects/_archive/offhab/data/raw/

ncei.noaa.gov - seabirds, atlantic/
- 0176682/1.1/data/0-data/NCCOS-Atlantic-Birds_ArchiveDataPackage/NCCOS-Atlantic-Birds_ArchiveDataPackage/Documentation/
  - Atlantic_bird_mapping_data_documentation.pdf
  - atl_spp.xlsx

ncei.noaa.gov - seabirds, pacific/
- 0242882/1.1/data/1-data/4LFC6T_PacificBirds_NCCOS/
  - DataDocumentation.pdf
  - pac_spp.xlsx
Code
# packages
librarian::shelf(
  # devtools, 
  dplyr, DT, fs, gdalUtilities, glue, here, httr2, janitor, jsonlite, knitr,
  mapview, purrr, readr,
  readxl, sf, stringr, terra, tibble, tidyr, units,
  quiet = T)
options(readr.show_col_types = F)

source(here("libs/db.R")) # con, create_index()
Code
schema <- "public"

d_dataset_atl <- tibble(
  ds_key        = "nc_atl_birds_dens",
  response_type = "density",             
  # ds_key:     "{src}-{rgn}-{taxa}-{resp}"; see below for response_type, taxa_groups lookups
  # name_short: "{source_broad} {regions} {taxa_groups} {response_type}"
  name_short  = "NCCOS Atlantic Seabird Densities", 
  name_full   = "NCCOS Assessment: Modeling At-Sea Density of Marine Birds to Support Atlantic Marine Renewable Energy Planning from 1978-2016 (NCEI Accession 0176682)",
  link        = "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0176682",
  citation    = "Winship, Arliss J.; Kinlan, Brian P.; White, Timothy P.; Leirness, Jeffery B.; Christensen, John (2018). NCCOS Assessment: Modeling At-Sea Density of Marine Birds to Support Atlantic Marine Renewable Energy Planning from 1978-2016 (NCEI Accession 0176682). [indicate subset used]. NOAA National Centers for Environmental Information. Dataset. https://doi.org/10.25921/8eq5-q834. Accessed October 6, 2022.",
  description = "This dataset provides seasonal spatial rasters of predicted long-term (1980-2017) density of 33 individual species and 13 taxonomic groups of marine birds throughout the Pacific Outer Continental Shelf (OCS) and adjacent waters off the contiguous United States at 2-km spatial resolution. Two indications of the uncertainty associated with the model predictions are also provided: 1) seasonal spatial layers indicating areas with no survey effort and 2) seasonal spatial rasters of the precision of predicted density of each species/group characterized as its coefficient of variation (CV). Predicted density should always be considered in conjunction with these two indications of uncertainty. This dataset also includes spatial rasters of environmental predictor variables that were used in the predictive modeling.",
  # response_type: one of: occurrence [occs], range [rng], suitability [suit], probability [pr], biomass [m], density [dens], abundance [n]
  # taxa_groups: one or more of: fish [fish], invertebrates [inverts], marine mammals [marmams], cetaceans [cets], sea turtles [turtles], seabirds [birds], etc.
  taxa_groups  = "seabirds", 
  year_pub     = 2018,
  date_obs_beg = "1980-01-01", 
  date_obs_end = "2016-10-05",      # up to, but not including *_end
  date_env_beg = "1992-01-01", 
  date_env_end = "2018-01-01",      # up to, but not including *_end
  regions      = list("Atlantic"))  # array

d_dataset_pac <- tibble(
  ds_key        = "nc_pac_birds_dens",
  response_type = "density",             
  # ds_key:     "{src}-{rgn}-{taxa}-{resp}"; see below for response_type, taxa_groups lookups
  # name_short: "{source_broad} {regions} {taxa_groups} {response_type}"
  name_short  = "NCCOS Pacific Seabird Densities", 
  name_full   = "NCCOS Assessment: Modeling at-sea density of marine birds to support renewable energy planning on the Pacific Outer Continental Shelf of the contiguous United States (NCEI Accession 0242882)",
  link        = "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0242882",
  citation    = "Leirness, Jeffery B.; Adams, Josh; Ballance, Lisa T.; Coyne, Michael; Felis, Jonathan J.; Joyce, Trevor; Pereksta, David M.; Winship, Arliss J. (2022). NCCOS Assessment: Modeling at-sea density of marine birds to support renewable energy planning on the Pacific Outer Continental Shelf of the contiguous United States (NCEI Accession 0242882). [indicate subset used]. NOAA National Centers for Environmental Information. Dataset. https://doi.org/10.25921/xqf2-r853. Accessed October 6, 2022.",
  description = "This dataset provides seasonal spatial rasters of predicted long-term (1980-2017) density of 33 individual species and 13 taxonomic groups of marine birds throughout the Pacific Outer Continental Shelf (OCS) and adjacent waters off the contiguous United States at 2-km spatial resolution. Two indications of the uncertainty associated with the model predictions are also provided: 1) seasonal spatial layers indicating areas with no survey effort and 2) seasonal spatial rasters of the precision of predicted density of each species/group characterized as its coefficient of variation (CV). Predicted density should always be considered in conjunction with these two indications of uncertainty. This dataset also includes spatial rasters of environmental predictor variables that were used in the predictive modeling.",
  # response_type: one of: occurrence [occs], range [rng], suitability [suit], probability [pr], biomass [m], density [dens], abundance [n]
  # taxa_groups: one or more of: fish [fish], invertebrates [inverts], marine mammals [marmams], cetaceans [cets], sea turtles [turtles], seabirds [birds], etc.
  taxa_groups  = "seabirds", 
  year_pub     = 2022,
  date_obs_beg = "1978-01-01",
  date_obs_end = "2017-01-01",     # up to, but not including *_end
  date_env_beg = "1981-01-01", 
  date_env_end = "2017-01-01",     # up to, but not including *_end
  regions      = list("Pacific"))  # array

d_datasets <- bind_rows(d_dataset_atl, d_dataset_pac)
# check: View(d_datasets); write_csv(d_datasets, "data/nc_datasets.csv"); glimpse(d_datasets)
# names(d_dataset) |> paste(collapse = ", ") |> cat()

t_datasets <- Id(schema = schema, table = "sdm_datasets")

row_exists <- tbl(con, t_datasets) |> filter(ds_key %in% !!d_datasets$ds_key) |> collect() |> nrow() == length(d_datasets$ds_key)
if (!row_exists){
  
  # append, except array columns: regions
  dbWriteTable(
    con, t_datasets, append = T,
    d_datasets |> 
      select(-regions) )
  
  for (i in 1:nrow(d_datasets)){ # i=1
    # append array column: regions
    regions_sql <- glue(
      "ARRAY['{d_datasets$regions[[i]] |> unlist() |> paste(collapse = ', ')}']")
    dbExecute(
      con, 
      glue(
        "UPDATE sdm_datasets 
        SET   regions = {regions_sql}
        WHERE ds_key  = '{d_datasets$ds_key[[i]]}'") )
  }
}

if (F){ # run once for quick fix
  dbExecute(con, glue(
    "UPDATE sdm_datasets 
     SET   spatial_data_type = 'raster'"))
  dbExecute(con, glue(
    "UPDATE sdm_datasets 
     SET   spatial_data_type = 'vector'
     WHERE ds_key = 'gm'") )
}


tbl(con, t_datasets) |> 
  filter(ds_key %in% !!d_datasets$ds_key) |> 
  collect() |>
  glimpse()
Rows: 2
Columns: 18
$ ds_key            <chr> "nc_pac_birds_dens", "nc_atl_birds_dens"
$ name_short        <chr> "NCCOS Pacific Seabird Densities", "NCCOS Atlantic S…
$ name_full         <chr> "NCCOS Assessment: Modeling at-sea density of marine…
$ link              <chr> "https://www.ncei.noaa.gov/access/metadata/landing-p…
$ links_other       <pq__vrch> NA, NA
$ source_broad      <chr> NA, NA
$ source_detail     <chr> NA, NA
$ regions           <pq__vrch> {Pacific}, {Atlantic}
$ description       <chr> "This dataset provides seasonal spatial rasters of p…
$ citation          <chr> "Leirness, Jeffery B.; Adams, Josh; Ballance, L…
$ response_type     <chr> "density", "density"
$ taxa_groups       <chr> "seabirds", "seabirds"
$ year_pub          <int> 2022, 2018
$ date_obs_beg      <date> 1978-01-01, 1980-01-01
$ date_obs_end      <date> 2017-01-01, 2018-01-01
$ date_env_beg      <date> 1981-01-01, 1992-01-01
$ date_env_end      <date> 2017-01-01, 2018-01-01
$ spatial_data_type <chr> "raster", "raster"
Code
# setup paths ----
dir_g   <- "/Users/bbest/My Drive/projects/_archive/offhab/data/raw"
dir_atl <- glue("{dir_g}/ncei.noaa.gov - seabirds, atlantic/0176682/1.1/data/0-data/NCCOS-Atlantic-Birds_ArchiveDataPackage")
dir_pac <- glue("{dir_g}/ncei.noaa.gov - seabirds, pacific/0242882/1.1/data/0-data")

spp_atl_xls <- glue("{dir_atl}/Documentation/atl_spp.xlsx")
spp_pac_xls <- glue("{dir_pac}/4LFC6T_PacificBirds_NCCOS/pac_spp.xlsx")

# get Atlantic rasters ----
dir_atl_tif <- glue("{dir_atl}/Data/model_output_predictions")
rx_atl  <- "(.+)_(.+)_predicted_relative_density_bootstrap_(.+)\\.tif$"
d_atl <- tibble(
  path_tif = list.files(dir_atl_tif, rx_atl, full.names = T, recursive = T)) |> 
  mutate(
    file_tif = basename(path_tif),
    region   = "Atlantic",
    sp_code  = str_replace(file_tif, rx_atl, "\\1"),
    season   = str_replace(file_tif, rx_atl, "\\2"),
    var      = str_replace(file_tif, rx_atl, "\\3"))
# d_atl |> select(-path_tif) |> glimpse()
# Rows: 420
# Columns: 5
# $ file_tif <chr> "ATPU_fall_predicted_relative_density_bootstrap_CV.tif", "ATPU_fall_predicted_relative_den…
# $ region   <chr> "Atlantic", "Atlantic", "Atlantic", "Atlantic", "Atlantic", "Atlantic", "Atlantic", "Atlan…
# $ sp_code  <chr> "ATPU", "ATPU", "ATPU", "AUSH", "AUSH", "AUSH", "BCPE", "BCPE", "BCPE", "BLKI", "BLKI", "B…
# $ season   <chr> "fall", "fall", "fall", "fall", "fall", "fall", "fall", "fall", "fall", "fall", "fall", "f…
# $ var      <chr> "CV", "QUANT_50", "RANGE_CI90", "CV", "QUANT_50", "RANGE_CI90", "CV", "QUANT_50", "RANGE_C…
# check all tifs: fs::dir_ls(dir_atl_tif, glob="*.tif", recursive=T) |> length()
           
# get Pacific rasters ----
dir_pac_tif <- glue("{dir_pac}/model_output_predictions")
rx_pac      <- "(.+)_(.+)_predicted_(.*).tif$"
d_pac       <- tibble(
  path_tif = list.files(dir_pac_tif, rx_pac, full.names = T, recursive = T)) |> 
  mutate(
    file_tif = basename(path_tif),
    region   = "Pacific",
    sp_code  = str_replace(file_tif, rx_pac, "\\1"),
    season   = str_replace(file_tif, rx_pac, "\\2"),
    var      = str_replace(file_tif, rx_pac, "\\3"))
# d_pac |> select(-path_tif) |> glimpse()
# Rows: 270
# Columns: 5
# $ file_tif <chr> "ANMU_spring_predicted_density_CV.tif", "ANMU_spring_predicted_density.tif", "ASSP_fall_pred…
# $ region   <chr> "Pacific", "Pacific", "Pacific", "Pacific", "Pacific", "Pacific", "Pacific", "Pacific", "Pac…
# $ sp_code  <chr> "ANMU", "ANMU", "ASSP", "ASSP", "ASSP", "ASSP", "ASSP", "ASSP", "BFAL", "BFAL", "BFAL", "BFA…
# $ season   <chr> "spring", "spring", "fall", "fall", "spring", "spring", "summer", "summer", "fall", "fall", …
# $ var      <chr> "density_CV", "density", "density_CV", "density", "density_CV", "density", "density_CV", "de…
# check for any missing TIFs: fs::dir_ls(dir_pac_tif, glob="*.tif", recursive=T) |> length()

# combine Atlantic and Pacific ----
d <- bind_rows(
  d_atl,
  d_pac)

# run once
if (F){
  
  # read in rasters ----
  d <- d |> 
    select(region, sp_code, season, var, path_tif) |> 
    mutate(
      r      = map(path_tif, terra::rast),
      crs    = map_chr(r, crs, proj=T),
      crs_tr = map_chr(crs, str_trunc, 24),
      nrow   = map_int(r, nrow),
      ncol   = map_int(r, ncol)) |> 
    arrange(region, crs_tr, sp_code, season, var)
  # confirm same dims per region: d |> select(nrow, ncol, region) |> table()
  
  # fix rasters in Atlantic missing crs ----
  
  # unique(d$crs)
  # [1] "+proj=omerc +lat_0=35 +lonc=-75 +alpha=40 +gamma=40 +k=0.9996 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"
  # [2] ""
  # [3] "+proj=omerc +lat_0=39 +lonc=-125 +alpha=75 +gamma=75 +k=0.9996 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
  
  # d |> select(crs_tr, region) |> table()
  #                           region
  # crs_tr                     Atlantic Pacific
  #                                   7       0
  #   +proj=omerc +lat_0=35...      413       0
  #   +proj=omerc +lat_0=39...        0     270
  
  crs_atl <- d |> 
    filter(region == "Atlantic" & crs != "") |> 
    first() |> 
    pull(crs)
  
  d <- d |> 
    mutate(
      r = map(r, \(x) {
        if (crs(x) == "")
          crs(x) <- crs_atl
        x } ) ) |> 
    select(-crs, -crs_tr, -nrow, -ncol)
  
  # write rasters per model (region-species-season), including all vars ----
  dir_tif <- here("data/sdm/raw")
  for (dir_ds in glue("{dir_tif}/{d_datasets$ds_key}"))
    dir.create(dir_ds, recursive = T, showWarnings = F)
  
  # * standardize vars ----
  # d |> select(region, var) |> table()
  #           var
  # region      CV density density_CV QUANT_50 RANGE_CI90
  #   Atlantic 140       0          0      140        140
  #   Pacific    0     135        135        0          0
  d <- d |> 
    mutate(
      ds_key  = case_match(
        region,
        "Atlantic" ~ "nc_atl_birds_dens",
        "Pacific"  ~ "nc_pac_birds_dens"),
      mdl_tif = glue("{dir_tif}/{ds_key}/{sp_code}_{season}.tif"),
      var     = case_match(
        var,
        "density"    ~ "n_per_km2",
        "QUANT_50"   ~ "n_per_km2",
        "density_CV" ~ "cv_n_per_km2",
        "CV"         ~ "cv_n_per_km2",
        "RANGE_CI90" ~ "ci90_n_per_km2") )
  
  d_mdls <- d |> 
    arrange(
      ds_key, region, sp_code, season, 
      # arrange by var to ensure n_per_km2 (density) is first, cv second
      factor(var, levels = c("n_per_km2", "cv_n_per_km2", "ci90_n_per_km2"))) |>
    group_by(ds_key, region, sp_code, season, mdl_tif) |> 
    nest() |> 
    ungroup()
  
  # * write out rasters per model, convert to COG ----
  d_mdls |>
    select(data, mdl_tif) |>
    pwalk(\(data, mdl_tif) {
      
      tmp_tif <- tempfile(fileext = ".tif")
      
      # if mdl_tif already exists and is a valid COG, skip
      if (file_exists(mdl_tif)){
        
        is_cog <- system(glue("rio cogeo validate {mdl_tif}"), intern=T) |> 
          str_detect(" is a valid cloud optimized GeoTIFF")
        
        if (is_cog)
          return()
        
        file_move(mdl_tif, tmp_tif)
      } else {
        r        <- rast(data$r)
        names(r) <- data$var
        writeRaster(r, file = tmp_tif, overwrite = T)
      }
      
      system(glue("rio cogeo create {tmp_tif} {mdl_tif}"))
      unlink(tmp_tif)
    })
  
  # Uploaded all data/sdm/raw/{ds_key}/*.tif to rstudio.marinesensitivity.org:/share/public/cog/sdm/raw/{ds_key}/*.tif 
  
  # TODO: make COGs (*.tif) into STAC w/ json metadata
  
  # * write out temp csv (later db tbl) of all models for shiny app sdm-cog, including band index `bidx` ----
  d |> 
    arrange(
      ds_key, region, sp_code, season, 
      # arrange by var to ensure n_per_km2 (density) is first, cv second
      factor(var, levels = c("n_per_km2", "cv_n_per_km2", "ci90_n_per_km2"))) |>
    mutate(
      cog_url = glue("https://file.marinesensitivity.org/cog/sdm/raw/{ds_key}/{basename(mdl_tif)}"),
      bidx    = factor(var, levels = c("n_per_km2", "cv_n_per_km2", "ci90_n_per_km2")) |> as.integer()) |> 
    select(-r, -path_tif, -mdl_tif) |> 
    relocate(ds_key) |>
    write_csv(here("data/nc_models.csv"))
  # check: x |> select(var, bidx) |> table() 
}

# TODO: write d_mdls, d_spp to db 
#       use 'nc_*', not 'gm'
#
# t_mdls <- Id(schema = schema, table = "sdm_models")
# rows_exist <- tbl(con, t_mdls) |> filter(ds_key == "gm") |> summarize(n()) |> pull() > 0
# if (!rows_exist)
#   dbWriteTable(con, t_mdls, d_mdls, append = T)
# 
# t_spp <- Id(schema = schema, table = "sdm_species")
# rows_exist <- tbl(con, t_spp) |> filter(ds_key == "gm") |> summarize(n()) |> pull() > 0
# if (!rows_exist)
#   dbWriteTable(con, t_spp, d_spp, append = T)

# table(d$var, d$region)
#                  Atlantic Pacific
#   ci90_n_per_km2      140       0
#   cv_n_per_km2        140     135
#   n_per_km2           140     135

# TODO: Pacific: somehow include survey_effort_mask_{season}.shp
  
# d <- d |> 
#   mutate(
#     datatype = map_chr(r, datatype))
# table(d$datatype, d$region)
# FLT8S
Code
# map COG ----
cog_url        = "https://file.marinesensitivity.org/cog/sdm/raw/nc_atl_birds_dens/ARTE_summer.tif"
cog_method     = "bilinear"
cog_palette    = "spectral_r"
lgnd_palette   = "Spectral"
lgnd_palette_r = T
bidx           = 1

# * plot locally ----
cog_tif <- str_replace(
  cog_url,
  "https://file.marinesensitivity.org/cog/sdm",
  here("data/sdm"))
stopifnot(file.exists(cog_tif))
r <- rast(cog_tif, lyrs=bidx)
plot(r) # plot native terra

Code
# * plot via terra::plet() ----
plet(
  r, 
  col = RColorBrewer::brewer.pal(
    11, "Spectral") |> rev(),
  tiles = "Esri.OceanBasemap")
Code
# * plot via leaflet ----
library(leaflet)

r_mer <- leaflet::projectRasterForLeaflet(
  r, method = "bilinear")
pal <- colorNumeric("Spectral", values(r_mer),
  na.color = "transparent", reverse = T)

leaflet() |> 
  addProviderTiles("Esri.OceanBasemap") |> 
  addRasterImage(r_mer, colors = pal, opacity = 0.8) |>
  addLegend(pal = pal, values = values(r_mer),
    title = names(r_mer))
Code
# * get bounding box via titiler ----

titiler_endpoint = "https://titiler.xyz"

cog_bb <- request(titiler_endpoint) |> 
  req_url_path_append("cog", "bounds") |> 
  req_url_query(
    url = cog_url,
    bidx = bidx) |> 
  req_perform() |> 
  resp_body_json() |> 
  unlist() |> 
  as.numeric()
cog_bb
[1] -87.01826  23.06746 -52.26961  53.30289
Code
# * get range of values via titiler ----
cog_rng <- request(titiler_endpoint) |> 
  req_url_path_append("cog", "statistics") |> 
  req_url_query(
    url  = cog_url,
    bidx = bidx) |> 
  req_perform() |> 
  resp_body_json() |>
  pluck(glue("b{bidx}")) |> 
  keep_at(~ .x %in% c("min", "max")) |> 
  as.numeric()
cog_rng
[1] 0.0003564832 0.8107504845
Code
# set cog_rng manually
# cog_rng <- range(values(r, na.rm=T))

tile_opts <- glue(
  "bidx={bidx}&expression=b{bidx}&
  unscale=false&
  resampling={cog_method}&reproject={cog_method}&return_mask=true&
  rescale={paste(cog_rng, collapse=',')}&
  colormap_name={cog_palette}") |> str_replace_all("\\s+", "")
sdm_tile_url  <- glue(
  "{titiler_endpoint}/cog/tiles/WebMercatorQuad/{{z}}/{{x}}/{{y}}@2x.png?url={cog_url}&{tile_opts}")

# OLD: https://api.cogeo.xyz/docs#/Cloud%20Optimized%20GeoTIFF/tile_cog_tiles__TileMatrixSetId___z___x___y___scale_x__format__get
# 
# TODO: cogeo.xyz (bidx = 1,2,3) VS titiler.xyz (bidx = List [ 1, 2, 3]) ?
#
# NEW: https://titiler.xyz/api.html#/Cloud%20Optimized%20GeoTIFF/tile_cog_tiles__tileMatrixSetId___z___x___y___scale_x__format__get

library(rdeck)

dir_private <- switch(
  Sys.info()[["sysname"]],
  "Darwin" = "/Users/bbest/My Drive/private",
  "Linux"  = "/share/private")

mb_token_txt <- glue("{dir_private}/mapbox_token_bdbest.txt")
mb_token <- readLines(mb_token_txt)
options(rdeck.mapbox_access_token = mb_token)

bb <- cog_bb

rdeck(
  map_style      = mapbox_dark(),
  theme          = "light",
  initial_bounds = st_bbox(
    c(xmin=bb[1], ymin=bb[2], xmax=bb[3], ymax=bb[4]),
    crs = st_crs(4326) ) )  |>
  add_tile_layer(
    id                = "sdm",
    name              = "sdm",
    # visible           = F,
    # visibility_toggle = T,
    opacity           = 0.5,
    data              = sdm_tile_url)

References

Leirness, J. B., Josh Adams, Lisa T. Ballance, Michael Coyne, Jonathan J. Felis, Trevor Joyce, David M. Pereksta, Arliss J. Winship, Christopher FG Jeffrey, and D. Ainley. 2021. “Modeling at-Sea Density of Marine Birds to Support Renewable Energy Planning on the Pacific Outer Continental Shelf of the Contiguous United States.” https://repository.library.noaa.gov/view/noaa/49073.
Winship, Arliss J., Brian Patrick Kinlan, Timothy Paul White, Jeffrey Leirness, and John Christensen. 2018. “Modeling at-Sea Density of Marine Birds to Support Atlantic Marine Renewable Energy Planning.”