Author

Ben Best

1 aquamapsdata

(Kaschner et al. 2023)

Code
# dependency for aquamapsdata:
#  - Terminal: brew install gnupg
#  - R: install.packages("rcrypt")
librarian::shelf(
  raquamaps/aquamapsdata,
  DBI, dplyr, DT, duckdb, fs, glue, here, janitor, 
  leaflet, librarian, mapview,
  # raquamaps/raquamaps,
  readr, sf, terra, tibble,
  quiet = T)  # zeallot
Warning: package 'duckdb' was built under R version 4.3.1
Warning: package 'mapview' was built under R version 4.3.1
Code
# The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
# which was just loaded, will retire in October 2023.

# downloads about 2 GB of data, approx 10 GB when unpacked
# download_db()

# data(package = "raquamaps")
con_sl <- default_db("sqlite") # /Users/bbest/Library/Application Support/aquamaps/am.db
file_size(aquamapsdata::am_db_sqlite()) # 11.2G
11.2G
Code
dbListTables(con_sl)
 [1] "fts"                 "fts_config"          "fts_content"        
 [4] "fts_data"            "fts_docsize"         "fts_idx"            
 [7] "hcaf_r"              "hcaf_species_native" "hspen_r"            
[10] "occurrencecells_r"   "speciesoccursum_r"  
Code
dir_data <- "/Users/bbest/My Drive/projects/msens/data"
path_dd  <- glue("{dir_data}/derived/aquamaps/am.duckdb")
# file_delete(path_dd)
con_dd   <- dbConnect(
  duckdb(
    dbdir     = path_dd,
    read_only = T))
# dbDisconnect(con_dd, shutdown = T)
 [1] "fts"                 "fts_config"          "fts_content"        
 [4] "fts_data"            "fts_docsize"         "fts_idx"            
 [7] "hcaf_r"              "hcaf_species_native" "hspen_r"            
[10] "occurrencecells_r"   "speciesoccursum_r"  
Code
for (tbl in dbListTables(con_sl)){
  tbl(con_sl, tbl) |> 
    print()
}
# Source:   table<fts> [?? x 2]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   key                                      terms                               
   <chr>                                    <chr>                               
 1 Chn-013ca97e-46a8-4c5e-b398-8bb4f340f88b Lenisquilla lata  Animalia Arthropo…
 2 Chn-015453c0-3ff9-477d-ab51-c68d85e65af5 Clorida albolitura  Animalia Arthro…
 3 Chn-0721ac0c-7d19-46fa-9dcc-02da54310867 Leiogalathea laevirostris  Animalia…
 4 Chn-08d9eb73-87c3-4b8a-8861-baf5b2862d8c Eduarctus martensii striated locust…
 5 Chn-0e470b93-1bf9-4c28-8770-166bb5e2a8c7 Levisquilla jurichi  Animalia Arthr…
 6 Chn-0e9cfa5e-eab6-490e-9279-34eeeb127eb2 Lophosquilla tiwarii  Animalia Arth…
 7 Chn-12f493b8-a962-48d7-af4b-632ba9c0fa57 Capillaster multiradiatus  Animalia…
 8 Chn-17d15aec-37c6-4c6a-88d2-9c350613076a Harpiosquilla annandalei  Animalia …
 9 Chn-21252d4f-8095-42f9-a7e5-67236bb3099d Cloridina pelamidae  Animalia Arthr…
10 Chn-27d923b5-53c3-4d53-aa10-1f02bea6dd01 Chelarctus cultrifer  Animalia Arth…
# ℹ more rows
# Source:   table<fts_config> [1 x 2]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
  k           v
  <chr>   <int>
1 version     4
# Source:   table<fts_content> [?? x 3]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
      id c0                                       c1                            
   <int> <chr>                                    <chr>                         
 1     1 Chn-013ca97e-46a8-4c5e-b398-8bb4f340f88b Lenisquilla lata  Animalia Ar…
 2     2 Chn-015453c0-3ff9-477d-ab51-c68d85e65af5 Clorida albolitura  Animalia …
 3     3 Chn-0721ac0c-7d19-46fa-9dcc-02da54310867 Leiogalathea laevirostris  An…
 4     4 Chn-08d9eb73-87c3-4b8a-8861-baf5b2862d8c Eduarctus martensii striated …
 5     5 Chn-0e470b93-1bf9-4c28-8770-166bb5e2a8c7 Levisquilla jurichi  Animalia…
 6     6 Chn-0e9cfa5e-eab6-490e-9279-34eeeb127eb2 Lophosquilla tiwarii  Animali…
 7     7 Chn-12f493b8-a962-48d7-af4b-632ba9c0fa57 Capillaster multiradiatus  An…
 8     8 Chn-17d15aec-37c6-4c6a-88d2-9c350613076a Harpiosquilla annandalei  Ani…
 9     9 Chn-21252d4f-8095-42f9-a7e5-67236bb3099d Cloridina pelamidae  Animalia…
10    10 Chn-27d923b5-53c3-4d53-aa10-1f02bea6dd01 Chelarctus cultrifer  Animali…
# ℹ more rows
# Source:   table<fts_data> [?? x 2]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
             id         block
        <int64>        <blob>
 1            1     <raw 9 B>
 2           10    <raw 28 B>
 3 137438953473 <raw 4.04 kB>
 4 137438953474 <raw 4.04 kB>
 5 137438953475 <raw 4.05 kB>
 6 137438953476 <raw 4.04 kB>
 7 137438953477 <raw 4.04 kB>
 8 137438953478 <raw 4.05 kB>
 9 137438953479 <raw 4.05 kB>
10 137438953480 <raw 4.05 kB>
# ℹ more rows
# Source:   table<fts_docsize> [?? x 2]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
      id        sz
   <int>    <blob>
 1     1 <raw 2 B>
 2     2 <raw 2 B>
 3     3 <raw 2 B>
 4     4 <raw 2 B>
 5     5 <raw 2 B>
 6     6 <raw 2 B>
 7     7 <raw 2 B>
 8     8 <raw 2 B>
 9     9 <raw 2 B>
10    10 <raw 2 B>
# ℹ more rows
# Source:   table<fts_idx> [?? x 3]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   segid      term  pgno
   <int>    <blob> <int>
 1     1 <raw 0 B>     2
 2     1 <raw 5 B>     4
 3     1 <raw 7 B>     6
 4     1 <raw 7 B>     8
 5     1 <raw 7 B>    10
 6     1 <raw 5 B>    12
 7     1 <raw 7 B>    14
 8     1 <raw 7 B>    16
 9     1 <raw 4 B>    22
10     1 <raw 8 B>    24
# ℹ more rows
# Source:   table<hcaf_r> [?? x 58]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
      ID CsquareCode LOICZID NLimit Slimit WLimit ELimit CenterLat CenterLong
   <int> <chr>         <int>  <dbl>  <dbl>  <dbl>  <dbl>     <dbl>      <dbl>
 1     1 5207:363:1   167254  -26    -26.5  -73.5  -73       -26.2      -73.2
 2     2 5207:363:2   167253  -26    -26.5  -74    -73.5     -26.2      -73.8
 3     3 5207:363:3   167974  -26.5  -27    -73.5  -73       -26.8      -73.2
 4     4 5207:363:4   167973  -26.5  -27    -74    -73.5     -26.8      -73.8
 5     6 5207:360:2   167259  -26    -26.5  -71    -70.5     -26.2      -70.8
 6     8 5207:360:4   167979  -26.5  -27    -71    -70.5     -26.8      -70.8
 7     9 5207:364:1   167252  -26    -26.5  -74.5  -74       -26.2      -74.2
 8    10 5207:364:2   167251  -26    -26.5  -75    -74.5     -26.2      -74.8
 9    11 5207:364:3   167972  -26.5  -27    -74.5  -74       -26.8      -74.2
10    12 5207:364:4   167971  -26.5  -27    -75    -74.5     -26.8      -74.8
# ℹ more rows
# ℹ 49 more variables: CellArea <dbl>, OceanArea <dbl>, PWater <dbl>,
#   ClimZoneCode <chr>, FAOAreaM <int>, FAOAreaIn <int>, CountryMain <chr>,
#   CountrySecond <chr>, CountryThird <chr>, CountrySubMain <chr>,
#   CountrySubSecond <chr>, CountrySubThird <chr>, EEZ <int>, LME <int>,
#   LMEBorder <int>, MEOW <int>, OceanBasin <int>, IslandsNo <int>,
#   Area0_20 <dbl>, Area20_40 <dbl>, Area40_60 <dbl>, Area60_80 <dbl>, …
# Source:   table<hcaf_species_native> [?? x 7]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   SpeciesID   CsquareCode CenterLat CenterLong Probability FAOAreaYN BoundBoxYN
   <chr>       <chr>           <dbl>      <dbl>       <dbl>     <int>      <int>
 1 Chn-013ca9… 1004:102:2       0.25       42.8        0.62         1          0
 2 Chn-013ca9… 1004:103:1       0.25       43.2        0.63         1          0
 3 Chn-013ca9… 1004:103:2       0.25       43.8        0.39         1          0
 4 Chn-013ca9… 1004:103:4       0.75       43.8        0.7          1          0
 5 Chn-013ca9… 1004:104:3       0.75       44.2        0.74         1          0
 6 Chn-013ca9… 1004:114:1       1.25       44.2        0.73         1          0
 7 Chn-013ca9… 1004:114:2       1.25       44.8        0.71         1          0
 8 Chn-013ca9… 1004:114:4       1.75       44.8        0.68         1          0
 9 Chn-013ca9… 1004:215:3       1.75       45.2        0.69         1          0
10 Chn-013ca9… 1004:215:4       1.75       45.8        0.44         1          0
# ℹ more rows
# Source:   table<hspen_r> [?? x 56]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   SpeciesID Speccode LifeStage FAOAreas FAOComplete NMostLat SMostLat WMostLong
   <chr>        <int> <chr>     <chr>          <int>    <dbl>    <dbl>     <dbl>
 1 Chn-013c…    92965 adults    51, 61,…          NA       NA       NA        NA
 2 Chn-0154…    92936 adults    37, 51,…          NA       NA       NA        NA
 3 Chn-0721…    81439 adults    51, 57,…          NA       NA       NA        NA
 4 Chn-08d9…    15073 adults    51, 57,…          NA       35      -28        32
 5 Chn-0e47…    92967 adults    57, 71            NA       NA       NA        NA
 6 Chn-0e9c…    92969 adults    57, 71            NA       NA       NA        NA
 7 Chn-12f4…  2380937 adults    57, 71            NA       NA       NA        NA
 8 Chn-17d1…    92960 adults    51, 57,…          NA       NA       NA        NA
 9 Chn-2125…    92947 adults    57, 61,…          NA       NA       NA        NA
10 Chn-27d9…    15084 adults    51, 61,…          NA       NA       NA        NA
# ℹ more rows
# ℹ 48 more variables: EMostLong <dbl>, DepthYN <int>, DepthMin <int>,
#   DepthPrefMin <int>, DepthPrefMax <int>, DepthMax <int>, MeanDepth <int>,
#   Pelagic <int>, TempYN <int>, TempMin <dbl>, TempPrefMin <dbl>,
#   TempPrefMax <dbl>, TempMax <dbl>, SalinityYN <int>, SalinityMin <dbl>,
#   SalinityPrefMin <dbl>, SalinityPrefMax <dbl>, SalinityMax <dbl>,
#   PrimProdYN <int>, PrimProdMin <dbl>, PrimProdPrefMin <dbl>, …
# Source:   table<occurrencecells_r> [?? x 17]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   RecordID CsquareCode SpeciesID SpecCode GoodCell InFAOArea InBoundBox GBIF_YN
      <int> <chr>       <chr>        <int>    <int>     <int>      <int>   <int>
 1  1596310 1009:478:2  Chn-013c…    92965        0         0          0       1
 2  1615055 1011:497:4  Chn-013c…    92965        1         1          0      NA
 3  1665456 1108:130:1  Chn-013c…    92965        0         0          0      NA
 4  1679103 1110:208:3  Chn-013c…    92965        1         1          0      NA
 5  1681092 1110:469:3  Chn-013c…    92965        1         1          0      NA
 6  1681147 1110:469:4  Chn-013c…    92965        1         1          0      NA
 7  1681212 1110:478:1  Chn-013c…    92965        1         1          0      NA
 8  1681232 1110:478:2  Chn-013c…    92965        1         1          0      NA
 9  1681287 1110:478:3  Chn-013c…    92965        1         1          0      NA
10  1681309 1110:478:4  Chn-013c…    92965        1         1          0      NA
# ℹ more rows
# ℹ 9 more variables: OBIS_YN <int>, FBSLB_YN <int>, CountryPoint_YN <int>,
#   AWI_YN <int>, IATTC_YN <int>, UWA_YN <int>, CenterLat <dbl>,
#   CenterLong <dbl>, FAOAreaM <int>
# Source:   table<speciesoccursum_r> [?? x 25]
# Database: sqlite 3.41.2 [/Users/bbest/Library/Application Support/aquamaps/am.db]
   SpeciesID        SpecCode Genus Species FBname OccurRecs OccurCells StockDefs
   <chr>               <int> <chr> <chr>   <chr>      <int>      <int> <chr>    
 1 Chn-013ca97e-46…    92965 Leni… lata    <NA>         109        103 Indo-Wes…
 2 Chn-015453c0-3f…    92936 Clor… alboli… <NA>          55         54 Indo-Wes…
 3 Chn-0721ac0c-7d…    81439 Leio… laevir… <NA>          38         37 Indo-Pac…
 4 Chn-08d9eb73-87…    15073 Edua… marten… stria…       283        270 Indo-Wes…
 5 Chn-0e470b93-1b…    92967 Levi… jurichi <NA>          21         18 Indo-Wes…
 6 Chn-0e9cfa5e-ea…    92969 Loph… tiwarii <NA>          22         20 Indo-Wes…
 7 Chn-12f493b8-a9…  2380937 Capi… multir… <NA>         157        129 Indo-Wes…
 8 Chn-17d15aec-37…    92960 Harp… annand… <NA>          71         70 Indo-Wes…
 9 Chn-21252d4f-80…    92947 Clor… pelami… <NA>          12         11 Indo-Wes…
10 Chn-27d923b5-53…    15084 Chel… cultri… <NA>          56         48 Indo-Pac…
# ℹ more rows
# ℹ 17 more variables: Kingdom <chr>, Phylum <chr>, Class <chr>, Order <chr>,
#   Family <chr>, deepwater <int>, angling <int>, diving <int>,
#   dangerous <int>, m_invertebrates <int>, highseas <int>, invasive <int>,
#   resilience <chr>, iucn_id <int>, iucn_code <chr>, iucn_version <chr>,
#   provider <chr>

2 transfer to duckdb

Queries in SQLite are quite slow compared to the new DuckDB.

Code
# table rename from old sqlite (sl) to new duckdb (dd)
d_tbls <- tribble(
  ~tbl_sl,                ~tbl_dd,
  "hcaf_r",               "cells",
  "hcaf_species_native",    "spp_cells",
  "hspen_r",                "spp_prefs",
  "occurrencecells_r",    "spp_occs",
  "speciesoccursum_r",    "spp")
d_tbls
# A tibble: 5 × 2
  tbl_sl              tbl_dd   
  <chr>               <chr>    
1 hcaf_r              cells    
2 hcaf_species_native spp_cells
3 hspen_r             spp_prefs
4 occurrencecells_r   spp_occs 
5 speciesoccursum_r   spp      
Code
redo <- F
if (!all(d_tbls$tbl_dd %in% dbListTables(con_dd)) | redo){
  
  for (i in 1:nrow(d_tbls)){ # i = 1
    tbl_sl <- d_tbls$tbl_sl[i]
    tbl_dd <- d_tbls$tbl_dd[i]
    
    message(glue(
      "{i} of {nrow(d_tbls)} tbls: read sqlite.{tbl_sl} () ~ {Sys.time()}"))
    t0 <- Sys.time()
    
    d <- dbGetQuery(con_sl, glue(
      "SELECT * FROM {tbl_sl}")) |> 
      clean_names() |> 
      rename_with(
        \(x) x |> 
          case_match(
            # cells
            "id"         ~ "cell_id", 
            "slimit"     ~ "s_limit",
            # spp, spp_cells, spp_occs, spp_prefs
            "species_id" ~ "sp_key",
            "speccode"   ~ "sp_int",
            "spec_code"  ~ "sp_int",
            # spp_occs
            "record_id"  ~ "occ_id",
            # spp
            "f_bname"    ~ "common_name",
            .default = x))
    
    t1 <- Sys.time()
    message(paste(
      "    ", format(nrow(d), big.mark=','), "rows read in", 
      round(difftime(t1, t0, units="mins"), 4), "mins"))
    
    message(glue(
      "  write duckdb.{tbl_dd} ~ {Sys.time()}",
      .trim = F))
    
    dbWriteTable(con_dd, tbl_dd, d, overwrite = T)
    
    t2 <- Sys.time()
    message(paste(
      "    ", format(nrow(d), big.mark=','), "rows written in", 
      round(difftime(t2, t1, units="mins"), 4), "mins"))
    
  }
}
1 of 5 tbls: read sqlite.hcaf_r () ~ 2023-12-01 21:48:46.1486
     177,869 rows read in 0.0264 mins
  write duckdb.cells ~ 2023-12-01 21:48:47.731827
     177,869 rows written in 0.0099 mins
2 of 5 tbls: read sqlite.hcaf_species_native () ~ 2023-12-01 21:48:48.326646
     118,249,855 rows read in 2.7802 mins
  write duckdb.spp_cells ~ 2023-12-01 21:51:35.139909
     118,249,855 rows written in  1.2148  mins
3 of 5 tbls: read sqlite.hspen_r () ~ 2023-12-01 21:52:48.026475
     23,699 rows read in 0.0039 mins
  write duckdb.spp_prefs ~ 2023-12-01 21:52:48.259237
     23,699 rows written in 0.0011 mins
4 of 5 tbls: read sqlite.occurrencecells_r () ~ 2023-12-01 21:52:48.323373
     2,908,181 rows read in 0.1266 mins
  write duckdb.spp_occs ~ 2023-12-01 21:52:55.917685
     2,908,181 rows written in 0.0438 mins
5 of 5 tbls: read sqlite.speciesoccursum_r () ~ 2023-12-01 21:52:58.547241
     23,699 rows read in 0.0022 mins
  write duckdb.spp ~ 2023-12-01 21:52:58.679833
     23,699 rows written in 0.0015 mins

3 rename fields

Code
file_size(path_dd) # 3.46G
3.46G
Code
renames_csv <- glue("{dir_data}/derived/aquamaps/am_tbl_fld_renames.csv")

dbListTables(con_dd)
[1] "_tbl_fld_renames" "cells"            "spp"              "spp_cells"       
[5] "spp_occs"         "spp_prefs"       
Code
# dbDisconnect(con_dd, shutdown = T)

if (!file.exists(renames_csv) | redo){
  for (i in 1:nrow(d_tbls)){ # i = 1
    tbl_sl <- d_tbls$tbl_sl[i]
    tbl_dd <- d_tbls$tbl_dd[i]
  
    d_sl <- dbGetQuery(con_sl, glue(
      "SELECT * FROM {tbl_sl} LIMIT 10"))
    d_dd <- dbGetQuery(con_dd, glue(
      "SELECT * FROM {tbl_dd} LIMIT 10"))
    
    d_r <- tibble(
      tbl_old = tbl_sl,
      tbl_new = tbl_dd,
      fld_old = names(d_sl),
      fld_new = names(d_dd))
    
    if (i == 1){
      d_renames <- d_r
    } else {
      d_renames <- d_renames |> 
        bind_rows(d_r)
    }
  }
  write_csv(d_renames, renames_csv)
  dbWriteTable(con_dd, "_tbl_fld_renames", d_renames, overwrite = T)
}
d_renames <- read_csv(renames_csv)
Rows: 163 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): tbl_old, tbl_new, fld_old, fld_new

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
datatable(d_renames)

4 streamline spp_cells, spp_occs to use cell_id

Code
d_spp_cells <- tbl(con_dd, "spp_cells") |> 
  left_join(
    tbl(con_dd, "cells") |> 
      select(cell_id, csquare_code),
    by = "csquare_code") |> 
  select(-csquare_code, -center_lat, -center_long) |> 
  collect()
dbWriteTable(con_dd, "spp_cells", d_spp_cells, overwrite = T)

d_spp_occs <- tbl(con_dd, "spp_occs") |> 
  left_join(
    tbl(con_dd, "cells") |> 
      select(cell_id, csquare_code),
    by = "csquare_code") |> 
  select(-csquare_code) |> 
  collect()
dbWriteTable(con_dd, "spp_occs", d_spp_occs, overwrite = T)

# TODO: update _tbl_fld_renames to this streamlining
#   spp_cells.csquare_code|center_lat|center_long -> cell_id
#   spp_occs.csquare_code                         -> cell_id

5 add indexes

Code
create_index <- function(con, tbl, flds, is_unique = F){
  unq  <- {ifelse(is_unique, 'UNIQUE','')}
  idx  <- glue("{tbl}_{paste(flds, collapse='_')}_idx")
  flds <- glue("{paste(flds, collapse=',')}")
  sql  <- glue("CREATE {unq} INDEX {idx} ON {tbl} ({flds});")

  message(sql)
  dbExecute(con, sql)
}
create_index(con_dd, "cells",     "cell_id", is_unique = T)
create_index(con_dd, "spp",       "sp_key",  is_unique = T)
create_index(con_dd, "spp_cells", "cell_id")
create_index(con_dd, "spp_cells", "sp_key")
create_index(con_dd, "spp_prefs", "sp_key", is_unique = T)
create_index(con_dd, "spp_occs",  "occ_id", is_unique = T)
create_index(con_dd, "spp_occs",  "cell_id")
create_index(con_dd, "spp_occs",  "sp_key")

6 export/import db

for:

  • version compatibility (since duckdb is not backwards compatible with itself before version 1.0); and
  • reducing file size
Code
dir_parquet <- glue("{dir_data}/derived/aquamaps/parquet")

dbExecute(con_dd, glue("
  -- export the database to the target directory 'db_name' as CSV files
  --   EXPORT DATABASE 'db_name';
  -- export to directory 'db_name', using the given options for the CSV serialization
  --   EXPORT DATABASE 'db_name' (FORMAT CSV, DELIMITER '|');
  -- export to directory 'db_name', tables serialized as Parquet
       EXPORT DATABASE '{dir_parquet}' (FORMAT PARQUET);"))
Code
dbDisconnect(con_dd, shutdown = T)
file_delete(path_dd)
con_dd   <- dbConnect(
  duckdb(
    dbdir     = path_dd,
    read_only = F))
file_size(path_dd) # 12K

system.time({
  dbExecute(con_dd, glue("
    IMPORT DATABASE '{dir_parquet}';"))
})

dbDisconnect(con_dd, shutdown = T)
file_size(path_dd) # 698M

con_dd   <- dbConnect(
  duckdb(
    dbdir     = path_dd,
    read_only = F))

# TODO: + fxns: export_duckdb(), import_duckdb(format="parquet")

7 todo: db relationship diagram

Mermaid entity relationship diagram with tables related by (primary) indexes showing one:many and many:many relationships.

8 create raster

Code
cells_tif <- glue("{dir_data}/derived/aquamaps/cell_id.tif")

if (!file.exists(cells_tif)){

  # get cells as points
  pts_cells <- tbl(con_dd, "cells") |> 
    select(cell_id, csquare_code, center_long, center_lat) |> 
    collect() |> 
    st_as_sf(
      coords = c("center_long", "center_lat"), crs = 4326)
  
  # create template raster from global dimensions and resolution
  r_g <- rast(
    xmin = -180, xmax = 180, 
    ymin = -90,  ymax = 90, 
    resolution = 0.5)
  
  # rasterize based on cell_id
  r_cells <- rasterize(pts_cells, r_g, field = "cell_id", fun = "last")
  names(r_cells) <- "cell_id"
  
  # ensure no duplicate cell_ids
  stopifnot(sum(duplicated(values(r_cells, na.rm=T))) == 0)
  
  # write to smallest possible raster
  r_cells |> 
    writeRaster(
      cells_tif, 
      overwrite = T,
      datatype  = "INT4U",
      gdal      = c(
        "TILED=YES",
        "COMPRESS=DEFLATE"))
  file_size(cells_tif) # 279K
  
  d_cells <- tbl(con_dd, "cells") |> 
    collect() |> 
    left_join(
      tibble(
        cell_id  = values(r_cells, mat=F),
        cell_idx = 1:ncell(r_cells)), 
      by = "cell_id") |> 
    relocate(cell_idx, .after = cell_id)
  
  dbWriteTable(con_dd, "cells", d_cells, overwrite = T)
}
r_cells <- rast(cells_tif)

plot(r_cells)

9 plot species

Code
sp_name <- "blue whale"

# get sp_key
sp_key <- tbl(con_dd, "spp") |> 
  filter(common_name == !!sp_name) |> 
  pull(sp_key)

# get sp cells
d <- tbl(con_dd, "spp_cells") |> 
  filter(sp_key == !!sp_key) |> 
  left_join(
    tbl(con_dd, "cells") |> 
      select(cell_id, cell_idx), 
    by = "cell_id") |> 
  select(cell_idx, probability) |> 
  collect()
# TODO: filter also by
# - fao_area_yn: Does this cell fall within an FAO area where the species is known to occur (endemic/native)? 0=No, 1=Yes
# - bound_box_yn: Does this cell fall within the geographical bounding box known for the species? 0=No, 1=Yes

r <- r_cells
values(r) <- NA

r[d$cell_idx] <- d$probability
plot(r)

10 next steps

  • assign species to aphia_id (WoRMS: MarineSpecies.org)
  • assign taxonomic groups, a la marinebon/gmbi (Visalli et al. 2020)
  • get species list per BOEM region
  • create function to extract species list per arbitrary region, a la CalCOFI oceano app
  • develop multidimensional array extraction with xarray

11 References

Kaschner, K., K. Kesner-Reyes, C. Garilao, J. Segschneider, J. Rius-Barile, T. Rees, and R. Froese. 2023. AquaMaps: Predicted Range Maps for Aquatic Species. Retrieved from https://www.aquamaps.org.
Visalli, Morgan E., Benjamin D. Best, Reniel B. Cabral, William W. L. Cheung, Nichola A. Clark, Cristina Garilao, Kristin Kaschner, et al. 2020. “Data-Driven Approach for Highlighting Priority Areas for Protection in Marine Areas Beyond National Jurisdiction.” Marine Policy, March, 103927. https://doi.org/10.1016/j.marpol.2020.103927.