Setting up database connection

The following chunk sets up connections to nefscdb1 using ROracle. Note, that you need to be plugged into the network or on VPN.

library(ROracle)
source(here::here('dbconnection.R'))
# source(here::here('coord_conversion.R'))
# source(here::here('app/functions/func_convertdateNcdf2R.R'))

Pull and aggregate data

Here I am pulling the Study Fleet bottom temperatures (Schema: NERS, table: GTE_JOIN). This is the raw data, has GPS polling/temp data ~ every 90 seconds, from 2007 to present. This will be a big file and take a bit of time to pull.

# pulling in SF data from GTE Join w/best quality data, gte_code == 92

# bt <- dbGetQuery(con.db1,"select * from  NERS.GTE_JOIN
#                           where TOW_EVENT_CODE ='92'") # this didnt work 

bt <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")

# unique(bt$TOW_EVENT_CODE)
# save data bc pull takes a long time

saveRDS(bt, 'sf_bt.rds')

# Pull the 1 deg. min 1 hr binned data (NERS_TD_DATA_LMIN_BIN)
bt.bin <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")
library(tidyverse)
library(timetk)
library(sf) 
library(dplyr)
bt <- readRDS('bt.rds')
load(here::here('nes_map.Rdata'))
wes <- wesanderson::wes_palette("Zissou1", 21, type = "continuous")

Next, bin the data by hour for each trip and haul.

# Bin data by hour 

bt.agg <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
   mutate(hour = lubridate::hour(gps_datetime)) %>% 
  filter(!is.na(bot_depth_m)) %>% 
  group_by(trip_id, effort_num) %>% 
  # Summarize
  timetk::summarise_by_time(
    .date_var = gps_datetime, 
    .by       = 'hour',
    min       = min(temp),
    max       = max(temp),
    mean      = mean(temp), 
    sd        = sd(temp),
    meanlat   = mean(latitude),
    meanlon   = mean(longitude)
  ) 

Testing just for a single year (2009).

bt.09 <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
  mutate(year = lubridate::year(gps_datetime),
         month = lubridate::month(gps_datetime),
         week = lubridate::week(gps_datetime),
         doy = lubridate::yday(gps_datetime),
         hour = lubridate::hour(gps_datetime)) %>% 
  filter(year == 2009 & !is.na(bot_depth_m))

# create sf object
points_sf09 <- sf::st_as_sf(bt.09, coords = c('longitude','latitude'), crs = 4326)
# plot to check 
ggplot() +
  geom_sf(data = points_sf09) + 
  theme_minimal()

bt.09agg <- bt.09 %>% 
  group_by(trip_id, effort_num) %>% # user filters by time ahead of this? 
  # Summarize
  timetk::summarise_by_time(
    .date_var = gps_datetime, 
    .by       = 'hour',
    min       = min(temp),
    max       = max(temp),
    mean      = mean(temp), 
    sd        = sd(temp), 
    minlat    = min(latitude),
    maxlat    = max(latitude),
    minlon    = min(longitude),
    maxlon    = max(longitude),
    meanlat   = mean(latitude),
    meanlon   = mean(longitude)
  ) 

points_sf <- sf::st_as_sf(bt.09agg, coords = c('meanlon','meanlat'), crs = 4326)

Grid data

# 1/12 degree ~ 9km
grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |> # 10000 #2.660 0.3333 (0.1 ~ 1/10 degree, ~7km)
  sf::st_sf(geometry = _) |>
  mutate(cell_id = row_number(), .before = 1)
# checks
# plot(grid_sf)

Join data, matching cell-id to associated data points. Make sure to keep track of number of data points informing a cell.

# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
  sf::st_drop_geometry() |>
  # number of points per cell (mutate, row count remains the same)
  group_by(cell_id) |>
  mutate(points_per_cell = n(), 
         mean.t = mean(mean), 
         sd.t = sd(sd))

# testing to see if this will select just the first value associated 
# with each cell to see what is actually being plotted 
shares_df<- shares_df %>%
group_by(cell_id) %>%
mutate(is_first = mean == first(mean), 
       first.val = first(mean), 
       med.val = median(mean))
# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))

Stat maps

Plot gridded data. Here I look at the mean, median, and sd of the data within each grid cell. Also trying to figure out what R plots when you just plot the grid without calculating a particular function (still a mystery).

grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Uknown value - First val per cell?') +
  theme_minimal() 

grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = med.val)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Median per cell') +
  theme_minimal() 

grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = first.val)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'For sure first val per cell') +
  theme_minimal() 

grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean.t)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Average of all vals per cell') +
  theme_minimal() 

grid_shares_sf |>
  filter(!is.na(sd.t)) |>
  ggplot() +
  geom_sf(aes(fill = sd.t)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Sd of all vals per cell') +
  theme_minimal() 

Day vs Night

Rabbit hole of day vs night values. Day is classified is as 0600 to 1800 and night is classified as 1900 to 0500.

points_sf <- points_sf |>
  mutate(tod = case_when(lubridate::hour(gps_datetime) >= 06 &
                         lubridate::hour(gps_datetime) <= 18~ 'Day', 
                                               .default = 'Night'))
  
  
  
  
# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
  sf::st_drop_geometry() |>
  # number of points per cell (mutate, row count remains the same)
  group_by(cell_id, tod) |>
  mutate(points_per_cell = n(), 
         mean.t = mean(mean), 
         sd.t = sd(sd)) 

# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))

grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean.t)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Average of all vals per cell') +
  theme_minimal() +
  facet_wrap(~tod)

grid_shares_sf |>
  filter(!is.na(sd.t)) |>
  ggplot() +
  geom_sf(aes(fill = sd.t)) +
  labs(fill = 'Sd \nB. Temp °C',
       title = 'SD of all vals per cell') +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  theme_minimal() +
  facet_wrap(~tod)

Number of data points per grid cell

Plot number of data points informing each grid cell

# Map of number of values per cell 
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = points_per_cell)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'No. Values \nPer Cell') +
  theme_minimal() 

Plot number of data points informing each grid cell night versus day

# Map of number of values per cell 
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = points_per_cell)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'No. Values \nPer Cell') +
  theme_minimal() +
    facet_wrap(~tod)

#Plot grid w/points

# with all points
grid_shares_sf |>
  ggplot() +
  # draw full grid
  # geom_sf(data = grid_sf, fill = NA, color = "grey") +
  geom_sf(aes(fill = mean)) +
  # add points
  geom_sf(data = points_sf, color = 'black') +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C') +
  theme_minimal()

Show full grid

# with full grid 
grid_shares_sf |>
  ggplot() +
  # draw full grid
  geom_sf(data = grid_sf, fill = NA, color = "grey") +
  geom_sf(aes(fill = mean)) +
  geom_sf(data = us.coast, color = 'grey99') +
  labs(fill = 'Mean \nB. Temp °C') +
  scale_fill_gradientn(colours = wes) + 
  coord_sf(xlim = c(-76,-66.0), ylim = c(36,44), datum = sf::st_crs(4326)) +    theme_minimal() 

Create grid function

Create function to generate user-specified grid

# Some bt data that is all together and formatted correctly  
bt <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
  mutate(year = lubridate::year(gps_datetime),
         month = lubridate::month(gps_datetime),
         week = lubridate::week(gps_datetime),
         doy = lubridate::yday(gps_datetime),
         hour = lubridate::hour(gps_datetime)) %>% 
  rename(lat = latitude, lon = longitude) %>% 
  filter(!is.na(bot_depth_m))

botGrid <- function(bt, minlat, maxlat, minlon, maxlon, yearRange, monthRange, 
                    timeBin){
  bt.agg <- bt %>% 
    filter((between(lat, minlat, maxlat)) & 
             (between(lon, minlon, maxlon)) &
             year %in% yearRange &
             month %in% monthRange) %>%  #& 
    #week %in% weekRange) %>% 
    # this means it will re-grid everytime
    group_by(trip_id, effort_num) %>%
    timetk::summarise_by_time(
      .date_var = gps_datetime, 
      .by       = timeBin,
      min       = min(temp),
      max       = max(temp),
      mean      = mean(temp), 
      sd        = sd(temp), 
      meanlat   = mean(lat),
      meanlon   = mean(lon)
    ) 
  
  points_sf <- sf::st_as_sf(bt.agg, coords = c('meanlon','meanlat'), 
                            crs = 4326)
  
  grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |> 
    sf::st_sf(geometry = _) |>
    mutate(cell_id = row_number(), .before = 1)
  
  shares_df <- sf::st_join(points_sf, grid_sf) |> 
    sf::st_drop_geometry() |>
    group_by(cell_id) |> 
    mutate(points_per_cell = n())
  
  grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
  
  return(grid_shares_sf)
}

Ok! Test the function - this is a particular set of lat,lons in 2012 for months May, June, July:

test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, 2012, c(5:7),
              timeBin = 'hour')

test  %>% 
  ggplot() +
  geom_sf(aes(fill = mean)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C') +
  theme_minimal() 

Year maps (testing function)

yrs <- c(2009:2019)
#for(i in 1:length(yrs)){
bt.map <- function(yrs){
  
test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, yearRange = yrs,
               monthRange = c(1:12),
              timeBin = 'hour') 
test  %>% 
  ggplot() +
  geom_sf(aes(fill = mean)) +
  geom_sf(data = us.coast, color = 'grey99') +
  labs(fill = 'Mean \nB. Temp °C') +
  scale_fill_gradientn(colours = wes) + 
  coord_sf(xlim = c(-76,-66.0), ylim = c(37,43), datum = sf::st_crs(4326)) +    theme_minimal() 
 
  }


for(i in 1:length(yrs)){
 cat("\n#####",  as.character(yrs[i]),"\n")
    print(bt.map(yrs[i])) 
    cat("\n")   
}
2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019