The following chunk sets up connections to nefscdb1 using ROracle. Note, that you need to be plugged into the network or on VPN.
library(ROracle)
source(here::here('dbconnection.R'))
# source(here::here('coord_conversion.R'))
# source(here::here('app/functions/func_convertdateNcdf2R.R'))Here I am pulling the Study Fleet bottom temperatures (Schema: NERS, table: GTE_JOIN). This is the raw data, has GPS polling/temp data ~ every 90 seconds, from 2007 to present. This will be a big file and take a bit of time to pull.
# pulling in SF data from GTE Join w/best quality data, gte_code == 92
# bt <- dbGetQuery(con.db1,"select * from  NERS.GTE_JOIN
#                           where TOW_EVENT_CODE ='92'") # this didnt work 
bt <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")
# unique(bt$TOW_EVENT_CODE)
# save data bc pull takes a long time
saveRDS(bt, 'sf_bt.rds')
# Pull the 1 deg. min 1 hr binned data (NERS_TD_DATA_LMIN_BIN)
bt.bin <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")library(tidyverse)
library(timetk)
library(sf) 
library(dplyr)
bt <- readRDS('bt.rds')
load(here::here('nes_map.Rdata'))
wes <- wesanderson::wes_palette("Zissou1", 21, type = "continuous")Next, bin the data by hour for each trip and haul.
# Bin data by hour 
bt.agg <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
   mutate(hour = lubridate::hour(gps_datetime)) %>% 
  filter(!is.na(bot_depth_m)) %>% 
  group_by(trip_id, effort_num) %>% 
  # Summarize
  timetk::summarise_by_time(
    .date_var = gps_datetime, 
    .by       = 'hour',
    min       = min(temp),
    max       = max(temp),
    mean      = mean(temp), 
    sd        = sd(temp),
    meanlat   = mean(latitude),
    meanlon   = mean(longitude)
  ) Testing just for a single year (2009).
bt.09 <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
  mutate(year = lubridate::year(gps_datetime),
         month = lubridate::month(gps_datetime),
         week = lubridate::week(gps_datetime),
         doy = lubridate::yday(gps_datetime),
         hour = lubridate::hour(gps_datetime)) %>% 
  filter(year == 2009 & !is.na(bot_depth_m))
# create sf object
points_sf09 <- sf::st_as_sf(bt.09, coords = c('longitude','latitude'), crs = 4326)
# plot to check 
ggplot() +
  geom_sf(data = points_sf09) + 
  theme_minimal()bt.09agg <- bt.09 %>% 
  group_by(trip_id, effort_num) %>% # user filters by time ahead of this? 
  # Summarize
  timetk::summarise_by_time(
    .date_var = gps_datetime, 
    .by       = 'hour',
    min       = min(temp),
    max       = max(temp),
    mean      = mean(temp), 
    sd        = sd(temp), 
    minlat    = min(latitude),
    maxlat    = max(latitude),
    minlon    = min(longitude),
    maxlon    = max(longitude),
    meanlat   = mean(latitude),
    meanlon   = mean(longitude)
  ) 
points_sf <- sf::st_as_sf(bt.09agg, coords = c('meanlon','meanlat'), crs = 4326)# 1/12 degree ~ 9km
grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |> # 10000 #2.660 0.3333 (0.1 ~ 1/10 degree, ~7km)
  sf::st_sf(geometry = _) |>
  mutate(cell_id = row_number(), .before = 1)
# checks
# plot(grid_sf)Join data, matching cell-id to associated data points. Make sure to keep track of number of data points informing a cell.
# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
  sf::st_drop_geometry() |>
  # number of points per cell (mutate, row count remains the same)
  group_by(cell_id) |>
  mutate(points_per_cell = n(), 
         mean.t = mean(mean), 
         sd.t = sd(sd))
# testing to see if this will select just the first value associated 
# with each cell to see what is actually being plotted 
shares_df<- shares_df %>%
group_by(cell_id) %>%
mutate(is_first = mean == first(mean), 
       first.val = first(mean), 
       med.val = median(mean))
# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))Plot gridded data. Here I look at the mean, median, and sd of the data within each grid cell. Also trying to figure out what R plots when you just plot the grid without calculating a particular function (still a mystery).
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Uknown value - First val per cell?') +
  theme_minimal() grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = med.val)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Median per cell') +
  theme_minimal() grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = first.val)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'For sure first val per cell') +
  theme_minimal() grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean.t)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Average of all vals per cell') +
  theme_minimal() grid_shares_sf |>
  filter(!is.na(sd.t)) |>
  ggplot() +
  geom_sf(aes(fill = sd.t)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Sd of all vals per cell') +
  theme_minimal() Rabbit hole of day vs night values. Day is classified is as 0600 to 1800 and night is classified as 1900 to 0500.
points_sf <- points_sf |>
  mutate(tod = case_when(lubridate::hour(gps_datetime) >= 06 &
                         lubridate::hour(gps_datetime) <= 18~ 'Day', 
                                               .default = 'Night'))
  
  
  
  
# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
  sf::st_drop_geometry() |>
  # number of points per cell (mutate, row count remains the same)
  group_by(cell_id, tod) |>
  mutate(points_per_cell = n(), 
         mean.t = mean(mean), 
         sd.t = sd(sd)) 
# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = mean.t)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C',
       title = 'Average of all vals per cell') +
  theme_minimal() +
  facet_wrap(~tod)grid_shares_sf |>
  filter(!is.na(sd.t)) |>
  ggplot() +
  geom_sf(aes(fill = sd.t)) +
  labs(fill = 'Sd \nB. Temp °C',
       title = 'SD of all vals per cell') +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  theme_minimal() +
  facet_wrap(~tod)Plot number of data points informing each grid cell
# Map of number of values per cell 
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = points_per_cell)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'No. Values \nPer Cell') +
  theme_minimal() Plot number of data points informing each grid cell night versus day
# Map of number of values per cell 
grid_shares_sf |>
  ggplot() +
  geom_sf(aes(fill = points_per_cell)) +
  viridis::scale_fill_viridis(option = 'plasma', direction = -1) + 
  labs(fill = 'No. Values \nPer Cell') +
  theme_minimal() +
    facet_wrap(~tod)#Plot grid w/points
# with all points
grid_shares_sf |>
  ggplot() +
  # draw full grid
  # geom_sf(data = grid_sf, fill = NA, color = "grey") +
  geom_sf(aes(fill = mean)) +
  # add points
  geom_sf(data = points_sf, color = 'black') +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C') +
  theme_minimal()Show full grid
# with full grid 
grid_shares_sf |>
  ggplot() +
  # draw full grid
  geom_sf(data = grid_sf, fill = NA, color = "grey") +
  geom_sf(aes(fill = mean)) +
  geom_sf(data = us.coast, color = 'grey99') +
  labs(fill = 'Mean \nB. Temp °C') +
  scale_fill_gradientn(colours = wes) + 
  coord_sf(xlim = c(-76,-66.0), ylim = c(36,44), datum = sf::st_crs(4326)) +    theme_minimal() Create function to generate user-specified grid
# Some bt data that is all together and formatted correctly  
bt <- bt %>% dplyr::rename_all(., .funs = tolower) %>% 
  mutate(year = lubridate::year(gps_datetime),
         month = lubridate::month(gps_datetime),
         week = lubridate::week(gps_datetime),
         doy = lubridate::yday(gps_datetime),
         hour = lubridate::hour(gps_datetime)) %>% 
  rename(lat = latitude, lon = longitude) %>% 
  filter(!is.na(bot_depth_m))
botGrid <- function(bt, minlat, maxlat, minlon, maxlon, yearRange, monthRange, 
                    timeBin){
  bt.agg <- bt %>% 
    filter((between(lat, minlat, maxlat)) & 
             (between(lon, minlon, maxlon)) &
             year %in% yearRange &
             month %in% monthRange) %>%  #& 
    #week %in% weekRange) %>% 
    # this means it will re-grid everytime
    group_by(trip_id, effort_num) %>%
    timetk::summarise_by_time(
      .date_var = gps_datetime, 
      .by       = timeBin,
      min       = min(temp),
      max       = max(temp),
      mean      = mean(temp), 
      sd        = sd(temp), 
      meanlat   = mean(lat),
      meanlon   = mean(lon)
    ) 
  
  points_sf <- sf::st_as_sf(bt.agg, coords = c('meanlon','meanlat'), 
                            crs = 4326)
  
  grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |> 
    sf::st_sf(geometry = _) |>
    mutate(cell_id = row_number(), .before = 1)
  
  shares_df <- sf::st_join(points_sf, grid_sf) |> 
    sf::st_drop_geometry() |>
    group_by(cell_id) |> 
    mutate(points_per_cell = n())
  
  grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
  
  return(grid_shares_sf)
}Ok! Test the function - this is a particular set of lat,lons in 2012 for months May, June, July:
test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, 2012, c(5:7),
              timeBin = 'hour')
test  %>% 
  ggplot() +
  geom_sf(aes(fill = mean)) +
  scale_fill_gradientn(colours = wes) + 
  labs(fill = 'Mean \nB. Temp °C') +
  theme_minimal() yrs <- c(2009:2019)
#for(i in 1:length(yrs)){
bt.map <- function(yrs){
  
test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, yearRange = yrs,
               monthRange = c(1:12),
              timeBin = 'hour') 
test  %>% 
  ggplot() +
  geom_sf(aes(fill = mean)) +
  geom_sf(data = us.coast, color = 'grey99') +
  labs(fill = 'Mean \nB. Temp °C') +
  scale_fill_gradientn(colours = wes) + 
  coord_sf(xlim = c(-76,-66.0), ylim = c(37,43), datum = sf::st_crs(4326)) +    theme_minimal() 
 
  }
for(i in 1:length(yrs)){
 cat("\n#####",  as.character(yrs[i]),"\n")
    print(bt.map(yrs[i])) 
    cat("\n")   
}