The following chunk sets up connections to nefscdb1 using ROracle. Note, that you need to be plugged into the network or on VPN.
library(ROracle)
source(here::here('dbconnection.R'))
# source(here::here('coord_conversion.R'))
# source(here::here('app/functions/func_convertdateNcdf2R.R'))
Here I am pulling the Study Fleet bottom temperatures (Schema: NERS, table: GTE_JOIN). This is the raw data, has GPS polling/temp data ~ every 90 seconds, from 2007 to present. This will be a big file and take a bit of time to pull.
# pulling in SF data from GTE Join w/best quality data, gte_code == 92
# bt <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN
# where TOW_EVENT_CODE ='92'") # this didnt work
bt <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")
# unique(bt$TOW_EVENT_CODE)
# save data bc pull takes a long time
saveRDS(bt, 'sf_bt.rds')
# Pull the 1 deg. min 1 hr binned data (NERS_TD_DATA_LMIN_BIN)
bt.bin <- dbGetQuery(con.db1,"select * from NERS.GTE_JOIN")
library(tidyverse)
library(timetk)
library(sf)
library(dplyr)
bt <- readRDS('bt.rds')
load(here::here('nes_map.Rdata'))
wes <- wesanderson::wes_palette("Zissou1", 21, type = "continuous")
Next, bin the data by hour for each trip and haul.
# Bin data by hour
bt.agg <- bt %>% dplyr::rename_all(., .funs = tolower) %>%
mutate(hour = lubridate::hour(gps_datetime)) %>%
filter(!is.na(bot_depth_m)) %>%
group_by(trip_id, effort_num) %>%
# Summarize
timetk::summarise_by_time(
.date_var = gps_datetime,
.by = 'hour',
min = min(temp),
max = max(temp),
mean = mean(temp),
sd = sd(temp),
meanlat = mean(latitude),
meanlon = mean(longitude)
)
Testing just for a single year (2009).
bt.09 <- bt %>% dplyr::rename_all(., .funs = tolower) %>%
mutate(year = lubridate::year(gps_datetime),
month = lubridate::month(gps_datetime),
week = lubridate::week(gps_datetime),
doy = lubridate::yday(gps_datetime),
hour = lubridate::hour(gps_datetime)) %>%
filter(year == 2009 & !is.na(bot_depth_m))
# create sf object
points_sf09 <- sf::st_as_sf(bt.09, coords = c('longitude','latitude'), crs = 4326)
# plot to check
ggplot() +
geom_sf(data = points_sf09) +
theme_minimal()
bt.09agg <- bt.09 %>%
group_by(trip_id, effort_num) %>% # user filters by time ahead of this?
# Summarize
timetk::summarise_by_time(
.date_var = gps_datetime,
.by = 'hour',
min = min(temp),
max = max(temp),
mean = mean(temp),
sd = sd(temp),
minlat = min(latitude),
maxlat = max(latitude),
minlon = min(longitude),
maxlon = max(longitude),
meanlat = mean(latitude),
meanlon = mean(longitude)
)
points_sf <- sf::st_as_sf(bt.09agg, coords = c('meanlon','meanlat'), crs = 4326)
# 1/12 degree ~ 9km
grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |> # 10000 #2.660 0.3333 (0.1 ~ 1/10 degree, ~7km)
sf::st_sf(geometry = _) |>
mutate(cell_id = row_number(), .before = 1)
# checks
# plot(grid_sf)
Join data, matching cell-id to associated data points. Make sure to keep track of number of data points informing a cell.
# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
sf::st_drop_geometry() |>
# number of points per cell (mutate, row count remains the same)
group_by(cell_id) |>
mutate(points_per_cell = n(),
mean.t = mean(mean),
sd.t = sd(sd))
# testing to see if this will select just the first value associated
# with each cell to see what is actually being plotted
shares_df<- shares_df %>%
group_by(cell_id) %>%
mutate(is_first = mean == first(mean),
first.val = first(mean),
med.val = median(mean))
# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
Plot gridded data. Here I look at the mean, median, and sd of the data within each grid cell. Also trying to figure out what R plots when you just plot the grid without calculating a particular function (still a mystery).
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = mean)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C',
title = 'Uknown value - First val per cell?') +
theme_minimal()
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = med.val)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C',
title = 'Median per cell') +
theme_minimal()
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = first.val)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C',
title = 'For sure first val per cell') +
theme_minimal()
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = mean.t)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C',
title = 'Average of all vals per cell') +
theme_minimal()
grid_shares_sf |>
filter(!is.na(sd.t)) |>
ggplot() +
geom_sf(aes(fill = sd.t)) +
viridis::scale_fill_viridis(option = 'plasma', direction = -1) +
labs(fill = 'Mean \nB. Temp °C',
title = 'Sd of all vals per cell') +
theme_minimal()
Rabbit hole of day vs night values. Day is classified is as 0600 to 1800 and night is classified as 1900 to 0500.
points_sf <- points_sf |>
mutate(tod = case_when(lubridate::hour(gps_datetime) >= 06 &
lubridate::hour(gps_datetime) <= 18~ 'Day',
.default = 'Night'))
# spatial join to match cell_id-s to points,
shares_df <- sf::st_join(points_sf, grid_sf) |>
sf::st_drop_geometry() |>
# number of points per cell (mutate, row count remains the same)
group_by(cell_id, tod) |>
mutate(points_per_cell = n(),
mean.t = mean(mean),
sd.t = sd(sd))
# join grid to shares for plotting
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = mean.t)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C',
title = 'Average of all vals per cell') +
theme_minimal() +
facet_wrap(~tod)
grid_shares_sf |>
filter(!is.na(sd.t)) |>
ggplot() +
geom_sf(aes(fill = sd.t)) +
labs(fill = 'Sd \nB. Temp °C',
title = 'SD of all vals per cell') +
viridis::scale_fill_viridis(option = 'plasma', direction = -1) +
theme_minimal() +
facet_wrap(~tod)
Plot number of data points informing each grid cell
# Map of number of values per cell
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = points_per_cell)) +
viridis::scale_fill_viridis(option = 'plasma', direction = -1) +
labs(fill = 'No. Values \nPer Cell') +
theme_minimal()
Plot number of data points informing each grid cell night versus day
# Map of number of values per cell
grid_shares_sf |>
ggplot() +
geom_sf(aes(fill = points_per_cell)) +
viridis::scale_fill_viridis(option = 'plasma', direction = -1) +
labs(fill = 'No. Values \nPer Cell') +
theme_minimal() +
facet_wrap(~tod)
#Plot grid w/points
# with all points
grid_shares_sf |>
ggplot() +
# draw full grid
# geom_sf(data = grid_sf, fill = NA, color = "grey") +
geom_sf(aes(fill = mean)) +
# add points
geom_sf(data = points_sf, color = 'black') +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C') +
theme_minimal()
Show full grid
# with full grid
grid_shares_sf |>
ggplot() +
# draw full grid
geom_sf(data = grid_sf, fill = NA, color = "grey") +
geom_sf(aes(fill = mean)) +
geom_sf(data = us.coast, color = 'grey99') +
labs(fill = 'Mean \nB. Temp °C') +
scale_fill_gradientn(colours = wes) +
coord_sf(xlim = c(-76,-66.0), ylim = c(36,44), datum = sf::st_crs(4326)) + theme_minimal()
Create function to generate user-specified grid
# Some bt data that is all together and formatted correctly
bt <- bt %>% dplyr::rename_all(., .funs = tolower) %>%
mutate(year = lubridate::year(gps_datetime),
month = lubridate::month(gps_datetime),
week = lubridate::week(gps_datetime),
doy = lubridate::yday(gps_datetime),
hour = lubridate::hour(gps_datetime)) %>%
rename(lat = latitude, lon = longitude) %>%
filter(!is.na(bot_depth_m))
botGrid <- function(bt, minlat, maxlat, minlon, maxlon, yearRange, monthRange,
timeBin){
bt.agg <- bt %>%
filter((between(lat, minlat, maxlat)) &
(between(lon, minlon, maxlon)) &
year %in% yearRange &
month %in% monthRange) %>% #&
#week %in% weekRange) %>%
# this means it will re-grid everytime
group_by(trip_id, effort_num) %>%
timetk::summarise_by_time(
.date_var = gps_datetime,
.by = timeBin,
min = min(temp),
max = max(temp),
mean = mean(temp),
sd = sd(temp),
meanlat = mean(lat),
meanlon = mean(lon)
)
points_sf <- sf::st_as_sf(bt.agg, coords = c('meanlon','meanlat'),
crs = 4326)
grid_sf <- sf::st_make_grid(points_sf, cellsize = 0.1) |>
sf::st_sf(geometry = _) |>
mutate(cell_id = row_number(), .before = 1)
shares_df <- sf::st_join(points_sf, grid_sf) |>
sf::st_drop_geometry() |>
group_by(cell_id) |>
mutate(points_per_cell = n())
grid_shares_sf <- right_join(grid_sf, shares_df, by = join_by('cell_id'))
return(grid_shares_sf)
}
Ok! Test the function - this is a particular set of lat,lons in 2012 for months May, June, July:
test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, 2012, c(5:7),
timeBin = 'hour')
test %>%
ggplot() +
geom_sf(aes(fill = mean)) +
scale_fill_gradientn(colours = wes) +
labs(fill = 'Mean \nB. Temp °C') +
theme_minimal()
yrs <- c(2009:2019)
#for(i in 1:length(yrs)){
bt.map <- function(yrs){
test = botGrid(bt, 37.4812, 42.9546, -74.3995, -66.5988, yearRange = yrs,
monthRange = c(1:12),
timeBin = 'hour')
test %>%
ggplot() +
geom_sf(aes(fill = mean)) +
geom_sf(data = us.coast, color = 'grey99') +
labs(fill = 'Mean \nB. Temp °C') +
scale_fill_gradientn(colours = wes) +
coord_sf(xlim = c(-76,-66.0), ylim = c(37,43), datum = sf::st_crs(4326)) + theme_minimal()
}
for(i in 1:length(yrs)){
cat("\n#####", as.character(yrs[i]),"\n")
print(bt.map(yrs[i]))
cat("\n")
}