library(ncdf4)
library(terra)
library(sf)
library(tidyverse)
library(RColorBrewer)
library(wesanderson)
library(dichromat)
source('func_convertdateNcdf2R.R')
## Color Palette
# This is a function, you need to specify the length of scale: jet.colors(n)
jet.colors <-colorRampPalette(c("blue", "#007FFF", "cyan","#7FFF7F",
"yellow", "#FF7F00", "red", "#7F0000"))
Here I create a list of the files, so that I do not have to pull them in individually. The ‘glob2rx’ function will search for the string that matches the pattern you indicated via the argument ‘pattern’ and returns all files with a matching character string The asterisk at the end indicates that you want all files that start with the elements you specified and will ignore whatever comes after the *. You can make this as specific or generalized as you need.
For example, consider you have 20 files with the the following naming convention: “W_202120-AT-R2018-NESGRID-GRAD_SST-BOA-STACKED.nc” and 20 files with a slightly different name such as: “D_202320-AT-R2018-NESGRID-GRAD_SST-BOA-STACKED.nc”
# change the path to the directory you are storing .nc files
path = here::here('data/sst/nc_files')
files <- list.files(path = path, pattern = glob2rx('MM_*.nc'), full.names = TRUE)
Here you can index through the list of files you just pulled in to specify which file you want to work with using the brackets operator,[]. See code below. In order to see the names of the variables included in the NetCDF use the ‘attributes’ function and extract (using the $ operator) the ‘names’.
# Open nc file and save as an object
nc <- nc_open(files[39]) # Open files
attributes(nc$var)$names # here-the fifth element, or names[5], refers to SST_MEAN
## [1] "SST_NUM" "SST_MIN" "SST_MAX" "SST_MED" "SST_MEAN" "SST_STD"
# Extract variables of interest
lat <- ncvar_get(nc, 'latitude')
lon <- ncvar_get(nc, 'longitude')
sst <- ncvar_get(nc, attributes(nc$var)$names[5])
t <- ncvar_get(nc, 'time') # Extract time
tunits <- ncatt_get(nc,'time','units') # identify units for the conversion
splitt <- strsplit(tunits$value, " ") # parse time units for conversion
# convert dates!
ptime <- convertDateNcdf2R(t, unlist(splitt)[1],
origin = as.POSIXct(unlist(splitt)[3], tz = "UTC"),
time.format = "%Y-%m-%d")
ptime <- lubridate::ymd(ptime)
image(sst[,,6])
nc_close(nc)
rm(nc)
First, use the function ‘rast’ from the terra package to turn the variable you extracted from the NetCDF into a multi-layered SpatRRaster object.
Next, make sure you check the orientation of your spatrast object. If it needs adjusting, you can use the transpose function (t), and the ‘flip’ function to correct it. The flip function has an argument called ‘direction’ that needs to be specified. You can specify direction = ‘h’ (horizontal) or ‘v’ (vertical). In the file used for this example, you need to use both.
Set the layer names to reflect the associated dates we extracted earlier.
Save the time component in the spatraster object
Set the extent based on the lat/lons you extracted from the nc file.
Assign a projection by specifying the coordinate reference system (crs)
Finally, save your raster brick as a GeoTIFF file using the ‘writeRaster’ function.
Here is a quick reference on specifying a coordinate reference system in R.
As a check, you can just print the spatraster object name (here, r) to see how many layers it contains and make sure things are going as planned. In this example, the first two dimensions contain coordinates (lat/lons) and the third dimension is the number of layers. Here we should see 12 layers since this raster is created from a nc file of the mean monthly sst for a given year.
Note: the function ‘substr’ is a base R function that extracts or replaces substrings in a character vector. Here we use it to pull out the year of the original .nc file. Automating details like this can be a handy tool to standardize your file names and reduce human error when creating a series of objects across multiple files. However it will need to be adjusted for you own files based on your file path.
You can index through each layer of the raster using square brackets [[i]]. Specifying a number in those brackets will return the layer of the same number.
# Creating your spatraster object
r = terra::flip(t(terra::rast(sst)), direction='vertical')
# Run the raster object you created to see its information
r
## class : SpatRaster
## dimensions : 794, 596, 12 (nrow, ncol, nlyr)
## resolution : 1, 1 (x, y)
## extent : 0, 596, 0, 794 (xmin, xmax, ymin, ymax)
## coord. ref. :
## source(s) : memory
## names : lyr.1, lyr.2, lyr.3, lyr.4, lyr.5, lyr.6, ...
## min values : -1.820007, -0.7099915, -1.470001, 0.1535321, 3.51050, 6.061178, ...
## max values : 25.549988, 24.6761093, 25.069618, 25.3076439, 26.75422, 28.216497, ...
# checking orientation -
terra::plot(r[[5]])
# set the layer names to the dates you extracted earlier
names(r) <- ptime
# save time component in spatraster object
terra::time(r) <- names(r) %>% lubridate::ymd() %>% as.Date()
# Here I am indexing the 5th layer, this should be the month of may
# If everything went correctly, you can check the 'name'- it should read 2019-05-01
r[[5]]
## class : SpatRaster
## dimensions : 794, 596, 1 (nrow, ncol, nlyr)
## resolution : 1, 1 (x, y)
## extent : 0, 596, 0, 794 (xmin, xmax, ymin, ymax)
## coord. ref. :
## source(s) : memory
## name : 2019-05-01
## min value : 3.51050
## max value : 26.75422
## time (days) : 2019-05-01
ext(r) <- c(xmn = min(lon), xmx = max(lon), # set extent
ymn = min(lat), ymx = max(lat))
crs(r) <- 'epsg:4326' # set a projection, WGS 84 has EPSG code 4326
# checking extent (x axis and y axis) for the month of April
terra::plot(r[[4]])
# Save your raster as a geotif:
# (change the start/stop values based on the location of the date in your filename)
terra::writeRaster(r, filename = paste0('mm_sst_',
substr(files[39],
start = 90, stop = 93), '.tif'))
Note: In order to read in a spatial raster file, such as the one you just created and saved (i.e.: ‘raster_you_just_saved.tif’), you need to use the ‘rast’ function again.
# To read in the file you just made:
testing.rast <- terra::rast('mm_sst_2019.tif')
terra::plot(testing.rast[[2]], col = jet.colors(25))
ggplot() +
tidyterra::stat_spatraster(
data = testing.rast[[4]]) +
scale_fill_gradientn(colors = jet.colors(30)) +
coord_sf(xlim = c(-75,-65.5), ylim = c(36,44), datum = sf::st_crs(4326)) +
labs(x = '', y = '', fill = 'SST (°C)', title = 'Mean SST: April 2019') +
theme_bw()
Here we are adding two shapefiles to fill in the USA and Canada areas. Make sure to adjust the path below in the here function to the location where you have your shapefiles - here::here(‘your/directory/here/USA.shp’)
# Shapefile for US and Canada
canada <- sf::st_read(here::here('shapefiles/Canada.shp'), quiet = TRUE)%>% sf::st_set_crs(4326)
usa <- sf::st_read(here::here('shapefiles/USA.shp'), quiet = TRUE)%>% sf::st_set_crs(4326)
ggplot() +
tidyterra::stat_spatraster(
data = testing.rast[[4]]) +
scale_fill_gradientn(colors = jet.colors(30)) +
geom_sf(data = usa %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
geom_sf(data = canada %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
coord_sf(xlim = c(-75,-65.5), ylim = c(36,44), datum = sf::st_crs(4326)) +
labs(x = '', y = '', fill = 'SST (°C)', title = 'Mean SST: April 2019') +
theme_bw()
Here we are adding depth contours (isobaths) to the plot with the marmap package. You can specify which isobaths to include using the “breaks” argument of the geom_contour function.
# Adding bathymetry
bathy <- marmap::getNOAA.bathy(-81,-58, 27, 46)
# Extract bathymetry data in a data.frame
bathy = marmap::fortify.bathy(bathy)
ggplot() +
tidyterra::stat_spatraster(
data = testing.rast[[4]]) +
scale_fill_gradientn(colors = jet.colors(30)) +
geom_sf(data = usa %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
geom_sf(data = canada %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
geom_contour(data = bathy,
aes(x=x,y=y,z=-1*z),
breaks=c(50,100,150,200, Inf),
col = 'darkgrey') +
coord_sf(xlim = c(-75,-65.5), ylim = c(36,44), datum = sf::st_crs(4326)) +
labs(x = '', y = '', fill = 'SST (°C)', title = 'Mean SST: April 2019') +
theme_bw()
Below, we create a dataframe with coordinates as an example. You can call whatever dataframe you have that has coordinates.
To add these to the plot, you can use geom_point. It is important to include the “data =” component. Note that you can add pipe operations within the “data =” call, as is done below if you want to filter the point location data you are using.
You can use “size=” within the (aes = ) to assign the size of the point to a value in a dataframe, if that is relevant to your data set.
Alternatively, you can use “size=” outside the (aes =) call to specify what size you want the points to be. You can also use base pch here to specifiy shape.
# Creating a data frame with a few point locations
locs <- data.frame(lat = c(37.19167,39.30333, 40.02167),
lon = c(-74.76667,-72.39833, -71.11667))
# Plot!
ggplot() +
tidyterra::stat_spatraster(
data = testing.rast[[4]]) +
scale_fill_gradientn(colors = jet.colors(30)) +
geom_sf(data = usa %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
geom_sf(data = canada %>% st_as_sf(),color = 'gray20', fill = '#cbdbcd') +
geom_contour(data = bathy,
aes(x=x,y=y,z=-1*z),
breaks=c(50,100,150,200, Inf),
col = 'darkgrey') +
geom_point(data =locs %>% filter(lat > 33),
aes(x = lon, y = lat), col = 'black', size = 3) +
coord_sf(xlim = c(-75,-65.5), ylim = c(36,44), datum = sf::st_crs(4326)) +
labs(x = '', y = '', fill = 'SST (°C)', title = 'Mean SST: April 2019') +
theme_bw()
The following code will show you how to extract data from a raster. You can extract values (here sst) for point locations or from a particular polygon.
You can extract values at specific coordinates (e.g. fishing location, whale sighting), which can be very useful for high resolution fishing or sighting data.
In this format you will have a value from each layer of the raster (here,date) for each coordinate of interest.
fish_locs_sst <- terra::extract(r, locs[, c('lon', 'lat')])
head(fish_locs_sst)
## ID 2019-01-01 2019-02-01 2019-03-01 2019-04-01 2019-05-01 2019-06-01
## 1 1 10.442499 10.754377 8.546669 11.165600 15.29500 21.55731
## 2 2 9.356659 8.513850 8.555788 11.826663 12.81000 19.12091
## 3 3 9.059996 8.717643 7.512730 7.813001 11.75739 18.36591
## 2019-07-01 2019-08-01 2019-09-01 2019-10-01 2019-11-01 2019-12-01
## 1 26.24035 26.68307 22.70423 20.13381 16.78588 13.77727
## 2 24.46533 25.57577 23.06000 21.83785 18.02176 13.56181
## 3 22.62206 24.80334 22.97481 20.41667 17.18533 11.89444
fish_sst_long <- fish_locs_sst %>%
pivot_longer(
-c(ID),
names_to = 'date',
values_to = 'sst'
)
head(fish_sst_long)
## # A tibble: 6 × 3
## ID date sst
## <dbl> <chr> <dbl>
## 1 1 2019-01-01 10.4
## 2 1 2019-02-01 10.8
## 3 1 2019-03-01 8.55
## 4 1 2019-04-01 11.2
## 5 1 2019-05-01 15.3
## 6 1 2019-06-01 21.6
You can use shape files to extracting data from a polygon. Here we use the bottom trawl survey strata shape file to create a polygon that reflects Golden Tilefish habitat.
bts_strata <- st_read(here::here('shapefiles/NES_BOTTOM_TRAWL_STRATA.shp'),
quiet = TRUE)
# plot(bts_strata) # to see all bottom trawl strata
# sf object
gtf_strata_sf <- bts_strata %>%
filter(STRATUMA %in% c('01030', '01040', '01070', '01080', '01110', '01120',
'01140', '01150', '01670', '01680', '01710', '01720',
'01750', '01760')) # select just the gtf strata
# set projection
st_crs(gtf_strata_sf) <- 'epsg:4326'
Here we use the exact extract function. This function extracts both the value of the intersecting raster cells, and the fraction of the intersecting area relative to the full raster grid, giving you a column called ‘coverage_fraction’. This allows you to weight the value (sst) you are extracting from each pixel based on how much of that pixel contributed to the area of the polygon.
gtf_sst <-
exactextractr::exact_extract(
r,
gtf_strata_sf,
progress = F # this is for not displaying progress bar
)
s_combined <- bind_rows(gtf_sst, .id = 'id') %>%
as_tibble()
# Check out the tibble you just created!
# The first row (id) is the unique id for the particular polygon you are using
head(s_combined)
## # A tibble: 6 × 14
## id `2019-01-01` `2019-02-01` `2019-03-01` `2019-04-01` `2019-05-01`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.97 6.13 4.89 5.33 7.93
## 2 1 5.84 6.40 5.02 5.27 8.16
## 3 1 6.03 6.14 4.82 5.20 7.69
## 4 1 5.97 6.13 4.89 5.33 7.93
## 5 1 5.84 6.40 5.02 5.27 8.16
## 6 1 5.50 6.46 5.26 5.36 8.35
## # ℹ 8 more variables: `2019-06-01` <dbl>, `2019-07-01` <dbl>,
## # `2019-08-01` <dbl>, `2019-09-01` <dbl>, `2019-10-01` <dbl>,
## # `2019-11-01` <dbl>, `2019-12-01` <dbl>, coverage_fraction <dbl>
# In this example, we have 14 strata, so there will be 14 unique ids.
unique(s_combined$id)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
Next we want to pivot the data to a longer format which is often a useful way to work with the data.
# Pivot to a longer format
gtf_sst_long <- pivot_longer(
s_combined,
-c(id, coverage_fraction),
names_to = 'ymd',
values_to = 'sst'
)
# These lines split up the dates, this is useful if you want to isolate
# particular subsets of time in your data.
sst_ts_gtf <- gtf_sst_long %>% janitor::clean_names() %>%
separate(ymd, into = c('year', 'month', 'day'), sep = "\\-", convert = TRUE)
The code below allows you to calculate some simple summary statistics on your time series and save the file.
# -- Calculate stats and save! Full Stat-Area -- #
sst_ts_gtf_stats <- sst_ts_gtf %>%
group_by(year,month) %>%
summarize(weighted_mean_sst = sum(na.omit(sst) * na.omit(coverage_fraction)) / sum(na.omit(coverage_fraction)),
mean_sst = mean(na.omit(sst)),
median_sst = median(na.omit(sst)),
sd_sst = sd(na.omit(sst)),
max_sst = max(na.omit(sst)),
min_sst = min(na.omit(sst)))
# Save your file!
write.csv(sst_ts_gtf_stats, 'sst_monthly_ts_gtf.csv', row.names = F)
Let’s take a quick look at your time series!
ggplot(sst_ts_gtf_stats, aes(x = month, y = mean_sst))+
geom_line(lwd = 1) +
geom_point(aes(x = month, fill = mean_sst), shape = 21, size = 4) +
geom_errorbar(aes(ymin = mean_sst - sd_sst, ymax = mean_sst + sd_sst),
width = .2, position = position_dodge(.9)) +
scale_fill_gradientn(colors = jet.colors(30)) +
labs(x = 'Month', y = 'Mean SST', fill = 'SST') +
scale_x_continuous(breaks = 1:12)+
theme_bw() +
theme(legend.position = c(0.92, 0.8)) +
theme(legend.background = element_rect(linetype="solid",
colour ="grey20")) +
theme(legend.key.size = unit(0.5, 'cm')) +
theme(axis.text = element_text(size = 13),
axis.title = element_text(size = 15),
axis.ticks.length = unit(-1.4, "mm"))
Variations on a theme ….
# -- Calculate stats and save! Indv. stat area -- #
sst_ts_gtf2 <- sst_ts_gtf %>%
group_by(id,year,month) %>%
summarize(weighted_mean_sst = sum(na.omit(sst) * na.omit(coverage_fraction)) / sum(na.omit(coverage_fraction)),
mean_sst = mean(na.omit(sst)),
median_sst = median(na.omit(sst)),
sd_sst = sd(na.omit(sst)),
max_sst = max(na.omit(sst)),
min_sst = min(na.omit(sst)))
write.csv(sst_ts_gtf2, 'sst_monthly_ts_gtf_indv_substrata.csv', row.names = F)
# North/South strata
sst_ts_gtf3 <- sst_ts_gtf %>%
mutate(
substrat = case_when(
id %in% c(1:6) ~ 'n_strata',
id %in% c(7:14) ~ 's_strata')) %>%
group_by(substrat,year,month) %>%
summarize(weighted_mean_sst = sum(na.omit(sst) * na.omit(coverage_fraction)) / sum(na.omit(coverage_fraction)),
mean_sst = mean(na.omit(sst)),
median_sst = median(na.omit(sst)),
sd_sst = sd(na.omit(sst)),
max_sst = max(na.omit(sst)),
min_sst = min(na.omit(sst)))
write.csv(sst_ts_gtf3, 'sst_monthly_ts_gtf_n_v_s_substrata.csv', row.names = F)
# A quick look at your time series
ggplot(sst_ts_gtf3, aes(x = month, y = mean_sst))+
geom_line(lwd = 1) +
geom_point(aes(x = month, fill = mean_sst), shape = 21, size = 4) +
geom_errorbar(aes(ymin = mean_sst - sd_sst, ymax = mean_sst + sd_sst),
width = .2, position = position_dodge(.9)) +
scale_fill_gradientn(colors = jet.colors(30)) +
labs(x = 'Month', y = 'Mean SST', fill = 'SST') +
scale_x_continuous(breaks = 1:12) +
facet_wrap(~substrat) +
theme_bw()+
theme(axis.text = element_text(size = 13),
axis.title = element_text(size = 15),
axis.ticks.length = unit(-1.4, "mm"))