ILXSM stats and maps

Goal of this document

This document aims to summarize and visualize ILXSM data, a cooperative research effort to collect paired length and weight data at processing facilities.

Data wrangling

The first step is to read in the most recent data file and load necessary packages.

# sets the working directory where the data lives
setwd(here::here('data/ilxsm'))
# Note the previous version was 'ILXSM_Pull_01_25_23.csv'
# Most updated data from Thomas on 2/14/23
lw_data <- read.csv('Entire_Database_Pull_ILXSM_2.14.23.csv')

Next step is to wrangle the data a bit to include date variables of interest and identify the unique stat areas that are in this file.

The unique statistical areas in this set are as follows:

# Make sure all data are in same units, if not convert
# unique(lw_data$UNIT_MEASURE)
lw_data <- lw_data %>%
  mutate(PARAM_VALUE_NUM = case_when(PARAM_TYPE == 'ML' & UNIT_MEASURE == 'CM' ~ PARAM_VALUE_NUM * 10, 
                            PARAM_TYPE == 'ML' & UNIT_MEASURE == 'MM'~ PARAM_VALUE_NUM, 
                            PARAM_TYPE == 'WT' & UNIT_MEASURE == 'GM' ~ PARAM_VALUE_NUM))
lw_data$LAND_DATE <-lubridate::dmy(lw_data$LAND_DATE)
lw_data <- lw_data %>%
  mutate(year = lubridate::year(LAND_DATE),
         month = lubridate::month(LAND_DATE),
         week = lubridate::week(LAND_DATE), 
         day = lubridate::day(LAND_DATE))
# setwd(here::here()) # sets the current working directory
unique_areas <- sort(unique(lw_data$AREA_CODE))
unique_areas

##  [1]   0 500 526 534 537 541 562 600 610 615 616 620 621 622 623 624 626 631 632
## [20] 633

## -- Notes from Thomas about area codes -- ## 
# AREA_CODE == 000 is all stat areas. 
# The 600 series are AREA_CODE identified from 601 to 699. 
# At this point in time AREA_CODE = 639 is the largest. 
# The micro level series end in tenths. 
# Example #1:  610, 620 630, ect. 
# Example #2: The 610 series is comprised of stat areas 611:619.

Next, I identify latitude and longitude for the center point of each stat area, so that I can plot the locations in a map. These coordinates do not indicate the actual fishing locations, but rather are the centerpoint derived from the map of statistical areas available at the GARFO website: https://www.fisheries.noaa.gov/resource/map/greater-atlantic-region-statistical-areas.

# Generate a dataframe that pairs a lat lon with each stat area code. This is 
# roughly the centroid of that polygon. I did this manually by looking at the 
# GARFO map but there is likely a more sophisticated way to do this with a 
# function in the sf package. 

# Put placeholders in 0, 500, 600, 610, 620
stat_areas <- data.frame(area_code = sort(unique(lw_data$AREA_CODE)), 
                         lat = c(0, 0, 40.50, 39.5, 40.5, 39.5,
                                 41.25, 0, 0, 39.5, 39.5, 0, 38.5,
                                 38.5, 38.5, 38.5, 37.5,
                                 36.5, 36.5, 36.5), 
                         lon = c(0, 0, -69.5, -70.50, -70.75, -69.5,
                                 -66.75, 0, 0, -73.5,-72.35, 0, -74.5,
                                 -73.5, -72.5, -71.0,-74.5,
                                 -75.25, -74.5, -73.5))


# Note, that I substituted zeros for the stat areas 0 and 600, so that I can keep
# those mixed areas for tallying the number of measurements. These can later be 
# removed for analyses depending on the question at hand. 

lw_tally_full <- lw_data %>% 
  group_by(AREA_CODE) %>%
  summarise(n = length(ORGANISM_ID))

lw_tally_full$lat <- NA
lw_tally_full$lon <- NA
for (i in 1:length(stat_areas$area_code)){
  a <- subset(lw_tally_full, AREA_CODE == unique_areas[i])
  locs <- which(lw_tally_full$AREA_CODE %in% a$AREA_CODE)
  b <- subset(stat_areas, area_code == unique_areas[i], select = 'lat')
  c <- subset(stat_areas, area_code == unique_areas[i], select = 'lon')
  lw_tally_full[locs,3] <- b$lat
  lw_tally_full[locs,4] <- c$lon
}
# Remove rows with placeholders for 0, 500, 600, 610, 620 since they are not specific to a unique stat area
lw_tally <- lw_data %>% 
  group_by(AREA_CODE, year) %>%
  summarise(n = length(ORGANISM_ID))

lw_tally <- lw_tally[-c(1:4,12:14, 19:20),] # this is not an 
# ideal way to do this - will change year to a param in quarto
lw21 = lw_tally %>% filter(year ==2021)
lw22 = lw_tally %>% filter(year ==2022)
lw23 = lw_tally %>% filter(year ==2023)




## ---- Seperate data into lengths and weights by year ---- ##

wt_full <- lw_data %>%
  filter(PARAM_TYPE == 'WT') %>%
  rename(weight = PARAM_VALUE_NUM)
ml_full <- lw_data %>%
  filter(PARAM_TYPE == 'ML') %>%
  rename(length = PARAM_VALUE_NUM)

ml_21 <- ml_full %>%
  filter(year == 2021) 
ml_22 <- ml_full %>%
  filter(year == 2022) 
wt_21 <- wt_full %>%
  filter(year == 2021) 
wt_22 <- wt_full %>%
  filter(year == 2022) 

# Consolidate the dataset to just samples that have paired weight/length values
ml.full.c <- ml_full %>% select(LAND_DATE,AREA_CODE,ORGANISM_ID, 
                              length,VESSEL_NAME, VTR_SERIAL_NUM)

wt.full.c <- wt_full %>% select(LAND_DATE,AREA_CODE,ORGANISM_ID, 
                              weight,VESSEL_NAME, VTR_SERIAL_NUM)

lw_paired <- right_join(ml.full.c, wt.full.c, 
                           by = c('LAND_DATE','AREA_CODE','ORGANISM_ID', 
                                 'VESSEL_NAME', 'VTR_SERIAL_NUM'))

lw_paired <- na.omit(lw_paired)
lw_paired <- lw_paired %>% relocate(weight, .after = length)

lw_paired <- lw_paired %>%
  mutate(year = lubridate::year(LAND_DATE),
         month = lubridate::month(LAND_DATE),
         week = lubridate::week(LAND_DATE), 
         day = lubridate::day(LAND_DATE))

Stat areas listed as 000 or 600 represent mixed areas. The analyses below do not include any mixed area data entries. Thomas and I are currently developing a protocol for including these data going forward.

Quick list of the number of individual squid measured in each of these mixed area codes

There are currently 2542 data entries from mixed area 00
There are currently 4038 data entries from mixed area 500
There are currently 19857 data entries from mixed area 600
There are currently 307 data entries from mixed area 610
There are currently 23987 data entries from mixed area 620

Data visualization

The full data set has 196452 data entries, from 15 different statistical areas, across 3 years. Of the 196452 data entries across all years, there are paired length and weight measurements for a total of 69300 individuals. individuals. At the time of this report, the number of individual squid with paired measurements for each year is as follows:

2021: 42106
2022: 27114
2023: 80

The number of squid measured at each site are shown below for each year:

ggplot(lw_tally, aes(x = AREA_CODE, y = n, color = factor(year))) + 
  geom_line(cex = 1.2) +
  geom_point(size = 2.5) +
  labs(x = 'Statistical Area', y = 'Number of samples (n)') +
  labs(color = 'Year') +
  scale_color_manual(values=c('gold1','slateblue3','violetred1'))+
  ecodata::theme_ts()

Here is a map of the statistical areas from which organisms were sampled across all years available in data set. The size of the data point is representative of the number of samples recorded in each statistical area.

## Bring in shapefiles
wd = here::here("shapefiles")
stat_areas_sp <- rgdal::readOGR(wd,'groundfish_stat_areas', verbose = FALSE) 
proj4string(stat_areas_sp) <- CRS("+init=epsg:4326")
# 526 534 537 541 562 600* 615 616 621 622 623 624 626 631 632 633
# Subset the shape file for just the stat areas that match the ILXSM data
stat_areas2 <- stat_areas_sp[na.omit(stat_areas_sp@data$Id %in% c('526','534',
                                                                  '537','541',
                                                                  '562',
                                                                  '615','616', 
                                                                  '621','622',
                                                                  '623', '624',
                                                                  '626', '631',
                                                                  '632', '633')),]
US.areas <- rgdal::readOGR(wd, 'USA', verbose = FALSE)
proj4string(US.areas) <- CRS("+init=epsg:4326")
canada.areas <-rgdal::readOGR(wd,'Canada', verbose = FALSE)
proj4string(canada.areas) <- CRS("+init=epsg:4326")
canyons <- rgdal::readOGR(wd, 'major_canyons', verbose = FALSE)
#proj4string(canyons) <- CRS("+init=epsg:4326")
# canyons <- st_read("~/github/ssalois1/NEFSC-illex_indicator_viewer/shapefiles/major_canyons.shp") %>% st_transform(., "+proj=longlat +datum=NAD83 +no_defs")
## -- Extra reference points you may want to add to map -- ##
# mesh_exmp <- rgdal::readOGR(wd, 'Illex_Fishery_Mesh_Exemption_Area')
# lobster <- rgdal::readOGR(wd, 'Lobster_Restricted_Gear_Areas')
# fishing <- marmap::getNOAA.bathy(-80,-66, 36, 41,resolution=1)
# fishing = fortify.bathy(fishing)
# interestingcanyons <- c("Norfolk Canyon", "Wilmington Canyon", 
#                         "Spencer Canyon", "Hudson Canyon", 
#                         "Atlantis Canyon")

## bring in bathymetry
atl <- marmap::getNOAA.bathy(-78,-64, 35, 45)
atl = fortify.bathy(atl)
blues = colorRampPalette(brewer.pal(9,'Blues'))(25)
depths <- c(0,50,100,200,300,500,Inf)
depths2 <- c('0','50','100','200','>300')
blues2 <- blues[c(5,7,9,11,13,24)]



# Major Ports  
ports <- data.frame(lon = c( -70.9342,-71.4856,-74.9060, -76.3452), 
                    lat = c(41.6362, 41.3731,38.9351, 37.0299),
                    port = c('New Bedford, MA', 'Narragansett, RI', 
                             'Cape May, NJ', 'Hampton, VA'))
# Geographical locations of interest
locations <- data.frame(lon = c( -70.9342), 
                    lat = c(41.6362),
                    port = c('Cape Hatteras, NC'))

## -- Here is the map -- ##
m1 = ggplot() +
  geom_contour_filled(data = atl,
                      aes(x=x,y=y,z=-1*z),
                      breaks=c(0,50,100,250,500,Inf),
                      size=c(0.3)) + 
  scale_fill_manual(values = blues2, # 5:20
                    name = paste("Depth (m)"),
                    labels = depths2) +
  geom_contour(data = atl,
               aes(x=x,y=y,z=-1*z),
               breaks=c(0,50,100,250,500,Inf),
               size=c(0.3), 
               col = 'lightgrey') +
  new_scale_fill() +
  geom_sf(data = US.areas %>% st_as_sf() ,color = 'gray20', fill = 'wheat3') +
  geom_sf(data = canada.areas %>% st_as_sf() ,color = 'gray20', fill = 'wheat3') +
  geom_sf(data = stat_areas2 %>% st_as_sf() ,  
          fill = NA, color = 'gray20', lwd = 0.6) +
  geom_point(data = lw_tally_full,
             aes(x = lon, y = lat, size = n), 
             color = 'black', bg = 'deeppink3',
             pch = 21) +
  geom_point(data = ports,
             aes(x = lon, y = lat), 
             color = 'black', bg = 'cornsilk1',
             pch = 23, lwd = 5) +
  labs(size = 'Number of Samples') +
  geom_label(data = ports, aes(lon, lat, label = port),
             colour = 'black', fill = 'cornsilk1', alpha = 0.8,
             fontface = 'bold', nudge_x = c(0.03,-1.2,0.07,0.80),
             nudge_y = c(0.40,0.2,0.4,0.33), size = 3) +
  coord_sf(xlim = c(-76,-66.0), ylim = c(35.5,44), datum = sf::st_crs(4326)) +
  xlab('Longitude') + 
  ylab('Latitude') +
  theme_bw()

## This increases axis text size and puts the tick marks on the inside
m1 + theme(axis.text = element_text(size = 13), 
        axis.title = element_text(size = 15),
        axis.ticks.length = unit(-1.4, "mm"))

## -- All years -- ##
# Lengths
p1 = ggplot(ml_full, 
       aes(x = length, y = factor(week), fill =  factor(week))) +
  geom_density_ridges(alpha = .8, color = 'white',
                      scale = 2.5, rel_min_height = .01) +
  labs(title = 'ILXSM Lengths (all years)', x = 'Length (mm)', y = 'Week', fill = 'Week') +
  guides(color = guide_legend(title = 'Week')) +
  theme_ridges() +
  theme_classic() 

# Weights
p2 = ggplot(wt_full, 
       aes(x = weight, y = factor(week), fill =  factor(week))) +
  geom_density_ridges(alpha = .8, color = 'white',
                      scale = 2.5, rel_min_height = .01) +
  labs(title = 'ILXSM Weights (all years)', x = 'Weights (g)', y = 'Week', fill = 'Week') +
  guides(color = guide_legend(title = 'Week')) +
  theme_ridges() +
  theme_classic() 

p1 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

p2 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

Comparisons of mean length and mean weight across years:

wt_stats <- wt_full %>%
  group_by(year) %>%
  summarise(mean = mean(weight), 
            sd = sd(weight),
            min = min(weight), 
            max = max(weight))

ml_stats <- ml_full %>%
  group_by(year) %>%
  summarise(mean = mean(length), 
            sd = sd(length),
            min = min(length), 
            max = max(length))

wt_stats$param <- 'weight'
ml_stats$param <- 'mantle_length'


ilxsm_stats <- rbind(wt_stats, ml_stats)
ilxsm_stats$conf_interval <- (1.96*ilxsm_stats$sd)
# Labels outside bars: vjust = -0.5, Inside bars:  vjust = 1.6,
p1 = ggplot(data = wt_stats, aes(x = year, y = mean)) +
  geom_bar(stat ='identity', fill = c('gold1','slateblue3','violetred1'),
           col = 'black') +
  # geom_text(aes(label = round(mean)), 
  #           vjust = 1.6, hjust = c(-1.8,-3,-3),
  #           color = 'black', size = 3.5) +
  ylab('Mean weight (g)') +
  xlab('Year') +
  ylim(0,200) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width=.2,
                position=position_dodge(.9)) + 
  # geom_errorbar(aes(ymin = mean - (1.96 * sd), ymax = mean + (1.96 *sd)), width=.2,
  #               position=position_dodge(.9)) + 
  ecodata::theme_ts()
p1 = p1 +  theme(text = element_text(size = 14), 
            axis.text = element_text(size = 13), 
            axis.title = element_text(size = 15),
            axis.ticks.length = unit(-1.4, "mm")) 

p2 = ggplot(data = ml_stats, aes(x = year, y = mean)) +
  geom_bar(stat ='identity', fill = c('gold1','slateblue3','violetred1'),
           col = 'black')+
  # geom_text(aes(label = round(mean)), 
  #           vjust = 1.6, hjust = c(-1.8,-1.8,-1.8),
  #           color = 'black', size = 3.5) +
  ylab('Mean mantle length (mm)') +
  xlab('Year') +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width=.2,
                position=position_dodge(.9)) + 
   # geom_errorbar(aes(ymin = mean - (1.96 * sd), ymax = mean + (1.96* sd)), width=.2,
   #              position=position_dodge(.9)) + 
  ecodata::theme_ts()
p2 = p2 +  theme(text = element_text(size = 14), 
                 axis.text = element_text(size = 13), 
                 axis.title = element_text(size = 15),
                 axis.ticks.length = unit(-1.4, "mm")) 



p1 + p2

ml_full_2122 <- subset(ml_full, year %in% c(2021,2022))
dat_ml <- as_tibble(ml_full_2122)
#unique(dat_ml$year)

x <- which(names(dat_ml) == "year") # name of grouping variable
y <- which(names(dat_ml) == "length") # names of variables to test
  
method <- "wilcox.test" # one of "wilcox.test" or "t.test"
paired <- FALSE 

p <- ggboxplot(dat_ml, x = colnames(dat_ml[12]), y = colnames(dat_ml[6]),
               color = colnames(dat_ml[12]),
               palette = c('gold1','slateblue3'),
               legend = "none",
               title = 'Mantle Length', 
               xlab = 'Year', 
               ylab = 'Length (mm)') + 
  ecodata::theme_ts()# add = "jitter"

    #  Add p-value
p = p + stat_compare_means(aes(label = paste0(after_stat(method), ", p-value = ", 
                                                    after_stat(p.format))),
                                 method = method,
                                 paired = paired,
                                 # group.by = NULL,
                                 ref.group = NULL)

p + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

wt_full_2122 <- subset(wt_full, year %in% c(2021,2022))
dat_wt <- as_tibble(wt_full_2122)
x <- which(names(dat_wt) == "year") # name of grouping variable
y <- which(names(dat_wt) == "weight") # names of variables to test

p <- ggboxplot(dat_wt,x = colnames(dat_wt[12]), y = colnames(dat_wt[6]),
               color = colnames(dat_wt[12]),
               palette = c('gold1','slateblue3'),
               legend = "none",
               title = 'Body weight', 
               xlab = 'Year', 
               ylab = 'Weight (g)') + 
  ecodata::theme_ts()# add = "jitter"
 
#  Add p-value
p =  p + stat_compare_means(aes(label = paste0(after_stat(method), ", p-value = ", 
                                               after_stat(p.format))),
                            method = method,
                            paired = paired,
                            # group.by = NULL,
                            ref.group = NULL)

p + theme(axis.text = element_text(size = 10), 
          axis.title = element_text(size = 10),
          axis.ticks.length = unit(-1.4, "mm"))

Let’s take a closer look at each year.

2021

In 2021 there were a total of 114130 organisms measured across 9 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2021), 
       aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'gold1', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 


g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

datpie21 <- data.frame('Category' = c('Unpaired samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2021))-nrow(lw_paired %>% filter(year == 2021)))/nrow(lw_data %>% filter(year == 2021))*100,
                                                    nrow(lw_paired %>% filter(year == 2021))/nrow(lw_data %>% filter(year == 2021))*100))

ggplot(datpie21, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('gold1','gold3'))

## -- Quick plots -- ##
# Lengths
ggplot(ml_full %>% filter(year == 2021), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2021 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'YlOrBr') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2021), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2021 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'YlOrBr') +
  ecodata::theme_ts()

2022

In 2022 there were a total of 30974 organisms measured across 14 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2022), 
       aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'slateblue3', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

datpie22 <- data.frame('Category' = c('Total samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2022))-nrow(lw_paired %>% filter(year == 2022)))/nrow(lw_data %>% filter(year == 2022))*100,
                                      nrow(lw_paired %>% filter(year == 2022))/nrow(lw_data %>% filter(year == 2022))*100))


ggplot(datpie22, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('slateblue1','slateblue3'))

# Lengths
ggplot(ml_full %>% filter(year == 2022), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2022 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'Purples') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2022), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2022 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'Purples') +
  ecodata::theme_ts()

## -- Rainbow, no stats -- ##
# ggplot(ml_full %>% filter(year == 2022),
#        aes(x = length, y = factor(week), fill =  factor(week))) +
#   geom_density_ridges(alpha = .8, color = 'white',
#                       scale = 2.5, rel_min_height = .01) +
#   labs(title = '2022 Illex Lengths', x = 'Lengths (mm)',
#        y = 'Week', fill = 'Week') +
#   guides(color = guide_legend(title = 'Week')) +
#   theme_ridges() +
#   theme_classic()

# ggplot(wt_full %>% filter(year == 2022),
#        aes(x = weight, y = factor(week), fill =  factor(week))) +
#   geom_density_ridges(alpha = .8, color = 'white',
#                       scale = 2.5, rel_min_height = .01) +
#   labs(title = '2022 Illex Weights', x = 'Weight (gm)',
#        y = 'Week', fill = 'Week') +
#   guides(color = guide_legend(title = 'Week')) +
#   theme_ridges() +
#   theme_classic()

2023

In 2023 there were a total of 617 organisms measured across 2 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2023), 
           aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'violetred1', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 

g + theme(axis.text = element_text(size = 10), 
          axis.title = element_text(size = 10),
          axis.ticks.length = unit(-1.4, "mm"))

datpie23 <- data.frame('Category' = c('Total samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2023))-nrow(lw_paired %>% filter(year == 2023)))/nrow(lw_data %>% filter(year == 2023))*100,
                                      nrow(lw_paired %>% filter(year == 2023))/nrow(lw_data %>% filter(year == 2023))*100))


ggplot(datpie23, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('violetred1','violetred3'))

# Lengths
ggplot(ml_full %>% filter(year == 2023), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2023 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'PuRd') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2023), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2023 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'PuRd') +
  ecodata::theme_ts()

Merging all weight data

Merged the data made available to the Illex RTA Working Group by Lisa Hendrickson and the data from the ILXSM project to view patterns of Illex weight over time.

setwd(here::here('data'))
bw <- read.csv('illex_bodywts.csv')
bw$DATE <- as.Date(bw$DATE)
bw <- bw %>%
  mutate(year = year(DATE),
         month = month(DATE),
         week = week(DATE), 
         day = day(DATE)) %>%
  as.data.frame()
#names(bw) <- tolower(bw)

# Let's add ILXSM and Lisa's weight data together 
ilxsm_bw <- lw_data %>% filter(PARAM_TYPE == 'WT') %>%
  select(LAND_DATE, PARAM_VALUE_NUM, year, month, week, day, VESSEL_NAME) %>%
  rename(DATE = LAND_DATE, BODYWT = PARAM_VALUE_NUM)


# Find the hullnum from CFDBS_vessels_1997-2019
# import vessel id data
v.ids <- read.csv('CFDBS_vessels_1997-2019.csv')
ilxsm_bw$HULLNUM <- NA

uvn <- sort(unique(ilxsm_bw$VESSEL_NAME))
# this located and extracts hullnum for each unique vessel then adds to 
# the data set of interest (ilxsm_bw)
for (i in 1:length(uvn)){
  a <- subset(v.ids, VESSNAME == uvn[i])
  locs <- which(ilxsm_bw$VESSEL_NAME %in% a$VESSNAME)
  ilxsm_bw[locs,8] <- a[1,3]
}


# Get this ready to merge
ilxsm_bw <- ilxsm_bw %>% select('HULLNUM', "DATE", 'BODYWT',
                              'year','month', 'week', 'day')


# merge ilxsm data to Lisa's
bwts <- rbind(bw, ilxsm_bw)


## Now add permits & vessel name just to be thorough for comparing data later 
bwts$permit <- NA
bwts$vessel_name <- NA

for (i in 1:length(uvn)){
  a <- subset(v.ids, VESSNAME == uvn[i])
  locs <- which(bwts$HULLNUM %in% a$HULLNUM)
  bwts[locs,8] <- a[1,2]
  bwts[locs,9] <- a[1,4]
  
}

Quick overview of data:

Weight range: grams to - grams
Year range: 1997, 2023
Week range: 1, 51

Boxplots:

The first boxplot figure is ordered by year and the second boxplot figure is ordered by median.

# All weights (Lisa + ILXSM)
g1 = ggplot(bwts, aes(x = factor(year), y = BODYWT)) + 
  labs(x = 'Year',
       y = 'Weight (g)',
       title = 'Illex body weights (WG + ILXSM)') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_boxplot() +
  ecodata::theme_ts() 

g1 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

# ggsave(path = here::here('results'),'all_weights.jpg', 
#        width = 8, height = 3.75, units = "in", dpi = 300)


# All weights (Lisa + ILXSM) ordered by median
g2 = ggplot(bwts, 
       aes(x = reorder(factor(year),BODYWT, median, na.rm = TRUE), y = BODYWT)) + 
  labs(x = 'Year',
       y = 'Weight (g)', 
       title = 'Illex body weights (WG + ILXSM)',
       subtitle = 'Ordered by median') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_boxplot() + 
  ecodata::theme_ts()
g2 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

# ggsave(path = here::here('results'),'all_weights_median.jpg', 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# 
# ggplot(bwts, 
#        aes(x = reorder(factor(year),BODYWT, mean, na.rm = TRUE), y = BODYWT)) + 
#   labs(x = 'Year',
#        y = 'Weight (g)', 
#        title = 'Illex body weights (WG + ILXSM)',
#        subtitle = 'Ordered by mean') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# # ggsave(path = here::here('results'),'all_weights_median.jpg', 
# #        width = 8, height = 3.75, units = "in", dpi = 300)

Time series

bw_tally <- bwts %>% 
  group_by(year) %>%
  summarise(mean.wt = mean(na.omit(BODYWT)), 
            sd.wt = sd(na.omit(BODYWT)), 
            max.wt = max(na.omit(BODYWT)),
            min.wt = min(na.omit(BODYWT)),
            n = length(na.omit(BODYWT)))
yrs = unique(bw_tally$year)

Visualizing the time series and applying a trend analysis.

# Since this is a relatively short time series (23 years), the function from geom_gls() from ecodata will not work. 
# 
# This function fits four trend models to each time series, uses AICs to select the best model fit, and then implements a likelihood-ratio test to determine if a trend is present. More details on this function


g = ggplot(bw_tally, aes(x = year, y = mean.wt)) +
  geom_line() +
  #geom_point(color = 'grey20')+
  #geom_smooth() +
  # ecodata::geom_gls() +
  # xlim(1997,2022) +
  # scale_x_continuous(breaks = function(range) seq(yrs[1], yrs[23], by = 1))+
  xlab('Year') + 
  ylab('Mean weight (g)') +
  ecodata::theme_ts() 
g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

Adding in a LOESS (Locally Weighted Scatterplot Smoothing) trend line. LOESS is non-parametric regression technique

g = ggplot(bw_tally, aes(x = year, y = mean.wt)) +
  geom_smooth() +
   xlab('Year') + 
  ylab('Mean weight (g)') +
 ecodata::theme_ts() 
g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

# Calculate new coeffients with current data
# run regression
# mod <-lm(log(lw_paired$weight)~log(lw_paired$length/10))
# # Extract coefficients
# coef(mod) # intercept (alpha) and slope (beta) of the line 
# # intercept is -2.300349 and the slope  2.463418 (cm)
# # Back transform alpha
# # e = 2.7182818
# # e^-2.300349 
# exp(-2.300349) 

g = ggplot(lw_paired, aes(x=length, y=weight)) + 
  geom_point() +
  geom_smooth(method=lm) + 
  ecodata::theme_ts() 

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

# ggplot(lw_paired, aes(x=log(length/10), y=log(weight))) + 
#   geom_point()+
#   geom_smooth(method=lm) + 
#   theme_bw()

Size frequency analysis

Following work by Paul Rago, size samples from ILXSM were used to illustrate the potential use of a weight vs age curve. Weight (W) as a function of age (A) was modeled in Hendrickson and Hart, where alpha=1.12 x 10-6 and beta = 3.60. These parameters were used to describe both male and female Illex combined, but the authors noted that sex specific differences did occur. The inverse of the weight at age function can be written as Age=(W/beta)^(1/beta). Together these functions can be used to compute the projected growth of a cohort sampled by the fishery in any given week. The projected growth of an squid of size W(t) to size W(t+1) in week t+1 was obtained by first estimating its age in days, then adding 7 days to project it to the next week.

library(contourPlot)
library(TAM)
library(Hmisc)
library(akima)
# Using weight data: 

df <- bwts %>% 
  group_by(year, week) %>% # May want to include stat_area for future analyses? 
 # mutate(weight = round(BODYWT)) %>%
  count(BODYWT)
df <- df %>% 
  rename(freq = n)
alpha<-1.12e-6
beta<-3.60

df$age.est<-round(df$BODYWT/alpha)^(1/beta)
# 
# sampfreq.by.week.yr<-tapply(df$freq, list(df$year,df$week), sum)
# 
# wtfreq.by.week.yr<-tapply(df$freq[df$year==2021], 
#                           list(df$BODYWT[df$year==2021],
#                                df$week[df$year==2021]), sum)
# agefreq.by.week.yr<-tapply(df$freq[df$year==2021], 
#                            list(df$age.est[df$year==2021],
#                                 df$week[df$year==2021]), sum)



# contour plots for Weight

nx<-sum(range(bwts$week))

nx<-32 # number of weeks
ny<-50 # 
# year.seq<-c(min(bwts$year):max(bwts$year)), this avoids 'magic numbers' but 
# since there are gaps in the data it has to be as below
# range(df$BODYWT)
# range(bwts$BODYWT)

year.seq<-c(1997:2006, 2010:2019, 2021:2022)
for (i in year.seq){
  ### save the plots to a set of png files
  #name1= paste0(getwd(),"/weight.plots.")
  # num=iyear   
  # ext = ".png"  
  #  path3 = paste0(name1,num,ext)  
  #  png(file=path3) 
  tmp <- df %>% filter(year == i)
 # iyear<-2022
  yy<-interp(tmp$week,
             tmp$BODYWT,
             tmp$freq,
             xo=seq(20,45, length=nx),
             yo=seq(50,250,length=ny),
             nx=nx, ny=ny, linear=FALSE,extrap=FALSE, duplicate ="mean")
  image(yy, main=paste("Weight prediction surface, year= ",i),
        xlab="Week of Year", ylab="Weight (g) of Sample")
  # find first week greater than or equal to 20. 
  min.wk <- tmp$week[which(tmp$week>=20)[1]]
  wgt.est.quantiles<-wtd.quantile(df$BODYWT[df$year==i & df$week==min.wk],
                                  w=df$freq[df$year==i & df$week==min.wk], 
                                  probs=c(0.05,0.1,0.9))
  
  # compute vectors of projections of computed weights for the 10%ile and 90%ile of initial weight 
  x.week=seq(from=min.wk, to= 45)
  # plot the growth trajectory for 10th percentile of weight sample in initial week = min.wk
  age.10wgt<-(wgt.est.quantiles[1]/alpha)^(1/beta)
  proj.wt.10<-alpha* (age.10wgt+ 7*(x.week - min.wk))^beta
  lines(x.week,proj.wt.10, col="red",lty=2, lwd=2)
  
  # plot the growth trajectory for 90th percentile of weight sample in initial week = min.wk
  age.90wgt<-(wgt.est.quantiles[3]/alpha)^(1/beta)
  proj.wt.10<-alpha* (age.90wgt+ 7*(x.week - min.wk))^beta
  lines(x.week,proj.wt.10, col="blue",lty=2, lwd=2)
  contour(yy,add=TRUE)    #,nlevels = 5)
}

Analysis of putative Birth weeks

Compute birth week as function of age at capture and then plot frequency of squid in sample:

# Compute birth week as function of age at capture
#df <- as.data.frame(df)
df$birth.week<-round(df$week-df$age.est/7)  
#  Plot frequency of birth weeks based on frequency of fish in sample.
b.week<-seq(from=min(df$birth.week), to = max(df$birth.week))
for (i in year.seq){
  tmp <- df %>% filter(year == i)
  b.week.table<-tapply(tmp$freq,tmp$birth.week, sum)
 barplot(b.week.table, main=paste("Birthweek frequency for Landings in ", i),
         xlab="Birth Week", ylab="Frequency", col="red")
 }

Adjusting birth week frequencies for natural mortality. This uses an M of 0.06, this may need to be adjusted/updated.

# adjust birth week frequencies for natural mortality
M <- 0.06   # natural mortality per week.   To be addressed later

for (i in year.seq){
  b.week.table<-tapply(tmp$freq* exp(tmp$age.est/7*M),
                       list(tmp$birth.week),
                       sum)
  barplot(b.week.table, main=paste("Birthweek frequency for Landings, adjust for M in ", i),
          xlab="Birth Week", ylab="Frequency", col="blue")
}

Relative Condition

Relative condition (Kn) = W/Wwhere W is the predicted length-specific mean weight for the fish population in a given region

# Two types of condition estimates
# -- Kn = W/W' -- 
# W = weight of individual squid, 
# W' = predicted length-specific mean weight for the pop in a given region

# -- Fultons K =  100* W /L^3 -- #

### -- Compute relative condition -- ###
# compute length-weight relationships based on 1978 lange_johnson study: 
lw_paired$W <- 0.04810*(lw_paired$length*.10)^2.71990
lw_paired$Kn <- lw_paired$weight/lw_paired$W
# lw_paired$W21 <- 0.082888*(lw_paired$length*.10)^2.530143 
# lw_paired$W21 <- 0.1002239*(lw_paired$length*.10)^2.463418 ** on full data set

# lw_paired$Kn21 <- lw_paired$weight/lw_paired$W21
lw_paired$fk <- 100 * (lw_paired$weight/(lw_paired$length^3))



g= ggplot(lw_paired, 
            aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
  labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
       title = 'Relative Condition:Kn') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_hline(yintercept = 1, lty = 2) +
  ylim(0,2) +
  geom_boxplot() + 
  ecodata::theme_ts()

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))

# # Here I used mean weight of population (not length specific)
# g0 = ggplot(dat, 
#             aes(x = factor(AREA_CODE), y = pop_kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = 'Relative Condition:',
#        subtitle = 'Based on mean weight of population') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   ylim(0,2) +
#   geom_boxplot()
# ggsave(path = here::here('results'),"Relative_condition_by_stat_area.jpg", 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# library(patchwork) 
# g1 = ggplot(dat.check, 
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = 'Relative Condition (Kn):',
#        subtitle = 'Based on 1970s regression coefficients') +
#   theme_classic() +
#   guides(fill='none')+
#   #guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   ylim(0,2) +
#   geom_boxplot() # %>% filter(Kn <= 4.5)
# g2 = ggplot(dat.check %>% filter(Kn.updated <= 6), 
#        aes(x = factor(AREA_CODE), y = Kn.updated, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = '',
#        title = 'Relative Condition (Kn):',
#        subtitle = 'Based on 2021/22 regression coefficients') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ## Note - i changed the filter and/or ylim to play with scale, remove outliers 
# 
# g1 + g2
# ggsave(path = here::here('results'),"kn_1970s_vs_kn_2122.jpg", 
#        width = 10, height = 6, units = "in", dpi = 300)
# g1 + g0  
# ggsave(path = here::here('results'),"kn_vs_mean_condition_by_stat_area.jpg", 
#        width = 10, height = 6, units = "in", dpi = 300)
# g1 + g0  # with ylim(0,2)
# ggsave(path = here::here('results'),"kn_vs_mean_condition_by_stat_area_zoom.jpg", 
#        width = 10, height = 5, units = "in", dpi = 300)
# 
# # Here I used length-specific weight of population 
# ggplot(dat, aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # remove 626 as outlier and other outliers
# #  2021 622 and 526
# sa526 <- df21 %>% filter(AREA_CODE == 526)
# sa622 <- df21 %>% filter(AREA_CODE == 622)
# max(sa622$weights)
# max(sa526$weights)
# ggplot(dat %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                          525, 600, 621, 615, 562)),
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ggplot(dat %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                        525, 600, 621, 615, 562) &
#                         weights < 100),
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ggsave(path = here::here('results'),"Relative_condition_Kn_no_outlier_by_stat_area.jpg", 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# 
# # 2021 boxplot of condition
# ggplot(df21, aes(x = factor(AREA_CODE), y = cond2, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition',
#        title = '2021') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # 2022 boxplot of condition
# ggplot(df22, aes(x = factor(AREA_CODE), y = cond2, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition (k)',
#        title = '2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # Year specific condition
# ggplot(df22, aes(x = factor(AREA_CODE), y = cond, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition (k)',
#        title = '2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# 
# ggplot(df21, aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)',
#        title = 'Fleet: Wet boats',
#        subtitle = 'Lengths') +
#   #scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21, aes(x = factor(hold_type), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)',
#        title = 'Fleet: Wet boats') +
#   #scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(week %in% c(25:26)), 
#        aes(x = factor(week), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(hold_type %in% c('Ice', 'FT')), 
#        aes(x = factor(week), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                         525, 600, 621, 615, 562)), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# # small: <80 mm ML; medium: 80-120 mm ML; large: >120 mm ML
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# #80- 120 interesting, <200 more like large
# ggplot(df21 %>% filter(lengths > 80 & lengths < 200), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Medium') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths > 200), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Large') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(k21 %>% filter(hold_type %in% c('RSW', 'Ice') & PARAM_VALUE_NUM <= 500), 
#        aes(x = factor(vessel_id), y = PARAM_VALUE_NUM, fill = as.factor(year)))+ 
#   labs(x = 'Vessel', y = 'Mantle length (mm)', 
#        title = 'Fleet: Wet boats', 
#        subtitle = 'Lengths') +
#   scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() + 
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()