Goal of this document

This document aims to summarize and visualize ILXSM data, a cooperative research effort to collect paired length and weight data at processing facilities.

Data wrangling

The first step is to read in the most recent data file and load necessary packages.

# sets the working directory where the data lives
setwd(here::here('data/ilxsm'))
# Note the previous version was 'ILXSM_Pull_01_25_23.csv'
# Most updated data from Thomas on 2/14/23
lw_data <- read.csv('Entire_Database_Pull_ILXSM_2.14.23.csv')

Next step is to wrangle the data a bit to include date variables of interest and identify the unique stat areas that are in this file.


The unique statistical areas in this set are as follows:

# Make sure all data are in same units, if not convert
# unique(lw_data$UNIT_MEASURE)
lw_data <- lw_data %>%
  mutate(PARAM_VALUE_NUM = case_when(PARAM_TYPE == 'ML' & UNIT_MEASURE == 'CM' ~ PARAM_VALUE_NUM * 10, 
                            PARAM_TYPE == 'ML' & UNIT_MEASURE == 'MM'~ PARAM_VALUE_NUM, 
                            PARAM_TYPE == 'WT' & UNIT_MEASURE == 'GM' ~ PARAM_VALUE_NUM))
lw_data$LAND_DATE <-lubridate::dmy(lw_data$LAND_DATE)
lw_data <- lw_data %>%
  mutate(year = lubridate::year(LAND_DATE),
         month = lubridate::month(LAND_DATE),
         week = lubridate::week(LAND_DATE), 
         day = lubridate::day(LAND_DATE))
# setwd(here::here()) # sets the current working directory
unique_areas <- sort(unique(lw_data$AREA_CODE))
unique_areas
##  [1]   0 500 526 534 537 541 562 600 610 615 616 620 621 622 623 624 626 631 632
## [20] 633
## -- Notes from Thomas about area codes -- ## 
# AREA_CODE == 000 is all stat areas. 
# The 600 series are AREA_CODE identified from 601 to 699. 
# At this point in time AREA_CODE = 639 is the largest. 
# The micro level series end in tenths. 
# Example #1:  610, 620 630, ect. 
# Example #2: The 610 series is comprised of stat areas 611:619.

Next, I identify latitude and longitude for the center point of each stat area, so that I can plot the locations in a map. These coordinates do not indicate the actual fishing locations, but rather are the centerpoint derived from the map of statistical areas available at the GARFO website.

# Generate a dataframe that pairs a lat lon with each stat area code. This is 
# roughly the centroid of that polygon. I did this manually by looking at the 
# GARFO map but there is likely a more sophisticated way to do this with a 
# function in the sf package. 

# Put placeholders in 0, 500, 600, 610, 620
stat_areas <- data.frame(area_code = sort(unique(lw_data$AREA_CODE)), 
                         lat = c(0, 0, 40.50, 39.5, 40.5, 39.5,
                                 41.25, 0, 0, 39.5, 39.5, 0, 38.5,
                                 38.5, 38.5, 38.5, 37.5,
                                 36.5, 36.5, 36.5), 
                         lon = c(0, 0, -69.5, -70.50, -70.75, -69.5,
                                 -66.75, 0, 0, -73.5,-72.35, 0, -74.5,
                                 -73.5, -72.5, -71.0,-74.5,
                                 -75.25, -74.5, -73.5))

# Note, that I substituted zeros for the stat areas 0 and 600, so that I can keep
# those mixed areas for tallying the number of measurements. These can later be 
# removed for analyses depending on the question at hand. 

lw_tally_full <- lw_data %>% 
  group_by(AREA_CODE) %>%
  summarise(n = length(ORGANISM_ID))

lw_tally_full$lat <- NA
lw_tally_full$lon <- NA
for (i in 1:length(stat_areas$area_code)){
  a <- subset(lw_tally_full, AREA_CODE == unique_areas[i])
  locs <- which(lw_tally_full$AREA_CODE %in% a$AREA_CODE)
  b <- subset(stat_areas, area_code == unique_areas[i], select = 'lat')
  c <- subset(stat_areas, area_code == unique_areas[i], select = 'lon')
  lw_tally_full[locs,3] <- b$lat
  lw_tally_full[locs,4] <- c$lon
}
# Remove rows with placeholders for 0, 500, 600, 610, 620 since they are not
# specific to a unique stat area
lw_tally <- lw_data %>% 
  group_by(AREA_CODE, year) %>%
  summarise(n = length(ORGANISM_ID))

lw_tally <- lw_tally[-c(1:4,12:14, 19:20),] # this is not an 
# ideal way to do this - will change year to a param in quarto, removes area
# codes that are mixed areas
lw21 = lw_tally %>% filter(year ==2021)
lw22 = lw_tally %>% filter(year ==2022)
lw23 = lw_tally %>% filter(year ==2023)

lw_tally_by_week <- lw_data %>% 
  group_by(AREA_CODE, year, week) %>%
  summarise(n = length(ORGANISM_ID))
# removes area codes that are mixed areas
#lw_tally_by_week <- lw_tally[-c(1:4,12:14, 19:20),] 



## ---- Seperate data into lengths and weights by year ---- ##

wt_full <- lw_data %>%
  filter(PARAM_TYPE == 'WT') %>%
  rename(weight = PARAM_VALUE_NUM)
ml_full <- lw_data %>%
  filter(PARAM_TYPE == 'ML') %>%
  rename(length = PARAM_VALUE_NUM)

ml_21 <- ml_full %>%
  filter(year == 2021) 
ml_22 <- ml_full %>%
  filter(year == 2022) 
wt_21 <- wt_full %>%
  filter(year == 2021) 
wt_22 <- wt_full %>%
  filter(year == 2022) 

# Consolidate the dataset to just samples that have paired weight/length values
ml.full.c <- ml_full %>% select(LAND_DATE,AREA_CODE,ORGANISM_ID, 
                              length,VESSEL_NAME, VTR_SERIAL_NUM)

wt.full.c <- wt_full %>% select(LAND_DATE,AREA_CODE,ORGANISM_ID, 
                              weight,VESSEL_NAME, VTR_SERIAL_NUM)

lw_paired <- right_join(ml.full.c, wt.full.c, 
                           by = c('LAND_DATE','AREA_CODE','ORGANISM_ID', 
                                 'VESSEL_NAME', 'VTR_SERIAL_NUM'))

lw_paired <- na.omit(lw_paired)
lw_paired <- lw_paired %>% relocate(weight, .after = length)

lw_paired <- lw_paired %>%
  mutate(year = lubridate::year(LAND_DATE),
         month = lubridate::month(LAND_DATE),
         week = lubridate::week(LAND_DATE), 
         day = lubridate::day(LAND_DATE))

## -- Add in permit, vesshold type and HULLNUM for linking to other data -- ##


### ----vessel hold type------------------------ ###
setwd(here::here('data/'))
vesshold <- read_excel("vessel_type_2021.xlsx",
                       sheet = "vessel_type") %>%
  rename(VESSHOLD = Vess_type_Final, 
         VESSEL_NAME = VESSNAME) %>%
  mutate(VESSHOLD = str_to_upper(VESSHOLD))

# vesshold %>% filter(!(VESSHOLD %in% c("ICE", "FT", "RSW"))) %>% 
#   dplyr::select("PERMIT", "COMMENT")

  lw_paired <- lw_paired %>%
  left_join(vesshold, by = 'VESSEL_NAME') %>%
  droplevels()

  lw_paired$VESSHOLD <- ifelse((lw_paired$PERMIT == 410371 & lw_paired$year <= 2020),
                             "FT", lw_paired$VESSHOLD)
  lw_paired$VESSHOLD <- ifelse((lw_paired$PERMIT == 410371 & lw_paired$year > 2020), 
                             "RSW", lw_paired$VESSHOLD)
  lw_paired$VESSHOLD <- ifelse((lw_paired$PERMIT == 410403 & lw_paired$year <= 2016), 
                             "FT", lw_paired$VESSHOLD)
  lw_paired$VESSHOLD <- ifelse((lw_paired$PERMIT == 410403 & lw_paired$year > 2016), 
                             "RSW", lw_paired$VESSHOLD)
  
  lw_paired$fleet <- ifelse(lw_paired$VESSHOLD == 'FT', yes = 'FREEZE', no = NA)  
  lw_paired$fleet <- ifelse(lw_paired$VESSHOLD %in% c('ICE', 'RSW'),
                          yes = 'WET', no = lw_paired$fleet)  

## remove extraneous columns, re-order if necessary 
lw_paired <- lw_paired[,-c(14:17)]

# This part attempts to add in HULLNUM ** but it should be noted that HULLNUM
# listed in the vessel id data set (see line 831 below, file: 
# v.ids=CFDBS_vessels_1997-2019.csv) changes over the years as does the Permit 
# numbers and they don't necessarily line up with the "vessel_type_2021.xlsx"
# document. This likely needs further attention in the future - I will ask 
# someone who has more information on the structure of these data sets 
# and related fields 

# uvn <- sort(unique(lw_paired$VESSEL_NAME))
# lw_paired$HULLNUM <- NA
# for (i in 1:length(uvn)){
#   a <- subset(v.ids, VESSNAME == uvn[i])
#   locs <- which(lw_paired$VESSEL_NAME %in% a$VESSNAME)
#   lw_paired[locs,15] <- a[1,3]
# }

Stat areas listed as 000 or 600 represent mixed areas. The analyses below do not include any mixed area data entries. Thomas and I are currently developing a protocol for including these data going forward.

Quick list of the number of individual squid measured in each of these mixed area codes

  • There are currently 2542 data entries from mixed area 00

  • There are currently 4038 data entries from mixed area 500

  • There are currently 19857 data entries from mixed area 600

  • There are currently 307 data entries from mixed area 610

  • There are currently 23987 data entries from mixed area 620

Data visualization


The full data set has 196452 data entries, from 15 different statistical areas, across 3 years. Of the 196452 data entries across all years, there are paired length and weight measurements for a total of 69300 individuals. At the time of this report, the number of individual squid with paired measurements for each year is as follows:

  • 2021: 42106
  • 2022: 27114
  • 2023: 80

The number of squid measured at each site are shown below for each year:

ggplot(lw_tally_by_week, aes(x = week, y = n, color = factor(year))) + 
  geom_line(cex = 1) +
  geom_point(size = 1) +
  labs(x = 'Week of Year', y = 'Number of samples (n)') +
  labs(color = 'Year') +
  scale_color_manual(values=c('gold1','slateblue3','violetred1'))+
  ecodata::theme_facet()+
  facet_wrap(~year)

ggplot(data = lw_tally, aes(x = factor(AREA_CODE), y = n, 
                            fill = factor(year))) +
  geom_bar(stat ='identity', position = position_dodge(), color = 'black') +
  scale_fill_manual(values=c('gold1','slateblue3','violetred1')) +
  labs(x = 'Statistical Area', y = 'Number of samples (n)') +
  labs(fill = 'Year') +
  ecodata::theme_ts()

Here are some maps highlighting the statistical areas from which squid were sampled.

  • The first figure depicts the boundaries of the Greater Atlantic Region Statistical Areas from which Illex samples have been reported to the ILXSM database.

  • The second figure indicates the amount of samples reported for each statistical area. The size of the data point is representative of the number of samples recorded that particular area. Note this map is cumulative, it summarizes total samples in each stat area across all available years.

## Bring in shapefiles
wd = here::here("shapefiles")
stat_areas_sp <- rgdal::readOGR(wd,'groundfish_stat_areas', verbose = FALSE) 
proj4string(stat_areas_sp) <- CRS("+init=epsg:4326")
# 526 534 537 541 562 600* 615 616 621 622 623 624 626 631 632 633
# Subset the shape file for just the stat areas that match the ILXSM data
stat_areas2 <- stat_areas_sp[na.omit(stat_areas_sp@data$Id %in% c('526','534',
                                                                  '537','541',
                                                                  '562',
                                                                  '615','616', 
                                                                  '621','622',
                                                                  '623', '624',
                                                                  '626', '631',
                                                                  '632', '633')),]
US.areas <- rgdal::readOGR(wd, 'USA', verbose = FALSE)
proj4string(US.areas) <- CRS("+init=epsg:4326")
canada.areas <-rgdal::readOGR(wd,'Canada', verbose = FALSE)
proj4string(canada.areas) <- CRS("+init=epsg:4326")
canyons <- rgdal::readOGR(wd, 'major_canyons', verbose = FALSE)
#proj4string(canyons) <- CRS("+init=epsg:4326")
# canyons <- st_read("~/github/ssalois1/NEFSC-illex_indicator_viewer/shapefiles/major_canyons.shp") %>% st_transform(., "+proj=longlat +datum=NAD83 +no_defs")
## -- Extra reference points you may want to add to map -- ##
# mesh_exmp <- rgdal::readOGR(wd, 'Illex_Fishery_Mesh_Exemption_Area')
# lobster <- rgdal::readOGR(wd, 'Lobster_Restricted_Gear_Areas')
# fishing <- marmap::getNOAA.bathy(-80,-66, 36, 41,resolution=1)
# fishing = fortify.bathy(fishing)
# interestingcanyons <- c("Norfolk Canyon", "Wilmington Canyon", 
#                         "Spencer Canyon", "Hudson Canyon", 
#                         "Atlantis Canyon")

## bring in bathymetry
atl <- marmap::getNOAA.bathy(-78,-64, 35, 45)
atl = fortify.bathy(atl)
blues = colorRampPalette(brewer.pal(9,'Blues'))(25)
depths <- c(0,50,100,200,300,500,Inf)
depths2 <- c('0','50','100','200','>300')
blues2 <- blues[c(5,7,9,11,13,24)]

## -- Set up quick simple map for future plots -- ## 
shp_df <- broom::tidy(stat_areas2, region = 'Id')
cnames <- aggregate(cbind(long, lat) ~ id, data=shp_df, FUN=mean)
# Adjust a couple of the stat area label locations
cnames[1,c(2,3)] <- c(-69.50, 40.50)
cnames[3,c(2,3)] <- c(-70.75, 40.50)
cnames[5,c(2,3)] <- c(-66.75, 41.25)
cnames[8,c(2,3)] <- c(-74.5, 38.5)
cnames[13,c(2,3)] <- c(-75.25, 36.5)
stat_map_no_lab =  ggplot() + geom_polygon(data = stat_areas2,
                               aes(x = long, y = lat, group = group), 
                               colour = "black", fill = NA) + theme_void() 
stat_map = stat_map_no_lab + geom_text(data = cnames, 
                                aes(x = long, y = lat, label = id), 
                                size = 2) + 
  geom_sf(data = US.areas %>% st_as_sf() ,
          color = 'gray20', fill = 'wheat2') +
  coord_sf(xlim = c(-76,-66.0), ylim = c(35.5,42.3), 
           datum = sf::st_crs(4326)) 

# version 2 (stat_map.v2) has larger labels than stat_map, which needs
# smaller values to be legible inside a bar plot
stat_map.v2 = stat_map_no_lab + geom_text(data = cnames, 
                                aes(x = long, y = lat, label = id), 
                                size = 4) + 
  geom_sf(data = US.areas %>% st_as_sf() ,
          color = 'gray20', fill = 'wheat3') +
  coord_sf(xlim = c(-76,-66.0), ylim = c(35.5,42.3), 
           datum = sf::st_crs(4326)) 
stat_map.v2

# ggsave(path = here::here('figures'),'stat_areas.jpeg',
#        width = 6, height = 3.75, units = "in", dpi = 300)

## -- Back to points of interest for the main detailed map below -- ##

# Major Ports  
ports <- data.frame(lon = c( -70.9342,-71.4856,-74.9060, -76.3452), 
                    lat = c(41.6362, 41.3731,38.9351, 37.0299),
                    port = c('New Bedford, MA', 'Narragansett, RI', 
                             'Cape May, NJ', 'Hampton, VA'))
# Geographical locations of interest
locations <- data.frame(lon = c( -70.9342), 
                    lat = c(41.6362),
                    port = c('Cape Hatteras, NC'))

## -- Here is the map -- ##
m1 = ggplot() +
  geom_contour_filled(data = atl,
                      aes(x=x,y=y,z=-1*z),
                      breaks=c(0,50,100,250,500,Inf),
                      size=c(0.3)) + 
  scale_fill_manual(values = blues2, # 5:20
                    name = paste("Depth (m)"),
                    labels = depths2) +
  geom_contour(data = atl,
               aes(x=x,y=y,z=-1*z),
               breaks=c(0,50,100,250,500,Inf),
               size=c(0.3), 
               col = 'lightgrey') +
  new_scale_fill() +
  geom_sf(data = US.areas %>% st_as_sf() ,color = 'gray20', fill = 'wheat3') +
  geom_sf(data = canada.areas %>% st_as_sf() ,color = 'gray20', fill = 'wheat3') +
  geom_sf(data = stat_areas2 %>% st_as_sf() ,  
          fill = NA, color = 'gray20', lwd = 0.6) +
  geom_point(data = lw_tally_full,
             aes(x = lon, y = lat, size = n), 
             color = 'black', bg = 'deeppink3',
             pch = 21) +
  geom_point(data = ports,
             aes(x = lon, y = lat), 
             color = 'black', bg = 'cornsilk1',
             pch = 23, lwd = 5) +
  labs(size = 'Number of Samples') +
  geom_label(data = ports, aes(lon, lat, label = port),
             colour = 'black', fill = 'cornsilk1', alpha = 0.8,
             fontface = 'bold', nudge_x = c(0.03,-1.2,0.07,0.80),
             nudge_y = c(0.40,0.2,0.4,0.33), size = 3) +
  coord_sf(xlim = c(-76,-66.0), ylim = c(35.5,44), datum = sf::st_crs(4326)) +
  xlab('Longitude') + 
  ylab('Latitude') +
  theme_bw()

## This increases axis text size and puts the tick marks on the inside
m1 + theme(axis.text = element_text(size = 13), 
        axis.title = element_text(size = 15),
        axis.ticks.length = unit(-1.4, "mm")) 

## -- All years -- ##
# Lengths
p1 = ggplot(ml_full, 
       aes(x = length, y = factor(week), fill =  factor(week))) +
  geom_density_ridges(alpha = .8, color = 'white',
                      scale = 2.5, rel_min_height = .01) +
  labs(title = 'ILXSM Lengths (all years)', x = 'Length (mm)', y = 'Week', fill = 'Week') +
  guides(color = guide_legend(title = 'Week')) +
  theme_ridges() +
  theme_classic() 

# Weights
p2 = ggplot(wt_full, 
       aes(x = weight, y = factor(week), fill =  factor(week))) +
  geom_density_ridges(alpha = .8, color = 'white',
                      scale = 2.5, rel_min_height = .01) +
  labs(title = 'ILXSM Weights (all years)', x = 'Weights (g)', y = 'Week', fill = 'Week') +
  guides(color = guide_legend(title = 'Week')) +
  theme_ridges() +
  theme_classic() 

p1 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

p2 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

Comparisons of mean length and mean weight across years:

wt_stats <- wt_full %>%
  group_by(year) %>%
  summarise(mean = mean(weight), 
            sd = sd(weight),
            min = min(weight), 
            max = max(weight))

ml_stats <- ml_full %>%
  group_by(year) %>%
  summarise(mean = mean(length), 
            sd = sd(length),
            min = min(length), 
            max = max(length))

wt_stats$param <- 'weight'
ml_stats$param <- 'mantle_length'


ilxsm_stats <- rbind(wt_stats, ml_stats)
ilxsm_stats$conf_interval <- (1.96*ilxsm_stats$sd)
# Labels outside bars: vjust = -0.5, Inside bars:  vjust = 1.6,
p1 = ggplot(data = wt_stats, aes(x = year, y = mean)) +
  geom_bar(stat ='identity', fill = c('gold1','slateblue3','violetred1'),
           col = 'black') +
  # geom_text(aes(label = round(mean)), 
  #           vjust = 1.6, hjust = c(-1.8,-3,-3),
  #           color = 'black', size = 3.5) +
  ylab('Mean weight (g)') +
  xlab('Year') +
  ylim(0,200) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width=.2,
                position=position_dodge(.9)) + 
  # geom_errorbar(aes(ymin = mean - (1.96 * sd), ymax = mean + (1.96 *sd)), width=.2,
  #               position=position_dodge(.9)) + 
  ecodata::theme_ts()
p1 = p1 +  theme(text = element_text(size = 14), 
            axis.text = element_text(size = 13), 
            axis.title = element_text(size = 15),
            axis.ticks.length = unit(-1.4, "mm")) 

p2 = ggplot(data = ml_stats, aes(x = year, y = mean)) +
  geom_bar(stat ='identity', fill = c('gold1','slateblue3','violetred1'),
           col = 'black')+
  # geom_text(aes(label = round(mean)), 
  #           vjust = 1.6, hjust = c(-1.8,-1.8,-1.8),
  #           color = 'black', size = 3.5) +
  ylab('Mean mantle length (mm)') +
  xlab('Year') +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width=.2,
                position=position_dodge(.9)) + 
   # geom_errorbar(aes(ymin = mean - (1.96 * sd), ymax = mean + (1.96* sd)), width=.2,
   #              position=position_dodge(.9)) + 
  ecodata::theme_ts()
p2 = p2 +  theme(text = element_text(size = 14), 
                 axis.text = element_text(size = 13), 
                 axis.title = element_text(size = 15),
                 axis.ticks.length = unit(-1.4, "mm")) 



p1 + p2

ml_full_2122 <- subset(ml_full, year %in% c(2021,2022))
dat_ml <- as_tibble(ml_full_2122)
#unique(dat_ml$year)

x <- which(names(dat_ml) == "year") # name of grouping variable
y <- which(names(dat_ml) == "length") # names of variables to test
  
method <- "wilcox.test" # one of "wilcox.test" or "t.test"
paired <- FALSE 

p <- ggboxplot(dat_ml, x = colnames(dat_ml[12]), y = colnames(dat_ml[6]),
               color = colnames(dat_ml[12]),
               palette = c('gold1','slateblue3'),
               legend = "none",
               title = 'Mantle Length', 
               xlab = 'Year', 
               ylab = 'Length (mm)') + 
  ecodata::theme_ts()# add = "jitter"

    #  Add p-value
p = p + stat_compare_means(aes(label = paste0(after_stat(method), ", p-value = ", 
                                                    after_stat(p.format))),
                                 method = method,
                                 paired = paired,
                                 # group.by = NULL,
                                 ref.group = NULL)

p + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

wt_full_2122 <- subset(wt_full, year %in% c(2021,2022))
dat_wt <- as_tibble(wt_full_2122)
x <- which(names(dat_wt) == "year") # name of grouping variable
y <- which(names(dat_wt) == "weight") # names of variables to test

p <- ggboxplot(dat_wt,x = colnames(dat_wt[12]), y = colnames(dat_wt[6]),
               color = colnames(dat_wt[12]),
               palette = c('gold1','slateblue3'),
               legend = "none",
               title = 'Body weight', 
               xlab = 'Year', 
               ylab = 'Weight (g)') + 
  ecodata::theme_ts()# add = "jitter"
 
#  Add p-value
p =  p + stat_compare_means(aes(label = paste0(after_stat(method), ", p-value = ", 
                                               after_stat(p.format))),
                            method = method,
                            paired = paired,
                            # group.by = NULL,
                            ref.group = NULL)

p + theme(axis.text = element_text(size = 10), 
          axis.title = element_text(size = 10),
          axis.ticks.length = unit(-1.4, "mm"))   

Let’s take a closer look at each year.

2021


In 2021 there were a total of 114130 organisms measured across 9 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2021), 
       aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'gold1', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 


g = g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))  

# Inset a map of stat areas for reference

g + inset_element(p = stat_map,
                left = 0.01,
                bottom = 0.65,
                right = 0.5,
                top = 0.95)

# Set up data to create a pie chart depicting how many samples are paired 

datpie21 <- data.frame('Category' = c('Unpaired samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2021))-nrow(lw_paired %>% filter(year == 2021)))/nrow(lw_data %>% filter(year == 2021))*100,
                                                    nrow(lw_paired %>% filter(year == 2021))/nrow(lw_data %>% filter(year == 2021))*100))

ggplot(datpie21, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('gold1','gold3'))

## -- Quick plots -- ##
# Lengths
ggplot(ml_full %>% filter(year == 2021), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2021 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'YlOrBr') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2021), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2021 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'YlOrBr') +
  ecodata::theme_ts()

2022


In 2022 there were a total of 30974 organisms measured across 14 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2022), 
       aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'slateblue3', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 

g = g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

# Inset a map of stat areas for reference

g + inset_element(p = stat_map,
                left = 0.01,
                bottom = 0.65,
                right = 0.5,
                top = 0.95)

# Set up data to create a pie chart depicting how many samples are paired 
datpie22 <- data.frame('Category' = c('Total samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2022))-nrow(lw_paired %>% filter(year == 2022)))/nrow(lw_data %>% filter(year == 2022))*100,
                                      nrow(lw_paired %>% filter(year == 2022))/nrow(lw_data %>% filter(year == 2022))*100))


ggplot(datpie22, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('slateblue1','slateblue3'))

# Lengths
ggplot(ml_full %>% filter(year == 2022), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2022 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'Purples') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2022), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2022 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'Purples') +
  ecodata::theme_ts()

2023


In 2023 there were a total of 617 organisms measured across 2 statistical areas.

g = ggplot(data = lw_tally %>% filter(year==2023), 
           aes(x = factor(AREA_CODE), y = n)) +
  geom_bar(stat ='identity', fill = 'violetred1', col = 'black') + 
  xlab('Statistical Area') + 
  ylab('Number of organisms measured') +
  ecodata::theme_ts() 

g = g + theme(axis.text = element_text(size = 10), 
          axis.title = element_text(size = 10),
          axis.ticks.length = unit(-1.4, "mm")) 

g + inset_element(p = stat_map,
                left = 0.01,
                bottom = 0.65,
                right = 0.5,
                top = 0.95)

# Set up data to create a pie chart depicting how many samples are paired 
datpie23 <- data.frame('Category' = c('Total samples', 'Paired samples'),
                     'Proportion' = c((nrow(lw_data %>% filter(year == 2023))-nrow(lw_paired %>% filter(year == 2023)))/nrow(lw_data %>% filter(year == 2023))*100,
                                      nrow(lw_paired %>% filter(year == 2023))/nrow(lw_data %>% filter(year == 2023))*100))


ggplot(datpie23, aes(x="", y=Proportion, fill= factor(Category))) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = paste0(round(Proportion), "%")), 
            position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_manual(values=c('violetred1','violetred3'))

# Lengths
ggplot(ml_full %>% filter(year == 2023), aes(x = length, y = factor(week),
                          fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2023 Illex Lengths',
       x = 'Lengths (mm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'PuRd') +
  ecodata::theme_ts()

# Weight
ggplot(wt_full %>% filter(year == 2023), aes(x = weight, y = factor(week),
                                           fill = stat(quantile))) +
  stat_density_ridges(quantile_lines = FALSE,
                      calc_ecdf = TRUE, rel_min_height = 0.0001,
                      geom = "density_ridges_gradient") +
  labs(title = '2023 Illex Weights',
       x = 'Weight (gm)', y = 'Week') +
  scale_fill_brewer(name = '', palette = 'PuRd') +
  ecodata::theme_ts()

Merging all weight data

Merged the data made available to the Illex RTA Working Group by Lisa Hendrickson and the data from the ILXSM project to view patterns of Illex weight over time.

  • The data set ranges from 1997-2006 to 2009-2023. There is no data for years 2007 and 2008.
setwd(here::here('data'))
bw <- read.csv('illex_bodywts.csv')
bw$DATE <- as.Date(bw$DATE)
bw <- bw %>%
  mutate(year = year(DATE),
         month = month(DATE),
         week = week(DATE), 
         day = day(DATE)) %>%
  as.data.frame()
#names(bw) <- tolower(bw)

# Let's add ILXSM and Lisa's weight data together 
ilxsm_bw <- lw_data %>% filter(PARAM_TYPE == 'WT') %>%
  select(LAND_DATE, PARAM_VALUE_NUM, year, month, week, day, VESSEL_NAME) %>%
  rename(DATE = LAND_DATE, BODYWT = PARAM_VALUE_NUM)


# Find the hullnum from CFDBS_vessels_1997-2019
# import vessel id data
v.ids <- read.csv('CFDBS_vessels_1997-2019.csv')
ilxsm_bw$HULLNUM <- NA

uvn <- sort(unique(ilxsm_bw$VESSEL_NAME))
# this located and extracts hullnum for each unique vessel then adds to 
# the data set of interest (ilxsm_bw)
for (i in 1:length(uvn)){
  a <- subset(v.ids, VESSNAME == uvn[i])
  locs <- which(ilxsm_bw$VESSEL_NAME %in% a$VESSNAME)
  ilxsm_bw[locs,8] <- a[1,3]
}


# Get this ready to merge
ilxsm_bw <- ilxsm_bw %>% select('HULLNUM', "DATE", 'BODYWT',
                              'year','month', 'week', 'day')


# merge ilxsm data to Lisa's
bwts <- rbind(bw, ilxsm_bw)


## Now add permits & vessel name just to be thorough for comparing data later 
bwts$permit <- NA
bwts$vessel_name <- NA

for (i in 1:length(uvn)){
  a <- subset(v.ids, VESSNAME == uvn[i])
  locs <- which(bwts$HULLNUM %in% a$HULLNUM)
  bwts[locs,8] <- a[1,2]
  bwts[locs,9] <- a[1,4]
  
}

Quick overview of data:

  • Weight range: 2 grams to 1104 grams
  • Year range: 1997, 2023
  • Week range: 1, 51

Boxplots

  • The first boxplot figure is ordered by year
  • The second boxplot figure is ordered by median.

Note: To better fit all years on the x-axis only the last two digits of each year are labeled. For example, 1997 is labeled ‘97’ and 2019 is labeled ‘19’

# All weights (Lisa + ILXSM)
g1 = ggplot(bwts, aes(x = factor(year), y = BODYWT)) + 
  labs(x = 'Year',
       y = 'Weight (g)',
       title = 'Illex body weights (WG + ILXSM)') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_boxplot() +
  ecodata::theme_ts() +
  scale_x_discrete(
    breaks = c(1997:2006, 2009:2023), 
    labels = c('97', '98', '99', '00', '01', '02', '03', '04', '05', '06',
               '09', '10', '11', '12', '13', '14', '15', '16', '17', '18',
               '19', '20', '21', '22', '23')
  ) 

g1 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm")) 

# ggsave(path = here::here('results'),'all_weights.jpg', 
#        width = 8, height = 3.75, units = "in", dpi = 300)


# All weights (Lisa + ILXSM) ordered by median
g2 = ggplot(bwts, 
       aes(x = reorder(factor(year),BODYWT, median, na.rm = TRUE), y = BODYWT)) + 
  labs(x = 'Year',
       y = 'Weight (g)', 
       title = 'Illex body weights (WG + ILXSM)',
       subtitle = 'Ordered by median') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_boxplot() + 
  ecodata::theme_ts() + 
  scale_x_discrete(
    breaks = c(1997:2006, 2009:2023), 
    labels = c('97', '98', '99', '00', '01', '02', '03', '04', '05', '06',
               '09', '10', '11', '12', '13', '14', '15', '16', '17', '18',
               '19', '20', '21', '22', '23')
  ) 
g2 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm")) 

# ggsave(path = here::here('results'),'all_weights_median.jpg', 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# 
# ggplot(bwts, 
#        aes(x = reorder(factor(year),BODYWT, mean, na.rm = TRUE), y = BODYWT)) + 
#   labs(x = 'Year',
#        y = 'Weight (g)', 
#        title = 'Illex body weights (WG + ILXSM)',
#        subtitle = 'Ordered by mean') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# # ggsave(path = here::here('results'),'all_weights_median.jpg', 
# #        width = 8, height = 3.75, units = "in", dpi = 300)

Time series

bw_tally <- bwts %>% 
  group_by(year) %>%
  summarise(mean.wt = mean(na.omit(BODYWT)), 
            sd.wt = sd(na.omit(BODYWT)), 
            max.wt = max(na.omit(BODYWT)),
            min.wt = min(na.omit(BODYWT)),
            n = length(na.omit(BODYWT)))
yrs = unique(bw_tally$year)

Visualizing the time series and applying a trend analysis.

# Since this is a relatively short time series (23 years), the function from geom_gls() from ecodata will not work. 
# 
# This function fits four trend models to each time series, uses AICs to select the best model fit, and then implements a likelihood-ratio test to determine if a trend is present. More details on this function


g = ggplot(bw_tally, aes(x = year, y = mean.wt)) +
  geom_line() +
  #geom_point(color = 'grey20')+
  #geom_smooth() +
  # ecodata::geom_gls() +
  # xlim(1997,2022) +
  # scale_x_continuous(breaks = function(range) seq(yrs[1], yrs[23], by = 1))+
  xlab('Year') + 
  ylab('Mean weight (g)') +
  ecodata::theme_ts() 
g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm")) 

Adding in a LOESS (Locally Weighted Scatterplot Smoothing) trend line. LOESS is non-parametric regression technique

g = ggplot(bw_tally, aes(x = year, y = mean.wt)) +
  geom_smooth() +
   xlab('Year') + 
  ylab('Mean weight (g)') +
 ecodata::theme_ts() 
g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm")) 

Size frequency analysis

Extending work by Paul Rago in 2021, size samples from Lisa’s data and ILXSM were used to illustrate the potential use of a weight vs age curve.

# From Rago_21 Methods: Weight (W) as a function of age (A) 
# was modeled in Hendrickson and Hart as $$ W = \alpha Age^{\beta} $$, where 
# $$ \alpha = 1.12 * 10^{-6} $$ and $$ \beta = 3.60 $$. These parameters were
# used to describe both male and female Illex combined, but the authors noted 
# that sex specific differences did occur. The inverse of the weight at age 
# function can be written as: Together these functions can be used to compute the projected growth of a cohort sampled
# by the fishery in any given week. The projected growth of an squid of size 
# $$ W_{t} $$ to size $$ W_{t+1} $$ in week $ t+1 $ was obtained by first estimating its age in days, then adding 7 days to project it to the next week.  
#  $$ Age_{t} = (W_{t} /\alpha)^{1/\beta} $$, thus the projected weight at time 
#  $ t $ is :  $$ W_{t+1} = \alpha(Age_{t}+ 7 days)^{\beta} $$
# The above equations can be used to compute the projected growth of a squid at 
# time $ t $ over times $ t + 1 $,$ t + 2 $,$ t + 3 $, etc.   


library(contourPlot)
library(TAM)
library(Hmisc)
library(akima)
# Using weight data: 

df <- bwts %>% 
  group_by(year, week) %>% # May want to include stat_area for future analyses? 
 # mutate(weight = round(BODYWT)) %>%
  count(BODYWT)
df <- df %>% 
  rename(freq = n)
alpha<-1.12e-6
beta<-3.60

df$age.est<-round(df$BODYWT/alpha)^(1/beta)
# 
# sampfreq.by.week.yr<-tapply(df$freq, list(df$year,df$week), sum)
# 
# wtfreq.by.week.yr<-tapply(df$freq[df$year==2021], 
#                           list(df$BODYWT[df$year==2021],
#                                df$week[df$year==2021]), sum)
# agefreq.by.week.yr<-tapply(df$freq[df$year==2021], 
#                            list(df$age.est[df$year==2021],
#                                 df$week[df$year==2021]), sum)



# contour plots for Weight

nx<-sum(range(bwts$week))

nx<-32 # number of weeks
ny<-50 # 
# year.seq<-c(min(bwts$year):max(bwts$year)), this avoids 'magic numbers' but 
# since there are gaps in the data it has to be as below
# range(df$BODYWT)
# range(bwts$BODYWT)

year.seq<-c(1997:2006, 2010:2019, 2021:2022)
for (i in year.seq){
  tmp <- df %>% filter(year == i)
  yy<-interp(tmp$week,
             tmp$BODYWT,
             tmp$freq,
             xo=seq(20,45, length=nx),
             yo=seq(50,250,length=ny),
             nx=nx, ny=ny, linear=FALSE,extrap=FALSE, duplicate ="mean")
  image(yy, main=paste("Weight prediction surface, year= ",i),
        xlab="Week of Year", ylab="Weight (g) of Sample")
  # find first week greater than or equal to 20. 
  min.wk <- tmp$week[which(tmp$week>=20)[1]]
  wgt.est.quantiles<-wtd.quantile(df$BODYWT[df$year==i & df$week==min.wk],
                                  w=df$freq[df$year==i & df$week==min.wk], 
                                  probs=c(0.05,0.1,0.9))
  
  # compute vectors of projections of computed weights for the 10%ile and 90%ile of initial weight 
  x.week=seq(from=min.wk, to= 45)
  # plot the growth trajectory for 10th percentile of weight sample in initial week = min.wk
  age.10wgt<-(wgt.est.quantiles[1]/alpha)^(1/beta)
  proj.wt.10<-alpha* (age.10wgt+ 7*(x.week - min.wk))^beta
  lines(x.week,proj.wt.10, col="red",lty=2, lwd=2)
  
  # plot the growth trajectory for 90th percentile of weight sample in initial week = min.wk
  age.90wgt<-(wgt.est.quantiles[3]/alpha)^(1/beta)
  proj.wt.10<-alpha* (age.90wgt+ 7*(x.week - min.wk))^beta
  lines(x.week,proj.wt.10, col="blue",lty=2, lwd=2)
  contour(yy,add=TRUE)    #,nlevels = 5)
}

Analysis of putative Birth weeks

Compute birth week as function of age at capture and then plot frequency of squid in sample:

# Compute birth week as function of age at capture
#df <- as.data.frame(df)
df$birth.week<-round(df$week-df$age.est/7)  
#  Plot frequency of birth weeks based on frequency of fish in sample.
b.week<-seq(from=min(df$birth.week), to = max(df$birth.week))
for (i in year.seq){
  tmp <- df %>% filter(year == i)
  b.week.table<-tapply(tmp$freq,tmp$birth.week, sum)
 barplot(b.week.table, main=paste("Birthweek frequency for Landings in ", i),
         xlab="Birth Week", ylab="Frequency", col="red")
 }

Adjusting birth week frequencies for natural mortality. Note: This uses an M of 0.06, this may need to be adjusted/updated.

# adjust birth week frequencies for natural mortality
M <- 0.06   # natural mortality per week.   To be addressed later

for (i in year.seq){
  b.week.table<-tapply(tmp$freq* exp(tmp$age.est/7*M),
                       list(tmp$birth.week),
                       sum)
  barplot(b.week.table, main=paste("Birthweek frequency for Landings, adjust for M in ", i),
          xlab="Birth Week", ylab="Frequency", col="blue")
}

Results/Conclusions:

Relative Condition

Condition estimates are often based on length-weight relationships. Below is a plot of the the length-weight relationship between all paired samples from the ILXSM data.

Regression coefficients were estimated from this current data set in effort to compare to length-weight relationships based on 1978 Lange and Johnson study.

# Calculate new coeffients with current data
# run regression
# mod <-lm(log(lw_paired$weight)~log(lw_paired$length/10))
# # Extract coefficients
# coef(mod) # intercept (alpha) and slope (beta) of the line 
# # intercept is -2.300349 and the slope  2.463418 (cm)
# # Back transform alpha
# # e = 2.7182818
# # e^-2.300349 
# exp(-2.300349) 

g = ggplot(lw_paired, aes(x=length, y=weight)) + 
  geom_point() +
  geom_smooth(method=lm) + 
  xlab('Length') + 
  ylab('Weight') +
  ecodata::theme_ts() 

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

# ggplot(lw_paired, aes(x=log(length/10), y=log(weight))) + 
#   geom_point()+
#   geom_smooth(method=lm) + 
#   theme_bw()

Relative condition (Kn) = W/W’, where W’ is the predicted length-specific mean weight for the fish population in a given region.

Regression coefficients used to compute Kn in plots below are based on those identified in 1978 Lange and Johnson study.

# Two types of condition estimates
# -- Kn = W/W' -- 
# W = weight of individual squid, 
# W' = predicted length-specific mean weight for the pop in a given region

# -- Fultons K =  100* W /L^3 -- #

### -- Compute relative condition -- ###
# compute length-weight relationships based on 1978 lange_johnson study: 
lw_paired$W <- 0.04810*(lw_paired$length*.10)^2.71990
lw_paired$Kn <- lw_paired$weight/lw_paired$W
# lw_paired$W21 <- 0.082888*(lw_paired$length*.10)^2.530143 
# lw_paired$W21 <- 0.1002239*(lw_paired$length*.10)^2.463418 ** on full data set

# lw_paired$Kn21 <- lw_paired$weight/lw_paired$W21
lw_paired$fk <- 100 * (lw_paired$weight/(lw_paired$length^3))



g  = ggplot(lw_paired, 
            aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
  labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
       title = 'Relative Condition:Kn') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_hline(yintercept = 1, lty = 2) +
  ylim(0,2) +
  geom_boxplot() + 
  scale_fill_manual(values=c('gold1','slateblue3','violetred1')) +
 ecodata::theme_ts()

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

Relative condition by vessel hold type across years.

‘Freeze’ represents freezer trawlers and ‘Wet’ represents vessels with refrigerated sea water or ice holds.

## ------ NOTE -------- ##
# Look at relative condition by week or month and by fleet 
# For some of the fleet info - there are NA's 
# unique(lw_paired$fleet)
# Remove NAs: 
check = na.omit(lw_paired)

g = ggplot(check, 
            aes(x = factor(fleet), y = Kn, fill = as.factor(year)))+ 
  labs(x = 'Hold Type', y = 'Relative Condition (Kn)',
       title = 'Relative Condition:Kn') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_hline(yintercept = 1, lty = 2) +
  ylim(0,2) +
  geom_boxplot() + 
   scale_fill_manual(values=c('gold1','slateblue3','violetred1')) +
  ecodata::theme_ts()

g + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

Relative condition inside and outside of the fishing season across years:

  • The first plot is during the fishing season window, here estimated as weeks 20 through 40:
  • The second plot denotes weeks occurring outside the typical fishing season
g1 = ggplot(lw_paired %>% filter(week %in% c(20:40)), 
            aes(x = factor(week), y = Kn, fill = as.factor(year)))+ 
  labs(x = 'Week of Year', y = 'Relative Condition (Kn)',
       title = 'Fishing Season - Relative Condition:Kn') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_hline(yintercept = 1, lty = 2) +
  ylim(0,2) +
  geom_boxplot() + 
  scale_fill_manual(values=c('gold1','slateblue3','violetred1')) +
  ecodata::theme_ts()

g1 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

g2 = ggplot(lw_paired %>% filter(week %in% c(2:19, 41:51)), 
            aes(x = factor(week), y = Kn, fill = as.factor(year)))+ 
  labs(x = 'Week of Year', y = 'Relative Condition (Kn)',
       title = 'Non-fishing Season - Relative Condition:Kn') +
  theme_classic() +
  guides(fill=guide_legend(title=NULL))+
  geom_hline(yintercept = 1, lty = 2) +
  ylim(0,2) +
  geom_boxplot() + 
   scale_fill_manual(values=c('gold1','slateblue3','violetred1')) +
  ecodata::theme_ts()

g2 + theme(axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10),
        axis.ticks.length = unit(-1.4, "mm"))   

# # Here I used mean weight of population (not length specific)
# g0 = ggplot(dat, 
#             aes(x = factor(AREA_CODE), y = pop_kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = 'Relative Condition:',
#        subtitle = 'Based on mean weight of population') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   ylim(0,2) +
#   geom_boxplot()
# ggsave(path = here::here('results'),"Relative_condition_by_stat_area.jpg", 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# library(patchwork) 
# g1 = ggplot(dat.check, 
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = 'Relative Condition (Kn):',
#        subtitle = 'Based on 1970s regression coefficients') +
#   theme_classic() +
#   guides(fill='none')+
#   #guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   ylim(0,2) +
#   geom_boxplot() # %>% filter(Kn <= 4.5)
# g2 = ggplot(dat.check %>% filter(Kn.updated <= 6), 
#        aes(x = factor(AREA_CODE), y = Kn.updated, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = '',
#        title = 'Relative Condition (Kn):',
#        subtitle = 'Based on 2021/22 regression coefficients') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ## Note - i changed the filter and/or ylim to play with scale, remove outliers 
# 
# g1 + g2
# ggsave(path = here::here('results'),"kn_1970s_vs_kn_2122.jpg", 
#        width = 10, height = 6, units = "in", dpi = 300)
# g1 + g0  
# ggsave(path = here::here('results'),"kn_vs_mean_condition_by_stat_area.jpg", 
#        width = 10, height = 6, units = "in", dpi = 300)
# g1 + g0  # with ylim(0,2)
# ggsave(path = here::here('results'),"kn_vs_mean_condition_by_stat_area_zoom.jpg", 
#        width = 10, height = 5, units = "in", dpi = 300)
# 
# # Here I used length-specific weight of population 
# ggplot(dat, aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # remove 626 as outlier and other outliers
# #  2021 622 and 526
# sa526 <- df21 %>% filter(AREA_CODE == 526)
# sa622 <- df21 %>% filter(AREA_CODE == 622)
# max(sa622$weights)
# max(sa526$weights)
# ggplot(dat %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                          525, 600, 621, 615, 562)),
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ggplot(dat %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                        525, 600, 621, 615, 562) &
#                         weights < 100),
#        aes(x = factor(AREA_CODE), y = Kn, fill = as.factor(year)))+ 
#   labs(x = 'Statistical Area', y = 'Relative Condition (Kn)',
#        title = '2021 vs 2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# ggsave(path = here::here('results'),"Relative_condition_Kn_no_outlier_by_stat_area.jpg", 
#        width = 8, height = 3.75, units = "in", dpi = 300)
# 
# # 2021 boxplot of condition
# ggplot(df21, aes(x = factor(AREA_CODE), y = cond2, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition',
#        title = '2021') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # 2022 boxplot of condition
# ggplot(df22, aes(x = factor(AREA_CODE), y = cond2, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition (k)',
#        title = '2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# # Year specific condition
# ggplot(df22, aes(x = factor(AREA_CODE), y = cond, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Condition (k)',
#        title = '2022') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_hline(yintercept = 1, lty = 2) +
#   geom_boxplot()
# 
# ggplot(df21, aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)',
#        title = 'Fleet: Wet boats',
#        subtitle = 'Lengths') +
#   #scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21, aes(x = factor(hold_type), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)',
#        title = 'Fleet: Wet boats') +
#   #scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(week %in% c(25:26)), 
#        aes(x = factor(week), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(hold_type %in% c('Ice', 'FT')), 
#        aes(x = factor(week), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(AREA_CODE %in% c(622, 537, 632, 616, 526, 541, 623,
#                                         525, 600, 621, 615, 562)), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(hold_type)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# # small: <80 mm ML; medium: 80-120 mm ML; large: >120 mm ML
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# #80- 120 interesting, <200 more like large
# ggplot(df21 %>% filter(lengths > 80 & lengths < 200), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Medium') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths > 200), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Large') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(df21 %>% filter(lengths <= 80), 
#        aes(x = factor(AREA_CODE), y = fk, fill = as.factor(AREA_CODE)))+ 
#   labs(x = 'Statistical Area', y = 'Fulton Body Condition (k)', 
#        title = 'Small') +
#   theme_classic() +
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()
# 
# ggplot(k21 %>% filter(hold_type %in% c('RSW', 'Ice') & PARAM_VALUE_NUM <= 500), 
#        aes(x = factor(vessel_id), y = PARAM_VALUE_NUM, fill = as.factor(year)))+ 
#   labs(x = 'Vessel', y = 'Mantle length (mm)', 
#        title = 'Fleet: Wet boats', 
#        subtitle = 'Lengths') +
#   scale_fill_manual(values = c("#F1BB7B","#FD6467")) +
#   theme_classic() + 
#   guides(fill=guide_legend(title=NULL))+
#   geom_boxplot()