ZoopCOG.Rmd

---
title: "ZoopCOG"
author: "Sarah Gaichas"
date: "`r Sys.Date()`"
output:
  html_document:
    code_fold: hide
link-citations: yes
csl: "canadian-journal-of-fisheries-and-aquatic-sciences.csl"
bibliography: zoopindex.bib
urlcolor: blue

---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
                      message = FALSE,
                      warning = FALSE)

library(tidyverse)
library(here)
library(DT)
library(ggiraph)
library(patchwork)
library(FishStatsUtils)
library(VAST)
```

## Pull COG from "best" zooplankton models

See [Copepod Model Results](https://noaa-edab.github.io/zooplanktonindex/CopeModResults.html) for the model stats, time series, and density outputs

These models are for all manner of copepods, plus zooplankton volume and Euphausiids

Functions to get the COG out and plot it with geom_gls trend assessment

```{r}

# put this in a package for petes sake

getcogVAST <- function(d.name){
  
  fit <- VAST::reload_model(readRDS(paste0(d.name,"/fit.rds")))
  
  dir.create(paste0(d.name,"/test"))
  
  cogout <- FishStatsUtils::plot_range_index(Sdreport = fit$parameter_estimates$SD, 
                                             Report = fit$Report, 
                                             TmbData = fit$data_list,
                                             year_labels = as.numeric(fit$year_labels),
                                             years_to_plot = fit$years_to_plot,
                                             Znames = colnames(fit$data_list$Z_xm),
                                             PlotDir = paste0(d.name,"/test")) #already have plots, will delete this directory
  
  saveRDS(cogout, paste0(d.name,"/cogout.rds"))
  
  unlink(paste0(d.name,"/test"), recursive=TRUE) #removes directory with unneeded plots
  
}

plotcog <- function(d.name){
  
  cogout <- readRDS(paste0(d.name,"/cogout.rds"))
  modpath <- unlist(str_split(d.name, pattern = "/"))
  modname <- modpath[length(modpath)]
  
  cogdat <- as.data.frame(cogout$COG_Table) |>
    dplyr::mutate(direction = ifelse(m==1, "Eastward", "Northward"))
  
  ggplot2::ggplot(cogdat, ggplot2::aes(x = Year, y = COG_hat)) + 
    ggplot2::geom_point() + 
    ecodata::geom_gls() + 
    ggplot2::labs(y = "Center of gravity, km") +
    ggplot2::facet_wrap(~direction, scales = "free_y") + 
    ecodata::theme_facet()+
    ggplot2::ggtitle(modname)
}


```

Functions to get the "best" model directories and apply the function to them

```{r}
modtable <- function(moddirs){
  
  # apply getmodinfo function to inout directories
  modcompare <- purrr::map_dfr(moddirs, getmodinfo)
  
  modselect <- modcompare |>
    dplyr::mutate(season = dplyr::case_when(stringr::str_detect(modname, "_fall_") ~ "Fall",
                                            stringr::str_detect(modname, "spring") ~ "Spring",
                                            stringr::str_detect(modname, "_all_") ~ "Annual",
                                            TRUE ~ as.character(NA))) |>
    dplyr::mutate(converged2 = dplyr::case_when(stringr::str_detect(converged, "no evidence") ~ "likely",
                                                stringr::str_detect(converged, "is likely not") ~ "unlikely",
                                                TRUE ~ as.character(NA))) |>
    dplyr::mutate(copegroup = stringr::str_extract(modname, "[^_]+")) |>
    #dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
    dplyr::group_by(copegroup, season) |>
    dplyr::mutate(deltaAIC = AIC-min(AIC)) |>
    dplyr::select(copegroup, modname, season, deltaAIC, fixedcoeff,
                  randomcoeff, use_anisotropy, 
                  omega1, omega2, epsilon1, epsilon2, 
                  beta1, beta2, AIC, converged2) |>
    dplyr::arrange(copegroup, season, AIC)
  
  return(modselect)
}


# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
                         fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  if(file.exists(file.path(d.name, "parameter_estimates.RData"))) {
    load(file.path(d.name, "parameter_estimates.RData"))
    
    AIC <- parameter_estimates$AIC[1]  
    converged <- parameter_estimates$Convergence_check[1]
    fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
    randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
    
  }else if(file.exists(file.path(d.name, "parameter_estimates.txt"))){
    
    reptext <- readLines(file.path(d.name, "parameter_estimates.txt"))
    
    AIC <- as.double(reptext[grep(reptext, pattern = "AIC")+1])
    converged <- reptext[grep(reptext, pattern = "Convergence_check")+1]
    fixedcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                                boundary("word"))[[1]][2])
    randomcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                                 boundary("word"))[[1]][3])
    
  }else{
    
    AIC <- NA_real_
    converged <- NA_character_
    fixedcoeff <- NA_integer_
    randomcoeff <- NA_integer_
  }
  
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
  
  return(out)
  
}

# function to apply extracting info
getmodindex <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  if(file.exists(file.path(d.name,"Index.csv"))){
    index <- read.csv(file.path(d.name, "Index.csv"))
  }else{
    stopifnot()
  }
  # return model indices as a dataframe
  out <- data.frame(modname = modname,
                    index
  )
  
  return(out)
}


```

Apply all that

```{r}

# from each output folder in pyindex, 
outdir <- here::here("pyindex")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]

modselect <- modtable(moddirs)

# lets only look at indices for converged models
modcompare_conv <- modselect |>
  dplyr::ungroup() |>
  dplyr::filter(converged2 == "likely") |>
  dplyr::select(modname) |>
  as.vector() |>
  unname() |>
  unlist()

# or only converged and best AIC
modcompare_best <- modselect |>
  dplyr::ungroup() |>
  dplyr::filter(converged2 == "likely") |>
  dplyr::distinct(copegroup, season, .keep_all = T) |> # assumes orderd by dAIC!
  dplyr::select(modname) |>
  as.vector() |>
  unname() |>
  unlist()

moddirs_conv <-  moddirs[grepl(sprintf("\\.*(%s)$", paste(modcompare_conv, collapse = '|')), moddirs)]

moddirs_best <-  moddirs[grepl(sprintf("\\.*(%s)$", paste(modcompare_best, collapse = '|')), moddirs)]
```

```{r, eval=FALSE}
purrr::map(moddirs_best, getcogVAST)

```

## Plot the centers of gravity by index

Small copepods in fall trending northeast similar to forage fish and aggregated survey species.
Small copepods in spring trending north.
Large copepods in fall trending west, similar to benthos.
No significant change for Calfin, Euphausiids, Zooplankton volume.

```{r}
purrr::map(moddirs_best, plotcog)
```

## Make the inputs for the SOE 

from the "best" models for each season taxon combination.

Time series

```{r}

SOEinputs <- function(d.name) { #, stratlook) {
  
  
  # out of time to get this workign
  # keep getting pmap error about x and y not the same source
  # hard code it for SOE indices
  
  #stratlook <- mget(stratlook)
  
    # standard zoop strata for SOEs
  stratlook <- data.frame(Stratum = c("Stratum_1",
                                      "Stratum_2",
                                      "Stratum_3",
                                      "Stratum_4",
                                      "Stratum_5",
                                      "Stratum_6",
                                      "Stratum_7"),
                          Region  = c("AllEPU",
                                      "her_sp",
                                      "her_fa",
                                      "MAB",
                                      "GB",
                                      "GOM",
                                      "SS"))

  
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  outdir <- stringr::str_c(modpath[1:length(modpath)-1], collapse = "/")
  
  infile <- file.path(d.name, "Index.csv")

  season <- stringr::str_split(modname, "_")[[1]][2]
    
  taxa <- stringr::str_split(modname, "_")[[1]][1]
    
  outfile <- file.path(outdir, paste0(season, taxa, "index.rds"))  
  
  splitoutput <- read.csv(infile)
  
  zoopindex <- splitoutput %>%
    left_join(stratlook) %>%
    dplyr::select(Time, 
                  EPU = Region, 
                  "Abundance Index Estimate" = Estimate, 
                  "Abundance Index Estimate SE" = Std..Error.for.Estimate) %>%
    tidyr::pivot_longer(c("Abundance Index Estimate", "Abundance Index Estimate SE"), 
                        names_to = "Var", values_to = "Value") %>%
    dplyr::filter(EPU %in% c("MAB", "GB", "GOM", "AllEPU")) %>%
    dplyr::mutate(Units = "numbers per 100 cu m volume") %>%
    dplyr::select(Time, Var, Value, EPU, Units)
  
  zoopindex$Var <- stringr::str_c(stringr::str_to_title(season), stringr::str_to_title(taxa), zoopindex$Var, sep = " ")
  
  saveRDS(zoopindex, outfile)
  
} 

#   # standard zoop strata for SOEs
#   stratlook <- data.frame(Stratum = c("Stratum_1",
#                                       "Stratum_2",
#                                       "Stratum_3",
#                                       "Stratum_4",
#                                       "Stratum_5",
#                                       "Stratum_6",
#                                       "Stratum_7"),
#                           Region  = c("AllEPU",
#                                       "her_sp",
#                                       "her_fa",
#                                       "MAB",
#                                       "GB",
#                                       "GOM",
#                                       "SS"))
# 
#     # larvarea zoop strata
#     stratlook2 <- data.frame(Stratum = c("Stratum_1",
#                                     "Stratum_2",
#                                     "Stratum_3",
#                                     "Stratum_4",
#                                     "Stratum_5",
#                                     "Stratum_6",
#                                     "Stratum_7",
#                                     "Stratum_8",
#                                     "Stratum_9"),
#                         Region  = c("AllEPU",
#                                     "her_sp",
#                                     "her_fa",
#                                     "her_larv",
#                                     "no_larv",
#                                     "MAB",
#                                     "GB",
#                                     "GOM",
#                                     "SS"))
# 
#     
# name <-  dplyr::case_when(stringr::str_detect(moddirs_best, "larvarea") ~ "stratlook2", 
#                                                  TRUE ~ "stratlook")  
# 
# list(d.name = moddirs_best,
#      stratlook = name) |> purrr::pmap(SOEinputs)


```

Center of gravity

```{r}
SOEinputsCOG <- function(d.name) {
  
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  outdir <- stringr::str_c(modpath[1:length(modpath)-1], collapse = "/")
  
  season <- stringr::str_split(modname, "_")[[1]][2]
    
  taxa <- stringr::str_split(modname, "_")[[1]][1]
  
  infile <- file.path(d.name, "cogout.rds")
  
  cogout <- readRDS(infile)

  outfile <- file.path(outdir, paste0(season, taxa, "cog.rds"))  
  
  groupcog <- as.data.frame(cogout$COG_Table) |>
    dplyr::mutate(direction = ifelse(m==1, "Eastward", "Northward")) |>
    dplyr::select("Time" = Year,
                  "Center of Gravity" = COG_hat, 
                  "Center of Gravity SE" = SE,
                  direction) |>
    tidyr::pivot_longer(c("Center of Gravity", "Center of Gravity SE"), 
                        names_to = "Var", values_to = "Value") |>
    #direction into Var
    tidyr::unite(Var, direction:Var, sep = " ") |>
    dplyr::mutate(Units = "km",
                  EPU = "ALLEPU") |>
    dplyr::select(Time, Var, Value, EPU, Units)
  
  groupcog$Var <- stringr::str_c(stringr::str_to_title(season), stringr::str_to_title(taxa), groupcog$Var, sep = " ")
  
  #readr::write_csv(benthosindex, outfile)
  saveRDS(groupcog, outfile)
  
  
}


```

```{r, eval=FALSE}
# we don't want the herring larval area run in the SOE, nor do we want herring larvae models

moddirs_SOE <- moddirs_best[!stringr::str_detect(moddirs_best, "larva")]

purrr::map(moddirs_SOE, SOEinputs)

purrr::map(moddirs_SOE, SOEinputsCOG)

```