heatStressFA.R

require(raster)
require(psych)
require(here)

names <- c("Tmax95_past", "VP95_past", "Tmin95_past", "DD66_past")
#names <- c("Tmax95_past", "VP95_past", "DD66_past")

## Set seed for reproducibility
set.seed(1337)

## Read in netcdf layers as single column dataframes
## NB "here" calls from top layer of current project, so run script from within heatStress shared folder.
for (i in names){
  data  <- raster("heat_hazards.nc", varname = i)
  name <- paste0(i)
  assign(name, as.data.frame(values(data)))
}

## combine dataframes and rename columns
df <- na.omit(data.frame(Tmax95_past, VP95_past, Tmin95_past, DD66_past))
#df <- na.omit(data.frame(Tmax95_past, VP95_past, DD66_past))
colnames(df) <- names

## Generate raw correlation matrix
HScor <- cor(df)

## Run parallel analysis for appropriate component/factor N
PA.HS = fa.parallel(df, fm="wls", n.iter=50, quant=.95, fa="both") 
save(PA.HS, file = "PA.HS.Rdata")
eigen(HScor)$values                   #Eigenvalues of data from eigen function
PA.HS$pc.values                       #Equivalent results from principal components PA 
PA.HS$fa.values                       #Eigenvalues of common factor model

#Indicates 2-factor solution if criterion > random noise, or single factor if EV>1 criterion. For our purposes, 1-fac is best.

## Single component/factor solutions 
pc <-  principal(df, 1, residuals=T)
mr <-         fa(df, 1)

## Compare loadings
loadings.1fac <- cbind(round(pc$loadings,2), round(mr$loadings,2))
loadings.1fac

## Compare component loadings with latent scores (equivalent to loadings as no uniqueness)
round(cor(df, pc$scores),2)

## Compare factor loadings with factor scores
round(cor(df, mr$scores),2)

## Specify orthogonal 2-component Principal Component, Minres FA (principal axis and WLS are identical solutions)
## Oblique rotation - allows rotated factors to be correlated
pc2 <- principal(df, 2, rotate = "oblimin", residuals=T)
mr2 <-        fa(df, 2, rotate = "oblimin")
## Orthogonal rotation - constrains rotated factors to be uncorrelated
pc3 <- principal(df, 2, rotate = "varimax", residuals=T)
mr3 <-        fa(df, 2, rotate = "varimax")


## For 2 latent solutions compare factor congruence
round(factor.congruence(list(pc2, mr2)),2)

## compare loadings across 2 factor solutions (oblique) 
loadings.2fac <- cbind(round(pc2$loadings, 2), round(mr2$loadings, 2))
loadings.2fac

## compare loadings across 2 factor solutions (orthogonal)
loadings.3fac <- cbind(round(pc3$loadings, 2), round(mr3$loadings, 2))
loadings.3fac

## Of all solutions - my prior is that mr2 gives the most efficient summary of underlying dimensions, as rotated solutions 
## are clearly going to be correlated.
## HOWEVER, the variables are all derived from common climate sources, so there is probably an argument to be made 
## for there being "no measurement error", depending on whether you want to treat the climate output as "with error" or not.
## I think for our purposes, weights taken from "loadings.1fac$MR1" make the most sense, but bear in mind the following.

## Given measurement error - optimal solution is to select from the following:
# Select from PC1 if want to treat climate model as observation without measurement error
# Select from MR1 if want to treat climate model as observation with unique variances for each element.
loadings.1fac