Skip to content

Commit

Permalink
Added some documentaition and test for strict option for frame epansion
Browse files Browse the repository at this point in the history
  • Loading branch information
Edvin Fuglebakk authored and Edvin Fuglebakk committed Dec 30, 2024
1 parent a8ecf27 commit aa77c69
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 8 deletions.
23 changes: 18 additions & 5 deletions R/StoxAnalyticalBaselineFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1584,9 +1584,17 @@ AnalyticalRatioEstimate <- function(AnalyticalPopulationEstimateData, StoxLandin
#' Extends estimate beyond sampling frame
#' @description
#' Infer estimates to parts of the fishery / target population that was not sampled.
#' That is strata not covered by the sampling frame and domains that did not get sampled, but are known to be populated in census data.
#' Landing data is taken to be census.
#' @details
#' Inference can be done in several ways, controlled by the argument 'Method'
#' This function only infers precence of domains and strata from census-data, and provide options for replacing missing estimates (NA) with some pragmatic approximations.
#' Any estimates already provided is not changed. Domains that are not sampled are in design-based estimation imlied to have estimates of 0 abundance, total and frequencies, and hence unkown means.
#' Corresponding variances are also zero. This function encodes unsampled domains excplitly, and may therefore introduce some estimated values that
#' was not explicitly provided in the input. It does not introduce landed weights, or other knowledge from landings,
#' except for the fact that domains and strata are present in the landings data. All inference about unkown values are inferred from the provided estimates ('AnalyticalPopulationEstimateData').
#' Subsequent ratio-estimation may make use of this information to also make use of total-weight information from landings (see \code{link[RstoxFDA]{AnalyticalRatioEstimate}}).
#'
#' Inference about unkown values can be done in several ways, controlled by the argument 'Method'
#' \describe{
#' \item{Strict}{Infer NA for means and frequencies for unsampled domains in the sampling frame.
#' Provide NA-values for all parameters of all domains that is not in the sampling frame.}
Expand Down Expand Up @@ -1632,15 +1640,15 @@ ExtendAnalyticalSamplingFrame <- function(AnalyticalPopulationEstimateData, Stox
StratificationVariables <- names(AnalyticalPopulationEstimateData$StratificationVariables)[names(AnalyticalPopulationEstimateData$StratificationVariables) %in% LandingPartition]
DomainVariables <- names(AnalyticalPopulationEstimateData$DomainVariables)[names(AnalyticalPopulationEstimateData$DomainVariables) %in% LandingPartition]

# Add domains with 0 abundance and total, and NA mean for each unsampled variable that is a domain variable
# Add domains with 0 abundance, frequency, and total, and NA mean for each unsampled variable that is a domain variable

if (length(DomainVariables)>0){

#
# Construct unsampled domains that need to be filled to match landings
#
landeddomains <- StoxLandingData$Landing[,.SD,.SDcol=DomainVariables]
landeddomains <- landeddomains[!duplicated(apply(landeddomains, 1, paste, collapes=",")),]
landeddomains <- landeddomains[!duplicated(apply(landeddomains, 1, paste, collapse=",")),]
sampledfractionaldomains <- AnalyticalPopulationEstimateData$DomainVariables[,.SD,.SDcol=DomainVariables]
unsampledfractionaldomains <- landeddomains[!(apply(landeddomains, 1, paste, collapse=",")
%in% apply(sampledfractionaldomains, 1, paste, collapse=",")),]
Expand Down Expand Up @@ -1689,7 +1697,7 @@ ExtendAnalyticalSamplingFrame <- function(AnalyticalPopulationEstimateData, Stox

additionalAbundanceCovariance <- merge(data.table::CJ(Stratum=unique(AnalyticalPopulationEstimateData$StratificationVariables$Stratum),
Domain1=unique(AnalyticalPopulationEstimateData$DomainVariables$Domain)),
cross, by=c("Domain1"))
cross, by=c("Domain1"), allow.cartesian=T)
additionalAbundanceCovariance <- additionalAbundanceCovariance[!(paste(additionalAbundanceCovariance$Stratum,
additionalAbundanceCovariance$Domain1,
additionalAbundanceCovariance$Domain2) %in%
Expand Down Expand Up @@ -1731,7 +1739,7 @@ ExtendAnalyticalSamplingFrame <- function(AnalyticalPopulationEstimateData, Stox
# Add new strata for not sampled
if (length(StratificationVariables)>0){
landedpart <- StoxLandingData$Landing[,.SD,.SDcol=StratificationVariables]
landedpart <- landedpart[!duplicated(apply(landedpart, 1, paste, collapes=",")),]
landedpart <- landedpart[!duplicated(apply(landedpart, 1, paste, collapse=",")),]
sampleframepart <- AnalyticalPopulationEstimateData$StratificationVariables[,.SD,.SDcol=StratificationVariables]
unsampledpart <- landedpart[!(apply(landedpart, 1, paste, collapse=",") %in% apply(sampleframepart, 1, paste, collapse=",")),]
unsampledpart$Stratum <- UnsampledStratum
Expand All @@ -1747,6 +1755,10 @@ ExtendAnalyticalSamplingFrame <- function(AnalyticalPopulationEstimateData, Stox
PSURelativeDomainSize=as.numeric(NA)))
}

#
# Infer values for some unkowns (NA or NaN)
#

if (Method == "Strict"){
newAbundance <- data.table::CJ(Stratum=unique(AnalyticalPopulationEstimateData$StratificationVariables$Stratum),
Domain=unique(AnalyticalPopulationEstimateData$DomainVariables$Domain))
Expand Down Expand Up @@ -1787,6 +1799,7 @@ ExtendAnalyticalSamplingFrame <- function(AnalyticalPopulationEstimateData, Stox
}

if (Method == "GrandMean"){
browser()
stop("Not implemented")
# if GrandMean: put in overall frequecny for domains and overall mean for all variables
}
Expand Down
52 changes: 49 additions & 3 deletions inst/tinytest/test-StoxAnalyticalBaselineFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,6 @@ expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundan
# Test expanding along a single domain variable
land <- RstoxFDA::CatchLotteryLandingExample
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables$Frame <- "Sampling frame"
ex <- RstoxFDA::CatchLotteryExample
ex$Haul$Gear[ex$Haul$Gear=="3600"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3700"] <- "53"
Expand All @@ -610,9 +609,56 @@ expect_equal(nrow(expandedPopEst$StratificationVariables), nrow(popEst$Stratific
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))

# Test expanding along several variables for both domain and stratification
#warning("Add test along several domain and stratification variables")

# Test Grand mean option
#add stratification columns to landing
land <- RstoxFDA::CatchLotteryLandingExample
land$Landing$FrameVar1 <- "Sampling frame 1"
land$Landing$FrameVar1[1:300] <- "OutOfFrame 1"
land$Landing$FrameVar2 <- "Sampling frame 2-1"
land$Landing$FrameVar2[500:1000] <- "Sampling frame 2-2"
land$Landing$FrameVar2[1:150] <- "OutOfFrame 2-1"
land$Landing$FrameVar2[150:300] <- "OutOfFrame 2-2"

#add stratification columns to sampling design
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables$FrameVar1 <- "Sampling frame"
stationDesign$StratificationVariables$FrameVar2 <- "Sampling frame 2-1"
stationDesign$StratificationVariables <- rbind(stationDesign$StratificationVariables,
data.table::data.table(Stratum="Nordsjo2", CountryVessel="NOR", FrameVar1="Sampling frame", FrameVar2="Sampling frame 2-2"))
stationDesign$SelectionTable$Stratum[1:10] <- "Nordsjo2"
stationDesign$SampleTable$N <- stationDesign$SampleTable$N - 200
stationDesign$SampleTable$n <- stationDesign$SampleTable$n - 10
stationDesign$SampleTable <- rbind(stationDesign$SampleTable, data.table::data.table(Stratum="Nordsjo2", N=200, n=10, SelectionMethod="Poisson", FrameDescription=""))

ex <- RstoxFDA::CatchLotteryExample
ex$Haul$Gear[ex$Haul$Gear=="3600"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3700"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3100"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3500"] <- "11"
ex$Haul$Usage <- "2"
ex$Haul$Usage[1:2] <- "1"

stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <- RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"), c("Gear", "Usage"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

expandedPopEst <- RstoxFDA:::ExtendAnalyticalSamplingFrame(popEst, land, c("FrameVar1", "FrameVar2", "Gear", "Usage"), "Strict", "Unsampled")
expect_equal(nrow(expandedPopEst$SampleSummary), nrow(popEst$SampleSummary)+1)
expect_true(nrow(expandedPopEst$Variables)>nrow(popEst$Variables))
expect_true(nrow(expandedPopEst$Abundance)>nrow(popEst$Abundance))
expect_true(nrow(expandedPopEst$AbundanceCovariance)> nrow(popEst$Abundance))
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(expandedPopEst$AbundanceCovariance)*3) #three variable combinations
expect_true(nrow(expandedPopEst$DomainVariables)>nrow(popEst$DomainVariables))
expect_true(nrow(expandedPopEst$StratificationVariables)>nrow(popEst$StratificationVariables))
expect_equal(length(unique(expandedPopEst$StratificationVariables$Stratum)), length(unique(popEst$StratificationVariables$Stratum))+1)
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))


#
# Test Grand mean option for frame expansion
#
#browser()
#warning("Add test for Grand Mean, expanding along several domain and stratification variables")

#stop("Test collapseStrata with both HH and HT")
Expand Down

0 comments on commit aa77c69

Please sign in to comment.