Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catch lottery stox4 #166

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(AddGearGroupStoxBiotic)
export(AddGearGroupStoxLanding)
export(AddPeriodStoxBiotic)
export(AddPeriodStoxLanding)
export(AddPsuStratificationVariables)
export(AddStratumStoxBiotic)
export(AddStratumStoxLanding)
export(AnalyticalPSUEstimate)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# 1.5.0-9003
* Added function for annotating stratification variables to sampling parameters (AddPsuStratificationVariables) (#163)
* Added option for computing sampling parameters for Proportion Poisson Sampling from data records (option DefinitionMethod='ProportionalPoissonSampling' to ComputePSUSamplingParameters) (#165)
* Added functions for sampling frame expansion (ExtendAnalyticalSamplingFrameCoverage), and domain interpolation (InterpolateAnalyticalDomainEstimates) (#154)
* Changed how Stratification variables and Domain variables are matched to landings with AnalyticalRatioEstimate (# 125)
* Replaced DefinePSUSamplingParameters (processdata) with ComputePSUSamplingParameters (no processdata) and ReadPSUSamplingParameters (no processdata). This change breaks some pre-release projects (v. v1.3-9006). (#127)
Expand Down
3 changes: 0 additions & 3 deletions R/StoxAnalyticalAnalysisFunctions.R

This file was deleted.

197 changes: 186 additions & 11 deletions R/StoxAnalyticalBaselineFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,53 @@ ReadPSUSamplingParameters <- function(FileName){
return(parseDesignParameters(FileName))
}

#' @noRd
computePpsParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Quota, StratumName, ExpectedSampleSize){

checkMandatory(StoxBioticData, "StoxBioticData")
checkMandatory(SamplingUnitId, "SamplingUnitId")
checkMandatory(Quota, "Quota")
checkMandatory(StratumName, "StratumName")
checkMandatory(ExpectedSampleSize, "ExpectedSampleSize")

if (length(unique(StoxBioticData$SpeciesCategory$SpeciesCategory))!=1){
stop(paste("The DefinitionMethod 'ProportionalPoissonSampling', requires only one species category to be present in sample records. Found:",
truncateStringVector(unique(StoxBioticData$SpeciesCategory$SpeciesCategory))))
}

if (!(SamplingUnitId %in% names(StoxBioticData$Haul))){
stop(paste("The argument 'SamplingUnitId' must identify a variable in the 'Haul'-table of 'StoxBioticData"))
}

if (any(is.na(StoxBioticData$Sample$CatchFractionWeight))){
missing <- StoxBioticData$Sample$Sample[is.na(StoxBioticData$Sample$CatchFractionWeight)]
stop(paste("Cannot computed sampling parameters with missing catch weights, missing for samples: ", truncateStringVector(missing)))
}
n <- ExpectedSampleSize

flatBiotic <- RstoxData::mergeByStoxKeys(StoxBioticData$Haul, StoxBioticData$Sample, StoxDataType = "StoxBiotic")
SelectionTable <- flatBiotic[,list(catchWeight=sum(get("CatchFractionWeight"))), by=list(SamplingUnitId=get(SamplingUnitId))]
SelectionTable$SelectionProbability <- SelectionTable$catchWeight / Quota
SelectionTable$HHsamplingWeight <- 1 / (SelectionTable$SelectionProbability * sum(1/SelectionTable$SelectionProbability))
SelectionTable$InclusionProbability <- 1-((1-SelectionTable$SelectionProbability)**n)
SelectionTable$HTsamplingWeight <- 1 / (SelectionTable$InclusionProbability * sum(1/SelectionTable$InclusionProbability))
SelectionTable$Order <- as.numeric(NA)
SelectionTable$Stratum <- StratumName
SelectionTable$SelectionDescription <- ""
SelectionTable <- SelectionTable[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")]

SampleTable <- data.table::data.table(Stratum=StratumName, N=as.numeric(NA), n=n, SelectionMethod="Poisson", FrameDescription="")

StratificationVariables <- data.table::data.table(Stratum=StratumName)

samplingParams <- list()
samplingParams$SampleTable <- SampleTable
samplingParams$SelectionTable <- SelectionTable
samplingParams$StratificationVariables <- StratificationVariables

return(samplingParams)
}

#' Compute PSU Sampling Design Parameters
#' @description
#' Compute sampling parameters for Primary Sampling Units in multi-stage sampling.
Expand All @@ -172,27 +219,152 @@ ReadPSUSamplingParameters <- function(FileName){
#' This is a reasonable approximation if within-strata sampling is approximately simple random selections,
#' the sample intensitiy is low (only a small fraction of the population is sampled),
#' and non-response is believed to be random.
#' @param DefinitionMethod 'AdHocStoxBiotic'
#'
#' If 'DefinitionMethod' is 'ProportionalPoissonSampling', Unstratified (singe stratum) Poission sampling with selection probabilities
#' proportional to catch size is assumed. 'SamplingUnitId' must be a variable on the Haul table of 'StoxBioticData' for this option,
#' and the data must contain only one species (SpeciesCategory in 'StoxBioticData'). SelectionProbabilities are assigned
#' based on the total catch of the species in each haul. Specifically, for a haul \eqn{i}; selectionprobabilites, \eqn{p_{i}} and inclusionprobabilities \eqn{\pi_{i}}
#' are calculated as:
#'
#' \deqn{p_{i}=\frac{w_{i}}{W}}
#'
#' \deqn{\pi_{i}=1-(1-p_{i})^{n}}
#'
#' where:
#' \itemize{
#' \item \eqn{w_{i}} is the sum of all catch weights in haul \eqn{i} ('CatchFractionWeight' on the 'Sample' table of 'StoxBioticData')
#' \item \eqn{W} is the expected total catch in the fishery (argument 'Quota')
#' \item \eqn{n} is the expected sample size (argument 'ExpectedSampleSize')
#' }
#' If proportional poisson sampling was actually used to select the sampled records in 'StoxBioticData',
#' sampling parameters would have been obtained prior to sampling, and it is generally preferable to obtain these,
#' and import those via \code{\link[RstoxFDA]{ReadPSUSamplingParameters}}. Weight-records are sometimes corrected after
#' sampling parameters are calculated, and proper information about non-response can not be recalculated after the fact.
#'
#' Proportional poisson sampling also allows the sampler to combine rigour and pragmatism, by varying sampling parameters
#' in the course of sample selection. For instance 'n' may be changed during the sampling period, if non-response
#' turns out to be higher than expected. Such flexibilities are not
#' provided by this function, and the approximation may be severely compromised, if such pragmatism is not accounted for.
#'
#' @param DefinitionMethod 'AdHocStoxBiotic' or 'ProportionalPoissonSampling'
#' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from
#' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for.
#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling.
#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. (for DefinitionMethod 'AdHocStoxBiotic')
#' @param StratumName name of the stratum sampling parameters are calculated for (for DefinitionMethod 'ProportionalPoissonSampling')
#' @param Quota expected total catch in sampling frame in kg (for DefinitionMethod 'ProportionalPoissonSampling')
#' @param ExpectedSampleSize the expected sample size for Possion sampling (for DefinitionMethod 'ProportionalPoissonSampling')
#' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}}
#' @examples
#' # parameters for simpler random haul-selection, stratified by GearGroup
#' # parameters for simple random haul-selection, stratified by GearGroup
#' PSUparams <- ComputePSUSamplingParameters(RstoxFDA::StoxBioticDataExample,
#' "AdHocStoxBiotic",
#' "Haul",
#' "GearGroup")
#'
#' # parameters for haul selection proportional to catch size.
#' calculatedPps <- RstoxFDA::ComputePSUSamplingParameters(RstoxFDA::CatchLotteryExample,
#' "ProportionalPoissonSampling",
#' "serialnumber", StratumName =
#' "Nordsjo", Quota = 124*1e6,
#' ExpectedSampleSize = 110)
#'
#' @export
#' @concept StoX-functions
#' @concept Analytical estimation
#' @md
ComputePSUSamplingParameters <- function(StoxBioticData, DefinitionMethod=c("AdHocStoxBiotic"), SamplingUnitId=character(), StratificationColumns=character()){
ComputePSUSamplingParameters <- function(StoxBioticData, DefinitionMethod=c("AdHocStoxBiotic", "ProportionalPoissonSampling"), SamplingUnitId=character(), StratificationColumns=character(), StratumName=character(), Quota=numeric(), ExpectedSampleSize=numeric()){

DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("AdHocStoxBiotic", "ProportionalPoissonSampling"))

DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("AdHocStoxBiotic"))
if (DefinitionMethod=="AdHocStoxBiotic"){
return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns))
}
else if (DefinitionMethod=="ProportionalPoissonSampling"){
return(computePpsParametersStoxBiotic(StoxBioticData, SamplingUnitId, Quota, StratumName, ExpectedSampleSize))
}
else{
stop(paste("The option", DefinitionMethod, "is not recognized for the argument 'DefinitionMethod'"))
}

}

is.StratificationVariablesData <- function(StratificationVariablesTable){
if(!data.table::is.data.table(StratificationVariablesTable)){
return(FALSE)
}
if (!("Stratum" %in% names(StratificationVariablesTable))){
return(FALSE)
}
return(TRUE)
}

#' Add Stratification columns to 'PSUSamplingParametersData'
#' @description
#' Add additional variables to encode strata and its correspondance with census data (e.g. landings data).
#' @details
#' \code{\link[RstoxFDA]{PSUSamplingParametersData}} provide sampling parameters by strata.
#' Optionally, it may also contain additional variables that encode the stratification in terms of variables
#' available in other data sources, such as \code{\link[RstoxData]{StoxLandingData}}. This function allows
#' such variables to be added, if not already present.
#'
#' More detailed encoding of stratification is useful for
#' encoding the sampling frame of the design provided by 'PSUSamplingParametersData'. By encoding all strata
#' in terms of variables that are available in census-data, the correspondance between sampling frame and
#' target population can be encoded. This information will be available in downstream estimates (e.g.
#' \code{\link[RstoxFDA]{AnalyticalPopulationEstimate}}) and allow for pragmatic inference to
#' out-of-frame strata (via \code{\link[RstoxFDA]{ExtendAnalyticalSamplingFrameCoverage}}).
#'
#' @param PSUSamplingParametersData Sampling parameters stratification variables should be added to
#' @param StratificationVariables name of variables to add
#' @param StratificationVariablesTable value-combinations for the variables to add to each stratum
#' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}}
#' @export
#' @concept StoX-functions
#' @concept Analytical estimation
#' @md
AddPsuStratificationVariables <- function(PSUSamplingParametersData, StratificationVariables, StratificationVariablesTable=data.table::data.table()){

checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData")
checkMandatory(StratificationVariables, "StratificationVariables")
checkMandatory(StratificationVariablesTable, "StratificationVariablesTable")

if (length(names(PSUSamplingParametersData$StratificationVariables))>1){
stop("'PSUSamplingParametersData' already has StratificationVariables")
}

if (!is.StratificationVariablesData(StratificationVariablesTable)){
stop("Invalid 'StratificationVariablesTable'.")
}

if (!all(StratificationVariablesTable$Stratum %in% PSUSamplingParametersData$StratificationVariables$Stratum)){
missing <- StratificationVariablesTable$Stratum[!(StratificationVariablesTable$Stratum %in% PSUSamplingParametersData$StratificationVariables$Stratum)]
stop(paste("Not all strata in 'StratificationVariablesTable' exist in 'PSUSamplingParametersData'. Missing", truncateStringVector(missing)))
}

if (!all(PSUSamplingParametersData$StratificationVariables$Stratum %in% StratificationVariablesTable$Stratum)){
missing <- PSUSamplingParametersData$StratificationVariables$Stratum[!(PSUSamplingParametersData$StratificationVariables$Stratum %in% StratificationVariablesTable$Stratum)]
stop(paste("Stratification variables are not provided for strata:", truncateStringVector(missing)))
}

if (!all(StratificationVariables %in% names(StratificationVariablesTable))){
stop("Not all StratificationVariables are in the StratificationVariablesTable")
}
if (!all(names(StratificationVariablesTable) %in% c("Stratum", StratificationVariables))){
stop("Some StratificationVariables are not in the StratificationVariablesTable")
}

stratCount <- StratificationVariablesTable[,list(strata=length(unique(get("Stratum")))), by=list(stratVarString=apply(StratificationVariablesTable[,.SD,.SDcol=names(StratificationVariablesTable)[names(StratificationVariablesTable) != "Stratum"]], 1, paste, collapse="/"))]
manyStrata <- stratCount[get("strata")>1,]
if (nrow(manyStrata)>0){
stop(paste("Stratification variables does not identify strata. Several strata overlap with:", truncateStringVector(manyStrata$stratVarString)))
}

PSUSamplingParametersData$StratificationVariables <- merge(PSUSamplingParametersData$StratificationVariables,
StratificationVariablesTable,
by="Stratum")

return(PSUSamplingParametersData)

return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns))
}

#' collapse strata, recalulate n/N and sampling weights
Expand Down Expand Up @@ -859,6 +1031,7 @@ covarAbundance <- function(Totals, PSUSampling, MeanOfMeans){

relPSUDomainSize <- sum(table$HHsamplingWeight[!duplicated(table$SamplingUnitId)])
relDomainSizes <- table[,list(relDomainSize=sum(get("HHsamplingWeight")[!duplicated(get("SamplingUnitId"))])), by=c("Stratum", "Domain")]

stopifnot(relPSUDomainSize<=1+1e-1)
stopifnot(all(relDomainSizes$relDomainSize <= relPSUDomainSize))

Expand Down Expand Up @@ -906,6 +1079,7 @@ covarVariables <- function(Totals, PSUSampling, MeanOfMeans, Abundance){

relPSUDomainSize <- sum(table$HHsamplingWeight[!duplicated(table$SamplingUnitId)])
relDomainSizes <- table[,list(relDomainSize=sum(get("HHsamplingWeight")[!duplicated(get("SamplingUnitId"))])), by=c("Stratum", "Domain")]

stopifnot(relPSUDomainSize<=1+1e-1)
stopifnot(all(relDomainSizes$relDomainSize <= relPSUDomainSize))

Expand Down Expand Up @@ -1581,6 +1755,12 @@ AnalyticalRatioEstimate <- function(AnalyticalPopulationEstimateData, StoxLandin

}

#'
#' @noRd
AggregateAnalyticalEstimate <- function(AnalyticalPopulationEstimateData, RetainStrata=character(), AggregateStratumName=character()){
# consider an option for combining with several 'AnalyticalPopulationEstimateData'
}

#' Fills in unsampled strata according to 'strict' after new domains and new strata has been inferred and added to the estimation object
#' @noRd
fillStrict <- function(extendedAnalyticalPopulationEstimateData){
Expand Down Expand Up @@ -2201,8 +2381,3 @@ InterpolateAnalyticalDomainEstimates <- function(AnalyticalPopulationEstimateDat
}

}

#' @noRd
ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){

}
40 changes: 39 additions & 1 deletion R/StoxDataTypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -2402,7 +2402,21 @@ stoxFunctionAttributes <- list(
StratificationColumns = "stratificationcolumns"
),
functionParameterDefaults = list(
DefinitionMethod = "AdHocStoxBiotic"
DefinitionMethod = "ProportionalPoissonSampling"
),
functionArgumentHierarchy = list(
StratificationColumns = list(
DefinitionMethod = "AdHocStoxBiotic"
),
StratumName = list(
DefinitionMethod = "ProportionalPoissonSampling"
),
Quota = list(
DefinitionMethod = "ProportionalPoissonSampling"
),
ExpectedSampleSize = list(
DefinitionMethod = "ProportionalPoissonSampling"
)
)
),
ReadPSUSamplingParameters = list(
Expand All @@ -2413,6 +2427,15 @@ stoxFunctionAttributes <- list(
FileName = "filePath"
)
),
AddPsuStratificationVariables = list(
functionType = "modelData",
functionCategory = "baseline",
functionOutputDataType = "PSUSamplingParametersData",
functionParameterFormat = list(
StratificationVariables = "stratificationvariablesvector",
StratificationVariablesTable = "stratificationvariablestable"
)
),
AssignPSUSamplingParameters = list(
functionType = "modelData",
functionCategory = "baseline",
Expand Down Expand Up @@ -3214,6 +3237,21 @@ processPropertyFormats <- list(
return(pv)
},
variableTypes = "character"
),
stratificationvariablesvector = list(
class = "vector",
title = "Names of stratification variables to add",
variableTypes = "character"
),
stratificationvariablestable = list(
class = "table",
title = "Table of Stratification variables for each Stratum",
columnNames = function(StratificationVariables) {
c("Stratum", StratificationVariables)
},
variableTypes = function(StratificationVariables) {
rep("character", 1 + length(StratificationVariables))
}
)
)

Binary file modified inst/extdata/functionArguments.rds
Binary file not shown.
Loading
Loading