diff --git a/NAMESPACE b/NAMESPACE index d4a106c3..786c9aad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(AddGearGroupStoxBiotic) export(AddGearGroupStoxLanding) export(AddPeriodStoxBiotic) export(AddPeriodStoxLanding) +export(AddPsuStratificationVariables) export(AddStratumStoxBiotic) export(AddStratumStoxLanding) export(AnalyticalPSUEstimate) diff --git a/NEWS.md b/NEWS.md index 5a19b4d0..5aef5989 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ # 1.5.0-9003 +* Added function for annotating stratification variables to sampling parameters (AddPsuStratificationVariables) (#163) +* Added option for computing sampling parameters for Proportion Poisson Sampling from data records (option DefinitionMethod='ProportionalPoissonSampling' to ComputePSUSamplingParameters) (#165) * Added functions for sampling frame expansion (ExtendAnalyticalSamplingFrameCoverage), and domain interpolation (InterpolateAnalyticalDomainEstimates) (#154) * Changed how Stratification variables and Domain variables are matched to landings with AnalyticalRatioEstimate (# 125) * Replaced DefinePSUSamplingParameters (processdata) with ComputePSUSamplingParameters (no processdata) and ReadPSUSamplingParameters (no processdata). This change breaks some pre-release projects (v. v1.3-9006). (#127) diff --git a/R/StoxAnalyticalAnalysisFunctions.R b/R/StoxAnalyticalAnalysisFunctions.R deleted file mode 100644 index 6169a515..00000000 --- a/R/StoxAnalyticalAnalysisFunctions.R +++ /dev/null @@ -1,3 +0,0 @@ -#' @noRd -PrepareHorvitzThompsonDomainEstimate <- function(){} - diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 1ea1b791..4eca04ee 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -161,6 +161,53 @@ ReadPSUSamplingParameters <- function(FileName){ return(parseDesignParameters(FileName)) } +#' @noRd +computePpsParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Quota, StratumName, ExpectedSampleSize){ + + checkMandatory(StoxBioticData, "StoxBioticData") + checkMandatory(SamplingUnitId, "SamplingUnitId") + checkMandatory(Quota, "Quota") + checkMandatory(StratumName, "StratumName") + checkMandatory(ExpectedSampleSize, "ExpectedSampleSize") + + if (length(unique(StoxBioticData$SpeciesCategory$SpeciesCategory))!=1){ + stop(paste("The DefinitionMethod 'ProportionalPoissonSampling', requires only one species category to be present in sample records. Found:", + truncateStringVector(unique(StoxBioticData$SpeciesCategory$SpeciesCategory)))) + } + + if (!(SamplingUnitId %in% names(StoxBioticData$Haul))){ + stop(paste("The argument 'SamplingUnitId' must identify a variable in the 'Haul'-table of 'StoxBioticData")) + } + + if (any(is.na(StoxBioticData$Sample$CatchFractionWeight))){ + missing <- StoxBioticData$Sample$Sample[is.na(StoxBioticData$Sample$CatchFractionWeight)] + stop(paste("Cannot computed sampling parameters with missing catch weights, missing for samples: ", truncateStringVector(missing))) + } + n <- ExpectedSampleSize + + flatBiotic <- RstoxData::mergeByStoxKeys(StoxBioticData$Haul, StoxBioticData$Sample, StoxDataType = "StoxBiotic") + SelectionTable <- flatBiotic[,list(catchWeight=sum(get("CatchFractionWeight"))), by=list(SamplingUnitId=get(SamplingUnitId))] + SelectionTable$SelectionProbability <- SelectionTable$catchWeight / Quota + SelectionTable$HHsamplingWeight <- 1 / (SelectionTable$SelectionProbability * sum(1/SelectionTable$SelectionProbability)) + SelectionTable$InclusionProbability <- 1-((1-SelectionTable$SelectionProbability)**n) + SelectionTable$HTsamplingWeight <- 1 / (SelectionTable$InclusionProbability * sum(1/SelectionTable$InclusionProbability)) + SelectionTable$Order <- as.numeric(NA) + SelectionTable$Stratum <- StratumName + SelectionTable$SelectionDescription <- "" + SelectionTable <- SelectionTable[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] + + SampleTable <- data.table::data.table(Stratum=StratumName, N=as.numeric(NA), n=n, SelectionMethod="Poisson", FrameDescription="") + + StratificationVariables <- data.table::data.table(Stratum=StratumName) + + samplingParams <- list() + samplingParams$SampleTable <- SampleTable + samplingParams$SelectionTable <- SelectionTable + samplingParams$StratificationVariables <- StratificationVariables + + return(samplingParams) +} + #' Compute PSU Sampling Design Parameters #' @description #' Compute sampling parameters for Primary Sampling Units in multi-stage sampling. @@ -172,27 +219,152 @@ ReadPSUSamplingParameters <- function(FileName){ #' This is a reasonable approximation if within-strata sampling is approximately simple random selections, #' the sample intensitiy is low (only a small fraction of the population is sampled), #' and non-response is believed to be random. -#' @param DefinitionMethod 'AdHocStoxBiotic' +#' +#' If 'DefinitionMethod' is 'ProportionalPoissonSampling', Unstratified (singe stratum) Poission sampling with selection probabilities +#' proportional to catch size is assumed. 'SamplingUnitId' must be a variable on the Haul table of 'StoxBioticData' for this option, +#' and the data must contain only one species (SpeciesCategory in 'StoxBioticData'). SelectionProbabilities are assigned +#' based on the total catch of the species in each haul. Specifically, for a haul \eqn{i}; selectionprobabilites, \eqn{p_{i}} and inclusionprobabilities \eqn{\pi_{i}} +#' are calculated as: +#' +#' \deqn{p_{i}=\frac{w_{i}}{W}} +#' +#' \deqn{\pi_{i}=1-(1-p_{i})^{n}} +#' +#' where: +#' \itemize{ +#' \item \eqn{w_{i}} is the sum of all catch weights in haul \eqn{i} ('CatchFractionWeight' on the 'Sample' table of 'StoxBioticData') +#' \item \eqn{W} is the expected total catch in the fishery (argument 'Quota') +#' \item \eqn{n} is the expected sample size (argument 'ExpectedSampleSize') +#' } +#' If proportional poisson sampling was actually used to select the sampled records in 'StoxBioticData', +#' sampling parameters would have been obtained prior to sampling, and it is generally preferable to obtain these, +#' and import those via \code{\link[RstoxFDA]{ReadPSUSamplingParameters}}. Weight-records are sometimes corrected after +#' sampling parameters are calculated, and proper information about non-response can not be recalculated after the fact. +#' +#' Proportional poisson sampling also allows the sampler to combine rigour and pragmatism, by varying sampling parameters +#' in the course of sample selection. For instance 'n' may be changed during the sampling period, if non-response +#' turns out to be higher than expected. Such flexibilities are not +#' provided by this function, and the approximation may be severely compromised, if such pragmatism is not accounted for. +#' +#' @param DefinitionMethod 'AdHocStoxBiotic' or 'ProportionalPoissonSampling' #' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from #' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for. -#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. +#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. (for DefinitionMethod 'AdHocStoxBiotic') +#' @param StratumName name of the stratum sampling parameters are calculated for (for DefinitionMethod 'ProportionalPoissonSampling') +#' @param Quota expected total catch in sampling frame in kg (for DefinitionMethod 'ProportionalPoissonSampling') +#' @param ExpectedSampleSize the expected sample size for Possion sampling (for DefinitionMethod 'ProportionalPoissonSampling') #' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}} #' @examples -#' # parameters for simpler random haul-selection, stratified by GearGroup +#' # parameters for simple random haul-selection, stratified by GearGroup #' PSUparams <- ComputePSUSamplingParameters(RstoxFDA::StoxBioticDataExample, #' "AdHocStoxBiotic", #' "Haul", #' "GearGroup") +#' +#' # parameters for haul selection proportional to catch size. +#' calculatedPps <- RstoxFDA::ComputePSUSamplingParameters(RstoxFDA::CatchLotteryExample, +#' "ProportionalPoissonSampling", +#' "serialnumber", StratumName = +#' "Nordsjo", Quota = 124*1e6, +#' ExpectedSampleSize = 110) #' #' @export #' @concept StoX-functions #' @concept Analytical estimation #' @md -ComputePSUSamplingParameters <- function(StoxBioticData, DefinitionMethod=c("AdHocStoxBiotic"), SamplingUnitId=character(), StratificationColumns=character()){ +ComputePSUSamplingParameters <- function(StoxBioticData, DefinitionMethod=c("AdHocStoxBiotic", "ProportionalPoissonSampling"), SamplingUnitId=character(), StratificationColumns=character(), StratumName=character(), Quota=numeric(), ExpectedSampleSize=numeric()){ + + DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("AdHocStoxBiotic", "ProportionalPoissonSampling")) - DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("AdHocStoxBiotic")) + if (DefinitionMethod=="AdHocStoxBiotic"){ + return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns)) + } + else if (DefinitionMethod=="ProportionalPoissonSampling"){ + return(computePpsParametersStoxBiotic(StoxBioticData, SamplingUnitId, Quota, StratumName, ExpectedSampleSize)) + } + else{ + stop(paste("The option", DefinitionMethod, "is not recognized for the argument 'DefinitionMethod'")) + } + +} + +is.StratificationVariablesData <- function(StratificationVariablesTable){ + if(!data.table::is.data.table(StratificationVariablesTable)){ + return(FALSE) + } + if (!("Stratum" %in% names(StratificationVariablesTable))){ + return(FALSE) + } + return(TRUE) +} + +#' Add Stratification columns to 'PSUSamplingParametersData' +#' @description +#' Add additional variables to encode strata and its correspondance with census data (e.g. landings data). +#' @details +#' \code{\link[RstoxFDA]{PSUSamplingParametersData}} provide sampling parameters by strata. +#' Optionally, it may also contain additional variables that encode the stratification in terms of variables +#' available in other data sources, such as \code{\link[RstoxData]{StoxLandingData}}. This function allows +#' such variables to be added, if not already present. +#' +#' More detailed encoding of stratification is useful for +#' encoding the sampling frame of the design provided by 'PSUSamplingParametersData'. By encoding all strata +#' in terms of variables that are available in census-data, the correspondance between sampling frame and +#' target population can be encoded. This information will be available in downstream estimates (e.g. +#' \code{\link[RstoxFDA]{AnalyticalPopulationEstimate}}) and allow for pragmatic inference to +#' out-of-frame strata (via \code{\link[RstoxFDA]{ExtendAnalyticalSamplingFrameCoverage}}). +#' +#' @param PSUSamplingParametersData Sampling parameters stratification variables should be added to +#' @param StratificationVariables name of variables to add +#' @param StratificationVariablesTable value-combinations for the variables to add to each stratum +#' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @export +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +AddPsuStratificationVariables <- function(PSUSamplingParametersData, StratificationVariables, StratificationVariablesTable=data.table::data.table()){ + + checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData") + checkMandatory(StratificationVariables, "StratificationVariables") + checkMandatory(StratificationVariablesTable, "StratificationVariablesTable") + + if (length(names(PSUSamplingParametersData$StratificationVariables))>1){ + stop("'PSUSamplingParametersData' already has StratificationVariables") + } + + if (!is.StratificationVariablesData(StratificationVariablesTable)){ + stop("Invalid 'StratificationVariablesTable'.") + } + + if (!all(StratificationVariablesTable$Stratum %in% PSUSamplingParametersData$StratificationVariables$Stratum)){ + missing <- StratificationVariablesTable$Stratum[!(StratificationVariablesTable$Stratum %in% PSUSamplingParametersData$StratificationVariables$Stratum)] + stop(paste("Not all strata in 'StratificationVariablesTable' exist in 'PSUSamplingParametersData'. Missing", truncateStringVector(missing))) + } + + if (!all(PSUSamplingParametersData$StratificationVariables$Stratum %in% StratificationVariablesTable$Stratum)){ + missing <- PSUSamplingParametersData$StratificationVariables$Stratum[!(PSUSamplingParametersData$StratificationVariables$Stratum %in% StratificationVariablesTable$Stratum)] + stop(paste("Stratification variables are not provided for strata:", truncateStringVector(missing))) + } + + if (!all(StratificationVariables %in% names(StratificationVariablesTable))){ + stop("Not all StratificationVariables are in the StratificationVariablesTable") + } + if (!all(names(StratificationVariablesTable) %in% c("Stratum", StratificationVariables))){ + stop("Some StratificationVariables are not in the StratificationVariablesTable") + } + + stratCount <- StratificationVariablesTable[,list(strata=length(unique(get("Stratum")))), by=list(stratVarString=apply(StratificationVariablesTable[,.SD,.SDcol=names(StratificationVariablesTable)[names(StratificationVariablesTable) != "Stratum"]], 1, paste, collapse="/"))] + manyStrata <- stratCount[get("strata")>1,] + if (nrow(manyStrata)>0){ + stop(paste("Stratification variables does not identify strata. Several strata overlap with:", truncateStringVector(manyStrata$stratVarString))) + } + + PSUSamplingParametersData$StratificationVariables <- merge(PSUSamplingParametersData$StratificationVariables, + StratificationVariablesTable, + by="Stratum") + + return(PSUSamplingParametersData) - return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns)) } #' collapse strata, recalulate n/N and sampling weights @@ -859,6 +1031,7 @@ covarAbundance <- function(Totals, PSUSampling, MeanOfMeans){ relPSUDomainSize <- sum(table$HHsamplingWeight[!duplicated(table$SamplingUnitId)]) relDomainSizes <- table[,list(relDomainSize=sum(get("HHsamplingWeight")[!duplicated(get("SamplingUnitId"))])), by=c("Stratum", "Domain")] + stopifnot(relPSUDomainSize<=1+1e-1) stopifnot(all(relDomainSizes$relDomainSize <= relPSUDomainSize)) @@ -906,6 +1079,7 @@ covarVariables <- function(Totals, PSUSampling, MeanOfMeans, Abundance){ relPSUDomainSize <- sum(table$HHsamplingWeight[!duplicated(table$SamplingUnitId)]) relDomainSizes <- table[,list(relDomainSize=sum(get("HHsamplingWeight")[!duplicated(get("SamplingUnitId"))])), by=c("Stratum", "Domain")] + stopifnot(relPSUDomainSize<=1+1e-1) stopifnot(all(relDomainSizes$relDomainSize <= relPSUDomainSize)) @@ -1581,6 +1755,12 @@ AnalyticalRatioEstimate <- function(AnalyticalPopulationEstimateData, StoxLandin } +#' +#' @noRd +AggregateAnalyticalEstimate <- function(AnalyticalPopulationEstimateData, RetainStrata=character(), AggregateStratumName=character()){ + # consider an option for combining with several 'AnalyticalPopulationEstimateData' +} + #' Fills in unsampled strata according to 'strict' after new domains and new strata has been inferred and added to the estimation object #' @noRd fillStrict <- function(extendedAnalyticalPopulationEstimateData){ @@ -2201,8 +2381,3 @@ InterpolateAnalyticalDomainEstimates <- function(AnalyticalPopulationEstimateDat } } - -#' @noRd -ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){ - -} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 6e4ef485..bcde6fac 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -2402,7 +2402,21 @@ stoxFunctionAttributes <- list( StratificationColumns = "stratificationcolumns" ), functionParameterDefaults = list( - DefinitionMethod = "AdHocStoxBiotic" + DefinitionMethod = "ProportionalPoissonSampling" + ), + functionArgumentHierarchy = list( + StratificationColumns = list( + DefinitionMethod = "AdHocStoxBiotic" + ), + StratumName = list( + DefinitionMethod = "ProportionalPoissonSampling" + ), + Quota = list( + DefinitionMethod = "ProportionalPoissonSampling" + ), + ExpectedSampleSize = list( + DefinitionMethod = "ProportionalPoissonSampling" + ) ) ), ReadPSUSamplingParameters = list( @@ -2413,6 +2427,15 @@ stoxFunctionAttributes <- list( FileName = "filePath" ) ), + AddPsuStratificationVariables = list( + functionType = "modelData", + functionCategory = "baseline", + functionOutputDataType = "PSUSamplingParametersData", + functionParameterFormat = list( + StratificationVariables = "stratificationvariablesvector", + StratificationVariablesTable = "stratificationvariablestable" + ) + ), AssignPSUSamplingParameters = list( functionType = "modelData", functionCategory = "baseline", @@ -3214,6 +3237,21 @@ processPropertyFormats <- list( return(pv) }, variableTypes = "character" + ), + stratificationvariablesvector = list( + class = "vector", + title = "Names of stratification variables to add", + variableTypes = "character" + ), + stratificationvariablestable = list( + class = "table", + title = "Table of Stratification variables for each Stratum", + columnNames = function(StratificationVariables) { + c("Stratum", StratificationVariables) + }, + variableTypes = function(StratificationVariables) { + rep("character", 1 + length(StratificationVariables)) + } ) ) diff --git a/inst/extdata/functionArguments.rds b/inst/extdata/functionArguments.rds index eae81ec8..6020bea0 100644 Binary files a/inst/extdata/functionArguments.rds and b/inst/extdata/functionArguments.rds differ diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index 46ac74e1..95a0d10b 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -33,7 +33,45 @@ expect_true(abs((sum(1/designParamsCorrected$SelectionTable$InclusionProbability #HH should be approximately the same after non-response correction expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbability)-mean(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) -#define from data + +#define pps-parameters from data +calculatedPps <- RstoxFDA::ComputePSUSamplingParameters(RstoxFDA::CatchLotteryExample, + "ProportionalPoissonSampling", + "serialnumber", StratumName = + "Nordsjo", Quota = 124*1e6, + ExpectedSampleSize = 110) +expect_true(RstoxFDA:::is.PSUSamplingParametersData(calculatedPps)) +comp <- merge(calculatedPps$SelectionTable, + RstoxFDA::CatchLotterySamplingExample$SelectionTable + , by=c("Stratum", "SamplingUnitId")) +comp <- comp[!(comp$SamplingUnitId %in% c("38408", "38409"))] #remove catches that has had weights corrected or something +expect_true(all(comp$SelectionProbability.x - comp$SelectionProbability.y < 1e-10)) +expect_true(all(comp$InclusionProbability.x - comp$InclusionProbability.y < 1e-10)) + +# add stratification columns +expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(VesselLengthGroup="o15m", VesselFlag="NOR")), "Invalid 'StratificationVariablesTable'") +expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("s1", "s2"), VesselLengthGroup=c("o15m","o15m"), VesselFlag=c("NOR","NOR"))), "Not all strata in 'StratificationVariablesTable' exist in 'PSUSamplingParametersData'.") +calculatedPps2 <- calculatedPps +calculatedPps2$StratificationVariables <- data.table::data.table(Stratum=c("s1","s2")) +expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps2, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("s1", "s2"), VesselLengthGroup=c("o15m","o15m"), VesselFlag=c("NOR","NOR"))), "Stratification variables does not identify strata. Several strata overlap with") + +calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum="Nordsjo", VesselFlag="NOR")) +expect_true(all(c("Stratum", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables))) +expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 1) + +calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum="Nordsjo", VesselLengthGroup="o15m", VesselFlag="NOR")) +expect_true(all(c("Stratum", "VesselLengthGroup", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables))) +expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 1) + +calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("Nordsjo", "Nordsjo"), VesselLengthGroup=c("u15m","o15m"), VesselFlag=c("NOR"))) +expect_true(all(c("Stratum", "VesselLengthGroup", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables))) +expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 2) + +#sampling weights are different, because of the two removed hauls above, but should be proportional +expect_true(length(unique(round(comp$HHsamplingWeight.x / comp$HHsamplingWeight.y, digits=10)))==1) +expect_true(length(unique(round(comp$HTsamplingWeight.x / comp$HTsamplingWeight.y, digits=10)))==1) + +#define equal prob fixed-size selection from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) designParamsSB <- RstoxFDA:::ComputePSUSamplingParameters(StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsSB)) @@ -123,7 +161,9 @@ weights <- lsCol$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratu expect_true(all(abs(weights$meanN-1) < 1e-6)) -#test estimate with HansenHurwitzDomainEstimate + + +#test estimate with HansenHurwitzDomainEstimate, read params data <- RstoxFDA::CatchLotteryExample indSampling <- RstoxFDA:::ComputeIndividualSamplingParameters(data, "SRS", c("IndividualAge")) @@ -306,7 +346,15 @@ popEstPD <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEstPD) expect_true(abs(popEst$Abundance$Abundance - sum(popEstPD$Abundance$Abundance))/popEst$Abundance$Abundance < 1e-2) expect_true(abs(popEst$Variables$Total - sum(popEstPD$Variables$Total))/popEst$Variables$Total < 1e-2) +#test PSU domains, computed PSU sampling parameters +ex2 <- RstoxFDA::CatchLotteryExample +ex2$SpeciesCategory$SpeciesCategory <- "061104" +stationDesignComp <- RstoxFDA::ComputePSUSamplingParameters(StoxBioticData = ex2, "ProportionalPoissonSampling", "serialnumber", StratumName = "Nordsjo", Quota = 124*1e6, ExpectedSampleSize = 110) +stationDesignComp <- RstoxFDA::AssignPSUSamplingParameters(stationDesignComp, ex2, "serialnumber", "Haul", "MissingAtRandom") +psuEstCompPD <- RstoxFDA:::AnalyticalPSUEstimate(ex2, srs, c("IndividualRoundWeight"), c("IndividualAge"), c("Gear")) +popEstCompPD <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesignComp, psuEstCompPD) +expect_true(abs(sum(popEstCompPD$Abundance$Abundance) - sum(popEstPD$Abundance$Abundance))/sum(popEstCompPD$Abundance$Abundance) < 1e-2) # # Test ratio estimation # diff --git a/man/AddPsuStratificationVariables.Rd b/man/AddPsuStratificationVariables.Rd new file mode 100644 index 00000000..4e095397 --- /dev/null +++ b/man/AddPsuStratificationVariables.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R +\name{AddPsuStratificationVariables} +\alias{AddPsuStratificationVariables} +\title{Add Stratification columns to 'PSUSamplingParametersData'} +\usage{ +AddPsuStratificationVariables( + PSUSamplingParametersData, + StratificationVariables, + StratificationVariablesTable = data.table::data.table() +) +} +\arguments{ +\item{PSUSamplingParametersData}{Sampling parameters stratification variables should be added to} + +\item{StratificationVariables}{name of variables to add} + +\item{StratificationVariablesTable}{value-combinations for the variables to add to each stratum} +} +\value{ +\code{\link[RstoxFDA]{PSUSamplingParametersData}} +} +\description{ +Add additional variables to encode strata and its correspondance with census data (e.g. landings data). +} +\details{ +\code{\link[RstoxFDA]{PSUSamplingParametersData}} provide sampling parameters by strata. +Optionally, it may also contain additional variables that encode the stratification in terms of variables +available in other data sources, such as \code{\link[RstoxData]{StoxLandingData}}. This function allows +such variables to be added, if not already present. + +More detailed encoding of stratification is useful for +encoding the sampling frame of the design provided by 'PSUSamplingParametersData'. By encoding all strata +in terms of variables that are available in census-data, the correspondance between sampling frame and +target population can be encoded. This information will be available in downstream estimates (e.g. +\code{\link[RstoxFDA]{AnalyticalPopulationEstimate}}) and allow for pragmatic inference to +out-of-frame strata (via \code{\link[RstoxFDA]{ExtendAnalyticalSamplingFrameCoverage}}). +} +\concept{Analytical estimation} +\concept{StoX-functions} diff --git a/man/ComputePSUSamplingParameters.Rd b/man/ComputePSUSamplingParameters.Rd index 96f7a26d..5d2f333c 100644 --- a/man/ComputePSUSamplingParameters.Rd +++ b/man/ComputePSUSamplingParameters.Rd @@ -6,19 +6,28 @@ \usage{ ComputePSUSamplingParameters( StoxBioticData, - DefinitionMethod = c("AdHocStoxBiotic"), + DefinitionMethod = c("AdHocStoxBiotic", "ProportionalPoissonSampling"), SamplingUnitId = character(), - StratificationColumns = character() + StratificationColumns = character(), + StratumName = character(), + Quota = numeric(), + ExpectedSampleSize = numeric() ) } \arguments{ \item{StoxBioticData}{\code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from} -\item{DefinitionMethod}{'AdHocStoxBiotic'} +\item{DefinitionMethod}{'AdHocStoxBiotic' or 'ProportionalPoissonSampling'} \item{SamplingUnitId}{name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for.} -\item{StratificationColumns}{name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling.} +\item{StratificationColumns}{name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. (for DefinitionMethod 'AdHocStoxBiotic')} + +\item{StratumName}{name of the stratum sampling parameters are calculated for (for DefinitionMethod 'ProportionalPoissonSampling')} + +\item{Quota}{expected total catch in sampling frame in kg (for DefinitionMethod 'ProportionalPoissonSampling')} + +\item{ExpectedSampleSize}{the expected sample size for Possion sampling (for DefinitionMethod 'ProportionalPoissonSampling')} } \value{ \code{\link[RstoxFDA]{PSUSamplingParametersData}} @@ -34,13 +43,46 @@ selection with replacement and complete response. This is a reasonable approximation if within-strata sampling is approximately simple random selections, the sample intensitiy is low (only a small fraction of the population is sampled), and non-response is believed to be random. + +If 'DefinitionMethod' is 'ProportionalPoissonSampling', Unstratified (singe stratum) Poission sampling with selection probabilities +proportional to catch size is assumed. 'SamplingUnitId' must be a variable on the Haul table of 'StoxBioticData' for this option, +and the data must contain only one species (SpeciesCategory in 'StoxBioticData'). SelectionProbabilities are assigned +based on the total catch of the species in each haul. Specifically, for a haul \eqn{i}; selectionprobabilites, \eqn{p_{i}} and inclusionprobabilities \eqn{\pi_{i}} +are calculated as: + +\deqn{p_{i}=\frac{w_{i}}{W}} + +\deqn{\pi_{i}=1-(1-p_{i})^{n}} + +where: +\itemize{ +\item \eqn{w_{i}} is the sum of all catch weights in haul \eqn{i} ('CatchFractionWeight' on the 'Sample' table of 'StoxBioticData') +\item \eqn{W} is the expected total catch in the fishery (argument 'Quota') +\item \eqn{n} is the expected sample size (argument 'ExpectedSampleSize') +} +If proportional poisson sampling was actually used to select the sampled records in 'StoxBioticData', +sampling parameters would have been obtained prior to sampling, and it is generally preferable to obtain these, +and import those via \code{\link[RstoxFDA]{ReadPSUSamplingParameters}}. Weight-records are sometimes corrected after +sampling parameters are calculated, and proper information about non-response can not be recalculated after the fact. + +Proportional poisson sampling also allows the sampler to combine rigour and pragmatism, by varying sampling parameters +in the course of sample selection. For instance 'n' may be changed during the sampling period, if non-response +turns out to be higher than expected. Such flexibilities are not +provided by this function, and the approximation may be severely compromised, if such pragmatism is not accounted for. } \examples{ - # parameters for simpler random haul-selection, stratified by GearGroup + # parameters for simple random haul-selection, stratified by GearGroup PSUparams <- ComputePSUSamplingParameters(RstoxFDA::StoxBioticDataExample, "AdHocStoxBiotic", "Haul", "GearGroup") + + # parameters for haul selection proportional to catch size. + calculatedPps <- RstoxFDA::ComputePSUSamplingParameters(RstoxFDA::CatchLotteryExample, + "ProportionalPoissonSampling", + "serialnumber", StratumName = + "Nordsjo", Quota = 124*1e6, + ExpectedSampleSize = 110) } \concept{Analytical estimation} diff --git a/man/processPropertyFormats.Rd b/man/processPropertyFormats.Rd index 57ccef88..02f64be8 100644 --- a/man/processPropertyFormats.Rd +++ b/man/processPropertyFormats.Rd @@ -5,7 +5,7 @@ \alias{processPropertyFormats} \title{Define the process property formats for inclusion in stox UI} \format{ -An object of class \code{list} of length 23. +An object of class \code{list} of length 25. } \usage{ processPropertyFormats diff --git a/man/stoxFunctionAttributes.Rd b/man/stoxFunctionAttributes.Rd index 60c49506..22f261a6 100644 --- a/man/stoxFunctionAttributes.Rd +++ b/man/stoxFunctionAttributes.Rd @@ -5,7 +5,7 @@ \alias{stoxFunctionAttributes} \title{Function specification for inclusion in StoX UI} \format{ -An object of class \code{list} of length 59. +An object of class \code{list} of length 60. } \usage{ stoxFunctionAttributes