From 9b0af511494bbd7236bc4df03a63dab8d5ec0209 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Sat, 14 Oct 2023 16:46:19 +0200 Subject: [PATCH 01/24] Skeleton for analytical estimator functions --- R/StoxAnalyticalAnalysisFunctions.R | 3 +++ R/StoxAnalyticalBaselineFunctions.R | 14 ++++++++++++++ R/StoxAnalyticalReportFunction.R | 2 ++ 3 files changed, 19 insertions(+) create mode 100644 R/StoxAnalyticalAnalysisFunctions.R create mode 100644 R/StoxAnalyticalBaselineFunctions.R create mode 100644 R/StoxAnalyticalReportFunction.R diff --git a/R/StoxAnalyticalAnalysisFunctions.R b/R/StoxAnalyticalAnalysisFunctions.R new file mode 100644 index 00000000..6169a515 --- /dev/null +++ b/R/StoxAnalyticalAnalysisFunctions.R @@ -0,0 +1,3 @@ +#' @noRd +PrepareHorvitzThompsonDomainEstimate <- function(){} + diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R new file mode 100644 index 00000000..29ac2864 --- /dev/null +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -0,0 +1,14 @@ +#' @noRd +DefineSamplingDesignParameters <- function(){} + +#' @noRd +AssignIndividualDesignParameters <- function(){} + +#' @noRd +AssignPSUDesignParameters <- function(){} + +#' @noRd +DefinePSUCoInclusionProbabilities <- function(){} + +#' @noRd +ProbabilisticSuperIndividuals <- function(){} \ No newline at end of file diff --git a/R/StoxAnalyticalReportFunction.R b/R/StoxAnalyticalReportFunction.R new file mode 100644 index 00000000..34b8bd54 --- /dev/null +++ b/R/StoxAnalyticalReportFunction.R @@ -0,0 +1,2 @@ +#' @noRd +ReportAnalyticalCatchAtAge <- function(){} \ No newline at end of file From 585d167c43e22a72880431e6762c65c627418dae Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Sun, 15 Oct 2023 20:13:53 +0200 Subject: [PATCH 02/24] fixed building dependency evaluate for macOS 4.2 --- .github/workflows/check-full.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index a9703083..a88b0afe 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -157,6 +157,8 @@ jobs: message("Installing the following packages using default type:\n", paste0(missing, collapse = ", ")) utils::install.packages(missing, dependencies = TRUE) } + #binary evaluate build seems to be broken for macOS-lates R 4.2 + utils::install.packages("evaluate", dependencies = TRUE, type="source") shell: Rscript {0} From 13e10b929c5ca205a5fc364e67b36901c5e0ac4f Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Sun, 15 Oct 2023 20:14:49 +0200 Subject: [PATCH 03/24] fixed building dependency evaluate for macOS 4.2 --- .github/workflows/check-full.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index a88b0afe..a9703083 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -157,8 +157,6 @@ jobs: message("Installing the following packages using default type:\n", paste0(missing, collapse = ", ")) utils::install.packages(missing, dependencies = TRUE) } - #binary evaluate build seems to be broken for macOS-lates R 4.2 - utils::install.packages("evaluate", dependencies = TRUE, type="source") shell: Rscript {0} From 3a861f7f11e0ccd0d985cc01a454cbf093fe7e0a Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Mon, 16 Oct 2023 22:16:15 +0200 Subject: [PATCH 04/24] Added example data for catch lottery parameters --- inst/dataPrepScripts/prepDesignParameters.R | 120 ++++++++++++++++++ .../lotteryParameters/lotteryDesignNSH.txt | 11 ++ 2 files changed, 131 insertions(+) create mode 100644 inst/dataPrepScripts/prepDesignParameters.R create mode 100644 inst/testresources/lotteryParameters/lotteryDesignNSH.txt diff --git a/inst/dataPrepScripts/prepDesignParameters.R b/inst/dataPrepScripts/prepDesignParameters.R new file mode 100644 index 00000000..b6c8484f --- /dev/null +++ b/inst/dataPrepScripts/prepDesignParameters.R @@ -0,0 +1,120 @@ +# +# Prepares example for catch lotteri design parameters +# + +#' Reads catch lottery parameters from file as exported by lottery system Pr oct 2023. +#' @param filename path to file with lottery parameters +#' @return ~\code{\link[data.table]{data.table}} with colmns: +#' \describe{ +#' \item{aar}{year (integer)} +#' \item{RC}{Radio call signal of vessel (character)} +#' \item{SQ}{serialnumber for message (integer) identifies message given year, vessel and message recipient} +#' \item{TM}{ERS message type (character). HIA means departure message sent to IMR catch lottery (determines target species and therefor inclusion in lottery). HIF means catch message sent to IMR catch lottery, also encoding wheter sample is requested, along with lottery parameters} +#' \item{BD}{Date for start of fishing operation in UTC: YYYYMMDD} +#' \item{BT}{Tome for start of fishing operation in UTC: HHMM} +#' \item{Svar}{Code for whether sample is requested. Code 641 means that a sample is requested. 642 means sample is not requested. 643 means no (more) samples will be requested from this trip (until ned departure message).} +#' \item{i.prob}{Inclusion probability used in sample selection. Assigned also to samples not selected} +#' \item{lotteri}{Identifier for lottery, sampling frame, all inclusion probabilities are conditioned only on catch being in lottery} +#' \item{HIF.stratum}{Any stratification used in setting sampling parameters. Stratification is already factored into inclusion probabilities, some other column that goes into inclusion prob calculation depends on HIF.stratum (e.g. kapasitet)} +#' \item{kvote}{quota / expected total catch, used in calculation of inclusion probabilities} +#' \item{kapasitet}{sampling capasity / expected number of samples, used in calculation of inclusion probabilities} +#' \item{lotteri.kg}{reported catch in kg that was used in calculation of inclusion probability} +#' +#' The fields RC, SQ, TM, BC and BT are defined in the ERS regulation (https://lovdata.no/dokument/SF/forskrift/2009-12-21-1743). +#' } +#' @noRd +parseLotteryFile <- function(filename){ + lotteriparams <- data.table::fread(filename, sep = "\t", dec=".", header = T, colClasses = c("integer", "character", "integer", "character", "character", "character", "character", "numeric", "character", "character", "numeric", "numeric", "numeric")) + return(lotteriparams) +} + +#' Prepares PSU design parameters from catch lottery data and associates them with serial number. +#' @description +#' Vessels are not always compliant about time-zone usage. Catches are therefore associated with the closest lottery message that does not exceed a difference of 'maxDiff' hours. +#' @param lotteryParams lottery parameters as read by parseLotteryFile +#' @param StoxBiotic ~\code{\link[RstoxData]{StoxBioticData}} with hauls sampled by catch lottery +#' @param platformCodes table relating platform codes to other identifiers. As downloaded from the platform table in NMD reference. Contains columns: "Platformnumber", "syscode", "sysname", "Value", "Description", "Deprecated", "Valid from", "Valid to, "New code" +#' @param maxDiff the highest acceptable difference in time (hours) between StoxBiotic station time and lottery BD and BT time. +#' @noRd +prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDiff=2){ + + #partition lottery messages into set where sample was requested and other set + lotteryParamsFiltered <- lotteryParams + lotteryParamsFiltered$lotteryMessage <- paste(lotteryParamsFiltered$RC, lotteryParamsFiltered$BD, lotteryParamsFiltered$BT) + lotteryParamsFiltered$lotteryDateTime <- as.POSIXct(paste(lotteryParamsFiltered$BD,lotteryParamsFiltered$BT, sep=""), format="%Y%m%d%H%M", tz="UTC") + sampleRequested <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="641",] + sampleNotRequested <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="642",] + sampleNotRequestedTrip <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="631",] + + stationTable <- merge(StoxBiotic$Station, StoxBiotic$Haul) + + # annotate biotic data with the RC that was valid at the time of catch. + ITU <- data.table::data.table(platformCodes[platformCodes$sysname == "ITU Call Sign",]) + ITU$RC <- ITU$Value + ITU <- ITU[,.SD,.SDcol=c("Platformnumber", "RC", "Valid to")] + ITU <- merge(ITU, stationTable[,.SD, .SDcol=c("CatchPlatform", "DateTime")], by.x="Platformnumber", by.y="CatchPlatform") + ITU <- ITU[ITU$`Valid to`>=ITU$DateTime,] + ITU <- ITU[order(ITU$`Valid to`, decreasing = T),] + ITU <- ITU[!duplicated(paste(ITU$Platformnumber, ITU$DateTime)),] + + stationTable <- merge(stationTable, ITU[,.SD, .SDcol=c("Platformnumber", "RC", "DateTime")], by.x=c("CatchPlatform", "DateTime"), by.y=c("Platformnumber", "DateTime"), all.x=T) + + if (any(is.na(stationTable$RC))){ + stop("Could not find ITU call signal for all platforms, for the given dates.") + } + + stationTable <- merge(stationTable, sampleRequested, by="RC", all.y=T) + stationTable$timeDiff <- abs(difftime(stationTable$DateTime, stationTable$lotteryDateTime, unit="h")) + stationTable <- stationTable[is.na(stationTable$timeDiff) | stationTable$timeDiff<=maxDiff,] + stationTable <- stationTable[order(stationTable$timeDiff, decreasing=F),] + stationTable <- stationTable[is.na(stationTable$timeDiff) | !duplicated(stationTable$lotteryMessage),] + + if (!all(StoxBiotic$Haul$HaulKey %in% stationTable$HaulKey)){ + missing <- StoxBiotic$Haul[!(StoxBiotic$Haul$HaulKey %in% stationTable$HaulKey),] + warning(paste(nrow(missing), " samples are not requested from lottery and omitted from Design Parameters table.")) + } + + stationTable$description <- paste(stationTable$lotteri, stationTable$HIF.stratum, stationTable$lotteryMessage, sep="/") + stationTable$SelectionProbability <- stationTable$lotteri.kg/(stationTable$kvoteT*1000) + selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "kapasitet", "description")] + names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "n", "SelectionDescription") + if (length(unique(stationTable$kapasitet))!=1){ + selectionTable$SelectionProbability <- as.numeric(NA) + } + selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] + selectionTable$SelectionDescription <- as.character(NA) #remove vessel identifying descriptions + + stopifnot(length(unique(stationTable$HIF.stratum))==1) + sampleTable <- data.table(Stratum = stationTable$HIF.stratum[[1]], N = sum(!is.na(lotteryParams$i.prob))) + sampleTable$n <- as.numeric(NA) + if (length(unique(stationTable$kapasitet))==1){ + sampleTable$n <- stationTable$kapasitet[[1]] + } + sampleTable$SelectionMethod <- "Poission" + sampleTable$Finite <- TRUE + stopifnot(length(unique(stationTable$lotteri))==1) + sampleTable$FrameDescription <- stationTable$lotteri[[1]] + + designTable <- list() + designTable$sampleTable <- sampleTable + designTable$selectionTable <- selectionTable + + return(designTable) +} + +#' Save design table +#' @noRd +saveDesignTable <- function(filename, designTable){ + data.table::fwrite(file = filename, x=merge(designTable$sampleTable, designTable$selectionTable), sep = "\t") +} + +lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") +lotteryParams <- lotteryParams[lotteryParams$lotteri=="Sild2022" & lotteryParams$HIF.stratum=="Nordsjo",] +bioData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic("~/bioticsets/lotterieksempel/biotic_cruiseNumber_19-2022-20_Silde-sampling_2023-07-06T22.00.19.567Z.xml")) +filterexpression <- list() +filterexpression$Station <- "DateTime < '2022-01-21 00:00:00 UTC'" +bioFiltered <- RstoxData::FilterStoxBiotic(bioData, filterexpression, FilterUpwards = T) +platformCodes <- readxl::read_excel("~/codelists/NMDeksempler/platform.xlsx", 2) + +designParams <- prepDesignParamFile(lotteryParams, bioFiltered, platformCodes) +saveDesignTable("inst/testresources/lotteryParameters/lotteryDesignNSH.txt", designParams) \ No newline at end of file diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt new file mode 100644 index 00000000..d17b4d8f --- /dev/null +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -0,0 +1,11 @@ +Stratum N n SelectionMethod Finite FrameDescription SamplingUnitId InclusionProbability SelectionProbability SelectionDescription +Nordsjo 48 110 Poission TRUE Sild2022 38401 0.213196915139625 0.00217741935483871 +Nordsjo 48 110 Poission TRUE Sild2022 38440 0.1700984890555 0.00169354838709677 +Nordsjo 48 110 Poission TRUE Sild2022 38403 0.0767615148687669 0.000725806451612903 +Nordsjo 48 110 Poission TRUE Sild2022 38446 0.267229615158194 0.00282258064516129 +Nordsjo 48 110 Poission TRUE Sild2022 38402 0.108975250145035 0.00104838709677419 +Nordsjo 48 110 Poission TRUE Sild2022 38404 0.162691408611336 0.00161290322580645 +Nordsjo 48 110 Poission TRUE Sild2022 38414 0.240689370659604 0.0025 +Nordsjo 48 110 Poission TRUE Sild2022 38411 0.458713264018625 0.00556451612903226 +Nordsjo 48 110 Poission TRUE Sild2022 0.068529393639208 0.000645161290322581 +Nordsjo 48 110 Poission TRUE Sild2022 0.132402285228299 0.00129032258064516 From 6a831d2b42e528025f19136bbfe7912a4f363cf5 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Tue, 17 Oct 2023 17:35:51 +0200 Subject: [PATCH 05/24] Initial Stox function for defining design parameters. --- NAMESPACE | 1 + R/StoxAnalyticalBaselineFunctions.R | 124 +++++++++++++++++- R/StoxDataTypes.R | 52 ++++++++ inst/dataPrepScripts/prepDesignParameters.R | 11 +- .../lotteryParameters/lotteryDesignNSH.txt | 76 +++++++++-- .../test-StoxAnalyticalBaselineFunctions.R | 6 + man/DefineSamplingDesignParameters.Rd | 44 +++++++ man/SamplingDesignParametersData.Rd | 53 ++++++++ 8 files changed, 348 insertions(+), 19 deletions(-) create mode 100644 inst/tinytest/test-StoxAnalyticalBaselineFunctions.R create mode 100644 man/DefineSamplingDesignParameters.Rd create mode 100644 man/SamplingDesignParametersData.Rd diff --git a/NAMESPACE b/NAMESPACE index a9105528..8d0f02bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(DefineAreaPosition) export(DefineCarNeighbours) export(DefineLengthConversionParameters) export(DefinePeriod) +export(DefineSamplingDesignParameters) export(DefineStockSplittingParameters) export(DefineWeightConversionFactor) export(FilterAgeLengthOutliersStoxBiotic) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 29ac2864..4c66b1a6 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -1,5 +1,127 @@ +#' Construct design parameters assuming FSWOR, non-finite, equal prob, potentially stratified #' @noRd -DefineSamplingDesignParameters <- function(){} +assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, StratificationColumns=c(), OrderColumn=NULL){ + stop("Not Implemented") +} + +#' parse design parameters from tab delimited file +#' @noRd +parseDesignParameters <- function(filename){ + + colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", Finite="logical", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", SelectionProbability="numeric", SelectionDescription="character") + headers <- data.table::fread(filename, sep="\t", dec=".", header = T, nrows = 1) + if (!all(names(colClasses) %in% names(headers))){ + missing <- names(colClasses)[!(names(colClasses) %in% names(headers)),] + stop(paste("Invalid format. Missing columns:", paste(missing, collapse=","))) + } + stratificationColumns <- c() + for (n in names(headers)){ + if (!n %in% names(colClasses)){ + stratificationColumns <- c(stratificationColumns, n) + newnames <- c(names(colClasses), n) + colClasses <- c(colClasses, "character") + names(colClasses) <- newnames + } + } + + designParameters <- data.table::fread(filename, sep="\t", dec=".", header = T, colClasses = colClasses, na.strings = c("")) + + selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] + sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable))])] + stratificationTable <- data.table::data.table(StratificationVariables=c(stratificationColumns)) + + if (any(is.na(sampleTable$Stratum)) | any(is.na(selectionTable$Stratum))){ + stop("Invalid design specification. The mandatory column 'Stratum' may not contain missing values (NA).") + } + if (any(is.na(sampleTable$Finite))){ + stop("Invalid design specification. The mandatory column 'Finite' may not contain missing values (NA).") + } + if (any(is.na(sampleTable$SelectionMethod))){ + stop("Invalid design specification. The mandatory column 'SelectionMethod' may not contain missing values (NA).") + } + + for (n in stratificationColumns){ + stopifnot(n %in% names(sampleTable)) + if (any(is.na(sampleTable[[n]]))){ + stop(paste("Invalid design specification. The stratification column", n, "may not contain missing values (NA).")) + } + } + + sampleTableStrings <- apply(sampleTable, 1, paste, collapse="/") + sampleTable <- sampleTable[!duplicated(sampleTableStrings),] + duplicatedStrata <- sampleTable$Stratum[duplicated(sampleTable$Stratum)] + if (length(duplicatedStrata)>0){ + stop(paste("Invalid design specification. The column stratum must uniquely identify all sample table variables. Duplicates found for:", paste(duplicatedStrata, collapse=","))) + } + + if (length(stratificationColumns) > 0){ + stratificationVariableStrings <- apply(sampleTable[,.SD, .SDcol=stratificationColumns], 1, paste, collapse="/") + duplicatedStrata <- sampleTable$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + stop(paste("Invalid design spesification. The stratification variables must uniquely identify a stratum. Duplicates found for:", paste(duplicatedStrata, collapse=","))) + } + } + + sampleTable <- sampleTable[!duplicated(sampleTable$Stratum),] + + validSelectionMethod <- c("Poisson", "FSWR", "FSWOR") + if (!all(sampleTable$SelectionMethod %in% validSelectionMethod)){ + invalid <- sampleTable$SelectionMethod[!(sampleTable$SelectionMethod %in% validSelectionMethod)] + stop(paste("Invalid design specification. Unkown selection method:", paste(invalid, collapse=","))) + } + + designParameters <- list() + designParameters$sampleTable <- sampleTable + designParameters$selectionTable <- selectionTable + designParameters$stratificationVariables <- stratificationTable + + return(designParameters) +} + +#' Define Sampling Design Parameters +#' @description +#' Define sampling design parameters for use in analytical estimation. +#' @details +#' The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in +#' \code{\link[RstoxFDA]{SamplingDesignParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +#' Any columns not named in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are assumed to be stratification variables. +#' The conditions listed for the variables in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are checked upon reading the data, and +#' execution halts with error if any are violated. +#' +#' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, +#' assuming equal probability non-finite sampling with fixed sample size, selection without replacement and complete response. +#' @param processData \code{\link[RstoxFDA]{SamplingDesignParametersData}} as returned from this function. +#' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' +#' @param FileName path to resource file +#' @param StoxBioticData +#' @param SamplingUnitId +#' @param StratificationColumns +#' @param OrderColumn +#' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' +#' @return \code{\link[RstoxFDA]{SamplingDesignParametersData}} +#' @export +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, OrderColumn, UseProcessData=F){ + + if (UseProcessData){ + return(processData) + } + + DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("ResourceFile", "AdHocStoxBiotic")) + + if (DefinitionMethod == "ResourceFile"){ + return(parseDesignParameters(FileName)) + } + if (DefinitionMethod == "AdHocStoxBiotic"){ + return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns, OrderColumn)) + } +} + + + #' @noRd AssignIndividualDesignParameters <- function(){} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index c6ca8d0b..8534c621 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -16,6 +16,58 @@ is.Date <- function(date){ return(FALSE) } +#' Sampling Design Parameters +#' +#' Sampling parameters for selection of a sampling unit +#' +#' @details +#' Encodes key information about the selection of a sampling unit, used in analytical design based estimation. +#' The selection encoded may be of a obesvered parameter from an individual, or of some intermediate selection in +#' hierarchical sampling (e.g. a haul, a vessel, etc.). Information is encoded in three tables. +#' +#' The sampleTable encodes information about the sample of sampling units: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of selection units in Stratum} +#' \item{n}{Optional, num: The number of selection units selected from the Stratum} +#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} +#' \item{Finite}{Mandatory, logical: Whether selection is from finite population. That is whether sampling probabilites describes relative or absolute statistical weights.} +#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} +#' \item{...}{mandatory, chr: Additional columns are stratification variables. These are mandatory if present (NAs not allowed). They provided are for linking with other data. They must be specified in 'stratificationVariables' and their combination must uniquely identify the Stratum.} +#' } +#' +#' The selectionTable encodes information abut the selection of sampling units for sampling: +#' \describe{ +#' \item{Stratum}{Mandatory: Identifies the stratum the sampling unit is taken from.} +#' \item{Order}{Optional: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} +#' \item{SamplingUnitId}{Optional: Identifes sampling unit. NA encodes non-response} +#' \item{InclusionProbability}{Optional: The inclusion probability of the sampling unit} +#' \item{SelectionProbability}{Optional: The selection probability of the sampling unit} +#' \item{SelectionDescription}{Optional: Free text description of sampling unit.} +#' } +#' +#' The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables: +#' \describe{ +#' \item{stratificationVariables}{columns in the sampleTable that are stratification variables.} +#' } +#' +#' Optional columns may be NA. +#' +#' The selection methods available for 'SelectionMethod' are explained here: +#' \describe{ +#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} +#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} +#' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' } +#' +#' The selectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +#' The inclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' +#' @name SamplingDesignParametersData +#' @concept Data types +#' @concept Analytical estimation +#' +NULL #' Trip Partition #' diff --git a/inst/dataPrepScripts/prepDesignParameters.R b/inst/dataPrepScripts/prepDesignParameters.R index b6c8484f..548c4665 100644 --- a/inst/dataPrepScripts/prepDesignParameters.R +++ b/inst/dataPrepScripts/prepDesignParameters.R @@ -37,7 +37,6 @@ parseLotteryFile <- function(filename){ #' @param maxDiff the highest acceptable difference in time (hours) between StoxBiotic station time and lottery BD and BT time. #' @noRd prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDiff=2){ - #partition lottery messages into set where sample was requested and other set lotteryParamsFiltered <- lotteryParams lotteryParamsFiltered$lotteryMessage <- paste(lotteryParamsFiltered$RC, lotteryParamsFiltered$BD, lotteryParamsFiltered$BT) @@ -81,7 +80,8 @@ prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDif if (length(unique(stationTable$kapasitet))!=1){ selectionTable$SelectionProbability <- as.numeric(NA) } - selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] + selectionTable$Order <- as.numeric(NA) + selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] selectionTable$SelectionDescription <- as.character(NA) #remove vessel identifying descriptions stopifnot(length(unique(stationTable$HIF.stratum))==1) @@ -90,7 +90,7 @@ prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDif if (length(unique(stationTable$kapasitet))==1){ sampleTable$n <- stationTable$kapasitet[[1]] } - sampleTable$SelectionMethod <- "Poission" + sampleTable$SelectionMethod <- "Poisson" sampleTable$Finite <- TRUE stopifnot(length(unique(stationTable$lotteri))==1) sampleTable$FrameDescription <- stationTable$lotteri[[1]] @@ -111,10 +111,7 @@ saveDesignTable <- function(filename, designTable){ lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") lotteryParams <- lotteryParams[lotteryParams$lotteri=="Sild2022" & lotteryParams$HIF.stratum=="Nordsjo",] bioData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic("~/bioticsets/lotterieksempel/biotic_cruiseNumber_19-2022-20_Silde-sampling_2023-07-06T22.00.19.567Z.xml")) -filterexpression <- list() -filterexpression$Station <- "DateTime < '2022-01-21 00:00:00 UTC'" -bioFiltered <- RstoxData::FilterStoxBiotic(bioData, filterexpression, FilterUpwards = T) platformCodes <- readxl::read_excel("~/codelists/NMDeksempler/platform.xlsx", 2) -designParams <- prepDesignParamFile(lotteryParams, bioFiltered, platformCodes) +designParams <- prepDesignParamFile(lotteryParams, bioData, platformCodes) saveDesignTable("inst/testresources/lotteryParameters/lotteryDesignNSH.txt", designParams) \ No newline at end of file diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt index d17b4d8f..d38f0bec 100644 --- a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -1,11 +1,65 @@ -Stratum N n SelectionMethod Finite FrameDescription SamplingUnitId InclusionProbability SelectionProbability SelectionDescription -Nordsjo 48 110 Poission TRUE Sild2022 38401 0.213196915139625 0.00217741935483871 -Nordsjo 48 110 Poission TRUE Sild2022 38440 0.1700984890555 0.00169354838709677 -Nordsjo 48 110 Poission TRUE Sild2022 38403 0.0767615148687669 0.000725806451612903 -Nordsjo 48 110 Poission TRUE Sild2022 38446 0.267229615158194 0.00282258064516129 -Nordsjo 48 110 Poission TRUE Sild2022 38402 0.108975250145035 0.00104838709677419 -Nordsjo 48 110 Poission TRUE Sild2022 38404 0.162691408611336 0.00161290322580645 -Nordsjo 48 110 Poission TRUE Sild2022 38414 0.240689370659604 0.0025 -Nordsjo 48 110 Poission TRUE Sild2022 38411 0.458713264018625 0.00556451612903226 -Nordsjo 48 110 Poission TRUE Sild2022 0.068529393639208 0.000645161290322581 -Nordsjo 48 110 Poission TRUE Sild2022 0.132402285228299 0.00129032258064516 +Stratum N n SelectionMethod Finite FrameDescription Order SamplingUnitId InclusionProbability SelectionProbability SelectionDescription +Nordsjo 811 110 Poisson TRUE Sild2022 38401 0.213196915139625 0.00217741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38433 0.247412405326388 0.00258064516129032 +Nordsjo 811 110 Poisson TRUE Sild2022 38440 0.1700984890555 0.00169354838709677 +Nordsjo 811 110 Poisson TRUE Sild2022 38445 0.108975250145035 0.00104838709677419 +Nordsjo 811 110 Poisson TRUE Sild2022 38438 0.641175307024414 0.0092741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38441 0.233906828422691 0.00241935483870968 +Nordsjo 811 110 Poisson TRUE Sild2022 38448 0.267229615158194 0.00282258064516129 +Nordsjo 811 110 Poisson TRUE Sild2022 38403 0.0767615148687669 0.000725806451612903 +Nordsjo 811 110 Poisson TRUE Sild2022 38435 0.143125007547176 0.00140322580645161 +Nordsjo 811 110 Poisson TRUE Sild2022 38436 0.147680162555427 0.00145161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38443 0.0182839435244132 0.000167741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38446 0.267229615158194 0.00282258064516129 +Nordsjo 811 110 Poisson TRUE Sild2022 38447 0.247412405326388 0.00258064516129032 +Nordsjo 811 110 Poisson TRUE Sild2022 38402 0.108975250145035 0.00104838709677419 +Nordsjo 811 110 Poisson TRUE Sild2022 38434 0.220161129326957 0.00225806451612903 +Nordsjo 811 110 Poisson TRUE Sild2022 38442 0.068529393639208 0.000645161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38444 0.147680162555427 0.00145161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38437 0.147680162555427 0.00145161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38439 0.311481476435603 0.00338709677419355 +Nordsjo 811 110 Poisson TRUE Sild2022 38431 0.0889747087440887 0.000846774193548387 +Nordsjo 811 110 Poisson TRUE Sild2022 38415 0.136247006433322 0.00133064516129032 +Nordsjo 811 110 Poisson TRUE Sild2022 38406 0.097027756531011 0.00092741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38417 0.101027788104015 0.000967741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38419 0.0849215381190344 0.000806451612903226 +Nordsjo 811 110 Poisson TRUE Sild2022 38408 0.184718409696934 0.00185483870967742 +Nordsjo 811 110 Poisson TRUE Sild2022 38412 0.216686691440281 0.00221774193548387 +Nordsjo 811 110 Poisson TRUE Sild2022 38422 0.0518463272604551 0.000483870967741936 +Nordsjo 811 110 Poisson TRUE Sild2022 38432 0.162691408611336 0.00161290322580645 +Nordsjo 811 110 Poisson TRUE Sild2022 38429 0.338519898362406 0.00375 +Nordsjo 811 110 Poisson TRUE Sild2022 38404 0.162691408611336 0.00161290322580645 +Nordsjo 811 110 Poisson TRUE Sild2022 38413 0.097027756531011 0.00092741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 38414 0.240689370659604 0.0025 +Nordsjo 811 110 Poisson TRUE Sild2022 38405 0.267229615158194 0.00282258064516129 +Nordsjo 811 110 Poisson TRUE Sild2022 38411 0.458713264018625 0.00556451612903226 +Nordsjo 811 110 Poisson TRUE Sild2022 38416 0.116853084910627 0.00112903225806452 +Nordsjo 811 110 Poisson TRUE Sild2022 38427 0.589628784636336 0.00806451612903226 +Nordsjo 811 110 Poisson TRUE Sild2022 38409 0.162691408611336 0.00161290322580645 +Nordsjo 811 110 Poisson TRUE Sild2022 38424 0.347296770955766 0.00387096774193548 +Nordsjo 811 110 Poisson TRUE Sild2022 38421 0.116853084910627 0.00112903225806452 +Nordsjo 811 110 Poisson TRUE Sild2022 38430 0.116853084910627 0.00112903225806452 +Nordsjo 811 110 Poisson TRUE Sild2022 38426 0.233906828422691 0.00241935483870968 +Nordsjo 811 110 Poisson TRUE Sild2022 38407 0.068529393639208 0.000645161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38418 0.229685923270579 0.00236952419354839 +Nordsjo 811 110 Poisson TRUE Sild2022 38428 0.143125007547176 0.00140322580645161 +Nordsjo 811 110 Poisson TRUE Sild2022 38420 0.0425447856855458 0.000395161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 38423 0.463520590962966 0.00564516129032258 +Nordsjo 811 110 Poisson TRUE Sild2022 38410 0.267034025829916 0.00282016129032258 +Nordsjo 811 110 Poisson TRUE Sild2022 38425 0.0930100890906527 0.000887096774193548 +Nordsjo 811 110 Poisson TRUE Sild2022 0.199083079634323 0.00201612903225806 +Nordsjo 811 110 Poisson TRUE Sild2022 0.1700984890555 0.00169354838709677 +Nordsjo 811 110 Poisson TRUE Sild2022 0.1700984890555 0.00169354838709677 +Nordsjo 811 110 Poisson TRUE Sild2022 0.174511532419619 0.00174193548387097 +Nordsjo 811 110 Poisson TRUE Sild2022 0.202634883401628 0.00205645161290323 +Nordsjo 811 110 Poisson TRUE Sild2022 0.068529393639208 0.000645161290322581 +Nordsjo 811 110 Poisson TRUE Sild2022 0.132402285228299 0.00129032258064516 +Nordsjo 811 110 Poisson TRUE Sild2022 0.101027788104015 0.000967741935483871 +Nordsjo 811 110 Poisson TRUE Sild2022 0.0808504995083754 0.000766129032258065 +Nordsjo 811 110 Poisson TRUE Sild2022 0.140074844132902 0.00137096774193548 +Nordsjo 811 110 Poisson TRUE Sild2022 0.0552064585338686 0.000516129032258065 +Nordsjo 811 110 Poisson TRUE Sild2022 0.112922833036567 0.00108870967741935 +Nordsjo 811 110 Poisson TRUE Sild2022 0.386753608156008 0.00443548387096774 +Nordsjo 811 110 Poisson TRUE Sild2022 0.413480846839946 0.00483870967741936 +Nordsjo 811 110 Poisson TRUE Sild2022 0.247412405326388 0.00258064516129032 +Nordsjo 811 110 Poisson TRUE Sild2022 0.0518463272604551 0.000483870967741936 diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R new file mode 100644 index 00000000..27edf203 --- /dev/null +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -0,0 +1,6 @@ +designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA") + +#regular read: +designParams <- RstoxFDA::DefineSamplingDesignParameters(NULL, "ResourceFile", designParamsFile) +expect_equal(nrow(designParams$selectionTable), 64) +expect_equal(nrow(designParams$sampleTable), 1) \ No newline at end of file diff --git a/man/DefineSamplingDesignParameters.Rd b/man/DefineSamplingDesignParameters.Rd new file mode 100644 index 00000000..f2d29a1f --- /dev/null +++ b/man/DefineSamplingDesignParameters.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R +\name{DefineSamplingDesignParameters} +\alias{DefineSamplingDesignParameters} +\title{Define Sampling Design Parameters} +\usage{ +DefineSamplingDesignParameters( + processData, + DefinitionMethod = c("ResourceFile", "AdHocStoxBiotic"), + FileName = character(), + StoxBioticData, + SamplingUnitId, + StratificationColumns, + OrderColumn, + UseProcessData = F +) +} +\arguments{ +\item{processData}{\code{\link[RstoxFDA]{SamplingDesignParametersData}} as returned from this function.} + +\item{DefinitionMethod}{'ResourceFile' or 'AdHocStoxBiotic'} + +\item{FileName}{path to resource file} + +\item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} +} +\value{ +\code{\link[RstoxFDA]{SamplingDesignParametersData}} +} +\description{ +Define sampling design parameters for use in analytical estimation. +} +\details{ +The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in +\code{\link[RstoxFDA]{SamplingDesignParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +Any columns not named in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are assumed to be stratification variables. +The conditions listed for the variables in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are checked upon reading the data, and +execution halts with error if any are violated. + +The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, +assuming equal probability non-finite sampling with fixed sample size, selection without replacement and complete response. +} +\concept{Analytical estimation} +\concept{StoX-functions} diff --git a/man/SamplingDesignParametersData.Rd b/man/SamplingDesignParametersData.Rd new file mode 100644 index 00000000..2eb33b89 --- /dev/null +++ b/man/SamplingDesignParametersData.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxDataTypes.R +\name{SamplingDesignParametersData} +\alias{SamplingDesignParametersData} +\title{Sampling Design Parameters} +\description{ +Sampling parameters for selection of a sampling unit +} +\details{ +Encodes key information about the selection of a sampling unit, used in analytical design based estimation. + The selection encoded may be of a obesvered parameter from an individual, or of some intermediate selection in + hierarchical sampling (e.g. a haul, a vessel, etc.). Information is encoded in three tables. + + The sampleTable encodes information about the sample of sampling units: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of selection units in Stratum} + \item{n}{Optional, num: The number of selection units selected from the Stratum} + \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} + \item{Finite}{Mandatory, logical: Whether selection is from finite population. That is whether sampling probabilites describes relative or absolute statistical weights.} + \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} + \item{...}{mandatory, chr: Additional columns are stratification variables. These are mandatory if present (NAs not allowed). They provided are for linking with other data. They must be specified in 'stratificationVariables' and their combination must uniquely identify the Stratum.} + } + + The selectionTable encodes information abut the selection of sampling units for sampling: + \describe{ + \item{Stratum}{Mandatory: Identifies the stratum the sampling unit is taken from.} + \item{Order}{Optional: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} + \item{SamplingUnitId}{Optional: Identifes sampling unit. NA encodes non-response} + \item{InclusionProbability}{Optional: The inclusion probability of the sampling unit} + \item{SelectionProbability}{Optional: The selection probability of the sampling unit} + \item{SelectionDescription}{Optional: Free text description of sampling unit.} + } + + The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables: + \describe{ + \item{stratificationVariables}{columns in the sampleTable that are stratification variables.} + } + +Optional columns may be NA. + +The selection methods available for 'SelectionMethod' are explained here: +\describe{ + \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} + \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} + \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +} + +The selectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +The inclusionProbability is defined as: The probability of the sampling unit being included in the sample. +} +\concept{Analytical estimation} +\concept{Data types} From 01219a7c224d612eb2810d65da80910cf0b6e638 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Tue, 17 Oct 2023 22:47:40 +0200 Subject: [PATCH 06/24] Revised format for design parameters. Implemented DefinitionMethod that extracts design parameters from sample data with some assumptions. --- R/StoxAnalyticalBaselineFunctions.R | 89 +++++++++--- R/StoxDataTypes.R | 25 ++-- inst/dataPrepScripts/prepDesignParameters.R | 9 +- .../lotteryParameters/lotteryDesignNSH.txt | 130 +++++++++--------- .../test-StoxAnalyticalBaselineFunctions.R | 18 ++- man/DefineSamplingDesignParameters.Rd | 12 +- man/SamplingDesignParametersData.Rd | 25 ++-- 7 files changed, 190 insertions(+), 118 deletions(-) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 4c66b1a6..e10bba7b 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -1,14 +1,59 @@ #' Construct design parameters assuming FSWOR, non-finite, equal prob, potentially stratified #' @noRd assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, StratificationColumns=c(), OrderColumn=NULL){ - stop("Not Implemented") + targetTable <- NULL + for (n in names(StoxBioticData)){ + if (SamplingUnitId %in% names(StoxBioticData[[n]])){ + targetTable=n + } + } + + if (is.null(targetTable)){ + stop(paste("The SamplingUnitId", SamplingUnitId, "was not found in StoxBioticData")) + } + + flatStox <- StoxBioticData[[targetTable]] + if (isGiven(StratificationColumns) & length(StratificationColumns)>0 & !all(StratificationColumns %in% names(flatStox))){ + stop("Not all stratification columns were found at", targetTable, ", where the SamplingUnitId", SamplingUnitId, "is found.") + } + + if (any(is.na(flatStox[[SamplingUnitId]]))){ + stop(paste("Cannot construct design parameters for missing SamplingUnitIds. Missing values (NA) found for", SamplingUnitId)) + } + for (n in StratificationColumns){ + if (any(is.na(flatStox[[n]]))){ + stop(paste("Cannot construct design parameters with missing strata information. Missing values (NA) found for stratification column", n)) + } + } + + flatStox$Stratum <- "All" + flatStox$Stratum <- apply(flatStox[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + flatStox$SamplingUnitId <- flatStox[[SamplingUnitId]] + flatStox$Order <- as.numeric(NA) + + CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), SelectionProbability=as.numeric(NA), RelativeSelectionProbability=1/length(unique(SamplingUnitId)), SelectionDescription=as.character(NA)), by=c("Stratum")] + selectionUnits <- flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")] + selectionUnits <- selectionUnits[!duplicated(selectionUnits$SamplingUnitId),] + selectionTable <- merge(flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")], CommonSelectionData) + sampleTable <- flatStox[,list(N=as.numeric(NA), n=length(unique(SamplingUnitId)), SelectionMethod="FSWR", FrameDescription=as.character(NA)), by=c("Stratum")] + sampleTable <- sampleTable[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] + stratificationTable <- flatStox[,.SD,.SDcol=c("Stratum", StratificationColumns)] + stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] + + designParameters <- list() + designParameters$sampleTable <- sampleTable + designParameters$selectionTable <- selectionTable + designParameters$stratificationVariables <- stratificationTable + + return(designParameters) + } #' parse design parameters from tab delimited file #' @noRd parseDesignParameters <- function(filename){ - colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", Finite="logical", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", SelectionProbability="numeric", SelectionDescription="character") + colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", SelectionProbability="numeric", RelativeSelectionProbability="numeric", SelectionDescription="character") headers <- data.table::fread(filename, sep="\t", dec=".", header = T, nrows = 1) if (!all(names(colClasses) %in% names(headers))){ missing <- names(colClasses)[!(names(colClasses) %in% names(headers)),] @@ -26,16 +71,13 @@ parseDesignParameters <- function(filename){ designParameters <- data.table::fread(filename, sep="\t", dec=".", header = T, colClasses = colClasses, na.strings = c("")) - selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] - sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable))])] - stratificationTable <- data.table::data.table(StratificationVariables=c(stratificationColumns)) + selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] + sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] + stratificationTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable)) & !(names(designParameters) %in% names(sampleTable))])] if (any(is.na(sampleTable$Stratum)) | any(is.na(selectionTable$Stratum))){ stop("Invalid design specification. The mandatory column 'Stratum' may not contain missing values (NA).") } - if (any(is.na(sampleTable$Finite))){ - stop("Invalid design specification. The mandatory column 'Finite' may not contain missing values (NA).") - } if (any(is.na(sampleTable$SelectionMethod))){ stop("Invalid design specification. The mandatory column 'SelectionMethod' may not contain missing values (NA).") } @@ -53,17 +95,21 @@ parseDesignParameters <- function(filename){ if (length(duplicatedStrata)>0){ stop(paste("Invalid design specification. The column stratum must uniquely identify all sample table variables. Duplicates found for:", paste(duplicatedStrata, collapse=","))) } - + if (length(stratificationColumns) > 0){ - stratificationVariableStrings <- apply(sampleTable[,.SD, .SDcol=stratificationColumns], 1, paste, collapse="/") - duplicatedStrata <- sampleTable$Stratum[duplicated(stratificationVariableStrings)] + stratificationVariableStrings <- apply(stratificationTable[,.SD, .SDcol=stratificationColumns], 1, paste, collapse="/") + duplicatedStrata <- stratificationTable$Stratum[duplicated(stratificationVariableStrings)] if (length(duplicatedStrata)>0){ - stop(paste("Invalid design spesification. The stratification variables must uniquely identify a stratum. Duplicates found for:", paste(duplicatedStrata, collapse=","))) + stop(paste("Invalid design specification. The stratification variables must uniquely identify a stratum. Duplicates found for:", paste(duplicatedStrata, collapse=","))) } } - sampleTable <- sampleTable[!duplicated(sampleTable$Stratum),] + if (any(!is.na(selectionTable$SampleUnitId) & duplicated(paste(selectionTable$Stratum, selectionTable$SampleUnitId)))){ + stop("Invalid design specification. Some strata contain duplicated SampleUnitIds.") + } + + stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] validSelectionMethod <- c("Poisson", "FSWR", "FSWOR") if (!all(sampleTable$SelectionMethod %in% validSelectionMethod)){ @@ -90,21 +136,23 @@ parseDesignParameters <- function(filename){ #' execution halts with error if any are violated. #' #' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, -#' assuming equal probability non-finite sampling with fixed sample size, selection without replacement and complete response. +#' assuming equal probability sampling with fixed sample size, selection with replacement and complete response. +#' This is a reasonable approximation if within-strata sampling is approximately simple random selections, +#' non-response is believed to be at random, and only a small fraction of the strata is sampled, +#' so that with and without replacement sampling probabilities are approximately equal. #' @param processData \code{\link[RstoxFDA]{SamplingDesignParametersData}} as returned from this function. #' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' #' @param FileName path to resource file -#' @param StoxBioticData -#' @param SamplingUnitId -#' @param StratificationColumns -#' @param OrderColumn +#' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from +#' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the sampling unit the design is constructed for. +#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' #' @return \code{\link[RstoxFDA]{SamplingDesignParametersData}} #' @export #' @concept StoX-functions #' @concept Analytical estimation #' @md -DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, OrderColumn, UseProcessData=F){ +DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, UseProcessData=F){ if (UseProcessData){ return(processData) @@ -120,9 +168,6 @@ DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("Reso } } - - - #' @noRd AssignIndividualDesignParameters <- function(){} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 8534c621..507ddf7b 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -31,24 +31,24 @@ is.Date <- function(date){ #' \item{N}{Optional, num: The total number of selection units in Stratum} #' \item{n}{Optional, num: The number of selection units selected from the Stratum} #' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} -#' \item{Finite}{Mandatory, logical: Whether selection is from finite population. That is whether sampling probabilites describes relative or absolute statistical weights.} #' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} -#' \item{...}{mandatory, chr: Additional columns are stratification variables. These are mandatory if present (NAs not allowed). They provided are for linking with other data. They must be specified in 'stratificationVariables' and their combination must uniquely identify the Stratum.} #' } #' #' The selectionTable encodes information abut the selection of sampling units for sampling: #' \describe{ -#' \item{Stratum}{Mandatory: Identifies the stratum the sampling unit is taken from.} -#' \item{Order}{Optional: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} -#' \item{SamplingUnitId}{Optional: Identifes sampling unit. NA encodes non-response} -#' \item{InclusionProbability}{Optional: The inclusion probability of the sampling unit} -#' \item{SelectionProbability}{Optional: The selection probability of the sampling unit} -#' \item{SelectionDescription}{Optional: Free text description of sampling unit.} +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} +#' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} +#' \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} +#' \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} +#' \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the sampling unit} +#' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} #' } #' -#' The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables: +#' The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): #' \describe{ -#' \item{stratificationVariables}{columns in the sampleTable that are stratification variables.} +#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} #' } #' #' Optional columns may be NA. @@ -60,8 +60,9 @@ is.Date <- function(date){ #' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} #' } #' -#' The selectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -#' The inclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +#' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. #' #' @name SamplingDesignParametersData #' @concept Data types diff --git a/inst/dataPrepScripts/prepDesignParameters.R b/inst/dataPrepScripts/prepDesignParameters.R index 548c4665..7b8c5fe4 100644 --- a/inst/dataPrepScripts/prepDesignParameters.R +++ b/inst/dataPrepScripts/prepDesignParameters.R @@ -75,13 +75,15 @@ prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDif stationTable$description <- paste(stationTable$lotteri, stationTable$HIF.stratum, stationTable$lotteryMessage, sep="/") stationTable$SelectionProbability <- stationTable$lotteri.kg/(stationTable$kvoteT*1000) - selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "kapasitet", "description")] - names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "n", "SelectionDescription") + stationTable$RelativeSelectionProbability <- stationTable$SelectionProbability + selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "RelativeSelectionProbability", "kapasitet", "description")] + names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "n", "SelectionDescription") if (length(unique(stationTable$kapasitet))!=1){ selectionTable$SelectionProbability <- as.numeric(NA) + stationTable$RelativeSelectionProbability <- as.numeric(NA) } selectionTable$Order <- as.numeric(NA) - selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "SelectionDescription")] + selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] selectionTable$SelectionDescription <- as.character(NA) #remove vessel identifying descriptions stopifnot(length(unique(stationTable$HIF.stratum))==1) @@ -91,7 +93,6 @@ prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDif sampleTable$n <- stationTable$kapasitet[[1]] } sampleTable$SelectionMethod <- "Poisson" - sampleTable$Finite <- TRUE stopifnot(length(unique(stationTable$lotteri))==1) sampleTable$FrameDescription <- stationTable$lotteri[[1]] diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt index d38f0bec..b1d00a0c 100644 --- a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -1,65 +1,65 @@ -Stratum N n SelectionMethod Finite FrameDescription Order SamplingUnitId InclusionProbability SelectionProbability SelectionDescription -Nordsjo 811 110 Poisson TRUE Sild2022 38401 0.213196915139625 0.00217741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38433 0.247412405326388 0.00258064516129032 -Nordsjo 811 110 Poisson TRUE Sild2022 38440 0.1700984890555 0.00169354838709677 -Nordsjo 811 110 Poisson TRUE Sild2022 38445 0.108975250145035 0.00104838709677419 -Nordsjo 811 110 Poisson TRUE Sild2022 38438 0.641175307024414 0.0092741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38441 0.233906828422691 0.00241935483870968 -Nordsjo 811 110 Poisson TRUE Sild2022 38448 0.267229615158194 0.00282258064516129 -Nordsjo 811 110 Poisson TRUE Sild2022 38403 0.0767615148687669 0.000725806451612903 -Nordsjo 811 110 Poisson TRUE Sild2022 38435 0.143125007547176 0.00140322580645161 -Nordsjo 811 110 Poisson TRUE Sild2022 38436 0.147680162555427 0.00145161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38443 0.0182839435244132 0.000167741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38446 0.267229615158194 0.00282258064516129 -Nordsjo 811 110 Poisson TRUE Sild2022 38447 0.247412405326388 0.00258064516129032 -Nordsjo 811 110 Poisson TRUE Sild2022 38402 0.108975250145035 0.00104838709677419 -Nordsjo 811 110 Poisson TRUE Sild2022 38434 0.220161129326957 0.00225806451612903 -Nordsjo 811 110 Poisson TRUE Sild2022 38442 0.068529393639208 0.000645161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38444 0.147680162555427 0.00145161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38437 0.147680162555427 0.00145161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38439 0.311481476435603 0.00338709677419355 -Nordsjo 811 110 Poisson TRUE Sild2022 38431 0.0889747087440887 0.000846774193548387 -Nordsjo 811 110 Poisson TRUE Sild2022 38415 0.136247006433322 0.00133064516129032 -Nordsjo 811 110 Poisson TRUE Sild2022 38406 0.097027756531011 0.00092741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38417 0.101027788104015 0.000967741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38419 0.0849215381190344 0.000806451612903226 -Nordsjo 811 110 Poisson TRUE Sild2022 38408 0.184718409696934 0.00185483870967742 -Nordsjo 811 110 Poisson TRUE Sild2022 38412 0.216686691440281 0.00221774193548387 -Nordsjo 811 110 Poisson TRUE Sild2022 38422 0.0518463272604551 0.000483870967741936 -Nordsjo 811 110 Poisson TRUE Sild2022 38432 0.162691408611336 0.00161290322580645 -Nordsjo 811 110 Poisson TRUE Sild2022 38429 0.338519898362406 0.00375 -Nordsjo 811 110 Poisson TRUE Sild2022 38404 0.162691408611336 0.00161290322580645 -Nordsjo 811 110 Poisson TRUE Sild2022 38413 0.097027756531011 0.00092741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 38414 0.240689370659604 0.0025 -Nordsjo 811 110 Poisson TRUE Sild2022 38405 0.267229615158194 0.00282258064516129 -Nordsjo 811 110 Poisson TRUE Sild2022 38411 0.458713264018625 0.00556451612903226 -Nordsjo 811 110 Poisson TRUE Sild2022 38416 0.116853084910627 0.00112903225806452 -Nordsjo 811 110 Poisson TRUE Sild2022 38427 0.589628784636336 0.00806451612903226 -Nordsjo 811 110 Poisson TRUE Sild2022 38409 0.162691408611336 0.00161290322580645 -Nordsjo 811 110 Poisson TRUE Sild2022 38424 0.347296770955766 0.00387096774193548 -Nordsjo 811 110 Poisson TRUE Sild2022 38421 0.116853084910627 0.00112903225806452 -Nordsjo 811 110 Poisson TRUE Sild2022 38430 0.116853084910627 0.00112903225806452 -Nordsjo 811 110 Poisson TRUE Sild2022 38426 0.233906828422691 0.00241935483870968 -Nordsjo 811 110 Poisson TRUE Sild2022 38407 0.068529393639208 0.000645161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38418 0.229685923270579 0.00236952419354839 -Nordsjo 811 110 Poisson TRUE Sild2022 38428 0.143125007547176 0.00140322580645161 -Nordsjo 811 110 Poisson TRUE Sild2022 38420 0.0425447856855458 0.000395161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 38423 0.463520590962966 0.00564516129032258 -Nordsjo 811 110 Poisson TRUE Sild2022 38410 0.267034025829916 0.00282016129032258 -Nordsjo 811 110 Poisson TRUE Sild2022 38425 0.0930100890906527 0.000887096774193548 -Nordsjo 811 110 Poisson TRUE Sild2022 0.199083079634323 0.00201612903225806 -Nordsjo 811 110 Poisson TRUE Sild2022 0.1700984890555 0.00169354838709677 -Nordsjo 811 110 Poisson TRUE Sild2022 0.1700984890555 0.00169354838709677 -Nordsjo 811 110 Poisson TRUE Sild2022 0.174511532419619 0.00174193548387097 -Nordsjo 811 110 Poisson TRUE Sild2022 0.202634883401628 0.00205645161290323 -Nordsjo 811 110 Poisson TRUE Sild2022 0.068529393639208 0.000645161290322581 -Nordsjo 811 110 Poisson TRUE Sild2022 0.132402285228299 0.00129032258064516 -Nordsjo 811 110 Poisson TRUE Sild2022 0.101027788104015 0.000967741935483871 -Nordsjo 811 110 Poisson TRUE Sild2022 0.0808504995083754 0.000766129032258065 -Nordsjo 811 110 Poisson TRUE Sild2022 0.140074844132902 0.00137096774193548 -Nordsjo 811 110 Poisson TRUE Sild2022 0.0552064585338686 0.000516129032258065 -Nordsjo 811 110 Poisson TRUE Sild2022 0.112922833036567 0.00108870967741935 -Nordsjo 811 110 Poisson TRUE Sild2022 0.386753608156008 0.00443548387096774 -Nordsjo 811 110 Poisson TRUE Sild2022 0.413480846839946 0.00483870967741936 -Nordsjo 811 110 Poisson TRUE Sild2022 0.247412405326388 0.00258064516129032 -Nordsjo 811 110 Poisson TRUE Sild2022 0.0518463272604551 0.000483870967741936 +Stratum N n SelectionMethod FrameDescription Order SamplingUnitId InclusionProbability SelectionProbability RelativeSelectionProbability SelectionDescription +Nordsjo 811 110 Poisson Sild2022 38401 0.213196915139625 0.00217741935483871 0.00217741935483871 +Nordsjo 811 110 Poisson Sild2022 38433 0.247412405326388 0.00258064516129032 0.00258064516129032 +Nordsjo 811 110 Poisson Sild2022 38440 0.1700984890555 0.00169354838709677 0.00169354838709677 +Nordsjo 811 110 Poisson Sild2022 38445 0.108975250145035 0.00104838709677419 0.00104838709677419 +Nordsjo 811 110 Poisson Sild2022 38438 0.641175307024414 0.0092741935483871 0.0092741935483871 +Nordsjo 811 110 Poisson Sild2022 38441 0.233906828422691 0.00241935483870968 0.00241935483870968 +Nordsjo 811 110 Poisson Sild2022 38448 0.267229615158194 0.00282258064516129 0.00282258064516129 +Nordsjo 811 110 Poisson Sild2022 38403 0.0767615148687669 0.000725806451612903 0.000725806451612903 +Nordsjo 811 110 Poisson Sild2022 38435 0.143125007547176 0.00140322580645161 0.00140322580645161 +Nordsjo 811 110 Poisson Sild2022 38436 0.147680162555427 0.00145161290322581 0.00145161290322581 +Nordsjo 811 110 Poisson Sild2022 38443 0.0182839435244132 0.000167741935483871 0.000167741935483871 +Nordsjo 811 110 Poisson Sild2022 38446 0.267229615158194 0.00282258064516129 0.00282258064516129 +Nordsjo 811 110 Poisson Sild2022 38447 0.247412405326388 0.00258064516129032 0.00258064516129032 +Nordsjo 811 110 Poisson Sild2022 38402 0.108975250145035 0.00104838709677419 0.00104838709677419 +Nordsjo 811 110 Poisson Sild2022 38434 0.220161129326957 0.00225806451612903 0.00225806451612903 +Nordsjo 811 110 Poisson Sild2022 38442 0.068529393639208 0.000645161290322581 0.000645161290322581 +Nordsjo 811 110 Poisson Sild2022 38444 0.147680162555427 0.00145161290322581 0.00145161290322581 +Nordsjo 811 110 Poisson Sild2022 38437 0.147680162555427 0.00145161290322581 0.00145161290322581 +Nordsjo 811 110 Poisson Sild2022 38439 0.311481476435603 0.00338709677419355 0.00338709677419355 +Nordsjo 811 110 Poisson Sild2022 38431 0.0889747087440887 0.000846774193548387 0.000846774193548387 +Nordsjo 811 110 Poisson Sild2022 38415 0.136247006433322 0.00133064516129032 0.00133064516129032 +Nordsjo 811 110 Poisson Sild2022 38406 0.097027756531011 0.00092741935483871 0.00092741935483871 +Nordsjo 811 110 Poisson Sild2022 38417 0.101027788104015 0.000967741935483871 0.000967741935483871 +Nordsjo 811 110 Poisson Sild2022 38419 0.0849215381190344 0.000806451612903226 0.000806451612903226 +Nordsjo 811 110 Poisson Sild2022 38408 0.184718409696934 0.00185483870967742 0.00185483870967742 +Nordsjo 811 110 Poisson Sild2022 38412 0.216686691440281 0.00221774193548387 0.00221774193548387 +Nordsjo 811 110 Poisson Sild2022 38422 0.0518463272604551 0.000483870967741936 0.000483870967741936 +Nordsjo 811 110 Poisson Sild2022 38432 0.162691408611336 0.00161290322580645 0.00161290322580645 +Nordsjo 811 110 Poisson Sild2022 38429 0.338519898362406 0.00375 0.00375 +Nordsjo 811 110 Poisson Sild2022 38404 0.162691408611336 0.00161290322580645 0.00161290322580645 +Nordsjo 811 110 Poisson Sild2022 38413 0.097027756531011 0.00092741935483871 0.00092741935483871 +Nordsjo 811 110 Poisson Sild2022 38414 0.240689370659604 0.0025 0.0025 +Nordsjo 811 110 Poisson Sild2022 38405 0.267229615158194 0.00282258064516129 0.00282258064516129 +Nordsjo 811 110 Poisson Sild2022 38411 0.458713264018625 0.00556451612903226 0.00556451612903226 +Nordsjo 811 110 Poisson Sild2022 38416 0.116853084910627 0.00112903225806452 0.00112903225806452 +Nordsjo 811 110 Poisson Sild2022 38427 0.589628784636336 0.00806451612903226 0.00806451612903226 +Nordsjo 811 110 Poisson Sild2022 38409 0.162691408611336 0.00161290322580645 0.00161290322580645 +Nordsjo 811 110 Poisson Sild2022 38424 0.347296770955766 0.00387096774193548 0.00387096774193548 +Nordsjo 811 110 Poisson Sild2022 38421 0.116853084910627 0.00112903225806452 0.00112903225806452 +Nordsjo 811 110 Poisson Sild2022 38430 0.116853084910627 0.00112903225806452 0.00112903225806452 +Nordsjo 811 110 Poisson Sild2022 38426 0.233906828422691 0.00241935483870968 0.00241935483870968 +Nordsjo 811 110 Poisson Sild2022 38407 0.068529393639208 0.000645161290322581 0.000645161290322581 +Nordsjo 811 110 Poisson Sild2022 38418 0.229685923270579 0.00236952419354839 0.00236952419354839 +Nordsjo 811 110 Poisson Sild2022 38428 0.143125007547176 0.00140322580645161 0.00140322580645161 +Nordsjo 811 110 Poisson Sild2022 38420 0.0425447856855458 0.000395161290322581 0.000395161290322581 +Nordsjo 811 110 Poisson Sild2022 38423 0.463520590962966 0.00564516129032258 0.00564516129032258 +Nordsjo 811 110 Poisson Sild2022 38410 0.267034025829916 0.00282016129032258 0.00282016129032258 +Nordsjo 811 110 Poisson Sild2022 38425 0.0930100890906527 0.000887096774193548 0.000887096774193548 +Nordsjo 811 110 Poisson Sild2022 0.199083079634323 0.00201612903225806 0.00201612903225806 +Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.00169354838709677 0.00169354838709677 +Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.00169354838709677 0.00169354838709677 +Nordsjo 811 110 Poisson Sild2022 0.174511532419619 0.00174193548387097 0.00174193548387097 +Nordsjo 811 110 Poisson Sild2022 0.202634883401628 0.00205645161290323 0.00205645161290323 +Nordsjo 811 110 Poisson Sild2022 0.068529393639208 0.000645161290322581 0.000645161290322581 +Nordsjo 811 110 Poisson Sild2022 0.132402285228299 0.00129032258064516 0.00129032258064516 +Nordsjo 811 110 Poisson Sild2022 0.101027788104015 0.000967741935483871 0.000967741935483871 +Nordsjo 811 110 Poisson Sild2022 0.0808504995083754 0.000766129032258065 0.000766129032258065 +Nordsjo 811 110 Poisson Sild2022 0.140074844132902 0.00137096774193548 0.00137096774193548 +Nordsjo 811 110 Poisson Sild2022 0.0552064585338686 0.000516129032258065 0.000516129032258065 +Nordsjo 811 110 Poisson Sild2022 0.112922833036567 0.00108870967741935 0.00108870967741935 +Nordsjo 811 110 Poisson Sild2022 0.386753608156008 0.00443548387096774 0.00443548387096774 +Nordsjo 811 110 Poisson Sild2022 0.413480846839946 0.00483870967741936 0.00483870967741936 +Nordsjo 811 110 Poisson Sild2022 0.247412405326388 0.00258064516129032 0.00258064516129032 +Nordsjo 811 110 Poisson Sild2022 0.0518463272604551 0.000483870967741936 0.000483870967741936 diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index 27edf203..a1e3eab2 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -3,4 +3,20 @@ designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDe #regular read: designParams <- RstoxFDA::DefineSamplingDesignParameters(NULL, "ResourceFile", designParamsFile) expect_equal(nrow(designParams$selectionTable), 64) -expect_equal(nrow(designParams$sampleTable), 1) \ No newline at end of file +expect_equal(nrow(designParams$sampleTable), 1) +expect_equal(ncol(designParams$stratificationVariables), 1) +expect_equal(nrow(designParams$stratificationVariables), 1) + +#define from data +suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) +designParamsSB <- RstoxFDA::DefineSamplingDesignParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) + +#compare names of output with stratification variables to output without + +expect_true(all(names(designParamsSB$sampleTable) == names(designParams$sampleTable))) +expect_true(all(names(designParamsSB$selectionTable) == names(designParams$selectionTable))) +browser() +expect_equal(nrow(designParamsSB$selectionTable), 75) +expect_equal(nrow(designParamsSB$sampleTable), 2) +expect_equal(ncol(designParamsSB$stratificationVariables), 2) +expect_equal(nrow(designParamsSB$stratificationVariables), 2) diff --git a/man/DefineSamplingDesignParameters.Rd b/man/DefineSamplingDesignParameters.Rd index f2d29a1f..78004732 100644 --- a/man/DefineSamplingDesignParameters.Rd +++ b/man/DefineSamplingDesignParameters.Rd @@ -11,7 +11,6 @@ DefineSamplingDesignParameters( StoxBioticData, SamplingUnitId, StratificationColumns, - OrderColumn, UseProcessData = F ) } @@ -22,6 +21,12 @@ DefineSamplingDesignParameters( \item{FileName}{path to resource file} +\item{StoxBioticData}{\code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from} + +\item{SamplingUnitId}{name of column in 'StoxBioticData' that identifies the sampling unit the design is constructed for.} + +\item{StratificationColumns}{name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling.} + \item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} } \value{ @@ -38,7 +43,10 @@ The conditions listed for the variables in \code{\link[RstoxFDA]{SamplingDesignP execution halts with error if any are violated. The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, -assuming equal probability non-finite sampling with fixed sample size, selection without replacement and complete response. +assuming equal probability sampling with fixed sample size, selection with replacement and complete response. +This is a reasonable approximation if within-strata sampling is approximately simple random selections, +non-response is believed to be at random, and only a small fraction of the strata is sampled, +so that with and without replacement sampling probabilities are approximately equal. } \concept{Analytical estimation} \concept{StoX-functions} diff --git a/man/SamplingDesignParametersData.Rd b/man/SamplingDesignParametersData.Rd index 2eb33b89..02602ef4 100644 --- a/man/SamplingDesignParametersData.Rd +++ b/man/SamplingDesignParametersData.Rd @@ -17,24 +17,24 @@ Encodes key information about the selection of a sampling unit, used in analytic \item{N}{Optional, num: The total number of selection units in Stratum} \item{n}{Optional, num: The number of selection units selected from the Stratum} \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} - \item{Finite}{Mandatory, logical: Whether selection is from finite population. That is whether sampling probabilites describes relative or absolute statistical weights.} \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} - \item{...}{mandatory, chr: Additional columns are stratification variables. These are mandatory if present (NAs not allowed). They provided are for linking with other data. They must be specified in 'stratificationVariables' and their combination must uniquely identify the Stratum.} } The selectionTable encodes information abut the selection of sampling units for sampling: \describe{ - \item{Stratum}{Mandatory: Identifies the stratum the sampling unit is taken from.} - \item{Order}{Optional: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} - \item{SamplingUnitId}{Optional: Identifes sampling unit. NA encodes non-response} - \item{InclusionProbability}{Optional: The inclusion probability of the sampling unit} - \item{SelectionProbability}{Optional: The selection probability of the sampling unit} - \item{SelectionDescription}{Optional: Free text description of sampling unit.} + \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} + \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} + \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} + \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} + \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} + \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the sampling unit} + \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} } - The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables: + The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): \describe{ - \item{stratificationVariables}{columns in the sampleTable that are stratification variables.} + \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} } Optional columns may be NA. @@ -46,8 +46,9 @@ The selection methods available for 'SelectionMethod' are explained here: \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} } -The selectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -The inclusionProbability is defined as: The probability of the sampling unit being included in the sample. +The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. } \concept{Analytical estimation} \concept{Data types} From ee8aa62d6a5aed3a8c61dcc39131cbeeaaaf8a8a Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Tue, 17 Oct 2023 23:10:27 +0200 Subject: [PATCH 07/24] Removed presumable redundant brew update --- .github/workflows/check-full.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index a9703083..8288f32d 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -85,12 +85,6 @@ jobs: - uses: r-lib/actions/setup-pandoc@v2 - - name: Anticipate Bintray shutdown - if: runner.os == 'macOS' - run: | - brew update - shell: bash - - name: Query dependencies run: | install.packages('remotes') From d4fb2192291a279c813f9cce80e5a9478eeca1d6 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Tue, 31 Oct 2023 21:21:19 +0100 Subject: [PATCH 08/24] renamed data type --- NAMESPACE | 2 +- R/RecaFormatChecks.R | 1 - R/StoxAnalyticalBaselineFunctions.R | 30 ++-- R/StoxDataTypes.R | 141 ++++++++++++++++-- .../test-StoxAnalyticalBaselineFunctions.R | 28 ++-- ... => DefineMultiStageSamplingParameters.Rd} | 18 +-- man/IndividualSamplingParametersData.Rd | 57 +++++++ ...Rd => MultiStageSamplingParametersData.Rd} | 19 ++- 8 files changed, 237 insertions(+), 59 deletions(-) rename man/{DefineSamplingDesignParameters.Rd => DefineMultiStageSamplingParameters.Rd} (69%) create mode 100644 man/IndividualSamplingParametersData.Rd rename man/{SamplingDesignParametersData.Rd => MultiStageSamplingParametersData.Rd} (81%) diff --git a/NAMESPACE b/NAMESPACE index 8d0f02bf..3e7fa5da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,8 +13,8 @@ export(DefineAgeErrorMatrix) export(DefineAreaPosition) export(DefineCarNeighbours) export(DefineLengthConversionParameters) +export(DefineMultiStageSamplingParameters) export(DefinePeriod) -export(DefineSamplingDesignParameters) export(DefineStockSplittingParameters) export(DefineWeightConversionFactor) export(FilterAgeLengthOutliersStoxBiotic) diff --git a/R/RecaFormatChecks.R b/R/RecaFormatChecks.R index cdffe167..f82bc89c 100644 --- a/R/RecaFormatChecks.R +++ b/R/RecaFormatChecks.R @@ -185,7 +185,6 @@ checkWeightLength<-function(weightlength, landings){ checkCovariateConsistency <- function(modelobj, landingscov){ inlandings <- rownames(modelobj$info[modelobj$info[,"in.landings"]==1,]) if (any(!(inlandings %in% names(landingscov)))){ - browser() stop("some covariates labeled as in.landings are not found in corresponding covariate matrix in landings") } diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index e10bba7b..c3dac585 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -1,6 +1,6 @@ #' Construct design parameters assuming FSWOR, non-finite, equal prob, potentially stratified #' @noRd -assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, StratificationColumns=c(), OrderColumn=NULL){ +assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, StratificationColumns=c()){ targetTable <- NULL for (n in names(StoxBioticData)){ if (SamplingUnitId %in% names(StoxBioticData[[n]])){ @@ -41,9 +41,9 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] designParameters <- list() - designParameters$sampleTable <- sampleTable - designParameters$selectionTable <- selectionTable - designParameters$stratificationVariables <- stratificationTable + designParameters$SampleTable <- sampleTable + designParameters$SelectionTable <- selectionTable + designParameters$StratificationVariables <- stratificationTable return(designParameters) @@ -118,21 +118,21 @@ parseDesignParameters <- function(filename){ } designParameters <- list() - designParameters$sampleTable <- sampleTable - designParameters$selectionTable <- selectionTable - designParameters$stratificationVariables <- stratificationTable + designParameters$SampleTable <- sampleTable + designParameters$SelectionTable <- selectionTable + designParameters$StratificationVariables <- stratificationTable return(designParameters) } #' Define Sampling Design Parameters #' @description -#' Define sampling design parameters for use in analytical estimation. +#' Define sampling design parameters for intermediate sampling units in multi-stage sampling. #' @details #' The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in -#' \code{\link[RstoxFDA]{SamplingDesignParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. -#' Any columns not named in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are assumed to be stratification variables. -#' The conditions listed for the variables in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are checked upon reading the data, and +#' \code{\link[RstoxFDA]{MultiStageSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +#' Any columns not named in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are assumed to be stratification variables. +#' The conditions listed for the variables in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are checked upon reading the data, and #' execution halts with error if any are violated. #' #' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, @@ -140,19 +140,19 @@ parseDesignParameters <- function(filename){ #' This is a reasonable approximation if within-strata sampling is approximately simple random selections, #' non-response is believed to be at random, and only a small fraction of the strata is sampled, #' so that with and without replacement sampling probabilities are approximately equal. -#' @param processData \code{\link[RstoxFDA]{SamplingDesignParametersData}} as returned from this function. +#' @param processData \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} as returned from this function. #' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' #' @param FileName path to resource file #' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from #' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the sampling unit the design is constructed for. #' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' -#' @return \code{\link[RstoxFDA]{SamplingDesignParametersData}} +#' @return \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} #' @export #' @concept StoX-functions #' @concept Analytical estimation #' @md -DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, UseProcessData=F){ +DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, UseProcessData=F){ if (UseProcessData){ return(processData) @@ -164,7 +164,7 @@ DefineSamplingDesignParameters <- function(processData, DefinitionMethod=c("Reso return(parseDesignParameters(FileName)) } if (DefinitionMethod == "AdHocStoxBiotic"){ - return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns, OrderColumn)) + return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns)) } } diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 507ddf7b..9ed4d46c 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -16,16 +16,15 @@ is.Date <- function(date){ return(FALSE) } -#' Sampling Design Parameters +#' Multi-stage Sampling Design Parameters #' -#' Sampling parameters for selection of a sampling unit +#' Sampling parameters for selection of a sampling unit in a multi-stage sampling design #' #' @details -#' Encodes key information about the selection of a sampling unit, used in analytical design based estimation. -#' The selection encoded may be of a obesvered parameter from an individual, or of some intermediate selection in -#' hierarchical sampling (e.g. a haul, a vessel, etc.). Information is encoded in three tables. +#' Encodes information about the selection of an intermediate sampling unit in multi-stage sampling, used in analytical design based estimation. +#' Information is encoded in three tables. #' -#' The sampleTable encodes information about the sample of sampling units: +#' The SampleTable encodes information about the sample of sampling units: #' \describe{ #' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} #' \item{N}{Optional, num: The total number of selection units in Stratum} @@ -34,7 +33,7 @@ is.Date <- function(date){ #' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} #' } #' -#' The selectionTable encodes information abut the selection of sampling units for sampling: +#' The SelectionTable encodes information abut the selection of sampling units for sampling: #' \describe{ #' \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} @@ -45,7 +44,7 @@ is.Date <- function(date){ #' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} #' } #' -#' The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): +#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): #' \describe{ #' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} #' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} @@ -64,12 +63,136 @@ is.Date <- function(date){ #' The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. #' -#' @name SamplingDesignParametersData +#' @name MultiStageSamplingParametersData #' @concept Data types #' @concept Analytical estimation #' NULL +#' Check if table is correctly formatted Multi Stage Sampling Parameters Data +#' @param table \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} +#' @return validity +#' @concept Data types +#' @noRd +is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData){ + if (!is.list(MultiStageSamplingParametersData)){ + return(FALSE) + } + if (!all(sapply(MultiStageSamplingParametersData, data.table::is.data.table))){ + return(FALSE) + } + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(MultiStageSamplingParametersData))){ + return(FALSE) + } + if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(MultiStageSamplingParametersData$SampleTable))){ + return(FALSE) + } + if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription") %in% names(MultiStageSamplingParametersData$SelectionTable))){ + return(FALSE) + } + if (!all(c("Stratum") %in% names(MultiStageSamplingParametersData$StratificationVariables))){ + return(FALSE) + } + if (any(duplicated(MultiStageSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + #test that mandatory fields are not NA. + if (any(is.na(MultiStageSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + if (any(is.na(MultiStageSamplingParametersData$SampleTable$SelectionMethod))){ + return(FALSE) + } + if (any(is.na(MultiStageSamplingParametersData$SelectionTable$Stratum))){ + return(FALSE) + } + if (any(is.na(MultiStageSamplingParametersData$StratificationVariables$Stratum))){ + return(FALSE) + } + for (n in names(MultiStageSamplingParametersData$StratificationVariables)){ + if (any(is.na(MultiStageSamplingParametersData$StratificationVariables[[n]]))){ + return(FALSE) + } + } + + if (ncol(MultiStageSamplingParametersData$StratificationVariables) > 1){ + stratificationVariableStrings <- apply(MultiStageSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(MultiStageSamplingParametersData$StratificationVariables[names(MultiStageSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") + duplicatedStrata <- MultiStageSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + return(FALSE) + } + } + return(TRUE) +} + +#' Individual Sampling Design Parameters +#' +#' Sampling parameters for selection of a sampling of individuals +#' +#' @details +#' Encodes information about the selection of a sample of observations from individuals, used in analytical design based estimation. +#' +#' The SampleTable encodes information about the sample of sampling units: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of individuals in Stratum} +#' \item{n}{Optional, num: The number of individuals selected from the Stratum} +#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} +#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} +#' } +#' +#' The SelectionTable encodes information abut the selection of sampling units for sampling: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the individual is taken from.} +#' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} +#' \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} +#' \item{SelectionProbability}{Optional, num: The selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} +#' \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} +#' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} +#' } +#' +#' The ObservationVariables table specifies which set of variables the design is specified for: +#' \describe{ +#' \item{Parameter}{Mandatory, chr: Name of parameter selected for observation.} +#' } +#' +#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} +#' } +#' +#' Optional columns may be NA. +#' +#' The selection methods available for 'SelectionMethod' are explained here: +#' \describe{ +#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} +#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} +#' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' } +#' +#' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +#' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' +#' @name IndividualSamplingParametersData +#' @concept Data types +#' @concept Analytical estimation +#' +NULL + +#' Check if table is correctly formatted Individual Sampling Parameters Data +#' @param table \code{\link[RstoxFDA]{IndividualSamplingParametersData}} +#' @return validity +#' @concept Data types +#' @noRd +is.IndividualSamplingParametersData <- function(IndividualSamplingParametersData){ + stop("Not Implemented") +} + + #' Trip Partition #' #' Partitioning of catch from a trip. diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index a1e3eab2..b0a45aa1 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -1,22 +1,22 @@ designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA") #regular read: -designParams <- RstoxFDA::DefineSamplingDesignParameters(NULL, "ResourceFile", designParamsFile) -expect_equal(nrow(designParams$selectionTable), 64) -expect_equal(nrow(designParams$sampleTable), 1) -expect_equal(ncol(designParams$stratificationVariables), 1) -expect_equal(nrow(designParams$stratificationVariables), 1) +designParams <- RstoxFDA::DefineMultiStageSamplingParameters(NULL, "ResourceFile", designParamsFile) +expect_true(RstoxFDA:::is.MultiStageSamplingParametersData(designParams)) +expect_equal(nrow(designParams$SelectionTable), 64) +expect_equal(nrow(designParams$SampleTable), 1) +expect_equal(ncol(designParams$StratificationVariables), 1) +expect_equal(nrow(designParams$StratificationVariables), 1) #define from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) -designParamsSB <- RstoxFDA::DefineSamplingDesignParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) - +designParamsSB <- RstoxFDA::DefineMultiStageSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) +expect_true(RstoxFDA:::is.MultiStageSamplingParametersData(designParamsSB)) #compare names of output with stratification variables to output without -expect_true(all(names(designParamsSB$sampleTable) == names(designParams$sampleTable))) -expect_true(all(names(designParamsSB$selectionTable) == names(designParams$selectionTable))) -browser() -expect_equal(nrow(designParamsSB$selectionTable), 75) -expect_equal(nrow(designParamsSB$sampleTable), 2) -expect_equal(ncol(designParamsSB$stratificationVariables), 2) -expect_equal(nrow(designParamsSB$stratificationVariables), 2) +expect_true(all(names(designParamsSB$SampleTable) == names(designParams$SampleTable))) +expect_true(all(names(designParamsSB$SelectionTable) == names(designParams$SelectionTable))) +expect_equal(nrow(designParamsSB$SelectionTable), 75) +expect_equal(nrow(designParamsSB$SampleTable), 2) +expect_equal(ncol(designParamsSB$StratificationVariables), 2) +expect_equal(nrow(designParamsSB$StratificationVariables), 2) diff --git a/man/DefineSamplingDesignParameters.Rd b/man/DefineMultiStageSamplingParameters.Rd similarity index 69% rename from man/DefineSamplingDesignParameters.Rd rename to man/DefineMultiStageSamplingParameters.Rd index 78004732..75fb2e9e 100644 --- a/man/DefineSamplingDesignParameters.Rd +++ b/man/DefineMultiStageSamplingParameters.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/StoxAnalyticalBaselineFunctions.R -\name{DefineSamplingDesignParameters} -\alias{DefineSamplingDesignParameters} +\name{DefineMultiStageSamplingParameters} +\alias{DefineMultiStageSamplingParameters} \title{Define Sampling Design Parameters} \usage{ -DefineSamplingDesignParameters( +DefineMultiStageSamplingParameters( processData, DefinitionMethod = c("ResourceFile", "AdHocStoxBiotic"), FileName = character(), @@ -15,7 +15,7 @@ DefineSamplingDesignParameters( ) } \arguments{ -\item{processData}{\code{\link[RstoxFDA]{SamplingDesignParametersData}} as returned from this function.} +\item{processData}{\code{\link[RstoxFDA]{MultiStageSamplingParametersData}} as returned from this function.} \item{DefinitionMethod}{'ResourceFile' or 'AdHocStoxBiotic'} @@ -30,16 +30,16 @@ DefineSamplingDesignParameters( \item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} } \value{ -\code{\link[RstoxFDA]{SamplingDesignParametersData}} +\code{\link[RstoxFDA]{MultiStageSamplingParametersData}} } \description{ -Define sampling design parameters for use in analytical estimation. +Define sampling design parameters for intermediate sampling units in multi-stage sampling. } \details{ The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in -\code{\link[RstoxFDA]{SamplingDesignParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. -Any columns not named in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are assumed to be stratification variables. -The conditions listed for the variables in \code{\link[RstoxFDA]{SamplingDesignParametersData}} are checked upon reading the data, and +\code{\link[RstoxFDA]{MultiStageSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +Any columns not named in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are assumed to be stratification variables. +The conditions listed for the variables in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are checked upon reading the data, and execution halts with error if any are violated. The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd new file mode 100644 index 00000000..be4e0087 --- /dev/null +++ b/man/IndividualSamplingParametersData.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxDataTypes.R +\name{IndividualSamplingParametersData} +\alias{IndividualSamplingParametersData} +\title{Individual Sampling Design Parameters} +\description{ +Sampling parameters for selection of a sampling of individuals +} +\details{ +Encodes information about the selection of a sample of observations from individuals, used in analytical design based estimation. + + The SampleTable encodes information about the sample of sampling units: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of individuals in Stratum} + \item{n}{Optional, num: The number of individuals selected from the Stratum} + \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} + \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} + } + + The SelectionTable encodes information abut the selection of sampling units for sampling: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the individual is taken from.} + \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} + \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} + \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} + \item{SelectionProbability}{Optional, num: The selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} + \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} + \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} + } + + The ObservationVariables table specifies which set of variables the design is specified for: + \describe{ + \item{Parameter}{Mandatory, chr: Name of parameter selected for observation.} + } + + The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} + } + +Optional columns may be NA. + +The selection methods available for 'SelectionMethod' are explained here: +\describe{ + \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} + \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} + \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +} + +The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. +The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +} +\concept{Analytical estimation} +\concept{Data types} diff --git a/man/SamplingDesignParametersData.Rd b/man/MultiStageSamplingParametersData.Rd similarity index 81% rename from man/SamplingDesignParametersData.Rd rename to man/MultiStageSamplingParametersData.Rd index 02602ef4..c9cc193d 100644 --- a/man/SamplingDesignParametersData.Rd +++ b/man/MultiStageSamplingParametersData.Rd @@ -1,17 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/StoxDataTypes.R -\name{SamplingDesignParametersData} -\alias{SamplingDesignParametersData} -\title{Sampling Design Parameters} +\name{MultiStageSamplingParametersData} +\alias{MultiStageSamplingParametersData} +\title{Multi-stage Sampling Design Parameters} \description{ -Sampling parameters for selection of a sampling unit +Sampling parameters for selection of a sampling unit in a multi-stage sampling design } \details{ -Encodes key information about the selection of a sampling unit, used in analytical design based estimation. - The selection encoded may be of a obesvered parameter from an individual, or of some intermediate selection in - hierarchical sampling (e.g. a haul, a vessel, etc.). Information is encoded in three tables. +Encodes information about the selection of an intermediate sampling unit in multi-stage sampling, used in analytical design based estimation. + Information is encoded in three tables. - The sampleTable encodes information about the sample of sampling units: + The SampleTable encodes information about the sample of sampling units: \describe{ \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} \item{N}{Optional, num: The total number of selection units in Stratum} @@ -20,7 +19,7 @@ Encodes key information about the selection of a sampling unit, used in analytic \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} } - The selectionTable encodes information abut the selection of sampling units for sampling: + The SelectionTable encodes information abut the selection of sampling units for sampling: \describe{ \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} @@ -31,7 +30,7 @@ Encodes key information about the selection of a sampling unit, used in analytic \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} } - The stratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): + The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): \describe{ \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} From c20ecbae524a9fc31cca809f5b3bffbd8be4d8b3 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Wed, 1 Nov 2023 22:09:44 +0100 Subject: [PATCH 09/24] Implemented IndividualDesignParameters --- NAMESPACE | 1 + R/StoxAnalyticalBaselineFunctions.R | 155 +++++++++++++++++- R/StoxDataTypes.R | 86 +++++++++- .../test-StoxAnalyticalBaselineFunctions.R | 18 ++ man/DefineIndividualSamplingParameters.Rd | 51 ++++++ man/DefineMultiStageSamplingParameters.Rd | 2 +- man/IndividualSamplingParametersData.Rd | 8 +- 7 files changed, 305 insertions(+), 16 deletions(-) create mode 100644 man/DefineIndividualSamplingParameters.Rd diff --git a/NAMESPACE b/NAMESPACE index 3e7fa5da..16e045ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(ConvertWeightBiotic) export(DefineAgeErrorMatrix) export(DefineAreaPosition) export(DefineCarNeighbours) +export(DefineIndividualSamplingParameters) export(DefineLengthConversionParameters) export(DefineMultiStageSamplingParameters) export(DefinePeriod) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index c3dac585..49a55d21 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -125,7 +125,7 @@ parseDesignParameters <- function(filename){ return(designParameters) } -#' Define Sampling Design Parameters +#' Define Multi-Stage Sampling Design Parameters #' @description #' Define sampling design parameters for intermediate sampling units in multi-stage sampling. #' @details @@ -152,7 +152,7 @@ parseDesignParameters <- function(filename){ #' @concept StoX-functions #' @concept Analytical estimation #' @md -DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId, StratificationColumns, UseProcessData=F){ +DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId=character(), StratificationColumns=character(), UseProcessData=F){ if (UseProcessData){ return(processData) @@ -168,8 +168,157 @@ DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c(" } } +#' make IndividualDesignParameters for stratified selection of Individuals +#' if StratificationColumn contains only one column and this is called Stratum, do not add any stratification columns. #' @noRd -AssignIndividualDesignParameters <- function(){} +extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, StratificationColumns, Parameters){ + + individuals <- RstoxData::mergeByIntersect(StoxBioticData$Individual, StoxBioticData$Sample) + + #check first, so no restrictions need to be put on names of Parameters. + hasParam <- rep(FALSE, nrow(individuals)) + for (p in Parameters){ + hasParam <- hasParam | !is.na(individuals[[p]]) + } + + individuals$Stratum <- apply(individuals[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + StratificationColumns <- StratificationColumns[StratificationColumns!="Stratum"] + + individuals$SampleId <- individuals$Sample + + stratificationTable <- individuals[!duplicated(paste(individuals$SampleId, individuals$Stratum)), .SD,.SDcol=c("SampleId", "Stratum", StratificationColumns)] + observationTable <- data.table::data.table(Parameter=Parameters) + + individuals$Sampled <- hasParam + + stratumTotals <- individuals[,list(totalInStratum=.N, sampledInStratum=sum(Sampled)), by=c("Stratum", "SampleId")] + sampleTotals <- individuals[,list(totalInSample=.N), by=c("SampleId")] + stratumFraction <- merge(stratumTotals, sampleTotals, by="SampleId") + stratumFraction$StratumFraction <- stratumFraction$totalInStratum / stratumFraction$totalInSample + individuals <- merge(individuals, stratumFraction, by=c("SampleId", "Stratum")) + individuals$N <- individuals$CatchFractionNumber*individuals$StratumFraction + individuals$n <- individuals$sampledInStratum + + sampleTable <- individuals[!duplicated(paste(individuals$Stratum, individuals$SampleId)), .SD, .SDcol=c("SampleId", "Stratum", "N", "n")] + sampleTable$SelectionMethod <- "FSWOR" + sampleTable$SampleDescription <- as.character(NA) + + selectedIndividuals <- individuals[individuals$Sampled,] + selectedIndividuals$IndividualId <- selectedIndividuals$Individual + selectedIndividuals$Order <- as.numeric(NA) + selectedIndividuals$InclusionProbability <- as.numeric(NA) #Need order. Could possibly be obtained by convention from StoxBioticiIndividual$IndividualKey, would have to be user choice. + selectedIndividuals$SelectionProbability <- 1/selectedIndividuals$N + selectedIndividuals$RelativeSelectionProbability <- 1/selectedIndividuals$N + selectedIndividuals$SelectionDescription <- as.character(NA) + + selectionTable <- selectedIndividuals[,.SD,.SDcol=c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] + + designParams <- list() + designParams$SampleTable <- sampleTable + designParams$SelectionTable <- selectionTable + designParams$ObservationVariables <- observationTable + designParams$StratificationVariables <- stratificationTable + + return(designParams) +} + +#' Define Sub-Sampling Parameters for Individuals +#' @description +#' Define approximate sampling design parameters for a sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +#' and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a sample recorded on the Sample table. +#' @details +#' Sampling parameters are approximate inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, +#' specified by the appropriate 'DefinitionMethod'. Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +#' +#' The available DefinitionMethods are: +#' \describe{ +#' \item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement} +#' \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} +#' \item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} +#' } +#' @param processData \code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function. +#' @param StoxBioticData Data to define individual sampling parameters for +#' @param DefinitionMethod Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details. +#' @param Parameters Measurements / observations of individuals included in the design specification. Must all be column on the Individual-table of StoxBioticData. +#' @param LengthInterval width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on. +#' @param StratificationColumns names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified'). +#' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' +#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} +#' @export +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +DefineIndividualSamplingParameters <- function(processData, StoxBioticData, DefinitionMethod=c("SRS", "Stratified", "LengthStratified"), Parameters=c(), LengthInterval=numeric(), StratificationColumns=character(), UseProcessData=F){ + + if (UseProcessData){ + return(processData) + } + DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("SRS", "Stratified", "LengthStratified")) + checkMandatory(StoxBioticData, "StoxBioticData") + checkMandatory(Parameters, "Parameters") + + if (!all(Parameters %in% names(StoxBioticData$Individual))){ + stop("All values for the argument 'Parameters' must be names columns of the Individual table in 'StoxBioticData'") + } + + # + # Perform checks and + # Set stratification columns in accordance with DefinitionMethod + # + + if (DefinitionMethod == "SRS"){ + if (isGiven(LengthInterval) | isGiven(StratificationColumns)){ + stop("The arguments 'LengthInterval' and 'StratificationColumns' should not be provided in combination with DefinitionMethod SRS.") + } + StoxBioticData$Individual$Stratum <- rep("All", nrow(StoxBioticData$Individual)) + StratificationColumns <- c("Stratum") + return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) + } + + if (DefinitionMethod == "LengthStratified"){ + if (isGiven(StratificationColumns)){ + stop("The argument 'StratificationColumns' should not be provided in combination with DefinitionMethod 'LengthStratified'.") + } + if (!isGiven(LengthInterval)){ + stop("The argument 'LengthInterval' must be provided when DefinitionMethod is 'LengthStratified") + } + if (LengthInterval <=0){ + stop("LengthInterval must be a positive value.") + } + if (any(is.na(StoxBioticData$Individual$IndividualTotalLength))){ + missing <- StoxBioticData$Individual$Individual[is.na(StoxBioticData$Individual$IndividualTotalLength)] + if (lengt(missing)>5){ + missing <- c(missing[1:5], "...") + } + stop(paste("Cannot specify length stratified selection when some individuals are not measured. Missing IndividualTotalLength for:", paste(missing, collapse=","))) + } + + lengthGroups <- seq(0,max(StoxBioticData$Individual$IndividualTotalLength)+LengthInterval,LengthInterval) + StoxBioticData$Individual$LengthStratum <- paste(as.character(cut(StoxBioticData$Individual$IndividualTotalLength, lengthGroups, right=F)), "cm") + StratificationColumns <- c("LengthStratum") + return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) + } + + if (DefinitionMethod == "Stratified"){ + if (!isGiven(StratificationColumns)){ + stop("The argument 'StratificationColumns' must be provided when DefinitionMethod is 'Stratified") + } + if (!all(StratificationColumns %in% names(StoxBioticData$Individual))){ + stop("All values for the argument 'StratificationColumns' must be names of columns of the Individual table in 'StoxBioticData'") + } + for (st in StratificationColumns){ + if (any(is.na(StoxBioticData$Individual[[st]]))){ + stop("Cannot specify stratified selection when some individuals are not assigned a stratum. Missing values for:", st) + } + } + reserved_names <- c("Stratum", "SampleId") + if (any(reserved_names %in% StratificationColumns)){ + stop(paste(paste(reserved_names, collapse=","), "are reserved names in IndividualSamplingParametersData and cannot be specified as StratificationColumns")) + } + return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) + } + +} #' @noRd AssignPSUDesignParameters <- function(){} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 9ed4d46c..c21cf54b 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -126,25 +126,30 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData return(TRUE) } -#' Individual Sampling Design Parameters +#' Individual Sub-Sampling Design Parameters #' -#' Sampling parameters for selection of a sampling of individuals +#' Sampling parameters for selection of a sub-sample of individuals #' #' @details -#' Encodes information about the selection of a sample of observations from individuals, used in analytical design based estimation. +#' Encodes information about the selection of a sub-sample of observations from individuals, used in analytical design based estimation. +#' A sub-sample is simply a sample of a sample. This data type is intended to represent the final stage of sampling in multi-stage sampling, +#' and therefor has a reference to the Sample it was taken from ('SampleId'). Apart from that there is no principal difference from single +#' stage sampling. All stratification is specified within the sample identifed by 'SampleId', and all sampling probabilites are specified within strata. #' #' The SampleTable encodes information about the sample of sampling units: #' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} -#' \item{N}{Optional, num: The total number of individuals in Stratum} +#' \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} +#' \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of individuals in Stratum. For unstratified sampling, the total number of individuals in the sample the sub-sample is taken from.} #' \item{n}{Optional, num: The number of individuals selected from the Stratum} #' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} -#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} +#' \item{SampleDescription}{Optional, chr: Free text field describing the sample that is subsampled.} #' } #' #' The SelectionTable encodes information abut the selection of sampling units for sampling: #' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum the individual is taken from.} +#' \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} +#' \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} #' \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} #' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} @@ -160,7 +165,8 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData #' #' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): #' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} +#' \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum. In addition the Stratum is identified by the combination of all other columns on this table.} #' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} #' } #' @@ -189,7 +195,69 @@ NULL #' @concept Data types #' @noRd is.IndividualSamplingParametersData <- function(IndividualSamplingParametersData){ - stop("Not Implemented") + + if (!is.list(IndividualSamplingParametersData)){ + return(FALSE) + } + if (!all(sapply(IndividualSamplingParametersData, data.table::is.data.table))){ + return(FALSE) + } + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables", "ObservationVariables") %in% names(IndividualSamplingParametersData))){ + return(FALSE) + } + if (!all(c("SampleId", "Stratum", "N", "n", "SelectionMethod", "SampleDescription") %in% names(IndividualSamplingParametersData$SampleTable))){ + return(FALSE) + } + if (!all(c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription") %in% names(IndividualSamplingParametersData$SelectionTable))){ + return(FALSE) + } + if (!all(c("Stratum") %in% names(IndividualSamplingParametersData$StratificationVariables))){ + return(FALSE) + } + if (!all(c("Parameter") %in% names(IndividualSamplingParametersData$ObservationVariables))){ + return(FALSE) + } + if (any(duplicated(paste(IndividualSamplingParametersData$SampleTable$Stratum, IndividualSamplingParametersData$SampleTable$SampleId)))){ + return(FALSE) + } + #test that mandatory fields are not NA. + if (any(is.na(IndividualSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SampleTable$SampleId))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SampleTable$SelectionMethod))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SelectionTable$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SelectionTable$SampleId))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$StratificationVariables$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$StratificationVariables$SampleId))){ + return(FALSE) + } + for (n in names(IndividualSamplingParametersData$StratificationVariables)){ + if (any(is.na(IndividualSamplingParametersData$StratificationVariables[[n]]))){ + return(FALSE) + } + } + + if (ncol(IndividualSamplingParametersData$StratificationVariables) > 2){ + stratificationVariableStrings <- apply(IndividualSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(IndividualSamplingParametersData$StratificationVariables)[!(names(IndividualSamplingParametersData$StratificationVariables) %in% c("Stratum", "SampleId"))]], 1, paste, collapse="/") + duplicatedStrata <- IndividualSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + return(FALSE) + } + } + return(TRUE) + } diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index b0a45aa1..228d510d 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -20,3 +20,21 @@ expect_equal(nrow(designParamsSB$SelectionTable), 75) expect_equal(nrow(designParamsSB$SampleTable), 2) expect_equal(ncol(designParamsSB$StratificationVariables), 2) expect_equal(nrow(designParamsSB$StratificationVariables), 2) + +#Define Individual design, SRS +expect_error(DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "SRS")) +srs <- DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) +RstoxFDA:::is.IndividualSamplingParametersData(srs) +#Define Individual design, Length stratified +ls<-DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5) +RstoxFDA:::is.IndividualSamplingParametersData(ls) + +#Define Individual design, stratrifed, setting strata by length as in Length stratified +bioStrat <- RstoxFDA::StoxBioticDataExample +bioStrat$Individual$LStrat <- as.character(cut(bioStrat$Individual$IndividualTotalLength, seq(0,max(bioStrat$Individual$IndividualTotalLength)+5,5), right = F)) +ss<-DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) +RstoxFDA:::is.IndividualSamplingParametersData(ss) + +#check that length stratified and stratified is consistent. +expect_equal(ss$SelectionTable$SelectionProbability[[1]], ls$SelectionTable$SelectionProbability[[1]]) +expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) diff --git a/man/DefineIndividualSamplingParameters.Rd b/man/DefineIndividualSamplingParameters.Rd new file mode 100644 index 00000000..87941f68 --- /dev/null +++ b/man/DefineIndividualSamplingParameters.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R +\name{DefineIndividualSamplingParameters} +\alias{DefineIndividualSamplingParameters} +\title{Define Individual Sampling Parameters} +\usage{ +DefineIndividualSamplingParameters( + processData, + StoxBioticData, + DefinitionMethod = c("SRS", "Stratified", "LengthStratified"), + Parameters = c(), + LengthInterval, + StratificationColumns, + UseProcessData = F +) +} +\arguments{ +\item{processData}{\code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function.} + +\item{StoxBioticData}{Data to define individual sampling parameters for} + +\item{DefinitionMethod}{Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details.} + +\item{Parameters}{Measurements / observations of individuals included in the design specification. Must all be column on the Individual-table of StoxBioticData.} + +\item{LengthInterval}{width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on.} + +\item{StratificationColumns}{names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified').} + +\item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} +} +\value{ +\code{\link[RstoxFDA]{IndividualSamplingParametersData}} +} +\description{ +Define approximate sampling design parameters for a measured sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a sample recorded on the Sample table. +} +\details{ +Sampling parameters are approximate inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, +specified by the appropriate 'DefinitionMethod'. Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. + +The available DefinitionMethods are: +\describe{ +\item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement} +\item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} +\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>. In addition to the parameters in 'Parameters', IndividualTotalLength is included in the design specification. } +} +} +\concept{Analytical estimation} +\concept{StoX-functions} diff --git a/man/DefineMultiStageSamplingParameters.Rd b/man/DefineMultiStageSamplingParameters.Rd index 75fb2e9e..fefb7477 100644 --- a/man/DefineMultiStageSamplingParameters.Rd +++ b/man/DefineMultiStageSamplingParameters.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/StoxAnalyticalBaselineFunctions.R \name{DefineMultiStageSamplingParameters} \alias{DefineMultiStageSamplingParameters} -\title{Define Sampling Design Parameters} +\title{Define Multi-Stage Sampling Design Parameters} \usage{ DefineMultiStageSamplingParameters( processData, diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd index be4e0087..2fe6415c 100644 --- a/man/IndividualSamplingParametersData.Rd +++ b/man/IndividualSamplingParametersData.Rd @@ -7,11 +7,12 @@ Sampling parameters for selection of a sampling of individuals } \details{ -Encodes information about the selection of a sample of observations from individuals, used in analytical design based estimation. +Encodes information about the selection of a sub-sample of observations from individuals, used in analytical design based estimation. The SampleTable encodes information about the sample of sampling units: \describe{ - \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{Sample}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the stratum of the sample, that the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} \item{N}{Optional, num: The total number of individuals in Stratum} \item{n}{Optional, num: The number of individuals selected from the Stratum} \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} @@ -20,7 +21,8 @@ Encodes information about the selection of a sample of observations from individ The SelectionTable encodes information abut the selection of sampling units for sampling: \describe{ - \item{Stratum}{Mandatory, chr: Identifies the stratum the individual is taken from.} + \item{Sample}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the stratum of the sample, that the the individual is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} From 7fc764761ed62124bc721c0e6d6cbb2a9c90a21a Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Wed, 1 Nov 2023 23:41:28 +0100 Subject: [PATCH 10/24] Added collapsing strata to IndividualSamplingParameters --- R/StoxAnalyticalBaselineFunctions.R | 53 ++++++++++++++++--- R/StoxDataTypes.R | 4 +- .../test-StoxAnalyticalBaselineFunctions.R | 1 + man/DefineIndividualSamplingParameters.Rd | 12 ++--- man/DefineMultiStageSamplingParameters.Rd | 4 +- man/IndividualSamplingParametersData.Rd | 24 +++++---- man/MultiStageSamplingParametersData.Rd | 2 +- 7 files changed, 71 insertions(+), 29 deletions(-) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 49a55d21..b668f89e 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -168,6 +168,27 @@ DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c(" } } +#' @noRd +collapseStrataIndividualDesignParamaters <- function(designParam){ + + Nstrata <- designParam$StratificationVariables[,list(Nstrata=.N), by="SampleId"] + if (all(Nstrata$Nstrata==1)){ + return(designParam) + } + + NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(SelectionMethod))), by="SampleId"] + if (any(NselectionMethods$NselMet>1)){ + stop("Cannot collapse strate with heterogenous selection methods") + } + + designParam$SelectionTable$Stratum <- "All" + designParam$SampleTable <- designParam$SampleTable[,list(Stratum="All", N=sum(N), n=sum(n), SelectionMethod=SelectionMethod[1], SampleDescription=as.character(NA)), by=c("SampleId")] + designParam$StratificationVariables$Stratum <- "All" + designParam$StratificationVariables <- designParam$StratificationVariables[,.SD,.SDcol=c("SampleId", "Stratum")] + + return(designParam) +} + #' make IndividualDesignParameters for stratified selection of Individuals #' if StratificationColumn contains only one column and this is called Stratum, do not add any stratification columns. #' @noRd @@ -183,14 +204,13 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi individuals$Stratum <- apply(individuals[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") StratificationColumns <- StratificationColumns[StratificationColumns!="Stratum"] - + individuals$SampleId <- individuals$Sample stratificationTable <- individuals[!duplicated(paste(individuals$SampleId, individuals$Stratum)), .SD,.SDcol=c("SampleId", "Stratum", StratificationColumns)] observationTable <- data.table::data.table(Parameter=Parameters) individuals$Sampled <- hasParam - stratumTotals <- individuals[,list(totalInStratum=.N, sampledInStratum=sum(Sampled)), by=c("Stratum", "SampleId")] sampleTotals <- individuals[,list(totalInSample=.N), by=c("SampleId")] stratumFraction <- merge(stratumTotals, sampleTotals, by="SampleId") @@ -236,6 +256,8 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi #' \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} #' \item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} #' } +#' +#' #' @param processData \code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function. #' @param StoxBioticData Data to define individual sampling parameters for #' @param DefinitionMethod Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details. @@ -248,8 +270,11 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi #' @concept StoX-functions #' @concept Analytical estimation #' @md -DefineIndividualSamplingParameters <- function(processData, StoxBioticData, DefinitionMethod=c("SRS", "Stratified", "LengthStratified"), Parameters=c(), LengthInterval=numeric(), StratificationColumns=character(), UseProcessData=F){ +DefineIndividualSamplingParameters <- function(processData, StoxBioticData, DefinitionMethod=c("SRS", "Stratified", "LengthStratified"), Parameters=c(), LengthInterval=numeric(), StratificationColumns=character(), UseProcessData=FALSE){ + #May want to expose this option if DefinitionMethods are added that only provides relative selection probabilities. + CollapseStrata=TRUE + if (UseProcessData){ return(processData) } @@ -272,7 +297,6 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi } StoxBioticData$Individual$Stratum <- rep("All", nrow(StoxBioticData$Individual)) StratificationColumns <- c("Stratum") - return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) } if (DefinitionMethod == "LengthStratified"){ @@ -287,7 +311,7 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi } if (any(is.na(StoxBioticData$Individual$IndividualTotalLength))){ missing <- StoxBioticData$Individual$Individual[is.na(StoxBioticData$Individual$IndividualTotalLength)] - if (lengt(missing)>5){ + if (length(missing)>5){ missing <- c(missing[1:5], "...") } stop(paste("Cannot specify length stratified selection when some individuals are not measured. Missing IndividualTotalLength for:", paste(missing, collapse=","))) @@ -296,7 +320,6 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi lengthGroups <- seq(0,max(StoxBioticData$Individual$IndividualTotalLength)+LengthInterval,LengthInterval) StoxBioticData$Individual$LengthStratum <- paste(as.character(cut(StoxBioticData$Individual$IndividualTotalLength, lengthGroups, right=F)), "cm") StratificationColumns <- c("LengthStratum") - return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) } if (DefinitionMethod == "Stratified"){ @@ -315,13 +338,27 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi if (any(reserved_names %in% StratificationColumns)){ stop(paste(paste(reserved_names, collapse=","), "are reserved names in IndividualSamplingParametersData and cannot be specified as StratificationColumns")) } - return(extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters)) } + params <- extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters) + if (CollapseStrata){ + params <- collapseStrataIndividualDesignParamaters(params) + } + return(params) + } #' @noRd -AssignPSUDesignParameters <- function(){} +CollapseSamplingHierarchy <- function(IndividualSamplingParametersData, MultiStageSamplingParametersData, CollapseStrata=T){ + +} + +#' Put some options for handling non-response here. +#' If all responded, this function does nothing but returning the input +#' @noRd +AssignPSUDesignParameters <- function(MultiStageSamplingParametersData){ + +} #' @noRd DefinePSUCoInclusionProbabilities <- function(){} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index c21cf54b..8e4e9629 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -60,7 +60,7 @@ is.Date <- function(date){ #' } #' #' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. #' #' @name MultiStageSamplingParametersData @@ -180,7 +180,7 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData #' } #' #' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. #' #' @name IndividualSamplingParametersData diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index 228d510d..0b98abcf 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -37,4 +37,5 @@ RstoxFDA:::is.IndividualSamplingParametersData(ss) #check that length stratified and stratified is consistent. expect_equal(ss$SelectionTable$SelectionProbability[[1]], ls$SelectionTable$SelectionProbability[[1]]) +expect_true(srs$SelectionTable$SelectionProbability[[1]] != ls$SelectionTable$SelectionProbability[[1]]) expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) diff --git a/man/DefineIndividualSamplingParameters.Rd b/man/DefineIndividualSamplingParameters.Rd index 87941f68..77b039ff 100644 --- a/man/DefineIndividualSamplingParameters.Rd +++ b/man/DefineIndividualSamplingParameters.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/StoxAnalyticalBaselineFunctions.R \name{DefineIndividualSamplingParameters} \alias{DefineIndividualSamplingParameters} -\title{Define Individual Sampling Parameters} +\title{Define Sub-Sampling Parameters for Individuals} \usage{ DefineIndividualSamplingParameters( processData, StoxBioticData, DefinitionMethod = c("SRS", "Stratified", "LengthStratified"), Parameters = c(), - LengthInterval, - StratificationColumns, - UseProcessData = F + LengthInterval = numeric(), + StratificationColumns = character(), + UseProcessData = FALSE ) } \arguments{ @@ -33,7 +33,7 @@ DefineIndividualSamplingParameters( \code{\link[RstoxFDA]{IndividualSamplingParametersData}} } \description{ -Define approximate sampling design parameters for a measured sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +Define approximate sampling design parameters for a sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a sample recorded on the Sample table. } \details{ @@ -44,7 +44,7 @@ The available DefinitionMethods are: \describe{ \item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement} \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} -\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>. In addition to the parameters in 'Parameters', IndividualTotalLength is included in the design specification. } +\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} } } \concept{Analytical estimation} diff --git a/man/DefineMultiStageSamplingParameters.Rd b/man/DefineMultiStageSamplingParameters.Rd index fefb7477..3f92def1 100644 --- a/man/DefineMultiStageSamplingParameters.Rd +++ b/man/DefineMultiStageSamplingParameters.Rd @@ -9,8 +9,8 @@ DefineMultiStageSamplingParameters( DefinitionMethod = c("ResourceFile", "AdHocStoxBiotic"), FileName = character(), StoxBioticData, - SamplingUnitId, - StratificationColumns, + SamplingUnitId = character(), + StratificationColumns = character(), UseProcessData = F ) } diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd index 2fe6415c..196831b7 100644 --- a/man/IndividualSamplingParametersData.Rd +++ b/man/IndividualSamplingParametersData.Rd @@ -2,27 +2,30 @@ % Please edit documentation in R/StoxDataTypes.R \name{IndividualSamplingParametersData} \alias{IndividualSamplingParametersData} -\title{Individual Sampling Design Parameters} +\title{Individual Sub-Sampling Design Parameters} \description{ -Sampling parameters for selection of a sampling of individuals +Sampling parameters for selection of a sub-sample of individuals } \details{ Encodes information about the selection of a sub-sample of observations from individuals, used in analytical design based estimation. + A sub-sample is simply a sample of a sample. This data type is intended to represent the final stage of sampling in multi-stage sampling, + and therefor has a reference to the Sample it was taken from ('SampleId'). Apart from that there is no principal difference from single + stage sampling. All stratification is specified within the sample identifed by 'SampleId', and all sampling probabilites are specified within strata. The SampleTable encodes information about the sample of sampling units: \describe{ - \item{Sample}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} - \item{Stratum}{Mandatory, chr: Identifies the stratum of the sample, that the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} - \item{N}{Optional, num: The total number of individuals in Stratum} + \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of individuals in Stratum. For unstratified sampling, the total number of individuals in the sample the sub-sample is taken from.} \item{n}{Optional, num: The number of individuals selected from the Stratum} \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} - \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} + \item{SampleDescription}{Optional, chr: Free text field describing the sample that is subsampled.} } The SelectionTable encodes information abut the selection of sampling units for sampling: \describe{ - \item{Sample}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} - \item{Stratum}{Mandatory, chr: Identifies the stratum of the sample, that the the individual is taken from.} + \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} @@ -38,7 +41,8 @@ Encodes information about the selection of a sub-sample of observations from ind The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): \describe{ - \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} + \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum. In addition the Stratum is identified by the combination of all other columns on this table.} \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} } @@ -52,7 +56,7 @@ The selection methods available for 'SelectionMethod' are explained here: } The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. } \concept{Analytical estimation} diff --git a/man/MultiStageSamplingParametersData.Rd b/man/MultiStageSamplingParametersData.Rd index c9cc193d..c25be427 100644 --- a/man/MultiStageSamplingParametersData.Rd +++ b/man/MultiStageSamplingParametersData.Rd @@ -46,7 +46,7 @@ The selection methods available for 'SelectionMethod' are explained here: } The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -The RelativeSelectionProbability: A value proportional to the SelectionProbability. Within a stratum, SelectionProbability=c*RelativeSelectionProbability, with c possibly unknown. +The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. } \concept{Analytical estimation} From 1b0fc257447b63fd9bbda9b19ed0b91ce1ae5336 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Mon, 6 Nov 2023 13:59:45 +0100 Subject: [PATCH 11/24] Redefined sampling weights. Added support for strata collapse --- R/StoxAnalyticalBaselineFunctions.R | 134 +++++++++++++----- R/StoxDataTypes.R | 37 +++-- inst/dataPrepScripts/prepDesignParameters.R | 15 +- .../lotteryParameters/lotteryDesignNSH.txt | 130 ++++++++--------- .../test-StoxAnalyticalBaselineFunctions.R | 39 +++-- man/DefineIndividualSamplingParameters.Rd | 31 ++-- man/DefineMultiStageSamplingParameters.Rd | 5 +- man/IndividualSamplingParametersData.Rd | 17 +-- man/MultiStageSamplingParametersData.Rd | 8 +- 9 files changed, 246 insertions(+), 170 deletions(-) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index b668f89e..fa9b9799 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -14,7 +14,7 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str flatStox <- StoxBioticData[[targetTable]] if (isGiven(StratificationColumns) & length(StratificationColumns)>0 & !all(StratificationColumns %in% names(flatStox))){ - stop("Not all stratification columns were found at", targetTable, ", where the SamplingUnitId", SamplingUnitId, "is found.") + stop("Not all stratification columns were found at ", targetTable, ", where the SamplingUnitId ", SamplingUnitId, " is found.") } if (any(is.na(flatStox[[SamplingUnitId]]))){ @@ -27,11 +27,13 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str } flatStox$Stratum <- "All" - flatStox$Stratum <- apply(flatStox[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + if (length(StratificationColumns)>0){ + flatStox$Stratum <- apply(flatStox[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + } flatStox$SamplingUnitId <- flatStox[[SamplingUnitId]] flatStox$Order <- as.numeric(NA) - CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), SelectionProbability=as.numeric(NA), RelativeSelectionProbability=1/length(unique(SamplingUnitId)), SelectionDescription=as.character(NA)), by=c("Stratum")] + CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), HTsamplingWeight=1/length(unique(SamplingUnitId)), SelectionProbability=as.numeric(NA), HHsamplingWeight=as.numeric(NA), SelectionDescription=as.character(NA)), by=c("Stratum")] selectionUnits <- flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")] selectionUnits <- selectionUnits[!duplicated(selectionUnits$SamplingUnitId),] selectionTable <- merge(flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")], CommonSelectionData) @@ -53,7 +55,7 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str #' @noRd parseDesignParameters <- function(filename){ - colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", SelectionProbability="numeric", RelativeSelectionProbability="numeric", SelectionDescription="character") + colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", HTsamplingWeight="numeric", SelectionProbability="numeric", HHsamplingWeight="numeric", SelectionDescription="character") headers <- data.table::fread(filename, sep="\t", dec=".", header = T, nrows = 1) if (!all(names(colClasses) %in% names(headers))){ missing <- names(colClasses)[!(names(colClasses) %in% names(headers)),] @@ -71,7 +73,7 @@ parseDesignParameters <- function(filename){ designParameters <- data.table::fread(filename, sep="\t", dec=".", header = T, colClasses = colClasses, na.strings = c("")) - selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] + selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] stratificationTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable)) & !(names(designParameters) %in% names(sampleTable))])] @@ -136,10 +138,9 @@ parseDesignParameters <- function(filename){ #' execution halts with error if any are violated. #' #' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, -#' assuming equal probability sampling with fixed sample size, selection with replacement and complete response. +#' assuming equal probability sampling with fixed sample size, selection without replacement and complete response. #' This is a reasonable approximation if within-strata sampling is approximately simple random selections, -#' non-response is believed to be at random, and only a small fraction of the strata is sampled, -#' so that with and without replacement sampling probabilities are approximately equal. +#' non-response is believed to be at random. #' @param processData \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} as returned from this function. #' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' #' @param FileName path to resource file @@ -168,33 +169,71 @@ DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c(" } } +#' collapse strata, recalulate n/N and sampling weights +#' For strata with unknown inclusion and selection probability, sampling weights will be NA. #' @noRd -collapseStrataIndividualDesignParamaters <- function(designParam){ +collapseStrataIndividualDesignParamaters <- function(designParam, collapseVariables=c()){ + + sv <- names(designParam$StratificationVariables)[!names(designParam$StratificationVariables) %in% c("SampleId", "Stratum")] + if (!all(collapseVariables %in% sv)){ + missing <- collapseVariables[!(collapseVariables %in% sv)] + stop("The following are specified as strata to collapse, but are not StratificationVariables:", paste(missing, collapse=",")) + } Nstrata <- designParam$StratificationVariables[,list(Nstrata=.N), by="SampleId"] if (all(Nstrata$Nstrata==1)){ return(designParam) } + + retain <- sv[!(sv %in% collapseVariables)] + selectionStratumIndex <- match(paste(designParam$SelectionTable$Stratum, designParam$SelectionTable$SampleId), paste(designParam$StratificationVariables$Stratum, designParam$StratificationVariables$SampleId)) + sampleStratumIndex <- match(paste(designParam$SampleTable$Stratum, designParam$SampleTable$SampleId), paste(designParam$StratificationVariables$Stratum, designParam$StratificationVariables$SampleId)) + + if (length(retain)==0){ + designParam$StratificationVariables$Stratum <- "All" + } + else{ + designParam$StratificationVariables$Stratum <- apply(designParam$StratificationVariables[,.SD, .SDcol=retain], 1, paste, collapse="/") + } + + designParam$SampleTable$Stratum <- designParam$StratificationVariables$Stratum[sampleStratumIndex] + designParam$SelectionTable$Stratum <- designParam$StratificationVariables$Stratum[selectionStratumIndex] - NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(SelectionMethod))), by="SampleId"] + NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(SelectionMethod))), by=c("SampleId", "Stratum")] if (any(NselectionMethods$NselMet>1)){ stop("Cannot collapse strate with heterogenous selection methods") } - designParam$SelectionTable$Stratum <- "All" - designParam$SampleTable <- designParam$SampleTable[,list(Stratum="All", N=sum(N), n=sum(n), SelectionMethod=SelectionMethod[1], SampleDescription=as.character(NA)), by=c("SampleId")] - designParam$StratificationVariables$Stratum <- "All" - designParam$StratificationVariables <- designParam$StratificationVariables[,.SD,.SDcol=c("SampleId", "Stratum")] + weights <- designParam$SelectionTable[,list(HTsum=sum(1/InclusionProbability), HHsum=sum(1/SelectionProbability)),by=c("SampleId", "Stratum")] + designParam$SelectionTable <- merge(designParam$SelectionTable, weights, by=c("SampleId", "Stratum")) + designParam$SelectionTable$HTsamplingWeight <- 1/(designParam$SelectionTable$InclusionProbability * designParam$SelectionTable$HTsum) + designParam$SelectionTable$HHsamplingWeight <- 1/(designParam$SelectionTable$SelectionProbability * designParam$SelectionTable$HHsum) + designParam$SelectionTable$HHsum <- NULL + designParam$SelectionTable$HTsum <- NULL + + designParam$SampleTable <- designParam$SampleTable[,list(N=sum(N), n=sum(n), SelectionMethod=SelectionMethod[1], SampleDescription=as.character(NA)), by=c("SampleId", "Stratum")] + designParam$StratificationVariables <- designParam$StratificationVariables[!duplicated(paste(designParam$StratificationVariables$SampleId, designParam$StratificationVariables$Stratum)),.SD, .SDcol=c("SampleId", "Stratum", retain)] return(designParam) } #' make IndividualDesignParameters for stratified selection of Individuals -#' if StratificationColumn contains only one column and this is called Stratum, do not add any stratification columns. #' @noRd extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, StratificationColumns, Parameters){ + StratificationColumns <- c("SpeciesCategory", StratificationColumns) + individuals <- RstoxData::mergeByIntersect(StoxBioticData$Individual, StoxBioticData$Sample) + individuals <- RstoxData::mergeByIntersect(individuals, StoxBioticData$SpeciesCategory) + individuals <- RstoxData::mergeByIntersect(individuals, StoxBioticData$Haul) + + if (any(is.na(individuals$CatchFractionNumber))){ + missing <- unique(individuals$Sample[is.na(individuals$CatchFractionNumber)]) + if (length(missing)>5){ + missing <- c(missing[1:5], "...") + } + stop(paste("Cannot infer sampling parameters for individuals from Samples with missing total number. CatchFractionNumber missing for Sample:", paste(missing, collapse=","))) + } #check first, so no restrictions need to be put on names of Parameters. hasParam <- rep(FALSE, nrow(individuals)) @@ -202,13 +241,16 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi hasParam <- hasParam | !is.na(individuals[[p]]) } - individuals$Stratum <- apply(individuals[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") - StratificationColumns <- StratificationColumns[StratificationColumns!="Stratum"] + if (length(StratificationColumns)>0){ + individuals$Stratum <- apply(individuals[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + } + else{ + individuals$Stratum <- "All" + } - individuals$SampleId <- individuals$Sample + individuals$SampleId <- individuals$Haul stratificationTable <- individuals[!duplicated(paste(individuals$SampleId, individuals$Stratum)), .SD,.SDcol=c("SampleId", "Stratum", StratificationColumns)] - observationTable <- data.table::data.table(Parameter=Parameters) individuals$Sampled <- hasParam stratumTotals <- individuals[,list(totalInStratum=.N, sampledInStratum=sum(Sampled)), by=c("Stratum", "SampleId")] @@ -226,46 +268,55 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi selectedIndividuals <- individuals[individuals$Sampled,] selectedIndividuals$IndividualId <- selectedIndividuals$Individual selectedIndividuals$Order <- as.numeric(NA) - selectedIndividuals$InclusionProbability <- as.numeric(NA) #Need order. Could possibly be obtained by convention from StoxBioticiIndividual$IndividualKey, would have to be user choice. - selectedIndividuals$SelectionProbability <- 1/selectedIndividuals$N - selectedIndividuals$RelativeSelectionProbability <- 1/selectedIndividuals$N + selectedIndividuals$InclusionProbability <- selectedIndividuals$n/selectedIndividuals$N + selectedIndividuals$HTsamplingWeight <- 1/selectedIndividuals$n + selectedIndividuals$SelectionProbability <- as.numeric(NA) #Need order. Could possibly be obtained by convention from StoxBioticiIndividual$IndividualKey, would have to be user choice. + selectedIndividuals$HHsamplingWeight <- as.numeric(NA) selectedIndividuals$SelectionDescription <- as.character(NA) - selectionTable <- selectedIndividuals[,.SD,.SDcol=c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] + selectionTable <- selectedIndividuals[,.SD,.SDcol=c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] designParams <- list() designParams$SampleTable <- sampleTable designParams$SelectionTable <- selectionTable - designParams$ObservationVariables <- observationTable designParams$StratificationVariables <- stratificationTable return(designParams) } -#' Define Sub-Sampling Parameters for Individuals +#' Define Sampling Parameters for Individuals #' @description -#' Define approximate sampling design parameters for a sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, -#' and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a sample recorded on the Sample table. +#' Define approximate sampling parameters for the selection of individuals from a haul. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +#' and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a Haul (the table Haul in StoxBioticData). #' @details -#' Sampling parameters are approximate inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, -#' specified by the appropriate 'DefinitionMethod'. Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +#' StoxBioticData represents sorting of species as a separate level in the hierarchy (SpeciesCategory) and Samples are selected in Stratified from the species categories. +#' This represent sampling stratified on taxons in addition to some additional stratification criteria in the cases where more than one sample is present for +#' a species-category in a Haul. The exact criteria for stratification is not important for the calculation of sampling parameters, but only clearly encoded criteria can be used +#' in subsequent analysis, so sampling parameters are reported stratified only on SpeciesCategory. Any other stratification has been incorporated into selection or inclusion probabilities. #' -#' The available DefinitionMethods are: +#' Sampling parameters are approximately inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, +#' specified by the appropriate 'DefinitionMethod'. +#' +#' Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +#' In this way selection of individuals may be specified differently for different parameters. +#' For instance one may define one design for length-measurements and another for length-stratified age, weight and sex observations. +#' +#' The available DefinitionMethods specifies how Individuals are selected from a Sample, and are: #' \describe{ -#' \item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement} -#' \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} -#' \item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} +#' \item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement from each Sample.} +#' \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} +#' \item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} #' } #' #' #' @param processData \code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function. #' @param StoxBioticData Data to define individual sampling parameters for #' @param DefinitionMethod Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details. -#' @param Parameters Measurements / observations of individuals included in the design specification. Must all be column on the Individual-table of StoxBioticData. +#' @param Parameters Measurements / observations of individuals included in the design specification. Must all be columns on the Individual-table of StoxBioticData. #' @param LengthInterval width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on. #' @param StratificationColumns names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified'). #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' -#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} +#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} where SampleId refers to the variable 'Haul' on the 'Haul' table in StoxBioticData, and IndividualId refers to the variable 'Individual' on the 'Individual' table of StoxBioticData. #' @export #' @concept StoX-functions #' @concept Analytical estimation @@ -295,8 +346,6 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi if (isGiven(LengthInterval) | isGiven(StratificationColumns)){ stop("The arguments 'LengthInterval' and 'StratificationColumns' should not be provided in combination with DefinitionMethod SRS.") } - StoxBioticData$Individual$Stratum <- rep("All", nrow(StoxBioticData$Individual)) - StratificationColumns <- c("Stratum") } if (DefinitionMethod == "LengthStratified"){ @@ -309,6 +358,12 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi if (LengthInterval <=0){ stop("LengthInterval must be a positive value.") } + if ("IndividualTotalLength" %in% Parameters){ + stop("'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") + } + if ("LengthStratum" %in% Parameters){ + stop("'LengthStratum' may not be used as a 'Parameter' with DefinitionMethod 'LengthStratified'. Consider renaming or using the DefinitionMethod 'Stratified'") + } if (any(is.na(StoxBioticData$Individual$IndividualTotalLength))){ missing <- StoxBioticData$Individual$Individual[is.na(StoxBioticData$Individual$IndividualTotalLength)] if (length(missing)>5){ @@ -342,7 +397,8 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi params <- extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters) if (CollapseStrata){ - params <- collapseStrataIndividualDesignParamaters(params) + #SpeciesCategory is added to Stratification in extractIndividualDesignParametersStoxBiotic, and is not retained (not in 'StratificationColumns') + params <- collapseStrataIndividualDesignParamaters(params, StratificationColumns) } return(params) @@ -364,4 +420,4 @@ AssignPSUDesignParameters <- function(MultiStageSamplingParametersData){ DefinePSUCoInclusionProbabilities <- function(){} #' @noRd -ProbabilisticSuperIndividuals <- function(){} \ No newline at end of file +ProbabilisticSuperIndividuals <- function(){} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 8e4e9629..8d50e80c 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -39,8 +39,9 @@ is.Date <- function(date){ #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} #' \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} #' \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} +#' \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the sampling unit} #' \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} -#' \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the sampling unit} +#' \item{HHsamplingWeight}{Optional, num: The relative selection probability of the sampling unit} #' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} #' } #' @@ -59,9 +60,10 @@ is.Date <- function(date){ #' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} #' } #' -#' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. +#' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +#' The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. #' #' @name MultiStageSamplingParametersData #' @concept Data types @@ -87,7 +89,7 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(MultiStageSamplingParametersData$SampleTable))){ return(FALSE) } - if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription") %in% names(MultiStageSamplingParametersData$SelectionTable))){ + if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(MultiStageSamplingParametersData$SelectionTable))){ return(FALSE) } if (!all(c("Stratum") %in% names(MultiStageSamplingParametersData$StratificationVariables))){ @@ -152,17 +154,13 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData #' \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} #' \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} -#' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} -#' \item{SelectionProbability}{Optional, num: The selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} -#' \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} +#' \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the individual} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} +#' \item{SelectionProbability}{Optional, num: The selection probability of the individual} +#' \item{HHsamplingWeight}{Optional, num: The relative selection probability of the individual} #' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} #' } #' -#' The ObservationVariables table specifies which set of variables the design is specified for: -#' \describe{ -#' \item{Parameter}{Mandatory, chr: Name of parameter selected for observation.} -#' } -#' #' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): #' \describe{ #' \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} @@ -179,9 +177,10 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData #' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} #' } #' -#' The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -#' The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. +#' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +#' The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. #' #' @name IndividualSamplingParametersData #' @concept Data types @@ -195,28 +194,24 @@ NULL #' @concept Data types #' @noRd is.IndividualSamplingParametersData <- function(IndividualSamplingParametersData){ - if (!is.list(IndividualSamplingParametersData)){ return(FALSE) } if (!all(sapply(IndividualSamplingParametersData, data.table::is.data.table))){ return(FALSE) } - if (!all(c("SampleTable", "SelectionTable", "StratificationVariables", "ObservationVariables") %in% names(IndividualSamplingParametersData))){ + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(IndividualSamplingParametersData))){ return(FALSE) } if (!all(c("SampleId", "Stratum", "N", "n", "SelectionMethod", "SampleDescription") %in% names(IndividualSamplingParametersData$SampleTable))){ return(FALSE) } - if (!all(c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription") %in% names(IndividualSamplingParametersData$SelectionTable))){ + if (!all(c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(IndividualSamplingParametersData$SelectionTable))){ return(FALSE) } if (!all(c("Stratum") %in% names(IndividualSamplingParametersData$StratificationVariables))){ return(FALSE) } - if (!all(c("Parameter") %in% names(IndividualSamplingParametersData$ObservationVariables))){ - return(FALSE) - } if (any(duplicated(paste(IndividualSamplingParametersData$SampleTable$Stratum, IndividualSamplingParametersData$SampleTable$SampleId)))){ return(FALSE) } @@ -249,7 +244,7 @@ is.IndividualSamplingParametersData <- function(IndividualSamplingParametersData } if (ncol(IndividualSamplingParametersData$StratificationVariables) > 2){ - stratificationVariableStrings <- apply(IndividualSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(IndividualSamplingParametersData$StratificationVariables)[!(names(IndividualSamplingParametersData$StratificationVariables) %in% c("Stratum", "SampleId"))]], 1, paste, collapse="/") + stratificationVariableStrings <- apply(IndividualSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(IndividualSamplingParametersData$StratificationVariables)[!(names(IndividualSamplingParametersData$StratificationVariables) %in% c("Stratum"))]], 1, paste, collapse="/") duplicatedStrata <- IndividualSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] if (length(duplicatedStrata)>0){ diff --git a/inst/dataPrepScripts/prepDesignParameters.R b/inst/dataPrepScripts/prepDesignParameters.R index 7b8c5fe4..1049b69e 100644 --- a/inst/dataPrepScripts/prepDesignParameters.R +++ b/inst/dataPrepScripts/prepDesignParameters.R @@ -75,19 +75,20 @@ prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDif stationTable$description <- paste(stationTable$lotteri, stationTable$HIF.stratum, stationTable$lotteryMessage, sep="/") stationTable$SelectionProbability <- stationTable$lotteri.kg/(stationTable$kvoteT*1000) - stationTable$RelativeSelectionProbability <- stationTable$SelectionProbability - selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "RelativeSelectionProbability", "kapasitet", "description")] - names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "n", "SelectionDescription") + stationTable$HHsamplingWeight <- 1 / (stationTable$SelectionProbability * sum(1/stationTable$SelectionProbability)) + selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "HHsamplingWeight", "kapasitet", "description")] + names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "n", "SelectionDescription") if (length(unique(stationTable$kapasitet))!=1){ selectionTable$SelectionProbability <- as.numeric(NA) - stationTable$RelativeSelectionProbability <- as.numeric(NA) + stationTable$HHsamplingWeight <- as.numeric(NA) } selectionTable$Order <- as.numeric(NA) - selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "RelativeSelectionProbability", "SelectionDescription")] + selectionTable$HTsamplingWeight <- 1 / (selectionTable$InclusionProbability * sum(1/selectionTable$InclusionProbability)) + selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] selectionTable$SelectionDescription <- as.character(NA) #remove vessel identifying descriptions stopifnot(length(unique(stationTable$HIF.stratum))==1) - sampleTable <- data.table(Stratum = stationTable$HIF.stratum[[1]], N = sum(!is.na(lotteryParams$i.prob))) + sampleTable <- data.table::data.table(Stratum = stationTable$HIF.stratum[[1]], N = sum(!is.na(lotteryParams$i.prob))) sampleTable$n <- as.numeric(NA) if (length(unique(stationTable$kapasitet))==1){ sampleTable$n <- stationTable$kapasitet[[1]] @@ -115,4 +116,4 @@ bioData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic("~/bioticsets/lotteriekse platformCodes <- readxl::read_excel("~/codelists/NMDeksempler/platform.xlsx", 2) designParams <- prepDesignParamFile(lotteryParams, bioData, platformCodes) -saveDesignTable("inst/testresources/lotteryParameters/lotteryDesignNSH.txt", designParams) \ No newline at end of file +saveDesignTable("inst/testresources/lotteryParameters/lotteryDesignNSH.txt", designParams) diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt index b1d00a0c..0abffca8 100644 --- a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -1,65 +1,65 @@ -Stratum N n SelectionMethod FrameDescription Order SamplingUnitId InclusionProbability SelectionProbability RelativeSelectionProbability SelectionDescription -Nordsjo 811 110 Poisson Sild2022 38401 0.213196915139625 0.00217741935483871 0.00217741935483871 -Nordsjo 811 110 Poisson Sild2022 38433 0.247412405326388 0.00258064516129032 0.00258064516129032 -Nordsjo 811 110 Poisson Sild2022 38440 0.1700984890555 0.00169354838709677 0.00169354838709677 -Nordsjo 811 110 Poisson Sild2022 38445 0.108975250145035 0.00104838709677419 0.00104838709677419 -Nordsjo 811 110 Poisson Sild2022 38438 0.641175307024414 0.0092741935483871 0.0092741935483871 -Nordsjo 811 110 Poisson Sild2022 38441 0.233906828422691 0.00241935483870968 0.00241935483870968 -Nordsjo 811 110 Poisson Sild2022 38448 0.267229615158194 0.00282258064516129 0.00282258064516129 -Nordsjo 811 110 Poisson Sild2022 38403 0.0767615148687669 0.000725806451612903 0.000725806451612903 -Nordsjo 811 110 Poisson Sild2022 38435 0.143125007547176 0.00140322580645161 0.00140322580645161 -Nordsjo 811 110 Poisson Sild2022 38436 0.147680162555427 0.00145161290322581 0.00145161290322581 -Nordsjo 811 110 Poisson Sild2022 38443 0.0182839435244132 0.000167741935483871 0.000167741935483871 -Nordsjo 811 110 Poisson Sild2022 38446 0.267229615158194 0.00282258064516129 0.00282258064516129 -Nordsjo 811 110 Poisson Sild2022 38447 0.247412405326388 0.00258064516129032 0.00258064516129032 -Nordsjo 811 110 Poisson Sild2022 38402 0.108975250145035 0.00104838709677419 0.00104838709677419 -Nordsjo 811 110 Poisson Sild2022 38434 0.220161129326957 0.00225806451612903 0.00225806451612903 -Nordsjo 811 110 Poisson Sild2022 38442 0.068529393639208 0.000645161290322581 0.000645161290322581 -Nordsjo 811 110 Poisson Sild2022 38444 0.147680162555427 0.00145161290322581 0.00145161290322581 -Nordsjo 811 110 Poisson Sild2022 38437 0.147680162555427 0.00145161290322581 0.00145161290322581 -Nordsjo 811 110 Poisson Sild2022 38439 0.311481476435603 0.00338709677419355 0.00338709677419355 -Nordsjo 811 110 Poisson Sild2022 38431 0.0889747087440887 0.000846774193548387 0.000846774193548387 -Nordsjo 811 110 Poisson Sild2022 38415 0.136247006433322 0.00133064516129032 0.00133064516129032 -Nordsjo 811 110 Poisson Sild2022 38406 0.097027756531011 0.00092741935483871 0.00092741935483871 -Nordsjo 811 110 Poisson Sild2022 38417 0.101027788104015 0.000967741935483871 0.000967741935483871 -Nordsjo 811 110 Poisson Sild2022 38419 0.0849215381190344 0.000806451612903226 0.000806451612903226 -Nordsjo 811 110 Poisson Sild2022 38408 0.184718409696934 0.00185483870967742 0.00185483870967742 -Nordsjo 811 110 Poisson Sild2022 38412 0.216686691440281 0.00221774193548387 0.00221774193548387 -Nordsjo 811 110 Poisson Sild2022 38422 0.0518463272604551 0.000483870967741936 0.000483870967741936 -Nordsjo 811 110 Poisson Sild2022 38432 0.162691408611336 0.00161290322580645 0.00161290322580645 -Nordsjo 811 110 Poisson Sild2022 38429 0.338519898362406 0.00375 0.00375 -Nordsjo 811 110 Poisson Sild2022 38404 0.162691408611336 0.00161290322580645 0.00161290322580645 -Nordsjo 811 110 Poisson Sild2022 38413 0.097027756531011 0.00092741935483871 0.00092741935483871 -Nordsjo 811 110 Poisson Sild2022 38414 0.240689370659604 0.0025 0.0025 -Nordsjo 811 110 Poisson Sild2022 38405 0.267229615158194 0.00282258064516129 0.00282258064516129 -Nordsjo 811 110 Poisson Sild2022 38411 0.458713264018625 0.00556451612903226 0.00556451612903226 -Nordsjo 811 110 Poisson Sild2022 38416 0.116853084910627 0.00112903225806452 0.00112903225806452 -Nordsjo 811 110 Poisson Sild2022 38427 0.589628784636336 0.00806451612903226 0.00806451612903226 -Nordsjo 811 110 Poisson Sild2022 38409 0.162691408611336 0.00161290322580645 0.00161290322580645 -Nordsjo 811 110 Poisson Sild2022 38424 0.347296770955766 0.00387096774193548 0.00387096774193548 -Nordsjo 811 110 Poisson Sild2022 38421 0.116853084910627 0.00112903225806452 0.00112903225806452 -Nordsjo 811 110 Poisson Sild2022 38430 0.116853084910627 0.00112903225806452 0.00112903225806452 -Nordsjo 811 110 Poisson Sild2022 38426 0.233906828422691 0.00241935483870968 0.00241935483870968 -Nordsjo 811 110 Poisson Sild2022 38407 0.068529393639208 0.000645161290322581 0.000645161290322581 -Nordsjo 811 110 Poisson Sild2022 38418 0.229685923270579 0.00236952419354839 0.00236952419354839 -Nordsjo 811 110 Poisson Sild2022 38428 0.143125007547176 0.00140322580645161 0.00140322580645161 -Nordsjo 811 110 Poisson Sild2022 38420 0.0425447856855458 0.000395161290322581 0.000395161290322581 -Nordsjo 811 110 Poisson Sild2022 38423 0.463520590962966 0.00564516129032258 0.00564516129032258 -Nordsjo 811 110 Poisson Sild2022 38410 0.267034025829916 0.00282016129032258 0.00282016129032258 -Nordsjo 811 110 Poisson Sild2022 38425 0.0930100890906527 0.000887096774193548 0.000887096774193548 -Nordsjo 811 110 Poisson Sild2022 0.199083079634323 0.00201612903225806 0.00201612903225806 -Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.00169354838709677 0.00169354838709677 -Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.00169354838709677 0.00169354838709677 -Nordsjo 811 110 Poisson Sild2022 0.174511532419619 0.00174193548387097 0.00174193548387097 -Nordsjo 811 110 Poisson Sild2022 0.202634883401628 0.00205645161290323 0.00205645161290323 -Nordsjo 811 110 Poisson Sild2022 0.068529393639208 0.000645161290322581 0.000645161290322581 -Nordsjo 811 110 Poisson Sild2022 0.132402285228299 0.00129032258064516 0.00129032258064516 -Nordsjo 811 110 Poisson Sild2022 0.101027788104015 0.000967741935483871 0.000967741935483871 -Nordsjo 811 110 Poisson Sild2022 0.0808504995083754 0.000766129032258065 0.000766129032258065 -Nordsjo 811 110 Poisson Sild2022 0.140074844132902 0.00137096774193548 0.00137096774193548 -Nordsjo 811 110 Poisson Sild2022 0.0552064585338686 0.000516129032258065 0.000516129032258065 -Nordsjo 811 110 Poisson Sild2022 0.112922833036567 0.00108870967741935 0.00108870967741935 -Nordsjo 811 110 Poisson Sild2022 0.386753608156008 0.00443548387096774 0.00443548387096774 -Nordsjo 811 110 Poisson Sild2022 0.413480846839946 0.00483870967741936 0.00483870967741936 -Nordsjo 811 110 Poisson Sild2022 0.247412405326388 0.00258064516129032 0.00258064516129032 -Nordsjo 811 110 Poisson Sild2022 0.0518463272604551 0.000483870967741936 0.000483870967741936 +Stratum N n SelectionMethod FrameDescription Order SamplingUnitId InclusionProbability HTsamplingWeight SelectionProbability HHsamplingWeight SelectionDescription +Nordsjo 811 110 Poisson Sild2022 38401 0.213196915139625 0.00894691694298864 0.00217741935483871 0.00849720338523697 +Nordsjo 811 110 Poisson Sild2022 38433 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 811 110 Poisson Sild2022 38440 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 811 110 Poisson Sild2022 38445 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 811 110 Poisson Sild2022 38438 0.641175307024414 0.00297493535911075 0.0092741935483871 0.00199499557740346 +Nordsjo 811 110 Poisson Sild2022 38441 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 811 110 Poisson Sild2022 38448 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 811 110 Poisson Sild2022 38403 0.0767615148687669 0.0248491069452791 0.000725806451612903 0.0254916101557109 +Nordsjo 811 110 Poisson Sild2022 38435 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 811 110 Poisson Sild2022 38436 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 811 110 Poisson Sild2022 38443 0.0182839435244132 0.104324052943433 0.000167741935483871 0.110300236250672 +Nordsjo 811 110 Poisson Sild2022 38446 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 811 110 Poisson Sild2022 38447 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 811 110 Poisson Sild2022 38402 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 811 110 Poisson Sild2022 38434 0.220161129326957 0.00866390492312064 0.00225806451612903 0.00819373183576422 +Nordsjo 811 110 Poisson Sild2022 38442 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 811 110 Poisson Sild2022 38444 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 811 110 Poisson Sild2022 38437 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 811 110 Poisson Sild2022 38439 0.311481476435603 0.0061238154964569 0.00338709677419355 0.00546248789050948 +Nordsjo 811 110 Poisson Sild2022 38431 0.0889747087440887 0.0214381718038762 0.000846774193548387 0.0218499515620379 +Nordsjo 811 110 Poisson Sild2022 38415 0.136247006433322 0.0139999779972348 0.00133064516129032 0.0139045146303878 +Nordsjo 811 110 Poisson Sild2022 38406 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 811 110 Poisson Sild2022 38417 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 811 110 Poisson Sild2022 38419 0.0849215381190344 0.0224613818179075 0.000806451612903226 0.0229424491401398 +Nordsjo 811 110 Poisson Sild2022 38408 0.184718409696934 0.0103262858065158 0.00185483870967742 0.00997497788701731 +Nordsjo 811 110 Poisson Sild2022 38412 0.216686691440281 0.00880282531233035 0.00221774193548387 0.00834270877823266 +Nordsjo 811 110 Poisson Sild2022 38422 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 +Nordsjo 811 110 Poisson Sild2022 38432 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 811 110 Poisson Sild2022 38429 0.338519898362406 0.00563469119978754 0.00375 0.0049338600301376 +Nordsjo 811 110 Poisson Sild2022 38404 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 811 110 Poisson Sild2022 38413 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 811 110 Poisson Sild2022 38414 0.240689370659604 0.00792496605491253 0.0025 0.00740079004520639 +Nordsjo 811 110 Poisson Sild2022 38405 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 811 110 Poisson Sild2022 38411 0.458713264018625 0.00415827324360556 0.00556451612903226 0.00332499262900577 +Nordsjo 811 110 Poisson Sild2022 38416 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 811 110 Poisson Sild2022 38427 0.589628784636336 0.00323501013172564 0.00806451612903226 0.00229424491401398 +Nordsjo 811 110 Poisson Sild2022 38409 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 811 110 Poisson Sild2022 38424 0.347296770955766 0.00549229146878123 0.00387096774193548 0.0047796769041958 +Nordsjo 811 110 Poisson Sild2022 38421 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 811 110 Poisson Sild2022 38430 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 811 110 Poisson Sild2022 38426 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 811 110 Poisson Sild2022 38407 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 811 110 Poisson Sild2022 38418 0.229685923270579 0.00830462339656995 0.00236952419354839 0.00780830816726504 +Nordsjo 811 110 Poisson Sild2022 38428 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 811 110 Poisson Sild2022 38420 0.0425447856855458 0.044834051024581 0.000395161290322581 0.0468213247757956 +Nordsjo 811 110 Poisson Sild2022 38423 0.463520590962966 0.00411514640221889 0.00564516129032258 0.00327749273430569 +Nordsjo 811 110 Poisson Sild2022 38410 0.267034025829916 0.00714311626141064 0.00282016129032258 0.00656060884762363 +Nordsjo 811 110 Poisson Sild2022 38425 0.0930100890906527 0.0205080449971025 0.000887096774193548 0.0208567719455817 +Nordsjo 811 110 Poisson Sild2022 0.199083079634323 0.00958120145498676 0.00201612903225806 0.00917697965605593 +Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 811 110 Poisson Sild2022 0.174511532419619 0.0109302523782158 0.00174193548387097 0.0106215042315462 +Nordsjo 811 110 Poisson Sild2022 0.202634883401628 0.00941326123239596 0.00205645161290323 0.0089970388784862 +Nordsjo 811 110 Poisson Sild2022 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 811 110 Poisson Sild2022 0.132402285228299 0.0144065118586634 0.00129032258064516 0.0143390307125874 +Nordsjo 811 110 Poisson Sild2022 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 811 110 Poisson Sild2022 0.0808504995083754 0.0235923723892148 0.000766129032258065 0.0241499464633051 +Nordsjo 811 110 Poisson Sild2022 0.140074844132902 0.0136173993557747 0.00137096774193548 0.0134955583177293 +Nordsjo 811 110 Poisson Sild2022 0.0552064585338686 0.0345513032879191 0.000516129032258065 0.0358475767814685 +Nordsjo 811 110 Poisson Sild2022 0.112922833036567 0.0168916687702827 0.00108870967741935 0.0169944067704739 +Nordsjo 811 110 Poisson Sild2022 0.386753608156008 0.00493196456873441 0.00443548387096774 0.00417135438911633 +Nordsjo 811 110 Poisson Sild2022 0.413480846839946 0.00461316432631274 0.00483870967741936 0.00382374152335664 +Nordsjo 811 110 Poisson Sild2022 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 811 110 Poisson Sild2022 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index 0b98abcf..b45e1002 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -7,6 +7,8 @@ expect_equal(nrow(designParams$SelectionTable), 64) expect_equal(nrow(designParams$SampleTable), 1) expect_equal(ncol(designParams$StratificationVariables), 1) expect_equal(nrow(designParams$StratificationVariables), 1) +expect_equal(sum(designParams$SelectionTable$HTsamplingWeight), 1) +expect_equal(sum(designParams$SelectionTable$HHsamplingWeight), 1) #define from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) @@ -21,21 +23,36 @@ expect_equal(nrow(designParamsSB$SampleTable), 2) expect_equal(ncol(designParamsSB$StratificationVariables), 2) expect_equal(nrow(designParamsSB$StratificationVariables), 2) +# +# Prepare dataset with sub-sampled parameters +# +ds <- RstoxFDA::StoxBioticDataExample +ds$Individual$IndividualAge[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA) +ds$Individual$IndividualRoundWeight[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA) +ds$Sample$CatchFractionNumber[is.na(ds$Sample$CatchFractionNumber)] <- 1000 + #Define Individual design, SRS -expect_error(DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "SRS")) -srs <- DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) -RstoxFDA:::is.IndividualSamplingParametersData(srs) +expect_error(DefineIndividualSamplingParameters(NULL, ds, "SRS")) +srs <- DefineIndividualSamplingParameters(NULL, ds, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(srs)) +weights <- srs$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) #Define Individual design, Length stratified -ls<-DefineIndividualSamplingParameters(NULL, RstoxFDA::StoxBioticDataExample, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5) -RstoxFDA:::is.IndividualSamplingParametersData(ls) +expect_error(DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5), "'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") +ls<-DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualRoundWeight"), LengthInterval = 5) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ls)) +weights <- ls$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) -#Define Individual design, stratrifed, setting strata by length as in Length stratified -bioStrat <- RstoxFDA::StoxBioticDataExample +#Define Individual design, stratified, setting strata by length as in Length stratified +bioStrat <- ds bioStrat$Individual$LStrat <- as.character(cut(bioStrat$Individual$IndividualTotalLength, seq(0,max(bioStrat$Individual$IndividualTotalLength)+5,5), right = F)) -ss<-DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) -RstoxFDA:::is.IndividualSamplingParametersData(ss) +ss<-DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ss)) +weights <- ss$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) #check that length stratified and stratified is consistent. -expect_equal(ss$SelectionTable$SelectionProbability[[1]], ls$SelectionTable$SelectionProbability[[1]]) -expect_true(srs$SelectionTable$SelectionProbability[[1]] != ls$SelectionTable$SelectionProbability[[1]]) +expect_equal(ss$SelectionTable$InclusionProbability[[4]], ls$SelectionTable$InclusionProbability[[4]]) +expect_true(srs$SelectionTable$InclusionProbability[[4]] != ls$SelectionTable$InclusionProbability[[4]]) expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) diff --git a/man/DefineIndividualSamplingParameters.Rd b/man/DefineIndividualSamplingParameters.Rd index 77b039ff..cf64fb51 100644 --- a/man/DefineIndividualSamplingParameters.Rd +++ b/man/DefineIndividualSamplingParameters.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/StoxAnalyticalBaselineFunctions.R \name{DefineIndividualSamplingParameters} \alias{DefineIndividualSamplingParameters} -\title{Define Sub-Sampling Parameters for Individuals} +\title{Define Sampling Parameters for Individuals} \usage{ DefineIndividualSamplingParameters( processData, @@ -21,7 +21,7 @@ DefineIndividualSamplingParameters( \item{DefinitionMethod}{Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details.} -\item{Parameters}{Measurements / observations of individuals included in the design specification. Must all be column on the Individual-table of StoxBioticData.} +\item{Parameters}{Measurements / observations of individuals included in the design specification. Must all be columns on the Individual-table of StoxBioticData.} \item{LengthInterval}{width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on.} @@ -30,21 +30,30 @@ DefineIndividualSamplingParameters( \item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} } \value{ -\code{\link[RstoxFDA]{IndividualSamplingParametersData}} +\code{\link[RstoxFDA]{IndividualSamplingParametersData}} where SampleId refers to the variable 'Haul' on the 'Haul' table in StoxBioticData, and IndividualId refers to the variable 'Individual' on the 'Individual' table of StoxBioticData. } \description{ -Define approximate sampling design parameters for a sub-sample of individuals. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, -and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a sample recorded on the Sample table. +Define approximate sampling parameters for the selection of individuals from a haul. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a Haul (the table Haul in StoxBioticData). } \details{ -Sampling parameters are approximate inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, -specified by the appropriate 'DefinitionMethod'. Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +StoxBioticData represents sorting of species as a separate level in the hierarchy (SpeciesCategory) and Samples are selected in Stratified from the species categories. +This represent sampling stratified on taxons in addition to some additional stratification criteria in the cases where more than one sample is present for +a species-category in a Haul. The exact criteria for stratification is not important for the calculation of sampling parameters, but only clearly encoded criteria can be used +in subsequent analysis, so sampling parameters are reported stratified only on SpeciesCategory. Any other stratification has been incorporated into selection or inclusion probabilities. -The available DefinitionMethods are: +Sampling parameters are approximately inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, +specified by the appropriate 'DefinitionMethod'. + +Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +In this way selection of individuals may be specified differently for different parameters. +For instance one may define one design for length-measurements and another for length-stratified age, weight and sex observations. + +The available DefinitionMethods specifies how Individuals are selected from a Sample, and are: \describe{ -\item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement} -\item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} -\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} +\item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement from each Sample.} +\item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} +\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} } } \concept{Analytical estimation} diff --git a/man/DefineMultiStageSamplingParameters.Rd b/man/DefineMultiStageSamplingParameters.Rd index 3f92def1..1570e303 100644 --- a/man/DefineMultiStageSamplingParameters.Rd +++ b/man/DefineMultiStageSamplingParameters.Rd @@ -43,10 +43,9 @@ The conditions listed for the variables in \code{\link[RstoxFDA]{MultiStageSampl execution halts with error if any are violated. The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, -assuming equal probability sampling with fixed sample size, selection with replacement and complete response. +assuming equal probability sampling with fixed sample size, selection without replacement and complete response. This is a reasonable approximation if within-strata sampling is approximately simple random selections, -non-response is believed to be at random, and only a small fraction of the strata is sampled, -so that with and without replacement sampling probabilities are approximately equal. +non-response is believed to be at random. } \concept{Analytical estimation} \concept{StoX-functions} diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd index 196831b7..00e7cfd0 100644 --- a/man/IndividualSamplingParametersData.Rd +++ b/man/IndividualSamplingParametersData.Rd @@ -28,17 +28,13 @@ Encodes information about the selection of a sub-sample of observations from ind \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} - \item{InclusionProbability}{Optional, num: The inclusion probability of the individual with respect to observing the parameters in the 'observationVariables' table} - \item{SelectionProbability}{Optional, num: The selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} - \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the individual with respect to observing the parameters in the 'observationVariables' table} + \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the individual} + \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} + \item{SelectionProbability}{Optional, num: The selection probability of the individual} + \item{HHsamplingWeight}{Optional, num: The relative selection probability of the individual} \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} } - The ObservationVariables table specifies which set of variables the design is specified for: - \describe{ - \item{Parameter}{Mandatory, chr: Name of parameter selected for observation.} - } - The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): \describe{ \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} @@ -55,9 +51,10 @@ The selection methods available for 'SelectionMethod' are explained here: \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} } -The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. +The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. } \concept{Analytical estimation} \concept{Data types} diff --git a/man/MultiStageSamplingParametersData.Rd b/man/MultiStageSamplingParametersData.Rd index c25be427..ca73641b 100644 --- a/man/MultiStageSamplingParametersData.Rd +++ b/man/MultiStageSamplingParametersData.Rd @@ -25,8 +25,9 @@ Encodes information about the selection of an intermediate sampling unit in mult \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} + \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the sampling unit} \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} - \item{RelativeSelectionProbability}{Optional, num: The relative selection probability of the sampling unit} + \item{HHsamplingWeight}{Optional, num: The relative selection probability of the sampling unit} \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} } @@ -45,9 +46,10 @@ The selection methods available for 'SelectionMethod' are explained here: \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} } -The SelectionProbability is defined as: The probability of selecting the sampling unit in a random draw from the population. -The RelativeSelectionProbability: A value proportional to the SelectionProbability. SelectionProbability=c*RelativeSelectionProbability, with c constant within a stratum, but possibly unknown. +The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. } \concept{Analytical estimation} \concept{Data types} From 63b1461f8fd7c8780acabc465209acf21236dad1 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Tue, 7 Nov 2023 22:53:13 +0100 Subject: [PATCH 12/24] Implemented AssignPSUSamplingParameters and added an example data set. --- NAMESPACE | 3 +- R/CatchLotteryExample-datadoc.R | 23 ++++ R/StoxAnalyticalBaselineFunctions.R | 117 ++++++++++++---- R/StoxDataTypes.R | 68 +++++----- R/auxfunctions.R | 10 ++ data/CatchLotteryExample.rda | Bin 0 -> 17485 bytes inst/dataPrepScripts/prepDesignParameters.R | 15 +- inst/dataPrepScripts/prepHH_HT_comp.R | 92 +++++++++++++ .../lotteryParameters/lotteryDesignNSH.txt | 128 +++++++++--------- .../test-StoxAnalyticalBaselineFunctions.R | 19 ++- man/AssignPSUSamplingParameters.Rd | 51 +++++++ man/CatchLotteryExample.Rd | 26 ++++ ...ters.Rd => DefinePSUSamplingParameters.Rd} | 26 ++-- man/IndividualSamplingParametersData.Rd | 4 +- ...rsData.Rd => PSUSamplingParametersData.Rd} | 28 ++-- 15 files changed, 447 insertions(+), 163 deletions(-) create mode 100644 R/CatchLotteryExample-datadoc.R create mode 100644 data/CatchLotteryExample.rda create mode 100644 inst/dataPrepScripts/prepHH_HT_comp.R create mode 100644 man/AssignPSUSamplingParameters.Rd create mode 100644 man/CatchLotteryExample.Rd rename man/{DefineMultiStageSamplingParameters.Rd => DefinePSUSamplingParameters.Rd} (56%) rename man/{MultiStageSamplingParametersData.Rd => PSUSamplingParametersData.Rd} (77%) diff --git a/NAMESPACE b/NAMESPACE index 16e045ee..62f92251 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(AddPeriodStoxBiotic) export(AddPeriodStoxLanding) export(AddStratumStoxBiotic) export(AddStratumStoxLanding) +export(AssignPSUSamplingParameters) export(ConvertLengthBiotic) export(ConvertWeightBiotic) export(DefineAgeErrorMatrix) @@ -14,7 +15,7 @@ export(DefineAreaPosition) export(DefineCarNeighbours) export(DefineIndividualSamplingParameters) export(DefineLengthConversionParameters) -export(DefineMultiStageSamplingParameters) +export(DefinePSUSamplingParameters) export(DefinePeriod) export(DefineStockSplittingParameters) export(DefineWeightConversionFactor) diff --git a/R/CatchLotteryExample-datadoc.R b/R/CatchLotteryExample-datadoc.R new file mode 100644 index 00000000..c3e3f04c --- /dev/null +++ b/R/CatchLotteryExample-datadoc.R @@ -0,0 +1,23 @@ +#' Data from the Norwegian catch lottery sampling program. +#' +#' Example of data formatted as \code{\link[RstoxData]{StoxBioticData}} +#' Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +#' The data contain North Sea herring samples from catch lottery sampling in 2022. +#' +#' @docType data +#' +#' @usage data(CatchLotteryExample) +#' +#' @format \code{\link[RstoxData]{StoxBioticData}} +#' +#' @keywords datasets +#' @concept Analytical estimation +#' +#' @examples +#' RstoxFDA::plotArea(RstoxFDA::CatchLotteryExample$Station, +#' areaDef=RstoxFDA::mainareaFdir2018, +#' latCol = "Latitude", +#' lonCol = "Longitude", +#' areaLabels = TRUE) +"CatchLotteryExample" + diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index fa9b9799..9c592f6f 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -127,33 +127,33 @@ parseDesignParameters <- function(filename){ return(designParameters) } -#' Define Multi-Stage Sampling Design Parameters +#' Define PSU Sampling Design Parameters #' @description -#' Define sampling design parameters for intermediate sampling units in multi-stage sampling. +#' Define sampling parameters for Primary Sampling Units in multi-stage sampling. #' @details -#' The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in -#' \code{\link[RstoxFDA]{MultiStageSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. -#' Any columns not named in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are assumed to be stratification variables. -#' The conditions listed for the variables in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are checked upon reading the data, and +#' The DefintionMethod 'ResourceFile' reads sampling parameters from a tab delimited file with headers corresponding to those listed in +#' \code{\link[RstoxFDA]{PSUSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +#' Any columns not named in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are assumed to be stratification variables. +#' The conditions listed for the variables in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are checked upon reading the data, and #' execution halts with error if any are violated. #' #' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, #' assuming equal probability sampling with fixed sample size, selection without replacement and complete response. #' This is a reasonable approximation if within-strata sampling is approximately simple random selections, -#' non-response is believed to be at random. -#' @param processData \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} as returned from this function. +#' and non-response is believed to be at random. +#' @param processData \code{\link[RstoxFDA]{PSUSamplingParametersData}} as returned from this function. #' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' #' @param FileName path to resource file #' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from -#' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the sampling unit the design is constructed for. +#' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for. #' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' -#' @return \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} +#' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}} #' @export #' @concept StoX-functions #' @concept Analytical estimation #' @md -DefineMultiStageSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId=character(), StratificationColumns=character(), UseProcessData=F){ +DefinePSUSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId=character(), StratificationColumns=character(), UseProcessData=F){ if (UseProcessData){ return(processData) @@ -229,10 +229,7 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi if (any(is.na(individuals$CatchFractionNumber))){ missing <- unique(individuals$Sample[is.na(individuals$CatchFractionNumber)]) - if (length(missing)>5){ - missing <- c(missing[1:5], "...") - } - stop(paste("Cannot infer sampling parameters for individuals from Samples with missing total number. CatchFractionNumber missing for Sample:", paste(missing, collapse=","))) + stop(paste("Cannot infer sampling parameters for individuals from Samples with missing total number. CatchFractionNumber missing for Sample:", paste(truncateStringVector(missing), collapse=","))) } #check first, so no restrictions need to be put on names of Parameters. @@ -366,10 +363,7 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi } if (any(is.na(StoxBioticData$Individual$IndividualTotalLength))){ missing <- StoxBioticData$Individual$Individual[is.na(StoxBioticData$Individual$IndividualTotalLength)] - if (length(missing)>5){ - missing <- c(missing[1:5], "...") - } - stop(paste("Cannot specify length stratified selection when some individuals are not measured. Missing IndividualTotalLength for:", paste(missing, collapse=","))) + stop(paste("Cannot specify length stratified selection when some individuals are not measured. Missing IndividualTotalLength for:", paste(truncateStringVector(missing), collapse=","))) } lengthGroups <- seq(0,max(StoxBioticData$Individual$IndividualTotalLength)+LengthInterval,LengthInterval) @@ -404,15 +398,86 @@ DefineIndividualSamplingParameters <- function(processData, StoxBioticData, Defi } +#' Extend IndividualSamplingParametersData to reflect selection from PSU by specifying intermediate selection. +#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} #' @noRd -CollapseSamplingHierarchy <- function(IndividualSamplingParametersData, MultiStageSamplingParametersData, CollapseStrata=T){ +DefineSamplingHierarchy <- function(IndividualSamplingParametersData, Hierarchy=character(), Stratification=character(), StrataSizes=character(), SelectionMetod=character(), CollapseStrata=character()){ } -#' Put some options for handling non-response here. -#' If all responded, this function does nothing but returning the input -#' @noRd -AssignPSUDesignParameters <- function(MultiStageSamplingParametersData){ +#' Assign PSU Sampling Parameters +#' @description +#' Assigns data records to PSU Sampling Parameters and provides non-response adjustments for +#' selected PSUs that was not sampled. +#' @details +#' Some sampling parameters provided in ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} are only +#' interpretable for sampling with complete response. This function adjusts these parameters, removes non-respondents from the +#' ~\code{\link[RstoxFDA]{PSUSamplingParametersData}}, and checks that all responding PSUs are present in data records. +#' +#' If any respondants (rows of the SelectionTable of PSUSamplingParametersData that does not have NA for SamplingUnitId) are not +#' found in 'SamplingUnitId', execution halts with an error. +#' +#' Response after selection can generally be considered a process that modifies the sampling parameters that are set by design. +#' Typically sample size, InclusionProbabilities and normalized SamplingWeights need to be adjusted as non-respondents are removed, +#' since these are depend of the entire sample, not just the sampling unit they are assigned to. +#' SelectionProbabilites are by definition set for a single draw of a single sampling unit from the population and are valid even +#' when response is not complete. +#' +#' Treatment of non-response requires some assumption about systematic differences between respondents and non-respondents. +#' These assumptions are specified via the argument 'DefinitionMethod' and the following options are available: +#' \describe{ +#' \item{MissingAtRandom}{A response propensity is estimated for each stratum as the fraction of the sample resonding, and sample size (n) and InclusionProbability are adjusted by multiplying with this propensity. Sampling weights are adjusted by dividing them with their sum over repsondents in a stratum.} +#' } +#' +#' @param PSUSamplingParametersData ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} with sampling parameters for PSU selection +#' @param StoxBioticData ~\code{\link[RstoxData]{StoxBioticData}} with data records for responding PSUs. +#' @param SamplingUnitId name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs +#' @param DefinitionMethod The method for dealing with non-response, e.g. 'MissingAtRandon' +#' @return ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +#' @export +AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticData, SamplingUnitId, DefinitionMethod=c("MissingAtRandom")){ + checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData") + checkMandatory(StoxBioticData, "StoxBioticData") + checkMandatory(SamplingUnitId, "SamplingUnitId") + checkOptions(DefinitionMethod, "DefinitionMethod", c("MissingAtRandom")) + + level <- NULL + for (l in names(StoxBioticData)){ + if (SamplingUnitId %in% names(StoxBioticData[[l]])){ + level <- l + } + } + if (is.null(level)){ + stop(paste("The variable provided for SamplingUnitId (", SamplingUnitId,") is not a variable in 'StoxBioticData'"), sep="") + } + + records <- PSUSamplingParametersData$SelectionTable$SamplingUnitId[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] + if (!all(records %in% StoxBioticData[[l]][[SamplingUnitId]])){ + missing <- records[!(records %in% StoxBioticData[[l]][[SamplingUnitId]])] + stop(paste("Records are not found for all sampled PSUs. Missing for the following SamplingUnitIds (", SamplingUnitId,"): ", paste(truncateStringVector(missing), collapse=","), sep="")) + } + + if (DefinitionMethod == "MissingAtRandom"){ + responsePropensity <- PSUSamplingParametersData$SelectionTable[,list(ResponsePropensity=sum(!is.na(SamplingUnitId))/.N), by=c("Stratum")] + + PSUSamplingParametersData$SampleTable$n <- PSUSamplingParametersData$SampleTable$n * responsePropensity$ResponsePropensity[match(PSUSamplingParametersData$SampleTable$Stratum, responsePropensity$Stratum)] + + # correct sampling probabilities + PSUSamplingParametersData$SelectionTable$InclusionProbability <- PSUSamplingParametersData$SelectionTable$InclusionProbability * responsePropensity$ResponsePropensity[match(PSUSamplingParametersData$SelectionTable$Stratum, responsePropensity$Stratum)] + + #remove non respondants + PSUSamplingParametersData$SelectionTable <- PSUSamplingParametersData$SelectionTable[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] + + #correct normalized sampling weights + weights <- PSUSamplingParametersData$SelectionTable[,list(HHsum=sum(HHsamplingWeight), HTsum=sum(HTsamplingWeight)), by=c("Stratum")] + PSUSamplingParametersData$SelectionTable$HTsamplingWeight <- PSUSamplingParametersData$SelectionTable$HTsamplingWeight / weights$HTsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] + PSUSamplingParametersData$SelectionTable$HHsamplingWeight <- PSUSamplingParametersData$SelectionTable$HHsamplingWeight / weights$HHsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] + + return(PSUSamplingParametersData) + } } @@ -420,4 +485,6 @@ AssignPSUDesignParameters <- function(MultiStageSamplingParametersData){ DefinePSUCoInclusionProbabilities <- function(){} #' @noRd -ProbabilisticSuperIndividuals <- function(){} +ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){ + +} diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 8d50e80c..53cdba1d 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -16,33 +16,33 @@ is.Date <- function(date){ return(FALSE) } -#' Multi-stage Sampling Design Parameters +#' PSU Sampling Design Parameters #' -#' Sampling parameters for selection of a sampling unit in a multi-stage sampling design +#' Sampling parameters for selection of a Primary Sampling Unit #' #' @details -#' Encodes information about the selection of an intermediate sampling unit in multi-stage sampling, used in analytical design based estimation. +#' Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. #' Information is encoded in three tables. #' #' The SampleTable encodes information about the sample of sampling units: #' \describe{ #' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} -#' \item{N}{Optional, num: The total number of selection units in Stratum} -#' \item{n}{Optional, num: The number of selection units selected from the Stratum} +#' \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} +#' \item{n}{Optional, num: The number of PSUs selected from the Stratum} #' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} #' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} #' } #' #' The SelectionTable encodes information abut the selection of sampling units for sampling: #' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} -#' \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} -#' \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} -#' \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the sampling unit} -#' \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} -#' \item{HHsamplingWeight}{Optional, num: The relative selection probability of the sampling unit} -#' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} +#' \item{SamplingUnitId}{Optional, chr: Identifes PSU. NA encodes non-response} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the PSU} +#' \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the PSU} +#' \item{SelectionProbability}{Optional, num: The selection probability of the PSU} +#' \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the PSU} +#' \item{SelectionDescription}{Optional, chr: Free text description of the PSU.} #' } #' #' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): @@ -65,61 +65,61 @@ is.Date <- function(date){ #' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. #' The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. #' -#' @name MultiStageSamplingParametersData +#' @name PSUSamplingParametersData #' @concept Data types #' @concept Analytical estimation #' NULL -#' Check if table is correctly formatted Multi Stage Sampling Parameters Data -#' @param table \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} +#' Check if table is correctly formatted PSU Sampling Parameters Data +#' @param table \code{\link[RstoxFDA]{PSUSamplingParametersData}} #' @return validity #' @concept Data types #' @noRd -is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData){ - if (!is.list(MultiStageSamplingParametersData)){ +is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ + if (!is.list(PSUSamplingParametersData)){ return(FALSE) } - if (!all(sapply(MultiStageSamplingParametersData, data.table::is.data.table))){ + if (!all(sapply(PSUSamplingParametersData, data.table::is.data.table))){ return(FALSE) } - if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(MultiStageSamplingParametersData))){ + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(PSUSamplingParametersData))){ return(FALSE) } - if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(MultiStageSamplingParametersData$SampleTable))){ + if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(PSUSamplingParametersData$SampleTable))){ return(FALSE) } - if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(MultiStageSamplingParametersData$SelectionTable))){ + if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(PSUSamplingParametersData$SelectionTable))){ return(FALSE) } - if (!all(c("Stratum") %in% names(MultiStageSamplingParametersData$StratificationVariables))){ + if (!all(c("Stratum") %in% names(PSUSamplingParametersData$StratificationVariables))){ return(FALSE) } - if (any(duplicated(MultiStageSamplingParametersData$SampleTable$Stratum))){ + if (any(duplicated(PSUSamplingParametersData$SampleTable$Stratum))){ return(FALSE) } #test that mandatory fields are not NA. - if (any(is.na(MultiStageSamplingParametersData$SampleTable$Stratum))){ + if (any(is.na(PSUSamplingParametersData$SampleTable$Stratum))){ return(FALSE) } - if (any(is.na(MultiStageSamplingParametersData$SampleTable$SelectionMethod))){ + if (any(is.na(PSUSamplingParametersData$SampleTable$SelectionMethod))){ return(FALSE) } - if (any(is.na(MultiStageSamplingParametersData$SelectionTable$Stratum))){ + if (any(is.na(PSUSamplingParametersData$SelectionTable$Stratum))){ return(FALSE) } - if (any(is.na(MultiStageSamplingParametersData$StratificationVariables$Stratum))){ + if (any(is.na(PSUSamplingParametersData$StratificationVariables$Stratum))){ return(FALSE) } - for (n in names(MultiStageSamplingParametersData$StratificationVariables)){ - if (any(is.na(MultiStageSamplingParametersData$StratificationVariables[[n]]))){ + for (n in names(PSUSamplingParametersData$StratificationVariables)){ + if (any(is.na(PSUSamplingParametersData$StratificationVariables[[n]]))){ return(FALSE) } } - if (ncol(MultiStageSamplingParametersData$StratificationVariables) > 1){ - stratificationVariableStrings <- apply(MultiStageSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(MultiStageSamplingParametersData$StratificationVariables[names(MultiStageSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") - duplicatedStrata <- MultiStageSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + if (ncol(PSUSamplingParametersData$StratificationVariables) > 1){ + stratificationVariableStrings <- apply(PSUSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(PSUSamplingParametersData$StratificationVariables[names(PSUSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") + duplicatedStrata <- PSUSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] if (length(duplicatedStrata)>0){ return(FALSE) @@ -154,10 +154,10 @@ is.MultiStageSamplingParametersData <- function(MultiStageSamplingParametersData #' \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} #' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} #' \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} -#' \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the individual} #' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} +#' \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the individual} #' \item{SelectionProbability}{Optional, num: The selection probability of the individual} -#' \item{HHsamplingWeight}{Optional, num: The relative selection probability of the individual} +#' \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the individual} #' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} #' } #' diff --git a/R/auxfunctions.R b/R/auxfunctions.R index 23b01409..66a27c58 100644 --- a/R/auxfunctions.R +++ b/R/auxfunctions.R @@ -130,4 +130,14 @@ deprecationWarning <- function(functionName, deprecationTime, message=NULL){ warningstring <- paste(warningstring, message) } stoxWarning(warningstring) +} + +#' Construct truncated vector of strings with aal elements more than maxsize replaced by a single character "..." +#' @noRd +truncateStringVector <- function(missing, maxsize=5){ + + if (length(missing)>maxsize){ + missing <- c(missing[1:maxsize], "...") + } + return(missing) } \ No newline at end of file diff --git a/data/CatchLotteryExample.rda b/data/CatchLotteryExample.rda new file mode 100644 index 0000000000000000000000000000000000000000..7c370f308575cebf4ae950da75698e24072b7fbd GIT binary patch literal 17485 zcmbt+2|UzMyYN^lTV*dqVob<7_E2KTWRNXnO=GNM-=)Zsy^M8i6;UBV4P%gO8OENS zk##H~gi6czAJu#B`+o2J?)}~G_?!P(p7WgNJo`Bo&Fju8vX?9&CYHX1ua^$_v)uXh zuVrH$=%M`jwLkrH<0nx4;5n^!97HYWH5lkL!$G6rN5bBN0&h+prOy*+&jo?z7}>?m zF86920D(Y04jPUo7~S0;4}1NtMaqe)YzF^y8VobVP8vT!> zTAgpu6erOM)5ywq@EjKGlkbnU=g{b5iP6$O$?6~zsl&nQQjCQnUE+$vZ8-3A``p9F7Ozbrlk+XihX-pm1W}cWIP}I`Loz-bx6(--`QoO1$Ft) zM#~spbwgMy`B}H235TJH)_doW{Kt=1TnY`1in9;rKYiBGC(n0v&`>udQV82>%$^==ku$8u{8TYv4$!ASD z&d-HL`;1T0#i-PQ`KOUp3j=A%KIqB7j7mJMuSSq(akjko6L-P z2y^*b2dOj708bdSj|i2O<4}^8a>#HCmJNS*YGD2RG50b4`=Ns&PCk=GVCEYcL@7B$ z{dzJQ(z-%Y&XlPi8_N2ul-ke)32^b480A2Kb3bBBuffm6xOuF5$VOptmJe_^oRUW- zPu=2)8jG2X0FOy0GVH%LxnjTx0w6R_GzG$TXjvt7$Mv{eAy#!eWk}E zLFtqVGwLu|H7=$If=G3#Mp1mK(HrLF_)~Y`IFVtM^AiFwJ(ELKO)W=5r&uckB6(|q#+SFu` zllOW>NXMQ~^V`mLFRRIno2|JJcg_^wKt-#Zdj?iwmE>LVnc*yTpPP|)Q{sFUa3(bk zgOb^{0Z^h89tqX+ZScZAPTt+Q*}cB5a%>`yFfrz}Iz=RZp2{JV`vnm~zXjJ?;j3Th z%_6fQ`nY!M3?nI0RY1V!z*m@scgNuJ#rmGtGYP5_9D=s*r26}ECPqr=k`1cS*5gQ< z<^@>~1o#znzL|`WY?z5byf_PPYFI_&HIUO@=kVcc1FMTYCOfSJ)_H6Z1ZAar&!-l< z<|<;UNSl3YwU$IYJSMT`D<8RR=u1*Lgc8#=uV`-=Sv|#_$2CkG0M1 zpM4ux#zZC==2t@qN;xe`N)2F<-OG4zR8;G+hSjvCH3D*deUE+ZW59L*vXh75iY*MvgGXs~Q4-aH z8ll&|Bo7j2a`30p{DnoJ4pxCxBltS>nVhO&19I2lBbv+PRBOX)$4`LQ)2r*;v%aJj z-Se~bMc~gU8bo9t&O=472e7(6^FCv(opP;*Tk`7CmOg7od-3`r|6}8GX9E=`LzVmk zE4J1`?AQb3nml#`uG!>_=}eLRv+t)b<3sk|X_+_PllI8idC+Y!!!xB-)Zcniskn2u z>Xd#}kguvPtnRg=ax9!}@?~YMeT=RHyM&kfqR@w-hYSxqf+fo~a)eV-E$xkwFPw`_ ztf9Fr*$L;PZbfS48}28aGtXd>DT%xF-0#(}0VW~nrdr;yZ-OmtJ??v%hd;G@KM_oz zFlVrj^=OF`Zpc3_T6TdLrHu`Yk+KNg@Fqw1yzZ=pOnBmY3Kxo7Ez$9TOVDf63Eu18 zR~%%%!a9`WyztAnB%gJ0OVCAK4(?L$Yl-Qox}LZ=toCGOEp%}>v~;Mf+N}$G4!h_b zcI;T=Y0u?5qhU9GY*gixo)tUY^w{NLR8bBti#}Mr?S~rbnbEoK8E}fv+*pMyQLctV>X1y0U-#?n`%X&U zXBkx{Wyu6H*kf?pGTU^|_hXdH6jQ2z0(a@D!gL zioRY=V<_tf-NYzn5Bh5R#d77bu#f;HiJS(qO)6yUtdkn|exBymYRmA*r^h_L9a>Y@+-_73p`4~`*sd}(p7W`%pP64cHJ6)f z-7%LCnQQ$aAHPsh1{aySBfOeb8r&PESmjfqDUXLPM;5_)9M_#>yD48$)g$@5f~IB*2k)$8=E zNd%z-bKJF@RCo=XUMo5uUf6^5KhXbfE8iv+?_J*!(k$))X@hyEt~b7%_GZ@o9~T0#0AoDqB-;3&M(6hc(z)lQ|{PR^|&>a|uue5X(L>j&IK6euemEq`^wYVU>n z#uD^I^3K+rdk*DI`Ma=o)wY+Xdp{`&n^@O!<=aibXO(C_c1h~Al6|Ks%D!*s6o+^7 z>DDDH+8j6R&aJ%|tDNTY`00g`37wo&_F0|&5s>?}khm3g8vFG{n0@rGFD3Sbk&rGZ zspdg#_L3fv^~x*ul7Ujwt&iWfJ@j<3J3rmp+}dhK3tbfV1eBj9H_s_1;Y>V5Y6l2w zT~jZD%1d_5yp2psWUhVqOkoU#fH?w%MMP|d1^JB4n(bdEz8Rr{xkF?}9c=~CJ^=@bDuN*=Y z80Su^@bLO9%XePrdfKK?!6yQ9<2vMEP@CxO-IkCrqXd<{Z9RoE6YO%In zm@Pw&BqV!LY6`hK1WMaCQ*nsp*)J`8SwZhNt;yRJ;#n$j^#opc^Yxy-uz;KDK0-mM zf+*BA*x{OMW-ZE4XU(gN;Z1TZWwBn*qbx8g;m_C+V=4M~3!iAV)YFKcA5CZVzFCW> zJw$6M5i3Hg&@3!es-Alu3X>e$R)R)?(cDr!X{*BehaClJL9r1Rj2+!j&z>PlLS%_I!63Z<)d)(*OhL(;u69zzNsR%nH94jdn2zKg1Azp(8>%Z1T9}SCzR(6y;AeVU4PfzZ>5BIyn+BZI!k6<7`f!??tEf zeH)6=rrN`|V?Gxq=0c_P2tPeMu`@qzr*7=3PdoPAu^ZW6e;V;K-)Vnz;j3N$V(f9c z3{6mMZ8dQ8(>P&EJ{!0SrT9%wZ=To;ZZnz*OBoBQqKqzp&o+a{;D{ne|d2yoQbMGp)^t)GVOhjm3wz7vFzbVi@Tx6W` znU=GD8oA~79Y-#WiBS%-Y9F!X3tlXT3nh&Wn{huOn^~GF+$Dk-I#`8$K-_RAc}tO8 zHmIITNg}4lQ3BK5i51Chs?z&hp^wGhl$CePzNMrHMYLb?&Ez##>9}i&1!G|>Mlg~I zM7NNh3jzZdL!-dhR8AKj4H@6uCrBdr3YWJJFH0;Kg}kK8f`~ijd-vo`i8E=5>Ffql zf<#Vm)7!^jB)dbnW*U|uhMiMK#{tm~($u}zeyT6~0rDz1JcT}-!#!3Dmgr2pmqf(( zT#dsrf>U|vT~0!dNO{vSMipS3l~hPA5Fs!vmQGPN{T+RKM7I2UVr)0K!x5s;W32@u zH}ach4+k8A>JNz^b4oBoCF5)Gx^$q)h0J-*44yKD>UWfaC(|tCQ)M2 zM2_*M_f1)DtJqe{dKT>*qAa(kS!}frLOZ7og_Q7!#zx!>;Xahp)Knd0akRtA{x$I$ zPCK`+O0wH5#lsb8g(=p?Yj&V!p(aWgWU;3)sX2=bkCyXi5zBduBt2`n)tm(}_0$UC zdtAr&xc*cLp89o9X5my}noqJ3QROVu7kErkd@{v+A@h?tsXm@MpFMS!rubsE$W~p! z9#$C(bskE5NyDrD>ngL9&V2khy2>ES(g`6^U5uDA&hU6tHf1E|f3@1*?3(2OvjjOo zVoF$IGF5`Cqa=>S$5;DA=VbHXxlr>Z-K50HbU7c>?#=`yS;w5%E~SVJPjHt~Ntcq) ze~guMcNSaLTB7D1bJC&RZ!ZY z{*mzD`?8DbZX_=s{3jQstnCW6!lKeCmD9STqM4D6@8q21++K0{oTQN$>KvT-@a4ks zNZ`Zf)2&$Q4FuyF7^6Y09M2041<_oUg9-@=2{JSZnwgoILQp6a8mORg6X-2C1da&? zVQBhK>7P=gag^CAsE1&3I-O@J%G#v#);sM)YV=bfO%rzP(3tm9B2Rwxl@Np&nF@b({fRXw^QEB>9r}=nLVIh(J z_JBYtJzjrCffulY|EqB%Kj=>%ot&J^-{IdlxPPxhH~ObrpyBU02y|?(VdzCYFf_e(t-q!bwF_h){OZ}{&T!2N&BKNzR$Rr?1bKz#qG|JRkjXm|gD zu*1ROf9o{Vm8c&ySZvnc&VPDugp&V2AM*w7AO94O_4R{2)55_I{`T<^2z9^fbR&=c zcK&&L)({rms?(1P(=vS-17AoGy1jAc#1hg_{H{o%_+!_0OKur$Jq`#IT6t5PWFl*H z#ZJHIP*CmK?WeF`0!MGR?EF|ukd?mk#|S=m6+J0a5ADlt3JEY(%AihJ@}Y#Ud6EpF0bc+T2+ zxG};mW)kiF*i`j2u~y)@OzujKSQSr1mat5rEDB;@O@k68nO3ut?xMux*sGbk#a`mv zuZ#=3>bzD?PqXuQd&mI6B$8~y>ym}ab}4@n^OZ@COB|-0!MSe@*_ntPD0)2Aw9lXfJm}N<@y)G7ZHHC8h0`ao5J+&tS6| zOanq}jdmtpEj@4N3OP#gUwG{sYl=Vl>T4L|g<*8rG-}_?d6Vmk@FlATQG48BGuj2Gh9z?Xq*4nis3`kZBcy6rs^ttLpSuxRaGconz~b)8M)|QEM{^eb zlufx?m*kHneOSmom7k7yPZ~ICKDQ@oN@U`cEaD;M_IqRmzxp0isx?>ue`7t24GA<+ zLZ)AB?<>s|@((5BueZ$%W2zN60yboQJ^Ln6k?uGFDF0i?=ST0O?U*O z0l}EV&cAIH+P2f@?9Ke)#Tt0JwjvRIFKJ0d`uWVfioD}3kNvs=u{%y~k*964&tD`~ zCYFF+@O;s!V|{+}B7S?U^r7d8Prc3CzwDyb-__G@lVs{-OW(baYy-D_M`QTTXcfpi4$0Wu2?tCf6mvfOh zL}xZ=n{msMuWx9maHS#s^j0a41~v zj05CKj3c7(jG}4NYq3c5cCe8$48 zT*nVk0BF!SxX}No@t0O2*h+&+IrSU&PmL}3d0aTNJ4X98Kqk5#WANe;?F2Xlje&!$ zkQmATNyEqR06Sc$dunRp1h8v>0~6QAzzIO0afcHKMF8U0XpC-kU|>KSP^a<&lo}XF zjpDArTB!B)wK%0fKPqi6s^Hu}C>j0#8{Ykw={b1pT>$wwuysoUfq+^^+5e}-#t4%; zSyZl;x?^+}QI1uALKqEXza~%*iGg#EfwAPhQYV!jAi7amleZCqGuUf5IyF@#Sp}DC z*tpi?n=3gr^*+HJgV+cis?Mb_2L}2JO@LEX$vf^lJzh1C^(O#7okGbkaw*YxrGJc3 zYSbi&gy+APsB+8ap8tzwOerkygCl?x*i89emB;$ZZ}r>=Zo?}D1`w43LpDMo`zkXY z2vQf}cMJc&XndP%knF-@15i$<7S~1qYW~(oH~4EHfi+;#T<%mIK(t%~9zulyNID4d zH013BAE!_sM2|}ML}So)banj#2Ibx3G4GpOrM5-H=lbU0SMMuV+Xe@!l2tM*)NCr$ z2zeW)y2vVR&le^)f);J-H(kH}8NA+T)3cZCxZ2BIk?GqohYi_oEBy_$hfjJ*)ft+e z?YNpQJb9Dkzx;bza)YFu6eaYX>`2i8?A%y1;{B7BYRPkhY9bB!sOuu^B3BVzumV_#(7EdaRKy^;UVfbZwp3t4DC9Gqyz?<_4Vs^v#)Bwzi!?KTzgt+cSzI^fmdaYo%4u7H(eb@!MAc*~ou z-+Nd^Q-~s{9PB>L%sJ7*sI}hkepd)U=S58i9eSIB;3T)W${SAn&3fTcuhXHKJ)LtN;2B!j; zz`qfZNEDSG-~3>ToFr~qZD+OX&!ie)f;$G#^^X924xv6bH@AKgDC1nBZklaJUG-g@ zo#b*CZya#(KY!3s9VFm04?NCqTF0rL@b43D^|uCiTkb#Ij#YY^L|~6#x}IPPNTi;U zZ?<03RBqy%lZ*F-$9niEtABreZNdtN8^H~qQF#Vk0`RV8nPxJDY$>0s? zF6So8aP}dLoK|UqqhxJsZFGQ$)U(SD_Z0Zk4L+35=H2eCy)si;e%AI_J-;{WAy%4v zZ?@wAyNrE6^f4MR%@phQqf^Tfhfyeco(D9Cxoozuoj_*oAan+cEjZh;_IDHHy@mt! z0L;v#s6bFCWX4}{Op-`In!NaDTgaUiIOISaDB{_C;7kQS3rLN6Vgep47VsSuwe231 z6}mBabtL1@BoAQ17&vbubPRw8q<}3cDf6}vglT}TGDKXW6}n5W4T^O-1xzMU)U5*j z&CI-kliPn^G`Oz*Lr#ZGK?iY*x}7JJ599*q3SEB*nlC+uKyKQob!~q7FfwYYffpo*JdddiYNCs!Z+m-on6@ z0s&nQC8+QI;h(2Uj}^eS0HWG9vBwt!2oPZSUc-h5P&=>-pFe>ASvu;#$rg+O5CNj3 ziU*GcB-`WLtG4Kfo1o-31pk&98ut+f(+1qGoE(Bg0=zGwzT3K`a7 z*lrT3CnUb4q}26p$^WK2`zt2elD{<&s3iQLZgn8Azr+BfmH@=!CNikTqhcx)K=FD! z%;jy$IJ%jxEPl0(g}o5YQgkKb^<`GSagM7q+BXao*{e({I12(`XU=z?d-T-m#yOGm zeoVER%T?m7HVm46Of&q7+74ck#`2^hixd(d>?;FW)~(mBI-g0=m*eCyc8*JvCow(< zGkIBlJMY`rQ`-WHu75$Yrp}7F&9VA&7bTtLA(7hR9qt|O1%7}w4?Z+*@~o6Kp2+&> zZb=K9Q7VDJ%IdNH=+a%3bHgHV3-Mcmb$|q@+e1@Sx(;jruo#sdAArw9NgQBuYXHLm z&h9m=A3)(I-18gYkOQG2F@TE$rURx2z)~6FD+fp~my#b$>O$_UjyylO&X_>g$jL$f z*cKL>85kIxT~Z>WZO_2L#wyqpOM4YYqok5c`V1J8l_}SMZ&LpTLgTN<{>9q6S(QJmgR)~-9DwJdl*TVX|Dwc5zx-eob;$8>tffe?y+pxEp@k%)8`eygyN)Q&VciK4 zeSbYo{!qDt7MFoVHY3I&75pB{o5W6(Fz(ArPqAPYFJ^OOW^gFxfc2Hd4{@_M+hawd zDusoh%ry2qW{$$_(8uv7OFCqV<8SD5c3h86iN2v@4Ao*lnjC_b47@P9A$eGf$+`Gh zr8efI7Wicbd#414xD2-$LRj>MrJlHi5t5s>Iq?YqE>kLI$@vzc+mZOJs-VNtbjavA zFN)KFza%g}H2T6cVf_~W%-wpP6B?y&J5`~DwfB6?eGF!m_=#4nWK%Mt_X5fn`{j_0 z85t#xazdRe_vNx)l!#h{l>qVv)JN#RKT8AQP0kgtH{3*UkIw{%p~Fjfa+izq;SRC?96u^GhGkqTqD2rvT! zmy?}ZM4=q9*kVBOfEA2zXX+B(Ov=554wI)T_LMNRY zpRNVIo+kVlX56ghs%0TrEyQD{02eDsf2@O#D$^a}g1kB^l)b1~CNdysBb0k}`L);+ zv?#B(Jc%Y9l6DR3f@ONkOYf*%?C=;P(all~g=4ar98CsP+02*l{MNzWX8Bzcv@csS z=V><|dsS}id&Jt?*i)qb#e9wU3c$)4fIH3jPpWn4$?c3&3)C3uE6CqR3Wo*eyobR6 zd*04n1i~7|FpSE1vTrWc6hCSTX_k~}0bbOUM5>mP2ZRO`IXSg%DiB~KfLR|14U2)V zzQ+LB7$EWW-zmx@iE7{~lzd<4n0hGSz9j*VfPn+x;J;}L5CwqXS;{JSDO?ai!vWIB zie<9_0RF9Xu2DVgzwx+KX^ERsC4j)*tx$=lKC)(jU-W)9|7<)-)J03wkwuHORTlW@ zfU-miHrdOZJRb9_ls{N=;g&#tnV$N9EvuS$!V)9EUXh)Cg8ibnWDAj^Dd9(uO{ z)m~+)E5%1GD#&HgQW*1nJN-9o8KB3If^V^4n5`!pwJ?=bvY7z-t#rVb9^VW=32mmf zdzEr2O1^S(ibSgGJJ4im`Z1qzz&%{A3zd6-P7?{M!$4NC4vg&uSW!7!)36cz+d=kB zAa~XP<)^Y6aHluxTVcQtd4TH_m@0wLI%H6w2-*(17 z5`RQx=0RclN00+}epRyS6G$MKyi?tNqA}o0?c-ZCrDgtaXdD2&A*v{PNNg0;w&8 zNNV#3GMw-;F*4>dY-8lTSZawjvxu$#WBZstUczG|Ke(QvLI9Xnb&#n6HjY98N4LG- zfeAOZ1_r!|DtQmD6c$2FzW&%AS^>Z#ozTQW(N5WuNv?1`m~$cvONO2ZhKQ`|GoP7_ZqD_6 z-ln^hZo`nfl1!h=<*gT|+!a5o?I5UO5ldswz|1PdD<@Y{DqbMe0H{;<_DER|<;^se zod<`sz2BC6V6dT7MvVbk5%mxO_`<1-2{fR=fXBoiSb6sW`v4Axh2SQDUOl>if&;o;1xx@|`iPs@=uuk)j`dD*rxsB_4bWDV zrDwGNPjxLdv*-Rz@^}dlhp8UA$2U`%29W;WACCk!Y`~wB_Xsu{p`+vJ;?|BtiQff* zI4NWQHj-PyqQ0orq`R|VwGonb|3;kpBHf#xd+&x%NT&sLyQp4-&x(ie_NyIFS*sO& zvbt0FK5#Dh!hVCD*5+;1Zxwg$VEsSY6?KN(7uE~VQup0`8n?M_D{JdAZg|AX*Tp=vu5e6$OGlws(XGB(e5}&XPVK?_?@Xhf zP|3=diXNtMw_L|wMD{dAB=!%YQWFwCaexcS6UALibtAOhN52kCVDzd!222M3d%L&c7&IlP`c0zZm%|CS3X^sKHHfdUx#N z7nA%8Cf}|1COnJwZoMg)-jiFnU3@a^_%ZXH3rALcx^aU8^PBI^_O66jHyly@n!kJJ z_-W>~t*>7?obA6oa&I$uVK(e+=`7(F=@-SS&G6x-+Wix$o^ks>FKnFt`b#Tb z8iitc8!0S&7-dg#e#}`wOw7u*5N32hbf8)o+B0s*$yi3nBtmrC*x3riVxUneM+F(d zvATCr+4h_yp@`zdLr{5lSeh|Uv1n8ro1(nFI7;}W3^YziG?p}g(Y(sQA(8duF(?K! z$o4Vhye!>^F;ChUh3Jb>X{qqU$DD^{U4)R`f^l~vWH<$nYQj#& zoB?4T$Ue|zLKrh3s*GVNl?Cp^cov?!@Khcd+7wP+*@!fkY;cDrBUt!oqQm0{7AT1V z!NYMm;3#Cje4mc22Bh8b%p;7;BYD;Vj1Uy16Hn^Uy{dVVUbjHrUXneUfq^}ZWDzZj ztrU%drKK4aoGFxf@PHWg7zvjl!QiwU4jRtwFceE#%HbxM=FKZQ8XT82+1SB6G{T~9 zF`TBUx)NE1x(}{q(ZTE=3DJ|%B6+Z~PxQ{nvS^F3#R-C+#PQNPz^my=Z(+JJ;CP1) zIzG07tIkAhCCr`a3hHFxnK)S%VWCJFhCY%IF9!z{$;0v>1f2%f*^-blq|Av$;#1-Cxcdx zGYa%5MP5_lFw?IWKkx6`+&!)mde(4szOg2r*F$t_$S%&YD>XK-6aw6Ream_q$%LWP z5q71IeIhQ{&eRs07|kK30K2MF<=hq(I~vcV2ku}U(1B_vLK%hONp~e2-kvOTH{wpJ zj2#zzepIJGu|v1GAc3BfA%{CQ9H*$5loZPhb8asv)DhNcC+a+U&&JtZXq3uWsEuhx zizHq)2_gaYCBn(z;?IQKGpdVE-BH7TTC&DJ4ss`!Y;3k ziR9zrwC{J2O_OmLZN08=xHaq9XqlTx{9O*CyX|B0mT6h9I61l7V{r;P&xDfr4ioL$ z#T7K&`7}>f@j1i~*gtC1=1t(EcYdF(IZi*ERM2Yi%z^9V%j`t>vvy%li)ZrAvEuUD z76YdC^6)34uV8HM*~F9mJVH#eWjd8QFS8Z2H84inl{yk>I{JKv!kHNQoS(&ulJpBC zEXrI{-LJaFBQGUhr|om@7khS2oBL(Vuu!4vIlfEU$V%to1f$oS&Iw%HsoF`rsX9rO zI9*(9>^O%}?6?BgT|_ijeEayhONCZR9-=sLMad+i7mbh_@=*8gk}iTl-S>-lp}xA*J}lEQj>RC{IirEfb=eY)89 z<&7jINHt|kMl|%@)DNDwl+&yGZESk`D?*v&yHbWO57DF?_3Aa-9|6|vVOonfU^V+n zdY2gxfzw*g!357C{u>j{vd>p;8$R1NxL3A!E061tK-iKE*)-@bR|$c!oLu#*_rdI@ z1O1Qn!QOAj2Qvo7^zLwc{?)fJEXkdqUC{6R>bf|PGmir;9rMs>RiI5B$o!9LLC;=e zK)>_*nm=7k8=(~R@45dOs=pk#zc|1L`451uX`tYMb^3syb$g$>iguw%V8b@GuiwQ6 z_?-u0cJ1#a3;NwgDr`22%c(#Ih=5y-&_DDP^aJ7bmHR})M&2Ljf2^I9^BP zT;#nTD*gJuQRw}} zuM@v_EXk+(r~Vw^&2I`Mxs&q#QD+Bu8>Zm>hbwX%_m1(oW8<>0>8S@RBU%rb_aE@w z_x=jf{BMQITp%%-kmCUUTIKg6;}@i39y6=S=H2jRy=j~7G>(5nVe(8Qbh(~5dwbTg zd$TZ6;O5W%h~Wp&9p&KfnOeQhzs^0LSBqS?WgH7QUBfq`bDD2t`9fE#f-g?$^3z@P zRX^8%gtz&A=(@Cz|tDU0;1p;=IMpx;~p3*1NG2e_qC9 zlZUPUmJPj6RoczXhnCr`q$_qkM1%d0KQ>;E=-&x0?yh?s(mvhsW%I#DCBC?GXTijL z#Sf@~DQtz5#e~f0mQUv8Y!dc+*fL+tKGQt0=E6m|zp#1w7jZTtuk*B&h;xa0i!#qE2i17?A2Iyz(cy^5s)d)ISVT)8g zmVNFSf)@>Dp{Ls0JlR;KPj-nep;#R$lX6orm2q&)sbyJAHvq zr*${~d~DHV@NPoC!lIp?2s>p6X8a+jt1tH)f;UfTZjOJ_oDUg43Rs+IFk^~6f@ z3(rL|qoO~)F1I_eh)@13nkg-tEGCq>BkxwX8=y5f|A~byMDRnx7nQ!7_qk6kEi`?8 zRQOqEub56yqfS7q*4a8icARoZSv20(c<*vqQgQh}U;SRkKxXRtKKkZ{tg@w1=jO?Q z-BFujB{iP0z;2Z^ZE3^Kny=__bsG1SZ>fJ}%A7g_ z_nK?!+X%RDXJF-u$V_l^u1otneR$>hI;Rf8WU5Y-HLv zliB;lV0M#yv+dSiM5Fnt&GhphCt^27(7$e5t_X0KJgppBaHZ=IqCX=0Wy+4UxRfRw zZ1du~bl%ow9~8@(y<*N`wUTeInV}AWo-I>R9`mP8>!bF3>*iC@Tjs@>F=@X-%Q^j= zm=PMh{pal{G$A4{lebg9&bqxDb5a317c!U9J(4&l(n7m*DZ1d>^Tp3N%jjN1#yRFX z56{fRqI|KVx#d=n@ddovi>R(_$H4;577tjY^1Ns*tkcS=m`oT;UN0k}1d>&8>?cws zimhp*F$Q`*&GW{?{~xKmNB661{(-O2offVt-MCXv zbBmQ5vd~%UhxAeyy3|@(Q-WhM<+Uw^1$)J5Vl~=Xo9}5s6wbTl1Unt=^gL<}p@GJ? z?0>Lh{twAzU?X0Z71{Adqs?D z-*YFY@J>=e43-(e`EEIw%|$1HAPOPhyZ4Sw%0wo=tzM7H-T5w`EqKO^bX+o5+NPNF zS^hE<94sdj4aep3OX1YFA6|TO-l3*v2RZs^baZ+7W&OkzEwujXJO{tQhgPkYjRpF9aenP< zwalCrlLYuus&{(&geOxouOw#&io4~!S%a37Q?J_fm$6HjZv{gmd<||(bD;HrvQh8~ zemGfz!tthDq)N?QeYf4k&~lA5(~|JK!3% zR9QZ>{$j=dBLYV_RASmTAj3tg*s2bhdQ7uHI^n}|zki|Fjt+wTdjHLcF)67^j!C{l zjw!MERfnBc{Ar_j#SWhI?&xw+T!p@Y4_W-ZSq*m*a6tk_@!;xOawIExun9*Re>k<> zR$sliFyIl=TQR=-ux($xGNO0Ce=$nnT{7KpM6=BK+o`|4dk)A>Y=tQeZv31%c5W?V zf6f!4YG9rU)1KR}NWS`Jj4qe1g6`}71^YXfzE4X}Zbg2%6m;?OO_4Zk_FF!Po8y}?=^3fagOKQ)~x)5*wO%Y~tnWM9A`9p7CL*BdQY_H+5 zi4WafnNHSFWUw5{39)xGpdsscTDxSGOLH(1Qtc8g^Qq)ndl)-?^*sdzhL;14F-8?k z!^hKK<|eeuW(s*G=NwHqwOsHyHiQ8I(W%c}TSPk#^t&J)F*Z!U7ogl?CZE0qlFzgi*^5T`eQ9p zM@~MhQ$Jli+w}Dz$IpuvL3WitE70Ft;`sNLt4??wiI~1mhyOnGY25Hb=;FuA=KM~3 z7miq2eA=n8$zB!aFFDdF8H85NJ!LX&b-|uQmI@-ow0U&xjO#h&yGmQFHHBdYrea8i zk|}zqqxqe~=ydxX)WkzN_5Geu*Et@Y#M;i17x@8ZHmi0XyM*B;n}qD_kKNNs3EC=; z21d!_1RskW*#wyzRqLUCkw{LF*Rb9S|AF0-YQ>fhtBO<1_>qP!m6>b-OKGD}Jh)j-KLGN0w%NaHA8qQm~U;lT;lT$V+m$ZF1A=G`gZBQF4_}XEurNNJap9 z1~pPi$!>|uBM$nnD!rL+_(?CdJBADO7;UyQQ}w>-JGgcF`O2Lm=Y9y|_8ZK1-gx$W zyKTPu^TEi>38>@LUcPkA9{-!={fkF_P4{;lmm^Wax4s@jbYYfn|9WD0Bw6TN@x|A7 zQodgv`|)eS^X=S`rMjL+VNd2V6Qs(1%7U1`|H`2RX4H&+|3E7}a_U}UqWZ&}_Rxz% z0+gcVZ?}WaB&VzUNNZ2+$w{fm;0^SybB|$XYeyui98<9@=`8)<&OeJ(IqVZ3FOo8* zOFDm;p&(J1(JR7{|DMLHjx#*ADfb^MoBHVq=~+zis0hcTMZY^clx%cetX)2$&@Uz1 z%|_(&wKmtVHKOYpkn){1^+$lT<_?Q zFI%s}Y6i!HY?HpdTo3x;YId>g4s8fWlVVBk#|sY+U76gEDnXs=i~RcFjcD5;Bv#+) z+Wkvvpn$K_96!5AAP{{~tq|b8)|gh~hz8q!g*^y#v&ah*FYq{9>pe}OPB`eO zd|!J^OgKwSH~__{Zd`Kd!BLPr@Q(xf`kdK}v!RC%{+9rV;Ysm(eE=SjBo8{AEr8XF zGGh@JWW1(wg>_0s%Nq8iQ1nR3)KI07t3oNCscZWyHju<)BL^DVtIWZkBuqyg80->% zN?m&^UX=Dq$KwmRcLIrLXs(*2C9{S*++@^9vp^;gVh`JAN`VEFtvhHEJGGc}V5qB4 zbmQU7XR@zm-8GH^!4Y!9^2yrV>VfH_*Ep z+_ToFCHjPsUK`6GF~%g%Vj?R$hNhh|Y{ z{0m~iqO@gT@khcRuUgE#cc-C|@i?E|5<$0)) + sampleTable$n <- sum(lotteryParams$Svar=="641") sampleTable$SelectionMethod <- "Poisson" stopifnot(length(unique(stationTable$lotteri))==1) sampleTable$FrameDescription <- stationTable$lotteri[[1]] @@ -112,8 +109,14 @@ saveDesignTable <- function(filename, designTable){ lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") lotteryParams <- lotteryParams[lotteryParams$lotteri=="Sild2022" & lotteryParams$HIF.stratum=="Nordsjo",] + bioData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic("~/bioticsets/lotterieksempel/biotic_cruiseNumber_19-2022-20_Silde-sampling_2023-07-06T22.00.19.567Z.xml")) platformCodes <- readxl::read_excel("~/codelists/NMDeksempler/platform.xlsx", 2) designParams <- prepDesignParamFile(lotteryParams, bioData, platformCodes) saveDesignTable("inst/testresources/lotteryParameters/lotteryDesignNSH.txt", designParams) + +#remove potential vessel identifying information +bioData$Station$CatchPlatform <- as.character(NA) +CatchLotteryExample <- bioData +usethis::use_data(CatchLotteryExample, overwrite = T) diff --git a/inst/dataPrepScripts/prepHH_HT_comp.R b/inst/dataPrepScripts/prepHH_HT_comp.R new file mode 100644 index 00000000..ace984c9 --- /dev/null +++ b/inst/dataPrepScripts/prepHH_HT_comp.R @@ -0,0 +1,92 @@ +# +# Contains some quality assurance checks on lottery parameters. Package resources are not prepared here, but in prepDesignParameters.R +# + + +#' Reads catch lottery parameters from file as exported by lottery system Pr oct 2023. +#' @param filename path to file with lottery parameters +#' @return ~\code{\link[data.table]{data.table}} with colmns: +#' \describe{ +#' \item{aar}{year (integer)} +#' \item{RC}{Radio call signal of vessel (character)} +#' \item{SQ}{serialnumber for message (integer) identifies message given year, vessel and message recipient} +#' \item{TM}{ERS message type (character). HIA means departure message sent to IMR catch lottery (determines target species and therefor inclusion in lottery). HIF means catch message sent to IMR catch lottery, also encoding wheter sample is requested, along with lottery parameters} +#' \item{BD}{Date for start of fishing operation in UTC: YYYYMMDD} +#' \item{BT}{Tome for start of fishing operation in UTC: HHMM} +#' \item{Svar}{Code for whether sample is requested. Code 641 means that a sample is requested. 642 means sample is not requested. 643 means no (more) samples will be requested from this trip (until ned departure message).} +#' \item{i.prob}{Inclusion probability used in sample selection. Assigned also to samples not selected} +#' \item{lotteri}{Identifier for lottery, sampling frame, all inclusion probabilities are conditioned only on catch being in lottery} +#' \item{HIF.stratum}{Any stratification used in setting sampling parameters. Stratification is already factored into inclusion probabilities, some other column that goes into inclusion prob calculation depends on HIF.stratum (e.g. kapasitet)} +#' \item{kvote}{quota / expected total catch, used in calculation of inclusion probabilities} +#' \item{kapasitet}{sampling capacity / expected number of samples, used in calculation of inclusion probabilities} +#' \item{lotteri.kg}{reported catch in kg that was used in calculation of inclusion probability} +#' +#' The fields RC, SQ, TM, BC and BT are defined in the ERS regulation (https://lovdata.no/dokument/SF/forskrift/2009-12-21-1743). +#' } +#' @noRd +parseLotteryFile <- function(filename){ + lotteriparams <- data.table::fread(filename, sep = "\t", dec=".", header = T, colClasses = c("integer", "character", "integer", "character", "character", "character", "character", "numeric", "character", "character", "numeric", "numeric", "numeric")) + return(lotteriparams) +} + +est_total <- function(lotteryParams, lottery, stratum){ + + lott <- lotteryParams[lotteryParams$lotteri==lottery & lotteryParams$HIF.stratum==stratum] + stopifnot(length(unique(lott$kvoteT))==1) + + kapasitet <- NA + if (length(unique(lott$kapasitet))==1){ + kapasitet <- lott$kapasitet[1] + } + + totalFrame <- sum(lott$lotteri.kg[!is.na(lott$i.prob) & lott$i.prob>0]) + kvote <- lott$kvoteT[1]*1000 + sample <- lott[lott$Svar=="641",] + dekning <- sum(lott$lotteri.kg[!is.na(lott$i.prob)& lott$i.prob>0]) / kvote + HTtot <- sum(sample$lotteri.kg/sample$i.prob) + HHtot <- mean(sample$lotteri.kg/(sample$lotteri.kg/kvote))*(sum(lott$lotteri.kg, na.rm=T)/kvote) + + result <- data.table::data.table(lotteri=lottery, stratum=stratum, totalFrame=totalFrame, kvote=kvote, HTtot=HTtot, n=nrow(sample), RelErrHTFrame=(HTtot-totalFrame)/totalFrame) + return(result) +} + +lotteryStats <- function(lotteryParams){ + results <- NULL + for (lottery in unique(lotteryParams$lotteri)){ + for (stratum in unique(lotteryParams$HIF.stratum[lotteryParams$lotteri==lottery])){ + stats <- est_total(lotteryParams, lottery, stratum) + results <- rbind(stats, results) + } + } + results <- results[order(results$n, decreasing=T),] + return(results) +} + +simulate <- function(lotteryParams, lottery, stratum, iterations=1000){ + frame <- lotteryParams[lotteryParams$lotteri==lottery & lotteryParams$HIF.stratum==stratum & !is.na(lotteryParams$i.prob) & lotteryParams$i.prob>0, ] + + tab <- NULL + for (it in 1:iterations){ + for (i in 1:nrow(frame)){ + p <- frame$i.prob[i] + if (sample(c(T,F), 1, replace = TRUE,prob=c(p,1-p))){ + frame$Svar[i] <- "641" + } + else{ + frame$Svar[i] <- "642" + } + } + res <- est_total(frame, lottery, stratum) + tab <- rbind(tab, res) + } + + tab$iteration <- 1:iterations + return(tab) +} + +lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") +lotteryStats <- lotteryStats(lotteryParams) + +#check if the perfomranse for Norsjo is extreme: +nssim<-simulate(lotteryParams, "Sild2022", "Nordsjo") +print(paste("Percentile Sild2002, Nordsjo: ", sum(nssim$RelErrHTFrame<(-.191))*100/nrow(nssim), "%")) diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt index 0abffca8..28faae08 100644 --- a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -1,65 +1,65 @@ Stratum N n SelectionMethod FrameDescription Order SamplingUnitId InclusionProbability HTsamplingWeight SelectionProbability HHsamplingWeight SelectionDescription -Nordsjo 811 110 Poisson Sild2022 38401 0.213196915139625 0.00894691694298864 0.00217741935483871 0.00849720338523697 -Nordsjo 811 110 Poisson Sild2022 38433 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 -Nordsjo 811 110 Poisson Sild2022 38440 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 -Nordsjo 811 110 Poisson Sild2022 38445 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 -Nordsjo 811 110 Poisson Sild2022 38438 0.641175307024414 0.00297493535911075 0.0092741935483871 0.00199499557740346 -Nordsjo 811 110 Poisson Sild2022 38441 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 -Nordsjo 811 110 Poisson Sild2022 38448 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 -Nordsjo 811 110 Poisson Sild2022 38403 0.0767615148687669 0.0248491069452791 0.000725806451612903 0.0254916101557109 -Nordsjo 811 110 Poisson Sild2022 38435 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 -Nordsjo 811 110 Poisson Sild2022 38436 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 -Nordsjo 811 110 Poisson Sild2022 38443 0.0182839435244132 0.104324052943433 0.000167741935483871 0.110300236250672 -Nordsjo 811 110 Poisson Sild2022 38446 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 -Nordsjo 811 110 Poisson Sild2022 38447 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 -Nordsjo 811 110 Poisson Sild2022 38402 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 -Nordsjo 811 110 Poisson Sild2022 38434 0.220161129326957 0.00866390492312064 0.00225806451612903 0.00819373183576422 -Nordsjo 811 110 Poisson Sild2022 38442 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 -Nordsjo 811 110 Poisson Sild2022 38444 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 -Nordsjo 811 110 Poisson Sild2022 38437 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 -Nordsjo 811 110 Poisson Sild2022 38439 0.311481476435603 0.0061238154964569 0.00338709677419355 0.00546248789050948 -Nordsjo 811 110 Poisson Sild2022 38431 0.0889747087440887 0.0214381718038762 0.000846774193548387 0.0218499515620379 -Nordsjo 811 110 Poisson Sild2022 38415 0.136247006433322 0.0139999779972348 0.00133064516129032 0.0139045146303878 -Nordsjo 811 110 Poisson Sild2022 38406 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 -Nordsjo 811 110 Poisson Sild2022 38417 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 -Nordsjo 811 110 Poisson Sild2022 38419 0.0849215381190344 0.0224613818179075 0.000806451612903226 0.0229424491401398 -Nordsjo 811 110 Poisson Sild2022 38408 0.184718409696934 0.0103262858065158 0.00185483870967742 0.00997497788701731 -Nordsjo 811 110 Poisson Sild2022 38412 0.216686691440281 0.00880282531233035 0.00221774193548387 0.00834270877823266 -Nordsjo 811 110 Poisson Sild2022 38422 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 -Nordsjo 811 110 Poisson Sild2022 38432 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 -Nordsjo 811 110 Poisson Sild2022 38429 0.338519898362406 0.00563469119978754 0.00375 0.0049338600301376 -Nordsjo 811 110 Poisson Sild2022 38404 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 -Nordsjo 811 110 Poisson Sild2022 38413 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 -Nordsjo 811 110 Poisson Sild2022 38414 0.240689370659604 0.00792496605491253 0.0025 0.00740079004520639 -Nordsjo 811 110 Poisson Sild2022 38405 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 -Nordsjo 811 110 Poisson Sild2022 38411 0.458713264018625 0.00415827324360556 0.00556451612903226 0.00332499262900577 -Nordsjo 811 110 Poisson Sild2022 38416 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 -Nordsjo 811 110 Poisson Sild2022 38427 0.589628784636336 0.00323501013172564 0.00806451612903226 0.00229424491401398 -Nordsjo 811 110 Poisson Sild2022 38409 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 -Nordsjo 811 110 Poisson Sild2022 38424 0.347296770955766 0.00549229146878123 0.00387096774193548 0.0047796769041958 -Nordsjo 811 110 Poisson Sild2022 38421 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 -Nordsjo 811 110 Poisson Sild2022 38430 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 -Nordsjo 811 110 Poisson Sild2022 38426 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 -Nordsjo 811 110 Poisson Sild2022 38407 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 -Nordsjo 811 110 Poisson Sild2022 38418 0.229685923270579 0.00830462339656995 0.00236952419354839 0.00780830816726504 -Nordsjo 811 110 Poisson Sild2022 38428 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 -Nordsjo 811 110 Poisson Sild2022 38420 0.0425447856855458 0.044834051024581 0.000395161290322581 0.0468213247757956 -Nordsjo 811 110 Poisson Sild2022 38423 0.463520590962966 0.00411514640221889 0.00564516129032258 0.00327749273430569 -Nordsjo 811 110 Poisson Sild2022 38410 0.267034025829916 0.00714311626141064 0.00282016129032258 0.00656060884762363 -Nordsjo 811 110 Poisson Sild2022 38425 0.0930100890906527 0.0205080449971025 0.000887096774193548 0.0208567719455817 -Nordsjo 811 110 Poisson Sild2022 0.199083079634323 0.00958120145498676 0.00201612903225806 0.00917697965605593 -Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 -Nordsjo 811 110 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 -Nordsjo 811 110 Poisson Sild2022 0.174511532419619 0.0109302523782158 0.00174193548387097 0.0106215042315462 -Nordsjo 811 110 Poisson Sild2022 0.202634883401628 0.00941326123239596 0.00205645161290323 0.0089970388784862 -Nordsjo 811 110 Poisson Sild2022 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 -Nordsjo 811 110 Poisson Sild2022 0.132402285228299 0.0144065118586634 0.00129032258064516 0.0143390307125874 -Nordsjo 811 110 Poisson Sild2022 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 -Nordsjo 811 110 Poisson Sild2022 0.0808504995083754 0.0235923723892148 0.000766129032258065 0.0241499464633051 -Nordsjo 811 110 Poisson Sild2022 0.140074844132902 0.0136173993557747 0.00137096774193548 0.0134955583177293 -Nordsjo 811 110 Poisson Sild2022 0.0552064585338686 0.0345513032879191 0.000516129032258065 0.0358475767814685 -Nordsjo 811 110 Poisson Sild2022 0.112922833036567 0.0168916687702827 0.00108870967741935 0.0169944067704739 -Nordsjo 811 110 Poisson Sild2022 0.386753608156008 0.00493196456873441 0.00443548387096774 0.00417135438911633 -Nordsjo 811 110 Poisson Sild2022 0.413480846839946 0.00461316432631274 0.00483870967741936 0.00382374152335664 -Nordsjo 811 110 Poisson Sild2022 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 -Nordsjo 811 110 Poisson Sild2022 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 +Nordsjo 790 71 Poisson Sild2022 38401 0.213196915139625 0.00894691694298864 0.00217741935483871 0.00849720338523697 +Nordsjo 790 71 Poisson Sild2022 38433 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 38440 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 38445 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 790 71 Poisson Sild2022 38438 0.641175307024414 0.00297493535911075 0.0092741935483871 0.00199499557740346 +Nordsjo 790 71 Poisson Sild2022 38441 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 790 71 Poisson Sild2022 38448 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38403 0.0767615148687669 0.0248491069452791 0.000725806451612903 0.0254916101557109 +Nordsjo 790 71 Poisson Sild2022 38435 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 790 71 Poisson Sild2022 38436 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38443 0.0182839435244132 0.104324052943433 0.000167741935483871 0.110300236250672 +Nordsjo 790 71 Poisson Sild2022 38446 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38447 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 38402 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 790 71 Poisson Sild2022 38434 0.220161129326957 0.00866390492312064 0.00225806451612903 0.00819373183576422 +Nordsjo 790 71 Poisson Sild2022 38442 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 38444 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38437 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38439 0.311481476435603 0.0061238154964569 0.00338709677419355 0.00546248789050948 +Nordsjo 790 71 Poisson Sild2022 38431 0.0889747087440887 0.0214381718038762 0.000846774193548387 0.0218499515620379 +Nordsjo 790 71 Poisson Sild2022 38415 0.136247006433322 0.0139999779972348 0.00133064516129032 0.0139045146303878 +Nordsjo 790 71 Poisson Sild2022 38406 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 790 71 Poisson Sild2022 38417 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 790 71 Poisson Sild2022 38419 0.0849215381190344 0.0224613818179075 0.000806451612903226 0.0229424491401398 +Nordsjo 790 71 Poisson Sild2022 38408 0.184718409696934 0.0103262858065158 0.00185483870967742 0.00997497788701731 +Nordsjo 790 71 Poisson Sild2022 38412 0.216686691440281 0.00880282531233035 0.00221774193548387 0.00834270877823266 +Nordsjo 790 71 Poisson Sild2022 38422 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 +Nordsjo 790 71 Poisson Sild2022 38432 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38429 0.338519898362406 0.00563469119978754 0.00375 0.0049338600301376 +Nordsjo 790 71 Poisson Sild2022 38404 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38413 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 790 71 Poisson Sild2022 38414 0.240689370659604 0.00792496605491253 0.0025 0.00740079004520639 +Nordsjo 790 71 Poisson Sild2022 38405 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38411 0.458713264018625 0.00415827324360556 0.00556451612903226 0.00332499262900577 +Nordsjo 790 71 Poisson Sild2022 38416 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38427 0.589628784636336 0.00323501013172564 0.00806451612903226 0.00229424491401398 +Nordsjo 790 71 Poisson Sild2022 38409 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38424 0.347296770955766 0.00549229146878123 0.00387096774193548 0.0047796769041958 +Nordsjo 790 71 Poisson Sild2022 38421 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38430 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38426 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 790 71 Poisson Sild2022 38407 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 38418 0.229685923270579 0.00830462339656995 0.00236952419354839 0.00780830816726504 +Nordsjo 790 71 Poisson Sild2022 38428 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 790 71 Poisson Sild2022 38420 0.0425447856855458 0.044834051024581 0.000395161290322581 0.0468213247757956 +Nordsjo 790 71 Poisson Sild2022 38423 0.463520590962966 0.00411514640221889 0.00564516129032258 0.00327749273430569 +Nordsjo 790 71 Poisson Sild2022 38410 0.267034025829916 0.00714311626141064 0.00282016129032258 0.00656060884762363 +Nordsjo 790 71 Poisson Sild2022 38425 0.0930100890906527 0.0205080449971025 0.000887096774193548 0.0208567719455817 +Nordsjo 790 71 Poisson Sild2022 0.199083079634323 0.00958120145498676 0.00201612903225806 0.00917697965605593 +Nordsjo 790 71 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 0.174511532419619 0.0109302523782158 0.00174193548387097 0.0106215042315462 +Nordsjo 790 71 Poisson Sild2022 0.202634883401628 0.00941326123239596 0.00205645161290323 0.0089970388784862 +Nordsjo 790 71 Poisson Sild2022 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 0.132402285228299 0.0144065118586634 0.00129032258064516 0.0143390307125874 +Nordsjo 790 71 Poisson Sild2022 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 790 71 Poisson Sild2022 0.0808504995083754 0.0235923723892148 0.000766129032258065 0.0241499464633051 +Nordsjo 790 71 Poisson Sild2022 0.140074844132902 0.0136173993557747 0.00137096774193548 0.0134955583177293 +Nordsjo 790 71 Poisson Sild2022 0.0552064585338686 0.0345513032879191 0.000516129032258065 0.0358475767814685 +Nordsjo 790 71 Poisson Sild2022 0.112922833036567 0.0168916687702827 0.00108870967741935 0.0169944067704739 +Nordsjo 790 71 Poisson Sild2022 0.386753608156008 0.00493196456873441 0.00443548387096774 0.00417135438911633 +Nordsjo 790 71 Poisson Sild2022 0.413480846839946 0.00461316432631274 0.00483870967741936 0.00382374152335664 +Nordsjo 790 71 Poisson Sild2022 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index b45e1002..23afc99d 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -1,8 +1,8 @@ designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA") #regular read: -designParams <- RstoxFDA::DefineMultiStageSamplingParameters(NULL, "ResourceFile", designParamsFile) -expect_true(RstoxFDA:::is.MultiStageSamplingParametersData(designParams)) +designParams <- RstoxFDA::DefinePSUSamplingParameters(NULL, "ResourceFile", designParamsFile) +expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParams)) expect_equal(nrow(designParams$SelectionTable), 64) expect_equal(nrow(designParams$SampleTable), 1) expect_equal(ncol(designParams$StratificationVariables), 1) @@ -10,10 +10,21 @@ expect_equal(nrow(designParams$StratificationVariables), 1) expect_equal(sum(designParams$SelectionTable$HTsamplingWeight), 1) expect_equal(sum(designParams$SelectionTable$HHsamplingWeight), 1) +# test assignment to data +expect_error(RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "MissingAtRandom")) +designParamsCorrected <- RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "HaulKey", "MissingAtRandom") +expect_equal(sum(designParamsCorrected$SelectionTable$HTsamplingWeight),1) +expect_equal(sum(designParamsCorrected$SelectionTable$HHsamplingWeight),1) +#HT should be approximately the same after non-response correction +expect_true(abs((sum(1/designParamsCorrected$SelectionTable$InclusionProbability)-sum(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) +#HH should be apprxoimately the same after non-response correction +expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbability)-mean(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) + + #define from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) -designParamsSB <- RstoxFDA::DefineMultiStageSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) -expect_true(RstoxFDA:::is.MultiStageSamplingParametersData(designParamsSB)) +designParamsSB <- RstoxFDA::DefinePSUSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) +expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsSB)) #compare names of output with stratification variables to output without expect_true(all(names(designParamsSB$SampleTable) == names(designParams$SampleTable))) diff --git a/man/AssignPSUSamplingParameters.Rd b/man/AssignPSUSamplingParameters.Rd new file mode 100644 index 00000000..1c8cfd3b --- /dev/null +++ b/man/AssignPSUSamplingParameters.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R +\name{AssignPSUSamplingParameters} +\alias{AssignPSUSamplingParameters} +\title{Assign PSU Sampling Parameters} +\usage{ +AssignPSUSamplingParameters( + PSUSamplingParametersData, + StoxBioticData, + SamplingUnitId, + DefinitionMethod = c("MissingAtRandom") +) +} +\arguments{ +\item{PSUSamplingParametersData}{~\code{\link[RstoxFDA]{PSUSamplingParametersData}} with sampling parameters for PSU selection} + +\item{StoxBioticData}{~\code{\link[RstoxData]{StoxBioticData}} with data records for responding PSUs.} + +\item{SamplingUnitId}{name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs} + +\item{DefinitionMethod}{The method for dealing with non-response, e.g. 'MissingAtRandon'} +} +\value{ +~\code{\link[RstoxFDA]{PSUSamplingParametersData}} +} +\description{ +Assigns data records to PSU Sampling Parameters and provides non-response adjustments for +selected PSUs that was not sampled. +} +\details{ +Some sampling parameters provided in ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} are only +interpretable for sampling with complete response. This function adjusts these parameters, removes non-respondents from the +~\code{\link[RstoxFDA]{PSUSamplingParametersData}}, and checks that all responding PSUs are present in data records. + +If any respondants (rows of the SelectionTable of PSUSamplingParametersData that does not have NA for SamplingUnitId) are not +found in 'SamplingUnitId', execution halts with an error. + +Response after selection can generally be considered a process that modifies the sampling parameters that are set by design. +Typically sample size, InclusionProbabilities and normalized SamplingWeights need to be adjusted as non-respondents are removed, +since these are depend of the entire sample, not just the sampling unit they are assigned to. +SelectionProbabilites are by definition set for a single draw of a single sampling unit from the population and are valid even +when response is not complete. + +Treatment of non-response requires some assumption about systematic differences between respondents and non-respondents. +These assumptions are specified via the argument 'DefinitionMethod' and the following options are available: +\describe{ +\item{MissingAtRandom}{A response propensity is estimated for each stratum as the fraction of the sample resonding, and sample size (n) and InclusionProbability are adjusted by multiplying with this propensity. Sampling weights are adjusted by dividing them with their sum over repsondents in a stratum.} +} +} +\concept{Analytical estimation} +\concept{StoX-functions} diff --git a/man/CatchLotteryExample.Rd b/man/CatchLotteryExample.Rd new file mode 100644 index 00000000..1050927f --- /dev/null +++ b/man/CatchLotteryExample.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CatchLotteryExample-datadoc.R +\docType{data} +\name{CatchLotteryExample} +\alias{CatchLotteryExample} +\title{Data from the Norwegian catch lottery sampling program.} +\format{ +\code{\link[RstoxData]{StoxBioticData}} +} +\usage{ +data(CatchLotteryExample) +} +\description{ +Example of data formatted as \code{\link[RstoxData]{StoxBioticData}} +Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +The data contain North Sea herring samples from catch lottery sampling in 2022. +} +\examples{ + RstoxFDA::plotArea(RstoxFDA::CatchLotteryExample$Station, + areaDef=RstoxFDA::mainareaFdir2018, + latCol = "Latitude", + lonCol = "Longitude", + areaLabels = TRUE) +} +\concept{Analytical estimation} +\keyword{datasets} diff --git a/man/DefineMultiStageSamplingParameters.Rd b/man/DefinePSUSamplingParameters.Rd similarity index 56% rename from man/DefineMultiStageSamplingParameters.Rd rename to man/DefinePSUSamplingParameters.Rd index 1570e303..b4cb1911 100644 --- a/man/DefineMultiStageSamplingParameters.Rd +++ b/man/DefinePSUSamplingParameters.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/StoxAnalyticalBaselineFunctions.R -\name{DefineMultiStageSamplingParameters} -\alias{DefineMultiStageSamplingParameters} -\title{Define Multi-Stage Sampling Design Parameters} +\name{DefinePSUSamplingParameters} +\alias{DefinePSUSamplingParameters} +\title{Define PSU Sampling Design Parameters} \usage{ -DefineMultiStageSamplingParameters( +DefinePSUSamplingParameters( processData, DefinitionMethod = c("ResourceFile", "AdHocStoxBiotic"), FileName = character(), @@ -15,7 +15,7 @@ DefineMultiStageSamplingParameters( ) } \arguments{ -\item{processData}{\code{\link[RstoxFDA]{MultiStageSamplingParametersData}} as returned from this function.} +\item{processData}{\code{\link[RstoxFDA]{PSUSamplingParametersData}} as returned from this function.} \item{DefinitionMethod}{'ResourceFile' or 'AdHocStoxBiotic'} @@ -23,29 +23,29 @@ DefineMultiStageSamplingParameters( \item{StoxBioticData}{\code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from} -\item{SamplingUnitId}{name of column in 'StoxBioticData' that identifies the sampling unit the design is constructed for.} +\item{SamplingUnitId}{name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for.} \item{StratificationColumns}{name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling.} \item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} } \value{ -\code{\link[RstoxFDA]{MultiStageSamplingParametersData}} +\code{\link[RstoxFDA]{PSUSamplingParametersData}} } \description{ -Define sampling design parameters for intermediate sampling units in multi-stage sampling. +Define sampling parameters for Primary Sampling Units in multi-stage sampling. } \details{ -The DefintionMethod 'ResourceFile' reads design parameters from a tab delimited file with headers corresponding to those listed in -\code{\link[RstoxFDA]{MultiStageSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. -Any columns not named in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are assumed to be stratification variables. -The conditions listed for the variables in \code{\link[RstoxFDA]{MultiStageSamplingParametersData}} are checked upon reading the data, and +The DefintionMethod 'ResourceFile' reads sampling parameters from a tab delimited file with headers corresponding to those listed in +\code{\link[RstoxFDA]{PSUSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +Any columns not named in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are assumed to be stratification variables. +The conditions listed for the variables in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are checked upon reading the data, and execution halts with error if any are violated. The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, assuming equal probability sampling with fixed sample size, selection without replacement and complete response. This is a reasonable approximation if within-strata sampling is approximately simple random selections, -non-response is believed to be at random. +and non-response is believed to be at random. } \concept{Analytical estimation} \concept{StoX-functions} diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd index 00e7cfd0..e0e9a416 100644 --- a/man/IndividualSamplingParametersData.Rd +++ b/man/IndividualSamplingParametersData.Rd @@ -28,10 +28,10 @@ Encodes information about the selection of a sub-sample of observations from ind \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} - \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the individual} \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} + \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the individual} \item{SelectionProbability}{Optional, num: The selection probability of the individual} - \item{HHsamplingWeight}{Optional, num: The relative selection probability of the individual} + \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the individual} \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} } diff --git a/man/MultiStageSamplingParametersData.Rd b/man/PSUSamplingParametersData.Rd similarity index 77% rename from man/MultiStageSamplingParametersData.Rd rename to man/PSUSamplingParametersData.Rd index ca73641b..af53d923 100644 --- a/man/MultiStageSamplingParametersData.Rd +++ b/man/PSUSamplingParametersData.Rd @@ -1,34 +1,34 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/StoxDataTypes.R -\name{MultiStageSamplingParametersData} -\alias{MultiStageSamplingParametersData} -\title{Multi-stage Sampling Design Parameters} +\name{PSUSamplingParametersData} +\alias{PSUSamplingParametersData} +\title{PSU Sampling Design Parameters} \description{ -Sampling parameters for selection of a sampling unit in a multi-stage sampling design +Sampling parameters for selection of a Primary Sampling Unit } \details{ -Encodes information about the selection of an intermediate sampling unit in multi-stage sampling, used in analytical design based estimation. +Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. Information is encoded in three tables. The SampleTable encodes information about the sample of sampling units: \describe{ \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} - \item{N}{Optional, num: The total number of selection units in Stratum} - \item{n}{Optional, num: The number of selection units selected from the Stratum} + \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} + \item{n}{Optional, num: The number of PSUs selected from the Stratum} \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} } The SelectionTable encodes information abut the selection of sampling units for sampling: \describe{ - \item{Stratum}{Mandatory, chr: Identifies the stratum the sampling unit is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} - \item{SamplingUnitId}{Optional, chr: Identifes sampling unit. NA encodes non-response} - \item{InclusionProbability}{Optional, num: The inclusion probability of the sampling unit} - \item{HTsamplingWeight}{Optional, num: The relative inclusion probability of the sampling unit} - \item{SelectionProbability}{Optional, num: The selection probability of the sampling unit} - \item{HHsamplingWeight}{Optional, num: The relative selection probability of the sampling unit} - \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} + \item{SamplingUnitId}{Optional, chr: Identifes PSU. NA encodes non-response} + \item{InclusionProbability}{Optional, num: The inclusion probability of the PSU} + \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the PSU} + \item{SelectionProbability}{Optional, num: The selection probability of the PSU} + \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the PSU} + \item{SelectionDescription}{Optional, chr: Free text description of the PSU.} } The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): From aa95c43c7a4de56d8f753b1aad01532db3d4ffde Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Wed, 22 Nov 2023 15:10:43 +0100 Subject: [PATCH 13/24] Added function for calculating co-inclusion probabilities --- R/StoxAnalyticalBaselineFunctions.R | 74 +++++++++++- R/StoxDataTypes.R | 110 +++++++++++++++++- .../test-StoxAnalyticalBaselineFunctions.R | 31 +++++ man/DefinePSUCoInclusionProbabilities.Rd | 21 ++++ man/PSUCoInclusionProbabilities.Rd | 46 ++++++++ man/PSUSamplingParametersData.Rd | 2 +- 6 files changed, 279 insertions(+), 5 deletions(-) create mode 100644 man/DefinePSUCoInclusionProbabilities.Rd create mode 100644 man/PSUCoInclusionProbabilities.Rd diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 9c592f6f..087622ca 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -481,8 +481,80 @@ AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticDat } +#' Define PSU Co-Inclusion Probabilities +#' @description +#' Computes co-inclusion probabilites for a selection of Primary Selection Units +#' @details +#' The method for calculating co-inclusion probabilites depend on the method of selection, which is encded +#' in \code{\link[RstoxFDA]{PSUSamplingParametersData}}. Only selection methods 'Possion' and 'FSWR' are currently supported. +#' @param PSUSamplingParametersData \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @return \code{\link[RstoxFDA]{PSUCoInclusionProbabilities}} +#' @md +DefinePSUCoInclusionProbabilities <- function(PSUSamplingParametersData){ + + if (!is.PSUSamplingParametersData(PSUSamplingParametersData)){ + stop("Invalid PSUSamplingParametersData") + } + + coinc <- NULL + for (i in 1:nrow(PSUSamplingParametersData$SampleTable)){ + + selectionMethod <- PSUSamplingParametersData$SampleTable$SelectionMethod[[i]] + stratum <- PSUSamplingParametersData$StratificationVariables$Stratum[i] + n <- PSUSamplingParametersData$SampleTable$n[i] + + if (any(is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId))){ + stop("Cannot calculate co-inclusion probabilities under non-response. Missing values for 'SamplingUnitId'.") + } + if (any(is.na(PSUSamplingParametersData$SelectionTable$InclusionProbability))){ + stop("Cannot calculate co-inclusion probabilities when inclusion probabilities are not known. Missing values for 'InclusionProbability'.") + } + + if (selectionMethod=="Poisson"){ + tab <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,.SD,.SDcol=c("SamplingUnitId", "InclusionProbability")] + tab2 <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,list(SamplingUnitId2=SamplingUnitId, InclusionProbability2=InclusionProbability)] + cross <- tab2[, as.list(tab), by = names(tab2)] + cross$CoInclusionProbability <- cross$InclusionProbability*cross$InclusionProbability2 + cross$Stratum <- stratum + cross <- cross[,.SD,.SDcol=c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability")] + + coinc <- rbind(coinc, cross) + } + else if (selectionMethod=="FSWR"){ + + if (any(is.na(PSUSamplingParametersData$SelectionTable$SelectionProbability))){ + stop("For selection method FSWR, selection probabilities are needed in order to calculate co-inclusion probabilities. Missing values for 'SelectionProbability'.") + } + if (is.na(n)){ + stop("For selection method FSWR, the sample size must be known in order to calculate co-inclusion probabilities. Missing values for 'n'.") + } + + stratum <- PSUSamplingParametersData$StratificationVariables$Stratum[i] + tab <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,.SD,.SDcol=c("SamplingUnitId", "InclusionProbability", "SelectionProbability")] + tab2 <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,list(SamplingUnitId2=SamplingUnitId, InclusionProbability2=InclusionProbability, SelectionProbability2=SelectionProbability)] + cross <- tab2[, as.list(tab), by = names(tab2)] + cross$CoInclusionProbability <- cross$InclusionProbability+cross$InclusionProbability2 - (1 - (1-cross$SelectionProbability-cross$SelectionProbability2)**n) + cross$Stratum <- stratum + cross <- cross[,.SD,.SDcol=c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability")] + + coinc <- rbind(coinc, cross) + } + else{ + stop(paste("Calculation of Co-inclusion probabilities not supported for selection method '", selectionMethod, "'.", sep="")) + } + } + + #remove diagonal, which is undefined + coinc <- coinc[SamplingUnitId!=SamplingUnitId2,] + + PSUSamplingParametersData$CoSelectionTable <- coinc + PSUSamplingParametersData$SelectionTable <- NULL + + return(PSUSamplingParametersData) +} + #' @noRd -DefinePSUCoInclusionProbabilities <- function(){} +HorvitzThompsonDomainEstimate <- function(){} #' @noRd ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){ diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 53cdba1d..7e9591b3 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -18,7 +18,7 @@ is.Date <- function(date){ #' PSU Sampling Design Parameters #' -#' Sampling parameters for selection of a Primary Sampling Unit +#' Sampling parameters for selection of Primary Sampling Units #' #' @details #' Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. @@ -57,7 +57,7 @@ is.Date <- function(date){ #' \describe{ #' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} #' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} -#' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} #' } #' #' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. @@ -128,6 +128,110 @@ is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ return(TRUE) } +#' PSU Co-Inclusion probabilities +#' +#' Co-Inclusion probabilites for selection of Primary Sampling Units +#' +#' @details +#' Encodes the co-inclusion probabilities for the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. +#' Information is encoded in three tables. +#' +#' The SampleTable encodes information about the sample of sampling units: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} +#' \item{n}{Optional, num: The number of PSUs selected from the Stratum} +#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} +#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} +#' } +#' +#' The CoSelectionTable encodes information abut the co-inclusion of sampling units for sampling: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} +#' \item{SamplingUnitId}{Mandatoryl, chr: Identifes co-selected PSU.} +#' \item{SamplingUnitId2}{Mandatory, chr: Identifes co-selected PSU.} +#' \item{CoInclusionProbability}{Mandatory, num: The co-inclusion probability of the PSU} +#' } +#' +#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} +#' } +#' +#' Optional columns may be NA. +#' +#' The selection methods available for 'SelectionMethod' are explained here: +#' \describe{ +#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} +#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' } +#' +#' @name PSUCoInclusionProbabilities +#' @concept Data types +#' @concept Analytical estimation +#' +NULL + +#' Check if table is correctly formatted Individual PSU Co-Inclusion Probability Data +#' @param table \code{\link[RstoxFDA]{PSUCoInclusionProbabilities}} +#' @return validity +#' @concept Data types +#' @noRd +is.PSUCoInclusionProbabilities <- function(PSUCoInclusionProbabilities){ + + if (!is.list(PSUCoInclusionProbabilities)){ + return(FALSE) + } + if (!all(sapply(PSUCoInclusionProbabilities, data.table::is.data.table))){ + return(FALSE) + } + if (!all(c("SampleTable", "CoSelectionTable", "StratificationVariables") %in% names(PSUCoInclusionProbabilities))){ + return(FALSE) + } + if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(PSUCoInclusionProbabilities$SampleTable))){ + return(FALSE) + } + if (!all(c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability") %in% names(PSUCoInclusionProbabilities$CoSelectionTable))){ + return(FALSE) + } + if (!all(c("Stratum") %in% names(PSUCoInclusionProbabilities$StratificationVariables))){ + return(FALSE) + } + if (any(duplicated(PSUCoInclusionProbabilities$SampleTable$Stratum))){ + return(FALSE) + } + #test that mandatory fields are not NA. + if (any(is.na(PSUCoInclusionProbabilities$SampleTable$Stratum))){ + return(FALSE) + } + if (any(is.na(PSUCoInclusionProbabilities$SampleTable$SelectionMethod))){ + return(FALSE) + } + if (any(is.na(PSUCoInclusionProbabilities$SelectionTable$Stratum))){ + return(FALSE) + } + if (any(is.na(PSUCoInclusionProbabilities$StratificationVariables$Stratum))){ + return(FALSE) + } + for (n in names(PSUCoInclusionProbabilities$StratificationVariables)){ + if (any(is.na(PSUCoInclusionProbabilities$StratificationVariables[[n]]))){ + return(FALSE) + } + } + + if (ncol(PSUCoInclusionProbabilities$StratificationVariables) > 1){ + stratificationVariableStrings <- apply(PSUCoInclusionProbabilities$StratificationVariables[,.SD, .SDcol=names(PSUCoInclusionProbabilities$StratificationVariables[names(PSUSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") + duplicatedStrata <- PSUCoInclusionProbabilities$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + return(FALSE) + } + } + return(TRUE) +} + #' Individual Sub-Sampling Design Parameters #' #' Sampling parameters for selection of a sub-sample of individuals @@ -174,7 +278,7 @@ is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ #' \describe{ #' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} #' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} -#' \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} #' } #' #' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index 23afc99d..0e472cc4 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -20,6 +20,34 @@ expect_true(abs((sum(1/designParamsCorrected$SelectionTable$InclusionProbability #HH should be apprxoimately the same after non-response correction expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbability)-mean(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) +# test co-inclusion probabilities +expect_error(RstoxFDA:::DefinePSUCoInclusionProbabilities(designParams), "Cannot calculate co-inclusion probabilities under non-response. Missing values for 'SamplingUnitId'.") +coincl <- RstoxFDA:::DefinePSUCoInclusionProbabilities(designParamsCorrected) +expect_equal(nrow(coincl$CoSelectionTable), nrow(designParamsCorrected$SelectionTable)*nrow(designParamsCorrected$SelectionTable)-nrow(designParamsCorrected$SelectionTable)) + +#test Poisson +paramMod <- designParamsCorrected +coincPoission <- RstoxFDA:::DefinePSUCoInclusionProbabilities(paramMod) +expect_true(RstoxFDA:::is.PSUCoInclusionProbabilities(coincPoission)) +value <- coincPoission$CoSelectionTable[1,] +ref<-paramMod$SelectionTable$InclusionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId]*paramMod$SelectionTable$InclusionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId2] +expect_equal(value$CoInclusionProbability, ref) + +#test FSWOR +paramMod <- designParamsCorrected +paramMod$SampleTable$SelectionMethod<-"FSWR" +coincFSWr <- RstoxFDA:::DefinePSUCoInclusionProbabilities(paramMod) +expect_true(RstoxFDA:::is.PSUCoInclusionProbabilities(coincFSWr)) +value <- coincFSWr$CoSelectionTable[1,] +ref<-paramMod$SelectionTable$InclusionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId]+paramMod$SelectionTable$InclusionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId2] - (1-(1-paramMod$SelectionTable$SelectionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId]-paramMod$SelectionTable$SelectionProbability[paramMod$SelectionTable$SamplingUnitId==value$SamplingUnitId2])**paramMod$SampleTable$n) +expect_equal(value$CoInclusionProbability, ref) + +#test missing selectionProbabiliteis +paramMod$SelectionTable$SelectionProbability[1]<-NA +expect_error(RstoxFDA:::DefinePSUCoInclusionProbabilities(paramMod), "For selection method FSWR, selection probabilities are needed in order to calculate co-inclusion probabilities. Missing values for 'SelectionProbability'.") +paramMod <- designParamsCorrected +paramMod$SampleTable$SelectionMethod<-"FSWOR" +expect_error(RstoxFDA:::DefinePSUCoInclusionProbabilities(paramMod), "Calculation of Co-inclusion probabilities not supported for selection method 'FSWOR'.") #define from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) @@ -67,3 +95,6 @@ expect_true(all(abs(weights$meanN-1) < 1e-6)) expect_equal(ss$SelectionTable$InclusionProbability[[4]], ls$SelectionTable$InclusionProbability[[4]]) expect_true(srs$SelectionTable$InclusionProbability[[4]] != ls$SelectionTable$InclusionProbability[[4]]) expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) + +#test estimate with HorvitzThompsonDomainEstimate +#browser() diff --git a/man/DefinePSUCoInclusionProbabilities.Rd b/man/DefinePSUCoInclusionProbabilities.Rd new file mode 100644 index 00000000..cc194ff2 --- /dev/null +++ b/man/DefinePSUCoInclusionProbabilities.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R +\name{DefinePSUCoInclusionProbabilities} +\alias{DefinePSUCoInclusionProbabilities} +\title{Define PSU Co-Inclusion Probabilities} +\usage{ +DefinePSUCoInclusionProbabilities(PSUSamplingParametersData) +} +\arguments{ +\item{PSUSamplingParametersData}{\code{\link[RstoxFDA]{PSUSamplingParametersData}}} +} +\value{ +\code{\link[RstoxFDA]{PSUCoInclusionProbabilities}} +} +\description{ +Computes co-inclusion probabilites for a selection of Primary Selection Units +} +\details{ +The method for calculating co-inclusion probabilites depend on the method of selection, which is encded +in \code{\link[RstoxFDA]{PSUSamplingParametersData}}. Only selection methods 'Possion' and 'FSWR' are currently supported. +} diff --git a/man/PSUCoInclusionProbabilities.Rd b/man/PSUCoInclusionProbabilities.Rd new file mode 100644 index 00000000..e3cbe2c1 --- /dev/null +++ b/man/PSUCoInclusionProbabilities.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxDataTypes.R +\name{PSUCoInclusionProbabilities} +\alias{PSUCoInclusionProbabilities} +\title{PSU Co-Inclusion probabilities} +\description{ +Co-Inclusion probabilites for selection of Primary Sampling Units +} +\details{ +Encodes the co-inclusion probabilities for the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. + Information is encoded in three tables. + + The SampleTable encodes information about the sample of sampling units: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} + \item{n}{Optional, num: The number of PSUs selected from the Stratum} + \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} + \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} + } + + The CoSelectionTable encodes information abut the co-inclusion of sampling units for sampling: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} + \item{SamplingUnitId}{Mandatoryl, chr: Identifes co-selected PSU.} + \item{SamplingUnitId2}{Mandatory, chr: Identifes co-selected PSU.} + \item{CoInclusionProbability}{Mandatory, num: The co-inclusion probability of the PSU} + } + + The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} + } + +Optional columns may be NA. + +The selection methods available for 'SelectionMethod' are explained here: +\describe{ + \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} + \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} + \item{FSWOR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +} +} +\concept{Analytical estimation} +\concept{Data types} diff --git a/man/PSUSamplingParametersData.Rd b/man/PSUSamplingParametersData.Rd index af53d923..3af2dbea 100644 --- a/man/PSUSamplingParametersData.Rd +++ b/man/PSUSamplingParametersData.Rd @@ -4,7 +4,7 @@ \alias{PSUSamplingParametersData} \title{PSU Sampling Design Parameters} \description{ -Sampling parameters for selection of a Primary Sampling Unit +Sampling parameters for selection of Primary Sampling Units } \details{ Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. From d780c9b20d18467b2c7dd9f21c1892b00cb7d62d Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Wed, 6 Dec 2023 13:38:52 +0100 Subject: [PATCH 14/24] Refactored and redesigned a bit. Changed populaiton estimation strategy to HH, as explained in spec doc. Keeping HT for estimation by PSU. --- R/CatchLotterySamplingExample-datadoc.R | 23 ++++ R/StoxAnalyticalBaselineFunctions.R | 109 +++++----------- R/StoxDataTypes.R | 120 ++---------------- data/CatchLotteryExample.rda | Bin 17485 -> 17503 bytes data/CatchLotterySamplingExample.rda | Bin 0 -> 2573 bytes inst/dataPrepScripts/prepDesignParameters.R | 9 +- inst/dataPrepScripts/prepHH_HT_comp.R | 2 +- .../test-StoxAnalyticalBaselineFunctions.R | 37 +----- man/AssignPSUSamplingParameters.Rd | 4 +- man/CatchLotterySamplingExample.Rd | 27 ++++ man/DefinePSUCoInclusionProbabilities.Rd | 21 --- man/IndividualSamplingParametersData.Rd | 2 +- man/PSUCoInclusionProbabilities.Rd | 46 ------- man/PSUSamplingParametersData.Rd | 8 +- 14 files changed, 118 insertions(+), 290 deletions(-) create mode 100644 R/CatchLotterySamplingExample-datadoc.R create mode 100644 data/CatchLotterySamplingExample.rda create mode 100644 man/CatchLotterySamplingExample.Rd delete mode 100644 man/DefinePSUCoInclusionProbabilities.Rd delete mode 100644 man/PSUCoInclusionProbabilities.Rd diff --git a/R/CatchLotterySamplingExample-datadoc.R b/R/CatchLotterySamplingExample-datadoc.R new file mode 100644 index 00000000..5adbc5fc --- /dev/null +++ b/R/CatchLotterySamplingExample-datadoc.R @@ -0,0 +1,23 @@ +#' Sampling parameters from the Norwegian catch lottery sampling program. +#' +#' Example of data formatted as \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +#' The data contain sampling parameters for North Sea herring samples from catch lottery sampling in 2022. +#' +#' The corresponding samples are provided in \code{\link[RstoxFDA]{CatchLotteryExample}} +#' +#' @docType data +#' +#' @usage data(CatchLotterySamplingExample) +#' +#' @format \code{\link[RstoxData]{StoxBioticData}} +#' +#' @keywords datasets +#' @concept Analytical estimation +#' +#' @examples +#' #all selected PSU that where actuall sampled are provided in CatchLotteryExample +#' sum(!is.na(CatchLotterySamplingExample$SelectionTable$SamplingUnitId)) +#' sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId %in% CatchLotteryExample$Haul$HaulKey) +"CatchLotterySamplingExample" + diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 087622ca..df12e621 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -41,11 +41,13 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str sampleTable <- sampleTable[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] stratificationTable <- flatStox[,.SD,.SDcol=c("Stratum", StratificationColumns)] stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] + assignmentTable <- data.table::data.table(DataRecordsId=character()) designParameters <- list() designParameters$SampleTable <- sampleTable designParameters$SelectionTable <- selectionTable designParameters$StratificationVariables <- stratificationTable + designParameters$Assignment <- assignmentTable return(designParameters) @@ -76,7 +78,8 @@ parseDesignParameters <- function(filename){ selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] stratificationTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable)) & !(names(designParameters) %in% names(sampleTable))])] - + assignmentTable <- data.table::data.table(DataRecordsId=character()) + if (any(is.na(sampleTable$Stratum)) | any(is.na(selectionTable$Stratum))){ stop("Invalid design specification. The mandatory column 'Stratum' may not contain missing values (NA).") } @@ -123,6 +126,7 @@ parseDesignParameters <- function(filename){ designParameters$SampleTable <- sampleTable designParameters$SelectionTable <- selectionTable designParameters$StratificationVariables <- stratificationTable + designParameters$Assignment <- assignmentTable return(designParameters) } @@ -431,33 +435,33 @@ DefineSamplingHierarchy <- function(IndividualSamplingParametersData, Hierarchy= #' #' @param PSUSamplingParametersData ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} with sampling parameters for PSU selection #' @param StoxBioticData ~\code{\link[RstoxData]{StoxBioticData}} with data records for responding PSUs. -#' @param SamplingUnitId name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs +#' @param DataRecordsId name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs #' @param DefinitionMethod The method for dealing with non-response, e.g. 'MissingAtRandon' #' @return ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} #' @concept StoX-functions #' @concept Analytical estimation #' @md #' @export -AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticData, SamplingUnitId, DefinitionMethod=c("MissingAtRandom")){ +AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticData, DataRecordsId, DefinitionMethod=c("MissingAtRandom")){ checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData") checkMandatory(StoxBioticData, "StoxBioticData") - checkMandatory(SamplingUnitId, "SamplingUnitId") + checkMandatory(DataRecordsId, "DataRecordsId") checkOptions(DefinitionMethod, "DefinitionMethod", c("MissingAtRandom")) level <- NULL for (l in names(StoxBioticData)){ - if (SamplingUnitId %in% names(StoxBioticData[[l]])){ + if (DataRecordsId %in% names(StoxBioticData[[l]])){ level <- l } } if (is.null(level)){ - stop(paste("The variable provided for SamplingUnitId (", SamplingUnitId,") is not a variable in 'StoxBioticData'"), sep="") + stop(paste("The variable provided for DataRecordsId (", DataRecordsId,") is not a variable in 'StoxBioticData'"), sep="") } records <- PSUSamplingParametersData$SelectionTable$SamplingUnitId[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] - if (!all(records %in% StoxBioticData[[l]][[SamplingUnitId]])){ - missing <- records[!(records %in% StoxBioticData[[l]][[SamplingUnitId]])] - stop(paste("Records are not found for all sampled PSUs. Missing for the following SamplingUnitIds (", SamplingUnitId,"): ", paste(truncateStringVector(missing), collapse=","), sep="")) + if (!all(records %in% StoxBioticData[[l]][[DataRecordsId]])){ + missing <- records[!(records %in% StoxBioticData[[l]][[DataRecordsId]])] + stop(paste("Records are not found for all sampled PSUs. Missing for the following SamplingUnitIds (", DataRecordsId,"): ", paste(truncateStringVector(missing), collapse=","), sep="")) } if (DefinitionMethod == "MissingAtRandom"){ @@ -476,85 +480,32 @@ AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticDat PSUSamplingParametersData$SelectionTable$HTsamplingWeight <- PSUSamplingParametersData$SelectionTable$HTsamplingWeight / weights$HTsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] PSUSamplingParametersData$SelectionTable$HHsamplingWeight <- PSUSamplingParametersData$SelectionTable$HHsamplingWeight / weights$HHsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] - return(PSUSamplingParametersData) } + PSUSamplingParametersData$Assignment$DataRecordsId <- DataRecordsId + return(PSUSamplingParametersData) } -#' Define PSU Co-Inclusion Probabilities -#' @description -#' Computes co-inclusion probabilites for a selection of Primary Selection Units -#' @details -#' The method for calculating co-inclusion probabilites depend on the method of selection, which is encded -#' in \code{\link[RstoxFDA]{PSUSamplingParametersData}}. Only selection methods 'Possion' and 'FSWR' are currently supported. -#' @param PSUSamplingParametersData \code{\link[RstoxFDA]{PSUSamplingParametersData}} -#' @return \code{\link[RstoxFDA]{PSUCoInclusionProbabilities}} -#' @md -DefinePSUCoInclusionProbabilities <- function(PSUSamplingParametersData){ - - if (!is.PSUSamplingParametersData(PSUSamplingParametersData)){ - stop("Invalid PSUSamplingParametersData") - } - - coinc <- NULL - for (i in 1:nrow(PSUSamplingParametersData$SampleTable)){ - - selectionMethod <- PSUSamplingParametersData$SampleTable$SelectionMethod[[i]] - stratum <- PSUSamplingParametersData$StratificationVariables$Stratum[i] - n <- PSUSamplingParametersData$SampleTable$n[i] - - if (any(is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId))){ - stop("Cannot calculate co-inclusion probabilities under non-response. Missing values for 'SamplingUnitId'.") - } - if (any(is.na(PSUSamplingParametersData$SelectionTable$InclusionProbability))){ - stop("Cannot calculate co-inclusion probabilities when inclusion probabilities are not known. Missing values for 'InclusionProbability'.") - } +AnalyticalPSUEstimate <- function(StoxBioticData, IndividualSamplingParametersData, variables, DomainVariables=character(), IncludeStratumInDomain=FALSE){ + # Estimate totals and means of all variables, and total and mean number in domain, depending on sampling parameters available (only means if only sample weights present) +} + +#' @noRd +AnalyticalPopulationEstimate <- function(StoxBioticData, PSUSamplingParametersData, AnalyticalPSUEstimateData, DomainVariables=character(), MeanOfMeans=F, IncludeStratumInDomain=FALSE){ + + #calculate total and means for all counts and totalvariables in AnalyticalPSUEstimateData. If MeanOfMeans, calculate mean of Means in stead. - if (selectionMethod=="Poisson"){ - tab <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,.SD,.SDcol=c("SamplingUnitId", "InclusionProbability")] - tab2 <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,list(SamplingUnitId2=SamplingUnitId, InclusionProbability2=InclusionProbability)] - cross <- tab2[, as.list(tab), by = names(tab2)] - cross$CoInclusionProbability <- cross$InclusionProbability*cross$InclusionProbability2 - cross$Stratum <- stratum - cross <- cross[,.SD,.SDcol=c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability")] - - coinc <- rbind(coinc, cross) - } - else if (selectionMethod=="FSWR"){ - - if (any(is.na(PSUSamplingParametersData$SelectionTable$SelectionProbability))){ - stop("For selection method FSWR, selection probabilities are needed in order to calculate co-inclusion probabilities. Missing values for 'SelectionProbability'.") - } - if (is.na(n)){ - stop("For selection method FSWR, the sample size must be known in order to calculate co-inclusion probabilities. Missing values for 'n'.") - } - - stratum <- PSUSamplingParametersData$StratificationVariables$Stratum[i] - tab <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,.SD,.SDcol=c("SamplingUnitId", "InclusionProbability", "SelectionProbability")] - tab2 <- PSUSamplingParametersData$SelectionTable[Stratum==stratum,list(SamplingUnitId2=SamplingUnitId, InclusionProbability2=InclusionProbability, SelectionProbability2=SelectionProbability)] - cross <- tab2[, as.list(tab), by = names(tab2)] - cross$CoInclusionProbability <- cross$InclusionProbability+cross$InclusionProbability2 - (1 - (1-cross$SelectionProbability-cross$SelectionProbability2)**n) - cross$Stratum <- stratum - cross <- cross[,.SD,.SDcol=c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability")] - - coinc <- rbind(coinc, cross) - } - else{ - stop(paste("Calculation of Co-inclusion probabilities not supported for selection method '", selectionMethod, "'.", sep="")) - } - } - - #remove diagonal, which is undefined - coinc <- coinc[SamplingUnitId!=SamplingUnitId2,] + #estimate by stratum - PSUSamplingParametersData$CoSelectionTable <- coinc - PSUSamplingParametersData$SelectionTable <- NULL + #add over strata if not stratum is included in domain - return(PSUSamplingParametersData) } -#' @noRd -HorvitzThompsonDomainEstimate <- function(){} +AnalyticalRatioEstimate <- function(AnalyticalPopulationEstimateData, StoxLandingData, DomainVariables){ + + #ratio estimate for total number in domain. Domain variables not in landings are taken to be estimated domain of interest. Additional domain variables are specified in DomainVariables + +} #' @noRd ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){ diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index 7e9591b3..0f0231b6 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -50,6 +50,12 @@ is.Date <- function(date){ #' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} #' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} #' } +#' +#' The Assignment encodes which identifier in sample records (e.g. \code{\link[RstoxData]{StoxBioticData}}) correspond to the SamplingUnitId +#' \describe{ +#' \item{DataRecordsId}{Optional, character. The identifier in data records that correspond to SamplingUnitId} +#' } +#' This field is optional, since SamplingParameters may be subject to processing before they are assigned to data records. #' #' Optional columns may be NA. #' @@ -57,7 +63,7 @@ is.Date <- function(date){ #' \describe{ #' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} #' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} -#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection could be specified in the 'selectionTable'} #' } #' #' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. @@ -83,7 +89,13 @@ is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ if (!all(sapply(PSUSamplingParametersData, data.table::is.data.table))){ return(FALSE) } - if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(PSUSamplingParametersData))){ + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables", "Assignment") %in% names(PSUSamplingParametersData))){ + return(FALSE) + } + if (nrow(PSUSamplingParametersData$Assignment)>1){ + return(FALSE) + } + if (!("DataRecordsId" %in% names(PSUSamplingParametersData$Assignment))){ return(FALSE) } if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(PSUSamplingParametersData$SampleTable))){ @@ -128,110 +140,6 @@ is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ return(TRUE) } -#' PSU Co-Inclusion probabilities -#' -#' Co-Inclusion probabilites for selection of Primary Sampling Units -#' -#' @details -#' Encodes the co-inclusion probabilities for the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. -#' Information is encoded in three tables. -#' -#' The SampleTable encodes information about the sample of sampling units: -#' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} -#' \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} -#' \item{n}{Optional, num: The number of PSUs selected from the Stratum} -#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} -#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} -#' } -#' -#' The CoSelectionTable encodes information abut the co-inclusion of sampling units for sampling: -#' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} -#' \item{SamplingUnitId}{Mandatoryl, chr: Identifes co-selected PSU.} -#' \item{SamplingUnitId2}{Mandatory, chr: Identifes co-selected PSU.} -#' \item{CoInclusionProbability}{Mandatory, num: The co-inclusion probability of the PSU} -#' } -#' -#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): -#' \describe{ -#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} -#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} -#' } -#' -#' Optional columns may be NA. -#' -#' The selection methods available for 'SelectionMethod' are explained here: -#' \describe{ -#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} -#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} -#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} -#' } -#' -#' @name PSUCoInclusionProbabilities -#' @concept Data types -#' @concept Analytical estimation -#' -NULL - -#' Check if table is correctly formatted Individual PSU Co-Inclusion Probability Data -#' @param table \code{\link[RstoxFDA]{PSUCoInclusionProbabilities}} -#' @return validity -#' @concept Data types -#' @noRd -is.PSUCoInclusionProbabilities <- function(PSUCoInclusionProbabilities){ - - if (!is.list(PSUCoInclusionProbabilities)){ - return(FALSE) - } - if (!all(sapply(PSUCoInclusionProbabilities, data.table::is.data.table))){ - return(FALSE) - } - if (!all(c("SampleTable", "CoSelectionTable", "StratificationVariables") %in% names(PSUCoInclusionProbabilities))){ - return(FALSE) - } - if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(PSUCoInclusionProbabilities$SampleTable))){ - return(FALSE) - } - if (!all(c("Stratum", "SamplingUnitId", "SamplingUnitId2", "CoInclusionProbability") %in% names(PSUCoInclusionProbabilities$CoSelectionTable))){ - return(FALSE) - } - if (!all(c("Stratum") %in% names(PSUCoInclusionProbabilities$StratificationVariables))){ - return(FALSE) - } - if (any(duplicated(PSUCoInclusionProbabilities$SampleTable$Stratum))){ - return(FALSE) - } - #test that mandatory fields are not NA. - if (any(is.na(PSUCoInclusionProbabilities$SampleTable$Stratum))){ - return(FALSE) - } - if (any(is.na(PSUCoInclusionProbabilities$SampleTable$SelectionMethod))){ - return(FALSE) - } - if (any(is.na(PSUCoInclusionProbabilities$SelectionTable$Stratum))){ - return(FALSE) - } - if (any(is.na(PSUCoInclusionProbabilities$StratificationVariables$Stratum))){ - return(FALSE) - } - for (n in names(PSUCoInclusionProbabilities$StratificationVariables)){ - if (any(is.na(PSUCoInclusionProbabilities$StratificationVariables[[n]]))){ - return(FALSE) - } - } - - if (ncol(PSUCoInclusionProbabilities$StratificationVariables) > 1){ - stratificationVariableStrings <- apply(PSUCoInclusionProbabilities$StratificationVariables[,.SD, .SDcol=names(PSUCoInclusionProbabilities$StratificationVariables[names(PSUSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") - duplicatedStrata <- PSUCoInclusionProbabilities$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] - - if (length(duplicatedStrata)>0){ - return(FALSE) - } - } - return(TRUE) -} - #' Individual Sub-Sampling Design Parameters #' #' Sampling parameters for selection of a sub-sample of individuals diff --git a/data/CatchLotteryExample.rda b/data/CatchLotteryExample.rda index 7c370f308575cebf4ae950da75698e24072b7fbd..6784c26c9cefc371ae77f779f9d2d9ddaaaa85fa 100644 GIT binary patch literal 17503 zcmbun2Ut_hwlEw(Iw)O=A|*t+kWdAbE}=6=l(*`cThHWloeMpgBY5XqG`lP?~y(H z_5Y+@pyl}Z;mNO`r(N&ei(1ctfJBm)KulT_tp}P0kq31U@dl4<&ZlBC7C<0%W;JH2 zCpp0cC?L_Se~J?ey+7IQd{Hx+>dWs&5V>jVf)#b=!S0i5)Pd&<_;<-ZQNDki!}95p zdxC*c;$jfXxmFN9w6mk(hR1nq>z=iTSx>Nx8};MiOZP!CzmZT8ZbUN|J3w!lNa^JI zI?ic=m>WSL({hmPJ6RBjiH02%az@!XhY_w*Q?yZ%@f2+<(ea2gL*Pq>0)$;+#PAU6 zYWN^3U(+=SqFuD^Sm>XjH^`wcRkU6dIVO1%e5CgTl0n3NsZ`;n&W-0|<1_w|xWe%t zhRy*-%0=Ih4yC09Iv~d%dLW`_z)IK=PS5)KfC5=CkozRqK_EwxqqSg!y zm=!a`LUkl)VXAB-l!Xxy5pfbW;xy_yAg97u5F{d9MMqphoR$n|Cpk05HIg9F*Y|de zi`|BXg2BH;Qb0lQrv5XKBLN#BNdW)>Y1z){(XvTE zC1^pMg^xZ0m^0Y@pJ;&kNEF!}$%5aKC=&2EB@jJ(>j%y7^H&C2z)I( zgwk(t!api#$8m6Fu7x9#ble z%^(Y=r3E?4jdCeITn8xL;@0J#58OBQuKCJ6 z32$3%I+r;Wc-HjE$j&SLkD!Nn4eei^_V%6pym|Qbnd0LBd@vX2J0lknXWkUmv_ORv z+QNcorc^M-l_l25TBpt^5#=aAL!@fk#g2^uCEOa1xRNd^&MG#yhF%gBHj%V_GiUxr zWUDr7mR{c?#=~#OO2m`-eXBe9*}xkz3*-u$0~?l`xLkOef>O9jLn@*{^j&_ohGNFM zm(ZSzFz-l9p@5gues0Rv%{z0kxDwUP%r+^C^R6*!NjTK zHt9lNk$wZkdNve#Sz=&leQgw_TmCxAn|d(Y?1_q>+#In3GIhJ{kwh1K2%GT|fo4V4 z@2oAe!ZYnL)GjV|J{z8yl{NwQ;@KRK+i!g4w?)Hi`MU1bj5%+d?Ki6q$B3HccZ_+i z`uZ;%L!3l??c99Y=F)TkJLPNZwVtmN>aAUu8`GT)5Ll^9DK}pssf9!T&BQ)%rtNLJq>Acw z)b^l-Fpbz`+nih*4F>8p=bKMLL$`}`j&OJPcF;9LZeiFkKd5>VS#4JF<3 z+P+BH*5?0AW@S@qVz5wcPSlWPrAtdiR9j@FYj%MZ(Ld^?O}$Wuot@v%GOKRMd>n8U zxs5BR-uaNSGEf_O-~X3y=9Tz%_5oP0O8p12vRQv7M_Dh4rT049P?+!yAJtG+b;5n?$ZoXVxrJbBqjqh9S~Cc#|qVZq2`sWRRW9e%?oe|-O%{7FI}~1c4)Za`&Q4o* ztlyR!b&2{&(u0*NK<#8gu7Njf=26=ut2x;8) zgG3_i41K-NzAZ-K9L`>R$~icHd-HplRa$p;>?e-4?36U*k$!`3HZp(J-_5FR@9o5H z?!at88!A-p_QcuL8+6tSap7e~G8ykbt=&tAvUj$Tj)c>H5G!f4j?l1W5cEK;@pMe& ztS7kL7jfQ=;Z003w$>LjDc4Vaw^byzmRFg33G@~pdg3airA2z)Shb$XvP`0LRNs}x zAvAXueExp*pw;O61=b;G7(L3yquG?L!$wPTOpvBREl)dNptishBkqs^@32Q*986zL z6Np76GL=!~kco9jw3;MmED8>;7xL_eX@^FpQOBD!zl$&}vAn<|H^f?2FegPCGAf=i z{wb*-<6Tebk!gW*t%Vj?2(#pgia(np^=&*zW$998yH0pg@{_N{=CCPZ?qU!{Fqc)* z#K8}D>hQ_S)Kv>NXx~ep%{(b@s!+aRYr{0tCmeaGEapT_)3$GU)xiQ*OH&8Y`Z}Gu z?WJx3)$$m)0vl<+B7)&a*nA?_muf{irK>z0-jO5H#T3??%P~+Zf%I2eUKWK3z^zxk zp1X^0Rbze3!c!g7gj?e}!up#$%G2$WAbeq17ls;Ka&Dky)5|?54puNT{f5;c4Gk~w zMo=JwM~K3p%8HL6d`XAtEt2$ zDM`3GD+#>To+LLnI2!xBaTy^zkbE%A5r5*@8n1jq9v|hk>LG6x$m9<88buVvm~XY! z+vSUwPESkCYByP+LP;m-cq|z$0yud6;Y9^7P;wyi3>TOD5WFF2ayoZ0iR49c#CkrZhM5{AGwXX4>ctve#ahLA|#X8&+} zpvU3RwX?@VSOu-P{{4f;Ny*;vUY_2(@uFuvy+i!*se5N&5~>vHG+3Imhx&Trxfp@G z%T2qpt;a!m--Z@`E;sJTS{=0L{%T)27;D#fAZhq&sciLu$V?y&w3kL+C)vWP!5mep zHgu?y-3HI0436Qu`zkbS$v3?7k~~soOdhrWsZuC4s#-?Sx%@+D*BY+GC?aDAA=1%S zc%0BwgVKlQ-g?s-A^b5dY--t@_a?2NFY3s`NX1TuUH5W{L?5lsq+~3ytJO%#dXvDL z0d71+Nri;b4c2R~@w6}7#yAvS9H#lrjnw5iFdM!d6K2laPYt@^^iq=V4bD42g7wAh zbaCse(1wP}OSbnMYAf{fY4G;#BwNi^jWdU?;UyK9sEnPx3-~_{O1{Z36S1T>W;u$i ze(@&hT%nK()7*AV+d=h+_>ELy&x{-ivqGsM^=mm7KBm0+@&O_DG3#K+E%}@y;!X8^ zO$Fbp4eScL=!}iP7m>5YsDo#g+q*N3v%~L2IDdTlPE|Ov?U3JIFQ?{qDPhhfcF<$a z@WNH2?1dN2HW7BJ)7~?OnI*G7*Gf^VOYJW!8X7$pshtbE$G81WdonRGIfEBheOisG zpRLQ^C_OnxJv{r-=+`Rgv%Ou;K}_mSrJ?lmqtB%(%o?mKM}9Vb|3bkIU9y+s`7Yu*<(S4z0v%4-|&KS<=@a4GY& zZEUrob?W6AsXCwI?J_@PT3c~{V#gvb-}ZTRxozI2rMb@5Y}|rxUs=**CqAuZesiEk zROHCve5mmuWGSET*7I}z&3je$l`;)yo^z=2I22V~37Fe`?KxzX=}kRZ^)RiW@=MQf z`#YQ1Sq@p$EX*}t*0naMIkh?3`I%8apTNifCb8k#7z%bR)xnqV{c3MrqtR46Lqty;2=0H`qpycC`rJZiFA1|oIY7?FGeywF9vYO^q|m_8tS}mT zGGcaPR`!f^=c`Tema~=!TvLs&$J&5g<_)&;A=j8xvp57(XZk~{Tn&;at#=6xYSLl` z2&M^L1LFcZxzeWSYz}^|iK~rbnzUyt*;F3gQzCPs(kRw3L6guCfhaeaFwV;Pu$P14 znzuea8^n@l@0~@hIpmDCv&e$4fF0eV3Ic@XTS<+g6)X1Xs3s~l4b{}b976*n^4Kwr zseQS~i52Rg{@GcuagGDhNi!UZ~kfeHVak$|EQzFp3)61F;^&0|B zt|RzqVuLT08#~CQAk*%z7F}N!Z_fulJR_LHF(xP4%hP^U_Q(08)*CO6vY+1Dn#U?Q zxJsqI8)kb%!-i&ixSO(Z^6TD75II}x!1rt9pT3>-{rZY`=YF>7Nzn1aQRdEM%TdcO z>g}&NzW07yIJdpkhOaxicpm;!w950d-(t|in}he~iHLeX7c0`A=dlJ+L=taVEPWe# zeD;Y*z#W_6skWz2SsU z;$pGn{*+9O#qfiy9T=w}>b0zim!;Ezfs^>7qWLk=2rO$SDLj!&c8k0Tb2Wh66ae0!5F(NaHjm0?}ZC1(xM~1`1QYD0uJOj$&EY4&+gK*$hMvsgF zZbC3_I}NjR7D?rfT0Jr0ap6)o;#ty4s+TpwOW|nbBQz!#Q3&9{lo-;x37V)i!Vwma zq{hw1lc; zKxV1I;5c(;Da5Um#jRMlwwT4NRJaz-hKzy^sv$oZO4FOT(bamY0DGQeHfv1G3@LP@ z!Zhk5KWNVxmCp&5&w*<_*czYLdh$w>YR@slJW6XlSj?W)4AQ4MmZUneA;aT34W%i} z8cWO?;mDYxNwhlc@j) zXQ6emvOs459E`Sbt)JcopEe}Tz`?-)EQm75!nGZkFLl=S>%V1oat{FjBlXchj#o`mH$inG{}9!c$U+3)#$| zTC6+;QAuoUx2i)O>%-}xkvBTcK4(Mo%y&z{l#>TiwFhSPb|^lB_SgBJUk!Ow_N0hr zu^`M6kYv`eU9mmsm=GLkdcL>;F3H|thOaKJc*f8Mg?o+^FbgZULB-~MBU5F*D#o8G zS?gL$Gwa$gm$1?2s#ZTyU4E_bLwVb`2zwuGrL^@GK4E=z*^u4j5YS8Uz> zB_OPzD0Z7D^RoK@-H|i-l3U=#sS6*z<~(B4)u5-Lsb43i14R;_2^6WnE>8dR3fnF0 zBTsVwg|N9nX%bY&yaI@ar^Mk`&VbwFow%&nZ}P4eIABg2HHMo{9rn^5_MXfgw(M@K z1b@2^(PQHA4jr&1(grEDs@!?n@Y>)4@ovylpGR(RW5xR*-TqtzQhJd6ysWDGNM$sQGG-D4iHYPse-dCRQSF zV|w++#__|5lMB>qxrh`)8?jO$sqkcBvxJ(0glheS7lzyjZNQOIGeB9C5W*zf_PpGP zRHbk;L!}h4LZdE>79Y_4LYTitJE2E7?`^e-5mrJ(-_e|wnhJ5tP#;ld4Ar8t5e9rD z#8{hpUO>S=sWrb9?pu@QpWjBhkmwvOmb+VK5q__)_C-VTn1@xO%cVwRuz+LO3N+tUM}(MYT^j zDx8gx2g4{}mJU%9)7I#O=5<0@l(7&v1R-ox!;C6PMnL-0S)yRv_SAxf(-kh}@0ukE z*OwJ8ion_YAM->ESp8sv0fBE$>{fcs+7rqkzz#nn-2WQZf1oT}>)#SOs9o!42zMgD zuP71Fsnd2!6U?ug_;F$kO<&P5}o&_hmo~|5Nef|Gk z!Gb@|fJAEGEE5F0`|zvv!PF?UT0I;Xg4XySRj~2|?7)%$r-8a%KkEd{;c9_E3j=cR8#qX5`UJr-=%($Zr4|Et?> z=0AqJtsw#S%sK;GNAB!wXEtF%{%tbkuJ=1@U@i3(HE<3LCKda~N1w^umG;#_py9yY zFbl_J4zfeUr(}U=@A}v49=FlX&({E?gSG9N40&gX%+HU5o}Az?%6@MTAWic1 ztvs%A+xe(L5GT;CUr&>?uMbw**MBzv@=f-Gn>KEs<#(;=e^L3m+$J)B!T2B@F05@& z0;~C3p4z?G0P$@A3C=X~;~ z)4cum&~d!|<}gcDns?hW#{ZzF>^IPHQ{0Qw35O1%8U~sc_Hg$$eh-Up6S*Ub59k%2 zEcgv8w~eU9HZANQT=5QUH%syxzQP_$&yf7wK%XZ%1ZmF4z^A7@pqi&zN{MmJ@vF`8 z!(Tn6stI5NOjXMaBHWFY=xNE^sX}riFdltN*?N`vzNzwQ!9q+B`LmV}6uJx1`d7QTJFi;Cs?FBJO;wrt$h&wFdmGohlKA!J<|R#;kE zrZ2kFU;2q}e!g3-Sk>46T`dd-<7jEy4Va%Ih%6zy2P_-`>zc2*DZlTXwE~EXuYLWj zHw-2U1V+F?VB4@%IiPSI3A8ro( z8Pos_K%v!c`KXVw#%HI#k(DReaocByz$1*7(Fe0)e=R{P%O@H*xCSCMpTF=C4vb(D`!qrq}L z-wg9&atrsxuNN7bNk5EyNW1C%I`QQrFCJ14RW%@c@qqF`us1y1tH8T0T}4nrA4Q~r z5NlIQ8!H_WX|03-(b*U;Qsx|j(SUW?4OrM2_W!%u|KI>XCbWesKv}{=48RAVl=)|m zNs}dDz}_`sFqz;SzZHN3*}DPLe5Z)qkbs5p&N>*D#Q@Jm_$n)lKZ5{+NDs$gkVq#2 z2LPNHfCKI*#mHb#7E_ZZ@Q=SyFnVWybs+76XD=bk82?X_BD3Bf&OyBxopU` z3=1IYky(zHDZ&Y`stu-Vbc^oX74`1QSGLE~ApxXKK(u3A9^B z2uKw`A=8Q4%zcL7)fz*lRi)>Z7JRpO@6l`d^E!$gaI@fv5{S6;v`C0HTuUqd?i28!rNE;A(1@G`9;}c^1hate zW><7$F^bs$Kg*3W%8h2_Q^%TcQw|lck}Kp&p4Akh@|x1b>JAC0ZYG4S>s$6*m13wd zd@5FkKCh5y!n3B#m~IO8o@S21lI1m_RN%Mo7f^edjB>2;)P&m5M7yRx>rX;x=hjSB*j>tlaY)eQBd8ZG6F;+Oi46v2RtnZ{&=Vc^}$ zAyj2FYATm~eYm*h=M_WG;6_}uRS8K222(X5Y@x0|0N9V;kk>50n0h!qN7p+m2?m2| zxi$cb1*jL0fI{jY%W=8N0MHde0_U{B!Du+}AX%eDAXEJu4p1+=EI!8@3NZTcBmB4J zbGqT{>u-QWnqe{s^WC1GUvttlh@hx`!LVxJrSFlqs<8p+!Jiygza0EOQLg_N<{#b} zEB;d?XdulWy&FG6?Mg7gS$G+uMfro)_aLHA-<+kH)^G{7NlseK7D$6zk0UIXOu2Of z3`{h=;I|A@`1to7krBlD~z_$Vd^svPb?lc;X@SC^%YXt z)2NU_pLhJ}!Asrdf-Q{@`EV+^WBKl530=EpQ z?6uF!#P!XJ<{1Ue*<`2D4yQ zxUuo6IR%0~Br**!tiHZTqywfv+l2US4EU9{hhNKoAHv_;(^E zsPXAUgzJCm_i2X+8hEdF91NTS9kjg%r7TD-$3+)twdYn_PKSw$Mlnc-x$3)PjG$^w z%5u>>sSFnkI66Hsbp0jGaGmjrXlWdut@?V2^fOncMJS1&&Uk5NalPQ8SPVn0Guenv zEyy1Jk~xY4sr*8{lTF_>y%-S#vlGUl5rthCqc4x0Z(b36;xM$iwXsdNSzK>)FZXV7 zlhUM6RyNU$oKXh4gjg_v%NP_{i$nr}B~SHZUxSkIPPx_>TsYhif~1)(fg=9E`Bc%B zOUqRO?Sm_v1{?r$+>l87c|ZaDhP(-p3JMK`+FJl+fsw~;JAW8}0+bsNUr3~jOeig*+u7M=`j?ak0{XB*kXRrOJAnaP2`CA+W%C^nFzygE#>axt2iRAD z2U++WzoA*g;r4HPSwkQ@5C;Pz0N?{}Z!aM15Vf2m_&j#i1zvyk&*n@!g8u~_|NG`L zX2t)4#zxO9>}v>QCc5801khr_vcF;4q`!$j+CpF$xQ)V9Fgk&EC;%B#hd);^=#mr->s1}{lVXekS_aEJD{jTewY?{twumEO#j zxLUSwJEDu(+m1k>8yRVA92uj1Dtd%VX(L{3)#P?1*+qKov*@_e)L z#lkcGpS3Vy7d#>DuAt%2ITF_|ru9^xTnxOXxC=(jmQq2I~g z*aUc{U9hEDVm7wDI6uDxC_4sIFuw@6X@F#NNq<7I+8KhJ5vc0+^$?(XOAF9HpL!~@ zfP?~4so;;7b4{9T04y`m3Y^PhFu=17K?itvxPd7kfbCUPRnmxzQal4(BaP^3PStzq zKWO&9s^-(sj%TMuJp!eT2@2GTFJ1GN!{D;kpt1S<{m@u-UVpT~GzTvU z2_G7Zs4)}WVm?mK^(u0^*TNK!F&>SYGx7qjlvRs%`Dj)9&^srVvIFdoMBs)>%S$7p zj{r57_LlYuIHl)rf3$O|s=#D`wg$KXDmi^Q$B*EtfTt7!y#Todh}U$g;)4L#X$}G& zU4T}AWQ2f2_w$tuEj2$>+tH3V)I5?E;qywCx{q|`Ir zip186!KgK){2J2l04BR$O#I)Hw9MF=|+;@O_(KrzMy)M ztFHFbleD2j9qae;Ng2Yt&c06V%x`9@X0*+hdJ}r8TM^CkVP^Mk$@2=t>ayb*p^x=7 zKUr`ZRB4hl@Gzx#Rso5sl2{JH2nD z45E(<$IWsN@_Kk3*F1-X@Jv}>vQ5TNJFwAEcgcf}Bax?$KXjLAwj&l4Dx{e$1;i`v@~`mEO9(rYr+tWUTUSzdke3Vr({?C0LW_07Xyp08pC_pf7GYp*$M&OhGV zTYY%F=gZufbNQi+yjN8B`$HMOUw3xoHa@3)x#OPMI~??|b5}YmZT}FxOPUM*`F$^!0hI(WnYd?dkL@*f5%|n^t6l^FKO_m|X)HhPips#^U&tm|qPK`*E zq{mu!^n)Ywcww4wN<}X18e4Q2F)N9-de@o68vP^&#SCO5qr?-AXH~M^nxWR>1A;FQ1Py-qy;!UD(ShqOS zb9Hbq8Vz=&jDp}8VWjk29cY0VE*g}2lu|wf*+c(AKp@>VHUiu&78^~|!K)$`>8P!Z zO+Y}ERr|!DjNpPZ8j%Cy>e^^xct{)s21de}7J2bp379xuD3(VZVyi}}Y@p{vq5XzR zAB<)f4%UvQ>p?$xww=;}yhSN&Kt~*+IzsV+Rp@8K$*)}>4SD;c%BNaEN9~PHFLGN- z+xODWT5&I=XgjcV9*BFBF@wos)v6jKJ-}+*`eb4z!m0><47`*lL^4TT7_8o{XJ7_V zj6jQ{g>4ex?Aj=LI~Ix_JKM-Q*I4xtQVnNG`g&zGNv2y|&s3-CR7tC=>rf~~b6#~x zW_EFQDJe}!vS*QnuI#btI_$!!j84L~FI<`EnZSv8u?1n(FM!u@b*bXTV^aJ*vA1Gb zc+X_2k=CJXBdg(cj&;t00%D1I!pW>t#rb40^s%|2EN^&5ilx9k>OFR5!bWr%u`E*9 zJlyl1XV0G1OQ=%JLRc=gPv< zqe55Q6E<$Eqb|;x*Q1oLs-&MM_y(O%l&1sf$>WMaywi9SiKWwzvz3gN3>zsGJcEuF zi55rXK^>=rL3-+;YO4C`wyyNHJ$6Q!wxLFbJT$g%SwhvLbd=2bUxrClGjr(QnxPWn zi&3vLk1Nhsy;WS7A?!GwA>kTVqgJP?{x$<3UU4}Cg1_8EYQ&tDeL=D@Z>o5zUQ)gO z%TU9@#Ng4C7CvyvT4@K*9hDJxhvbe6VEkPiSeHrPc>DLZV0+J^sGmOBRH#}nnz)wx z0HbE4U{Ma>xy7HRaNo9rdH@6%tMzYC zj+1~lKgGxRF97>(Ak7kBO&b~-f+`58M@m{2PEOnEDv#TJm)INs%sJvRS#^82u<~>P z1WbpUlL3SUo{j<>ZXIg&UlXRCt_tXk%ly+AhSToGl`qFje}FMUdn#NkA+{_r%580J zE!%&w3>~-ooUW0u)#I0*f7>UktXdXzEdNa9_;Pcy^7mdir2kAqwHttU1r}2Hd+`PT z>;V`P4TO4ywn$dsbUqAMUKt-Bf0a?(9*A1^cAb|_rvnB8&)>fFzsY7U!VZj>`Ms}y zr(x}%okXWB91s2JcbxuLSrnkxnCU4w2rKtL+-$2ew?&%hxR?Wb7XO{ab^mUXB}U~h zy$JjLOG(GdcHor>*(PXnO_`lL!9=g{=nh z{)QK-EBG;H3UVkHmoZ0rVv;(MP_AH*BJJ3&PhwYO!MjWf>{4cP!~Nlx;S8%Xfv=v+ z#`~H_7ECS`&6$?k&aK@{c%ados=)d*xhu z?pW+L)6N~jnvku>+O3zsW&?OLe#CaL(k^W78iyC%aVeow+sy<&}S zo$=o`Gi5Fheodj*>iC8nck&OuY#pptO;%D!pg zqV&F>uP1*#UOo6!f3ov>G$(WH@chim6XE`cmw8{Kquh3d_4Q?QN|JY{cCIhhWli`y zmHtRCV4MQySqOS;hBK@*Bl4_lBMWx!ux!eUhWqtQp+4FTg)?m5%W*OGdl>s`;ltaq zetV4W?xm>5F=kdSrmpL*MLmeAV{HqpYC{$8`b`m4gTn`v*86SIkp)8!wHBnTXl`HI4VX6^*WX*?3IVSGvQdyr06T_y&jUD!>Lm-3C9md-eIxJuT|~(t3>zSJir? z?O)`%^_10bGxNxu@x##bwkzbEnJ!B*%OVHptYoz-$#*iZU%a5Rpgh&OxQkM#)L;Jb zq{#i!+wQfR{MOu` z;cWhoqV}01x;!hDXB(pExz)XXp0u1ifrmNY@NHf#7WQh8)+d$sIS+7h_h#m=?!3A( z*1r4t2>9N?>U!Kl;FHGVJC8RXRBi4GP5e4{lyl_!ih8~30_ooCb)wCUi|342K0`I> zo7;!l&!_zKoMQ{6c!2B5=-o7YqG6Uo^I~J3w@cdV?770)fTnpbc>Rgc7#x+>tY5xY zIQZHncau`Cw2FgJDz?7K?WT|obPaQNJzQ)yk!whnN|YX&I=#_Qd1Tj81~q$}AHN+I zzA_+?2FgVwq32#SLG3a2&<|k)B3SA8KQ%T=u3kPnid~i8lcl+BCkKq`(E~1!&@1=)*o_x zu`0hR-m~=@)$1W>8L4iz27C%%6yGjo&M7M!{{;r|Sq6Tuf?B3rnVTJuaWlN{{6UN| z+2q1>|A2d9yGT|%Vn7&Elhr2gQiDj}p9^Qg9ZGW^x9w&6Hz7a7Vznx;H7hlTpR(4@2WL0Y~JIhb0GdbS`KMS-M5|9Jc7Ku%A^$R+H%QCqqoH4ou;1; zqN3sWm9@3)0a@=3Wn3-pz;Z4vZe``gWreJR_yGT{__@Z9`&OQ~^XFx~X&NX4wLQv< zqTqfq+jdakF&XRCK{LXpCs06hIb}<_{qpkOL)!b9-m`}y@BFUm{ovh;|5^P#aqBDW zxGm<1Q2OnkllfP^Y)Kl%}b>q-)=h6NOWz5iau2Exm7z|r0K+% z6P4c+pKh~RS7}lJ8xBszGG4V4{5*5jA;@&1kR8GA)yuVN1>7$ogD)r_kX7G*{CK8|^6dUb zQ1lK9zv(102@82mRaRdys48K?--l$$XBV7Fn3~vob67BPk>QSnxtV))XJD4)#rV3O z5M7rzsHdq+cKNQzqeYb&m!)KXyE4YdXGID~!3HU_%@pBnJr)jzYXM~f<=5hFLpaN- zHJ4Ez#Uq;vt*})&j27laIu%)&(^{LIuz@j;mR{Q0GM{_2t%wiH9m5x6?da3s5n`1= z7ly|8%no|J^IsXb=7iLxh$CXc731eFO4`spUq)N6YxbGGqNGW)wWy8QK3I=Ws9g5$;wEO8v#I4oO) zXZxqEfmn52+ym0%>k9ZUKW>!%dgZX~euoM z-`Zn-tch%V9j;pZWpi$m`s*KU*Fle&sleuzIVOlI-d0X?&EvY->+Tj{W0t?C5BgermAb>|FfwHW=P(d ze(hx<)0mf^>1}zo?O_McPlZ{M^*&!}lcdQW?%_+ifqFzCVj$4@N1wk4g9h-vjs^P)(DN%_S57X0TYi!Vd~Sa- z4QfBe{IVtqUH+W-?vm2?+W9X5cRbE&KOkFY%MByPUe=TQ#XETE(l?@@_^h*RY1A%l zhqu$BYRBAuems9wrIRG%70<&6*(+@=Br_+!O1>|5o;QGC((sc`VKG>q zbNRKEklvS{96r2d@#ms?H=Q0n%=Z6oDtrE@?-E5tZq1kT6FFw~1!ZX#q1h7IY?3x8 z39Ns6C&xu-9?Hd41)f;q!k%y92fO&7ZsBuMaSPiT9<9G2?m4vqi=Wt#nfbzbwQ@bW z*Un;uBRt1E@vz1!qns~x^sAP--)fwi9S3~jE!sP>lUU|4Q-FhMH(O>;{E^3GVfU*$ z$@MZzdE+y#uQx_d-}R}}UqYC~)RuT1BT=3n(}wfx#%BOm>Go9>c|cRO_T zmEqtjV}#m~w$IDGUkSfn9Y4r?c)W5pt?VPwSK9ZNM%E)icdjh@EhgS%c_oDV1fu#j zyvBL{&5a8U+YbbOCLDA2KI&KxVLcKU$@@IgK4;$xIWK^9QJ#g#o3gK_YS(y!`BWqx z+Gwh>^}&*j^o8U5)otF2sWFMg@^z6W`&{H1Nl=Ei(LX7WF}kanr)4zHCf!RbK>grO z>4mO3{hs|tNq6_69dvW1g{&}7J=IlN>Ne~g6ysY-KKCJWVhO+5w8%fX?)Yl&`<0ys z5AE?Ze!4DSgTFptpj+?&U7}mg+C7=qIsW=-Km!aQ&nT|Xm83aAZ*{**~vbuTnYjm#HeMRrtnWUN{-_MjH zg@(yzm4BT?Q(>VQ!+E62jiAP)_RBxg(t$7BwLDgceqH-U3>+|@2|gRldklQFM4xgd zMN<5nDmI@RICw@leI%2RkN_bh2#3({k10lu5;UQZUjg#BU)njx-Fg?I$dag!I76jq zo;phZr3)5d8trN>6=Hn^BBmvh%sNdJN_RsP zaYh`YVWZI(+ZWEGYM^08Gp1}ysY#c}NSsQ`9juBBqS)q>1Fm5F-*Y zsNfj6%H({FBycA~vXeN8ityn4_;U^GscMK#f(8anN>3+SX$?krePG~6+fhVXZqYy@ zqxj@nd~b7mC#WVzz||?w?nF|e9XrK&43d zAEk&m)tVx5vMx5R8-c*XfsMu)6V0N-|>&xj-; zPKQMhMayvz*RA^LFF$8hl#x zAqE#HK03}=-#wr2hazJ?`uY7iy^;9W7qjM|D1;i3LZaf$Jzub4I3*Q}7K!!$0rSuh Avj6}9 literal 17485 zcmbt+2|UzMyYN^lTV*dqVob<7_E2KTWRNXnO=GNM-=)Zsy^M8i6;UBV4P%gO8OENS zk##H~gi6czAJu#B`+o2J?)}~G_?!P(p7WgNJo`Bo&Fju8vX?9&CYHX1ua^$_v)uXh zuVrH$=%M`jwLkrH<0nx4;5n^!97HYWH5lkL!$G6rN5bBN0&h+prOy*+&jo?z7}>?m zF86920D(Y04jPUo7~S0;4}1NtMaqe)YzF^y8VobVP8vT!> zTAgpu6erOM)5ywq@EjKGlkbnU=g{b5iP6$O$?6~zsl&nQQjCQnUE+$vZ8-3A``p9F7Ozbrlk+XihX-pm1W}cWIP}I`Loz-bx6(--`QoO1$Ft) zM#~spbwgMy`B}H235TJH)_doW{Kt=1TnY`1in9;rKYiBGC(n0v&`>udQV82>%$^==ku$8u{8TYv4$!ASD z&d-HL`;1T0#i-PQ`KOUp3j=A%KIqB7j7mJMuSSq(akjko6L-P z2y^*b2dOj708bdSj|i2O<4}^8a>#HCmJNS*YGD2RG50b4`=Ns&PCk=GVCEYcL@7B$ z{dzJQ(z-%Y&XlPi8_N2ul-ke)32^b480A2Kb3bBBuffm6xOuF5$VOptmJe_^oRUW- zPu=2)8jG2X0FOy0GVH%LxnjTx0w6R_GzG$TXjvt7$Mv{eAy#!eWk}E zLFtqVGwLu|H7=$If=G3#Mp1mK(HrLF_)~Y`IFVtM^AiFwJ(ELKO)W=5r&uckB6(|q#+SFu` zllOW>NXMQ~^V`mLFRRIno2|JJcg_^wKt-#Zdj?iwmE>LVnc*yTpPP|)Q{sFUa3(bk zgOb^{0Z^h89tqX+ZScZAPTt+Q*}cB5a%>`yFfrz}Iz=RZp2{JV`vnm~zXjJ?;j3Th z%_6fQ`nY!M3?nI0RY1V!z*m@scgNuJ#rmGtGYP5_9D=s*r26}ECPqr=k`1cS*5gQ< z<^@>~1o#znzL|`WY?z5byf_PPYFI_&HIUO@=kVcc1FMTYCOfSJ)_H6Z1ZAar&!-l< z<|<;UNSl3YwU$IYJSMT`D<8RR=u1*Lgc8#=uV`-=Sv|#_$2CkG0M1 zpM4ux#zZC==2t@qN;xe`N)2F<-OG4zR8;G+hSjvCH3D*deUE+ZW59L*vXh75iY*MvgGXs~Q4-aH z8ll&|Bo7j2a`30p{DnoJ4pxCxBltS>nVhO&19I2lBbv+PRBOX)$4`LQ)2r*;v%aJj z-Se~bMc~gU8bo9t&O=472e7(6^FCv(opP;*Tk`7CmOg7od-3`r|6}8GX9E=`LzVmk zE4J1`?AQb3nml#`uG!>_=}eLRv+t)b<3sk|X_+_PllI8idC+Y!!!xB-)Zcniskn2u z>Xd#}kguvPtnRg=ax9!}@?~YMeT=RHyM&kfqR@w-hYSxqf+fo~a)eV-E$xkwFPw`_ ztf9Fr*$L;PZbfS48}28aGtXd>DT%xF-0#(}0VW~nrdr;yZ-OmtJ??v%hd;G@KM_oz zFlVrj^=OF`Zpc3_T6TdLrHu`Yk+KNg@Fqw1yzZ=pOnBmY3Kxo7Ez$9TOVDf63Eu18 zR~%%%!a9`WyztAnB%gJ0OVCAK4(?L$Yl-Qox}LZ=toCGOEp%}>v~;Mf+N}$G4!h_b zcI;T=Y0u?5qhU9GY*gixo)tUY^w{NLR8bBti#}Mr?S~rbnbEoK8E}fv+*pMyQLctV>X1y0U-#?n`%X&U zXBkx{Wyu6H*kf?pGTU^|_hXdH6jQ2z0(a@D!gL zioRY=V<_tf-NYzn5Bh5R#d77bu#f;HiJS(qO)6yUtdkn|exBymYRmA*r^h_L9a>Y@+-_73p`4~`*sd}(p7W`%pP64cHJ6)f z-7%LCnQQ$aAHPsh1{aySBfOeb8r&PESmjfqDUXLPM;5_)9M_#>yD48$)g$@5f~IB*2k)$8=E zNd%z-bKJF@RCo=XUMo5uUf6^5KhXbfE8iv+?_J*!(k$))X@hyEt~b7%_GZ@o9~T0#0AoDqB-;3&M(6hc(z)lQ|{PR^|&>a|uue5X(L>j&IK6euemEq`^wYVU>n z#uD^I^3K+rdk*DI`Ma=o)wY+Xdp{`&n^@O!<=aibXO(C_c1h~Al6|Ks%D!*s6o+^7 z>DDDH+8j6R&aJ%|tDNTY`00g`37wo&_F0|&5s>?}khm3g8vFG{n0@rGFD3Sbk&rGZ zspdg#_L3fv^~x*ul7Ujwt&iWfJ@j<3J3rmp+}dhK3tbfV1eBj9H_s_1;Y>V5Y6l2w zT~jZD%1d_5yp2psWUhVqOkoU#fH?w%MMP|d1^JB4n(bdEz8Rr{xkF?}9c=~CJ^=@bDuN*=Y z80Su^@bLO9%XePrdfKK?!6yQ9<2vMEP@CxO-IkCrqXd<{Z9RoE6YO%In zm@Pw&BqV!LY6`hK1WMaCQ*nsp*)J`8SwZhNt;yRJ;#n$j^#opc^Yxy-uz;KDK0-mM zf+*BA*x{OMW-ZE4XU(gN;Z1TZWwBn*qbx8g;m_C+V=4M~3!iAV)YFKcA5CZVzFCW> zJw$6M5i3Hg&@3!es-Alu3X>e$R)R)?(cDr!X{*BehaClJL9r1Rj2+!j&z>PlLS%_I!63Z<)d)(*OhL(;u69zzNsR%nH94jdn2zKg1Azp(8>%Z1T9}SCzR(6y;AeVU4PfzZ>5BIyn+BZI!k6<7`f!??tEf zeH)6=rrN`|V?Gxq=0c_P2tPeMu`@qzr*7=3PdoPAu^ZW6e;V;K-)Vnz;j3N$V(f9c z3{6mMZ8dQ8(>P&EJ{!0SrT9%wZ=To;ZZnz*OBoBQqKqzp&o+a{;D{ne|d2yoQbMGp)^t)GVOhjm3wz7vFzbVi@Tx6W` znU=GD8oA~79Y-#WiBS%-Y9F!X3tlXT3nh&Wn{huOn^~GF+$Dk-I#`8$K-_RAc}tO8 zHmIITNg}4lQ3BK5i51Chs?z&hp^wGhl$CePzNMrHMYLb?&Ez##>9}i&1!G|>Mlg~I zM7NNh3jzZdL!-dhR8AKj4H@6uCrBdr3YWJJFH0;Kg}kK8f`~ijd-vo`i8E=5>Ffql zf<#Vm)7!^jB)dbnW*U|uhMiMK#{tm~($u}zeyT6~0rDz1JcT}-!#!3Dmgr2pmqf(( zT#dsrf>U|vT~0!dNO{vSMipS3l~hPA5Fs!vmQGPN{T+RKM7I2UVr)0K!x5s;W32@u zH}ach4+k8A>JNz^b4oBoCF5)Gx^$q)h0J-*44yKD>UWfaC(|tCQ)M2 zM2_*M_f1)DtJqe{dKT>*qAa(kS!}frLOZ7og_Q7!#zx!>;Xahp)Knd0akRtA{x$I$ zPCK`+O0wH5#lsb8g(=p?Yj&V!p(aWgWU;3)sX2=bkCyXi5zBduBt2`n)tm(}_0$UC zdtAr&xc*cLp89o9X5my}noqJ3QROVu7kErkd@{v+A@h?tsXm@MpFMS!rubsE$W~p! z9#$C(bskE5NyDrD>ngL9&V2khy2>ES(g`6^U5uDA&hU6tHf1E|f3@1*?3(2OvjjOo zVoF$IGF5`Cqa=>S$5;DA=VbHXxlr>Z-K50HbU7c>?#=`yS;w5%E~SVJPjHt~Ntcq) ze~guMcNSaLTB7D1bJC&RZ!ZY z{*mzD`?8DbZX_=s{3jQstnCW6!lKeCmD9STqM4D6@8q21++K0{oTQN$>KvT-@a4ks zNZ`Zf)2&$Q4FuyF7^6Y09M2041<_oUg9-@=2{JSZnwgoILQp6a8mORg6X-2C1da&? zVQBhK>7P=gag^CAsE1&3I-O@J%G#v#);sM)YV=bfO%rzP(3tm9B2Rwxl@Np&nF@b({fRXw^QEB>9r}=nLVIh(J z_JBYtJzjrCffulY|EqB%Kj=>%ot&J^-{IdlxPPxhH~ObrpyBU02y|?(VdzCYFf_e(t-q!bwF_h){OZ}{&T!2N&BKNzR$Rr?1bKz#qG|JRkjXm|gD zu*1ROf9o{Vm8c&ySZvnc&VPDugp&V2AM*w7AO94O_4R{2)55_I{`T<^2z9^fbR&=c zcK&&L)({rms?(1P(=vS-17AoGy1jAc#1hg_{H{o%_+!_0OKur$Jq`#IT6t5PWFl*H z#ZJHIP*CmK?WeF`0!MGR?EF|ukd?mk#|S=m6+J0a5ADlt3JEY(%AihJ@}Y#Ud6EpF0bc+T2+ zxG};mW)kiF*i`j2u~y)@OzujKSQSr1mat5rEDB;@O@k68nO3ut?xMux*sGbk#a`mv zuZ#=3>bzD?PqXuQd&mI6B$8~y>ym}ab}4@n^OZ@COB|-0!MSe@*_ntPD0)2Aw9lXfJm}N<@y)G7ZHHC8h0`ao5J+&tS6| zOanq}jdmtpEj@4N3OP#gUwG{sYl=Vl>T4L|g<*8rG-}_?d6Vmk@FlATQG48BGuj2Gh9z?Xq*4nis3`kZBcy6rs^ttLpSuxRaGconz~b)8M)|QEM{^eb zlufx?m*kHneOSmom7k7yPZ~ICKDQ@oN@U`cEaD;M_IqRmzxp0isx?>ue`7t24GA<+ zLZ)AB?<>s|@((5BueZ$%W2zN60yboQJ^Ln6k?uGFDF0i?=ST0O?U*O z0l}EV&cAIH+P2f@?9Ke)#Tt0JwjvRIFKJ0d`uWVfioD}3kNvs=u{%y~k*964&tD`~ zCYFF+@O;s!V|{+}B7S?U^r7d8Prc3CzwDyb-__G@lVs{-OW(baYy-D_M`QTTXcfpi4$0Wu2?tCf6mvfOh zL}xZ=n{msMuWx9maHS#s^j0a41~v zj05CKj3c7(jG}4NYq3c5cCe8$48 zT*nVk0BF!SxX}No@t0O2*h+&+IrSU&PmL}3d0aTNJ4X98Kqk5#WANe;?F2Xlje&!$ zkQmATNyEqR06Sc$dunRp1h8v>0~6QAzzIO0afcHKMF8U0XpC-kU|>KSP^a<&lo}XF zjpDArTB!B)wK%0fKPqi6s^Hu}C>j0#8{Ykw={b1pT>$wwuysoUfq+^^+5e}-#t4%; zSyZl;x?^+}QI1uALKqEXza~%*iGg#EfwAPhQYV!jAi7amleZCqGuUf5IyF@#Sp}DC z*tpi?n=3gr^*+HJgV+cis?Mb_2L}2JO@LEX$vf^lJzh1C^(O#7okGbkaw*YxrGJc3 zYSbi&gy+APsB+8ap8tzwOerkygCl?x*i89emB;$ZZ}r>=Zo?}D1`w43LpDMo`zkXY z2vQf}cMJc&XndP%knF-@15i$<7S~1qYW~(oH~4EHfi+;#T<%mIK(t%~9zulyNID4d zH013BAE!_sM2|}ML}So)banj#2Ibx3G4GpOrM5-H=lbU0SMMuV+Xe@!l2tM*)NCr$ z2zeW)y2vVR&le^)f);J-H(kH}8NA+T)3cZCxZ2BIk?GqohYi_oEBy_$hfjJ*)ft+e z?YNpQJb9Dkzx;bza)YFu6eaYX>`2i8?A%y1;{B7BYRPkhY9bB!sOuu^B3BVzumV_#(7EdaRKy^;UVfbZwp3t4DC9Gqyz?<_4Vs^v#)Bwzi!?KTzgt+cSzI^fmdaYo%4u7H(eb@!MAc*~ou z-+Nd^Q-~s{9PB>L%sJ7*sI}hkepd)U=S58i9eSIB;3T)W${SAn&3fTcuhXHKJ)LtN;2B!j; zz`qfZNEDSG-~3>ToFr~qZD+OX&!ie)f;$G#^^X924xv6bH@AKgDC1nBZklaJUG-g@ zo#b*CZya#(KY!3s9VFm04?NCqTF0rL@b43D^|uCiTkb#Ij#YY^L|~6#x}IPPNTi;U zZ?<03RBqy%lZ*F-$9niEtABreZNdtN8^H~qQF#Vk0`RV8nPxJDY$>0s? zF6So8aP}dLoK|UqqhxJsZFGQ$)U(SD_Z0Zk4L+35=H2eCy)si;e%AI_J-;{WAy%4v zZ?@wAyNrE6^f4MR%@phQqf^Tfhfyeco(D9Cxoozuoj_*oAan+cEjZh;_IDHHy@mt! z0L;v#s6bFCWX4}{Op-`In!NaDTgaUiIOISaDB{_C;7kQS3rLN6Vgep47VsSuwe231 z6}mBabtL1@BoAQ17&vbubPRw8q<}3cDf6}vglT}TGDKXW6}n5W4T^O-1xzMU)U5*j z&CI-kliPn^G`Oz*Lr#ZGK?iY*x}7JJ599*q3SEB*nlC+uKyKQob!~q7FfwYYffpo*JdddiYNCs!Z+m-on6@ z0s&nQC8+QI;h(2Uj}^eS0HWG9vBwt!2oPZSUc-h5P&=>-pFe>ASvu;#$rg+O5CNj3 ziU*GcB-`WLtG4Kfo1o-31pk&98ut+f(+1qGoE(Bg0=zGwzT3K`a7 z*lrT3CnUb4q}26p$^WK2`zt2elD{<&s3iQLZgn8Azr+BfmH@=!CNikTqhcx)K=FD! z%;jy$IJ%jxEPl0(g}o5YQgkKb^<`GSagM7q+BXao*{e({I12(`XU=z?d-T-m#yOGm zeoVER%T?m7HVm46Of&q7+74ck#`2^hixd(d>?;FW)~(mBI-g0=m*eCyc8*JvCow(< zGkIBlJMY`rQ`-WHu75$Yrp}7F&9VA&7bTtLA(7hR9qt|O1%7}w4?Z+*@~o6Kp2+&> zZb=K9Q7VDJ%IdNH=+a%3bHgHV3-Mcmb$|q@+e1@Sx(;jruo#sdAArw9NgQBuYXHLm z&h9m=A3)(I-18gYkOQG2F@TE$rURx2z)~6FD+fp~my#b$>O$_UjyylO&X_>g$jL$f z*cKL>85kIxT~Z>WZO_2L#wyqpOM4YYqok5c`V1J8l_}SMZ&LpTLgTN<{>9q6S(QJmgR)~-9DwJdl*TVX|Dwc5zx-eob;$8>tffe?y+pxEp@k%)8`eygyN)Q&VciK4 zeSbYo{!qDt7MFoVHY3I&75pB{o5W6(Fz(ArPqAPYFJ^OOW^gFxfc2Hd4{@_M+hawd zDusoh%ry2qW{$$_(8uv7OFCqV<8SD5c3h86iN2v@4Ao*lnjC_b47@P9A$eGf$+`Gh zr8efI7Wicbd#414xD2-$LRj>MrJlHi5t5s>Iq?YqE>kLI$@vzc+mZOJs-VNtbjavA zFN)KFza%g}H2T6cVf_~W%-wpP6B?y&J5`~DwfB6?eGF!m_=#4nWK%Mt_X5fn`{j_0 z85t#xazdRe_vNx)l!#h{l>qVv)JN#RKT8AQP0kgtH{3*UkIw{%p~Fjfa+izq;SRC?96u^GhGkqTqD2rvT! zmy?}ZM4=q9*kVBOfEA2zXX+B(Ov=554wI)T_LMNRY zpRNVIo+kVlX56ghs%0TrEyQD{02eDsf2@O#D$^a}g1kB^l)b1~CNdysBb0k}`L);+ zv?#B(Jc%Y9l6DR3f@ONkOYf*%?C=;P(all~g=4ar98CsP+02*l{MNzWX8Bzcv@csS z=V><|dsS}id&Jt?*i)qb#e9wU3c$)4fIH3jPpWn4$?c3&3)C3uE6CqR3Wo*eyobR6 zd*04n1i~7|FpSE1vTrWc6hCSTX_k~}0bbOUM5>mP2ZRO`IXSg%DiB~KfLR|14U2)V zzQ+LB7$EWW-zmx@iE7{~lzd<4n0hGSz9j*VfPn+x;J;}L5CwqXS;{JSDO?ai!vWIB zie<9_0RF9Xu2DVgzwx+KX^ERsC4j)*tx$=lKC)(jU-W)9|7<)-)J03wkwuHORTlW@ zfU-miHrdOZJRb9_ls{N=;g&#tnV$N9EvuS$!V)9EUXh)Cg8ibnWDAj^Dd9(uO{ z)m~+)E5%1GD#&HgQW*1nJN-9o8KB3If^V^4n5`!pwJ?=bvY7z-t#rVb9^VW=32mmf zdzEr2O1^S(ibSgGJJ4im`Z1qzz&%{A3zd6-P7?{M!$4NC4vg&uSW!7!)36cz+d=kB zAa~XP<)^Y6aHluxTVcQtd4TH_m@0wLI%H6w2-*(17 z5`RQx=0RclN00+}epRyS6G$MKyi?tNqA}o0?c-ZCrDgtaXdD2&A*v{PNNg0;w&8 zNNV#3GMw-;F*4>dY-8lTSZawjvxu$#WBZstUczG|Ke(QvLI9Xnb&#n6HjY98N4LG- zfeAOZ1_r!|DtQmD6c$2FzW&%AS^>Z#ozTQW(N5WuNv?1`m~$cvONO2ZhKQ`|GoP7_ZqD_6 z-ln^hZo`nfl1!h=<*gT|+!a5o?I5UO5ldswz|1PdD<@Y{DqbMe0H{;<_DER|<;^se zod<`sz2BC6V6dT7MvVbk5%mxO_`<1-2{fR=fXBoiSb6sW`v4Axh2SQDUOl>if&;o;1xx@|`iPs@=uuk)j`dD*rxsB_4bWDV zrDwGNPjxLdv*-Rz@^}dlhp8UA$2U`%29W;WACCk!Y`~wB_Xsu{p`+vJ;?|BtiQff* zI4NWQHj-PyqQ0orq`R|VwGonb|3;kpBHf#xd+&x%NT&sLyQp4-&x(ie_NyIFS*sO& zvbt0FK5#Dh!hVCD*5+;1Zxwg$VEsSY6?KN(7uE~VQup0`8n?M_D{JdAZg|AX*Tp=vu5e6$OGlws(XGB(e5}&XPVK?_?@Xhf zP|3=diXNtMw_L|wMD{dAB=!%YQWFwCaexcS6UALibtAOhN52kCVDzd!222M3d%L&c7&IlP`c0zZm%|CS3X^sKHHfdUx#N z7nA%8Cf}|1COnJwZoMg)-jiFnU3@a^_%ZXH3rALcx^aU8^PBI^_O66jHyly@n!kJJ z_-W>~t*>7?obA6oa&I$uVK(e+=`7(F=@-SS&G6x-+Wix$o^ks>FKnFt`b#Tb z8iitc8!0S&7-dg#e#}`wOw7u*5N32hbf8)o+B0s*$yi3nBtmrC*x3riVxUneM+F(d zvATCr+4h_yp@`zdLr{5lSeh|Uv1n8ro1(nFI7;}W3^YziG?p}g(Y(sQA(8duF(?K! z$o4Vhye!>^F;ChUh3Jb>X{qqU$DD^{U4)R`f^l~vWH<$nYQj#& zoB?4T$Ue|zLKrh3s*GVNl?Cp^cov?!@Khcd+7wP+*@!fkY;cDrBUt!oqQm0{7AT1V z!NYMm;3#Cje4mc22Bh8b%p;7;BYD;Vj1Uy16Hn^Uy{dVVUbjHrUXneUfq^}ZWDzZj ztrU%drKK4aoGFxf@PHWg7zvjl!QiwU4jRtwFceE#%HbxM=FKZQ8XT82+1SB6G{T~9 zF`TBUx)NE1x(}{q(ZTE=3DJ|%B6+Z~PxQ{nvS^F3#R-C+#PQNPz^my=Z(+JJ;CP1) zIzG07tIkAhCCr`a3hHFxnK)S%VWCJFhCY%IF9!z{$;0v>1f2%f*^-blq|Av$;#1-Cxcdx zGYa%5MP5_lFw?IWKkx6`+&!)mde(4szOg2r*F$t_$S%&YD>XK-6aw6Ream_q$%LWP z5q71IeIhQ{&eRs07|kK30K2MF<=hq(I~vcV2ku}U(1B_vLK%hONp~e2-kvOTH{wpJ zj2#zzepIJGu|v1GAc3BfA%{CQ9H*$5loZPhb8asv)DhNcC+a+U&&JtZXq3uWsEuhx zizHq)2_gaYCBn(z;?IQKGpdVE-BH7TTC&DJ4ss`!Y;3k ziR9zrwC{J2O_OmLZN08=xHaq9XqlTx{9O*CyX|B0mT6h9I61l7V{r;P&xDfr4ioL$ z#T7K&`7}>f@j1i~*gtC1=1t(EcYdF(IZi*ERM2Yi%z^9V%j`t>vvy%li)ZrAvEuUD z76YdC^6)34uV8HM*~F9mJVH#eWjd8QFS8Z2H84inl{yk>I{JKv!kHNQoS(&ulJpBC zEXrI{-LJaFBQGUhr|om@7khS2oBL(Vuu!4vIlfEU$V%to1f$oS&Iw%HsoF`rsX9rO zI9*(9>^O%}?6?BgT|_ijeEayhONCZR9-=sLMad+i7mbh_@=*8gk}iTl-S>-lp}xA*J}lEQj>RC{IirEfb=eY)89 z<&7jINHt|kMl|%@)DNDwl+&yGZESk`D?*v&yHbWO57DF?_3Aa-9|6|vVOonfU^V+n zdY2gxfzw*g!357C{u>j{vd>p;8$R1NxL3A!E061tK-iKE*)-@bR|$c!oLu#*_rdI@ z1O1Qn!QOAj2Qvo7^zLwc{?)fJEXkdqUC{6R>bf|PGmir;9rMs>RiI5B$o!9LLC;=e zK)>_*nm=7k8=(~R@45dOs=pk#zc|1L`451uX`tYMb^3syb$g$>iguw%V8b@GuiwQ6 z_?-u0cJ1#a3;NwgDr`22%c(#Ih=5y-&_DDP^aJ7bmHR})M&2Ljf2^I9^BP zT;#nTD*gJuQRw}} zuM@v_EXk+(r~Vw^&2I`Mxs&q#QD+Bu8>Zm>hbwX%_m1(oW8<>0>8S@RBU%rb_aE@w z_x=jf{BMQITp%%-kmCUUTIKg6;}@i39y6=S=H2jRy=j~7G>(5nVe(8Qbh(~5dwbTg zd$TZ6;O5W%h~Wp&9p&KfnOeQhzs^0LSBqS?WgH7QUBfq`bDD2t`9fE#f-g?$^3z@P zRX^8%gtz&A=(@Cz|tDU0;1p;=IMpx;~p3*1NG2e_qC9 zlZUPUmJPj6RoczXhnCr`q$_qkM1%d0KQ>;E=-&x0?yh?s(mvhsW%I#DCBC?GXTijL z#Sf@~DQtz5#e~f0mQUv8Y!dc+*fL+tKGQt0=E6m|zp#1w7jZTtuk*B&h;xa0i!#qE2i17?A2Iyz(cy^5s)d)ISVT)8g zmVNFSf)@>Dp{Ls0JlR;KPj-nep;#R$lX6orm2q&)sbyJAHvq zr*${~d~DHV@NPoC!lIp?2s>p6X8a+jt1tH)f;UfTZjOJ_oDUg43Rs+IFk^~6f@ z3(rL|qoO~)F1I_eh)@13nkg-tEGCq>BkxwX8=y5f|A~byMDRnx7nQ!7_qk6kEi`?8 zRQOqEub56yqfS7q*4a8icARoZSv20(c<*vqQgQh}U;SRkKxXRtKKkZ{tg@w1=jO?Q z-BFujB{iP0z;2Z^ZE3^Kny=__bsG1SZ>fJ}%A7g_ z_nK?!+X%RDXJF-u$V_l^u1otneR$>hI;Rf8WU5Y-HLv zliB;lV0M#yv+dSiM5Fnt&GhphCt^27(7$e5t_X0KJgppBaHZ=IqCX=0Wy+4UxRfRw zZ1du~bl%ow9~8@(y<*N`wUTeInV}AWo-I>R9`mP8>!bF3>*iC@Tjs@>F=@X-%Q^j= zm=PMh{pal{G$A4{lebg9&bqxDb5a317c!U9J(4&l(n7m*DZ1d>^Tp3N%jjN1#yRFX z56{fRqI|KVx#d=n@ddovi>R(_$H4;577tjY^1Ns*tkcS=m`oT;UN0k}1d>&8>?cws zimhp*F$Q`*&GW{?{~xKmNB661{(-O2offVt-MCXv zbBmQ5vd~%UhxAeyy3|@(Q-WhM<+Uw^1$)J5Vl~=Xo9}5s6wbTl1Unt=^gL<}p@GJ? z?0>Lh{twAzU?X0Z71{Adqs?D z-*YFY@J>=e43-(e`EEIw%|$1HAPOPhyZ4Sw%0wo=tzM7H-T5w`EqKO^bX+o5+NPNF zS^hE<94sdj4aep3OX1YFA6|TO-l3*v2RZs^baZ+7W&OkzEwujXJO{tQhgPkYjRpF9aenP< zwalCrlLYuus&{(&geOxouOw#&io4~!S%a37Q?J_fm$6HjZv{gmd<||(bD;HrvQh8~ zemGfz!tthDq)N?QeYf4k&~lA5(~|JK!3% zR9QZ>{$j=dBLYV_RASmTAj3tg*s2bhdQ7uHI^n}|zki|Fjt+wTdjHLcF)67^j!C{l zjw!MERfnBc{Ar_j#SWhI?&xw+T!p@Y4_W-ZSq*m*a6tk_@!;xOawIExun9*Re>k<> zR$sliFyIl=TQR=-ux($xGNO0Ce=$nnT{7KpM6=BK+o`|4dk)A>Y=tQeZv31%c5W?V zf6f!4YG9rU)1KR}NWS`Jj4qe1g6`}71^YXfzE4X}Zbg2%6m;?OO_4Zk_FF!Po8y}?=^3fagOKQ)~x)5*wO%Y~tnWM9A`9p7CL*BdQY_H+5 zi4WafnNHSFWUw5{39)xGpdsscTDxSGOLH(1Qtc8g^Qq)ndl)-?^*sdzhL;14F-8?k z!^hKK<|eeuW(s*G=NwHqwOsHyHiQ8I(W%c}TSPk#^t&J)F*Z!U7ogl?CZE0qlFzgi*^5T`eQ9p zM@~MhQ$Jli+w}Dz$IpuvL3WitE70Ft;`sNLt4??wiI~1mhyOnGY25Hb=;FuA=KM~3 z7miq2eA=n8$zB!aFFDdF8H85NJ!LX&b-|uQmI@-ow0U&xjO#h&yGmQFHHBdYrea8i zk|}zqqxqe~=ydxX)WkzN_5Geu*Et@Y#M;i17x@8ZHmi0XyM*B;n}qD_kKNNs3EC=; z21d!_1RskW*#wyzRqLUCkw{LF*Rb9S|AF0-YQ>fhtBO<1_>qP!m6>b-OKGD}Jh)j-KLGN0w%NaHA8qQm~U;lT;lT$V+m$ZF1A=G`gZBQF4_}XEurNNJap9 z1~pPi$!>|uBM$nnD!rL+_(?CdJBADO7;UyQQ}w>-JGgcF`O2Lm=Y9y|_8ZK1-gx$W zyKTPu^TEi>38>@LUcPkA9{-!={fkF_P4{;lmm^Wax4s@jbYYfn|9WD0Bw6TN@x|A7 zQodgv`|)eS^X=S`rMjL+VNd2V6Qs(1%7U1`|H`2RX4H&+|3E7}a_U}UqWZ&}_Rxz% z0+gcVZ?}WaB&VzUNNZ2+$w{fm;0^SybB|$XYeyui98<9@=`8)<&OeJ(IqVZ3FOo8* zOFDm;p&(J1(JR7{|DMLHjx#*ADfb^MoBHVq=~+zis0hcTMZY^clx%cetX)2$&@Uz1 z%|_(&wKmtVHKOYpkn){1^+$lT<_?Q zFI%s}Y6i!HY?HpdTo3x;YId>g4s8fWlVVBk#|sY+U76gEDnXs=i~RcFjcD5;Bv#+) z+Wkvvpn$K_96!5AAP{{~tq|b8)|gh~hz8q!g*^y#v&ah*FYq{9>pe}OPB`eO zd|!J^OgKwSH~__{Zd`Kd!BLPr@Q(xf`kdK}v!RC%{+9rV;Ysm(eE=SjBo8{AEr8XF zGGh@JWW1(wg>_0s%Nq8iQ1nR3)KI07t3oNCscZWyHju<)BL^DVtIWZkBuqyg80->% zN?m&^UX=Dq$KwmRcLIrLXs(*2C9{S*++@^9vp^;gVh`JAN`VEFtvhHEJGGc}V5qB4 zbmQU7XR@zm-8GH^!4Y!9^2yrV>VfH_*Ep z+_ToFCHjPsUK`6GF~%g%Vj?R$hNhh|Y{ z{0m~iqO@gT@khcRuUgE#cc-C|@i?E|5<$;|G;NLy+puP=XYO1*jM&u56;d8E$K$?_@s#Esk9ts~ z^5o3*M9#+*Fe(yitpa0&k*N5OnH%8%I)f`+86z&!S(96+BKlt)r z|8Kp4Mn~2_P?0G;*CA}?fR$LMrX;Dzz;bAsqK`yX7}hY+pw;4>vCp<+?S?gx#Lqbt zL}gqD(^j2#wFnIoLyyAz$|%^X^4-kHGA5O+?Y8zlFs1v zQ#2T+79+mUQUKr|6Q|$+-yu1NF(ew2#sfru<)Ot)iA7Kz9le7<^5Y^4f$!)e06GBd zKpZT_(S4k%?3)NNN)APhR>XJ&VZjrp(?kJ521||a3;+-lGBk!z=#EG}FEtJrg>EB) zhgWI7UPQsRKHD@^ajHImK%>QgA$|)S0OJ0r1HQgL3}OB7j~&GceJ}lA_^eeJxtt-* zuZ-&V7X80eb3e!S$!RE1chHQc>a0~${3>0Hr;i>Qz_kMkWy-{1gsB>93fH$vMnAEjJdmR~5*O&ma)&c-2Zi#S% zwUnhw9-sX2rJ$T0Gp*@YfD8PQ3cRS%VXHj zAczNhH@>v)Fil#<;f}-axmu1>87;0696*v^5nOCE;-@uu0si&>lc*GtWs>w z$;dOWGApwt(#u|;t~GBqnkY(2>?I1Rek20skT@_0I;5q9psC_DR@mA}G14DM@hlRx zoKIFlCO#V;UJ@vuJF*f1quJAN0dQ<4R%cY?Tw!>qu9#%~AA%C{y?!Fy64JXH3$N2+ z!1k9alUoaPO8|VdcztIf?}-5kjBhsvQfm*p6lSzNpaZoRDt4Ahk23cj+%Ua8$$qD0 zFp)J-XXOjMeW}@n|G==zvPAPC7BO_i!qKhZBTXv4{$ls>j$U1}>84zl$YUL8t( zH9LQ~5_t8GO~gX07kf)7>zqe+Oushd;^1a?(CFk2i$NHh5oZM;74tE!v`JfIt04|$x`+l5Og-S32XQT=ZVfP{V3ymmL?=}B4y?-vy2V~Mv) z8guNKpep&ciYO_TXf-p%$19NwXpRq&F7|lbP{@RX1X*!7!YL3?>&f-FG2!Z$5hfz< zgIJX>)$jL_?L7zAHF%dENd|xe^?=gku{Sl?4cTckGAL{An|@P*UDmU&?5yGV^*2S3 zS{Rv~3|zU7mQksx6QC^ZI*^9+{lJmYzX7`RAQqW`>3ga_x`yx-EjZSY)5MewL;t z2T!o|XNq)C?ui>~fC6}4>a9L^F$HU_@9jP&rD6t|_pgaKL<_QGxWwYN8Uf_Onc+S` zE9IN&@w*beP2ZMKQKba1qszrW3wN&;2wGAjK|LuC^-f`pIlh=F-Uf5K7y7c&S=7}t zO9^YGaBoAPq^c!u{ikLFQlMGh2c5FK^HUh}xJSYLs2uGioD|P; z_KL{FcIz?LH?=JH6EZrzY*CKir&~^`v7=07>4SC=Ef{A|azBXUUF7SXP@_u26$!gc zrqP>{ZA~pP!!m_VwX?3__)C9uNUAn#hHit%FeRfJc{U*adrN+Atr>?1t%rWg*7eU| zxf!MKjSNsMf^>$4lAfBA;uE(1rjpDbUXjvP@>SFfNVz0ZG6R3KA0_Yi*V?RmJQ}Yw zRmj+tOywT7A+cm<{_Cb?;g8N3O>Q=~^-zyh2p(I%y`0^{C!^(L;y`h3B8~l~ru5z? zBmChAIUkHYA)5(AWFiW1Z(bBg7lKe$<-%<(;+?KxkfkeC^|hx@pmwxf+aIe~<}E!F zY9!@IJh1k4h*jlf?4y00?L;p+`fvTMXf44flyRWgantBVTc6difW2eJST+=1V?iYS ziJ2|xvK@#!7r1cqYD@Zu+_k4gwX*0(f~z6P4M;XUs(rOUZ`_TTFO6|;Cfh*v63bu$ zo@e&_@SA{LzIyG#)8t~0APnNPvL#O-b@K_4UpCl1kpbu>lG2sNOq3fnm3B?S8o1WC z%=W!VWJX->E?;3W46?EY3+%Bm0G%#xfI+VyMZsW{|~x#tfegUdD3qR;>4N;f|n zuiXi8QQ3496wKKou&F+;EAE9hVMVM`v~O;wpA>s8YAlO8meI-l3ZvU%u*qeh1vJpry&>J^qAo6kmOyd!@rfrk<`=`S)N6oXCin`HQ)IDNSLH- zKA%JlAO7-Za!Tx>R%n4Jhkne9 zrZm!MJMfIZxU`(G#xqBGcP2WB<{+2dXB46^(~w%cwb=t~Q1ic3Ssz#%HX>xbGeWRN zWMO1%1l&;IIN%f)e@m#>)+8+*yPQ*uuloz3Yh^zhUBA(NAJSiL=KXZp=1_Cs-JFoh zw3FnQPBo4G#HoZM&F~QJ&X|7%$P8__y93N92dC)H9~6|_#<*3-N{(nP=V3lN+FGxO zpr*pFnf|k6bXqUn0~C!I?$TPC&wa+M>kA9jB@i~N&!NHtH(zMvbqo_MN5I5ED>Fgm zR8o>s=2vn=E-^&HaMg2cYOJVcr6!x*6V(`MKkN1J-XHqQ+qW*2PjBV-@!<>$ zC-R8LPUOpIeuEA!Q6Gt0Dell5Famd-IvzNeMRe5^rWma*1lvA&@s?p_G+D;llD3`? zPFhI|aGqGb%B!GG6@IQgS!OC3Q1U8sQX>fGn%l_j5(Y2c!HRhzcZ~_Mkn^CkA=>5O zwkBv(e3RD@XYt@3%VLMZ3N5$#J9D+m^f6-duMo)0OZ;9leh?X+JQ` zJ^N}JCq=*aVCw23@xrKY1ul2Sc@67by+ueT4qYOTdMufJ61Yg8f(8mC#kKu&-fw+p zax1D10GUA#=0Bx#s%B Date: Thu, 11 Jan 2024 14:03:27 +0100 Subject: [PATCH 15/24] Added test coverage reporting for pull request for testing branch --- .github/workflows/test-coverage.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index e0ba4b36..2806a966 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,6 +4,7 @@ on: - master pull_request: branches: + - testing - develop name: test-coverage From 69b84026c4105d9f27e021279f4acc6f985ae3bc Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 15:49:30 +0100 Subject: [PATCH 16/24] Added relaxed testing for platforms where Reca is not available --- .github/workflows/check-full.yaml | 20 +- DESCRIPTION | 2 +- README.md | 12 +- inst/tinytest/test-RecaFormatChecks.R | 6 +- inst/tinytest/test-RecaWrap.R | 7 +- inst/tinytest/test-StoxAnalysisFunctions.R | 718 +++++++++++---------- inst/tinytest/test-StoxReportFunctions.R | 16 +- inst/tinytest/test-ecaOuputputConversion.R | 6 + 8 files changed, 412 insertions(+), 375 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 8288f32d..6de0fe8a 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -21,10 +21,8 @@ jobs: config: # for windows and mac all builds are pushed to drat repo on merge to master (except pre-release builds), except 'next' - # 2023-08-23 fails, not getting Reca. Because it is not built yet for macos and next or 4.3. - #- {os: macOS-latest, r: 'next', pkgext: '.tgz'} - #- {os: macOS-latest, r: '4.3', pkgext: '.tgz'} - + - {os: macOS-latest, r: 'next', pkgext: '.tgz'} #tested without Reca + - {os: macOS-latest, r: '4.3', pkgext: '.tgz'} #tested without Reca - {os: macOS-latest, r: '4.2', pkgext: '.tgz'} - {os: macOS-latest, r: '4.1', pkgext: '.tgz'} - {os: macOS-latest, r: '4.0', pkgext: '.tgz'} @@ -169,12 +167,24 @@ jobs: sessioninfo::session_info(pkgs, include_base = TRUE) shell: Rscript {0} - - name: Check + # turn off testing of suggestions for configurations where Reca is not provided in StoX package repositories + - name: Check without suggested dependencies + if: runner.os == 'macOS' && (matrix.config.r== 'next' || matrix.config.r== '4.3') env: _R_CHECK_CRAN_INCOMING_: false + _R_CHECK_FORCE_SUGGESTS_ : false run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} + - name: Check with suggested dependencies + if: runner.os != 'macOS' || (matrix.config.r!= 'next' && matrix.config.r!= '4.3') + env: + _R_CHECK_CRAN_INCOMING_: false + _R_CHECK_FORCE_SUGGESTS_ : true + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") + shell: Rscript {0} + + - name: Show testthat output if: always() run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true diff --git a/DESCRIPTION b/DESCRIPTION index 4275f047..50a84f4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: RstoxFDA -Version: 1.3.0-9002 +Version: 1.3.0-9003 Date: 2023-12-07 Title: Fisheries Dependent Analysis with RstoX Authors@R: c( diff --git a/README.md b/README.md index d1e8c0ad..1a7e915a 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,10 @@ The preferred way to communicate problems is by raising an issue on the RstoxFDA When reporting bugs, please report the versions your were using of R, RstoxFDA, and operating system. Please also report any error messages, and if possible include instructions for how to reproduce the problem. ## Reca -Reca is a library for estimating total catch at age from commerical catches. RstoxFDA contains functions for adapting data to Reca, running estimates, and plotting or tabulating results. These functions are availble in the StoX user interface. In addition some functions are provided for adapting Reca to other data formats than just the ones supported by Stox. +Reca is a library for estimating total catch at age from commerical catches. RstoxFDA contains functions for adapting data to Reca, running estimates, and plotting or tabulating results. These functions are available in the StoX user interface. In addition some functions are provided for adapting Reca to other data formats than just the ones supported by Stox. RstoxFDA development is otherwise independent of development of Reca, and Reca is only a suggested dependency. That means that RstoxFDA may be available for platforms (operating systems or R-versions) where Reca is not available, and functions dependent on Reca will not work in these cases. Reca is primarily available at: https://github.com/NorskRegnesentral/Reca. - One may also consider installing from the fork at https://github.com/StoXProject/reca or from the StoX repository at https://stoxproject.github.io/repo/, but these resources should be considered experimental, and they are not backed by a maintenance policy: ```r @@ -63,4 +62,11 @@ Currently the latest release/pre-release of RstoxFDA is being tested for the fol * R 4.0 (mac, linux, windows) * R 4.1 (mac, linux, windows) * R 4.2 (mac, linux, windows) -* R 4.3 (windows) +* R 4.3 (mac, linux, windows) + +and is tested with Reca for the following R versions: + +* R 4.0 (mac, linux, windows) +* R 4.1 (mac, linux, windows) +* R 4.2 (mac, linux, windows) +* R 4.3 (linux, windows) \ No newline at end of file diff --git a/inst/tinytest/test-RecaFormatChecks.R b/inst/tinytest/test-RecaFormatChecks.R index b3c09efb..d21dcb6f 100644 --- a/inst/tinytest/test-RecaFormatChecks.R +++ b/inst/tinytest/test-RecaFormatChecks.R @@ -1,3 +1,7 @@ +# ECA tests are not run for platforms where Reca is not available from StoX repositories. +env<-Sys.getenv() +if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") @@ -145,4 +149,4 @@ errorPrep$GlobalParameters$delta.age <- .01 errorPrep$GlobalParameters$lgamodel <- "non-linear" expect_error(RstoxFDA:::sanitizeRecaInput(GlobalParameters=errorPrep$GlobalParameters, AgeLength=errorPrep$AgeLength, WeightLength=errorPrep$WeightLength, stage="parameterize"), "Some required global parameters are NA: thin") - +} diff --git a/inst/tinytest/test-RecaWrap.R b/inst/tinytest/test-RecaWrap.R index c9283512..1a60d039 100644 --- a/inst/tinytest/test-RecaWrap.R +++ b/inst/tinytest/test-RecaWrap.R @@ -1,4 +1,7 @@ - +# ECA tests are not run for platforms where Reca is not available from StoX repositories. +env<-Sys.getenv() +if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ + fishdata <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "fishdata.rda"))) landings <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "landings.rda"))) @@ -237,3 +240,5 @@ expect_true(!is.null(recaObj$AgeLength$CovariateMatrix$constant)) expect_true(!is.null(recaObj$WeightLength$CovariateMatrix$constant)) expect_true("constant" %in% rownames(recaObj$AgeLength$info)) expect_true("constant" %in% rownames(recaObj$WeightLength$info)) + +} diff --git a/inst/tinytest/test-StoxAnalysisFunctions.R b/inst/tinytest/test-StoxAnalysisFunctions.R index ff4787a6..241a9979 100644 --- a/inst/tinytest/test-StoxAnalysisFunctions.R +++ b/inst/tinytest/test-StoxAnalysisFunctions.R @@ -1,357 +1,363 @@ -#context("Test ParameterizeRecaModels cache") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticDataWDupl <- StoxBioticData -StoxBioticDataWDupl$Station <- rbind(StoxBioticDataWDupl$Station, StoxBioticDataWDupl$Station) -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataWDupl, StoxLandingData, FixedEffects = c(), RandomEffects = c()), "Malformed StoxBioticData.") - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -#test wrong groupingvariables -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) -expect_error(pred <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData,GroupingVariables = c("")), "All 'GroupingVariables' must be column in 'StoxLandingData', the following are not: ") -RstoxFDA:::removeTempDirReca(fpath) - -#test non-linear setting -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "log-linear") -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "non-linear") -prednl <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData) -RstoxFDA:::removeTempDirReca(fpath) - -fpath <- RstoxFDA:::makeTempDirReca() -# check that seed works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) -paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed) -paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed+1) - -expect_equal(paramOut2$FitProportionAtAge, paramOut$FitProportionAtAge) -expect_equal(paramOut2$FitLengthGivenAge, paramOut$FitLengthGivenAge) -expect_equal(paramOut2$FitWeightGivenLength, paramOut$FitWeightGivenLength) -expect_true(!all(paramOut$FitWeightGivenLength$fish$tau_Intercept == paramOut3$FitWeightGivenLength$fish$tau_Intercept)) - - -# check that cache works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155) -expect_warning(paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut, paramOut2)) -# check that cache fails when arguments are changed -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 51, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") -# check that cache fails when data are changed -prep2 <- prep -prep2$AgeLength$DataMatrix$lengthCM[1]<-5 -expect_error(RstoxFDA:::ParameterizeRecaModels(prep2, 10, 50, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") -# check that failed runs didnt touch cache files -expect_warning(paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut, paramOut2)) -# check that new run overwrites cahce files -paramOut4 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156) -expect_warning(paramOut5 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut4, paramOut5)) -RstoxFDA:::removeTempDirReca(fpath) - -fpath <- RstoxFDA:::makeTempDirReca() -# check that halts with error when no cache is found -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "No cached input found. Re-run with UseCache=FALSE.") -RstoxFDA:::removeTempDirReca(fpath) - -#context("PrepRecaEstimate: Missing values warnings") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticData$Cruise$Cruise[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Cruise")))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Station$CatchPlatform[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("CatchPlatform")))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Haul$Gear[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear"), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Sample$CatchFractionNumber[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("CatchFractionNumber"), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual$IndividualTotalLength[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Station$DateTime[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()),"Cannot proceed with missing values for Reca-effects")) - -#context("PrepRecaEstimate: Missing cell warnings") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) -StoxBioticData$Station$Area <- NA -StoxBioticData$Station$Area <- c(StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:5]) -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear", "Area"), RandomEffects = c()))) - -#context("PrepRecaEstimate: StockSplitting") -manual <- RstoxFDA:::DefineStockSplittingParameters(DefinitionMethod = "FunctionParameters", - StockNameCC="S1", StockNameS="S2", ProbabilityType1As1=.8, - ProbabilityType1As5=.2, ProbabilityType2As2=.6, - ProbabilityType2As4=.4, ProbabilityType4As2=.4, - ProbabilityType4As4=.6, ProbabilityType5As1=.2, - ProbabilityType5As5=.8) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual$otolithtype <- c(rep(c(1,5), 1045), c(1,5,1)) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual) -expect_true(prep$GlobalParameters$GlobalParameters$CC) -expect_true(prep$GlobalParameters$GlobalParameters$CCerror) -expect_true(RstoxFDA:::is.StockSplittingParameters(prep$AgeLength$StockSplittingParameters)) -expect_true(is.null(prep$AgeLength$CCerrorList)) -fpath <- RstoxFDA:::makeTempDirReca() -#make sure it works with trailing "/" on path -pathWtrailing <- paste0(fpath, "/") -param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWtrailing, Seed = 100) - -pathWsubDir <- file.path(fpath, "subdir") -param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWsubDir, Seed = 100) - -#context("check that age group names are set correct for stock splitting") -expect_equal(sum(is.na(param$FitProportionAtAge$constant$Age)), 0) -expect_equal(param$FitProportionAtAge$constant$Age[1], "S1 2") - - -#context("Check that back conversion to eca objects works fine with stock splitting") -ecafit <- RstoxFDA:::stox2recaFit(param) - -result <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData) - -expect_true("Stock" %in% result$GroupingVariables$GroupingVariables) -resultAgg <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData, GroupingVariables = c("Gear")) -expect_true(all(c("Stock", "Gear") %in% resultAgg$GroupingVariables$GroupingVariables)) - -RstoxFDA:::removeTempDirReca(fpath) -expect_true(RstoxFDA:::is.StockSplittingParameters(param$AgeLength$StockSplittingParameters)) -expect_true(is.null(prep$AgeLength$CCerrorList)) -expect_equal(param$AgeLength$StockSplittingParameters, manual) -expect_true(RstoxFDA::is.RecaCatchAtAge(result)) -expect_true("Stock" %in% names(result$CatchAtAge)) -expect_true("Stock" %in% names(result$MeanLength)) -expect_true("Stock" %in% names(result$MeanWeight)) - -#stock splitting w warning -StoxBioticData$Individual$otolithtype[1] <- 9 -expect_warning(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual), "StoX: Some aged fish does not have Otolithtype set, or have it set to an unrecognized value. This may slow down Stox processing of Reca results.") -StoxBioticData$Individual$IndividualAge[1] <- NA -expect_silent(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual)) - -#context("PrepRecaEstimate: AgerrorMatrix") -ageerorfile <- system.file("testresources","AgeErrorHirstEtAl2012.txt", package="RstoxFDA") -ageerror <- RstoxFDA::DefineAgeErrorMatrix(FileName = ageerorfile) -expect_true(RstoxFDA::is.AgeErrorMatrix(ageerror)) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseAgingError = T, AgeErrorMatrix = ageerror, MinAge = 0, MaxAge = 14) -expect_true(!is.null(prep$AgeLength$AgeErrorMatrix)) -expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Seed = 99)) - -#context("PrepareRecaEstimate: configuration tests") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxLandingData$Landing$NewConst <- 1 -StoxBioticData$Station$NewConst <- 1 - - -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("NewConst"), RandomEffects = c()), "Only one level for categorical covariate NewConst") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("NewConst")), "Only one level for categorical covariate NewConst") - -StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(7,8,13,4,3,4,11,20,4,5,6,20,4,12,3,3,10,4,1,20,11,5,11,5,15,8,14,7,10,6,13,16,11,14,19,20,2,19,11,16,15,5,11,11,9)] -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Area"), RandomEffects = c("Area")), "Some random effects are also specified as fixed effects: Area") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area"), "UseCarEffect is False, while the parameter 'CarEffect' is given") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area", UseCarEffect = T, CarNeighbours = list()), "The CAR effect Area is also specified as fixed effect or random effect") - -#check CAR value cehcks -carfile <- system.file("testresources","mainarea_neighbour_correct_codes.txt", package="RstoxFDA") -car <- RstoxFDA::DefineCarNeighbours(NULL, FileName = carfile) -expect_true(RstoxFDA::is.CarNeighbours(car)) -car$Neighbours[9] <- paste(car$Neighbours[9], "30", sep=",") -car$Neighbours[29] <- paste(car$Neighbours[29], "08", sep=",") -prepCar <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), CarEffect = "Area", UseCarEffect = T, CarNeighbours = car) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCar, 10, 50, 1, fpath, Seed=42) -result <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -RstoxFDA:::removeTempDirReca(fpath) -expect_true(RstoxFDA::is.RecaParameterData(paramOut)) -expect_true(RstoxFDA::is.RecaCatchAtAge(result)) - - -#context("ParameterizeRecaModels: simple case") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=43) - -expect_true(c("FitLengthGivenAge") %in% names(paramOut)) -expect_equal(length(paramOut$FitLengthGivenAge), 4) -expect_true(RstoxFDA::is.RecaParameterData((paramOut))) - -#context("test-StoxAnalysisFunctions: RunRecaModels") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -#context("test-StoxAnalysisFunctions: RunRecaModels with GroupingVariables") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage")) -expect_equal(length(unique(paste(results$CatchAtAge$Area, results$CatchAtAge$Usage))), length(unique(paste(StoxLandingData$Landing$Area, StoxLandingData$Landing$Usage)))) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) -expect_warning(RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage"), CollapseLength = F), "StoX: Producing estimates for all length groups in combination with age and several 'GroupingVariables'. This may exhaust memory, consider the option 'CollapseLength'") -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: RunRecaModels with random effects in landings") - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[sample.int(20,45, replace=T)] - -fpath <- RstoxFDA:::makeTempDirReca() - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear")) - -#check that seed works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=111) -seed <- sample.int(.Machine$integer.max, 1) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed) -results2 <- RstoxFDA:::RunRecaModels(paramOut2, StoxLandingData) -paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed+1) -results3 <- RstoxFDA:::RunRecaModels(paramOut3, StoxLandingData) -expect_equal(results2, results) -expect_true(!all(results$CatchAtAge == results3$CatchAtAge)) -#/ seed - -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=100) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) - -expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=44) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear") -expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -#context("RunRecaModels: Test collapse Length") -resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear", CollapseLength = F) -expect_true("Age" %in% names(resultsWlength$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) -expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) -expect_equal(length(unique(results$CatchAtAge$Length)),1) -expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) - -#context("RunRecaModels: Test collapse Length wo Aggregation") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, CollapseLength = F) -expect_true("Age" %in% names(resultsWlength$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) -expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) -expect_equal(length(unique(results$CatchAtAge$Length)),1) -expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) - - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate missing arguments") -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = NULL), "Argument 'ResultDirectory' must be provided.") - -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate simple case") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -expect_equal(length(prep$CovariateMaps$CovariateMaps_randomEffects_AgeLength_catchSample$values), length(unique(StoxBioticData$Individual$HaulKey))) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), MinAge=1, MaxAge=30) - -#context("test-StoxAnalysisFunctions: RunRecaEstimate simple case") -expect_warning(result <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Thin=1, Seed = 42)) -expect_true(all(c("input", "fit", "prediction", "covariateMaps") %in% names(result))) -expect_equal(dim(result$prediction$TotalCount)[3], 10) - - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, missing sample dates") -StoxBioticData$Station$DateTime[1] <- NA -expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish), missing CatchFractionNumber") -StoxBioticDataDelp <- readRDS(system.file("testresources","StoxBioticDelpr.rds", package="RstoxFDA")) -expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish)") -StoxBioticDataDelp$Sample$CatchFractionNumber[2] <- 3000 -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -##context("test-StoxAnalysisFunctions: RunRecaEstimate, stratified samples (nFish)") -#to few iterations to converge consistently. removing test -#est <- RunRecaEstimate(prep, 10, 200, 0) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate with with random effect Area") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticData$Station$Area <- c(rep(StoxLandingData$Landing$Area[10], 20), rep(StoxLandingData$Landing$Area[20], 25)) -StoxBioticData$Station$GG <- c(rep(StoxLandingData$Landing$Gear[10], 20), rep(StoxLandingData$Landing$Gear[20], 25)) -StoxLandingData$Landing$GG <- StoxLandingData$Landing$Gear - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area")) -expect_true("Area" %in% names(prep$Landings$AgeLengthCov)) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate cellEffect") -prepCell <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area", "GG"), CellEffect = "All") -expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="Area"], 1) -expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="GG"], 1) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCell, 10, 50, 1, fpath, Seed = 451) -expect_true("cell" %in% names(paramOut$FitProportionAtAge)) - -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: RunRecaEstimate with random effect Area") -expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 100, 0, Seed = 112)) -expect_true("Area" %in% names(est$fit$ProportionAtAge$Intercept$cov)) +# Tests StoX analysis functions that interfaces Reca. +# ECA tests are not run for platforms where Reca is not available from StoX repositories. + +env<-Sys.getenv() +if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ + #context("Test ParameterizeRecaModels cache") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticDataWDupl <- StoxBioticData + StoxBioticDataWDupl$Station <- rbind(StoxBioticDataWDupl$Station, StoxBioticDataWDupl$Station) + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataWDupl, StoxLandingData, FixedEffects = c(), RandomEffects = c()), "Malformed StoxBioticData.") + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + #test wrong groupingvariables + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) + expect_error(pred <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData,GroupingVariables = c("")), "All 'GroupingVariables' must be column in 'StoxLandingData', the following are not: ") + RstoxFDA:::removeTempDirReca(fpath) + + #test non-linear setting + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "log-linear") + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "non-linear") + prednl <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData) + RstoxFDA:::removeTempDirReca(fpath) + + fpath <- RstoxFDA:::makeTempDirReca() + # check that seed works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) + paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed) + paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed+1) + + expect_equal(paramOut2$FitProportionAtAge, paramOut$FitProportionAtAge) + expect_equal(paramOut2$FitLengthGivenAge, paramOut$FitLengthGivenAge) + expect_equal(paramOut2$FitWeightGivenLength, paramOut$FitWeightGivenLength) + expect_true(!all(paramOut$FitWeightGivenLength$fish$tau_Intercept == paramOut3$FitWeightGivenLength$fish$tau_Intercept)) + + + # check that cache works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155) + expect_warning(paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut, paramOut2)) + # check that cache fails when arguments are changed + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 51, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") + # check that cache fails when data are changed + prep2 <- prep + prep2$AgeLength$DataMatrix$lengthCM[1]<-5 + expect_error(RstoxFDA:::ParameterizeRecaModels(prep2, 10, 50, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") + # check that failed runs didnt touch cache files + expect_warning(paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut, paramOut2)) + # check that new run overwrites cahce files + paramOut4 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156) + expect_warning(paramOut5 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut4, paramOut5)) + RstoxFDA:::removeTempDirReca(fpath) + + fpath <- RstoxFDA:::makeTempDirReca() + # check that halts with error when no cache is found + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "No cached input found. Re-run with UseCache=FALSE.") + RstoxFDA:::removeTempDirReca(fpath) + + #context("PrepRecaEstimate: Missing values warnings") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticData$Cruise$Cruise[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Cruise")))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Station$CatchPlatform[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("CatchPlatform")))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Haul$Gear[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear"), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Sample$CatchFractionNumber[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("CatchFractionNumber"), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Individual$IndividualTotalLength[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Station$DateTime[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()),"Cannot proceed with missing values for Reca-effects")) + + #context("PrepRecaEstimate: Missing cell warnings") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + StoxBioticData$Station$Area <- NA + StoxBioticData$Station$Area <- c(StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:5]) + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear", "Area"), RandomEffects = c()))) + + #context("PrepRecaEstimate: StockSplitting") + manual <- RstoxFDA:::DefineStockSplittingParameters(DefinitionMethod = "FunctionParameters", + StockNameCC="S1", StockNameS="S2", ProbabilityType1As1=.8, + ProbabilityType1As5=.2, ProbabilityType2As2=.6, + ProbabilityType2As4=.4, ProbabilityType4As2=.4, + ProbabilityType4As4=.6, ProbabilityType5As1=.2, + ProbabilityType5As5=.8) + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Individual$otolithtype <- c(rep(c(1,5), 1045), c(1,5,1)) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual) + expect_true(prep$GlobalParameters$GlobalParameters$CC) + expect_true(prep$GlobalParameters$GlobalParameters$CCerror) + expect_true(RstoxFDA:::is.StockSplittingParameters(prep$AgeLength$StockSplittingParameters)) + expect_true(is.null(prep$AgeLength$CCerrorList)) + fpath <- RstoxFDA:::makeTempDirReca() + #make sure it works with trailing "/" on path + pathWtrailing <- paste0(fpath, "/") + param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWtrailing, Seed = 100) + + pathWsubDir <- file.path(fpath, "subdir") + param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWsubDir, Seed = 100) + + #context("check that age group names are set correct for stock splitting") + expect_equal(sum(is.na(param$FitProportionAtAge$constant$Age)), 0) + expect_equal(param$FitProportionAtAge$constant$Age[1], "S1 2") + + + #context("Check that back conversion to eca objects works fine with stock splitting") + ecafit <- RstoxFDA:::stox2recaFit(param) + + result <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData) + + expect_true("Stock" %in% result$GroupingVariables$GroupingVariables) + resultAgg <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData, GroupingVariables = c("Gear")) + expect_true(all(c("Stock", "Gear") %in% resultAgg$GroupingVariables$GroupingVariables)) + + RstoxFDA:::removeTempDirReca(fpath) + expect_true(RstoxFDA:::is.StockSplittingParameters(param$AgeLength$StockSplittingParameters)) + expect_true(is.null(prep$AgeLength$CCerrorList)) + expect_equal(param$AgeLength$StockSplittingParameters, manual) + expect_true(RstoxFDA::is.RecaCatchAtAge(result)) + expect_true("Stock" %in% names(result$CatchAtAge)) + expect_true("Stock" %in% names(result$MeanLength)) + expect_true("Stock" %in% names(result$MeanWeight)) + + #stock splitting w warning + StoxBioticData$Individual$otolithtype[1] <- 9 + expect_warning(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual), "StoX: Some aged fish does not have Otolithtype set, or have it set to an unrecognized value. This may slow down Stox processing of Reca results.") + StoxBioticData$Individual$IndividualAge[1] <- NA + expect_silent(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual)) + + #context("PrepRecaEstimate: AgerrorMatrix") + ageerorfile <- system.file("testresources","AgeErrorHirstEtAl2012.txt", package="RstoxFDA") + ageerror <- RstoxFDA::DefineAgeErrorMatrix(FileName = ageerorfile) + expect_true(RstoxFDA::is.AgeErrorMatrix(ageerror)) + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseAgingError = T, AgeErrorMatrix = ageerror, MinAge = 0, MaxAge = 14) + expect_true(!is.null(prep$AgeLength$AgeErrorMatrix)) + expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Seed = 99)) + + #context("PrepareRecaEstimate: configuration tests") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxLandingData$Landing$NewConst <- 1 + StoxBioticData$Station$NewConst <- 1 + + + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("NewConst"), RandomEffects = c()), "Only one level for categorical covariate NewConst") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("NewConst")), "Only one level for categorical covariate NewConst") + + StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(7,8,13,4,3,4,11,20,4,5,6,20,4,12,3,3,10,4,1,20,11,5,11,5,15,8,14,7,10,6,13,16,11,14,19,20,2,19,11,16,15,5,11,11,9)] + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Area"), RandomEffects = c("Area")), "Some random effects are also specified as fixed effects: Area") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area"), "UseCarEffect is False, while the parameter 'CarEffect' is given") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area", UseCarEffect = T, CarNeighbours = list()), "The CAR effect Area is also specified as fixed effect or random effect") + + #check CAR value cehcks + carfile <- system.file("testresources","mainarea_neighbour_correct_codes.txt", package="RstoxFDA") + car <- RstoxFDA::DefineCarNeighbours(NULL, FileName = carfile) + expect_true(RstoxFDA::is.CarNeighbours(car)) + car$Neighbours[9] <- paste(car$Neighbours[9], "30", sep=",") + car$Neighbours[29] <- paste(car$Neighbours[29], "08", sep=",") + prepCar <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), CarEffect = "Area", UseCarEffect = T, CarNeighbours = car) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCar, 10, 50, 1, fpath, Seed=42) + result <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + RstoxFDA:::removeTempDirReca(fpath) + expect_true(RstoxFDA::is.RecaParameterData(paramOut)) + expect_true(RstoxFDA::is.RecaCatchAtAge(result)) + + + #context("ParameterizeRecaModels: simple case") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=43) + + expect_true(c("FitLengthGivenAge") %in% names(paramOut)) + expect_equal(length(paramOut$FitLengthGivenAge), 4) + expect_true(RstoxFDA::is.RecaParameterData((paramOut))) + + #context("test-StoxAnalysisFunctions: RunRecaModels") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + #context("test-StoxAnalysisFunctions: RunRecaModels with GroupingVariables") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage")) + expect_equal(length(unique(paste(results$CatchAtAge$Area, results$CatchAtAge$Usage))), length(unique(paste(StoxLandingData$Landing$Area, StoxLandingData$Landing$Usage)))) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + expect_warning(RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage"), CollapseLength = F), "StoX: Producing estimates for all length groups in combination with age and several 'GroupingVariables'. This may exhaust memory, consider the option 'CollapseLength'") + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: RunRecaModels with random effects in landings") + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[sample.int(20,45, replace=T)] + + fpath <- RstoxFDA:::makeTempDirReca() + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear")) + + #check that seed works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=111) + seed <- sample.int(.Machine$integer.max, 1) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed) + results2 <- RstoxFDA:::RunRecaModels(paramOut2, StoxLandingData) + paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed+1) + results3 <- RstoxFDA:::RunRecaModels(paramOut3, StoxLandingData) + expect_equal(results2, results) + expect_true(!all(results$CatchAtAge == results3$CatchAtAge)) + #/ seed + + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=100) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + + expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=44) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear") + expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + #context("RunRecaModels: Test collapse Length") + resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear", CollapseLength = F) + expect_true("Age" %in% names(resultsWlength$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) + expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) + expect_equal(length(unique(results$CatchAtAge$Length)),1) + expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) + + #context("RunRecaModels: Test collapse Length wo Aggregation") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, CollapseLength = F) + expect_true("Age" %in% names(resultsWlength$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) + expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) + expect_equal(length(unique(results$CatchAtAge$Length)),1) + expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) + + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate missing arguments") + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = NULL), "Argument 'ResultDirectory' must be provided.") + + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate simple case") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + expect_equal(length(prep$CovariateMaps$CovariateMaps_randomEffects_AgeLength_catchSample$values), length(unique(StoxBioticData$Individual$HaulKey))) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), MinAge=1, MaxAge=30) + + #context("test-StoxAnalysisFunctions: RunRecaEstimate simple case") + expect_warning(result <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Thin=1, Seed = 42)) + expect_true(all(c("input", "fit", "prediction", "covariateMaps") %in% names(result))) + expect_equal(dim(result$prediction$TotalCount)[3], 10) + + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, missing sample dates") + StoxBioticData$Station$DateTime[1] <- NA + expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish), missing CatchFractionNumber") + StoxBioticDataDelp <- readRDS(system.file("testresources","StoxBioticDelpr.rds", package="RstoxFDA")) + expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish)") + StoxBioticDataDelp$Sample$CatchFractionNumber[2] <- 3000 + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + ##context("test-StoxAnalysisFunctions: RunRecaEstimate, stratified samples (nFish)") + #to few iterations to converge consistently. removing test + #est <- RunRecaEstimate(prep, 10, 200, 0) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate with with random effect Area") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticData$Station$Area <- c(rep(StoxLandingData$Landing$Area[10], 20), rep(StoxLandingData$Landing$Area[20], 25)) + StoxBioticData$Station$GG <- c(rep(StoxLandingData$Landing$Gear[10], 20), rep(StoxLandingData$Landing$Gear[20], 25)) + StoxLandingData$Landing$GG <- StoxLandingData$Landing$Gear + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area")) + expect_true("Area" %in% names(prep$Landings$AgeLengthCov)) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate cellEffect") + prepCell <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area", "GG"), CellEffect = "All") + expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="Area"], 1) + expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="GG"], 1) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCell, 10, 50, 1, fpath, Seed = 451) + expect_true("cell" %in% names(paramOut$FitProportionAtAge)) + + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: RunRecaEstimate with random effect Area") + expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 100, 0, Seed = 112)) + expect_true("Area" %in% names(est$fit$ProportionAtAge$Intercept$cov)) +} diff --git a/inst/tinytest/test-StoxReportFunctions.R b/inst/tinytest/test-StoxReportFunctions.R index c5727f20..5263fb48 100644 --- a/inst/tinytest/test-StoxReportFunctions.R +++ b/inst/tinytest/test-StoxReportFunctions.R @@ -139,7 +139,7 @@ expect_true(sum(gReport$FisheriesSampling$Catches, na.rm=T) == sum(qgReport$Fish SamplingReport <- RstoxFDA::ReportFdaSampling(StoxBioticData, StoxLandingData, GroupingVariables = c("Quarter")) expect_true(abs(sum(StoxBioticData$Sample$CatchFractionWeight, na.rm=T) - sum(SamplingReport$FisheriesSampling$WeightOfSampledCatches)) / sum(SamplingReport$FisheriesSampling$WeightOfSampledCatches) < .01) expect_true(abs(sum(StoxLandingData$Landing$RoundWeight, na.rm=T) - sum(SamplingReport$FisheriesSampling$LandedRoundWeight)) / sum(SamplingReport$FisheriesSampling$LandedRoundWeight) < .01) -expect_true(RstoxFDA::is.ReportFdaSamplingData(SamplingReport)) +expect_true(RstoxFDA:::is.ReportFdaSamplingData(SamplingReport)) expect_true(all(!is.na(SamplingReport$FisheriesSampling$LandedRoundWeight))) expect_equal(RstoxData::getUnit(SamplingReport$FisheriesSampling$WeightOfSampledCatches), "mass-kg") expect_true(is.na(RstoxData::getUnit(SamplingReport$FisheriesSampling$Catches))) @@ -160,7 +160,7 @@ expect_true(all(SamplingReportRounded$FisheriesSampling$WeightOfSampledCatches ! # test one with NAs SamplingReportSV <- RstoxFDA::ReportFdaSampling(StoxBioticData, StoxLandingData, GroupingVariables = c("Quarter"), Unit="kiloton", Decimals = 6, SamplingVariables = c("IndividualSex")) -expect_true(is.ReportFdaSamplingData(SamplingReportSV)) +expect_true(RstoxFDA:::is.ReportFdaSamplingData(SamplingReportSV)) expect_true("SamplingVariables" %in% names(SamplingReportSV)) expect_true("IndividualSex" %in% names(SamplingReportSV$FisheriesSampling)) expect_equal(sum(is.na(SamplingReportSV$FisheriesSampling$IndividualSex)),2) @@ -209,8 +209,8 @@ catchAtAgeDecomp <- readRDS(system.file("testresources", "recaPredictionDecomp.r catchAtAgeReportDecomp <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp) catchAtAgeReportFlat <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeFlat) -expect_true(RstoxFDA::is.ReportFdaData(catchAtAgeReportDecomp)) -expect_true(RstoxFDA::is.ReportFdaData(catchAtAgeReportFlat)) +expect_true(RstoxFDA:::is.ReportFdaData(catchAtAgeReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(catchAtAgeReportFlat)) diff <- sum(catchAtAgeReportFlat$NbyAge$CatchAtAge) - sum(catchAtAgeReportDecomp$NbyAge$CatchAtAge) reldiff <- abs(diff/sum(catchAtAgeReportFlat$NbyAge$CatchAtAge)) @@ -294,7 +294,7 @@ expect_equal(catchAtAgeReportMi$NbyAge$SD[1:3]*1e6, catchAtAgeReportFlatPlusGr$N # Report Mean weight MeanWeightReportDecomp <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, Decimals = 4, Unit = "kg") -expect_true(RstoxFDA::is.ReportFdaData(MeanWeightReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(MeanWeightReportDecomp)) expect_equal(RstoxData::getUnit(MeanWeightReportDecomp$MeanWeightByAge$MeanIndividualWeight), "mass-kg") MeanWeightReportDecimal <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, Decimal=4) @@ -333,7 +333,7 @@ expect_true(all(MeanWeightReportDecompPlusGr$MeanWeightByAge$MeanIndividualWeigh # Report Mean length MeanLengthReportDecomp <- RstoxFDA::ReportRecaLengthAtAge(catchAtAgeDecomp, Unit="cm", Decimals=1) -expect_true(RstoxFDA::is.ReportFdaData(MeanLengthReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(MeanLengthReportDecomp)) expect_true(!all(nchar(as.character(MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength[MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength>0]))>5)) expect_equal(RstoxData::getUnit(MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength), "length-cm") @@ -381,7 +381,7 @@ catchAtAgeReportDecompPlusGr <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp, MeanWeightReportDecompPlusGr <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, PlusGroup=5, Decimals = 6, Threshold = 1000) expect_true(sum(is.na(MeanWeightReportDecompPlusGr$MeanWeightByAge$MeanIndividualWeight))>1) sopTabNa <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGr, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area")) -expect_true(RstoxFDA::is.ReportFdaSOP(sopTabNa)) +expect_true(RstoxFDA:::is.ReportFdaSOP(sopTabNa)) sopTabNa <- sopTabNa$SopReport expect_true(any(sopTabNa$Difference<0)) @@ -391,7 +391,7 @@ catchAtAgeReportDecompPlusGr <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp, MeanWeightReportDecompPlusGr <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, PlusGroup=5, Decimals = 6) sopTab <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGr, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area")) sopTabKi <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGrKi, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area"), UnitFraction = "%") -expect_true(RstoxFDA::is.ReportFdaSOP(sopTab)) +expect_true(RstoxFDA:::is.ReportFdaSOP(sopTab)) sopTab <- sopTab$SopReport sopTabKi <- sopTabKi$SopReport expect_true(all(abs(sopTab$RelativeDifference) < 0.02)) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 63a57132..2070a7a3 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -1,3 +1,8 @@ + +# ECA tests are not run for platforms where Reca is not available from StoX repositories. +env<-Sys.getenv() +if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ + #context("test-StoxAnalysisFunctions: tests RecaResult conversion") ecaResult <- readRDS(system.file("testresources","ecaResult.rds", package="RstoxFDA")) @@ -69,4 +74,5 @@ RecaData <- RstoxFDA::convertRecaData(prep, nSamples = 10, burnin = 50, thin=1, sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") est<-Reca::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) RstoxFDA:::removeTempDirReca(fpath) +} From 6d534351c3e946f2f166d0d4959b486fd9b4e9b0 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 16:02:47 +0100 Subject: [PATCH 17/24] Fixed workflow file --- .github/workflows/check-full.yaml | 80 +++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 6de0fe8a..82178795 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -2,9 +2,13 @@ on: push: branches: - master + - testing + - develop pull_request: branches: - master + - testing + - develop name: R-CMD-check @@ -21,23 +25,23 @@ jobs: config: # for windows and mac all builds are pushed to drat repo on merge to master (except pre-release builds), except 'next' + # 2023-08-23 fails, not getting Reca. Because it is not built yet for macos and next or 4.3. - {os: macOS-latest, r: 'next', pkgext: '.tgz'} #tested without Reca - {os: macOS-latest, r: '4.3', pkgext: '.tgz'} #tested without Reca - {os: macOS-latest, r: '4.2', pkgext: '.tgz'} - {os: macOS-latest, r: '4.1', pkgext: '.tgz'} - {os: macOS-latest, r: '4.0', pkgext: '.tgz'} - # 2023-08-23 fails, not getting Reca. Because it is not built yet for windows and next or 4.3. - {os: windows-latest, r: 'next', pkgext: '.zip'} - {os: windows-latest, r: '4.3', pkgext: '.zip'} - - {os: windows-latest, r: '4.2', pkgext: '.zip'} - {os: windows-latest, r: '4.1', pkgext: '.zip'} - {os: windows-latest, r: '4.0', pkgext: '.zip', rspm: "https://cloud.r-project.org"} - {os: ubuntu-latest, r: 'next', pkgext: '.tar.gz', rspm: "https://cloud.r-project.org"} + # source is pushed to drat repos on merge to for ubunutu when r is 4.2 (source is the same for all R versions), update script if 4.2 is taken out + - {os: ubuntu-22.04, r: '4.3', pkgext: '.tar.gz', rspm: "https://cloud.r-project.org"} - {os: ubuntu-22.04, r: '4.2', pkgext: '.tar.gz', rspm: "https://cloud.r-project.org"} - # 4.1 is pushed to drat repo on merge to master for ubunutu (except pre-release builds), update script if 4.1 is taken out - {os: ubuntu-20.04, r: '4.1', pkgext: '.tar.gz', rspm: "https://cloud.r-project.org"} - {os: ubuntu-20.04, r: '4.0', pkgext: '.tar.gz', rspm: "https://cloud.r-project.org"} @@ -50,9 +54,13 @@ jobs: - uses: actions/checkout@v3 - - name: Do autorelease increment, tagging, and environment setup - if: github.event_name == 'push' && github.ref == 'refs/heads/master' + - name: Prepare for deployment at push + if: github.event_name == 'push' run: | + curl "https://raw.githubusercontent.com/StoXProject/unstableRepo/main/scripts/checktag-actions.sh" --output unstableRepo_checktag.sh + curl "https://raw.githubusercontent.com/StoXProject/unstableRepo/main/scripts/deploy-actions.sh" --output unstableRepo_deploy.sh + chmod +x ./unstableRepo_checktag.sh ./unstableRepo_deploy.sh + . unstableRepo_checktag.sh curl "https://raw.githubusercontent.com/StoXProject/testingRepo/main/scripts/checktag-actions.sh" --output testingRepo_checktag.sh curl "https://raw.githubusercontent.com/StoXProject/testingRepo/main/scripts/deploy-actions.sh" --output testingRepo_deploy.sh chmod +x ./testingRepo_checktag.sh ./testingRepo_deploy.sh @@ -83,6 +91,12 @@ jobs: - uses: r-lib/actions/setup-pandoc@v2 + - name: Anticipate Bintray shutdown + if: runner.os == 'macOS' + run: | + brew update + shell: bash + - name: Query dependencies run: | install.packages('remotes') @@ -184,7 +198,6 @@ jobs: run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Show testthat output if: always() run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true @@ -197,44 +210,61 @@ jobs: name: ${{ runner.os }}-r${{ matrix.config.r }}-results path: check - - name: Build package source archive - if: runner.os == 'Linux' && matrix.config.r == '4.1' && github.event_name == 'push' && github.ref == 'refs/heads/master' + ####################################################### + #### 4. Build the package source and binary files: #### + ####################################################### + + - name: Build package source archive from branches develop, testing and master + if: runner.os == 'Linux' && matrix.config.r == '4.2' && github.event_name == 'push' && (github.ref_name == 'master' || github.ref_name == 'testing' || github.ref_name == 'develop') run: | pkgbuild::build(".", dest_path = ".", binary = FALSE) system(paste0("PKG_FREL=", Sys.getenv("PKG_FILE"), " >> ", Sys.getenv("GITHUB_ENV"))) shell: Rscript {0} - - name: Build package binary archive - if: (runner.os == 'Windows' || runner.os == 'macOS') && github.event_name == 'push' && github.ref == 'refs/heads/master' + - name: Build package binary archive from branches develop, testing and master for Windows and macOS (this builds also on matrix.config.r = release, which can or can not be a duplicate) + if: (runner.os == 'Windows' || runner.os == 'macOS') && github.event_name == 'push' && (github.ref_name == 'master' || github.ref_name == 'testing' || github.ref_name == 'develop') run: | pkgbuild::build(".", dest_path = ".", binary = TRUE) + print("PKG_FILE") + print(Sys.getenv("PKG_FILE")) + print("PKG_FREL") + print(Sys.getenv("PKG_FREL")) file.copy(Sys.getenv("PKG_FILE"), Sys.getenv("PKG_FREL")) shell: Rscript {0} - # Upload to drat repo only for one version (4.1) on Linux, and only for all the specifically given R versions for macOS and Windows (not next). Next is included to detect errors immediatelly when the new R is patched: - - name: Upload to DRAT repo - if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.1') ) && github.event_name == 'push' && github.ref == 'refs/heads/master' + + + ###################################### + #### 5. Deploy to the drat repos: #### + ###################################### + + # Upload to drat repo only for one version (4.2) on Linux, and only for all the specifically given R versions for macOS and Windows (not next). Next is included to detect errors immediatelly when the new R is patched: + + ## 5a. Upload to the official (DRAT) repo, but only from the master branch. + - name: Upload to the official (DRAT) repo + if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.2') ) && github.event_name == 'push' && github.ref_name == 'master' env: BUILD_NUMBER: ${{ github.sha }} DRAT_DEPLOY_TOKEN: ${{ secrets.DRAT_DEPLOY_TOKEN }} run: . repo_deploy.sh shell: bash - - name: Upload to DRAT testingRepo - if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.1') ) && github.event_name == 'push' && github.ref == 'refs/heads/master' + + ## 5b. Upload to the (DRAT) testingRepo, but only from the testing branch. + - name: Upload to the testing (DRAT) repo + if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.2') ) && github.event_name == 'push' && github.ref_name == 'testing' env: BUILD_NUMBER: ${{ github.sha }} DRAT_DEPLOY_TOKEN: ${{ secrets.DRAT_DEPLOY_TOKEN }} run: . testingRepo_deploy.sh shell: bash - - name: Create release and upload files - if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.1') ) && github.event_name == 'push' && github.ref == 'refs/heads/master' - uses: ncipollo/release-action@v1 - with: - allowUpdates: true - commit: master - tag: ${{ env.FINAL_TAG }} - name: ${{ env.FINAL_TAG }} - artifacts: ${{ env.PKG_REPO }}/${{ env.PKG_FREL }} - token: ${{ secrets.GITHUB_TOKEN }} + + ## 5c. Upload to the (DRAT) unstableRepo, but only from the develop branch. + - name: Upload to the unstable (DRAT) repo + if: ( (runner.os == 'Windows' && matrix.config.r != 'next') || (runner.os == 'macOS' && matrix.config.r != 'next') || (runner.os == 'Linux' && matrix.config.r == '4.2') ) && github.event_name == 'push' && github.ref_name == 'develop' + env: + BUILD_NUMBER: ${{ github.sha }} + DRAT_DEPLOY_TOKEN: ${{ secrets.DRAT_DEPLOY_TOKEN }} + run: . unstableRepo_deploy.sh + shell: bash \ No newline at end of file From 574d3cec6b2e7410fe34647d6a9fb99f8ebb5aa3 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 21:31:15 +0100 Subject: [PATCH 18/24] changed Reca dependent tests to run only when reca is installed. --- inst/tinytest/test-RecaFormatChecks.R | 5 +++-- inst/tinytest/test-RecaWrap.R | 5 +++-- inst/tinytest/test-StoxAnalysisFunctions.R | 5 ++--- inst/tinytest/test-ecaOuputputConversion.R | 6 +++--- tests/testAgainstStoxPrep.R | 6 ++++++ 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/inst/tinytest/test-RecaFormatChecks.R b/inst/tinytest/test-RecaFormatChecks.R index d21dcb6f..b0f06349 100644 --- a/inst/tinytest/test-RecaFormatChecks.R +++ b/inst/tinytest/test-RecaFormatChecks.R @@ -1,6 +1,7 @@ # ECA tests are not run for platforms where Reca is not available from StoX repositories. -env<-Sys.getenv() -if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ +# ECA tests are only run if Reca is installed. + +if (length(system.file(package="Reca"))>0){ StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) diff --git a/inst/tinytest/test-RecaWrap.R b/inst/tinytest/test-RecaWrap.R index 1a60d039..2c486a99 100644 --- a/inst/tinytest/test-RecaWrap.R +++ b/inst/tinytest/test-RecaWrap.R @@ -1,6 +1,7 @@ # ECA tests are not run for platforms where Reca is not available from StoX repositories. -env<-Sys.getenv() -if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ +# ECA tests are only run if Reca is installed. + +if (length(system.file(package="Reca"))>0){ fishdata <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "fishdata.rda"))) landings <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "landings.rda"))) diff --git a/inst/tinytest/test-StoxAnalysisFunctions.R b/inst/tinytest/test-StoxAnalysisFunctions.R index 241a9979..0dc4c1ee 100644 --- a/inst/tinytest/test-StoxAnalysisFunctions.R +++ b/inst/tinytest/test-StoxAnalysisFunctions.R @@ -1,8 +1,7 @@ # Tests StoX analysis functions that interfaces Reca. -# ECA tests are not run for platforms where Reca is not available from StoX repositories. +# ECA tests are only run if Reca is installed. -env<-Sys.getenv() -if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ +if (length(system.file(package="Reca"))>0){ #context("Test ParameterizeRecaModels cache") StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") StoxBioticData <- readRDS(StoxBioticFile) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 2070a7a3..95aa9eba 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -1,7 +1,7 @@ -# ECA tests are not run for platforms where Reca is not available from StoX repositories. -env<-Sys.getenv() -if (!("_R_CHECK_FORCE_SUGGESTS_" %in% names(env)) || as.logical(env[["_R_CHECK_FORCE_SUGGESTS_"]])){ +# ECA tests are only run if Reca is installed. + +if (length(system.file(package="Reca"))>0){ #context("test-StoxAnalysisFunctions: tests RecaResult conversion") ecaResult <- readRDS(system.file("testresources","ecaResult.rds", package="RstoxFDA")) diff --git a/tests/testAgainstStoxPrep.R b/tests/testAgainstStoxPrep.R index d1e8febb..198916a7 100644 --- a/tests/testAgainstStoxPrep.R +++ b/tests/testAgainstStoxPrep.R @@ -5,6 +5,11 @@ library(data.table) # Prepare data based on StoXexport in "old" prepECA and compare results # +# ECA tests are only run if Reca is installed. + +if (length(system.file(package="Reca"))>0){ + + stoxRobj <- readRDS(system.file(package = "RstoxFDA", "testresources", "oldstoxprepreca")) samples <- data.table(catchId = as.character(stoxRobj$StoxExport$biotic$serialnumber), @@ -38,3 +43,4 @@ tabStox <- RstoxFDA::makeResultTableRECA(stoxRecaResults$prediction) #RstoxFDA::plotCatchAtAge(prepRecaResults$prediction, title="RecaPrep results") #RstoxFDA::plotCatchAtAge(stoxRecaResults$prediction, title="StoxPrep results") +} \ No newline at end of file From a48c68e860be1b33ba25cea54fadfcc36d6df9cd Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 22:23:42 +0100 Subject: [PATCH 19/24] Fixed suggests dependent tests --- .github/workflows/check-full.yaml | 4 ++-- R/RecaWrap.R | 6 ++++++ inst/tinytest/test-RecaFormatChecks.R | 2 +- inst/tinytest/test-RecaWrap.R | 2 +- inst/tinytest/test-StoxAnalysisFunctions.R | 2 +- inst/tinytest/test-ecaOuputputConversion.R | 5 +++-- inst/tinytest/test-lengthGroupCollapse.R | 6 ++++++ tests/testAgainstStoxPrep.R | 4 ++-- 8 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 82178795..357a4565 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -182,7 +182,7 @@ jobs: shell: Rscript {0} # turn off testing of suggestions for configurations where Reca is not provided in StoX package repositories - - name: Check without suggested dependencies + - name: Check without suggest-dependencies if: runner.os == 'macOS' && (matrix.config.r== 'next' || matrix.config.r== '4.3') env: _R_CHECK_CRAN_INCOMING_: false @@ -190,7 +190,7 @@ jobs: run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Check with suggested dependencies + - name: Check with suggest-dependencies if: runner.os != 'macOS' || (matrix.config.r!= 'next' && matrix.config.r!= '4.3') env: _R_CHECK_CRAN_INCOMING_: false diff --git a/R/RecaWrap.R b/R/RecaWrap.R index 224dec76..e5e9825f 100644 --- a/R/RecaWrap.R +++ b/R/RecaWrap.R @@ -1108,3 +1108,9 @@ rEcaDataReport <- function(samples, landings, covariates){ out <- data.table::as.data.table(out) return(out) } + +#' Runs Reca::eca.estimate. Provided so that tinytest unittest can be implemented in a way that is dependent on Reca being available +#' @noRd +eca.estimate <- function(AgeLength, WeightLength, Landings, GlobalParameters){ + return(Reca::eca.estimate(AgeLength, WeightLength, Landings, GlobalParameters)) +} diff --git a/inst/tinytest/test-RecaFormatChecks.R b/inst/tinytest/test-RecaFormatChecks.R index b0f06349..3421f2d9 100644 --- a/inst/tinytest/test-RecaFormatChecks.R +++ b/inst/tinytest/test-RecaFormatChecks.R @@ -1,7 +1,7 @@ # ECA tests are not run for platforms where Reca is not available from StoX repositories. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) diff --git a/inst/tinytest/test-RecaWrap.R b/inst/tinytest/test-RecaWrap.R index 2c486a99..c01895d7 100644 --- a/inst/tinytest/test-RecaWrap.R +++ b/inst/tinytest/test-RecaWrap.R @@ -1,7 +1,7 @@ # ECA tests are not run for platforms where Reca is not available from StoX repositories. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ fishdata <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "fishdata.rda"))) landings <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "landings.rda"))) diff --git a/inst/tinytest/test-StoxAnalysisFunctions.R b/inst/tinytest/test-StoxAnalysisFunctions.R index 0dc4c1ee..eb7cbad4 100644 --- a/inst/tinytest/test-StoxAnalysisFunctions.R +++ b/inst/tinytest/test-StoxAnalysisFunctions.R @@ -1,7 +1,7 @@ # Tests StoX analysis functions that interfaces Reca. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ #context("Test ParameterizeRecaModels cache") StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") StoxBioticData <- readRDS(StoxBioticFile) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 95aa9eba..49632db0 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -1,7 +1,7 @@ # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ #context("test-StoxAnalysisFunctions: tests RecaResult conversion") ecaResult <- readRDS(system.file("testresources","ecaResult.rds", package="RstoxFDA")) @@ -72,7 +72,8 @@ prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEff fpath <- RstoxFDA:::makeTempDirReca() RecaData <- RstoxFDA::convertRecaData(prep, nSamples = 10, burnin = 50, thin=1, resultdir = fpath, delta.age = .001, fitfile = "fit", seed = 42, lgamodel = "log-linear") sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") -est<-Reca::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) + +est<-RstoxFDA:::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) RstoxFDA:::removeTempDirReca(fpath) } diff --git a/inst/tinytest/test-lengthGroupCollapse.R b/inst/tinytest/test-lengthGroupCollapse.R index 46005730..104fd4cf 100644 --- a/inst/tinytest/test-lengthGroupCollapse.R +++ b/inst/tinytest/test-lengthGroupCollapse.R @@ -1,3 +1,7 @@ +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) @@ -41,3 +45,5 @@ reportCAALwoLength <- RstoxFDA::ReportRecaCatchAtLengthAndAge(results) expect_equal(nrow(reportCAALwLength$NbyLength), 2158) expect_equal(nrow(reportCAALwoLength$NbyLength), 26) expect_equal(sum(reportCAALwoLength$NbyLength$CatchAtLength), sum(reportCAALwLength$NbyLength$CatchAtLength)) + +} diff --git a/tests/testAgainstStoxPrep.R b/tests/testAgainstStoxPrep.R index 198916a7..4205d8cc 100644 --- a/tests/testAgainstStoxPrep.R +++ b/tests/testAgainstStoxPrep.R @@ -7,7 +7,7 @@ library(data.table) # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ stoxRobj <- readRDS(system.file(package = "RstoxFDA", "testresources", "oldstoxprepreca")) @@ -43,4 +43,4 @@ tabStox <- RstoxFDA::makeResultTableRECA(stoxRecaResults$prediction) #RstoxFDA::plotCatchAtAge(prepRecaResults$prediction, title="RecaPrep results") #RstoxFDA::plotCatchAtAge(stoxRecaResults$prediction, title="StoxPrep results") -} \ No newline at end of file +} From bf4e38620be087a0a0487cdda9c21cfe5c34ed23 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 22:23:42 +0100 Subject: [PATCH 20/24] Fixed suggests dependent tests --- .github/workflows/check-full.yaml | 4 +- .github/workflows/test-coverage.yaml | 1 + R/RecaWrap.R | 6 +++ inst/tinytest/test-ConvergenceAnalysis.R | 45 ++++++++++++++++++++++ inst/tinytest/test-RecaFormatChecks.R | 2 +- inst/tinytest/test-RecaWrap.R | 3 +- inst/tinytest/test-StoxAnalysisFunctions.R | 2 +- inst/tinytest/test-StoxDataTypes.R | 3 ++ inst/tinytest/test-StoxReportFunctions.R | 44 --------------------- inst/tinytest/test-ecaOuputputConversion.R | 5 ++- inst/tinytest/test-lengthGroupCollapse.R | 6 +++ tests/testAgainstStoxPrep.R | 4 +- 12 files changed, 71 insertions(+), 54 deletions(-) create mode 100644 inst/tinytest/test-ConvergenceAnalysis.R diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 82178795..357a4565 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -182,7 +182,7 @@ jobs: shell: Rscript {0} # turn off testing of suggestions for configurations where Reca is not provided in StoX package repositories - - name: Check without suggested dependencies + - name: Check without suggest-dependencies if: runner.os == 'macOS' && (matrix.config.r== 'next' || matrix.config.r== '4.3') env: _R_CHECK_CRAN_INCOMING_: false @@ -190,7 +190,7 @@ jobs: run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Check with suggested dependencies + - name: Check with suggest-dependencies if: runner.os != 'macOS' || (matrix.config.r!= 'next' && matrix.config.r!= '4.3') env: _R_CHECK_CRAN_INCOMING_: false diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2806a966..964b3d0e 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,6 +2,7 @@ on: push: branches: - master + - testing pull_request: branches: - testing diff --git a/R/RecaWrap.R b/R/RecaWrap.R index 224dec76..e5e9825f 100644 --- a/R/RecaWrap.R +++ b/R/RecaWrap.R @@ -1108,3 +1108,9 @@ rEcaDataReport <- function(samples, landings, covariates){ out <- data.table::as.data.table(out) return(out) } + +#' Runs Reca::eca.estimate. Provided so that tinytest unittest can be implemented in a way that is dependent on Reca being available +#' @noRd +eca.estimate <- function(AgeLength, WeightLength, Landings, GlobalParameters){ + return(Reca::eca.estimate(AgeLength, WeightLength, Landings, GlobalParameters)) +} diff --git a/inst/tinytest/test-ConvergenceAnalysis.R b/inst/tinytest/test-ConvergenceAnalysis.R new file mode 100644 index 00000000..0f9d5d5e --- /dev/null +++ b/inst/tinytest/test-ConvergenceAnalysis.R @@ -0,0 +1,45 @@ +# ECA tests are only run if Reca is installed. +if (nchar(system.file(package="Reca"))>0){ +#context("Test StoxReportFunctions: ReportRecaParameterStatistics") +StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") +StoxLandingData <- readRDS(StoxLandingFile) +StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") +StoxBioticData <- readRDS(StoxBioticFile) +StoxBioticData$Individual <- StoxBioticData$Individual[StoxBioticData$Individual$IndividualAge<4,] +StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[c(1:20, 1:20, 1:5)] +StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(1:20, 1:20, 1:5)] +prep <- RstoxFDA::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear", "Area"), CellEffect = c("All"), MinAge = 2, MaxAge = 3) + +fpath1 <- RstoxFDA:::makeTempDirReca("chain1") +fpath2 <- RstoxFDA:::makeTempDirReca("chain2") +fpath3 <- RstoxFDA:::makeTempDirReca("chain3") + +paramOut1 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath1) +paramOut2 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath2) + +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut2, paramSummary, AppendReport = TRUE) +expect_true(RstoxFDA::is.ParameterizationSummaryData(paramSummary)) + +RstoxFDA:::removeTempDirReca(fpath1) +RstoxFDA:::removeTempDirReca(fpath2) +RstoxFDA:::removeTempDirReca(fpath3) + +convergence <- RstoxFDA::ReportParameterConvergence(paramSummary) + +expect_true(RstoxFDA::is.ParameterConvergenceData(convergence)) +expect_true(nrow(convergence$ConvergenceReport) < 433) +expect_true(nrow(convergence$ConvergenceReport) > 0) + +#construct three identical chains, should signal convergence +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) +paramOut1$GlobalParameters$GlobalParameters$resultdir="B" +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) +paramOut1$GlobalParameters$GlobalParameters$resultdir="C" +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) + +#context("Check Gelman-Rubin for equal chains") +convergence <- RstoxFDA::ReportParameterConvergence(paramSummary, Tolerance = 0) +expect_equal(nrow(convergence$ConvergenceReport), 433) +expect_true(all(abs(convergence$ConvergenceReport$GelmanRubinR-1)<.1)) +} diff --git a/inst/tinytest/test-RecaFormatChecks.R b/inst/tinytest/test-RecaFormatChecks.R index b0f06349..3421f2d9 100644 --- a/inst/tinytest/test-RecaFormatChecks.R +++ b/inst/tinytest/test-RecaFormatChecks.R @@ -1,7 +1,7 @@ # ECA tests are not run for platforms where Reca is not available from StoX repositories. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) diff --git a/inst/tinytest/test-RecaWrap.R b/inst/tinytest/test-RecaWrap.R index 2c486a99..8ad5129f 100644 --- a/inst/tinytest/test-RecaWrap.R +++ b/inst/tinytest/test-RecaWrap.R @@ -1,7 +1,6 @@ -# ECA tests are not run for platforms where Reca is not available from StoX repositories. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ fishdata <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "fishdata.rda"))) landings <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "landings.rda"))) diff --git a/inst/tinytest/test-StoxAnalysisFunctions.R b/inst/tinytest/test-StoxAnalysisFunctions.R index 0dc4c1ee..eb7cbad4 100644 --- a/inst/tinytest/test-StoxAnalysisFunctions.R +++ b/inst/tinytest/test-StoxAnalysisFunctions.R @@ -1,7 +1,7 @@ # Tests StoX analysis functions that interfaces Reca. # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ #context("Test ParameterizeRecaModels cache") StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") StoxBioticData <- readRDS(StoxBioticFile) diff --git a/inst/tinytest/test-StoxDataTypes.R b/inst/tinytest/test-StoxDataTypes.R index 69cc6864..8739128a 100644 --- a/inst/tinytest/test-StoxDataTypes.R +++ b/inst/tinytest/test-StoxDataTypes.R @@ -11,5 +11,8 @@ expect_true(RstoxFDA:::is.POSIXct(as.POSIXct(Sys.Date()))) expect_true(RstoxFDA:::is.RecaPrediction(RstoxFDA::recaPrediction)) #context("Test is.RecaResult") +# ECA tests are only run if Reca is installed. +if (nchar(system.file(package="Reca"))>0){ suppressWarnings(ex<-RstoxFDA::RunRecaEstimate(RstoxFDA::recaDataExample, 100,100)) expect_true(RstoxFDA:::is.RecaResult(ex)) +} \ No newline at end of file diff --git a/inst/tinytest/test-StoxReportFunctions.R b/inst/tinytest/test-StoxReportFunctions.R index 5263fb48..a3e27183 100644 --- a/inst/tinytest/test-StoxReportFunctions.R +++ b/inst/tinytest/test-StoxReportFunctions.R @@ -10,50 +10,6 @@ expect_true(!any(is.na(decomp$MeanWeightByAge$SD))) expect_true(!any(is.na(decomp$MeanWeightByAge$Low))) expect_true(!any(is.na(decomp$MeanWeightByAge$High))) -#context("Test StoxReportFunctions: ReportRecaParameterStatistics") -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual <- StoxBioticData$Individual[StoxBioticData$Individual$IndividualAge<4,] -StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[c(1:20, 1:20, 1:5)] -StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(1:20, 1:20, 1:5)] -prep <- RstoxFDA::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear", "Area"), CellEffect = c("All"), MinAge = 2, MaxAge = 3) - -fpath1 <- RstoxFDA:::makeTempDirReca("chain1") -fpath2 <- RstoxFDA:::makeTempDirReca("chain2") -fpath3 <- RstoxFDA:::makeTempDirReca("chain3") - -paramOut1 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath1) -paramOut2 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath2) - -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut2, paramSummary, AppendReport = TRUE) -expect_true(RstoxFDA::is.ParameterizationSummaryData(paramSummary)) - -RstoxFDA:::removeTempDirReca(fpath1) -RstoxFDA:::removeTempDirReca(fpath2) -RstoxFDA:::removeTempDirReca(fpath3) - -convergence <- RstoxFDA::ReportParameterConvergence(paramSummary) - -expect_true(RstoxFDA::is.ParameterConvergenceData(convergence)) -expect_true(nrow(convergence$ConvergenceReport) < 433) -expect_true(nrow(convergence$ConvergenceReport) > 0) - -#construct three identical chains, should signal convergence -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) -paramOut1$GlobalParameters$GlobalParameters$resultdir="B" -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) -paramOut1$GlobalParameters$GlobalParameters$resultdir="C" -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) - -#context("Check Gelman-Rubin for equal chains") -convergence <- RstoxFDA::ReportParameterConvergence(paramSummary, Tolerance = 0) -expect_equal(nrow(convergence$ConvergenceReport), 433) -expect_true(all(abs(convergence$ConvergenceReport$GelmanRubinR-1)<.1)) - - #context("Test StoxReportFunctions: ReportRecaCatchStatistics") predictiondatafile <- system.file("testresources","stocksplitpred.rds", package="RstoxFDA") catchAtAgeFlat <- readRDS(system.file("testresources", "recaPredictionFlat.rds", package="RstoxFDA")) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 95aa9eba..49632db0 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -1,7 +1,7 @@ # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ #context("test-StoxAnalysisFunctions: tests RecaResult conversion") ecaResult <- readRDS(system.file("testresources","ecaResult.rds", package="RstoxFDA")) @@ -72,7 +72,8 @@ prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEff fpath <- RstoxFDA:::makeTempDirReca() RecaData <- RstoxFDA::convertRecaData(prep, nSamples = 10, burnin = 50, thin=1, resultdir = fpath, delta.age = .001, fitfile = "fit", seed = 42, lgamodel = "log-linear") sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") -est<-Reca::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) + +est<-RstoxFDA:::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) RstoxFDA:::removeTempDirReca(fpath) } diff --git a/inst/tinytest/test-lengthGroupCollapse.R b/inst/tinytest/test-lengthGroupCollapse.R index 46005730..104fd4cf 100644 --- a/inst/tinytest/test-lengthGroupCollapse.R +++ b/inst/tinytest/test-lengthGroupCollapse.R @@ -1,3 +1,7 @@ +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) @@ -41,3 +45,5 @@ reportCAALwoLength <- RstoxFDA::ReportRecaCatchAtLengthAndAge(results) expect_equal(nrow(reportCAALwLength$NbyLength), 2158) expect_equal(nrow(reportCAALwoLength$NbyLength), 26) expect_equal(sum(reportCAALwoLength$NbyLength$CatchAtLength), sum(reportCAALwLength$NbyLength$CatchAtLength)) + +} diff --git a/tests/testAgainstStoxPrep.R b/tests/testAgainstStoxPrep.R index 198916a7..4205d8cc 100644 --- a/tests/testAgainstStoxPrep.R +++ b/tests/testAgainstStoxPrep.R @@ -7,7 +7,7 @@ library(data.table) # ECA tests are only run if Reca is installed. -if (length(system.file(package="Reca"))>0){ +if (nchar(system.file(package="Reca"))>0){ stoxRobj <- readRDS(system.file(package = "RstoxFDA", "testresources", "oldstoxprepreca")) @@ -43,4 +43,4 @@ tabStox <- RstoxFDA::makeResultTableRECA(stoxRecaResults$prediction) #RstoxFDA::plotCatchAtAge(prepRecaResults$prediction, title="RecaPrep results") #RstoxFDA::plotCatchAtAge(stoxRecaResults$prediction, title="StoxPrep results") -} \ No newline at end of file +} From a1354b8a41d4779283c5f8ed72795a3cfa9d3216 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 22:55:41 +0100 Subject: [PATCH 21/24] Fixed issue with unqualified function call in test --- inst/tinytest/test-ecaOuputputConversion.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 49632db0..98fcf1c7 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -71,7 +71,7 @@ prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEff fpath <- RstoxFDA:::makeTempDirReca() RecaData <- RstoxFDA::convertRecaData(prep, nSamples = 10, burnin = 50, thin=1, resultdir = fpath, delta.age = .001, fitfile = "fit", seed = 42, lgamodel = "log-linear") -sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") +RstoxFDA::sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") est<-RstoxFDA:::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) RstoxFDA:::removeTempDirReca(fpath) From 88aac85a6912a19ef83cc62a07c3b43d3dbd2cc9 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 23:03:27 +0100 Subject: [PATCH 22/24] Fixed display of error log --- .github/workflows/check-full.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 357a4565..d7f519f8 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -200,7 +200,7 @@ jobs: - name: Show testthat output if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true + run: find check -name 'tinytest.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload check results From 9392c6c6698fe1e6cb86a08e2d4cf970dd7da7be Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 23:13:03 +0100 Subject: [PATCH 23/24] Hid some functions from namespace. Anticipating possible release for StoX 4.0 that will come before Analytical catch at age functions --- NAMESPACE | 3 - R/StoxAnalyticalBaselineFunctions.R | 6 +- .../test-StoxAnalyticalBaselineFunctions.R | 20 +++---- man/AssignPSUSamplingParameters.Rd | 51 ---------------- man/DefineIndividualSamplingParameters.Rd | 60 ------------------- man/DefinePSUSamplingParameters.Rd | 51 ---------------- 6 files changed, 13 insertions(+), 178 deletions(-) delete mode 100644 man/AssignPSUSamplingParameters.Rd delete mode 100644 man/DefineIndividualSamplingParameters.Rd delete mode 100644 man/DefinePSUSamplingParameters.Rd diff --git a/NAMESPACE b/NAMESPACE index 62f92251..a9105528 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,15 +7,12 @@ export(AddPeriodStoxBiotic) export(AddPeriodStoxLanding) export(AddStratumStoxBiotic) export(AddStratumStoxLanding) -export(AssignPSUSamplingParameters) export(ConvertLengthBiotic) export(ConvertWeightBiotic) export(DefineAgeErrorMatrix) export(DefineAreaPosition) export(DefineCarNeighbours) -export(DefineIndividualSamplingParameters) export(DefineLengthConversionParameters) -export(DefinePSUSamplingParameters) export(DefinePeriod) export(DefineStockSplittingParameters) export(DefineWeightConversionFactor) diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index df12e621..83de8e05 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -153,7 +153,7 @@ parseDesignParameters <- function(filename){ #' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' #' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}} -#' @export +#' @noRd #' @concept StoX-functions #' @concept Analytical estimation #' @md @@ -318,7 +318,7 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi #' @param StratificationColumns names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified'). #' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' #' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} where SampleId refers to the variable 'Haul' on the 'Haul' table in StoxBioticData, and IndividualId refers to the variable 'Individual' on the 'Individual' table of StoxBioticData. -#' @export +#' @noRd #' @concept StoX-functions #' @concept Analytical estimation #' @md @@ -441,7 +441,7 @@ DefineSamplingHierarchy <- function(IndividualSamplingParametersData, Hierarchy= #' @concept StoX-functions #' @concept Analytical estimation #' @md -#' @export +#' @noRd AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticData, DataRecordsId, DefinitionMethod=c("MissingAtRandom")){ checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData") checkMandatory(StoxBioticData, "StoxBioticData") diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R index b8305bbb..55e93bce 100644 --- a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -1,7 +1,7 @@ designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA") #regular read: -designParams <- RstoxFDA::DefinePSUSamplingParameters(NULL, "ResourceFile", designParamsFile) +designParams <- RstoxFDA:::DefinePSUSamplingParameters(NULL, "ResourceFile", designParamsFile) expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParams)) expect_equal(nrow(designParams$SelectionTable), 64) expect_equal(nrow(designParams$SampleTable), 1) @@ -11,8 +11,8 @@ expect_equal(sum(designParams$SelectionTable$HTsamplingWeight), 1) expect_equal(sum(designParams$SelectionTable$HHsamplingWeight), 1) # test assignment to data -expect_error(RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "MissingAtRandom")) -designParamsCorrected <- RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "HaulKey", "MissingAtRandom") +expect_error(RstoxFDA:::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "MissingAtRandom")) +designParamsCorrected <- RstoxFDA:::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "HaulKey", "MissingAtRandom") expect_equal(sum(designParamsCorrected$SelectionTable$HTsamplingWeight),1) expect_equal(sum(designParamsCorrected$SelectionTable$HHsamplingWeight),1) #HT should be approximately the same after non-response correction @@ -22,7 +22,7 @@ expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbabilit #define from data suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) -designParamsSB <- RstoxFDA::DefinePSUSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) +designParamsSB <- RstoxFDA:::DefinePSUSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsSB)) #compare names of output with stratification variables to output without @@ -42,14 +42,14 @@ ds$Individual$IndividualRoundWeight[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] < ds$Sample$CatchFractionNumber[is.na(ds$Sample$CatchFractionNumber)] <- 1000 #Define Individual design, SRS -expect_error(DefineIndividualSamplingParameters(NULL, ds, "SRS")) -srs <- DefineIndividualSamplingParameters(NULL, ds, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) +expect_error(RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "SRS")) +srs <- RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) expect_true(RstoxFDA:::is.IndividualSamplingParametersData(srs)) weights <- srs$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] expect_true(all(abs(weights$meanN-1) < 1e-6)) #Define Individual design, Length stratified -expect_error(DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5), "'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") -ls<-DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualRoundWeight"), LengthInterval = 5) +expect_error(RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5), "'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") +ls<-RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualRoundWeight"), LengthInterval = 5) expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ls)) weights <- ls$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] expect_true(all(abs(weights$meanN-1) < 1e-6)) @@ -57,7 +57,7 @@ expect_true(all(abs(weights$meanN-1) < 1e-6)) #Define Individual design, stratified, setting strata by length as in Length stratified bioStrat <- ds bioStrat$Individual$LStrat <- as.character(cut(bioStrat$Individual$IndividualTotalLength, seq(0,max(bioStrat$Individual$IndividualTotalLength)+5,5), right = F)) -ss<-DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) +ss<-RstoxFDA:::DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ss)) weights <- ss$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] expect_true(all(abs(weights$meanN-1) < 1e-6)) @@ -69,5 +69,5 @@ expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) #test estimate with HansenHurwitzDomainEstimate data <- RstoxFDA::CatchLotteryExample -indSampling <- RstoxFDA::DefineIndividualSamplingParameters(NULL, data, "SRS", c("IndividualAge")) +indSampling <- RstoxFDA:::DefineIndividualSamplingParameters(NULL, data, "SRS", c("IndividualAge")) #domainEst <- HansenHurwitzDomainEstimate() \ No newline at end of file diff --git a/man/AssignPSUSamplingParameters.Rd b/man/AssignPSUSamplingParameters.Rd deleted file mode 100644 index 41ade970..00000000 --- a/man/AssignPSUSamplingParameters.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R -\name{AssignPSUSamplingParameters} -\alias{AssignPSUSamplingParameters} -\title{Assign PSU Sampling Parameters} -\usage{ -AssignPSUSamplingParameters( - PSUSamplingParametersData, - StoxBioticData, - DataRecordsId, - DefinitionMethod = c("MissingAtRandom") -) -} -\arguments{ -\item{PSUSamplingParametersData}{~\code{\link[RstoxFDA]{PSUSamplingParametersData}} with sampling parameters for PSU selection} - -\item{StoxBioticData}{~\code{\link[RstoxData]{StoxBioticData}} with data records for responding PSUs.} - -\item{DataRecordsId}{name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs} - -\item{DefinitionMethod}{The method for dealing with non-response, e.g. 'MissingAtRandon'} -} -\value{ -~\code{\link[RstoxFDA]{PSUSamplingParametersData}} -} -\description{ -Assigns data records to PSU Sampling Parameters and provides non-response adjustments for -selected PSUs that was not sampled. -} -\details{ -Some sampling parameters provided in ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} are only -interpretable for sampling with complete response. This function adjusts these parameters, removes non-respondents from the -~\code{\link[RstoxFDA]{PSUSamplingParametersData}}, and checks that all responding PSUs are present in data records. - -If any respondants (rows of the SelectionTable of PSUSamplingParametersData that does not have NA for SamplingUnitId) are not -found in 'SamplingUnitId', execution halts with an error. - -Response after selection can generally be considered a process that modifies the sampling parameters that are set by design. -Typically sample size, InclusionProbabilities and normalized SamplingWeights need to be adjusted as non-respondents are removed, -since these are depend of the entire sample, not just the sampling unit they are assigned to. -SelectionProbabilites are by definition set for a single draw of a single sampling unit from the population and are valid even -when response is not complete. - -Treatment of non-response requires some assumption about systematic differences between respondents and non-respondents. -These assumptions are specified via the argument 'DefinitionMethod' and the following options are available: -\describe{ -\item{MissingAtRandom}{A response propensity is estimated for each stratum as the fraction of the sample resonding, and sample size (n) and InclusionProbability are adjusted by multiplying with this propensity. Sampling weights are adjusted by dividing them with their sum over repsondents in a stratum.} -} -} -\concept{Analytical estimation} -\concept{StoX-functions} diff --git a/man/DefineIndividualSamplingParameters.Rd b/man/DefineIndividualSamplingParameters.Rd deleted file mode 100644 index cf64fb51..00000000 --- a/man/DefineIndividualSamplingParameters.Rd +++ /dev/null @@ -1,60 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R -\name{DefineIndividualSamplingParameters} -\alias{DefineIndividualSamplingParameters} -\title{Define Sampling Parameters for Individuals} -\usage{ -DefineIndividualSamplingParameters( - processData, - StoxBioticData, - DefinitionMethod = c("SRS", "Stratified", "LengthStratified"), - Parameters = c(), - LengthInterval = numeric(), - StratificationColumns = character(), - UseProcessData = FALSE -) -} -\arguments{ -\item{processData}{\code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function.} - -\item{StoxBioticData}{Data to define individual sampling parameters for} - -\item{DefinitionMethod}{Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details.} - -\item{Parameters}{Measurements / observations of individuals included in the design specification. Must all be columns on the Individual-table of StoxBioticData.} - -\item{LengthInterval}{width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on.} - -\item{StratificationColumns}{names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified').} - -\item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} -} -\value{ -\code{\link[RstoxFDA]{IndividualSamplingParametersData}} where SampleId refers to the variable 'Haul' on the 'Haul' table in StoxBioticData, and IndividualId refers to the variable 'Individual' on the 'Individual' table of StoxBioticData. -} -\description{ -Define approximate sampling parameters for the selection of individuals from a haul. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, -and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a Haul (the table Haul in StoxBioticData). -} -\details{ -StoxBioticData represents sorting of species as a separate level in the hierarchy (SpeciesCategory) and Samples are selected in Stratified from the species categories. -This represent sampling stratified on taxons in addition to some additional stratification criteria in the cases where more than one sample is present for -a species-category in a Haul. The exact criteria for stratification is not important for the calculation of sampling parameters, but only clearly encoded criteria can be used -in subsequent analysis, so sampling parameters are reported stratified only on SpeciesCategory. Any other stratification has been incorporated into selection or inclusion probabilities. - -Sampling parameters are approximately inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, -specified by the appropriate 'DefinitionMethod'. - -Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. -In this way selection of individuals may be specified differently for different parameters. -For instance one may define one design for length-measurements and another for length-stratified age, weight and sex observations. - -The available DefinitionMethods specifies how Individuals are selected from a Sample, and are: -\describe{ -\item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement from each Sample.} -\item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} -\item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} -} -} -\concept{Analytical estimation} -\concept{StoX-functions} diff --git a/man/DefinePSUSamplingParameters.Rd b/man/DefinePSUSamplingParameters.Rd deleted file mode 100644 index b4cb1911..00000000 --- a/man/DefinePSUSamplingParameters.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/StoxAnalyticalBaselineFunctions.R -\name{DefinePSUSamplingParameters} -\alias{DefinePSUSamplingParameters} -\title{Define PSU Sampling Design Parameters} -\usage{ -DefinePSUSamplingParameters( - processData, - DefinitionMethod = c("ResourceFile", "AdHocStoxBiotic"), - FileName = character(), - StoxBioticData, - SamplingUnitId = character(), - StratificationColumns = character(), - UseProcessData = F -) -} -\arguments{ -\item{processData}{\code{\link[RstoxFDA]{PSUSamplingParametersData}} as returned from this function.} - -\item{DefinitionMethod}{'ResourceFile' or 'AdHocStoxBiotic'} - -\item{FileName}{path to resource file} - -\item{StoxBioticData}{\code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from} - -\item{SamplingUnitId}{name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for.} - -\item{StratificationColumns}{name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling.} - -\item{UseProcessData}{If TRUE, bypasses execution of function and returns existing 'processData'} -} -\value{ -\code{\link[RstoxFDA]{PSUSamplingParametersData}} -} -\description{ -Define sampling parameters for Primary Sampling Units in multi-stage sampling. -} -\details{ -The DefintionMethod 'ResourceFile' reads sampling parameters from a tab delimited file with headers corresponding to those listed in -\code{\link[RstoxFDA]{PSUSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. -Any columns not named in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are assumed to be stratification variables. -The conditions listed for the variables in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are checked upon reading the data, and -execution halts with error if any are violated. - -The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, -assuming equal probability sampling with fixed sample size, selection without replacement and complete response. -This is a reasonable approximation if within-strata sampling is approximately simple random selections, -and non-response is believed to be at random. -} -\concept{Analytical estimation} -\concept{StoX-functions} From c1133ddb08f65e59c0bd52064c216f602aa17818 Mon Sep 17 00:00:00 2001 From: Edvin Fuglebakk Date: Thu, 11 Jan 2024 23:39:31 +0100 Subject: [PATCH 24/24] Cleaned up some notes --- .github/workflows/check-full.yaml | 2 +- R/CatchLotterySamplingExample-datadoc.R | 3 ++- R/StoxAnalyticalBaselineFunctions.R | 20 ++++++++++---------- man/CatchLotterySamplingExample.Rd | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index d7f519f8..6dc8651e 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -198,7 +198,7 @@ jobs: run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Show testthat output + - name: Show unit test output if: always() run: find check -name 'tinytest.Rout*' -exec cat '{}' \; || true shell: bash diff --git a/R/CatchLotterySamplingExample-datadoc.R b/R/CatchLotterySamplingExample-datadoc.R index 5adbc5fc..80d881f7 100644 --- a/R/CatchLotterySamplingExample-datadoc.R +++ b/R/CatchLotterySamplingExample-datadoc.R @@ -18,6 +18,7 @@ #' @examples #' #all selected PSU that where actuall sampled are provided in CatchLotteryExample #' sum(!is.na(CatchLotterySamplingExample$SelectionTable$SamplingUnitId)) -#' sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId %in% CatchLotteryExample$Haul$HaulKey) +#' sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId %in% +#' CatchLotteryExample$Haul$HaulKey) "CatchLotterySamplingExample" diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R index 83de8e05..4d04a195 100644 --- a/R/StoxAnalyticalBaselineFunctions.R +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -33,11 +33,11 @@ assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, Str flatStox$SamplingUnitId <- flatStox[[SamplingUnitId]] flatStox$Order <- as.numeric(NA) - CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), HTsamplingWeight=1/length(unique(SamplingUnitId)), SelectionProbability=as.numeric(NA), HHsamplingWeight=as.numeric(NA), SelectionDescription=as.character(NA)), by=c("Stratum")] + CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), HTsamplingWeight=1/length(unique(get("SamplingUnitId"))), SelectionProbability=as.numeric(NA), HHsamplingWeight=as.numeric(NA), SelectionDescription=as.character(NA)), by=c("Stratum")] selectionUnits <- flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")] selectionUnits <- selectionUnits[!duplicated(selectionUnits$SamplingUnitId),] selectionTable <- merge(flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")], CommonSelectionData) - sampleTable <- flatStox[,list(N=as.numeric(NA), n=length(unique(SamplingUnitId)), SelectionMethod="FSWR", FrameDescription=as.character(NA)), by=c("Stratum")] + sampleTable <- flatStox[,list(N=as.numeric(NA), n=length(unique(get("SamplingUnitId"))), SelectionMethod="FSWR", FrameDescription=as.character(NA)), by=c("Stratum")] sampleTable <- sampleTable[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] stratificationTable <- flatStox[,.SD,.SDcol=c("Stratum", StratificationColumns)] stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] @@ -184,7 +184,7 @@ collapseStrataIndividualDesignParamaters <- function(designParam, collapseVariab stop("The following are specified as strata to collapse, but are not StratificationVariables:", paste(missing, collapse=",")) } - Nstrata <- designParam$StratificationVariables[,list(Nstrata=.N), by="SampleId"] + Nstrata <- designParam$StratificationVariables[,list(Nstrata=get(".N")), by="SampleId"] if (all(Nstrata$Nstrata==1)){ return(designParam) } @@ -203,19 +203,19 @@ collapseStrataIndividualDesignParamaters <- function(designParam, collapseVariab designParam$SampleTable$Stratum <- designParam$StratificationVariables$Stratum[sampleStratumIndex] designParam$SelectionTable$Stratum <- designParam$StratificationVariables$Stratum[selectionStratumIndex] - NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(SelectionMethod))), by=c("SampleId", "Stratum")] + NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(get("SelectionMethod")))), by=c("SampleId", "Stratum")] if (any(NselectionMethods$NselMet>1)){ stop("Cannot collapse strate with heterogenous selection methods") } - weights <- designParam$SelectionTable[,list(HTsum=sum(1/InclusionProbability), HHsum=sum(1/SelectionProbability)),by=c("SampleId", "Stratum")] + weights <- designParam$SelectionTable[,list(HTsum=sum(1/get("InclusionProbability")), HHsum=sum(1/get("SelectionProbability"))),by=c("SampleId", "Stratum")] designParam$SelectionTable <- merge(designParam$SelectionTable, weights, by=c("SampleId", "Stratum")) designParam$SelectionTable$HTsamplingWeight <- 1/(designParam$SelectionTable$InclusionProbability * designParam$SelectionTable$HTsum) designParam$SelectionTable$HHsamplingWeight <- 1/(designParam$SelectionTable$SelectionProbability * designParam$SelectionTable$HHsum) designParam$SelectionTable$HHsum <- NULL designParam$SelectionTable$HTsum <- NULL - designParam$SampleTable <- designParam$SampleTable[,list(N=sum(N), n=sum(n), SelectionMethod=SelectionMethod[1], SampleDescription=as.character(NA)), by=c("SampleId", "Stratum")] + designParam$SampleTable <- designParam$SampleTable[,list(N=sum(get("N")), n=sum(get("n")), SelectionMethod=get("SelectionMethod")[1], SampleDescription=as.character(NA)), by=c("SampleId", "Stratum")] designParam$StratificationVariables <- designParam$StratificationVariables[!duplicated(paste(designParam$StratificationVariables$SampleId, designParam$StratificationVariables$Stratum)),.SD, .SDcol=c("SampleId", "Stratum", retain)] return(designParam) @@ -254,8 +254,8 @@ extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, Stratifi stratificationTable <- individuals[!duplicated(paste(individuals$SampleId, individuals$Stratum)), .SD,.SDcol=c("SampleId", "Stratum", StratificationColumns)] individuals$Sampled <- hasParam - stratumTotals <- individuals[,list(totalInStratum=.N, sampledInStratum=sum(Sampled)), by=c("Stratum", "SampleId")] - sampleTotals <- individuals[,list(totalInSample=.N), by=c("SampleId")] + stratumTotals <- individuals[,list(totalInStratum=get(".N"), sampledInStratum=sum(get("Sampled"))), by=c("Stratum", "SampleId")] + sampleTotals <- individuals[,list(totalInSample=get(".N")), by=c("SampleId")] stratumFraction <- merge(stratumTotals, sampleTotals, by="SampleId") stratumFraction$StratumFraction <- stratumFraction$totalInStratum / stratumFraction$totalInSample individuals <- merge(individuals, stratumFraction, by=c("SampleId", "Stratum")) @@ -465,7 +465,7 @@ AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticDat } if (DefinitionMethod == "MissingAtRandom"){ - responsePropensity <- PSUSamplingParametersData$SelectionTable[,list(ResponsePropensity=sum(!is.na(SamplingUnitId))/.N), by=c("Stratum")] + responsePropensity <- PSUSamplingParametersData$SelectionTable[,list(ResponsePropensity=sum(!is.na(get("SamplingUnitId")))/get(".N")), by=c("Stratum")] PSUSamplingParametersData$SampleTable$n <- PSUSamplingParametersData$SampleTable$n * responsePropensity$ResponsePropensity[match(PSUSamplingParametersData$SampleTable$Stratum, responsePropensity$Stratum)] @@ -476,7 +476,7 @@ AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticDat PSUSamplingParametersData$SelectionTable <- PSUSamplingParametersData$SelectionTable[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] #correct normalized sampling weights - weights <- PSUSamplingParametersData$SelectionTable[,list(HHsum=sum(HHsamplingWeight), HTsum=sum(HTsamplingWeight)), by=c("Stratum")] + weights <- PSUSamplingParametersData$SelectionTable[,list(HHsum=sum(get("HHsamplingWeight")), HTsum=sum(get("HTsamplingWeight"))), by=c("Stratum")] PSUSamplingParametersData$SelectionTable$HTsamplingWeight <- PSUSamplingParametersData$SelectionTable$HTsamplingWeight / weights$HTsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] PSUSamplingParametersData$SelectionTable$HHsamplingWeight <- PSUSamplingParametersData$SelectionTable$HHsamplingWeight / weights$HHsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] diff --git a/man/CatchLotterySamplingExample.Rd b/man/CatchLotterySamplingExample.Rd index 9d70f54c..70e67ede 100644 --- a/man/CatchLotterySamplingExample.Rd +++ b/man/CatchLotterySamplingExample.Rd @@ -21,7 +21,8 @@ The corresponding samples are provided in \code{\link[RstoxFDA]{CatchLotteryExam \examples{ #all selected PSU that where actuall sampled are provided in CatchLotteryExample sum(!is.na(CatchLotterySamplingExample$SelectionTable$SamplingUnitId)) - sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId \%in\% CatchLotteryExample$Haul$HaulKey) + sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId \%in\% + CatchLotteryExample$Haul$HaulKey) } \concept{Analytical estimation} \keyword{datasets}