From bb5dfc1585147465beb01eb0f8393f042214369a Mon Sep 17 00:00:00 2001 From: "georgina.anderson" Date: Tue, 23 Jan 2024 16:10:17 +0000 Subject: [PATCH] Further changes to phe_sii documentation. Added note about references to PHE in package and function names. --- R/SII_function.R | 38 ++++++----- README.Rmd | 4 +- README.md | 15 +++-- man/phe_sii.Rd | 172 ++++++++++++++++++++++++++--------------------- 4 files changed, 132 insertions(+), 97 deletions(-) diff --git a/R/SII_function.R b/R/SII_function.R index 34e6fe4..c84a5df 100644 --- a/R/SII_function.R +++ b/R/SII_function.R @@ -49,23 +49,25 @@ #' method. #' #' Use in conjunction with the \code{transform} parameter in calculation of the -#' SII: It is recommended that rates and proportions, and their confidence -#' limits, are transformed prior to calculation of the SII by setting the -#' \code{transform} parameter to TRUE for these indicator types. This will -#' perform a log transformation for rates, or logit for proportions, and return -#' outputs transformed back to the original units of the indicator. These -#' transformations are recommended to improve the linearity between the -#' indicator values and the quantile, which is an assumption of the method. A -#' user-provided standard error will not be accepted when the \code{transform] -#' parameter is set to TRUE as the confidence limits are required for this -#' transformation. +#' SII: It is recommended that rates and proportions are transformed prior to +#' calculation of the SII by setting the \code{transform} parameter to TRUE for +#' these indicator types. This will perform a log transformation for rates, or +#' logit for proportions, and return outputs transformed back to the original +#' units of the indicator. These transformations are recommended to improve the +#' linearity between the indicator values and the quantile, which is an +#' assumption of the method. 
A user-provided standard error will not be accepted +#' when the \code{transform} parameter is set to TRUE as the confidence limits +#' are required for this transformation. #' -#' Use in calculation of the standard error: Rates and proportions, -#' and their confidence limits, are transformed prior to calculation of the -#' standard error. This is because it is assumed that the confidence interval +#' Use in calculation of the standard error: Rates and proportions, and their +#' confidence limits, are transformed prior to calculation of the standard error +#' for each quantile. This is because it is assumed that the confidence interval #' around the indicator value is non-symmetric for these indicator types. Note #' that this transformation is not controlled by the \code{transform} parameter -#' and is applied based on the value of the \code{value_type} parameter only. +#' and is applied based on the value of the \code{value_type} parameter only. A +#' user-provided standard error will not be accepted when the \code{transform} +#' parameter is set to TRUE as the confidence limits are required for this +#' transformation. #' #' @section Warning: #' @@ -92,7 +94,7 @@ #' does not need to be supplied for proportions if count and population are #' given); unquoted string; no default #' @param value_type indicates the indicator type (1 = rate, 2 = proportion, 0 = -#' other). The `value_type` argument is used to determine whether data should +#' other). The \code{value_type} argument is used to determine whether data should #' be transformed prior to calculation of the standard error and/or SII. 
See -#' the \code{Tansformations} section for full details; integer; default 0 +#' the \code{Transformations} section for full details; integer; default 0 #' @param transform option to transform input rates or proportions prior to @@ -196,10 +198,11 @@ #' decile, #' population, #' value_type = 1, +#' transform = TRUE, #' value = value, -#' se = StandardError, +#' lower_cl = lowerCL, +#' upper_cl = upperCL, #' confidence = c(0.95, 0.998), -#' transform = TRUE, #' repetitions = 10000, #' rii = TRUE, #' type = "standard") @@ -210,6 +213,7 @@ #' the inputted data.frame. #' #' @family PHEindicatormethods package functions +# ------------------------------------------------------------------------------------------------- phe_sii <- function(data, quantile, population, # compulsory fields x = NULL, # optional fields diff --git a/README.Rmd b/README.Rmd index 84b36d2..cf4dd09 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,9 @@ knitr::opts_chunk$set( # PHEindicatormethods -This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as those presented via Fingertips. It provides functions for the generation of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope Index of Inequality including confidence intervals for these statistics, and a function for assigning data to quantiles. +This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as those presented via [Fingertips](https://fingertips.phe.org.uk/). It provides functions for the generation of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope Index of Inequality including confidence intervals for these statistics, and a function for assigning data to quantiles. + +In October 2021 Public Health England (PHE) was disbanded and as a result this package is now owned by the Department of Health and Social Care.
It will continue to be supported and, to prevent breaking changes, there are currently no immediate plans to rename the package or its functions in light of this organisational change. Any feedback would be appreciated and can be provided using the Issues section of the [PHEindicatormethods GitHub repository](https://github.com/ukhsa-collaboration/PHEindicatormethods). diff --git a/README.md b/README.md index 2904234..e32e600 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,17 @@ This is an R package to support analysts in the execution of statistical methods approved for use in the production of PHE indicators such as -those presented via Fingertips. It provides functions for the generation -of Proportions, Rates, DSRs, ISRs, Means, Life Expectancy and Slope -Index of Inequality including confidence intervals for these statistics, -and a function for assigning data to quantiles. +those presented via [Fingertips](https://fingertips.phe.org.uk/). It +provides functions for the generation of Proportions, Rates, DSRs, ISRs, +Means, Life Expectancy and Slope Index of Inequality including +confidence intervals for these statistics, and a function for assigning +data to quantiles. + +In October 2021 Public Health England (PHE) was disbanded and as a +result this package is now owned by the Department of Health and Social +Care. It will continue to be supported and, to prevent breaking changes, +there are currently no immediate plans to rename the package or its +functions in light of this organisational change.
Any feedback would be appreciated and can be provided using the Issues section of the [PHEindicatormethods GitHub diff --git a/man/phe_sii.Rd b/man/phe_sii.Rd index bbed5c8..cf4127a 100644 --- a/man/phe_sii.Rd +++ b/man/phe_sii.Rd @@ -11,6 +11,7 @@ phe_sii( x = NULL, value = NULL, value_type = 0, + transform = FALSE, lower_cl = NULL, upper_cl = NULL, se = NULL, @@ -19,74 +20,87 @@ phe_sii( confidence = 0.95, rii = FALSE, intercept = FALSE, - transform = FALSE, reliability_stat = FALSE, type = "full" ) } \arguments{ -\item{data}{data.frame containing the required input fields, pre-grouped if an SII is required for -each subgroup; unquoted string; no default} - -\item{quantile}{field name within data that contains the quantile label (e.g. decile). The number -of quantiles should be between 5 and 100; unquoted string; no default} - -\item{population}{field name within data that contains the quantile populations (ie, denominator). -Non-zero populations are required for all quantiles to calculate SII for an area; +\item{data}{data.frame containing the required input fields, pre-grouped if +an SII is required for each subgroup; unquoted string; no default} + +\item{quantile}{field name within data that contains the quantile label (e.g. +decile). The number of quantiles should be between 5 and 100; unquoted +string; no default} + +\item{population}{field name within data that contains the quantile +populations (ie, denominator). Non-zero populations are required for all +quantiles to calculate SII for an area; unquoted string; no default} + +\item{x}{(for indicators that are proportions) field name within data that +contains the members of the population with the attribute of interest (ie, +numerator). 
This will be divided by population to calculate a proportion as +the indicator value (if value field is not provided); unquoted string; no +default} + +\item{value}{field name within data that contains the indicator value (this +does not need to be supplied for proportions if count and population are +given); unquoted string; no default} + +\item{value_type}{indicates the indicator type (1 = rate, 2 = proportion, 0 = +other). The \code{value_type} argument is used to determine whether data should +be transformed prior to calculation of the standard error and/or SII. See +the \code{Transformations} section for full details; integer; default 0} + +\item{transform}{option to transform input rates or proportions prior to +calculation of the SII. See the \code{Transformations} section for full +details; logical; default FALSE} + +\item{lower_cl}{field name within data that contains 95 percent lower +confidence limit of indicator value (to calculate standard error of +indicator value). This field is needed if the se field is not supplied; unquoted string; no default} -\item{x}{(for indicators that are proportions) field name within data that contains -the members of the population with the attribute of interest (ie, numerator). This will be -divided by population to calculate a proportion as the indicator value -(if value field is not provided); unquoted string; no default} - -\item{value}{field name within data that contains the indicator value (this does not need to be supplied -for proportions if count and population are given); unquoted string; no default} - -\item{value_type}{indicates the indicator type (1 = rate, 2 = proportion, 0 = other); -integer; default 0} - -\item{lower_cl}{field name within data that contains 95 percent lower confidence limit -of indicator value (to calculate standard error of indicator value).
This field is needed -if the se field is not supplied; unquoted string; no default} +\item{upper_cl}{field name within data that contains 95 percent upper +confidence limit of indicator value (to calculate standard error of +indicator value). This field is needed if the se field is not supplied; +unquoted string; no default} -\item{upper_cl}{field name within data that contains 95 percent upper confidence limit -of indicator value (to calculate standard error of indicator value). This field is needed -if the se field is not supplied; unquoted string; no default} +\item{se}{field name within data that contains the standard error of the +indicator value. If not supplied, this will be calculated from the 95 +percent lower and upper confidence limits (i.e. one or the other of these +fields must be supplied); unquoted string; no default} -\item{se}{field name within data that contains the standard error of the indicator -value. If not supplied, this will be calculated from the 95 percent lower and upper confidence -limits (i.e. one or the other of these fields must be supplied); unquoted string; no default} +\item{multiplier}{factor to multiply the SII and SII confidence limits by +(e.g. set to 100 to return prevalences on a percentage scale between 0 and +100). If the multiplier is negative, the inverse of the RII is taken to +account for the change in polarity; numeric; default 1;} -\item{multiplier}{factor to multiply the SII and SII confidence limits by (e.g. set to 100 to return -prevalences on a percentage scale between 0 and 100). If the multiplier is negative, the -inverse of the RII is taken to account for the change in polarity; numeric; default 1;} +\item{repetitions}{number of random samples to perform to return confidence +interval of SII (and RII). 
Minimum is 1000, no maximum (though the more +repetitions, the longer the run time); numeric; default 100,000} -\item{repetitions}{number of random samples to perform to return confidence interval of SII (and RII). -Minimum is 1000, no maximum (though the more repetitions, the longer the run time); -numeric; default 100,000} +\item{confidence}{confidence level used to calculate the lower and upper +confidence limits of SII, expressed as a number between 0.9 and 1, or 90 +and 100. It can be a vector of 0.95 and 0.998, for example, to output both +95 percent and 99.8 percent CIs; numeric; default 0.95} -\item{confidence}{confidence level used to calculate the lower and upper confidence limits of SII, -expressed as a number between 0.9 and 1, or 90 and 100. It can be a vector of 0.95 and 0.998, -for example, to output both 95 percent and 99.8 percent CIs; numeric; default 0.95} +\item{rii}{option to return the Relative Index of Inequality (RII) with +associated confidence limits as well as the SII; logical; default FALSE} -\item{rii}{option to return the Relative Index of Inequality (RII) with associated confidence limits -as well as the SII; logical; default FALSE} +\item{intercept}{option to return the intercept value of the regression line +(y value where x=0); logical; default FALSE} -\item{intercept}{option to return the intercept value of the regression line (y value where x=0); +\item{reliability_stat}{option to carry out the SII confidence interval +simulation 10 times instead of once and return the Mean Average Difference +between the first and subsequent samples (as a measure of the amount of +variation). 
Warning: this will significantly increase run time of the +function and should first be tested on a small number of repetitions; logical; default FALSE} -\item{transform}{option to transform input data prior to calculation of the SII, where there is not -a linear relationship between the indicator values and the quantile; logical; default FALSE} - -\item{reliability_stat}{option to carry out the SII confidence interval simulation 10 times instead -of once and return the Mean Average Difference between the first and subsequent samples (as a -measure of the amount of variation). Warning: this will significantly increase run time of the -function and should first be tested on a small number of repetitions; logical; default FALSE} - -\item{type}{"full" output includes columns in the output dataset specifying the parameters the user -has input to the function (value_type, multiplier, CI_confidence, CI_method); character string -either "full" or "standard"; default "full"} +\item{type}{"full" output includes columns in the output dataset specifying +the parameters the user has input to the function (value_type, multiplier, +CI_confidence, CI_method); character string either "full" or "standard"; +default "full"} } \value{ The SII with lower and upper confidence limits for each subgroup of @@ -134,20 +148,32 @@ of the fitted value at \code{x=1,Y1} and the fitted value at \code{x=0,Y0}. which can be calculated as: \code{RII = (Y0 + SII)/Y0} } -\section{Function arguments}{ - - -The indicator type can be specified via the \code{value_type} parameter. It -is recommended that rate and proportion indicators are transformed for -calculation of the SII using the \code{transform} parameter set to TRUE. This -will perform a log transformation for rates, and logit for proportions, and -return outputs transformed back to the original units of the indicator. 
-Where the \code{transform} parameter is set to FALSE transformations are -applied to the indicator value and its confidence limits before calculating -the standard error in cases where the confidence interval around the indicator -value is likely to be non-symmetric. If the standard error is supplied directly -to the function from the input dataset, this is used instead of calculating -one from the indicator confidence limits. +\section{Transformations}{ + + +The indicator type can be specified as 1 (rate), 2 (proportion) or 0 (other), +using the \code{value_type} parameter. This setting determines the data +transformations that will be applied in the following two parts of the +method. + +Use in conjunction with the \code{transform} parameter in calculation of the +SII: It is recommended that rates and proportions, and their confidence +limits, are transformed prior to calculation of the SII by setting the +\code{transform} parameter to TRUE for these indicator types. This will +perform a log transformation for rates, or logit for proportions, and return +outputs transformed back to the original units of the indicator. These +transformations are recommended to improve the linearity between the +indicator values and the quantile, which is an assumption of the method. A +user-provided standard error will not be accepted when the \code{transform} +parameter is set to TRUE as the confidence limits are required for this +transformation. + +Use in calculation of the standard error: Rates and proportions, +and their confidence limits, are transformed prior to calculation of the +standard error. This is because it is assumed that the confidence interval +around the indicator value is non-symmetric for these indicator types. Note +that this transformation is not controlled by the \code{transform} parameter +and is applied based on the value of the \code{value_type} parameter only. 
} \section{Warning}{ @@ -160,12 +186,6 @@ function does not include checks for linearity or stability; it is the user's responsibility to ensure the input data is suitable for the SII calculation. } -\section{Notes}{ - - -This function is using nest and unnest functions from tidyr version 1.0.0. -} - \examples{ library(dplyr) @@ -208,13 +228,15 @@ phe_sii(group_by(data, area), rii = TRUE, type = "standard") -# multiple confidence intervals +# multiple confidence intervals, log transforming the data if they are rates phe_sii(group_by(data, area), decile, population, - value_type = 0, + value_type = 1, + transform = TRUE, value = value, - se = StandardError, + lower_cl = lowerCL, + upper_cl = upperCL, confidence = c(0.95, 0.998), repetitions = 10000, rii = TRUE,