From 294b1851a12b21deb10ef77084b9a977c88c2735 Mon Sep 17 00:00:00 2001 From: Phil Chalmers Date: Mon, 19 Sep 2022 11:23:28 -0400 Subject: [PATCH] default to using origina avov in gCD (but argument to switch) --- DESCRIPTION | 4 ++-- R/gCD.R | 50 ++++++++++++++++++++++++++------------- man/gCD.Rd | 12 ++++++++-- tests/testthat/test-gCD.R | 15 +++++++----- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1a7511d..71c7a8b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: faoutlier -Version: 0.7.5 +Version: 0.7.6 Type: Package Title: Influential Case Detection Methods for Factor Analysis and Structural Equation Models @@ -29,4 +29,4 @@ LazyData: yes Repository: github License: GPL (>= 2) URL: https://github.com/philchalmers/faoutlier -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 diff --git a/R/gCD.R b/R/gCD.R index d9b45bc..e0f93c4 100644 --- a/R/gCD.R +++ b/R/gCD.R @@ -14,6 +14,9 @@ #' exploratory factor analysis (requires complete dataset, i.e., no missing). #' If \code{class(model)} is a sem (semmod), or lavaan (character), #' then a confirmatory approach is performed instead +#' @param vcov_drop logical; should the variance-covariance matrix of the parameter +#' estimates be based on the unique \code{data[-i, ]} models +#' (Pek and MacCallum, 2011) or original \code{data}? #' @param progress logical; display the progress of the computations in the console? #' @author Phil Chalmers \email{rphilip.chalmers@@gmail.com} #' @seealso @@ -27,6 +30,11 @@ #' Flora, D. B., LaBrish, C. & Chalmers, R. P. (2012). Old and new ideas for data #' screening and assumption testing for exploratory and confirmatory factor analysis. #' \emph{Frontiers in Psychology, 3}, 1-21. \doi{10.3389/fpsyg.2012.00055} +#' +#' Pek, J. & MacCallum, R. C. (2011). Sensitivity Analysis in Structural +#' Equation Models: Cases and Their Influence. Multivariate Behavioral Research, +#' 46(2), 202-228. +#' #' @keywords cooks #' @export gCD #' @examples @@ -84,46 +92,46 @@ #' plot(result) #' #' mod <- mirt(dat, model) -#' res <- residuals(mod, type = 'exp') +#' res <- mirt::residuals(mod, type = 'exp') #' cbind(res, gCD=round(result$gCD, 3)) #' #' } -gCD <- function(data, model, progress = TRUE, ...) +gCD <- function(data, model, vcov_drop = FALSE, progress = TRUE, ...) { - f_numeric <- function(ind, data, model, theta){ + f_numeric <- function(ind, data, model, theta, inv_vcov, vcov_drop){ tmp1 <- cor(data[-ind,]) tmp2 <- mlfact(tmp1, model) h2 <- tmp2$par - inv_vcovmat <- tmp2$hessian + inv_vcovmat <- if(vcov_drop) tmp2$hessian else inv_vcov vcovmat <- solve(inv_vcovmat) DFBETA <- (theta - h2)/sqrt(diag(vcovmat)) gCD <- t(theta - h2) %*% inv_vcovmat %*% (theta - h2) list(dfbeta = DFBETA, gCD = gCD) } - f_sem <- function(ind, data, model, objective, theta, ...){ + f_sem <- function(ind, data, model, objective, theta, vcov, vcov_drop, ...){ tmp2 <- sem::sem(model, data=data[-ind, ], objective=objective, ...) h2 <- tmp2$coeff - vcovmat <- tmp2$vcov + vcovmat <- if(vcov_drop) tmp2$vcov else vcov inv_vcovmat <- solve(vcovmat) DFBETA <- (theta - h2)/sqrt(diag(vcovmat)) gCD <- t(theta - h2) %*% inv_vcovmat %*% (theta - h2) list(dfbeta = DFBETA, gCD = gCD) } - f_lavaan <- function(ind, data, model, theta, ...){ + f_lavaan <- function(ind, data, model, theta, vcov, vcov_drop, ...){ tmp <- lavaan::sem(model, data[-ind, ], ...) h2 <- lavaan::coef(tmp) - vcovmat <- lavaan::vcov(tmp) + vcovmat <- if(vcov_drop) lavaan::vcov(tmp) else vcov inv_vcovmat <- solve(vcovmat) DFBETA <- (theta - h2)/sqrt(diag(vcovmat)) gCD <- t(theta - h2) %*% inv_vcovmat %*% (theta - h2) list(dfbeta = DFBETA, gCD = gCD) } - f_mirt <- function(ind, data, large, model, theta, sv, ...){ + f_mirt <- function(ind, data, large, model, theta, sv, vcov, vcov_drop, ...){ large$Freq[[1L]][ind] <- large$Freq[[1L]][ind] - 1L - tmp <- mirt::mirt(data, model, large=large, SE=TRUE, + tmp <- mirt::mirt(data, model, large=large, SE=vcov_drop, pars=sv, verbose=FALSE, ...) h2 <- mirt::extract.mirt(tmp, 'parvec') - vcovmat <- mirt::vcov(tmp) + vcovmat <- if(vcov_drop) mirt::vcov(tmp) else vcov inv_vcovmat <- solve(vcovmat) DFBETA <- (theta - h2)/sqrt(diag(vcovmat)) gCD <- t(theta - h2) %*% inv_vcovmat %*% (theta - h2) @@ -132,34 +140,44 @@ gCD <- function(data, model, progress = TRUE, ...) N <- nrow(data) index <- as.list(1:N) + inv_vcov <- NULL if(is.numeric(model)){ if(any(is.na(data))) stop('Numeric model requires complete dataset (no NA\'s)') mod <- mlfact(cor(data), model) theta <- mod$par + if(!vcov_drop) inv_vcov <- mod$hessian tmp <- myLapply(index, FUN=f_numeric, progress=progress, - theta=theta, model=model, data=data) + theta=theta, model=model, data=data, + inv_vcov=inv_vcov, vcov_drop=vcov_drop) } else if(class(model) == "semmod"){ objective <- if(any(is.na(data))) sem::objectiveFIML else sem::objectiveML mod <- sem::sem(model, data=data, objective=objective, ...) theta <- mod$coeff + if(!vcov_drop) vcov <- mod$vcov tmp <- myLapply(index, FUN=f_sem, progress=progress, theta=theta, model=model, data=data, - objective=objective, ...) + objective=objective, vcov=vcov, + vcov_drop=vcov_drop, ...) } else if(class(model) == "character"){ mod <- lavaan::sem(model, data=data, ...) theta <- lavaan::coef(mod) + if(!vcov_drop) vcov <- lavaan::vcov(mod) tmp <- myLapply(index, FUN=f_lavaan, progress=progress, - theta=theta, model=model, data=data, ...) + theta=theta, model=model, data=data, vcov=vcov, + vcov_drop=vcov_drop, ...) } else if(class(model) == "mirt.model"){ large <- mirt::mirt(data=data, model=model, large = 'return') index <- matrix(1L:length(large$Freq[[1L]])) - mod <- mirt::mirt(data=data, model=model, large=large, verbose=FALSE, ...) + mod <- mirt::mirt(data=data, model=model, large=large, verbose=FALSE, + SE=!vcov_drop, ...) theta <- mirt::extract.mirt(mod, 'parvec') sv <- mirt::mod2values(mod) + if(!vcov_drop) vcov <- mirt::vcov(mod) tmp <- myLapply(index, FUN=f_mirt, progress=progress, theta=theta, model=model, data=data, - large=large, sv=sv, ...) + large=large, sv=sv, + vcov=vcov, vcov_drop=vcov_drop, ...) } else stop('model class not supported') gCD <- lapply(tmp, function(x) x$gCD) diff --git a/man/gCD.Rd b/man/gCD.Rd index 82a95c7..86ff324 100644 --- a/man/gCD.Rd +++ b/man/gCD.Rd @@ -6,7 +6,7 @@ \alias{plot.gCD} \title{Generalized Cook's Distance} \usage{ -gCD(data, model, progress = TRUE, ...) +gCD(data, model, vcov_drop = FALSE, progress = TRUE, ...) \method{print}{gCD}(x, ncases = 10, DFBETAS = FALSE, ...) @@ -27,6 +27,10 @@ exploratory factor analysis (requires complete dataset, i.e., no missing). If \code{class(model)} is a sem (semmod), or lavaan (character), then a confirmatory approach is performed instead} +\item{vcov_drop}{logical; should the variance-covariance matrix of the parameter +estimates be based on the unique \code{data[-i, ]} models +(Pek and MacCallum, 2011) or original \code{data}?} + \item{progress}{logical; display the progress of the computations in the console?} \item{...}{additional parameters to be passed} @@ -110,7 +114,7 @@ result <- gCD(dat, model) plot(result) mod <- mirt(dat, model) -res <- residuals(mod, type = 'exp') +res <- mirt::residuals(mod, type = 'exp') cbind(res, gCD=round(result$gCD, 3)) } @@ -123,6 +127,10 @@ Chalmers, R. P. & Flora, D. B. (2015). faoutlier: An R Package for Detecting Flora, D. B., LaBrish, C. & Chalmers, R. P. (2012). Old and new ideas for data screening and assumption testing for exploratory and confirmatory factor analysis. \emph{Frontiers in Psychology, 3}, 1-21. \doi{10.3389/fpsyg.2012.00055} + +Pek, J. & MacCallum, R. C. (2011). Sensitivity Analysis in Structural + Equation Models: Cases and Their Influence. Multivariate Behavioral Research, + 46(2), 202-228. } \seealso{ \code{\link{LD}}, \code{\link{obs.resid}}, \code{\link{robustMD}}, \code{\link{setCluster}} diff --git a/tests/testthat/test-gCD.R b/tests/testthat/test-gCD.R index dbbf8c6..ed9cc5c 100644 --- a/tests/testthat/test-gCD.R +++ b/tests/testthat/test-gCD.R @@ -4,18 +4,20 @@ test_that('gCD run', { #Exploratory nfact <- 3 - gCDresult <- gCD(holzinger, nfact, progress = FALSE) + gCDresult <- gCD(holzinger, nfact, progress = FALSE, vcov_drop = TRUE) gCDresult.outlier <- gCD(holzinger.outlier, nfact, progress = FALSE) expect_is(gCDresult, 'gCD') expect_is(plot(gCDresult), 'trellis') expect_is(gCDresult.outlier, 'gCD') expect_is(plot(gCDresult.outlier), 'trellis') expect_equal(as.numeric(gCDresult$gCD[1:3]), c(0.0020993686, 0.0008613305, 0.0013162123), tolerance=1e-5) + gCDresult <- gCD(holzinger, nfact, progress = FALSE) + expect_equal(as.numeric(gCDresult$gCD[1:3]), c(0.0022232819, 0.0008943396, 0.0012875196), tolerance=1e-5) #------------------------------------------------------------------- suppressMessages(model <- sem::specifyModel(file='sem-model/sem-model.txt', quiet=TRUE)) - gCDresult <- suppressWarnings(gCD(holzinger, model, progress = FALSE)) - gCDresult.outlier <- suppressWarnings(gCD(holzinger.outlier, model, progress = FALSE)) + gCDresult <- suppressWarnings(gCD(holzinger, model, progress = FALSE, vcov_drop = TRUE)) + gCDresult.outlier <- suppressWarnings(gCD(holzinger.outlier, model, progress = FALSE, , vcov_drop = TRUE)) expect_is(gCDresult, 'gCD') expect_is(plot(gCDresult), 'trellis') expect_is(gCDresult.outlier, 'gCD') @@ -28,8 +30,8 @@ test_that('gCD run', { F2 =~ MissNum + MxdArit + OddWrds F3 =~ Boots + Gloves + Hatchts' - gCDresult <- gCD(holzinger, model, orthogonal=TRUE, progress = FALSE) - gCDresult.outlier <- gCD(holzinger.outlier, model, orthogonal=TRUE, progress = FALSE) + gCDresult <- gCD(holzinger, model, orthogonal=TRUE, progress = FALSE, vcov_drop = TRUE) + gCDresult.outlier <- gCD(holzinger.outlier, model, orthogonal=TRUE, progress = FALSE, vcov_drop = TRUE) expect_equal(as.numeric(gCDresult.outlier$gCD[1:3]), c(36.67371343, 0.05034515, 0.34999495), tolerance = 1e-5) expect_is(gCDresult, 'gCD') @@ -50,7 +52,8 @@ test_that('gCD categorical', { F2 =~ MissNum + MxdArit + OddWrds F3 =~ Boots + Gloves + Hatchts' - gCDresult <- suppressWarnings(gCD(dat, model, orthogonal=TRUE, progress = FALSE, ordered=colnames(dat))) + gCDresult <- suppressWarnings(gCD(dat, model, orthogonal=TRUE, progress = FALSE, + vcov_drop = TRUE, ordered=colnames(dat))) expect_is(gCDresult, 'gCD') expect_equal(as.numeric(head(gCDresult$gCD)), c(0.45389878, 0.11746119, 0.15885323, 0.07169155, 0.30643962, 0.15728745), tolerance = 1e-2)