-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Some more new functions and minor changes #5
base: master
Are you sure you want to change the base?
Changes from all commits
0179daa
351e336
2bdcebb
08e1453
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should have @Seealso referencing findSpikesStdevWindow |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#' Deletes spikes using rolling window stdev filtering | ||
#' | ||
#' @param obs Required. A \pkg{CRHMr} obs data frame. | ||
#' @param colnum Optional. The number of the column to test for spikes, not | ||
#' including the \code{datetime}. | ||
#' @param min_frac_records Optional. The fraction of records required in a | ||
#' window for a sucessful computation, otherwise the current value is flagged. | ||
#' Default is 0.8. | ||
#' @param lead_window Optional. A list of values corresponding to the offset | ||
#' from the current value. Default is 1:10. | ||
#' @param lag_window Optional. A list of values corresponding to the offset from | ||
#' the current value. Default is 1:10. | ||
#' @param number_sd Optional. The number of standard deviations away from the | ||
#' mean required for the current value to be flagged. | ||
#' @param logfile Optional. Name of the file to be used for logging the action. | ||
#' Normally not used. | ||
#' | ||
#' @return If successful, returns a data frame consisting of the datetime and the original obs values, where | ||
#' all of the spike values have been set to be \code{NA_real_}. If no spikes are found a message is | ||
#' printed and the function returns the value \code{FALSE}. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' deleteSpikesStdevWindow(BadLake7376, 1, min_frac_records = 0.5) | ||
deleteSpikesStdevWindow <- | ||
function(obs, | ||
colnum = 1, | ||
min_frac_records = .8, | ||
lead_window = list(1:10), | ||
lag_window = list(-1:-10), | ||
number_sd = 10, | ||
logfile = ""){ | ||
|
||
if (nrow(obs) == 0) { | ||
stop("Missing obs values") | ||
} | ||
obsName <- deparse(substitute(obs)) | ||
|
||
if (any(is.na(obs[, colnum + 1]))) { | ||
stop("Missing values. Remove before searching for spikes") | ||
} | ||
|
||
if (number_sd == 0) { | ||
stop("sd is <= 0. Set before searching for spikes") | ||
} | ||
|
||
spikeDatetimes <- findSpikesStdevWindow(obs, | ||
colnum, | ||
min_frac_records, | ||
lead_window, | ||
lag_window, | ||
number_sd) | ||
|
||
if (length(spikeDatetimes) == 1) { | ||
if (spikeDatetimes == 0) { | ||
cat("No spikes found\n") | ||
return(FALSE) | ||
} | ||
if (spikeDatetimes == FALSE) { | ||
stop("Error in finding spikes") | ||
} | ||
} | ||
|
||
spikeLocs <- match(spikeDatetimes, obs$datetime) | ||
obs[spikeLocs, colnum + 1] <- NA_real_ | ||
|
||
# output to logfile | ||
outputMessage <- paste(length(spikeDatetimes), " values set to NA_real_") | ||
comment <- paste("findSpikesStdevWindow obs:", obsName, outputMessage, sep = "") | ||
result <- logAction(comment, logfile) | ||
|
||
if (!result) { | ||
return(result) | ||
} else { | ||
return(obs) | ||
} | ||
|
||
} |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Seealso should also reference deleteSpikesStdevWindow |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
#' Finds spikes using rolling window stdev filtering | ||
#' | ||
#' @description Finds spikes using a rolling window with a width defined as a | ||
#' lead and lag offset from the current value. Currently is set up to define | ||
#' spikes within the window given a number of standard deviations away from | ||
#' the mean. NOTE: the first few and last few values will automatically be | ||
#' removed due to the stdev function requiring a minimum of 3 vals. | ||
#' | ||
#' @param obs Required. A \pkg{CRHMr} obs data frame. | ||
#' @param colnum Optional. The number of the column to test for spikes, not | ||
#' including the \code{datetime}. | ||
#' @param min_frac_records Optional. The fraction of records required in a | ||
#' window for a sucessful computation, otherwise the current value is flagged. | ||
#' Default is 0.8. | ||
#' @param lead_window Optional. A list of values corresponding to the offset | ||
#' from the current value. Default is 1:10. | ||
#' @param lag_window Optional. A list of values corresponding to the offset from | ||
#' the current value. Default is 1:10. | ||
#' @param number_sd Optional. The number of standard deviations away from the | ||
#' mean required for the current value to be flagged. | ||
#' @param logfile Optional. Name of the file to be used for logging the action. | ||
#' Normally not used. | ||
#' | ||
#' @return If successful and there are no spikes, returns \code{0}. If there are | ||
#' spikes, returns their \code{datetime} values. If unsuccessful returns | ||
#' \code{FALSE}. | ||
#' @author Alex Cebulski | ||
#' @seealso \code{\link{deleteSpikes}} \code{\link{findGaps}} | ||
#' \code{\link{findDupes}} | ||
#' @export | ||
#' | ||
#' @examples | ||
#' findSpikesStdevWindow(BadLake7376, 1, min_frac_records = 0.5) | ||
findSpikesStdevWindow <- | ||
function(obs, | ||
colnum = 1, | ||
min_frac_records = .8, | ||
lead_window = list(1:10), | ||
lag_window = list(-1:-10), | ||
number_sd = 10, | ||
logfile = "" | ||
){ | ||
|
||
if (nrow(obs) == 0) { | ||
stop("Error: missing obs values") | ||
} | ||
|
||
obsName <- deparse(substitute(obs)) | ||
|
||
if (any(is.na(obs[, colnum + 1]))) { | ||
stop("Missing values. Remove before searching for spikes") | ||
} | ||
|
||
if (number_sd == 0) { | ||
stop("sd is <= 0. Set before searching for spikes") | ||
} | ||
|
||
datetime_col <- 1 | ||
var_col <- colnum + 1 # raw colnum does not include datetime | ||
select_cols <- c(datetime_col, var_col) | ||
|
||
min_records_window <- max(lead_window[[1]])*min_frac_records | ||
|
||
obs_fltr <- obs[,select_cols] | ||
|
||
# count the number of records in the leading window | ||
obs_fltr$lead_count <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lead_window, | ||
FUN = function(x) sum(!is.na(x)), | ||
fill=NA, | ||
partial = T) | ||
|
||
obs_fltr$lag_count <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lag_window, | ||
FUN = function(x) sum(!is.na(x)), | ||
fill=NA, | ||
partial = T) | ||
|
||
obs_fltr$lead_count_filter <- | ||
ifelse(obs_fltr$lead_count<min_records_window|is.na(obs_fltr$lead_count), 0, 1) | ||
obs_fltr$lag_count_filter <- | ||
ifelse(obs_fltr$lag_count<min_records_window|is.na(obs_fltr$lag_count), 0, 1) | ||
|
||
obs_fltr$lead_mean <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lead_window, | ||
FUN = mean, | ||
partial = T, | ||
na.rm = T, | ||
fill=NA) | ||
|
||
obs_fltr$lag_mean <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lag_window, | ||
FUN = mean, | ||
partial = T, | ||
na.rm = T, | ||
fill=NA) | ||
|
||
obs_fltr$lead_sd <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lead_window, | ||
FUN = sd, | ||
partial = T, | ||
na.rm = T, | ||
fill=NA) | ||
|
||
obs_fltr$lag_sd <- zoo::rollapply(obs_fltr[,var_col], | ||
width=lag_window, | ||
FUN = sd, | ||
partial = T, | ||
na.rm = T, | ||
fill=NA) | ||
|
||
obs_fltr$lead_fltr_max <- | ||
obs_fltr$lead_mean + (number_sd * obs_fltr$lead_sd) | ||
obs_fltr$lead_fltr_min <- | ||
obs_fltr$lead_mean - (number_sd * obs_fltr$lead_sd) | ||
obs_fltr$lag_fltr_max <- | ||
obs_fltr$lag_mean + (number_sd * obs_fltr$lag_sd) | ||
obs_fltr$lag_fltr_min <- | ||
obs_fltr$lag_mean - (number_sd * obs_fltr$lag_sd) | ||
|
||
obs_fltr$sd_filter <- T | ||
|
||
# set sd_filter to FALSE for those that pass our test | ||
obs_fltr$sd_filter[(obs_fltr$lead_count_filter == 1 & | ||
obs_fltr[,var_col] < obs_fltr$lead_fltr_max & | ||
obs_fltr[,var_col] > obs_fltr$lead_fltr_min) & | ||
(obs_fltr$lag_count_filter == 1 & | ||
obs_fltr[,var_col] < obs_fltr$lag_fltr_max & | ||
obs_fltr[,var_col] > obs_fltr$lag_fltr_min)] <- F | ||
|
||
locs <- obs_fltr[obs_fltr$sd_filter, 1] | ||
numSpikes <- sum(obs_fltr$sd_filter) | ||
|
||
if (numSpikes == 0) { | ||
outputMessage <- " No spikes found" | ||
returnvalue <- 0 | ||
} | ||
else { | ||
outputMessage <- paste(" ", numSpikes, " spikes found with standard deviations ", | ||
number_sd, " above/below the mean using the rolling window method ", | ||
sep = "") | ||
returnvalue <- locs | ||
} | ||
|
||
# output to logfile | ||
comment <- paste("findStdevSpikeWindow obs:", obsName, outputMessage, sep = "") | ||
result <- logAction(comment, logfile) | ||
if (!result) { | ||
return(result) | ||
} else { | ||
return(returnvalue) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#' Helper function to plot identified flags | ||
#' | ||
#' @description This function takes a vector of datetimes identified by other | ||
#' CRHMr qaqc functions and illustrates these on top of the original data | ||
#' using GGPLOT. It is also easy to pass this ggplot output onto plotly for | ||
#' interactive viewing by running plotly::ggplotly() after this function. | ||
#' | ||
#' @param obs Required. A \pkg{CRHMr} obs data frame. | ||
#' @param datetime_flags Required. A vector of datetimes in POSIXct format, for | ||
#' example the direct output of the findSpikes() function. | ||
#' @param colnum Optional. The number of the column to test for spikes, not | ||
#' including the \code{datetime}. | ||
#' @param gg_flag_shape Optional. An integer representing the ggplot shape type | ||
#' for the flagged values. | ||
#' @param gg_flag_colour Optional. A string representing the ggplot colour | ||
#' for the flagged values. | ||
#' @param gg_flag_size Optional. An integer representing the ggplot shape size | ||
#' for the flagged values. | ||
#' | ||
#' @return a ggplot object. | ||
#' @author Alex Cebulski | ||
#' @seealso \code{\link{findSpikes}} \code{\link{findGaps}} | ||
#' \code{\link{findFlatlines}} \code{\link{findSpikesStdevWindow}} | ||
#' @export | ||
#' | ||
#' @examples | ||
#' plotFlags(BadLake7376, c(as.POSIXct("1974-09-11 12:00:00"))) | ||
plotFlags <- | ||
function(obs, | ||
datetime_flags, | ||
colnum = 1, | ||
gg_flag_shape = 4, | ||
gg_flag_colour = 'red', | ||
gg_flag_size = 1) { | ||
if (colnames(obs)[1] != 'datetime') { | ||
stop("First column of the obs file must be datetime.") | ||
} | ||
|
||
if (!"POSIXct" %in% class(datetime_flags)) { | ||
stop("datetime_flags must be of type POSIXct.") | ||
} | ||
|
||
obs_fltr <- obs[obs$datetime %in% datetime_flags, ] | ||
|
||
gg_ylab <- colnames(obs)[colnum + 1] | ||
|
||
ggplot2::ggplot(obs, aes(datetime, obs[, colnum + 1])) + | ||
geom_line() + | ||
geom_point( | ||
data = obs_fltr, | ||
aes(x = datetime, y = obs_fltr[, colnum + 1]), | ||
shape = gg_flag_shape, | ||
colour = gg_flag_colour | ||
) + | ||
ggplot2::ylab(gg_ylab) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing the typos