From 07a71648a9c5f689ba0a5a86be513a2adf7a5125 Mon Sep 17 00:00:00 2001 From: Ilja Kocken Date: Fri, 23 Jun 2023 01:38:19 -1000 Subject: [PATCH] move functions to their own file --- .gitignore | 2 + README.org | 15 +- clumped-bootstrap-calibration.org | 277 +----------------------------- functions.org | 272 +++++++++++++++++++++++++++++ 4 files changed, 283 insertions(+), 283 deletions(-) create mode 100644 .gitignore create mode 100644 functions.org diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5274f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/.Rhistory +/.Rbuildignore diff --git a/README.org b/README.org index 185fafb..0ed5ad4 100644 --- a/README.org +++ b/README.org @@ -9,13 +9,14 @@ This document describes how to calculate bootstrapped averages and apply a clump - Apply the bootstrapped calibration to calculate temperature and d18Osw. To use this: -0. Install the package ~clumpedcalib~, see below. -1. Update the input calibration data [[file:dat/example_calib.csv]]. Make sure the +1. Install this package ~clumpedcalib~, see [[#installation]]. +2. Update the input calibration data [[file:dat/example_calib.csv]]. Make sure the column names include ~X~, ~D47~, ~sd_X~, ~sd_D47~ -2. Change [[file:clumped-bootstrap-calibration.org::*d18Osw_calc][the function that calculates d18Osw]] to something that you like. -3. Change [[file:clumped-bootstrap-calibration.org::*filter_outliers][filter outliers]] so that it filters out bad measurements for your data. -4. Add your samples at the replicate level and update [[file:clumped-bootstrap-calibration.org::*load sample data][load sample data]] -5. Walk through [[file:clumped-bootstrap-calibration.org::*apply the functions][apply the functions]] step-by-step. +3. Overwrite [[file:functions.org#d18Osw_calc][the function that calculates d18Osw]] to something that you like. +4. Overwrite [[file:functions.org#filter_outliers][filter outliers]] so that it filters out bad measurements for your data. +5. Add your samples at the replicate level and update [[file:clumped-bootstrap-calibration.org#load-sample-data][load sample data]] +6. Walk through [[file:clumped-bootstrap-calibration.org][the example workflow]] step-by-step. +7. Have a look at [[file:functions.org][the functions]] to better understand what's happening. The bootstrapped calibration idea is based on a Matlab script written by Alvaro Fernandez. @@ -27,4 +28,4 @@ names, but it should work!): remotes::install_github("japhir/clumpedcalib") #+end_src -Or you can look at all the [[file:clumped-bootstrap-calibration.org::*functions][functions]]. +Or you can look at all the [[file:functions.org][functions]]. diff --git a/clumped-bootstrap-calibration.org b/clumped-bootstrap-calibration.org index 9ae33c4..95cd544 100644 --- a/clumped-bootstrap-calibration.org +++ b/clumped-bootstrap-calibration.org @@ -12,7 +12,7 @@ theme_set(theme_bw() + theme(text = element_text(size = 24))) - # this sloppy package, see all functions below + # this sloppy package library(clumpedcalib) #+end_src @@ -108,278 +108,3 @@ I've come up with some really silly sample data. #+RESULTS: [[file:imgs/data_plot.png]] - -* functions -This could be turned into a package if it proves useful at some point but I -currently don't have the time. - -Ok ok I'll quickly tangle them to R files so the user can install from github. - -** clumped_calib_boot -#+begin_src R :tangle R/clumped_calib_boot.R - #' Bootstrapped clumped isotope calibration - #' - #' @param data A data.frame with columns D47, X, sd_X, and sd_D47. X stands for - #' the commonly-used temperature scale, 10^6 / T^2 with T in K. - clumped_calib_boot <- function(data, Nsim = 1e5) { - bs <- function(data, indices) { - d <- data[indices,] # allows boot to select sample - fit <- bfsl::bfsl(d$X, d$D47, d$sd_X, d$sd_D47) - return(coef(fit)) # note that this returns a vector of intercept, slope, intercept error, slope error - } - - # this returns 4 t values - results <- boot::boot(data = data, statistic = bs, R = Nsim) - - # tidy up - tibble::tibble(intercept = results$t[, 1], slope = results$t[, 2]) - } -#+end_src - -#+RESULTS: - -** filter_outliers -#+begin_src R :tangle R/filter_outliers.R - #' Filter clumped isotope outliers - #' - #' @param data A dataframe with columns `outlier`, `broadid`, - filter_outliers <- function(data, group) { - data |> - tidylog::filter(!outlier, - !is.na(outlier)) |> # leave only samples - tidylog::filter(broadid == "other") |> # make sure they have bins and D47_final and d18O_PDB_vit values - tidylog::filter(!is.na({{group}}) & - !is.na(D47_final) & - !is.na(d18O_PDB_vit) & - !is.na(d13C_PDB_vit)) - } -#+end_src - -#+RESULTS: - -** bootstrap_means -#+begin_src R :tangle R/bootstrap_means.R - ##' Calculate bootstrapped means for each group in a dataframe - ##' - ##' @param data A dataframe or tibble that should have columns `outlier`, - ##' `broadid` ("other" for samples), `D47_final`, `d18O_PDB_vit`, - ##' `d13C_PDB_vit`, and the grouping column. - ##' @param group The column name in `data` with a character or factor column - ##' that contains the binning information that you want to calculate - ##' bootstrapped averages for. - ##' @param age The column name in `data` with the sample ages. - ##' @param d13C The column name in `data` with the carbon isotope values. - ##' @param d18O The column name in `data` with the oxygen isotope values. - ##' @param D47 The column name in `data` with the clumped isotope values. - ##' @param Nsim The number of bootstraps you want to run. Defaults to the - ##' number of rows in `calib`. - bootstrap_means <- function(data, - group, - age, - d13C, - d18O, - D47, - Nsim = 1e5) { - data |> - # make sure that there are no NAs in group or in your d13C etc! - group_by({{group}}) |> - # subset only relevant columns for now unfortunately this gets rid of - # potentially useful columns. You can left_join them back with - # distinct(data, id_col1, id_col2) - select({{group}}, {{age}}, {{d13C}}, {{d18O}}, {{D47}}) |> - # R magic, search for nesting/unnesting if you want to understand what - # happens here. - tidyr::nest() |> - # create Nsim bootstrapped copies of the data - mutate(boot = purrr::map(data, - ~ infer::rep_slice_sample(.x, - # we resample using all data - prop = 1, - replace = TRUE, - reps = Nsim))) |> - # get rid of the raw data, leave only the bootstrapped values - select(-data) |> - # calculate summaries for the bootstrapped data, Nsim times - mutate(summ = purrr::map(boot, ~ .x |> - summarize( - # here they get these new simpler names - age = mean({{age}}, na.rm = TRUE), - d13C = mean({{d13C}}, na.rm = TRUE), - d18O = mean({{d18O}}, na.rm = TRUE), - D47 = mean({{D47}}, na.rm = TRUE)))) |> - # get rid of the bootstrapped values - select(-boot) |> - # unfold the bootstraps, we're back to a simple tibble now - tidyr::unnest(summ) - } -#+end_src - -#+RESULTS: - -** calc_temp_d18Osw -#+begin_src R :tangle R/calc_temp_d18Osw.R - #' Calculate temperature and d18Osw - #' - #' From bootstrapped samples and a bootstrapped set of slope--intercept pairs. - #' - #' @param calib A dataframe with draws from the bootstrapped (or Bayesian) - #' temperature regression. Should have columns `slope` and `intercept`, - #' which are related via `clumpedr::revcal()`. - calc_temp_d18Osw <- function(boot, calib, Nsim = NULL) { - if (is.null(Nsim)) { - # we simulate the same number of bootstraps for easy combination - Nsim <- nrow(boot) - calib <- calib[sample(nrow(calib), replace = TRUE, size = Nsim), ] - } - - boot |> - # append the slope/intercept pairs of the temperature calibration - # this is why we made sure that they are Nsim long as well. - mutate(slope = calib$slope, - intercept = calib$intercept) |> - # calculate temperature using the parameters - # this relies on my clumpedr package - # https://github.com/isoverse/clumpedr/ - # you can also just copy its revcal function from here: - # https://github.com/isoverse/clumpedr/blob/master/R/calibration.R#L72 - mutate(temp = clumpedr::revcal(D47, slope = slope, intercept = intercept, - # we have to use ignorecnf because the confidence calculations - # in clumpedr are WRONG! - ignorecnf = TRUE)) |> - # get rid of calibration intercept and slope - select(-slope, -intercept) |> - # calculate d18Osw using the function above - # we do not take into account potential uncertainty in these parameters, - # but this is likely nothing. - mutate(d18Osw = d18Osw_calc(d18O, temp)) - } -#+end_src - -#+RESULTS: - -** our_summary -#+begin_src R :tangle R/our_summary.R - #' Summarize the bootstrapped values into a mean, sd, and the 68% and 95% CIs - #' - #' @param boot Output of `apply_calib_and_d18O_boot()` - #' @param group The group to summarize by. - our_summary <- function(boot, group) { - boot |> - group_by({{group}}) |> - ggdist::median_qi(.exclude = "replicate", - .width = c(.68, .95)) - } -#+end_src - -#+RESULTS: - -** d18Osw_calc -#+begin_src R :tangle R/d18Osw_calc.R - #' Calculate the d18Osw from the d18Occ and temperature - #' - #' according to Kim & O'neil 1997 as modified by Bemis et al., 1998 - #' - #' @param d18Occ The oxygen isotope composition of the calcite in VPDB. - #' @param temperature The formation temperature (in °C). - #' @return The oxygen isotope composition of the sea water in VSMOW. - #' @author Ilja J. Kocken - d18Osw_calc <- function(d18Occ, temperature) { - (sqrt(-4 * 16.1 * 0.09 + 4.64^2 + 4 * 0.09 * temperature) - 4.64 + 2 * 0.09 * d18Occ) / - (2 * 0.09) + 0.27 - # note the 0.27, which is from conversion from VPDB to VSMOW - - # we could also use Marchitto et al., 2014 equation 9 - ## 0.245 * temperature - 0.0011 * temperature^2 - 3.58 + d18Occ + 0.27 - } -#+end_src - -#+RESULTS: - -** temp_calc -:PROPERTIES: -:CREATED: [2022-08-08 Mon 22:04] -:END: -reverse of d18Osw_calc -#+begin_src R :tangle R/temp_calc.R - #' Calculate the temperature from d18Occ and the d18Osw - #' - #' This is the relationship from Kim & O'neil 1997, - #' as updated by Bemis et al., 1998 - #' - #' @param d18Occ The d18O of the calcite in VPDB. - #' @param d18Osw The d18O of the sea water in VSMOW - #' @return The temperature in degrees Celsius. - temp_calc <- function(d18Occ, d18Osw) { - d18Osw <- d18Osw - 0.27 - # the -0.27 is to convert from VSMOW to VPDB - 16.1 - 4.64 * (d18Occ - d18Osw) + 0.09 * (d18Occ - d18Osw)^2 - } -#+end_src - -#+RESULTS: - -test 'em out - #+begin_src R - # this should resolve to the input d18Osw values - d18Osw_calc(d18Occ = 4, - temperature = temp_calc(d18Occ = 4, d18Osw = seq(-1, 1, .5))) - - # this should resolve to the input temperature values - temp_calc(d18Occ = 4, d18Osw = d18Osw_calc(d18Occ = 4, temperature = 0:5)) -#+end_src - -#+RESULTS: -: [1] -1.000000e+00 -5.000000e-01 1.387779e-15 5.000000e-01 1.000000e+00 -: [1] 6.661338e-15 1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00 -: [6] 5.000000e+00 - -** wrapper -#+begin_src R :tangle R/apply_calib_and_d18O_boot.R - #' Calculate bootstrapped mean values for age, d18O, d13C, and D47 and calculate temperature and d18Osw - #' - apply_calib_and_d18O_boot <- function(data, - calib, - group, - output = "summary", - Nsim = NULL) { - # make sure you select one of the valid output types - if (!output %in% c("summary", "raw", "all")) { - stop("Output needs to be either 'summary', 'raw', or 'all'.") - } - - # we simulate the same number of bootstraps for easy combination - if (is.null(Nsim)) { - Nsim <- nrow(calib) - } else { - # take a subset of the calibration with the same size - calib <- calib[sample(nrow(calib), replace = TRUE, size = Nsim), ] - } - - sim <- data |> - filter_outliers(group = {{group}}) |> - bootstrap_means(group = {{group}}, - age = age, - d13C = d13C_PDB_vit, - d18O = d18O_PDB_vit, - D47 = D47_final, - Nsim = Nsim) |> - calc_temp_d18Osw(calib = calib, Nsim = Nsim) - - if (output == "raw") { - return(sim) - } - - # otherwise return the summary, there is no ALL yet - sum <- sim |> - our_summary(group = {{group}}) - - ## # we're now missing some essential metadata, which we do summarize in this - ## # older function we wrote - ## data |> - ## summarize_bins() |> - ## select(bins:labs) |> - ## left_join(our_summary) - } -#+end_src - -#+RESULTS: diff --git a/functions.org b/functions.org new file mode 100644 index 0000000..1d9a7f0 --- /dev/null +++ b/functions.org @@ -0,0 +1,272 @@ +* functions +This could be turned into a package if it proves useful at some point but I +currently don't have the time. + +Ok ok I'll quickly tangle them to R files so the user can install from github. + +** clumped_calib_boot +#+begin_src R :tangle R/clumped_calib_boot.R + #' Bootstrapped clumped isotope calibration + #' + #' @param data A data.frame with columns D47, X, sd_X, and sd_D47. X stands for + #' the commonly-used temperature scale, 10^6 / T^2 with T in K. + clumped_calib_boot <- function(data, Nsim = 1e5) { + bs <- function(data, indices) { + d <- data[indices,] # allows boot to select sample + fit <- bfsl::bfsl(d$X, d$D47, d$sd_X, d$sd_D47) + return(coef(fit)) # note that this returns a vector of intercept, slope, intercept error, slope error + } + + # this returns 4 t values + results <- boot::boot(data = data, statistic = bs, R = Nsim) + + # tidy up + tibble::tibble(intercept = results$t[, 1], slope = results$t[, 2]) + } +#+end_src + +#+RESULTS: + +** filter_outliers +#+begin_src R :tangle R/filter_outliers.R + #' Filter clumped isotope outliers + #' + #' @param data A dataframe with columns `outlier`, `broadid`, + filter_outliers <- function(data, group) { + data |> + tidylog::filter(!outlier, + !is.na(outlier)) |> # leave only samples + tidylog::filter(broadid == "other") |> # make sure they have bins and D47_final and d18O_PDB_vit values + tidylog::filter(!is.na({{group}}) & + !is.na(D47_final) & + !is.na(d18O_PDB_vit) & + !is.na(d13C_PDB_vit)) + } +#+end_src + +#+RESULTS: + +** bootstrap_means +#+begin_src R :tangle R/bootstrap_means.R + ##' Calculate bootstrapped means for each group in a dataframe + ##' + ##' @param data A dataframe or tibble that should have columns `outlier`, + ##' `broadid` ("other" for samples), `D47_final`, `d18O_PDB_vit`, + ##' `d13C_PDB_vit`, and the grouping column. + ##' @param group The column name in `data` with a character or factor column + ##' that contains the binning information that you want to calculate + ##' bootstrapped averages for. + ##' @param age The column name in `data` with the sample ages. + ##' @param d13C The column name in `data` with the carbon isotope values. + ##' @param d18O The column name in `data` with the oxygen isotope values. + ##' @param D47 The column name in `data` with the clumped isotope values. + ##' @param Nsim The number of bootstraps you want to run. Defaults to the + ##' number of rows in `calib`. + bootstrap_means <- function(data, + group, + age, + d13C, + d18O, + D47, + Nsim = 1e5) { + data |> + # make sure that there are no NAs in group or in your d13C etc! + group_by({{group}}) |> + # subset only relevant columns for now unfortunately this gets rid of + # potentially useful columns. You can left_join them back with + # distinct(data, id_col1, id_col2) + select({{group}}, {{age}}, {{d13C}}, {{d18O}}, {{D47}}) |> + # R magic, search for nesting/unnesting if you want to understand what + # happens here. + tidyr::nest() |> + # create Nsim bootstrapped copies of the data + mutate(boot = purrr::map(data, + ~ infer::rep_slice_sample(.x, + # we resample using all data + prop = 1, + replace = TRUE, + reps = Nsim))) |> + # get rid of the raw data, leave only the bootstrapped values + select(-data) |> + # calculate summaries for the bootstrapped data, Nsim times + mutate(summ = purrr::map(boot, ~ .x |> + summarize( + # here they get these new simpler names + age = mean({{age}}, na.rm = TRUE), + d13C = mean({{d13C}}, na.rm = TRUE), + d18O = mean({{d18O}}, na.rm = TRUE), + D47 = mean({{D47}}, na.rm = TRUE)))) |> + # get rid of the bootstrapped values + select(-boot) |> + # unfold the bootstraps, we're back to a simple tibble now + tidyr::unnest(summ) + } +#+end_src + +#+RESULTS: + +** calc_temp_d18Osw +#+begin_src R :tangle R/calc_temp_d18Osw.R + #' Calculate temperature and d18Osw + #' + #' From bootstrapped samples and a bootstrapped set of slope--intercept pairs. + #' + #' @param calib A dataframe with draws from the bootstrapped (or Bayesian) + #' temperature regression. Should have columns `slope` and `intercept`, + #' which are related via `clumpedr::revcal()`. + calc_temp_d18Osw <- function(boot, calib, Nsim = NULL) { + if (is.null(Nsim)) { + # we simulate the same number of bootstraps for easy combination + Nsim <- nrow(boot) + calib <- calib[sample(nrow(calib), replace = TRUE, size = Nsim), ] + } + + boot |> + # append the slope/intercept pairs of the temperature calibration + # this is why we made sure that they are Nsim long as well. + mutate(slope = calib$slope, + intercept = calib$intercept) |> + # calculate temperature using the parameters + # this relies on my clumpedr package + # https://github.com/isoverse/clumpedr/ + # you can also just copy its revcal function from here: + # https://github.com/isoverse/clumpedr/blob/master/R/calibration.R#L72 + mutate(temp = clumpedr::revcal(D47, slope = slope, intercept = intercept, + # we have to use ignorecnf because the confidence calculations + # in clumpedr are WRONG! + ignorecnf = TRUE)) |> + # get rid of calibration intercept and slope + select(-slope, -intercept) |> + # calculate d18Osw using the function above + # we do not take into account potential uncertainty in these parameters, + # but this is likely nothing. + mutate(d18Osw = d18Osw_calc(d18O, temp)) + } +#+end_src + +#+RESULTS: + +** our_summary +#+begin_src R :tangle R/our_summary.R + #' Summarize the bootstrapped values into a mean, sd, and the 68% and 95% CIs + #' + #' @param boot Output of `apply_calib_and_d18O_boot()` + #' @param group The group to summarize by. + our_summary <- function(boot, group) { + boot |> + group_by({{group}}) |> + ggdist::median_qi(.exclude = "replicate", + .width = c(.68, .95)) + } +#+end_src + +#+RESULTS: + +** d18Osw_calc +#+begin_src R :tangle R/d18Osw_calc.R + #' Calculate the d18Osw from the d18Occ and temperature + #' + #' according to Kim & O'neil 1997 as modified by Bemis et al., 1998 + #' + #' @param d18Occ The oxygen isotope composition of the calcite in VPDB. + #' @param temperature The formation temperature (in °C). + #' @return The oxygen isotope composition of the sea water in VSMOW. + #' @author Ilja J. Kocken + d18Osw_calc <- function(d18Occ, temperature) { + (sqrt(-4 * 16.1 * 0.09 + 4.64^2 + 4 * 0.09 * temperature) - 4.64 + 2 * 0.09 * d18Occ) / + (2 * 0.09) + 0.27 + # note the 0.27, which is from conversion from VPDB to VSMOW + + # we could also use Marchitto et al., 2014 equation 9 + ## 0.245 * temperature - 0.0011 * temperature^2 - 3.58 + d18Occ + 0.27 + } +#+end_src + +#+RESULTS: + +** temp_calc +:PROPERTIES: +:CREATED: [2022-08-08 Mon 22:04] +:END: +reverse of d18Osw_calc +#+begin_src R :tangle R/temp_calc.R + #' Calculate the temperature from d18Occ and the d18Osw + #' + #' This is the relationship from Kim & O'neil 1997, + #' as updated by Bemis et al., 1998 + #' + #' @param d18Occ The d18O of the calcite in VPDB. + #' @param d18Osw The d18O of the sea water in VSMOW + #' @return The temperature in degrees Celsius. + temp_calc <- function(d18Occ, d18Osw) { + d18Osw <- d18Osw - 0.27 + # the -0.27 is to convert from VSMOW to VPDB + 16.1 - 4.64 * (d18Occ - d18Osw) + 0.09 * (d18Occ - d18Osw)^2 + } +#+end_src + +#+RESULTS: + +test 'em out + #+begin_src R + # this should resolve to the input d18Osw values + d18Osw_calc(d18Occ = 4, + temperature = temp_calc(d18Occ = 4, d18Osw = seq(-1, 1, .5))) + + # this should resolve to the input temperature values + temp_calc(d18Occ = 4, d18Osw = d18Osw_calc(d18Occ = 4, temperature = 0:5)) +#+end_src + +#+RESULTS: +: [1] -1.000000e+00 -5.000000e-01 1.387779e-15 5.000000e-01 1.000000e+00 +: [1] 6.661338e-15 1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00 +: [6] 5.000000e+00 + +** wrapper +#+begin_src R :tangle R/apply_calib_and_d18O_boot.R + #' Calculate bootstrapped mean values for age, d18O, d13C, and D47 and calculate temperature and d18Osw + #' + apply_calib_and_d18O_boot <- function(data, + calib, + group, + output = "summary", + Nsim = NULL) { + # make sure you select one of the valid output types + if (!output %in% c("summary", "raw", "all")) { + stop("Output needs to be either 'summary', 'raw', or 'all'.") + } + + # we simulate the same number of bootstraps for easy combination + if (is.null(Nsim)) { + Nsim <- nrow(calib) + } else { + # take a subset of the calibration with the same size + calib <- calib[sample(nrow(calib), replace = TRUE, size = Nsim), ] + } + + sim <- data |> + filter_outliers(group = {{group}}) |> + bootstrap_means(group = {{group}}, + age = age, + d13C = d13C_PDB_vit, + d18O = d18O_PDB_vit, + D47 = D47_final, + Nsim = Nsim) |> + calc_temp_d18Osw(calib = calib, Nsim = Nsim) + + if (output == "raw") { + return(sim) + } + + # otherwise return the summary, there is no ALL yet + sum <- sim |> + our_summary(group = {{group}}) + + ## # we're now missing some essential metadata, which we do summarize in this + ## # older function we wrote + ## data |> + ## summarize_bins() |> + ## select(bins:labs) |> + ## left_join(our_summary) + } +#+end_src