diff --git a/README.md b/README.md index f407bb1..0c8fb2e 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ This repository includes the data and code necessary for reproducing the analysis in our malaria infant study manuscript. -## Data File Overview +## Data File -The included data file, `MIS_master_data_sheet_long.csv`, is a 119x30 data frame which has the following column names, where each row represents one individual at either acute infection or 4 weeks post-treatment. A companion file, `MIS_master_data_sheet_wide.csv`, is also included for convenience. +The data file, `MIS_master_data_sheet_long.csv`, is a 119x30 data frame which has the following column names, where each row represents one individual at either acute infection or 4 weeks post-treatment. A reformatted companion file, `MIS_master_data_sheet_wide.csv`, is also included for convenience. The covariates for each subject are given in the first six columns: diff --git a/analysis/MalariaInfantStudyAnalysis.Rmd b/analysis/MalariaInfantStudyAnalysis.Rmd new file mode 100644 index 0000000..2f351e8 --- /dev/null +++ b/analysis/MalariaInfantStudyAnalysis.Rmd @@ -0,0 +1,655 @@ +--- +title: "Malaria Infant Paper Analysis" +author: "Paul L. Maurizio" +date: "3/2/2019" +output: + html_document: default + pdf_document: default +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Load Packages + +```{r, message=FALSE} +library(ggplot2) +library(ggfortify) +library(coin) +library(xtable) +library(pscl) # zero inflated poisson regression +library(MASS) # multinomial ordinal probit regression / proportional odds logistic regression / Box-Cox +library(ordinal) # cumulative link mixed model +library(nparLD) # nonparametric rank-based statistics for longitudinal data +library(lmerTest) +library(psych) +library(coin) +``` + +## Load Data + +```{r} +dat <- read.csv("../data/MIS_master_data_sheet_wide.csv") +dat_long <- read.csv("../data/MIS_master_data_sheet_long.csv") +``` + +## PCA on raw phenotypes + +To explore the data, we use PCA on a subset of the phenotypes that were collected from the most individuals. + +```{r} +select <- c('GMCSF','IFNg','IL10','IL12p40','IL12p70','IL6','TNFa','Nitrate.570','Hb') +dat_sub <- dat_long[,select] +dat_ref <- dat_long[complete.cases(dat_sub),] +dat_sub <- dat_sub[complete.cases(dat_sub),] +dat_ref$Visit <- as.factor(dat_ref$Visit) +dat_ref$AgeVisit <- as.factor(paste(dat_ref$Age, dat_ref$Visit, sep="_")) +pr <- princomp(dat_sub, cor=TRUE, scores=TRUE) +summary(pr) +plot(pr, type="lines") + +autoplot(pr, data = dat_ref, colour = 'AgeVisit', x=1, y=2, loadings=TRUE, loadings.label=TRUE) +autoplot(pr, data = dat_ref, colour = 'AgeVisit', x=2, y=3, loadings=TRUE, loadings.label=TRUE) +``` + +## 1. Parasite Load + +Model the effects of age, considering sex, on parasite levels on V1, using the Wilcoxon test, among detectable samples only. + +```{r} +dat.sub <- subset(dat, parasites>0) +fit1 <- wilcox_test(dat.sub$parasites ~ dat.sub$age | dat.sub$sex, distribution="exact"); pvalue(fit1) +``` + +Model the effects of age and sex on parasite levels on V1, using zero-inflated Poisson regression. + +We use the zero-inflated Poisson (ZIP) model (log link), with the binomial distribution to model the binary outcome of 0-inflation or not (probit link) (Zeileis 2008). + +```{r} +fit1 <- zeroinfl(round(parasites) ~ age * sex , data = dat, dist="poisson", link="probit") +summary(fit1) +``` + +Model the effects of age on parasite levels on V2, using zero-inflated Poisson regression. + +```{r} +fit1 <- zeroinfl(round(parasites.V2) ~ age, data = dat, dist="poisson", link="probit") +summary(fit1) +``` + +## 2. Gametocytes + +Check the correlation of the Pfs phenotypes. + +```{r} +p <- corr.test(log(dat[,c("pfs16", "pfs25", "pfs230")]))$p; p +``` + +Model the effects of age and sex on pfs16, pfs25, and pfs230 levels on V1 using the Wilcoxon test + +```{r} +fit1 <- wilcox_test(dat$pfs16 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit1) +fit2 <- wilcox_test(dat$pfs25 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit2) +fit3 <- wilcox_test(dat$pfs230 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit3) +median(subset(dat,age=="A")$pfs25, na.rm=TRUE); median(subset(dat,age=="I")$pfs25, na.rm=TRUE); +``` + +Check whether the results are the same under distributional assumptions, using lm() + +```{r} +fit1 <- lm(log(dat$pfs16) ~ age*sex, data=dat); anova(fit1) +fit2 <- lm(log(dat$pfs25) ~ age*sex, data=dat); anova(fit2) +fit3 <- lm(log(dat$pfs230) ~ age*sex, data=dat); anova(fit3) +``` + +## 3. Antimalarial antibody + +Model the effects of age and sex on antibody test results at V1 using multinomial ordinal probit / logistic regression + +```{r} +dat.sub <- subset(dat, !is.na(malaria.Ab.result)) +levels(dat.sub$malaria.Ab.result) <- c("neg", "grey", "pos") +with(dat.sub, table(malaria.Ab.result, sex, age)) +fit1 <- polr(malaria.Ab.result ~ age*sex, data=dat.sub, method="logistic") +fit2 <- polr(malaria.Ab.result ~ age + sex, data=dat.sub, method="logistic") +fit3 <- polr(malaria.Ab.result ~ sex, data=dat.sub, method="logistic") +fit4 <- polr(malaria.Ab.result ~ 1, data=dat.sub, method="logistic") +anova(fit4,fit3,fit2,fit1) +``` + +Model the effects of age and sex on antibody test results at V2 using multinomial probit / logistic regression. + +```{r} +dat.sub <- subset(dat, !is.na(malaria.Ab.result.V2)) +levels(dat.sub$malaria.Ab.result.V2) <- c("neg", "grey", "pos") +with(dat.sub, table(malaria.Ab.result.V2, sex, age)) +fit1 <- polr(malaria.Ab.result.V2 ~ age*sex, data=dat.sub, method="logistic") +fit2 <- polr(malaria.Ab.result.V2 ~ age + sex, data=dat.sub, method="logistic") +fit3 <- polr(malaria.Ab.result.V2 ~ sex, data=dat.sub, method="logistic") +fit4 <- polr(malaria.Ab.result.V2 ~ 1, data=dat.sub, method="logistic") +anova(fit4,fit3,fit2,fit1) +``` + +Model the effects of age, sex, and visit, together, using a cumulative link mixed model (CLMM)/ordinal probit regression. + +```{r} +dat.sub <- subset(dat_long, !is.na(malaria.Ab.result)) +levels(dat.sub$malaria.Ab.result) <- c("neg", "grey", "pos") +with(dat.sub, table(malaria.Ab.result, Sex, Age, Visit)) + +fit1 <- clmm(malaria.Ab.result ~ Age*Sex*Visit + (1|Subject_ID), data=dat.sub, Hess=TRUE, link="probit", + nAGQ=10) +summary(fit1) +``` + +## 4. Plasma cytokines + +Model the effects of age, sex, and visit on plasma cytokine levels using nparLD (nonparametric). + +```{r, results="hide"} +data <- dat_long +host.secreted <- c('TNFa', 'IFNg', 'IL6', 'IL12p40', 'IL12p70', 'IL10', + 'GMCSF', 'Hb', 'Nitrate.570') + +fits <- NULL +fit.df <- as.data.frame(matrix(data=NA, nrow=length(host.secreted),ncol=7)) +colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit") +fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(host.secreted),ncol=3)) +colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex") + +for(i in 1:length(host.secreted)){ + phen <- host.secreted[i] + tempdata <- droplevels(subset(data, !is.na(data[phen]))) + complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2] + tempdata <- droplevels(subset(data, Subject_ID %in% complete.subjects)) + y <- tempdata[,phen] + + time <- tempdata$Visit + group1 <- tempdata$Age + group2 <- tempdata$Sex + subject <- tempdata$Subject_ID + time.name <- "Visit" + group1.name <- "Age" + group2.name <- "Sex" + fit <- f2.ld.f1(y=y, time=time, group1=group1, group2=group2, + subject=subject, time.name=time.name, + group1.name=group1.name, + group2.name=group2.name, plot.RTE=FALSE) + fits[[i]] <- fit + fit.df[i,] <- fit$ANOVA.test$`p-value` + fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value` +} + +cat(paste0(phen, ": \n")); #print(fit$Wald.test); +rownames(fit.df) <- rownames(fit.df.wholeplot) <- host.secreted +print(fit.df) +xtable(format(fit.df, scientific = TRUE, digits=4)) +print(fit.df.wholeplot) +``` + +Plot the p-values, colored by significance thresholds + +```{r} +par(oma=c(0.1,3,0.1,0.1), mar=c(5,3,0.5,0.5)) +plot(1~1, xlim=c(0.5,7.5), ylim=rev(c(1,length(host.secreted)+1)), col="white", ylab="", xlab="",axes=FALSE) +for(i in 1:7){ + for(j in 1:length(host.secreted)){ + if(fit.df[j,i] < 0.05){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.15), pch=15, cex=3) + } + if(fit.df[j,i] < 0.01){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.45), pch=15, cex=3) + } + if(fit.df[j,i] < 0.001){ + points(x=i,y=j, col="darkblue", pch=15, cex=3) + } + } +} +new.rownames <- c(expression(paste("TNF-",alpha)), expression(paste("IFN-", gamma)), "IL-6", "IL-12 p40", + "IL-12 p70", "IL-10", "GM-CSF", "hemoglobin", "nitric oxide") +axis(side=2,at=c(1:length(host.secreted)),labels=new.rownames, las=2) +text(seq(1, 7, by=1)-0.1, par("usr")[3] + 0.5, labels = colnames(fit.df), + srt = 45, pos = 1, offset=1.5, xpd = TRUE) +axis(side=1, at=c(1:7), labels=rep("",7), las=1) +``` + +## 5. Cell composition phenotypes + +Model the effects of age, sex, and visit on cellular phenotypes using nparLD (nonparametric). + +```{r, results="hide"} +data <- dat_long +host.cellular <- c('CD33.live', 'mDC.live', 'monocytes.live', 'inflam.CD163', + 'patrol.CD163', 'trad.CD163', 'low.traditional') +fits <- NULL +fit.df <- as.data.frame(matrix(data=NA, nrow=length(host.cellular),ncol=7)) +colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit") +fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(host.cellular),ncol=3)) +colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex") + +for(i in 1:length(host.cellular)){ + phen <- host.cellular[i] + tempdata <- droplevels(subset(data, !is.na(data[phen]))) + complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2] + tempdata <- droplevels(subset(data, Subject_ID %in% complete.subjects)) + y <- tempdata[,phen] + + time <- tempdata$Visit + group1 <- tempdata$Age + group2 <- tempdata$Sex + subject <- tempdata$Subject_ID + time.name <- "Visit" + group1.name <- "Age" + group2.name <- "Sex" + fit <- f2.ld.f1(y=y, time=time, group1=group1, group2=group2, + subject=subject, time.name=time.name, + group1.name=group1.name, + group2.name=group2.name, plot.RTE=FALSE) + fits[[i]] <- fit + fit.df[i,] <- fit$ANOVA.test$`p-value` + fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value` +} + +cat(paste0(phen, ": \n")); #print(fit$Wald.test); +rownames(fit.df) <- rownames(fit.df.wholeplot) <- host.cellular +``` + +```{r} +xtable(format(fit.df, scientific = TRUE, digits=4)) +print(fit.df.wholeplot) +``` + +Plot the p-values (colored by significance thresholds) + +```{r} +par(oma=c(0.1,0.1,0.1,0.1), mar=c(5,13,0.5,0.5)) +plot(1~1, xlim=c(0.5,7.5), ylim=rev(c(1,8)), col="white", ylab="", xlab="",axes=FALSE) +for(i in 1:7){ + for(j in 1:7){ + if(fit.df[j,i] < 0.05){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.15), pch=15, cex=3) + } + if(fit.df[j,i] < 0.01){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.45), pch=15, cex=3) + } + if(fit.df[j,i] < 0.001){ + points(x=i,y=j, col="darkblue", pch=15, cex=3) + } + } +} +new.rownames <- c("CD33+ (% of live)", + "mDC (% of live)", + "monocytes (% of live)", + "infl. mono. (% of CD163+)", + "patrol. mono. (% of CD163+)", + "trad. mono. (% of CD163+)", + "CD14^low (% of trad. mono.)") +axis(side=2,at=c(1:7),labels=new.rownames, las=2) +text(seq(1, 7, by=1)-0.1, par("usr")[3], labels = colnames(fit.df), + srt = 45, pos = 1, offset=1.5, xpd = TRUE) +axis(side=1, at=c(1:7), labels=rep("",7), las=1) +``` + +In order to use a parametric (linear mixed model) with our data (lmer) we need to deal with heteroskedastic residuals. We can find a power transform that helps normalize them using Box-Cox analysis (Box and Cox, 1964). + +```{r, echo=FALSE} +bcphens <- c("TNFa", "IFNg", "IL6", "IL12p40", "IL12p70", + "IL10", "GMCSF", "Hb", "Nitrate.570") + +boxcoxlist <- list() + +par(mfrow=c(3,3)) +for(i in 1:length(bcphens)){ + thisdat <- subset(dat_long, select=c("Age", "Sex", "Visit", bcphens[i])) + thisdat <- thisdat[complete.cases(thisdat),] + thisdat$Visit <- as.factor(thisdat$Visit) + thismodel <- paste0(bcphens[i], " ~ Visit*Sex*Age") + fit1 <- lm(thismodel, data=thisdat, na.action=na.omit) + boxcoxlist[[i]] <- boxcox(fit1, data = thisdat, lambda = seq(-1,2,0.01), plotit = TRUE, + xlab=bcphens[i]); abline(v=0, col="red"); abline(v=1, col="green") +} + +par(mfrow=c(3,3)) +for(i in 1:length(bcphens)){ + thisdat <- subset(dat_long, select=c("Age", "Sex", "Visit", bcphens[i])) + thisdat <- thisdat[complete.cases(thisdat),] + thisdat$Visit <- as.factor(thisdat$Visit) + thismodel <- paste0(bcphens[i], " ~ Visit*Sex*Age") + fit1 <- lm(thismodel, data=thisdat, na.action=na.omit) + try(plot(fit1, which=2, main=bcphens[i])) +} + +``` + +Based on the Box-Cox analysis, choose a sensible transform proximal to the lambda value (+/- sqrt, +/- 1/3 root, log, no transform, etc.): Use log (natural) for lambda ~ 0, and no transform for lambda ~ 1. + +```{r, echo=FALSE} +bc.transform <- function(y, lambda){(y^lambda - 1)/lambda} + +dat_long$GMCSF.bc <- bc.transform(dat_long$GMCSF, lambda=-1/3) +dat_long$IFNg.bc <- bc.transform(dat_long$IFNg, lambda=-0.5) +dat_long$IL10.bc <- bc.transform(dat_long$IL10, lambda=-1/3) +dat_long$IL12p40.bc <- bc.transform(dat_long$IL12p40, lambda=-1/3) +dat_long$IL12p70.bc <- log(dat_long$IL12p70) +dat_long$IL6.bc <- bc.transform(dat_long$IL6, lambda=-0.5) +dat_long$TNFa.bc <- log(dat_long$TNFa) +dat_long$Hb.bc <- dat_long$Hb +dat_long$Nitrate.570.bc <- log(dat_long$Nitrate.570) + +bcphens.t <- paste(bcphens, "bc", sep=".") +par(mfrow=c(3,3)) +for(i in 1:length(bcphens.t)){ + thisdat <- subset(dat_long, select=c("Age", "Sex", "Visit", bcphens.t[i])) + thisdat <- thisdat[complete.cases(thisdat),] + thisdat$Visit <- as.factor(thisdat$Visit) + thismodel <- paste0(bcphens.t[i], " ~ Visit*Sex*Age") + fit1 <- lm(thismodel, data=thisdat, na.action=na.omit) + boxcoxlist[[i]] <- boxcox(fit1, data = thisdat, lambda = seq(-1,2,0.01), plotit = TRUE, + xlab=bcphens.t[i]); abline(v=0, col="red"); abline(v=1, col="green") +} + +bcphens.t <- paste(bcphens, "bc", sep=".") +par(mfrow=c(3,3)) +for(i in 1:length(bcphens.t)){ + thisdat <- subset(dat_long, select=c("Age", "Sex", "Visit", bcphens.t[i])) + thisdat <- thisdat[complete.cases(thisdat),] + thisdat$Visit <- as.factor(thisdat$Visit) + thismodel <- paste0(bcphens.t[i], " ~ Visit*Sex*Age") + fit1 <- lm(thismodel, data=thisdat, na.action=na.omit) + try(plot(fit1, which=2, main=bcphens.t[i])) +} +``` + +Try `lmer`: + +```{r, results="hide", message=FALSE, warning=FALSE} +dat_long$Visit <- dat_long$Visit - 1 +fits1 <- fits2 <- summaries <- list() +row_names <- c("(Intercept)", "AgeI", "SexM", "Visit", "AgeI:SexM", + "AgeI:Visit", "SexM:Visit", "AgeI:SexM:Visit") +summary_table <- data.frame(row.names = row_names) +#for(i in c(1:10,18:20)){ +for(i in 1:length(bcphens.t)){ + # Subject-level random intercepts will absorb all the age-specific variation, so we leave them out + # and instead estimate global age-specific effects, and only model the within-subject (visit) slopes + expr1 <- paste0(bcphens.t[i] , "~ Sex*Visit + (0+Visit|Subject_ID)") + expr2 <- paste0(bcphens.t[i], "~ Age*Sex*Visit + (0+Visit|Subject_ID)") + fit1 <- lmer(expr1, data=dat_long, na.action=na.exclude) + fit2 <- lmer(expr2, data=dat_long, na.action=na.exclude) + fits1[[i]] <- fit1 + names(fits1)[i] <- bcphens.t[i] + fits2[[i]] <- fit2 + names(fits2)[i] <- bcphens.t[i] + cat("\n##-------------------------") + cat(paste0(as.character(bcphens.t[i]), " : ")) + cat("-------------------------##\n") + cat("\n##------------") + cat("SUMMARY") + cat("--------------##\n") + print(summary(fit2)) + cat("\n##------------") + cat("ANOVA") + cat("--------------##\n") + print(anova(fit2,fit1)) + cat("\n##------------") + cat("RANOVA") + cat("--------------##\n") + print(ranova(fit2)) + summaries[[i]] <- as.data.frame(summary(fit2)[[10]][,5]) + summary_table <- cbind(summary_table, p=summaries[[i]]) +} + +colnames(summary_table) <- bcphens +summary_table <- t(summary_table) +xtable(format(summary_table, scientific = TRUE, digits=4)) +dat_long$Visit <- dat_long$Visit + 1 +``` + +## 6. Cytokine ratios + +Model the effects of age, sex, and visit on blood analyte ratios using nparLD; we omit IL12p40, NO and Hb, resulting in 15 proportions tested. + +```{r, results="hide", message=FALSE, warning=FALSE} +ratiotest <- c('TNFa','IFNg','IL6','IL12p70','IL10','GMCSF') +dat.ratios <- dat_long[,c("Subject_ID", "Sample", "age.years", "Age", "Sex", "Visit")] +ratio.combos <- t(combn(ratiotest,2)) +ratio.colnames <- paste(ratio.combos[,1], ratio.combos[,2], sep="/") +for(i in 1:length(ratio.colnames)){ + dat.ratios[,ratio.colnames[i]] <- dat_long[,ratio.combos[i,1]]/dat_long[,ratio.combos[i,2]] +} + +fits <- NULL +fit.df <- as.data.frame(matrix(data=NA, nrow=length(ratio.colnames),ncol=7)) +colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit") +fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(ratio.colnames),ncol=3)) +colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex") + +for(i in 1:length(ratio.colnames)){ + phen <- ratio.colnames[i] + #tempdata <- droplevels(subset(dat.ratios, !is.na(data[phen]))) + complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2] + tempdata <- droplevels(subset(dat.ratios, Subject_ID %in% complete.subjects)) + y <- tempdata[,phen] + + time <- tempdata$Visit + group1 <- tempdata$Age + group2 <- tempdata$Sex + subject <- tempdata$Subject_ID + time.name <- "Visit" + group1.name <- "Age" + group2.name <- "Sex" + fit <- f2.ld.f1(y=y, time=time, group1=group1, group2=group2, + subject=subject, time.name=time.name, + group1.name=group1.name, + group2.name=group2.name, plot.RTE=FALSE) + fits[[i]] <- fit + fit.df[i,] <- fit$ANOVA.test$`p-value` + fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value` +} + +cat(paste0(phen, ": \n")); #print(fit$Wald.test); +rownames(fit.df) <- rownames(fit.df.wholeplot) <- ratio.colnames +``` + +```{r} +xtable(format(fit.df, scientific = TRUE, digits=4)) +print(fit.df) +print(fit.df.wholeplot) +``` + +Plot the p-values in a grid. + +```{r, echo=FALSE} +par(oma=c(0.1,0.1,0.1,0.1), mar=c(5,9,0.5,0.5)) +plot(1~1, xlim=c(0.5,7.5), ylim=rev(c(1,nrow(fit.df))), col="white", ylab="", xlab="",axes=FALSE) +for(i in 1:7){ + for(j in 1:nrow(fit.df)){ + if(fit.df[j,i] < 0.05){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.15), pch=15, cex=3) + } + if(fit.df[j,i] < 0.01){ + points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.45), pch=15, cex=3) + } + if(fit.df[j,i] < 0.001){ + points(x=i,y=j, col="darkblue", pch=15, cex=3) + } + } +} +axis(side=2,at=c(1:nrow(fit.df)),labels=rownames(fit.df), las=2) +text(seq(1, 7, by=1)-0.1, par("usr")[3], labels = colnames(fit.df), + srt = 45, pos = 1, offset=1.5, xpd = TRUE) +axis(side=1, at=c(1:7), labels=rep("",7), las=1) +``` + +## 7. Treatment failure + +```{r, echo=FALSE} +dat$color.by.age <- ifelse(dat$age=="A", rgb(0,0,0,1,alpha=0.25), rgb(1,0,0,1,alpha=0.25)) +dat$color.by.age1 <- ifelse(dat$age=="A", rgb(0,0,0,1), rgb(1,0,0,1)) +dat$shape.by.sex <- ifelse(dat$sex=="M", 24, 21) +dat$shape.by.sex1 <- ifelse(dat$sex=="M", 17, 16) +dat$treatfail <- ifelse(dat$parasites.V2>10,1,0) + +phens.sub <- c("parasites", "Hb", "Nitrate.570", "TNFa", "IFNg", "IL6", "IL12p40", "IL12p70", + "IL10", "GMCSF") +new.rownames <- c("parasites", "hemoglobin", "nitric oxide", expression(paste("TNF-",alpha)), expression(paste("IFN-", gamma)), "IL-6", "IL-12 p40", "IL-12 p70", + "IL-10", "GM-CSF") + +par(mfrow=c(4,3), mar=c(3,3,2.5,1)) +i <- 1 +phen.v1 <- phens.sub[i] +phen.v2 <- paste0(phens.sub[i],".V2") +expr <- substitute(log(j+1,base=10) ~ log(i+1,base=10), list(i = as.name(phen.v1), j=as.name(phen.v2))) +plot(eval(expr), data=dat, main=new.rownames[i], xlab="", ylab="", pch=shape.by.sex1, col=color.by.age, cex=1.5, + ylim=range(log(dat[,phen.v1]+1, base=10), na.rm=TRUE), las=1) +points(eval(expr), data=subset(dat, treatfail==1), main=new.rownames[i], xlab="", ylab="", pch=shape.by.sex, bg=color.by.age1, col="black", cex=1.5, + ylim=range(log(dat[,phen.v1]+1, base=10), na.rm=TRUE)) +abline(a=0,b=1) +for(i in 2:(length(phens.sub))) { + phen.v1 <- phens.sub[i] + phen.v2 <- paste0(phens.sub[i],".V2") + expr <- substitute(log(j,base=10) ~ log(i,base=10), list(i = as.name(phen.v1), j=as.name(phen.v2))) + plot(eval(expr), data=dat, main=new.rownames[i], xlab="", ylab="", pch=shape.by.sex1, col=color.by.age, cex=1.5, las=1) + points(eval(expr), data=subset(dat, treatfail==1), main=new.rownames[i], xlab="", ylab="", pch=shape.by.sex, bg=color.by.age1, col="black", cex=1.5) + abline(a=0,b=1) +} +``` + +## 8. Continuous age effects within group + +Model the phenotypes vs. age for adults and infants at V1. + +```{r, results="hide"} +phens <- c('GMCSF', 'IFNg', 'IL10', 'IL12p40', 'IL12p70', + 'IL6', 'TNFa', 'Nitrate.570', + 'pfs25', 'pfs16', 'pfs230', 'Hb', 'malaria.Ab', 'parasites') + +phens <- phens[!(phens=="malaria.Ab")] +dat.I <- subset(dat, age="I") +dat.A <- subset(dat, age="A") + +cat("##-----------------##\n") +cat("INFANTS \n") +cat("##-----------------##\n") +for(i in 1:length(phens)){ + cat("##-----------------") + cat(phens[i]) + cat("-----------------## \n") + form <- paste0(phens[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.I)) + try(print(summary(fit1))) + fit1 <- NULL +} + +for(i in 1:length(phens)){ + cat("##-----------------") + cat(phens[i]) + cat("-----------------## \n") + form <- paste0(phens[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.A)) + try(print(summary(fit1))) + fit1 <- NULL +} +``` + +Model the phenotypes vs. age for adults and infants at V2. + +```{r, results="hide"} +phens.V2 <- paste0(phens, ".V2") +phens.V2 <- phens.V2[!(phens.V2 %in% c("pfs25.V2", "pfs16.V2", "pfs230.V2", "malaria.Ab.V2"))] + +cat("##-----------------##\n") +cat("INFANTS \n") +cat("##-----------------##\n") +for(i in 1:length(phens.V2)){ + cat("##-----------------") + cat(phens.V2[i]) + cat("-----------------## \n") + form <- paste0(phens.V2[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.I)) + try(print(summary(fit1))) + fit1 <- NULL +} + +cat("##-----------------##\n") +cat("ADULTS \n") +cat("##-----------------##\n") +for(i in 1:length(phens.V2)){ + cat("##-----------------") + cat(phens.V2[i]) + cat("-----------------## \n") + form <- paste0(phens.V2[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.A)) + try(print(summary(fit1))) + fit1 <- NULL +} +``` + +Model the log2FC of phenotypes vs. age for adults and infants. + +```{r, results="hide"} +dat$TNFa.FC <- log2(dat$TNFa.V2) - log2(dat$TNFa) +dat$IFNg.FC <- log2(dat$IFNg.V2) - log2(dat$IFNg) +dat$IL6.FC <- log2(dat$IL6.V2) - log2(dat$IL6) +dat$IL12p40.FC <- log2(dat$IL12p40.V2) - log2(dat$IL12p40) +dat$IL12p70.FC <- log2(dat$IL12p70.V2) - log2(dat$IL12p70) +dat$IL10.FC <- log2(dat$IL10.V2) - log2(dat$IL10) +dat$GMCSF.FC <- log2(dat$GMCSF.V2) - log2(dat$GMCSF) +dat$Hb.FC <- log2(dat$Hb.V2) - log2(dat$Hb) +dat$Nitrate.570.FC <- log2(dat$Nitrate.570.V2) - log2(dat$Nitrate.570) +phens <- c("TNFa", "IFNg", "IL6", "IL12p40", "IL12p70", "IL10", "GMCSF", "Hb", "Nitrate.570") +phens.FC <- paste0(phens, ".FC") +dat.I <- subset(dat, age="I") +dat.A <- subset(dat, age="A") + +cat("##-----------------##\n") +cat("INFANTS \n") +cat("##-----------------##\n") +for(i in 1:length(phens.FC)){ + cat("##-----------------") + cat(phens.FC[i]) + cat("-----------------## \n") + form <- paste0(phens.FC[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.I)) + try(print(summary(fit1))) + fit1 <- NULL +} + +cat("##-----------------##\n") +cat("ADULTS \n") +cat("##-----------------##\n") +for(i in 1:length(phens.FC)){ + cat("##-----------------") + cat(phens.FC[i]) + cat("-----------------## \n") + form <- paste0(phens.FC[i], "~ age.years*sex") + try(fit1 <- lm(form, data=dat.A)) + try(print(summary(fit1))) + fit1 <- NULL +} +``` + +Plot the phenotypes vs. age for adults and infants - V1. + +```{r, message=FALSE, warning=FALSE} +dat.I <- droplevels(subset(dat, age=="I")) +dat.A <- droplevels(subset(dat, age=="A")) + +p1 <- ggplot(dat.I, aes_string(x="age.years", y="TNFa", colour="sex", shape="sex")) +p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab(expression(paste("TNF-",alpha," (pg/mL)"))) +plot(p2) + +p1 <- ggplot(dat.A, aes_string(x="age.years", y="pfs16", colour="sex", shape="sex")) +p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + ylim(-75,200) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab("Pfs16 (gametocytes/uL)") +plot(p2) +``` + +Plot the phenotypes vs. age for adults and infants - V2. + +```{r, message=FALSE, warning=FALSE} +p1 <- ggplot(dat.A, aes_string(x="age.years", y="Nitrate.570.V2", colour="sex", shape="sex")) +p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab("nitric oxide (uM)") +plot(p2) +``` + diff --git a/analysis/MalariaInfantStudyAnalysis.html b/analysis/MalariaInfantStudyAnalysis.html new file mode 100644 index 0000000..067e438 --- /dev/null +++ b/analysis/MalariaInfantStudyAnalysis.html @@ -0,0 +1,1161 @@ + + + + + + + + + + + + + + +Malaria Infant Paper Analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
+

Load Packages

+
library(ggplot2)
+library(ggfortify)
+library(coin)
+library(xtable)
+library(pscl) # zero inflated poisson regression
+library(MASS) # multinomial ordinal probit regression / proportional odds logistic regression / Box-Cox
+library(ordinal) # cumulative link mixed model
+library(nparLD) # nonparametric rank-based statistics for longitudinal data
+library(lmerTest)
+library(psych)
+library(coin)
+
+
+

Load Data

+
dat <- read.csv("MIS_master_data_sheet_wide.csv")
+dat_long <- read.csv("MIS_master_data_sheet_long.csv")
+
+
+

PCA on raw phenotypes

+

To explore the data, we use PCA on a subset of the phenotypes that were collected from the most individuals.

+
select <- c('GMCSF','IFNg','IL10','IL12p40','IL12p70','IL6','TNFa','Nitrate.570','Hb')
+dat_sub <- dat_long[,select]
+dat_ref <- dat_long[complete.cases(dat_sub),]
+dat_sub <- dat_sub[complete.cases(dat_sub),]
+dat_ref$Visit <- as.factor(dat_ref$Visit)
+dat_ref$AgeVisit <- as.factor(paste(dat_ref$Age, dat_ref$Visit, sep="_"))
+pr <- princomp(dat_sub, cor=TRUE, scores=TRUE)
+summary(pr)
+
## Importance of components:
+##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
+## Standard deviation     1.6166372 1.5206210 1.1662880 0.93028163 0.85753381
+## Proportion of Variance 0.2903906 0.2569209 0.1511364 0.09615821 0.08170714
+## Cumulative Proportion  0.2903906 0.5473115 0.6984480 0.79460618 0.87631332
+##                            Comp.6     Comp.7     Comp.8     Comp.9
+## Standard deviation     0.69915024 0.53225219 0.48571599 0.32427872
+## Proportion of Variance 0.05431234 0.03147693 0.02621334 0.01168408
+## Cumulative Proportion  0.93062565 0.96210259 0.98831592 1.00000000
+
plot(pr, type="lines")
+

+
autoplot(pr, data = dat_ref, colour = 'AgeVisit', x=1, y=2, loadings=TRUE, loadings.label=TRUE)
+

+
autoplot(pr, data = dat_ref, colour = 'AgeVisit', x=2, y=3, loadings=TRUE, loadings.label=TRUE)
+

+
+
+

1. Parasite Load

+

Model the effects of age, considering sex, on parasite levels on V1, using the Wilcoxon test, among detectable samples only.

+
dat.sub <- subset(dat, parasites>0)
+fit1 <- wilcox_test(dat.sub$parasites ~ dat.sub$age | dat.sub$sex, distribution="exact"); pvalue(fit1)
+
## [1] 0.005679652
+

Model the effects of age and sex on parasite levels on V1, using zero-inflated Poisson regression.

+

We use the zero-inflated Poisson (ZIP) model (log link), with the binomial distribution to model the binary outcome of 0-inflation or not (probit link) (Zeileis 2008).

+
fit1 <- zeroinfl(round(parasites) ~ age * sex , data = dat, dist="poisson", link="probit")
+summary(fit1)
+
## 
+## Call:
+## zeroinfl(formula = round(parasites) ~ age * sex, data = dat, dist = "poisson", 
+##     link = "probit")
+## 
+## Pearson residuals:
+##    Min     1Q Median     3Q    Max 
+## -2.160 -1.967 -1.483  0.119 10.408 
+## 
+## Count model coefficients (poisson with log link):
+##              Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)  9.987511   0.001957  5102.8   <2e-16 ***
+## ageI         1.284821   0.002233   575.3   <2e-16 ***
+## sexM        -1.721518   0.005692  -302.4   <2e-16 ***
+## ageI:sexM    2.012143   0.005851   343.9   <2e-16 ***
+## 
+## Zero-inflation model coefficients (binomial with probit link):
+##             Estimate Std. Error z value Pr(>|z|)  
+## (Intercept)  -0.8416     0.3689  -2.281   0.0225 *
+## ageI          0.3528     0.4932   0.715   0.4743  
+## sexM          0.1671     0.5393   0.310   0.7566  
+## ageI:sexM    -0.6073     0.7247  -0.838   0.4021  
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
+## 
+## Number of iterations in BFGS optimization: 9 
+## Log-likelihood: -2.308e+06 on 8 Df
+

Model the effects of age on parasite levels on V2, using zero-inflated Poisson regression.

+
fit1 <- zeroinfl(round(parasites.V2) ~ age, data = dat, dist="poisson", link="probit")
+summary(fit1)
+
## 
+## Call:
+## zeroinfl(formula = round(parasites.V2) ~ age, data = dat, dist = "poisson", 
+##     link = "probit")
+## 
+## Pearson residuals:
+##     Min      1Q  Median      3Q     Max 
+## -0.4082 -0.4082 -0.2311 -0.1981 10.7672 
+## 
+## Count model coefficients (poisson with log link):
+##             Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)   3.9890     0.1361   29.31   <2e-16 ***
+## ageI          5.8160     0.1361   42.72   <2e-16 ***
+## 
+## Zero-inflation model coefficients (binomial with probit link):
+##             Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)   1.7688     0.4519   3.915 9.06e-05 ***
+## ageI         -0.7013     0.5386  -1.302    0.193    
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
+## 
+## Number of iterations in BFGS optimization: 16 
+## Log-likelihood: -9.148e+04 on 4 Df
+
+
+

2. Gametocytes

+

Check the correlation of the Pfs phenotypes.

+
p <- corr.test(log(dat[,c("pfs16", "pfs25", "pfs230")]))$p; p
+
##             pfs16      pfs25 pfs230
+## pfs16  0.00000000 0.08389135      1
+## pfs25  0.02796378 0.00000000      1
+## pfs230 0.66050611 0.94653555      0
+

Model the effects of age and sex on pfs16, pfs25, and pfs230 levels on V1 using the Wilcoxon test

+
fit1 <- wilcox_test(dat$pfs16 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit1)
+
## [1] 0.1095546
+
fit2 <- wilcox_test(dat$pfs25 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit2)
+
## [1] 0.006847807
+
fit3 <- wilcox_test(dat$pfs230 ~ dat$age | dat$sex, distribution="exact"); pvalue(fit3)
+
## [1] 0.2377015
+
median(subset(dat,age=="A")$pfs25, na.rm=TRUE); median(subset(dat,age=="I")$pfs25, na.rm=TRUE);
+
## [1] 0.255
+
## [1] 0.465
+

Check whether the results are the same under distributional assumptions, using lm()

+
fit1 <- lm(log(dat$pfs16) ~ age*sex, data=dat); anova(fit1)
+
## Analysis of Variance Table
+## 
+## Response: log(dat$pfs16)
+##           Df Sum Sq Mean Sq F value  Pr(>F)  
+## age        1  3.252  3.2520  3.1852 0.08014 .
+## sex        1  0.585  0.5853  0.5732 0.45240  
+## age:sex    1  0.714  0.7135  0.6988 0.40700  
+## Residuals 52 53.092  1.0210                  
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
fit2 <- lm(log(dat$pfs25) ~ age*sex, data=dat); anova(fit2)
+
## Analysis of Variance Table
+## 
+## Response: log(dat$pfs25)
+##           Df Sum Sq Mean Sq F value   Pr(>F)   
+## age        1 14.361 14.3608  8.2920 0.005769 **
+## sex        1  1.841  1.8410  1.0630 0.307308   
+## age:sex    1  3.244  3.2442  1.8732 0.176990   
+## Residuals 52 90.058  1.7319                    
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
fit3 <- lm(log(dat$pfs230) ~ age*sex, data=dat); anova(fit3)
+
## Analysis of Variance Table
+## 
+## Response: log(dat$pfs230)
+##           Df  Sum Sq Mean Sq F value Pr(>F)
+## age        1   32.51  32.509  1.6078 0.2106
+## sex        1    0.09   0.086  0.0042 0.9483
+## age:sex    1    0.88   0.877  0.0434 0.8359
+## Residuals 51 1031.23  20.220
+
+
+

3. Antimalarial antibody

+

Model the effects of age and sex on antibody test results at V1 using multinomial ordinal probit / logistic regression

+
dat.sub <- subset(dat, !is.na(malaria.Ab.result))
+levels(dat.sub$malaria.Ab.result) <- c("neg", "grey", "pos")
+with(dat.sub, table(malaria.Ab.result, sex, age))
+
## , , age = A
+## 
+##                  sex
+## malaria.Ab.result  F  M
+##              neg   0  1
+##              grey  3  2
+##              pos  12 10
+## 
+## , , age = I
+## 
+##                  sex
+## malaria.Ab.result  F  M
+##              neg   1  1
+##              grey  8  5
+##              pos   7 10
+
fit1 <- polr(malaria.Ab.result ~ age*sex, data=dat.sub, method="logistic")
+fit2 <- polr(malaria.Ab.result ~ age + sex, data=dat.sub, method="logistic")
+fit3 <- polr(malaria.Ab.result ~ sex, data=dat.sub, method="logistic")
+fit4 <- polr(malaria.Ab.result ~ 1, data=dat.sub, method="logistic")
+anova(fit4,fit3,fit2,fit1)
+
## Likelihood ratio tests of ordinal regression models
+## 
+## Response: malaria.Ab.result
+##       Model Resid. df Resid. Dev   Test    Df  LR stat.    Pr(Chi)
+## 1         1        58   94.91848                                  
+## 2       sex        57   94.68500 1 vs 2     1 0.2334787 0.62895634
+## 3 age + sex        56   90.39413 2 vs 3     1 4.2908745 0.03831745
+## 4 age * sex        55   89.72993 3 vs 4     1 0.6641964 0.41508237
+

Model the effects of age and sex on antibody test results at V2 using multinomial probit / logistic regression.

+
dat.sub <- subset(dat, !is.na(malaria.Ab.result.V2))
+levels(dat.sub$malaria.Ab.result.V2) <- c("neg", "grey", "pos")
+with(dat.sub, table(malaria.Ab.result.V2, sex, age))
+
## , , age = A
+## 
+##                     sex
+## malaria.Ab.result.V2  F  M
+##                 neg   2  0
+##                 grey  3  4
+##                 pos  10  9
+## 
+## , , age = I
+## 
+##                     sex
+## malaria.Ab.result.V2  F  M
+##                 neg   0  1
+##                 grey  7  5
+##                 pos   8  6
+
fit1 <- polr(malaria.Ab.result.V2 ~ age*sex, data=dat.sub, method="logistic")
+fit2 <- polr(malaria.Ab.result.V2 ~ age + sex, data=dat.sub, method="logistic")
+fit3 <- polr(malaria.Ab.result.V2 ~ sex, data=dat.sub, method="logistic")
+fit4 <- polr(malaria.Ab.result.V2 ~ 1, data=dat.sub, method="logistic")
+anova(fit4,fit3,fit2,fit1)
+
## Likelihood ratio tests of ordinal regression models
+## 
+## Response: malaria.Ab.result.V2
+##       Model Resid. df Resid. Dev   Test    Df    LR stat.   Pr(Chi)
+## 1         1        53   91.55680                                   
+## 2       sex        52   91.55054 1 vs 2     1 0.006256122 0.9369565
+## 3 age + sex        51   90.50029 2 vs 3     1 1.050246129 0.3054504
+## 4 age * sex        50   90.22810 3 vs 4     1 0.272190878 0.6018659
+

Model the effects of age, sex, and visit, together, using a cumulative link mixed model (CLMM)/ordinal probit regression.

+
dat.sub <- subset(dat_long, !is.na(malaria.Ab.result))
+levels(dat.sub$malaria.Ab.result) <- c("neg", "grey", "pos")
+with(dat.sub, table(malaria.Ab.result, Sex, Age, Visit))
+
## , , Age = A, Visit = 1
+## 
+##                  Sex
+## malaria.Ab.result  F  M
+##              neg   0  1
+##              grey  3  2
+##              pos  12 10
+## 
+## , , Age = I, Visit = 1
+## 
+##                  Sex
+## malaria.Ab.result  F  M
+##              neg   1  1
+##              grey  8  5
+##              pos   7 10
+## 
+## , , Age = A, Visit = 2
+## 
+##                  Sex
+## malaria.Ab.result  F  M
+##              neg   2  0
+##              grey  3  4
+##              pos  10  9
+## 
+## , , Age = I, Visit = 2
+## 
+##                  Sex
+## malaria.Ab.result  F  M
+##              neg   0  1
+##              grey  7  5
+##              pos   8  6
+
fit1 <- clmm(malaria.Ab.result ~ Age*Sex*Visit + (1|Subject_ID), data=dat.sub, Hess=TRUE, link="probit",
+              nAGQ=10)
+summary(fit1)
+
## Cumulative Link Mixed Model fitted with the adaptive Gauss-Hermite 
+## quadrature approximation with 10 quadrature points 
+## 
+## formula: malaria.Ab.result ~ Age * Sex * Visit + (1 | Subject_ID)
+## data:    dat.sub
+## 
+##  link   threshold nobs logLik AIC    niter     max.grad cond.H 
+##  probit flexible  115  -83.00 186.01 518(1557) 2.32e-05 1.5e+03
+## 
+## Random effects:
+##  Groups     Name        Variance Std.Dev.
+##  Subject_ID (Intercept) 2.221    1.49    
+## Number of groups:  Subject_ID 60 
+## 
+## Coefficients:
+##                 Estimate Std. Error z value Pr(>|z|)  
+## AgeI             -3.5607     1.6383  -2.173   0.0298 *
+## SexM             -1.8474     1.7177  -1.076   0.2821  
+## Visit            -1.2559     0.6978  -1.800   0.0719 .
+## AgeI:SexM         3.5755     2.2323   1.602   0.1092  
+## AgeI:Visit        1.7113     0.8885   1.926   0.0541 .
+## SexM:Visit        1.2400     0.9672   1.282   0.1998  
+## AgeI:SexM:Visit  -2.3611     1.2978  -1.819   0.0689 .
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Threshold coefficients:
+##          Estimate Std. Error z value
+## neg|grey   -5.283      1.563  -3.379
+## grey|pos   -3.036      1.354  -2.243
+
+
+

4. Plasma cytokines

+

Model the effects of age, sex, and visit on plasma cytokine levels using nparLD (nonparametric).

+
data <- dat_long
+host.secreted <- c('TNFa', 'IFNg', 'IL6', 'IL12p40', 'IL12p70', 'IL10',
+                   'GMCSF', 'Hb', 'Nitrate.570')
+
+fits <- NULL
+fit.df <- as.data.frame(matrix(data=NA, nrow=length(host.secreted),ncol=7))
+colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit")
+fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(host.secreted),ncol=3))
+colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex")
+
+for(i in 1:length(host.secreted)){
+  phen <- host.secreted[i]
+  tempdata <- droplevels(subset(data, !is.na(data[phen])))
+  complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2]
+  tempdata <- droplevels(subset(data, Subject_ID %in% complete.subjects))
+  y <- tempdata[,phen]
+  
+  time <- tempdata$Visit
+  group1 <- tempdata$Age
+  group2 <- tempdata$Sex
+  subject <- tempdata$Subject_ID
+  time.name <- "Visit"
+  group1.name <- "Age"
+  group2.name <- "Sex"
+  fit <-  f2.ld.f1(y=y, time=time, group1=group1, group2=group2,
+                   subject=subject, time.name=time.name, 
+                   group1.name=group1.name,
+                   group2.name=group2.name, plot.RTE=FALSE)
+  fits[[i]] <- fit
+  fit.df[i,] <- fit$ANOVA.test$`p-value`
+  fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value`
+}
+
+cat(paste0(phen, ": \n")); #print(fit$Wald.test); 
+rownames(fit.df) <- rownames(fit.df.wholeplot) <- host.secreted
+print(fit.df)
+xtable(format(fit.df, scientific = TRUE, digits=4))
+print(fit.df.wholeplot)
+

Plot the p-values, colored by significance thresholds

+
par(oma=c(0.1,3,0.1,0.1), mar=c(5,3,0.5,0.5))
+plot(1~1, xlim=c(0.5,7.5), ylim=rev(c(1,length(host.secreted)+1)), col="white", ylab="", xlab="",axes=FALSE)
+
## Warning in plot.formula(1 ~ 1, xlim = c(0.5, 7.5), ylim = rev(c(1,
+## length(host.secreted) + : the formula '1 ~ 1' is treated as '1 ~ 1'
+
for(i in 1:7){
+  for(j in 1:length(host.secreted)){
+    if(fit.df[j,i] < 0.05){
+      points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.15), pch=15, cex=3)
+    }
+    if(fit.df[j,i] < 0.01){
+      points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.45), pch=15, cex=3)
+    }
+    if(fit.df[j,i] < 0.001){
+      points(x=i,y=j, col="darkblue", pch=15, cex=3)
+    }
+  }
+}
+new.rownames <- c(expression(paste("TNF-",alpha)), expression(paste("IFN-", gamma)), "IL-6", "IL-12 p40", 
+                  "IL-12 p70", "IL-10", "GM-CSF", "hemoglobin", "nitric oxide")
+axis(side=2,at=c(1:length(host.secreted)),labels=new.rownames, las=2)
+text(seq(1, 7, by=1)-0.1, par("usr")[3] + 0.5, labels = colnames(fit.df), 
+     srt = 45, pos = 1, offset=1.5, xpd = TRUE)
+axis(side=1, at=c(1:7), labels=rep("",7), las=1)
+

+
+
+

5. Cell composition phenotypes

+

Model the effects of age, sex, and visit on cellular phenotypes using nparLD (nonparametric).

+
data <- dat_long
+host.cellular <- c('CD33.live', 'mDC.live', 'monocytes.live', 'inflam.CD163', 
+                   'patrol.CD163', 'trad.CD163', 'low.traditional')
+fits <- NULL
+fit.df <- as.data.frame(matrix(data=NA, nrow=length(host.cellular),ncol=7))
+colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit")
+fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(host.cellular),ncol=3))
+colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex")
+
+for(i in 1:length(host.cellular)){
+  phen <- host.cellular[i]
+  tempdata <- droplevels(subset(data, !is.na(data[phen])))
+  complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2]
+  tempdata <- droplevels(subset(data, Subject_ID %in% complete.subjects))
+  y <- tempdata[,phen]
+  
+  time <- tempdata$Visit
+  group1 <- tempdata$Age
+  group2 <- tempdata$Sex
+  subject <- tempdata$Subject_ID
+  time.name <- "Visit"
+  group1.name <- "Age"
+  group2.name <- "Sex"
+  fit <-  f2.ld.f1(y=y, time=time, group1=group1, group2=group2,
+                   subject=subject, time.name=time.name, 
+                   group1.name=group1.name,
+                   group2.name=group2.name, plot.RTE=FALSE)
+  fits[[i]] <- fit
+  fit.df[i,] <- fit$ANOVA.test$`p-value`
+  fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value`
+}
+
+cat(paste0(phen, ": \n")); #print(fit$Wald.test); 
+rownames(fit.df) <- rownames(fit.df.wholeplot) <- host.cellular
+
xtable(format(fit.df, scientific = TRUE, digits=4))
+
## % latex table generated in R 3.5.1 by xtable 1.8-3 package
+## % Tue Mar  5 08:28:27 2019
+## \begin{table}[ht]
+## \centering
+## \begin{tabular}{rlllllll}
+##   \hline
+##  & Age & Sex & Visit & Age:Sex & Age:Visit & Sex:Visit & Age:Sex:Visit \\ 
+##   \hline
+## CD33.live & 1.234e-01 & 3.144e-01 & 6.351e-02 & 8.146e-01 & 5.770e-01 & 3.976e-01 & 5.409e-01 \\ 
+##   mDC.live & 4.666e-02 & 9.955e-01 & 6.032e-08 & 2.024e-01 & 4.282e-02 & 7.180e-01 & 7.882e-01 \\ 
+##   monocytes.live & 1.903e-01 & 3.264e-01 & 1.303e-01 & 9.151e-01 & 4.617e-01 & 3.037e-01 & 5.550e-01 \\ 
+##   inflam.CD163 & 1.269e-01 & 3.639e-01 & 7.735e-01 & 9.002e-01 & 1.780e-01 & 5.000e-01 & 5.468e-01 \\ 
+##   patrol.CD163 & 7.971e-01 & 4.551e-01 & 1.168e-05 & 3.814e-01 & 1.104e-01 & 2.660e-01 & 9.464e-01 \\ 
+##   trad.CD163 & 1.072e-01 & 1.738e-01 & 7.886e-01 & 9.705e-01 & 8.510e-01 & 6.950e-01 & 9.854e-01 \\ 
+##   low.traditional & 4.337e-01 & 9.392e-01 & 1.648e-02 & 2.313e-01 & 4.256e-01 & 3.040e-01 & 2.756e-01 \\ 
+##    \hline
+## \end{tabular}
+## \end{table}
+
print(fit.df.wholeplot)
+
##                       Age       Sex   Age:Sex
+## CD33.live       0.1299105 0.3194342 0.8156053
+## mDC.live        0.0520493 0.9955597 0.2082021
+## monocytes.live  0.1964811 0.3313265 0.9155898
+## inflam.CD163    0.1335887 0.3685465 0.9007352
+## patrol.CD163    0.7981877 0.4585976 0.3855759
+## trad.CD163      0.1135216 0.1799019 0.9706147
+## low.traditional 0.4372928 0.9395174 0.2368036
+

Plot the p-values (colored by significance thresholds)

+
par(oma=c(0.1,0.1,0.1,0.1), mar=c(5,13,0.5,0.5))
+plot(1~1, xlim=c(0.5,7.5), ylim=rev(c(1,8)), col="white", ylab="", xlab="",axes=FALSE)
+
## Warning in plot.formula(1 ~ 1, xlim = c(0.5, 7.5), ylim = rev(c(1, 8)), :
+## the formula '1 ~ 1' is treated as '1 ~ 1'
+
for(i in 1:7){
+  for(j in 1:7){
+    if(fit.df[j,i] < 0.05){
+      points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.15), pch=15, cex=3)
+    }
+    if(fit.df[j,i] < 0.01){
+      points(x=i,y=j, col=adjustcolor("darkblue", alpha.f=0.45), pch=15, cex=3)
+    }
+    if(fit.df[j,i] < 0.001){
+      points(x=i,y=j, col="darkblue", pch=15, cex=3)
+    }
+  }
+}
+new.rownames <- c("CD33+ (% of live)",
+                  "mDC (% of live)",
+                  "monocytes (% of live)",
+                  "infl. mono. (% of CD163+)",
+                  "patrol. mono. (% of CD163+)",
+                  "trad. mono. (% of CD163+)",
+                  "CD14^low (% of trad. mono.)")
+axis(side=2,at=c(1:7),labels=new.rownames, las=2)
+text(seq(1, 7, by=1)-0.1, par("usr")[3], labels = colnames(fit.df), 
+     srt = 45, pos = 1, offset=1.5, xpd = TRUE)
+axis(side=1, at=c(1:7), labels=rep("",7), las=1)
+

+

In order to use a parametric (linear mixed model) with our data (lmer) we need to deal with heteroskedastic residuals. We can find a power transform that helps normalize them using Box-Cox analysis (Box and Cox, 1964).

+

+

Based on the Box-Cox analysis, choose a sensible transform proximal to the lambda value (+/- sqrt, +/- 1/3 root, log, no transform, etc.): Use log (natural) for lambda ~ 0, and no transform for lambda ~ 1.

+

+

Try lmer:

+
dat_long$Visit <- dat_long$Visit - 1 
+fits1 <- fits2 <- summaries <- list()
+row_names <- c("(Intercept)", "AgeI", "SexM", "Visit", "AgeI:SexM",
+               "AgeI:Visit", "SexM:Visit", "AgeI:SexM:Visit")
+summary_table <- data.frame(row.names = row_names)
+#for(i in c(1:10,18:20)){
+for(i in 1:length(bcphens.t)){
+  # Subject-level random intercepts will absorb all the age-specific variation, so we leave them out
+  # and instead estimate global age-specific effects, and only model the within-subject (visit) slopes
+  expr1 <- paste0(bcphens.t[i] , "~ Sex*Visit + (0+Visit|Subject_ID)")
+  expr2 <- paste0(bcphens.t[i], "~ Age*Sex*Visit + (0+Visit|Subject_ID)")
+  fit1 <- lmer(expr1, data=dat_long, na.action=na.exclude)
+  fit2 <- lmer(expr2, data=dat_long, na.action=na.exclude)
+  fits1[[i]] <- fit1
+  names(fits1)[i] <- bcphens.t[i]
+  fits2[[i]] <- fit2
+  names(fits2)[i] <- bcphens.t[i]
+  cat("\n##-------------------------")
+  cat(paste0(as.character(bcphens.t[i]), " : "))
+  cat("-------------------------##\n")
+  cat("\n##------------")
+  cat("SUMMARY")
+  cat("--------------##\n")
+  print(summary(fit2))
+  cat("\n##------------")
+  cat("ANOVA")
+  cat("--------------##\n")
+  print(anova(fit2,fit1))
+  cat("\n##------------")
+  cat("RANOVA")
+  cat("--------------##\n")
+  print(ranova(fit2))
+  summaries[[i]] <- as.data.frame(summary(fit2)[[10]][,5])
+  summary_table <- cbind(summary_table, p=summaries[[i]])
+}
+
+colnames(summary_table) <- bcphens
+summary_table <- t(summary_table)
+xtable(format(summary_table, scientific = TRUE, digits=4))
+dat_long$Visit <- dat_long$Visit + 1
+
+
+

6. Cytokine ratios

+

Model the effects of age, sex, and visit on blood analyte ratios using nparLD; we omit IL12p40, NO and Hb, resulting in 15 proportions tested.

+
ratiotest <- c('TNFa','IFNg','IL6','IL12p70','IL10','GMCSF')
+dat.ratios <- dat_long[,c("Subject_ID", "Sample", "age.years", "Age", "Sex", "Visit")]
+ratio.combos <- t(combn(ratiotest,2))
+ratio.colnames <- paste(ratio.combos[,1], ratio.combos[,2], sep="/")
+for(i in 1:length(ratio.colnames)){
+  dat.ratios[,ratio.colnames[i]] <- dat_long[,ratio.combos[i,1]]/dat_long[,ratio.combos[i,2]]
+}
+
+fits <- NULL
+fit.df <- as.data.frame(matrix(data=NA, nrow=length(ratio.colnames),ncol=7))
+colnames(fit.df) <- c("Age", "Sex", "Visit", "Age:Sex", "Age:Visit", "Sex:Visit", "Age:Sex:Visit")
+fit.df.wholeplot <- as.data.frame(matrix(data=NA, nrow=length(ratio.colnames),ncol=3))
+colnames(fit.df.wholeplot) <- c("Age", "Sex", "Age:Sex")
+
+for(i in 1:length(ratio.colnames)){
+  phen <- ratio.colnames[i]
+  #tempdata <- droplevels(subset(dat.ratios, !is.na(data[phen])))
+  complete.subjects <- names(summary(data$Subject_ID))[summary(data$Subject_ID)==2]
+  tempdata <- droplevels(subset(dat.ratios, Subject_ID %in% complete.subjects))
+  y <- tempdata[,phen]
+  
+  time <- tempdata$Visit
+  group1 <- tempdata$Age
+  group2 <- tempdata$Sex
+  subject <- tempdata$Subject_ID
+  time.name <- "Visit"
+  group1.name <- "Age"
+  group2.name <- "Sex"
+  fit <-  f2.ld.f1(y=y, time=time, group1=group1, group2=group2,
+                   subject=subject, time.name=time.name, 
+                   group1.name=group1.name,
+                   group2.name=group2.name, plot.RTE=FALSE)
+  fits[[i]] <- fit
+  fit.df[i,] <- fit$ANOVA.test$`p-value`
+  fit.df.wholeplot[i,] <- fit$ANOVA.test.mod.Box$`p-value`
+}
+
+cat(paste0(phen, ": \n")); #print(fit$Wald.test); 
+rownames(fit.df) <- rownames(fit.df.wholeplot) <- ratio.colnames
+
xtable(format(fit.df, scientific = TRUE, digits=4))
+
## % latex table generated in R 3.5.1 by xtable 1.8-3 package
+## % Tue Mar  5 08:28:34 2019
+## \begin{table}[ht]
+## \centering
+## \begin{tabular}{rlllllll}
+##   \hline
+##  & Age & Sex & Visit & Age:Sex & Age:Visit & Sex:Visit & Age:Sex:Visit \\ 
+##   \hline
+## TNFa/IFNg & 9.121e-02 & 5.852e-01 & 2.680e-03 & 2.673e-02 & 8.262e-03 & 5.840e-01 & 1.691e-02 \\ 
+##   TNFa/IL6 & 2.857e-03 & 6.443e-01 & 6.791e-01 & 9.455e-02 & 2.693e-02 & 1.370e-01 & 6.000e-01 \\ 
+##   TNFa/IL12p70 & 5.861e-08 & 4.937e-01 & 3.807e-18 & 3.270e-01 & 4.423e-01 & 3.774e-01 & 4.588e-01 \\ 
+##   TNFa/IL10 & 2.706e-02 & 6.958e-01 & 4.555e-16 & 8.375e-01 & 2.540e-01 & 9.063e-01 & 4.831e-02 \\ 
+##   TNFa/GMCSF & 1.907e-05 & 4.098e-01 & 8.120e-13 & 8.466e-01 & 5.674e-01 & 9.900e-01 & 2.218e-01 \\ 
+##   IFNg/IL6 & 3.077e-01 & 9.418e-01 & 1.782e-03 & 9.354e-01 & 3.535e-01 & 1.892e-02 & 3.071e-01 \\ 
+##   IFNg/IL12p70 & 1.128e-03 & 7.762e-01 & 5.501e-06 & 1.244e-01 & 2.191e-02 & 4.324e-01 & 8.849e-04 \\ 
+##   IFNg/IL10 & 1.034e-03 & 6.961e-01 & 2.844e-19 & 6.569e-01 & 6.257e-01 & 8.150e-01 & 5.466e-01 \\ 
+##   IFNg/GMCSF & 2.533e-02 & 3.089e-01 & 1.093e-03 & 3.406e-02 & 3.338e-01 & 9.142e-01 & 9.116e-04 \\ 
+##   IL6/IL12p70 & 2.524e-01 & 8.248e-01 & 1.500e-11 & 6.402e-01 & 1.385e-04 & 3.185e-01 & 1.507e-01 \\ 
+##   IL6/IL10 & 5.282e-05 & 3.498e-01 & 3.055e-19 & 8.788e-01 & 5.374e-01 & 1.644e-01 & 1.252e-02 \\ 
+##   IL6/GMCSF & 3.188e-01 & 5.405e-01 & 1.493e-10 & 2.702e-01 & 8.994e-04 & 7.862e-02 & 3.733e-01 \\ 
+##   IL12p70/IL10 & 7.763e-06 & 5.190e-01 & 4.754e-22 & 8.536e-01 & 5.150e-01 & 7.358e-01 & 1.019e-01 \\ 
+##   IL12p70/GMCSF & 6.258e-01 & 9.159e-01 & 9.159e-02 & 2.446e-01 & 4.148e-01 & 8.555e-01 & 3.113e-01 \\ 
+##   IL10/GMCSF & 2.796e-05 & 4.987e-01 & 1.242e-22 & 7.694e-01 & 6.191e-01 & 8.830e-01 & 7.485e-02 \\ 
+##    \hline
+## \end{tabular}
+## \end{table}
+
print(fit.df)
+
##                        Age       Sex        Visit    Age:Sex    Age:Visit
+## TNFa/IFNg     9.120635e-02 0.5852038 2.679570e-03 0.02673352 0.0082616320
+## TNFa/IL6      2.856784e-03 0.6442682 6.790523e-01 0.09455301 0.0269344575
+## TNFa/IL12p70  5.860592e-08 0.4937122 3.807258e-18 0.32700283 0.4422558598
+## TNFa/IL10     2.706203e-02 0.6957773 4.555445e-16 0.83749812 0.2539669683
+## TNFa/GMCSF    1.906851e-05 0.4097557 8.119534e-13 0.84663218 0.5673586057
+## IFNg/IL6      3.077146e-01 0.9417818 1.781863e-03 0.93540234 0.3535413331
+## IFNg/IL12p70  1.128398e-03 0.7761798 5.501030e-06 0.12440072 0.0219111074
+## IFNg/IL10     1.034297e-03 0.6960740 2.843759e-19 0.65692822 0.6256864901
+## IFNg/GMCSF    2.532751e-02 0.3089323 1.092911e-03 0.03405905 0.3338340221
+## IL6/IL12p70   2.524049e-01 0.8248457 1.499572e-11 0.64024467 0.0001385155
+## IL6/IL10      5.282103e-05 0.3498184 3.055106e-19 0.87884185 0.5373833237
+## IL6/GMCSF     3.188063e-01 0.5405496 1.492876e-10 0.27020448 0.0008994290
+## IL12p70/IL10  7.762802e-06 0.5190226 4.753808e-22 0.85362176 0.5150001829
+## IL12p70/GMCSF 6.257536e-01 0.9158885 9.158735e-02 0.24463452 0.4147820477
+## IL10/GMCSF    2.796421e-05 0.4986923 1.241835e-22 0.76941913 0.6190910080
+##                Sex:Visit Age:Sex:Visit
+## TNFa/IFNg     0.58400643  0.0169148255
+## TNFa/IL6      0.13702180  0.6000227758
+## TNFa/IL12p70  0.37742929  0.4588496621
+## TNFa/IL10     0.90633123  0.0483083382
+## TNFa/GMCSF    0.99001948  0.2217603615
+## IFNg/IL6      0.01892378  0.3071071312
+## IFNg/IL12p70  0.43237932  0.0008848825
+## IFNg/IL10     0.81496355  0.5466323136
+## IFNg/GMCSF    0.91417711  0.0009115823
+## IL6/IL12p70   0.31854725  0.1506788411
+## IL6/IL10      0.16435034  0.0125232641
+## IL6/GMCSF     0.07861986  0.3733004096
+## IL12p70/IL10  0.73584373  0.1018831981
+## IL12p70/GMCSF 0.85554122  0.3112755688
+## IL10/GMCSF    0.88302614  0.0748478475
+
print(fit.df.wholeplot)
+
##                        Age       Sex    Age:Sex
+## TNFa/IFNg     9.756590e-02 0.5876821 0.03141559
+## TNFa/IL6      4.571634e-03 0.6464594 0.10138971
+## TNFa/IL12p70  1.759777e-06 0.4969169 0.33178815
+## TNFa/IL10     3.242638e-02 0.6977063 0.83846500
+## TNFa/GMCSF    1.207824e-04 0.4148172 0.84764572
+## IFNg/IL6      3.126348e-01 0.9420737 0.93572656
+## IFNg/IL12p70  2.037338e-03 0.7773628 0.13075292
+## IFNg/IL10     1.945056e-03 0.6978248 0.65895016
+## IFNg/GMCSF    3.075933e-02 0.3148248 0.04008269
+## IL6/IL12p70   2.581658e-01 0.8257988 0.64239146
+## IL6/IL10      1.848243e-04 0.3543693 0.87946316
+## IL6/GMCSF     3.243046e-01 0.5437265 0.27625071
+## IL12p70/IL10  5.083825e-05 0.5222403 0.85443645
+## IL12p70/GMCSF 6.283891e-01 0.9164109 0.25145816
+## IL10/GMCSF    1.250330e-04 0.5020743 0.77073520
+

Plot the p-values in a grid.

+
## Warning in plot.formula(1 ~ 1, xlim = c(0.5, 7.5), ylim = rev(c(1,
+## nrow(fit.df))), : the formula '1 ~ 1' is treated as '1 ~ 1'
+

+
+
+

7. Treatment failure

+

+
+
+

8. Continuous age effects within group

+

Model the phenotypes vs. age for adults and infants at V1.

+
phens <- c('GMCSF', 'IFNg', 'IL10', 'IL12p40', 'IL12p70', 
+           'IL6', 'TNFa', 'Nitrate.570',  
+           'pfs25', 'pfs16', 'pfs230', 'Hb', 'malaria.Ab', 'parasites')
+
+phens <- phens[!(phens=="malaria.Ab")]
+dat.I <- subset(dat, age="I")
+dat.A <- subset(dat, age="A")
+
+cat("##-----------------##\n")
+cat("INFANTS \n")
+cat("##-----------------##\n")
+for(i in 1:length(phens)){
+  cat("##-----------------")
+  cat(phens[i])
+  cat("-----------------## \n")
+  form <- paste0(phens[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.I))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+
+for(i in 1:length(phens)){
+  cat("##-----------------")
+  cat(phens[i])
+  cat("-----------------## \n")
+  form <- paste0(phens[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.A))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+

Model the phenotypes vs. age for adults and infants at V2.

+
phens.V2 <- paste0(phens, ".V2")
+phens.V2 <- phens.V2[!(phens.V2 %in% c("pfs25.V2", "pfs16.V2", "pfs230.V2", "malaria.Ab.V2"))]
+
+cat("##-----------------##\n")
+cat("INFANTS \n")
+cat("##-----------------##\n")
+for(i in 1:length(phens.V2)){
+  cat("##-----------------")
+  cat(phens.V2[i])
+  cat("-----------------## \n")
+  form <- paste0(phens.V2[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.I))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+
+cat("##-----------------##\n")
+cat("ADULTS \n")
+cat("##-----------------##\n")
+for(i in 1:length(phens.V2)){
+  cat("##-----------------")
+  cat(phens.V2[i])
+  cat("-----------------## \n")
+  form <- paste0(phens.V2[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.A))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+

Model the log2FC of phenotypes vs. age for adults and infants.

+
dat$TNFa.FC <- log2(dat$TNFa.V2) - log2(dat$TNFa)
+dat$IFNg.FC <- log2(dat$IFNg.V2) - log2(dat$IFNg)
+dat$IL6.FC <- log2(dat$IL6.V2) - log2(dat$IL6)
+dat$IL12p40.FC <- log2(dat$IL12p40.V2) - log2(dat$IL12p40)
+dat$IL12p70.FC <- log2(dat$IL12p70.V2) - log2(dat$IL12p70)
+dat$IL10.FC <- log2(dat$IL10.V2) - log2(dat$IL10)
+dat$GMCSF.FC <- log2(dat$GMCSF.V2) - log2(dat$GMCSF)
+dat$Hb.FC <- log2(dat$Hb.V2) - log2(dat$Hb)
+dat$Nitrate.570.FC <- log2(dat$Nitrate.570.V2) - log2(dat$Nitrate.570)
+phens <- c("TNFa", "IFNg", "IL6", "IL12p40", "IL12p70", "IL10", "GMCSF", "Hb", "Nitrate.570")
+phens.FC <- paste0(phens, ".FC")
+dat.I <- subset(dat, age="I")
+dat.A <- subset(dat, age="A")
+
+cat("##-----------------##\n")
+cat("INFANTS \n")
+cat("##-----------------##\n")
+for(i in 1:length(phens.FC)){
+  cat("##-----------------")
+  cat(phens.FC[i])
+  cat("-----------------## \n")
+  form <- paste0(phens.FC[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.I))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+
+cat("##-----------------##\n")
+cat("ADULTS \n")
+cat("##-----------------##\n")
+for(i in 1:length(phens.FC)){
+  cat("##-----------------")
+  cat(phens.FC[i])
+  cat("-----------------## \n")
+  form <- paste0(phens.FC[i], "~ age.years*sex")
+  try(fit1 <- lm(form, data=dat.A))
+  try(print(summary(fit1)))
+  fit1 <- NULL
+}
+

Plot the phenotypes vs. age for adults and infants - V1.

+
dat.I <- droplevels(subset(dat, age=="I"))
+dat.A <- droplevels(subset(dat, age=="A"))
+
+p1 <- ggplot(dat.I, aes_string(x="age.years", y="TNFa", colour="sex", shape="sex"))
+p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab(expression(paste("TNF-",alpha," (pg/mL)")))
+plot(p2)
+

+
p1 <- ggplot(dat.A, aes_string(x="age.years", y="pfs16", colour="sex", shape="sex"))
+p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + ylim(-75,200) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab("Pfs16 (gametocytes/uL)")
+plot(p2)
+

+

Plot the phenotypes vs. age for adults and infants - V2.

+
p1 <- ggplot(dat.A, aes_string(x="age.years", y="Nitrate.570.V2", colour="sex", shape="sex"))
+p2 <- p1 + geom_point(aes(pch=sex), size=3) + geom_smooth(method=lm) + theme_classic() + geom_hline(yintercept=0, color = "darkgrey") + ylab("nitric oxide (uM)")
+plot(p2)
+

+
+ + + + +
+ + + + + + + + diff --git a/analysis/MalariaInfantStudyAnalysis.pdf b/analysis/MalariaInfantStudyAnalysis.pdf new file mode 100644 index 0000000..eb585a0 Binary files /dev/null and b/analysis/MalariaInfantStudyAnalysis.pdf differ