-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBIOST2069_project_cz.Rmd
144 lines (99 loc) · 3.66 KB
/
BIOST2069_project_cz.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
---
title: "Data Preprocessing"
author: "Crystal Zang"
date: '2022-10-12'
output: html_document
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
Public on Feb 27, 2013
Title: Effect of sleep restriction on the human transcriptome during extended wakefulness
Summary: In a cross balanced design human subjects were given one week of sufficient sleep and insufficient sleep (6 h per day). Following each of these conditions they were then kept awake for a day, a night and the subsequent day. During each of these periods of extended wakefulness 10 blood samples were taken, RNA was extracted from leukocytes, labelled and hybridised to human whole-genome microarrays
Design: A total of 438 samples comprising 26 human subjects, for which 20 samples across multiple time-points/sleep condition were collected.
```{r}
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("GEOquery")
```
```{r message=FALSE, warning=FALSE}
library(GEOquery)
library(dplyr)
library(tidyr)
library(Biobase)
library(expss)
library(matrixStats)
library(limma)
library(glmnet)
library('annotate')
library('data.table')
library('foreach')
library('ggplot2')
library('limma')
library('limorhyde')
library('qs')
library(splines)
library(tidyverse)
RNGversion("3.5.1")
set.seed(194)
```
# Pre-process Moller data
```{r}
moller <- getGEO("gse39445")
moller <- moller[["GSE39445_series_matrix.txt.gz"]]
sum(is.na(exprs(moller)))
dim(moller)
# only select control group: sleep extension
table(moller$`sleepprotocol:ch1`)
moller_control = moller[,which(moller$`sleepprotocol:ch1` == "Sleep Extension") ]
dim(moller_control)
# gse: expression data, matrix
process <- function(gse, sd.cutoff=0.5){
#gene filtering
sd <- matrixStats::rowSds(gse, na.rm=T)
gse.new <- gse[sd > sd.cutoff,]
print(paste(dim(gse.new)[1], "genes left after filtering"))
return(gse.new)
}
moller_exp <- process(exprs(moller_control), 0.5)
```
# Pre-process Braun data
```{r}
# this doesn't have gene expression data, only has sample data
braun <- getGEO("gse113883")
braun <- braun$GSE113883_series_matrix.txt.gz
dim(exprs(brawn))
braun2 = read.table(gzfile("GSE113883_TPM_processed_data.txt.gz"), header = T, sep = '\t')
braun2 <- braun2 %>%
remove_rownames %>%
column_to_rownames(var="GENE")
braun2.rowsum <- rowSums(braun2)
sum(braun2.rowsum==0)
nrow(braun2)
braun2 <- as.matrix(braun2)
braun_exp <- process(braun2, 0.5)
# select control group
braun$`serum_melatonin:ch1`
braun$`local_time:ch1` #local time
table(braun$`hour:ch1`) #time point 1 to 29 with increment of 2.
```
```{r}
a.meta = moller_exp
a.meta$time_sin = sin(moller_control$LocalTime*pi/12)
a.meta$time_cos = cos(a.meta$LocalTime*pi/12)
a.expr <- all.expr[, which(colnames(all.expr) %in% a.meta$samp)]
a.expr <- recalibrateExprs(a.expr, a.meta$sID)
rhyLimma = foreach(condNow = unique(a.meta$cond), .combine = rbind) %do% {
design = model.matrix(~ time_cos + time_sin, data = a.meta[which(a.meta$cond==condNow),])
fit = lmFit(a.expr[, which(a.meta$cond==condNow)], design)
fit = eBayes(fit) #, trend = TRUE)
rhyNow = data.table(topTable(fit, coef = 2:3, number = Inf), keep.rownames = TRUE)
setnames(rhyNow, 'rn', 'gene_id')
rhyNow[, cond := condNow]}
rhyLimma$name <- paste0(rhyLimma$gene_id, rhyLimma$cond)
rhyLimmaSummary = rhyLimma[, .(P.Value = min(P.Value)), by = gene_id]
rhyLimmaSummary[, adj.P.Val := p.adjust(P.Value, method = 'BH')]
setorderv(rhyLimmaSummary, 'adj.P.Val')
```