forked from xihaoli/STAARpipeline-Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgds2agds.R
53 lines (40 loc) · 1.77 KB
/
gds2agds.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
rm(list=ls())
gc()
##########################################################################
# Input
##########################################################################
### gds file
dir_geno <- "/path_to_the_GDS_file/"
gds_file_name_1 <- "freeze.5.chr"
gds_file_name_2 <- ".pass_and_fail.gtonly.minDP0.gds"
### annotation file (output of Annotate.R)
dir_anno <- "/path_to_the_annotation_file/"
anno_file_name_1 <- "Anno_chr"
anno_file_name_2 <- "_STAARpipeline.csv"
chr <- as.numeric(commandArgs(TRUE)[1])
###########################################################################
# Main Function
###########################################################################
### load required package
library(gdsfmt)
library(SeqArray)
library(SeqVarTools)
library(readr)
### read annotation data
FunctionalAnnotation <- read_csv(paste0(dir_anno,"chr",chr,"/",anno_file_name_1,chr,anno_file_name_2),
col_types=list(col_character(),col_double(),col_double(),col_double(),col_double(),
col_double(),col_double(),col_double(),col_double(),col_double(),
col_character(),col_character(),col_character(),col_double(),col_character(),
col_character(),col_character(),col_character(),col_character(),col_double(),
col_double(),col_character()))
dim(FunctionalAnnotation)
## rename colnames
colnames(FunctionalAnnotation)[2] <- "apc_conservation"
colnames(FunctionalAnnotation)[7] <- "apc_local_nucleotide_diversity"
colnames(FunctionalAnnotation)[9] <- "apc_protein_function"
## open GDS
gds.path <- paste0(dir_geno,gds_file_name_1,chr,gds_file_name_2)
genofile <- seqOpen(gds.path, readonly = FALSE)
Anno.folder <- index.gdsn(genofile, "annotation/info")
add.gdsn(Anno.folder, "FunctionalAnnotation", val=FunctionalAnnotation, compress="LZMA_ra", closezip=TRUE)
seqClose(genofile)