-
Notifications
You must be signed in to change notification settings - Fork 4
Uploading Single Cell Sequencing data
-
You can locate the files in the 10x cellranger output folder, under the raw_feature_bc_matrix folder. There are usually 3 compressed files: barcodes.tsv.gz, features.tsv.gz and matrix.mtx.gz.
There are 3 columns in the features.tsv.gz, please only keep the first 2 columns
zcat features.tsv.gz |cut -f1,2 > genes.tsv
Decompress all the gzip files
gunzip *.gz
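Rename features.tsv to genes.tsv. Note: if you already created the 2-column genes.tsv with the cut command above, do not run this command (it would overwrite genes.tsv with the 3-column file); remove the leftover features.tsv instead.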
mv features.tsv genes.tsv
# there should be 3 files for next step (MUST BE NAMED AS FOLLOWING): barcodes.tsv genes.tsv matrix.mtx
Compress all the files
tar -cf upload1.tar *.tsv *.mtx (windows)
COPYFILE_DISABLE=1 tar -cf upload1.tar *.tsv *.mtx (mac)
suppressMessages(library("Seurat"));suppressMessages(library(dplyr)); suppressMessages(library(biomaRt))
#' Build the observation table for a Seurat object.
#'
#' Binds together the row names of `object@meta.data`, the metadata columns
#' themselves and the embedding coordinates of one dimensional reduction, and
#' names the first column "observations".
#'
#' @param object A Seurat object that contains the requested reduction.
#' @param group Unused; kept so existing calls that pass it keep working.
#' @param reduction Name of the reduction whose embeddings are appended.
#'   Defaults to "umap", the reduction this function previously hard-coded.
#' @return A data frame with one row per row of `object@meta.data`:
#'   "observations", then the metadata columns, then the embedding columns.
metadata <- function(object, group, reduction = "umap") {
  cell_meta <- object@meta.data
  coords <- Embeddings(object[[reduction]])
  meta <- as.data.frame(cbind(row.names(cell_meta), cell_meta, coords))
  # cbind() names the first column after the expression that produced it;
  # give it the fixed name used in the exported table.
  colnames(meta)[1] <- "observations"
  meta
}
Path to your RDS file of Seurat analysis result
SeuratObj <- readRDS("SeuratObject.rds")
## create the metadata sheet using the function above
obs <- metadata(SeuratObj)
# add a replicate column that numbers the rows within each cell_type;
# ungroup afterwards so the grouping does not leak into later steps
obs <- obs %>%
  group_by(cell_type) %>%
  mutate(replicate = row_number()) %>%
  ungroup()
## add cluster_label to enable the primary analysis function
obs$cluster_label <- obs$cell_type
##if you have the hexcode for each cell type to color, name that column as "cell_type_colors"
Make sure the cluster names are not in numeric format
In order to enable the “primary analysis” function of the single cell workbench, one of the column names MUST be in this list: ['cluster', 'cell_type', 'cluster_label', 'subclass_label', 'joint_cluster_round4_annot']. As long as one of these tags is found among the column names, the primary analysis will use that column to display the clusters to compare.
Don’t put “,” or “ “ in the column name, otherwise, the multi-genes display function will have problems.
Prepare annotation
# Expression values of the Seurat object as a data frame (genes in rows).
t1.m <- data.frame(GetAssayData(object = SeuratObj))

# Fetch the Ensembl gene id / gene symbol pairs from biomaRt.
mart <- useMart("ensembl")
# Table of available datasets; inspect it to pick a dataset for another species.
datasets <- listDatasets(mart)
mart <- useDataset("mmusculus_gene_ensembl", mart = mart)
ensembl <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  mart = mart
)
names(ensembl) <- c("ensembl_ID", "gene_symbol")

# Keep one Ensembl id per gene symbol BEFORE merging; merging the full table
# would repeat the expression row of every symbol that has several ids.
ensembl.dedup <- ensembl[!duplicated(ensembl$gene_symbol), ]

# Annotate every expression row (all.y = TRUE keeps genes without a match);
# the row names of t1.m are matched against gene_symbol.
t1.m.ann <- merge(
  ensembl.dedup, t1.m,
  by.x = "gene_symbol", by.y = 0, all.y = TRUE
)

# Genes without an Ensembl id get placeholder ids FAKE1, FAKE2, ...
missing_id <- is.na(t1.m.ann$ensembl_ID)
t1.m.ann$ensembl_ID[missing_id] <- paste0("FAKE", seq_len(sum(missing_id)))

# Gene table: ensembl_ID first, gene_symbol second.
genes <- t1.m.ann[, 2:1]
Create count matrix
# Expression table: every column of the annotated table except the first one.
counts <- t1.m.ann[, -1]
# names(count)=gsub("X","",names(count))
# If R changed "-" to "." in the column names, change every "." back to "-".
colnames(counts) <- gsub("[.]", "-", colnames(counts))

# Write the three tab-separated upload files, header row included and
# without quoting or row names.
outputs <- list(
  observations.tab = obs,
  genes.tab = genes,
  expression.tab = counts
)
for (path in names(outputs)) {
  write.table(
    outputs[[path]],
    file = path,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )
}
Zip file together for uploading
# Bundle the .tab files into one gzip-compressed archive. system() returns the
# exit status of the command; stop on failure so a missing or partial archive
# is noticed before uploading.
tar_status <- system("tar -czvf upload.tar.gz *.tab")
if (tar_status != 0) {
  stop(
    "tar exited with status ", tar_status,
    "; upload.tar.gz was not created correctly",
    call. = FALSE
  )
}
library(Rtsne);library(SingleCellExperiment);library(CellTrails);library(scuttle)
BP_Data <- readRDS("example.rds")
class(BP_Data)

# Log-normalize the counts with scuttle and extract the log-count matrix
data <- scuttle::logNormCounts(BP_Data)
exp <- data.frame(logcounts(data))

# Row annotation, with the row names copied into a gene_symbol column
ann <- rowData(data)
ann$gene_symbol <- row.names(ann)

# Join annotation and expression on their row names; the key column that
# merge() puts first is renamed to gene_symbol.
exp.ann <- merge(ann, exp, by.y = 0, by.x = 0)
names(exp.ann)[1] <- "gene_symbol"
write.table(exp.ann[-1], file = "expression.tab", sep = "\t",
            row.names = FALSE, quote = FALSE)
write.table(exp.ann[c(2, 1)], file = "genes.tab", sep = "\t",
            row.names = FALSE, quote = FALSE)

# Observation table: row names of colData as "observations", followed by the
# colData columns and a cell_type column copied from CellTrails.state.
obs <- data.frame(BP_Data@colData)
obs1 <- cbind(data.frame(row.names(obs)), obs[, ])
names(obs1)[1] <- "observations"
obs1$cell_type <- obs1$CellTrails.state

# Show the available reductions, then take the first two tSNE dimensions
reducedDimNames(BP_Data)
tsne.cor <- data.frame(reducedDim(BP_Data, "CellTrails.tSNE")[, 1:2])
colnames(tsne.cor) <- c("tSNE_1", "tSNE_2")
obs2 <- merge(obs1, tsne.cor, by.x = 0, by.y = 0)

# Column 1 of the merged table is the row-name key added by merge(); keep
# columns 2 to 7.
# NOTE(review): the fixed indices presumably match the example data; adjust
# them if your colData has a different number of columns.
obs3 <- obs2[c(2, 3, 4, 5, 6, 7)]
write.table(obs3, file = "observations.tab", sep = "\t",
            row.names = FALSE, quote = FALSE)

#### compress files together for uploading
# system() returns the exit status of the command; stop on failure so a
# missing or partial archive is noticed before uploading.
tar_status <- system("tar -czvf upload.tar.gz *.tab")
if (tar_status != 0) {
  stop(
    "tar exited with status ", tar_status,
    "; upload.tar.gz was not created correctly",
    call. = FALSE
  )
}