Uploading Single Cell Sequencing data

A. Prepare the files using the raw matrix from 10x platform

You can find the files in the 10x Cell Ranger output folder, under the raw_feature_bc_matrix folder. There are usually 3 compressed files: barcodes.tsv.gz, features.tsv.gz and matrix.mtx.gz

features.tsv.gz contains 3 columns; please keep only the first 2 columns:

gunzip -c features.tsv.gz | cut -f1,2 > genes.tsv  # gunzip -c works on Linux and macOS; keeps columns 1-2 only

Decompress all the gzip files:

gzip -d *.gz

Make sure the gene file is named genes.tsv and that features.tsv is no longer present:

cut -f1,2 features.tsv > genes.tsv && rm features.tsv  # a plain mv would put the 3-column file into genes.tsv

# there should be 3 files for next step (MUST BE NAMED AS FOLLOWING): barcodes.tsv genes.tsv matrix.mtx

Bundle all the files into a single tar archive:

tar -cf upload1.tar *.tsv *.mtx  # Windows

COPYFILE_DISABLE=1 tar -cf upload1.tar *.tsv *.mtx  # macOS

B. Prepare the files based on Seurat processed output (all the commands below are R code)

suppressMessages(library("Seurat"));suppressMessages(library(dplyr)); suppressMessages(library(biomaRt))

# Build the per-cell observation table: cell names, every column of the
# object's meta.data slot, and the coordinates of its "umap" embedding.
#   object: object with a meta.data slot and a "umap" reduction.
#   group:  not used by the function body.
# Returns a data.frame whose first column is named "observations".
metadata <- function(object, group) {
  cell_info <- object@meta.data
  meta <- as.data.frame(
    cbind(row.names(cell_info), cell_info, Embeddings(object[["umap"]]))
  )
  # the first column holds the cell names taken from the meta.data row names
  names(meta)[1] <- "observations"
  meta
}

Set the path to the RDS file containing your Seurat analysis result:

SeuratObj <- readRDS("SeuratObject.rds")

## create the metadata sheet using the metadata() function above
obs <- metadata(SeuratObj)

# add a replicate column: running index of each cell within its cell_type;
# ungroup() afterwards so later steps work on a plain, ungrouped table
obs <- obs %>%
  group_by(cell_type) %>%
  mutate(replicate = row_number()) %>%
  ungroup()

## add cluster_label to enable the primary analysis function
obs$cluster_label <- obs$cell_type

## if you have the hex code for each cell type to color, name that column "cell_type_colors"

Make sure the cluster names are not in numeric format

In order to enable the “primary analysis” function of the single cell workbench, at least one of the column names MUST be in this list: ['cluster', 'cell_type', 'cluster_label', 'subclass_label', 'joint_cluster_round4_annot']. As long as one of these tags is found among the column names, the primary analysis will use that tag to display the clusters to compare.

Don’t put “,” or spaces in the column names; otherwise, the multi-gene display function will have problems.

Prepare annotation

# expression matrix as a data.frame; its row names are matched against
# gene symbols in the merge below
t1.m <- data.frame(GetAssayData(object = SeuratObj))

# fetch Ensembl ID <-> gene symbol pairs from Ensembl BioMart
mart <- useMart("ensembl")
datasets <- listDatasets(mart)  # available datasets, e.g. to pick another species
mart <- useDataset("mmusculus_gene_ensembl", mart = mart)
ensembl <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  mart = mart
)
names(ensembl) <- c("ensembl_ID", "gene_symbol")

# keep one Ensembl ID per gene symbol BEFORE merging, so that a symbol mapped
# to several Ensembl IDs does not duplicate its expression row
ensembl.dedup <- ensembl[!duplicated(ensembl$gene_symbol), ]

# attach the Ensembl ID to every row of the expression matrix;
# all.y = TRUE keeps genes that have no match in the Ensembl table
t1.m.ann <- merge(
  ensembl.dedup, t1.m,
  by.x = "gene_symbol", by.y = 0, all.y = TRUE
)

# genes without an Ensembl ID get unique placeholder IDs FAKE1, FAKE2, ...
no_id <- is.na(t1.m.ann$ensembl_ID)
t1.m.ann$ensembl_ID[no_id] <- paste0("FAKE", seq_len(sum(no_id)))

# gene annotation table: ensembl_ID first, then gene_symbol
genes <- t1.m.ann[, 2:1]

Create count matrix

# expression table: everything except the first (gene_symbol) column
counts <- t1.m.ann[, -1]

# optional: strip the "X" prefix from column names if R added one
# colnames(counts) <- sub("^X", "", colnames(counts))

# if R changed "-" to "." in the column names, turn every "." back into "-"
colnames(counts) <- gsub("[.]", "-", colnames(counts))

# write the three tab-separated tables expected by the uploader
write.table(
  obs,
  file = "observations.tab",
  sep = "\t", quote = FALSE, row.names = FALSE
)
write.table(
  genes,
  file = "genes.tab",
  sep = "\t", quote = FALSE, row.names = FALSE
)
write.table(
  counts,
  file = "expression.tab",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
)

Compress the files into a single archive for uploading:

# pack every .tab file in the working directory into one gzip-compressed tarball
system("tar -czvf upload.tar.gz *.tab")

C. Prepare the files from SingleCellExperiment object

library(Rtsne);library(SingleCellExperiment);library(CellTrails);library(scuttle)
BP_Data <- readRDS("example.rds")
class(BP_Data)

# SCNorm, get the normalized count matrix
data <- scuttle::logNormCounts(BP_Data)

exp <- data.frame(logcounts(data))

# gene annotation: the row names are stored as the gene_symbol column
ann <- rowData(data)
ann$gene_symbol <- row.names(ann)

# join annotation and expression values on their row names (by = 0)
exp.ann <- merge(ann, exp, by.x = 0, by.y = 0)

# the first column of the merged table holds the row names used for the join
names(exp.ann)[1] <- "gene_symbol"
write.table(
  exp.ann[-1],
  file = "expression.tab",
  sep = "\t", row.names = FALSE, quote = FALSE
)

# NOTE(review): column 2 is presumably the gene ID column coming from
# rowData -- confirm for your object
write.table(
  exp.ann[c(2, 1)],
  file = "genes.tab",
  sep = "\t", row.names = FALSE, quote = FALSE
)

obs <- data.frame(BP_Data@colData)

# put the cell names (row names of obs) in a leading "observations" column
obs1 <- cbind(data.frame(row.names(obs)), obs[, ])
names(obs1)[1] <- "observations"
obs1$cell_type <- obs1$CellTrails.state
reducedDimNames(BP_Data)

# first two dimensions of the "CellTrails.tSNE" reduced dimension
tsne.cor <- data.frame(reducedDim(BP_Data, "CellTrails.tSNE")[, 1:2])
colnames(tsne.cor) <- c("tSNE_1", "tSNE_2")

obs2 <- merge(obs1, tsne.cor, by.x = 0, by.y = 0)

# rearrange the columns (column 1, the merge key, is left out)
# NOTE(review): indices 2-7 are hard-coded -- adjust them if your object has
# a different number of metadata columns
obs3 <- obs2[c(2, 3, 4, 5, 6, 7)]
write.table(
  obs3,
  file = "observations.tab",
  sep = "\t", row.names = FALSE, quote = FALSE
)

#### compress files together for uploading
system("tar -czvf upload.tar.gz *.tab")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uploading Single Cell Sequencing data

A. Prepare the files using the raw matrix from 10x platform

B. Prepare the files based on seurat processed output (all the command below are R script)

C. Prepare the files from SingleCellExperiment object

Getting started

Analysis tools

Data upload

Data curation

Data download

Frequently Asked Questions

Clone this wiki locally