Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How can I use the "genes_vs_motifs.feather" into R SCENIC #30

Open
CNChunhua opened this issue Oct 19, 2022 · 0 comments
Open

How can I use the "genes_vs_motifs.feather" into R SCENIC #30

CNChunhua opened this issue Oct 19, 2022 · 0 comments

Comments

@CNChunhua
Copy link

Recently, I created goat feather files but I didn't know how to use the feather file as the R SCENIC input file
here is my code
image

library(Seurat)
library(tidyverse)
library(patchwork)
library(SCENIC)
library(AUCell)
library(RcisTarget)
library(GENIE3)
rm(list=ls())
getwd()


##准备细胞meta信息
##Merge
setwd("F:/SCGM_RDS")
scRNA <- readRDS("SCGM_merge_cluster-Normalizetion-PCA-UMAP-SNE.rds")
(n=length(unique([email protected]$seurat_clusters)))
celltype=data.frame(ClusterID=0:(n-1),
                    celltype='Unknow')
##Merge
celltype[celltype$ClusterID %in% c(4),2]='BCs' #1
celltype[celltype$ClusterID %in% c(18),2]='ESCs' #2
celltype[celltype$ClusterID %in% c(14),2]='MPCs' #3
celltype[celltype$ClusterID %in% c(1),2]='MuSC1' #4
celltype[celltype$ClusterID %in% c(6),2]='MuSC2' #5
celltype[celltype$ClusterID %in% c(8),2]='MuSC3' #6 
celltype[celltype$ClusterID %in% c(),2]='Mb1' #7
celltype[celltype$ClusterID %in% c(12),2]='Mb2' #8
celltype[celltype$ClusterID %in% c(),2]='Mb3' #9 
celltype[celltype$ClusterID %in% c(13),2]='Mb4' #10 
celltype[celltype$ClusterID %in% c(3,19),2]='SMCs' #11
celltype[celltype$ClusterID %in% c(0,7),2]='FAPs' #12
celltype[celltype$ClusterID %in% c(11),2]='Fbs' #13
celltype[celltype$ClusterID %in% c(16),2]='Aps' #14
celltype[celltype$ClusterID %in% c(15),2]='Tcs' #15
celltype[celltype$ClusterID %in% c(2,5,9,20),2]='ECs' #16
celltype[celltype$ClusterID %in% c(17),2]='ICs' #17
celltype[celltype$ClusterID %in% c(10),2]='Unknow' #18
[email protected]$celltype = "NA"
for(i in 1:nrow(celltype)){
  [email protected][which([email protected]$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
[email protected]$celltype1<-factor([email protected]$celltype,levels = c('BCs','ESCs','MPCs','MuSC1','MuSC2','MuSC3','Mb2','Mb4','SMCs','FAPs','Fbs','Aps','Tcs','ECs','ICs','Unknow'))

###############################################
cellInfo <- data.frame([email protected])
colnames(cellInfo)[which(colnames(cellInfo)=="orig.ident")] <- "sample"
colnames(cellInfo)[which(colnames(cellInfo)=="seurat_clusters")] <- "cluster"
colnames(cellInfo)[which(colnames(cellInfo)=="celltype1")] <- "celltype"
cellInfo <- cellInfo[,c("sample","cluster","celltype")]
setwd("F:\\works\\单细胞转录调控网络分析\\单细胞转录调控网络分析")
#saveRDS(cellInfo, file="cellInfo.Rds")
cellInfo <- readRDS(file="cellInfo.Rds")
##准备表达矩阵
#为了节省计算资源,随机抽取1000个细胞的数据子集
subcell <- sample(colnames(scRNA),1000)
scRNAsub <- scRNA[,subcell]
#saveRDS(scRNAsub, "scRNAsub.rds")
scRNAsub <- readRDS(file="scRNAsub.rds")
exprMat <- as.matrix(scRNAsub@assays$RNA@counts)
##设置分析环境
mydbDIR <- "F:\\works\\意大利期间单细胞\\SCENIC"

##修改feather文件
#于python中修改
##
mydbs <- c("500up.genes_vs_motifs.rankings.feather",
           "10k.genes_vs_motifs.rankings.feather")

#mydbs <- c("hg19-500bp-upstream-7species.mc8nr.feather",
#           "hg19-tss-centered-10kb-7species.mc8nr.feather")
names(mydbs) <- c("500bp", "10kb")
#scenicOptions <- initializeScenic(org="hgnc", 
  #                       dbDir=mydbDIR , nCores=4)

scenicOptions <- initializeScenic(org="hgnc", 
                                  nCores=2,
                                  dbDir=mydbDIR, 
                                  dbs = mydbs,
                                  datasetTitle = "GOAT")
#saveRDS(scenicOptions, "int/scenicOptions.rds")


##==转录调控网络推断==##
##基因过滤
#过滤标准是基因表达量之和>细胞数*3%,且在1%的细胞中表达
genesKept <- geneFiltering(exprMat, scenicOptions, 
                           minCountsPerGene = 3 * 0.01 * ncol(exprMat), 
                           minSamples = ncol(exprMat) * 0.01)
#genesKept <- rownames(exprMat)#不过滤
exprMat_filtered <- exprMat[genesKept, ]
##计算相关性矩阵
runCorrelation(exprMat_filtered, scenicOptions)
##TF-Targets相关性回归分析
exprMat_filtered_log <- log2(exprMat_filtered+1)
runGenie3(exprMat_filtered_log, scenicOptions, nParts = 10)
#这一步消耗的计算资源非常大,个人电脑需要几个小时的运行时间
##推断共表达模块
runSCENIC_1_coexNetwork2modules(scenicOptions)
##推断转录调控网络(regulon)
runSCENIC_2_createRegulons(scenicOptions)
#以上代码可增加参数coexMethod=c("w001", "w005", "top50", "top5perTarget", "top10perTarget", "top50perTarget"))
#默认6种方法的共表达网络都计算,可以少选几种方法以减少计算量
##regulons计算AUC值并进行下游分析
#exprMat_all <- as.matrix(scRNA@assays$RNA@counts)
#exprMat_all <- log2(exprMat_all+1)
exprMat_all <- readRDS("F:\\works\\意大利期间单细胞\\SCENIC\\scenicOptions.rds")
runSCENIC_3_scoreCells(scenicOptions, exprMat=exprMat_all)
#使用shiny互动调整阈值
aucellApp <- plotTsne_AUCellApp(scenicOptions, exprMat_all)
savedSelections <- shiny::runApp(aucellApp)
#保存调整后的阈值
newThresholds <- savedSelections$thresholds
scenicOptions@fileNames$int["aucell_thresholds",1] <- "int/newThresholds.Rds"
saveRDS(newThresholds, file=getIntName(scenicOptions, "aucell_thresholds"))
saveRDS(scenicOptions, file="int/scenicOptions.Rds")
runSCENIC_4_aucell_binarize(scenicOptions, exprMat=
```exprMat_all)

I dont know if its right
Thanks for answering my question
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant