diff --git a/404.html b/404.html index 04589ad..7dcc1e6 100644 --- a/404.html +++ b/404.html @@ -1,66 +1,27 @@ - - -
- + + + + -BRENDA.Rmd
We use BRENDA tissue ontology to indicate the tissue or cell-line of a signature, and GEO platform accession id for platform records.
-The record was last updated 09/2020, so some newest ontology and platform may not be included.
-If you are not sure which BRENDA tissue ontology or GEO platform accession id to use, you can search for them as shown below.
We use BRENDA tissue ontology to indicate the tissue or cell-line of
+a signature, and GEO platform accession id for platform records.
+The record was last updated 09/2020, so some of the newest
+ontology terms and platforms may not be included.
+If you are not sure which BRENDA tissue ontology or GEO platform
+accession id to use, you can search for them as shown below.
-devtools::load_all(".")
## ℹ Loading OmicSignature
+devtools::load_all(".")
## ℹ Loading OmicSignature
-BRENDASearch("MDA")
## ID Name
-## BTO:0000815 "BTO:0000815" "MDA-MB-231 cell"
-## BTO:0001567 "BTO:0001567" "MDA-MB-435 cell"
-## BTO:0001568 "BTO:0001568" "MDA-MB-436 cell"
-## BTO:0001569 "BTO:0001569" "MDA-MB-361 cell"
-## BTO:0001570 "BTO:0001570" "MDA-MB-468 cell"
-## BTO:0001908 "BTO:0001908" "MDA-MB-453 cell"
-## BTO:0002716 "BTO:0002716" "MDA-1186 cell"
-## BTO:0002717 "BTO:0002717" "MDA-886 cell"
-## BTO:0003052 "BTO:0003052" "MDA-686Ln cell"
-## BTO:0003302 "BTO:0003302" "MDA-Panc-3 cell"
-## BTO:0003303 "BTO:0003303" "MDA-Panc-28 cell"
-## BTO:0003304 "BTO:0003304" "MDA-1483 cell"
-## BTO:0003471 "BTO:0003471" "MDA-MB-175-VII cell"
-## BTO:0003941 "BTO:0003941" "MDA-MB-435S cell"
-## BTO:0003942 "BTO:0003942" "MDAH-2774 cell"
-## BTO:0004164 "BTO:0004164" "MDA-MB-435HGF cell"
-## BTO:0004474 "BTO:0004474" "MDA-MB-231-BAG cell"
-## BTO:0004955 "BTO:0004955" "MDA-B02 cell"
-## BTO:0005050 "BTO:0005050" "MDA-PCa-2b cell"
-## BTO:0005172 "BTO:0005172" "MDA-MB-231-Luc2 cell"
-## BTO:0005866 "BTO:0005866" "MDA435/LCC6 cell"
-## BTO:0005867 "BTO:0005867" "MDA435/LCC6MDR1 cell"
-## BTO:0006039 "BTO:0006039" "MDA-MB-415 cell"
-## BTO:0006218 "BTO:0006218" "MDA-MB-157 cell"
+BRENDASearch("MDA")
## ID Name
+## BTO:0000815 "BTO:0000815" "MDA-MB-231 cell"
+## BTO:0001567 "BTO:0001567" "MDA-MB-435 cell"
+## BTO:0001568 "BTO:0001568" "MDA-MB-436 cell"
+## BTO:0001569 "BTO:0001569" "MDA-MB-361 cell"
+## BTO:0001570 "BTO:0001570" "MDA-MB-468 cell"
+## BTO:0001908 "BTO:0001908" "MDA-MB-453 cell"
+## BTO:0002716 "BTO:0002716" "MDA-1186 cell"
+## BTO:0002717 "BTO:0002717" "MDA-886 cell"
+## BTO:0003052 "BTO:0003052" "MDA-686Ln cell"
+## BTO:0003302 "BTO:0003302" "MDA-Panc-3 cell"
+## BTO:0003303 "BTO:0003303" "MDA-Panc-28 cell"
+## BTO:0003304 "BTO:0003304" "MDA-1483 cell"
+## BTO:0003471 "BTO:0003471" "MDA-MB-175-VII cell"
+## BTO:0003941 "BTO:0003941" "MDA-MB-435S cell"
+## BTO:0003942 "BTO:0003942" "MDAH-2774 cell"
+## BTO:0004164 "BTO:0004164" "MDA-MB-435HGF cell"
+## BTO:0004474 "BTO:0004474" "MDA-MB-231-BAG cell"
+## BTO:0004955 "BTO:0004955" "MDA-B02 cell"
+## BTO:0005050 "BTO:0005050" "MDA-PCa-2b cell"
+## BTO:0005172 "BTO:0005172" "MDA-MB-231-Luc2 cell"
+## BTO:0005866 "BTO:0005866" "MDA435/LCC6 cell"
+## BTO:0005867 "BTO:0005867" "MDA435/LCC6MDR1 cell"
+## BTO:0006039 "BTO:0006039" "MDA-MB-415 cell"
+## BTO:0006218 "BTO:0006218" "MDA-MB-157 cell"
-GEOPlatformSearch("Illumina HiSeq 4000", species = "Homo Sapiens")
## Accession
-## 633 GPL28185
-## 881 GPL27803
-## 1407 GPL26865
-## 1569 GPL26639
-## 2090 GPL25904
-## 2248 GPL25719
-## 2422 GPL25476
-## 2441 GPL25431
-## 2894 GPL24850
-## 4101 GPL23362
-## 5175 GPL22132
-## 6856 GPL20301
-## Title
-## 633 Illumina HiSeq 4000 (Homo sapiens; Plasmodium berghei)
-## 881 Illumina HiSeq 4000 (Homo sapiens; Pan troglodytes)
-## 1407 Illumina HiSeq 4000 (Canis lupus familiaris; Homo sapiens; Mus musculus)
-## 1569 Illumina HiSeq 4000 ([Haemophilus] ducreyi; Homo sapiens)
-## 2090 Illumina HiSeq 4000 (Homo sapiens; Neisseria gonorrhoeae)
-## 2248 Illumina HiSeq 4000 (Homo sapiens; Human gammaherpesvirus 8)
-## 2422 Illumina HiSeq 4000 (Drosophila melanogaster; Homo sapiens)
-## 2441 Illumina HiSeq 4000 (Homo sapiens; Mus musculus)
-## 2894 Illumina HiSeq 4000 (Gallus gallus; Homo sapiens)
-## 4101 Illumina HiSeq 4000 (Homo sapiens; Human gammaherpesvirus 4)
-## 5175 Illumina HiSeq 4000 (Homo sapiens; Human herpesvirus 4)
-## 6856 Illumina HiSeq 4000 (Homo sapiens)
-## Technology
-## 633 high-throughput sequencing
-## 881 high-throughput sequencing
-## 1407 high-throughput sequencing
-## 1569 high-throughput sequencing
-## 2090 high-throughput sequencing
-## 2248 high-throughput sequencing
-## 2422 high-throughput sequencing
-## 2441 high-throughput sequencing
-## 2894 high-throughput sequencing
-## 4101 high-throughput sequencing
-## 5175 high-throughput sequencing
-## 6856 high-throughput sequencing
-## Taxonomy Data.Rows Samples.Count
-## 633 Homo sapiens;Plasmodium berghei 0 31
-## 881 Homo sapiens;Pan troglodytes 0 9
-## 1407 Canis lupus familiaris;Homo sapiens;Mus musculus 0 5
-## 1569 Homo sapiens;[Haemophilus] ducreyi 0 4
-## 2090 Homo sapiens;Neisseria gonorrhoeae 0 109
-## 2248 Homo sapiens;Human gammaherpesvirus 8 0 27
-## 2422 Drosophila melanogaster;Homo sapiens 0 8
-## 2441 Homo sapiens;Mus musculus 0 57
-## 2894 Gallus gallus;Homo sapiens 0 3
-## 4101 Homo sapiens;Human gammaherpesvirus 4 0 49
-## 5175 Homo sapiens;Human gammaherpesvirus 4 0 51
-## 6856 Homo sapiens 0 56722
-## Series.Count Contact Release.Date
-## 633 1 GEO Feb 21, 2020
-## 881 1 GEO Nov 25, 2019
-## 1407 3 GEO Jun 29, 2019
-## 1569 1 GEO May 08, 2019
-## 2090 1 GEO Dec 06, 2018
-## 2248 5 GEO Oct 24, 2018
-## 2422 2 GEO Aug 20, 2018
-## 2441 7 GEO Aug 06, 2018
-## 2894 2 GEO Apr 05, 2018
-## 4101 9 GEO Apr 24, 2017
-## 5175 4 GEO Jul 10, 2016
-## 6856 2501 GEO Jun 09, 2015
+GEOPlatformSearch("Illumina HiSeq 4000", species = "Homo Sapiens")
## Accession
+## 633 GPL28185
+## 881 GPL27803
+## 1407 GPL26865
+## 1569 GPL26639
+## 2090 GPL25904
+## 2248 GPL25719
+## 2422 GPL25476
+## 2441 GPL25431
+## 2894 GPL24850
+## 4101 GPL23362
+## 5175 GPL22132
+## 6856 GPL20301
+## Title
+## 633 Illumina HiSeq 4000 (Homo sapiens; Plasmodium berghei)
+## 881 Illumina HiSeq 4000 (Homo sapiens; Pan troglodytes)
+## 1407 Illumina HiSeq 4000 (Canis lupus familiaris; Homo sapiens; Mus musculus)
+## 1569 Illumina HiSeq 4000 ([Haemophilus] ducreyi; Homo sapiens)
+## 2090 Illumina HiSeq 4000 (Homo sapiens; Neisseria gonorrhoeae)
+## 2248 Illumina HiSeq 4000 (Homo sapiens; Human gammaherpesvirus 8)
+## 2422 Illumina HiSeq 4000 (Drosophila melanogaster; Homo sapiens)
+## 2441 Illumina HiSeq 4000 (Homo sapiens; Mus musculus)
+## 2894 Illumina HiSeq 4000 (Gallus gallus; Homo sapiens)
+## 4101 Illumina HiSeq 4000 (Homo sapiens; Human gammaherpesvirus 4)
+## 5175 Illumina HiSeq 4000 (Homo sapiens; Human herpesvirus 4)
+## 6856 Illumina HiSeq 4000 (Homo sapiens)
+## Technology
+## 633 high-throughput sequencing
+## 881 high-throughput sequencing
+## 1407 high-throughput sequencing
+## 1569 high-throughput sequencing
+## 2090 high-throughput sequencing
+## 2248 high-throughput sequencing
+## 2422 high-throughput sequencing
+## 2441 high-throughput sequencing
+## 2894 high-throughput sequencing
+## 4101 high-throughput sequencing
+## 5175 high-throughput sequencing
+## 6856 high-throughput sequencing
+## Taxonomy Data.Rows Samples.Count
+## 633 Homo sapiens;Plasmodium berghei 0 31
+## 881 Homo sapiens;Pan troglodytes 0 9
+## 1407 Canis lupus familiaris;Homo sapiens;Mus musculus 0 5
+## 1569 Homo sapiens;[Haemophilus] ducreyi 0 4
+## 2090 Homo sapiens;Neisseria gonorrhoeae 0 109
+## 2248 Homo sapiens;Human gammaherpesvirus 8 0 27
+## 2422 Drosophila melanogaster;Homo sapiens 0 8
+## 2441 Homo sapiens;Mus musculus 0 57
+## 2894 Gallus gallus;Homo sapiens 0 3
+## 4101 Homo sapiens;Human gammaherpesvirus 4 0 49
+## 5175 Homo sapiens;Human gammaherpesvirus 4 0 51
+## 6856 Homo sapiens 0 56722
+## Series.Count Contact Release.Date
+## 633 1 GEO Feb 21, 2020
+## 881 1 GEO Nov 25, 2019
+## 1407 3 GEO Jun 29, 2019
+## 1569 1 GEO May 08, 2019
+## 2090 1 GEO Dec 06, 2018
+## 2248 5 GEO Oct 24, 2018
+## 2422 2 GEO Aug 20, 2018
+## 2441 7 GEO Aug 06, 2018
+## 2894 2 GEO Apr 05, 2018
+## 4101 9 GEO Apr 24, 2017
+## 5175 4 GEO Jul 10, 2016
+## 6856 2501 GEO Jun 09, 2015
Show only accession id:
-GEOPlatformSearch("Illumina HiSeq 4000", species = "Homo Sapiens", accession_only = TRUE)
## [1] "GPL28185" "GPL27803" "GPL26865" "GPL26639" "GPL25904" "GPL25719"
-## [7] "GPL25476" "GPL25431" "GPL24850" "GPL23362" "GPL22132" "GPL20301"
+GEOPlatformSearch("Illumina HiSeq 4000", species = "Homo Sapiens", accession_only = TRUE)
## [1] "GPL28185" "GPL27803" "GPL26865" "GPL26639" "GPL25904" "GPL25719"
+## [7] "GPL25476" "GPL25431" "GPL24850" "GPL23362" "GPL22132" "GPL20301"
Search for multiple terms:
-GEOPlatformSearch(c("Drosophila melanogaster", "Illumina HiSeq 4000"), species = "Homo Sapiens", contain_all = TRUE)
## Accession Title
-## 2422 GPL25476 Illumina HiSeq 4000 (Drosophila melanogaster; Homo sapiens)
-## Technology Taxonomy Data.Rows
-## 2422 high-throughput sequencing Drosophila melanogaster;Homo sapiens 0
-## Samples.Count Series.Count Contact Release.Date
-## 2422 8 2 GEO Aug 20, 2018
-Set contain_all = T
to show results include all search terms. Set contain_all = FALSE
to show results include any of the search terms.
GEOPlatformSearch(c("Drosophila melanogaster", "Illumina HiSeq 4000"), species = "Homo Sapiens", contain_all = TRUE)
## Accession Title
+## 2422 GPL25476 Illumina HiSeq 4000 (Drosophila melanogaster; Homo sapiens)
+## Technology Taxonomy Data.Rows
+## 2422 high-throughput sequencing Drosophila melanogaster;Homo sapiens 0
+## Samples.Count Series.Count Contact Release.Date
+## 2422 8 2 GEO Aug 20, 2018
+Set contain_all = TRUE
to show only results that include all search
+terms. Set contain_all = FALSE
to show results that include any
+of the search terms.
Developed by Mengze Li.
+ +Developed by Mengze Li.
CreateOmS.Rmd
devtools::load_all(".")
+library(dplyr)
An OmicSignature
object contains three parts:
An OmicSignature
object contains three parts:
metadata, a list containing metadata fields.
-required fields: “signature_name”, “organism”, “platform”, “direction_type”, “phenotype”.
metadata, a list containing metadata
+fields.
+required fields: “signature_name”,
+“organism”, “platform”,
+“direction_type”, “phenotype”.
signature, a dataframe.
-required columns: “signature_symbol”, “signature_direction”
+required columns: “signature_symbol”,
+“signature_direction”
optional column: “signature_score”
-“signature_symbol” should be a subset of “symbol” in difexp (if present).
difexp (optional), a dataframe of differential expression analysis results.
-required columns: “id”, “symbol”, “score”, “p_value”.
difexp (optional), a dataframe of differential
+expression analysis results.
+required columns: “id”, “symbol”,
+“score”, “p_value”.
Once you have the componants above, you can create your own object:
-OmicObj <- OmicSignature$new(
- metadata = metadata,
- signature = signatures,
- difexp = difexp
-)
-Or, you can read an OmicSignature
saved in .json format using readJson()
. See “here”.
Create the object:
OmicObj <- OmicSignature$new(
+ metadata = metadata,
+ signature = signatures,
+ difexp = difexp
+)
+You can also read an OmicSignature
saved in .json format
+using readJson()
. See “here”
+for more details.
The example provided below is from an experiment for Myc gene reduce in mice. Signatures was extracted by comparing the liver of treatment and control when mice is 24-month old. This is a bi-directional signature example, which contains up and down regulated features (genes).
-To be saved into a OmicSignature object, the required metadata fields are:
-“signature_name”, “organism”, “direction_type”.
-Fields not required but highly recommended if available: “platform”, “sample_type”, “phenotype”.
-Additional optional fields can be added, e.g., score_cutoff, adj_p_cutoff, logfc_cutoff, or additional experiment descriptors, which will make the information more complete.
One option is to create metadata list by hand:
-metadata <- list(
- "signature_name" = Myc_reduce_mice_liver_24m,
- "organism" = "Mus Musculus",
- "sample_type" = "liver",
- "phenotype" = "Myc_reduce",
- "direction_type" = "bi-directional",
- "platform" = "GPL6246",
- "adj_p_cutoff" = 0.05,
- "score_cutoff" = 7,
- "keywords" = c("Myc", "KO", "longevity"),
- "PMID" = 25619689,
- "year" = 2015
-)
-Or use the built-in function createMetadata()
(recommended). The function reminds you what attributes to include. You can also provide your own customized attributes.
-Click here to see a full list of built-in attributes.
OmicSignature
Object Step-by-Step
+The example provided below is from an experiment on Myc gene reduction
+in mice. Signatures were extracted by comparing the livers of treatment
+and control mice at 24 months of age. This is a bi-directional
+signature example, which contains up and down regulated features
+(genes).
Required metadata fields:
+“signature_name”, “organism”,
+“direction_type”.
+Fields not required, but highly recommended when applicable:
+“phenotype”, “platform”,
+“sample_type”, “covariates”,
+“score_cutoff”, “adj_p_cutoff”,
+“logfc_cutoff”.
Option 1: Create metadata by hand (not recommended because typos can
+occur)
metadata <- list(
+ "signature_name" = "Myc_reduce_mice_liver_24m",
+ "organism" = "Mus Musculus",
+ "sample_type" = "liver",
+ "phenotype" = "Myc_reduce",
+ "direction_type" = "bi-directional",
+ "platform" = "GPL6246",
+ "adj_p_cutoff" = 0.05,
+ "score_cutoff" = 7,
+ "keywords" = c("Myc", "KO", "longevity"),
+ "PMID" = 25619689,
+ "year" = 2015
+)
+Option 2: Use function createMetadata()
+(recommended).
+This function helps remind you of the built-in attributes. The full list
+of current built-in attributes is shown here.
+You can also provide your own customized attributes.
-metadata <- createMetadata(
- # examples of build-in attributes:
- signature_name = "Myc_reduce_mice_liver_24m", # required
- organism = "Mus Musculus", # required
- phenotype = "Myc_reduce", # optional but highly recommended
- direction_type = "bi-directional", # required
- platform = "GPL6246", # optional but highly recommended; must be a GEO platform ID
- sample_type = "liver", # optional but highly recommended; must be BRENDA ontology
- adj_p_cutoff = 0.05,
- score_cutoff = 7,
- keywords = c("Myc", "KO", "longevity"),
- PMID = 25619689,
- year = 2015,
-
- # example of cursomized attributes:
- animal_strain = "C57BL/6"
-)
Note: If “sample_type” is NOT a BRENDA ontology term or “platform” is NOT a valid GEO platform accession ID, you will get warnings. See how to search for the correct term to use in “BRENDA ontology & GEO platform ID” section.
-direction_type
is one of:
metadata <- createMetadata(
+ # required attributes:
+ signature_name = "Myc_reduce_mice_liver_24m",
+ organism = "Mus Musculus",
+ direction_type = "bi-directional",
+
+ # optional and recommended:
+ phenotype = "Myc_reduce",
+ covariates = "none",
+ platform = "GPL6246", # must be a GEO platform ID
+ sample_type = "liver", # must be BRENDA ontology
+
+ # optional cut-off attributes.
+ # specifying them can facilitate the extraction of signatures.
+ # feel free to delete any that are not relevant.
+ logfc_cutoff = NULL,
+ p_value_cutoff = NULL,
+ adj_p_cutoff = 0.05,
+ score_cutoff = 7,
+
+ # other optional built-in attributes:
+ keywords = c("Myc", "KO", "longevity"),
+ cutoff_description = NULL,
+ author = NULL,
+ PMID = 25619689,
+ year = 2015,
+
+ # example of a customized attribute:
+ animal_strain = "C57BL/6"
+)
If “sample_type” is NOT a BRENDA ontology term, or “platform” is NOT
+a valid GEO platform accession ID, you will get warnings. See how to
+search for the correct term in “BRENDA
+ontology & GEO platform ID”.
direction_type
must be one of the following:
“uni-directional”. You only have a list of significant feature names but don’t know if they are up or down regulated in the treatment group, or directional information is not applicable.
“bi-directional”. In most cases significant features can be grouped into “up” and “down” regulated features. For example, when comparing treatment vs. control groups, some features will be higher (“up”, or “+”) and some will be lower (“down” or “-”) in treatment.
“multi-directional”. Used with multi-valued categorical phenotypes (e.g., “low” vs. “medium” vs. “high”), usually analyzed by ANOVA. In this case, the “direction” column in signature table should be the phenotype’s value name (e.g., “low”).
“uni-directional”. You only have a list of significant feature
+names, and don’t know if they are up or down regulated in the treatment
+group, or directional information is not applicable. An example would be
+“genes mutated in a disease.”
“bi-directional”. Significant features can be grouped into “up”
+and “down” categories. For example, when comparing treatment
+vs. control groups, some features will be higher (“up”, or “+”)
+and some will be lower (“down” or “-”) in the treatment group.
+Similarly, when the phenotype is a continuous trait, such as age, some
+features will increase (“up”, or “+”) with age, while others will
+decrease (“down”, or “-”).
“multi-directional”. Used with multi-valued categorical
+phenotypes (e.g., “low” vs. “medium” vs. “high”),
+usually analyzed by ANOVA. In this case, the “direction” column in
+signature table should be the phenotype’s category (e.g., “low”).
A differential expression analysis matrix is optional but highly recommended if you have it.
-To be saved into a OmicSignature object, the matrix’ required columns are:
-“id”, “symbol”, “score”, “p_value”.
-“id” is used as an unique identifier in case there are duplicated gene symbols. Frequently used id’s include probe ID, ENSEMBL ID, or unique numbers.
Here we show an example of how to derive the difexp
object from the results of a differential expression analysis based on the limma
package. Output columns include logFC, AveExpr, t, P.Value, adj.P.Val, B score, Probe.ID, gene_symbol, and gene_name. In this example, we use t-test statistic (column t) as the score for the symbols.
A differential expression dataframe is optional but
+highly recommended if available. It facilitates
+downstream signature extraction.
difexp
is a dataframe with required
+columns:
+“id”, “symbol”,
+“score”, “p_value”.
+“id” is used as a unique identifier in case there are duplicated
+symbols. Frequently used examples include: probe ID, ENSEMBL ID,
+UniProt ID, unique numbers.
+“score” is usually a t-test statistic or Z-score.
Some frequently used optional column names:
+“logfc”, “est”, “aveexpr”, “se”, “robust_se”, “HR”, “adj_p”,
+“gene_name”, “gene_annotation”.
Here we use an example output from the differential expression
+analysis using the limma
package.
-difexp <- read.table(file.path(system.file("extdata", package = "OmicSignature"), "difmatrix_Myc_mice_liver_24m_raw.txt"),
- header = TRUE, sep = "\t", stringsAsFactors = FALSE
-)
-head(difexp)
-#> logFC AveExpr t P.Value adj.P.Val b Probe.ID
-#> 1 -0.09301000 6.676552 -1.0304955 0.33776819 0.7015182 -6.370549 10344614
-#> 2 -0.09837667 2.998012 -1.1161772 0.30194862 0.6735042 -6.281333 10344616
-#> 3 -0.21524000 5.055207 -2.5893972 0.03664519 0.2872695 -4.287330 10344620
-#> 4 -0.12186667 12.056667 -1.5881487 0.15719140 0.5256233 -5.713531 10344624
-#> 5 0.01440000 10.087133 0.2221561 0.83066749 0.9532330 -6.902609 10344633
-#> 6 -0.03361667 9.947465 -0.4638379 0.65713620 0.8842391 -6.810300 10344637
-#> gene_symbol gene_name
-#> 1 Gm16088 predicted gene 16088
-#> 2 Gm26206 predicted gene, 26206
-#> 3 Gm10568 predicted gene 10568
-#> 4 Lypla1 lysophospholipase 1
-#> 5 Tcea1 transcription elongation factor A (SII) 1
-#> 6 Atp6v1h ATPase, H+ transporting, lysosomal V1 subunit H
You can manually modify the column names to match the requirements. Alternatively, you can use the built-in function replaceDifexpCol()
, designed to replace some frequently-used alternative column names.
difexp <- read.table(file.path(system.file("extdata", package = "OmicSignature"), "difmatrix_Myc_mice_liver_24m_raw.txt"),
+ header = TRUE, sep = "\t", stringsAsFactors = FALSE
+)
+head(difexp)
+#> logFC AveExpr t P.Value adj.P.Val b Probe.ID
+#> 1 -0.09301000 6.676552 -1.0304955 0.33776819 0.7015182 -6.370549 10344614
+#> 2 -0.09837667 2.998012 -1.1161772 0.30194862 0.6735042 -6.281333 10344616
+#> 3 -0.21524000 5.055207 -2.5893972 0.03664519 0.2872695 -4.287330 10344620
+#> 4 -0.12186667 12.056667 -1.5881487 0.15719140 0.5256233 -5.713531 10344624
+#> 5 0.01440000 10.087133 0.2221561 0.83066749 0.9532330 -6.902609 10344633
+#> 6 -0.03361667 9.947465 -0.4638379 0.65713620 0.8842391 -6.810300 10344637
+#> gene_symbol gene_name
+#> 1 Gm16088 predicted gene 16088
+#> 2 Gm26206 predicted gene, 26206
+#> 3 Gm10568 predicted gene 10568
+#> 4 Lypla1 lysophospholipase 1
+#> 5 Tcea1 transcription elongation factor A (SII) 1
+#> 6 Atp6v1h ATPase, H+ transporting, lysosomal V1 subunit H
We can manually modify the column names to match the requirements.
+Alternatively, we can use the built-in function
+replaceDifexpCol()
designed to replace some frequently used
+alternative column names.
-colnames(difexp) <- replaceDifexpCol(colnames(difexp))
-head(difexp)
-#> logfc aveexpr score p_value adj_p b id
-#> 1 -0.09301000 6.676552 -1.0304955 0.33776819 0.7015182 -6.370549 10344614
-#> 2 -0.09837667 2.998012 -1.1161772 0.30194862 0.6735042 -6.281333 10344616
-#> 3 -0.21524000 5.055207 -2.5893972 0.03664519 0.2872695 -4.287330 10344620
-#> 4 -0.12186667 12.056667 -1.5881487 0.15719140 0.5256233 -5.713531 10344624
-#> 5 0.01440000 10.087133 0.2221561 0.83066749 0.9532330 -6.902609 10344633
-#> 6 -0.03361667 9.947465 -0.4638379 0.65713620 0.8842391 -6.810300 10344637
-#> symbol gene_name
-#> 1 Gm16088 predicted gene 16088
-#> 2 Gm26206 predicted gene, 26206
-#> 3 Gm10568 predicted gene 10568
-#> 4 Lypla1 lysophospholipase 1
-#> 5 Tcea1 transcription elongation factor A (SII) 1
-#> 6 Atp6v1h ATPase, H+ transporting, lysosomal V1 subunit H
colnames(difexp) <- replaceDifexpCol(colnames(difexp))
+head(difexp)
+#> logfc aveexpr score p_value adj_p b id
+#> 1 -0.09301000 6.676552 -1.0304955 0.33776819 0.7015182 -6.370549 10344614
+#> 2 -0.09837667 2.998012 -1.1161772 0.30194862 0.6735042 -6.281333 10344616
+#> 3 -0.21524000 5.055207 -2.5893972 0.03664519 0.2872695 -4.287330 10344620
+#> 4 -0.12186667 12.056667 -1.5881487 0.15719140 0.5256233 -5.713531 10344624
+#> 5 0.01440000 10.087133 0.2221561 0.83066749 0.9532330 -6.902609 10344633
+#> 6 -0.03361667 9.947465 -0.4638379 0.65713620 0.8842391 -6.810300 10344637
+#> symbol gene_name
+#> 1 Gm16088 predicted gene 16088
+#> 2 Gm26206 predicted gene, 26206
+#> 3 Gm10568 predicted gene 10568
+#> 4 Lypla1 lysophospholipase 1
+#> 5 Tcea1 transcription elongation factor A (SII) 1
+#> 6 Atp6v1h ATPase, H+ transporting, lysosomal V1 subunit H
Here we create a bi-directional signature manually from the difexp object, using the filter()
function from the dplyr
package. In this example, we use the score_cutoff
and adj_p_cutoff
previously specified in the metadata.
To be stored into an OmicSignature object, signature
+needs to be a dataframe with column “signature_symbol”.
+Also, if the signature is “bi-directional” or “multi-directional”
+(specified in direction_type
in metadata
+list), then column “signature_direction” is also
+required.
+An optional column “signature_score” is recommended when feature scores
+are available.
Option 1: Extract signature from difexp.
+Here we create a bi-directional signature manually from the difexp
+generated above, using the score_cutoff
and
+adj_p_cutoff
previously specified in the metadata.
-signatures <- difexp %>%
- dplyr::filter(abs(score) > metadata$score_cutoff & adj_p < metadata$adj_p_cutoff) %>%
- dplyr::select(symbol, score) %>%
- dplyr::mutate(signature_direction = ifelse(score > 0, "+", "-")) %>%
- dplyr::rename(signature_symbol = "symbol", signature_score = "score")
-head(signatures)
-#> signature_symbol signature_score signature_direction
-#> 1 Il1r1 -13.542734 -
-#> 2 Ctse 14.762071 +
-#> 3 Chil1 -25.413178 -
-#> 4 Kcnt2 -7.727982 -
-#> 5 Sh2d1b1 8.818281 +
-#> 6 Olfr16 -7.010304 -
(note: if you see numbers instead of gene symbol name in the first column, please check if the “symbol” column in your difexp matrix is “character” and not accidentally be “factor”)
-To be stored into OmicSignature object, signature need to be a dataframe with column “signature_symbol”. Also, if the signature is “bi-directional” or “multi-directional” (specified in direction_type
in metadata
list), then the column “signature_direction” is also required. “uni-directional” type does not require this column. The optional column “signature_score” is used when feature scores are available.
Our function standardizeSigDF()
can help you to remove duplicate rows, empty symbols in the signature dataframe, if any.
signatures <- difexp %>%
+ dplyr::filter(abs(score) > metadata$score_cutoff & adj_p < metadata$adj_p_cutoff) %>%
+ dplyr::select(symbol, score) %>%
+ dplyr::mutate(signature_direction = ifelse(score > 0, "+", "-")) %>%
+ dplyr::rename(signature_symbol = "symbol", signature_score = "score")
+head(signatures)
+#> signature_symbol signature_score signature_direction
+#> 1 Il1r1 -13.542734 -
+#> 2 Ctse 14.762071 +
+#> 3 Chil1 -25.413178 -
+#> 4 Kcnt2 -7.727982 -
+#> 5 Sh2d1b1 8.818281 +
+#> 6 Olfr16 -7.010304 -
(note: if you see numbers instead of gene symbols in the first
+column, please check that the “symbol” column in your difexp matrix is
+“character” and not accidentally a “factor”)
Function standardizeSigDF()
can help remove duplicate
+rows and empty symbols in the signature dataframe.
-signatures <- standardizeSigDF(signatures)
-head(signatures)
-#> signature_symbol signature_score signature_direction
-#> 1 Saa1 -35.29997 -
-#> 3 Sult3a1 -31.75527 -
-#> 4 Isyna1 -29.93255 -
-#> 6 Chil1 -25.41318 -
-#> 8 Saa2 -23.81452 -
-#> 11 Sult1e1 -22.76345 -
-tail(signatures)
-#> signature_symbol signature_score signature_direction
-#> 339 Cfp 7.040545 +
-#> 340 Igkv9-124 7.031580 +
-#> 341 Mtnr1a 7.030133 +
-#> 343 Clec12a 7.024170 +
-#> 344 Angpt2 7.021941 +
-#> 345 Gas2 7.013040 +
Alternatively, you can provide signatures as a character vector. For example:
-signatures <- c("gene1", "gene2", "gene3")
-Or as a numeric vector and provide symbols as its name:
-signatures <- c(0.45, -3.21, 2.44)
-names(signatures) <- c("gene1", "gene2", "gene3")
-If direction_type
in metadata
is set to be “bi-directional”, the direction will be determined by whether a symbol has a positive or negative score.
signatures <- standardizeSigDF(signatures)
+head(signatures)
+#> signature_symbol signature_score signature_direction
+#> 1 Saa1 -35.29997 -
+#> 3 Sult3a1 -31.75527 -
+#> 4 Isyna1 -29.93255 -
+#> 6 Chil1 -25.41318 -
+#> 8 Saa2 -23.81452 -
+#> 11 Sult1e1 -22.76345 -
+tail(signatures)
+#> signature_symbol signature_score signature_direction
+#> 339 Cfp 7.040545 +
+#> 340 Igkv9-124 7.031580 +
+#> 341 Mtnr1a 7.030133 +
+#> 343 Clec12a 7.024170 +
+#> 344 Angpt2 7.021941 +
+#> 345 Gas2 7.013040 +
Option 2: Manually write signature.
+For uni-directional signatures:
signatures <- c("gene1", "gene2", "gene3")
+For bi-directional signatures:
signatures <- c(0.45, -3.21, 2.44)
+names(signatures) <- c("gene1", "gene2", "gene3")
+The direction will be automatically determined by the score value
+provided.
OmicSignature
objectWe have everything we need now.
-Use OmicSignature$new()
to create a new OmicSignature R6 object.
OmicSignature
object
+Use OmicSignature$new()
to create a new OmicSignature R6
+object.
-OmicObj <- OmicSignature$new(
- metadata = metadata,
- signature = signatures,
- difexp = difexp
-)
-#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
Use print()
to see its information:
OmicObj <- OmicSignature$new(
+ metadata = metadata,
+ signature = signatures,
+ difexp = difexp
+)
+#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
You can also ask the program to print the messages while creating the
+OmicSignature Object. By default, print_message
is set to
+be FALSE
.
-print(OmicObj)
-#> Signature Object:
-#> Metadata:
-#> adj_p_cutoff = 0.05
-#> animal_strain = C57BL/6
-#> covariates = none
-#> direction_type = bi-directional
-#> keywords = Myc, KO, longevity
-#> organism = Mus Musculus
-#> phenotype = Myc_reduce
-#> platform = GPL6246
-#> PMID = 25619689
-#> sample_type = liver
-#> score_cutoff = 7
-#> signature_name = Myc_reduce_mice_liver_24m
-#> year = 2015
-#> signature:
-#> - (152)
-#> + (194)
-#> Differential Expression Data:
-#> 27359 x 9
You can also ask the program to print the messages while creating the OmicSignature Object. By default, print_message
is set to be FALSE
.
OmicObj <- OmicSignature$new(
+ metadata = metadata,
+ signature = signatures,
+ difexp = difexp,
+ print_message = TRUE
+)
+#> --Required attributes for metadata: signature_name, organism, direction_type --
+#> [Success] Metadata is saved.
+#> [Success] Signature is valid.
+#> difexp: additional columns found: logfc, aveexpr, p_value, b, gene_name.
+#> [Success] difexp matrix is valid.
+#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
Now we can print()
to see the information:
-OmicObj <- OmicSignature$new(
- metadata = metadata,
- signature = signatures,
- difexp = difexp,
- print_message = TRUE
-)
-#> --Required attributes for metadata: signature_name, organism, direction_type --
-#> [Success] Metadata is saved.
-#> [Success] Signature is valid.
-#> difexp: additional columns found: logfc, aveexpr, p_value, b, gene_name.
-#> [Success] difexp matrix is valid.
-#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
print(OmicObj)
+#> Signature Object:
+#> Metadata:
+#> adj_p_cutoff = 0.05
+#> animal_strain = C57BL/6
+#> covariates = none
+#> direction_type = bi-directional
+#> keywords = Myc, KO, longevity
+#> organism = Mus Musculus
+#> phenotype = Myc_reduce
+#> platform = GPL6246
+#> PMID = 25619689
+#> sample_type = liver
+#> score_cutoff = 7
+#> signature_name = Myc_reduce_mice_liver_24m
+#> year = 2015
+#> signature:
+#> - (152)
+#> + (194)
+#> Differential Expression Data:
+#> 27359 x 9
And use new criteria to extract new significant features:
+(note: this does not change the signature
saved in
+the object)
+OmicObj$extractSignature("abs(score) > 25; adj_p < 0.001")
+#> symbol score direction
+#> 1 Saa1 -35.29997 -
+#> 2 Cpa1 34.62008 +
+#> 3 Sult3a1 -31.75527 -
+#> 4 Isyna1 -29.93255 -
+#> 5 Zg16 28.51562 +
+#> 6 Chil1 -25.41318 -
See more in “Functionalities
+of OmicSignature” section.
OmicSignature
from difexp
and metadata
+OmicSignature
from difexp
and
+metadata
You can by-pass the generating signature process once you are an expert. Simply provide cutoffs in metadata, and OmicSigFromDifexp()
will extract signatures from the difexp
provided according to those criteria, and create the OmicSignature
object for you.
Remember to provide cutoffs, e.g. adj_p_cutoff
and score_cutoff
in metadata
, and make sure your input difexp
has those columns.
-OmicObj1 <- OmicSigFromDifexp(difexp, metadata)
-#> -- criterias used to extract signatures: abs(score) > 7; adj_p < 0.05 .
-#>
-#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
-OmicObj1
-#> Signature Object:
-#> Metadata:
-#> adj_p_cutoff = 0.05
-#> animal_strain = C57BL/6
-#> covariates = none
-#> direction_type = bi-directional
-#> keywords = Myc, KO, longevity
-#> organism = Mus Musculus
-#> phenotype = Myc_reduce
-#> platform = GPL6246
-#> PMID = 25619689
-#> sample_type = liver
-#> score_cutoff = 7
-#> signature_name = Myc_reduce_mice_liver_24m
-#> year = 2015
-#> signature:
-#> - (152)
-#> + (194)
-#> Differential Expression Data:
-#> 27359 x 9
See the top signatures:
+You can bypass the signature-generation step once you are an
+expert. Simply provide cutoffs (e.g. adj_p_cutoff
and
+score_cutoff
) in the metadata
, make sure
+difexp
has those columns available, and use
+OmicSigFromDifexp()
to extract significant features and
+create the OmicSignature
object.
-head(OmicObj1$signature %>% dplyr::arrange(desc(abs(signature_score))))
-#> signature_symbol signature_score signature_direction
-#> 1 Saa1 -35.29997 -
-#> 2 Cpa1 34.62008 +
-#> 3 Sult3a1 -31.75527 -
-#> 4 Isyna1 -29.93255 -
-#> 5 Zg16 28.51562 +
-#> 6 Chil1 -25.41318 -
OmicObj1 <- OmicSigFromDifexp(difexp, metadata)
+#> -- criterias used to extract signatures: abs(score) > 7; adj_p < 0.05 .
+#>
+#> [Success] OmicSignature object Myc_reduce_mice_liver_24m created.
+OmicObj1
+#> Signature Object:
+#> Metadata:
+#> adj_p_cutoff = 0.05
+#> animal_strain = C57BL/6
+#> covariates = none
+#> direction_type = bi-directional
+#> keywords = Myc, KO, longevity
+#> organism = Mus Musculus
+#> phenotype = Myc_reduce
+#> platform = GPL6246
+#> PMID = 25619689
+#> sample_type = liver
+#> score_cutoff = 7
+#> signature_name = Myc_reduce_mice_liver_24m
+#> year = 2015
+#> signature:
+#> - (152)
+#> + (194)
+#> Differential Expression Data:
+#> 27359 x 9
Developed by Mengze Li.
+ +Developed by Mengze Li.