nf-core · d4straub · Nov 25, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/conf/modules.config b/conf/modules.config
@@ -1057,6 +1057,15 @@ process {
             pattern: "*.rds"
         ]
     }
+
+    withName: TREESUMMARIZEDEXPERIMENT {
+        publishDir = [
+            path: { "${params.outdir}/treesummarizedexperiment" },
+            mode: params.publish_dir_mode,
+            pattern: "*.rds"
+        ]
+    }
+
     withName: 'MULTIQC' {
         ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
         publishDir = [

diff --git a/modules/local/treesummarizedexperiment.nf b/modules/local/treesummarizedexperiment.nf
@@ -0,0 +1,76 @@
+process TREESUMMARIZEDEXPERIMENT {
+    tag "$prefix"
+    label 'process_low'
+
+    conda "bioconda::bioconductor-treesummarizedexperiment=2.10.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bioconductor-treesummarizedexperiment%3A2.10.0--r43hdfd78af_0' :
+        'bioconductor-treesummarizedexperiment%3A2.10.0--r43hdfd78af_0' }"
+
+    input:
+    tuple val(prefix), path(tax_tsv), path(otu_tsv)
+    path sam_tsv
+    path tree
+
+    output:
+    tuple val(prefix), path("*TreeSummarizedExperiment.rds"), emit: rds
+    path "versions.yml"                                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def sam_tsv = "\"${sam_tsv}\""
+    def otu_tsv = "\"${otu_tsv}\""
+    def tax_tsv = "\"${tax_tsv}\""
+    def tree    = "\"${tree}\""
+    def prefix  = "\"${prefix}\""
+    """
+    #!/usr/bin/env Rscript
+
+    suppressPackageStartupMessages(library(TreeSummarizedExperiment))
+
+    # Read otu table. It must be in a SimpleList as a matrix where rows
+    # represent taxa and columns samples.
+    otu_mat  <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1)
+    otu_mat <- as.matrix(otu_mat)
+    assays <- SimpleList(counts = otu_mat)
+    # Read taxonomy table. Correct format for it is DataFrame.
+    taxonomy_table  <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1)
+    taxonomy_table <- DataFrame(taxonomy_table)
+
+    # Match rownames between taxonomy table and abundance matrix.
+    taxonomy_table <- taxonomy_table[match(rownames(otu_mat), rownames(taxonomy_table)), ]
+
+    # Create TreeSE object.
+    tse <- TreeSummarizedExperiment(
+        assays = assays,
+        rowData = taxonomy_table
+    )
+
+    # If provided, we add sample metadata as DataFrame object. rownames of
+    # sample metadata must match with colnames of abundance matrix.
+    if (file.exists($sam_tsv)) {
+        sample_meta  <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1)
+        sample_meta <- sample_meta[match(colnames(tse), rownames(sample_meta)), ]
+        sample_meta  <- DataFrame(sample_meta)
+        colData(tse) <- sample_meta
+    }
+
+    # If provided, we add phylogeny. The rownames in abundance matrix must match
+    # with node labels in phylogeny.
+    if (file.exists($tree)) {
+        phylogeny <- read_tree($tree)
+        rowTree(tse) <- phylogeny
+    }
+
+    saveRDS(tse, file = paste0($prefix, "_TreeSummarizedExperiment.rds"))
+
+    # Version information
+    writeLines(c("\\"${task.process}\\":",
+        paste0("    R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
+        paste0("    TreeSummarizedExperiment: ", packageVersion("TreeSummarizedExperiment"))),
+        "versions.yml"
+    )
+    """
+}
diff --git a/nextflow.config b/nextflow.config
@@ -105,6 +105,8 @@ params {
     skip_dada_addspecies   = false
     skip_alpha_rarefaction = false
     skip_diversity_indices = false
+    skip_phyloseq          = false
+    skip_tse               = false
     skip_multiqc           = false
     skip_report            = false
 

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -790,6 +790,14 @@
                     "type": "boolean",
                     "description": "Skip alpha and beta diversity analysis"
                 },
+                "skip_phyloseq": {
+                    "type": "boolean",
+                    "description": "Skip exporting phyloseq rds object(s)"
+                },
+                "skip_tse": {
+                    "type": "boolean",
+                    "description": "Skip exporting TreeSummarizedExperiment rds object(s)"
+                },
                 "skip_multiqc": {
                     "type": "boolean",
                     "description": "Skip MultiQC reporting"

diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf
diff --git a/subworkflows/local/robject_workflow.nf b/subworkflows/local/robject_workflow.nf
@@ -0,0 +1,56 @@
+/*
+ * Create phyloseq objects
+ */
+
+include { PHYLOSEQ                                } from '../../modules/local/phyloseq'
+include { PHYLOSEQ_INASV                          } from '../../modules/local/phyloseq_inasv'
+include { TREESUMMARIZEDEXPERIMENT                } from '../../modules/local/treesummarizedexperiment'
+
+workflow ROBJECT_WORKFLOW {
+    take:
+    ch_tax
+    ch_tsv
+    ch_meta
+    ch_tree
+    run_qiime2
+
+    main:
+    ch_versions_robject_workflow = Channel.empty()
+
+    if ( params.metadata ) {
+        ch_robject_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel
+    } else {
+        ch_robject_inmeta = []
+    }
+
+    if ( params.pplace_tree ) {
+        ch_robject_intree = ch_tree.map { it = it[1] }.first()
+    } else {
+        ch_robject_intree = []
+    }
+
+    if ( run_qiime2 ) {
+        if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) {
+            ch_robject_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv
+        } else {
+            ch_robject_inasv = ch_tsv
+        }
+    } else {
+        ch_robject_inasv = ch_tsv
+    }
+
+    if ( !params.skip_phyloseq ) {
+        PHYLOSEQ ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree )
+        ch_versions_robject_workflow = ch_versions_robject_workflow.mix(PHYLOSEQ.out.versions)
+    }
+
+    if ( !params.skip_tse ) {
+        TREESUMMARIZEDEXPERIMENT ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree )
+        ch_versions_robject_workflow = ch_versions_robject_workflow.mix(TREESUMMARIZEDEXPERIMENT.out.versions)
+    }
+
+    emit:
+    phyloseq = !params.skip_phyloseq ? PHYLOSEQ.out.rds : []
+    tse      = !params.skip_tse ? TREESUMMARIZEDEXPERIMENT.out.rds : []
+    versions = ch_versions_robject_workflow
+}
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
@@ -156,8 +156,8 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1)
     if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." }
 }
 
-// This tracks tax tables produced during pipeline and each table will be used during phyloseq
-ch_tax_for_phyloseq = Channel.empty()
+// This tracks tax tables produced during pipeline and each table will be used during phyloseq and treesummarizedexperiment
+ch_tax_for_robject = Channel.empty()
 
 
 /*
@@ -237,7 +237,7 @@ include { QIIME2_EXPORT                 } from '../subworkflows/local/qiime2_exp
 include { QIIME2_BARPLOTAVG             } from '../subworkflows/local/qiime2_barplotavg'
 include { QIIME2_DIVERSITY              } from '../subworkflows/local/qiime2_diversity'
 include { QIIME2_ANCOM                  } from '../subworkflows/local/qiime2_ancom'
-include { PHYLOSEQ_WORKFLOW             } from '../subworkflows/local/phyloseq_workflow'
+include { ROBJECT_WORKFLOW             } from '../subworkflows/local/robject_workflow'
 
 //
 // FUNCTIONS
@@ -606,7 +606,7 @@ workflow AMPLISEQ {
             taxlevels
         ).tax.set { ch_dada2_tax }
         ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions)
-        ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } )
+        ch_tax_for_robject = ch_tax_for_robject.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } )
     } else {
         ch_dada2_tax = Channel.empty()
     }
@@ -620,7 +620,7 @@ workflow AMPLISEQ {
             kraken2_taxlevels
         ).qiime2_tsv.set { ch_kraken2_tax }
         ch_versions = ch_versions.mix(KRAKEN2_TAXONOMY_WF.out.versions)
-        ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } )
+        ch_tax_for_robject = ch_tax_for_robject.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } )
     } else {
         ch_kraken2_tax = Channel.empty()
     }
@@ -635,7 +635,7 @@ workflow AMPLISEQ {
             sintax_taxlevels
         ).tax.set { ch_sintax_tax }
         ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions)
-        ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } )
+        ch_tax_for_robject = ch_tax_for_robject.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } )
     } else {
         ch_sintax_tax = Channel.empty()
     }
@@ -657,7 +657,7 @@ workflow AMPLISEQ {
         FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data )
         ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions )
         ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv
-        ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } )
+        ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } )
     } else {
         ch_pplace_tax = Channel.empty()
     }
@@ -679,7 +679,7 @@ workflow AMPLISEQ {
         )
         ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions )
         ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv
-        ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } )
+        ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } )
     } else {
         ch_qiime2_tax = Channel.empty()
     }
@@ -866,23 +866,23 @@ workflow AMPLISEQ {
     }
 
     //
-    // SUBWORKFLOW: Create phyloseq objects
+    // SUBWORKFLOW: Create R objects
     //
-    if ( !params.skip_taxonomy ) {
+    if ( !params.skip_taxonomy && ( !params.skip_phyloseq || !params.skip_tse ) ) {
         if ( params.pplace_tree ) {
-            ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny
+            ch_tree_for_robject = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny
         } else {
-            ch_tree_for_phyloseq = []
+            ch_tree_for_robject = []
         }
 
-        PHYLOSEQ_WORKFLOW (
-            ch_tax_for_phyloseq,
+        ROBJECT_WORKFLOW (
+            ch_tax_for_robject,
             ch_tsv,
             ch_metadata.ifEmpty([]),
-            ch_tree_for_phyloseq,
+            ch_tree_for_robject,
             run_qiime2
         )
-        ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first())
+        ch_versions = ch_versions.mix(ROBJECT_WORKFLOW.out.versions)
     }
 
     //
@@ -1012,7 +1012,8 @@ workflow AMPLISEQ {
             run_qiime2 && params.ancombc_formula && params.metadata ? QIIME2_ANCOM.out.ancombc_formula.collect().ifEmpty( [] ) : [],
             params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [],
             params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).collect().ifEmpty( [] ) : [],
-            !params.skip_taxonomy ? PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : []
+            !params.skip_taxonomy && !params.skip_phyloseq ? ROBJECT_WORKFLOW.out.phyloseq.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : []
+            //TODO: add treesummarizedexperiment
         )
         ch_versions    = ch_versions.mix(SUMMARY_REPORT.out.versions)
     }