-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.nf
160 lines (128 loc) · 4.37 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
params.help = false
if (params.help) {
log.info """
-----------------------------------------------------------------------
idplot: compare similar sequences to a reference
================================================
Output is written to <outdir>/idplot.html and can
be opened with your internet browser.
required
--------
--reference Fasta sequence that will act as the root sequence.
--fasta One ('my.fasta') or multiple ('*.fasta') query sequences
to compare to `--reference`.
OR
--alignment Pre-aligned sequences where the first sequence is your root.
options
-------
--outdir Base results directory for output.
Default: '/.results'
--window The sliding window size across the reference genome upon
which to calculate similarity.
Default: 500
--gard Run GARD for breakpoint detection.
Default: false
--gff Reference annotation file in .gff or .gff3 format.
Coordinates will be adjusted to include gaps introduced
during sequence alignment.
Default: false
--cpus Threads for multi-threaded processes.
Default: 1
-----------------------------------------------------------------------
""".stripIndent()
exit 0
}
// required arguments
params.reference = false
params.fasta = false
params.alignment = false
if( params.alignment ) {
Channel
.fromPath(params.alignment, checkIfExists: true)
.into { alignment_gard_ch; alignment_json_ch; alignment_report_ch }
mafft_ch = Channel.empty()
reference = false
} else {
if( !params.reference ) { exit 1, "Neither --reference NOR --alignment are defined" }
reference = file(params.reference)
if( !reference.exists() ) { exit 1, "Reference [${reference}] does not exist." }
if( !params.fasta ) { exit 1, "--fasta is not defined" }
Channel
.fromPath(params.fasta, checkIfExists: true)
.set { mafft_ch }
}
gff = params.gff ? file(params.gff) : false
process mafft {
publishDir path: "${params.outdir}/", mode: "copy"
cpus params.cpus.toInteger()
input:
path(reference)
path(query) from mafft_ch.collect()
output:
path("${reference.baseName}.msa.fasta") into (msa_gard_ch, msa_json_ch, msa_report_ch)
when:
params.reference
script:
"""
cat ${reference} > mafft_input.fasta
cat ${query} >> mafft_input.fasta
mafft --auto --thread ${task.cpus} --maxiterate 1000 --globalpair mafft_input.fasta > ${reference.baseName}.msa.fasta
"""
}
msa_gard_input_ch = (params.reference ? msa_gard_ch : alignment_gard_ch)
msa_json_input_ch = (params.reference ? msa_json_ch : alignment_json_ch)
msa_report_input_ch = (params.reference ? msa_report_ch : alignment_report_ch)
process gard {
publishDir path: "${params.outdir}/gard/", mode: "copy"
tag "${msa}"
cpus params.cpus.toInteger()
input:
path(msa) from msa_gard_input_ch
output:
path("${msa.baseName}.json") into (gard_output_ch, gard_output_secondary_ch)
when:
params.gard
script:
cmd = params.nompi ? "hyphy" : "mpirun -np ${task.cpus} --allow-run-as-root --oversubscribe HYPHYMPI"
// rv: None, GDD, Gamma
"""
$cmd CPU=${task.cpus} gard --type nucleotide --code Universal \
--alignment ${msa} --rv GDD --model JTT --rate-classes 4 --output ${msa.baseName}.json
"""
}
gard_json_ch = (params.gard ? gard_output_ch : [""])
gard_report_ch = (params.gard ? gard_output_secondary_ch : [""])
process jsontofasta {
input:
path(msa) from msa_json_input_ch
file(json) from gard_json_ch
output:
path("*.fa") into selection_ch
when:
params.gard
script:
template "jsontofasta.py"
}
process fasttree {
input:
path(fasta) from selection_ch.flatten()
output:
path("${fasta.simpleName}.tree") into tree_output_ch
script:
"""
fasttree -nt -gamma -spr 4 -quiet ${fasta} > ${fasta.simpleName}.tree
"""
}
tree_report_ch = (params.gard ? tree_output_ch : [""])
process idplot {
publishDir path: "${params.outdir}/", mode: "copy"
input:
path(msa) from msa_report_input_ch
file(json) from gard_report_ch
file(trees) from tree_report_ch.collect()
file(gff)
output:
path("idplot.html")
script:
template "idplot.py"
}