-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfetch_samples_from_newick.py
47 lines (39 loc) · 1.23 KB
/
fetch_samples_from_newick.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
"""
Created on 2022-11-03
@author: Yudongcai
@Email: [email protected]
"""
import typer
import gzip
from ete3 import Tree
def _open_text(path: str, mode: str):
    """Open *path* as a text stream, using gzip when it ends in ``.gz``.

    Args:
        path: File to open.
        mode: ``'r'`` to read or ``'w'`` to write (text mode is implied).

    Returns:
        A text-mode file object usable as a context manager.
    """
    if path.endswith('.gz'):
        # Explicit UTF-8 and newline='\n' give the same bytes <-> text mapping
        # as calling bytes.decode()/str.encode() by hand on a binary gzip
        # stream: lines split on '\n' only, and no newline translation on write.
        return gzip.open(path, mode + 't', encoding='utf-8', newline='\n')
    return open(path, mode)


def main(infile: str = typer.Argument(..., help="input newick tree file, can be gzipped"),
         samplesfile: str = typer.Argument(..., help="keep only these samples, one sample per row"),
         outfile: str = typer.Argument(..., help="output newick tree file, can be gzipped")):
    """Prune every tree in a Newick file down to a given set of samples.

    The input is read one line at a time; each non-blank line is parsed as a
    Newick tree, pruned with ``Tree.prune`` to the names listed in
    ``samplesfile``, and written (``Tree.write()`` with its default settings)
    as one line of the output.

    Args:
        infile: Path of the input tree file, one Newick tree per line. A path
            ending in ``.gz`` is read through gzip.
        samplesfile: Path of a plain-text file with one sample name per row.
            Surrounding whitespace is stripped and blank rows are ignored.
        outfile: Path of the output tree file. A path ending in ``.gz`` is
            written through gzip.

    Raises:
        OSError: If any of the three files cannot be opened.
        Whatever ``Tree(...)`` / ``Tree.prune(...)`` raise for an unparsable
        line or an unusable sample list is propagated unchanged.
    """
    # Read the sample list before touching the output path, so a missing or
    # unreadable samples file does not leave a truncated output behind.
    with open(samplesfile) as f_samples:
        sample_ids = [name for name in (row.strip() for row in f_samples) if name]

    # The context managers close both streams even when parsing or pruning
    # raises; for gzip output this also flushes the stream trailer.
    with _open_text(infile, 'r') as f_in, _open_text(outfile, 'w') as f_out:
        for line in f_in:
            tree_seq = line.strip()
            if not tree_seq:
                # Blank lines (e.g. a trailing newline) carry no tree.
                continue
            t = Tree(tree_seq)
            t.prune(sample_ids)
            f_out.write(t.write() + '\n')
# Script entry point: hand main() to typer, which parses the command-line
# arguments declared in main()'s signature and then calls it.
if __name__ == '__main__':
    typer.run(main)