-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathselect_coding.pl
executable file
·58 lines (47 loc) · 1.22 KB
/
select_coding.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env perl
use strict;
use warnings;
use IO::File;
# Phase 1: build a lookup table from Ensembl gene ID to HGNC symbol.
#
# Input:  $ARGV[0], a tab-separated table. Any line starting with
#         "ensembl_gene_id" is treated as a header and skipped. Column 0 is
#         read as the Ensembl gene ID, column 1 as the HGNC symbol and
#         column 7 as the gene type.
# Result: $map (hash ref) holds ensembl_id => hgnc_symbol for every row whose
#         gene type is exactly "protein_coding" and whose symbol is non-empty.
#         Protein-coding rows with an empty symbol are reported on STDERR.
@ARGV >= 2
    or die "Usage: $0 <gene_table.tsv> <data_table.tsv>\n";

# Passing an explicit 'r' mode makes IO::File perform a three-argument open,
# so mode characters in the file name (">", "|", ...) are taken literally.
my $fh = IO::File->new($ARGV[0], 'r')
    or die "Cannot open file: $ARGV[0]: $!\n";
my $map = {};
while (my $line = $fh->getline) {
    chomp($line);
    next if $line =~ /^ensembl_gene_id/;    # header row

    my @lineContents = split(/\t/, $line);
    my ($ensembl_id, $hgnc_symbol, $geneType) = @lineContents[0, 1, 7];

    # Blank or short rows have no column 7; skip them rather than comparing
    # an undefined value (which warns under "use warnings").
    next unless defined $geneType;
    next unless $geneType eq "protein_coding";

    if ($hgnc_symbol eq "") {
        print STDERR "Skipping gene symbol: $ensembl_id due to missing HGNC ID\n";
        next;
    }
    $map->{$ensembl_id} = $hgnc_symbol;
}
$fh->close;
# Phase 2: stream $ARGV[1] (tab-separated) and print to STDOUT only the rows
# whose first column is a key of $map, with that first column replaced by the
# mapped value. All other rows are dropped; this includes a header line
# unless its first field happens to be a key of $map.
#
# Passing an explicit 'r' mode makes IO::File perform a three-argument open,
# so mode characters in the file name are taken literally.
$fh = IO::File->new($ARGV[1], 'r')
    or die "Cannot open file: $ARGV[1]: $!\n";
while (my $line = $fh->getline) {
    chomp($line);

    # A limit of -1 keeps trailing empty fields, so each printed row has the
    # same number of columns as the row it came from.
    my @lineContents = split(/\t/, $line, -1);
    next unless @lineContents;    # blank line: there is no ID to look up

    my $ensembl_id = shift @lineContents;
    next unless exists $map->{$ensembl_id};

    print join("\t", $map->{$ensembl_id}, @lineContents), "\n";
}
$fh->close;