Skip to content

Commit

Permalink
Update test for long deletions
Browse files Browse the repository at this point in the history
  • Loading branch information
sposadac authored and DrYak committed May 8, 2021
1 parent 18c83b9 commit 0334e4d
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 21 deletions.
8 changes: 4 additions & 4 deletions examples/long_deletions_test/SNV.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ REF 15 G A * 0.1303 0.1423 * 0.9981 1.0000
REF 19 C A * 0.1049 0.1235 * 0.9977 1.0000
REF 19 C G * 0.8951 0.8765 * 1.0000 1.0000
REF 29 G C 0.0254 0.0187 0.0292 1.0000 1.0000 1.0000
REF 51 A --------- - 0.8343 0.8414 - 1.0000 1.0000
REF 51 A ------------ - 0.0047 0.0040 - 1.0000 1.0000
REF 50 AAGTTCGGCC A - 0.8343 0.8414 - 1.0000 1.0000
REF 50 AAGTTCGGCCGAG A - 0.0047 0.0040 - 1.0000 1.0000
REF 65 A T 0.8401 0.8455 0.8553 1.0000 1.0000 0.9999
REF 100 A G 0.2388 0.1016 0.1064 0.9999 1.0000 1.0000
REF 101 T A 0.7612 0.8984 0.8936 1.0000 1.0000 1.0000
REF 103 T A 0.9584 0.9674 0.9687 1.0000 1.0000 1.0000
REF 103 T C 0.0416 0.0326 0.0313 0.9996 1.0000 1.0000
REF 108 T --------- - 0.0009 0.0010 - 0.9796 0.9823
REF 111 A --------- - 0.9665 0.9677 - 1.0000 1.0000
REF 107 ATGAAATCTC A - 0.0009 0.0010 - 0.9796 0.9823
REF 110 AAATCTCGGC A - 0.9665 0.9677 - 1.0000 1.0000
REF 119 C A 0.0009 0.0010 0.8238 0.9796 0.9823 1.0000
REF 129 A T 0.1064 0.1762 0.1398 1.0000 0.9995 0.9994
REF 239 T C 0.1325 0.1367 0.1334 1.0000 1.0000 1.0000
Expand Down
8 changes: 4 additions & 4 deletions examples/long_deletions_test/SNVs_0.010000.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ REF 15 G A * 0.1303 0.1423 * 0.9981 1.0000 69 3 536 14 0.566433
REF 19 C A * 0.1049 0.1235 * 0.9977 1.0000 81 5 686 33 0.719778
REF 19 C G * 0.8951 0.8765 * 1.0000 1.0000 605 28 686 33 1
REF 29 G C 0.0254 0.0187 0.0292 1.0000 1.0000 1.0000 19 1 1030 86 1
REF 51 A --------- - 0.8343 0.8414 - 1.0000 1.0000 728 143 929 192 0.915081
REF 51 A ------------ - 0.0047 0.0040 - 1.0000 1.0000 3 1 914 191 1
REF 50 AAGTTCGGCC A - 0.8343 0.8414 - 1.0000 1.0000 728 143 929 192 0.915081
REF 50 AAGTTCGGCCGAG A - 0.0047 0.0040 - 1.0000 1.0000 3 1 914 191 1
REF 65 A T 0.8401 0.8455 0.8553 1.0000 1.0000 0.9999 1211 336 1451 407 1
REF 100 A G 0.2388 0.1016 0.1064 0.9999 1.0000 1.0000 163 120 1362 768 0.270704
REF 101 T A 0.7612 0.8984 0.8936 1.0000 1.0000 1.0000 810 476 968 596 0.846958
REF 103 T A 0.9584 0.9674 0.9687 1.0000 1.0000 1.0000 853 540 885 565 0.98614
REF 103 T C 0.0416 0.0326 0.0313 0.9996 1.0000 1.0000 32 25 885 565 0.611461
REF 108 T --------- - 0.0009 0.0010 - 0.9796 0.9823 0 1 636 434 0.811215
REF 111 A --------- - 0.9665 0.9677 - 1.0000 1.0000 603 407 635 440 0.919726
REF 107 ATGAAATCTC A - 0.0009 0.0010 - 0.9796 0.9823 0 1 636 434 0.811215
REF 110 AAATCTCGGC A - 0.9665 0.9677 - 1.0000 1.0000 603 407 635 440 0.919726
REF 119 C A 0.0009 0.0010 0.8238 0.9796 0.9823 1.0000 77 55 717 498 0.953245
REF 129 A T 0.1064 0.1762 0.1398 1.0000 0.9995 0.9994 210 183 1715 1402 0.789111
REF 239 T C 0.1325 0.1367 0.1334 1.0000 1.0000 1.0000 57 240 448 1815 0.965147
Expand Down
28 changes: 15 additions & 13 deletions examples/long_deletions_test/test_long_deletions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def read_counts(chrm, start, end, bamfile):
return read_count.forward, read_count.reverse


def count_matching_variants(chrm, start, length, variant, bamfile):
def count_matching_variants(chrm, start, length, variant, is_del, bamfile):
variant_count = StrandCounts()

with pysam.AlignmentFile(bamfile, 'rb') as alnfile:
Expand All @@ -55,7 +55,7 @@ def count_matching_variants(chrm, start, length, variant, bamfile):
# least one read - including positions outside the region of
# interest, unless 'tuncate=True'
for read in column.pileups:
if variant.startswith('-'):
if is_del:
# Handle deletions
if -read.indel == length:
if read.alignment.is_reverse:
Expand All @@ -82,31 +82,33 @@ def main(bamfile, snvsfile, outfile):
df_snvs = pd.read_csv(snvsfile, sep="\t", header=0, compression=None)
# convert to 0-based
df_snvs["Pos"] -= 1
df_snvs["Pos_end"] = df_snvs["Pos"] + df_snvs["Var"].str.len()
del_mask = df_snvs["Var"].str.startswith('-')
# Deletion length
aux_len = df_snvs["Ref"].str.len() - 1
del_mask = aux_len > 0
df_snvs["Del_len"] = np.nan
df_snvs.loc[del_mask, "Del_len"] = df_snvs.loc[del_mask, "Var"].str.len()
# Pysam pileup: length of the deletion is recorded in the preceding
# position
df_snvs.loc[del_mask, "Pos"] -= 1
df_snvs.loc[del_mask, "Del_len"] = aux_len[del_mask]
df_snvs["Is_del"] = del_mask

df_snvs[["Variant_forward", "Variant_reverse"]] = df_snvs.apply(
lambda x: count_matching_variants(
x["Chromosome"], x["Pos"], x["Del_len"], x["Var"], bamfile),
axis=1, result_type="expand")
x["Chromosome"], x["Pos"], x["Del_len"], x["Var"], x["Is_del"],
bamfile), axis=1, result_type="expand")

# Revert operations for the reporting position of deletions
# Temporarily change to start position for the deletions to count the
# number of reads that cover the long deletion in full length
df_snvs.loc[del_mask, "Pos"] += 1
df_snvs.loc[del_mask, "Pos_end"] = df_snvs["Pos"] + aux_len
df_snvs.loc[~del_mask, "Pos_end"] = df_snvs["Pos"] + 1
df_snvs[["Depth_forward", "Depth_reverse"]] = df_snvs.apply(
lambda x: read_counts(
x["Chromosome"], x["Pos"], x["Pos_end"], bamfile), axis=1,
result_type="expand")

# Revert operations for the reporting position
df_snvs["Pos"] += 1
df_snvs.loc[~del_mask, "Pos"] += 1

# Clean-up for comparison
df_snvs = df_snvs.drop(columns=["Pos_end", "Del_len"])
df_snvs = df_snvs.drop(columns=["Del_len", "Is_del", "Pos_end"])

# Load output produced by ShoRAH for comparison
df_out = pd.read_csv(outfile, sep="\t", header=None, compression=None)
Expand Down

0 comments on commit 0334e4d

Please sign in to comment.