From 8f9f93f3936772301b327c8466f2e3a155e05575 Mon Sep 17 00:00:00 2001 From: Florian <45581601+TheGreatJack@users.noreply.github.com> Date: Thu, 28 Nov 2024 18:06:52 -0500 Subject: [PATCH 1/2] Update __init__.py softclip handling Adding softclip and hardclip handling to output correct reads with these kinds of alignments --- BioExt/misc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BioExt/misc/__init__.py b/BioExt/misc/__init__.py index 00260b8..17cba64 100644 --- a/BioExt/misc/__init__.py +++ b/BioExt/misc/__init__.py @@ -345,7 +345,7 @@ def gapless(seq): raise ValueError('seq must have type SeqRecord, Seq, or str') -_cigar_regexp = re_compile(r'([0-9]+)([M=XID])') +_cigar_regexp = re_compile(r'([0-9]+)([M=XIDSH])') def gapful(record, insertions=True): From 7878316794ff18e59001359e98917b7c35ba4384 Mon Sep 17 00:00:00 2001 From: Florian <45581601+TheGreatJack@users.noreply.github.com> Date: Fri, 29 Nov 2024 23:13:31 -0500 Subject: [PATCH 2/2] Update __init__.py Hard clipping removed. Hard clipping doesn't include the "clipped" sequence in a BAM record, so there is no need to skip over the query sequence. Adding "H" to the match would clip things that it shouldn't in hard-clipped alignments. --- BioExt/misc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BioExt/misc/__init__.py b/BioExt/misc/__init__.py index 17cba64..c22c9df 100644 --- a/BioExt/misc/__init__.py +++ b/BioExt/misc/__init__.py @@ -345,7 +345,7 @@ def gapless(seq): raise ValueError('seq must have type SeqRecord, Seq, or str') -_cigar_regexp = re_compile(r'([0-9]+)([M=XIDSH])') +_cigar_regexp = re_compile(r'([0-9]+)([M=XIDS])') def gapful(record, insertions=True):