From a7c9743f39768ede387ef08c0dbce86c9d20d5c0 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 3 May 2024 12:54:10 +0100 Subject: [PATCH 1/2] Add explicit testswith indels Closes #106 --- bio2zarr/vcf_utils.py | 2 +- tests/data/vcf/chr_m_indels.vcf.gz | Bin 0 -> 12933 bytes tests/data/vcf/chr_m_indels.vcf.gz.csi | Bin 0 -> 116 bytes tests/test_vcf_examples.py | 1 + tests/test_vcf_utils.py | 10 ++++++++++ 5 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/data/vcf/chr_m_indels.vcf.gz create mode 100644 tests/data/vcf/chr_m_indels.vcf.gz.csi diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py index 30fc9d3..78690fc 100644 --- a/bio2zarr/vcf_utils.py +++ b/bio2zarr/vcf_utils.py @@ -441,9 +441,9 @@ def count_variants(self, region): return sum(1 for _ in self.variants(region)) def variants(self, region): - # Need to filter because of indels overlapping the region start = 1 if region.start is None else region.start for var in self.vcf(str(region)): + # Need to filter because of indels overlapping the region if var.POS >= start: yield var diff --git a/tests/data/vcf/chr_m_indels.vcf.gz b/tests/data/vcf/chr_m_indels.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..a2510e984d0ca936bd20f06e274a7ae57becfe9a GIT binary patch literal 12933 zcmV;0GJ4G)iwFb&00000{{{d;LjnMgD(qToQ{%`L{p|dTuIz`*?i$lCy{j>fbM8I&_HF<0!@VDP_ffoX z)Be}t@q_H~Km72+@#)$4=(_*Q>8O7>xVh<$ykrvl%hZp;{vQWHK=4%R#R*xuN#afa z_-8ybI2&gVTzt8>y*@hLeCFC)#$MuuX+qrai7cZe@$UkUxN4fvP5lS;I`Gt6DM{yk zLV{@Gs)w`5YX+S~k5?-<*qk44l7He|KYePB|9W+EJ&xUQGWSj|Mn`9k)1x$v7^V3A z^X4?;c$Llx>`Z)^gvm&nKS=`b!Nazrm}HZXMKs0VuocsjvuH|wA$zt#Evv_DYQM^c z_cGz2ZUOnFPrfOmcuz=6zF^SJ#U;$KzXuXbp#?-p!KCb&`0)w@-e>)TjpZvinCMB< zwsLd6T%Qkerycy-y;v>oytvQ0=k4Y5o`$J6^J2}+{)9}uWjar?nNw^q_U;iaQxzmda3sVJvyY;9>cu4RleCDw z+M1)w#$NPmE|+oi%}3m;h1_YrIQgVrC*;w`>+`EVHw>(m|9OW&YB)kT_6h~?rav~9 zKe=kN{7C^AxvEEPZI*FkmYX#8!^vQ98u*B*y*#K^q5sd7r$!WXxMYST5f(s^Pj{=7z!>6AVkDE|?c_%? ziNesEAkW0!1W}T#pD$OP#nEcHwS%*A11B%hsca4We|gg`xtzoM_Y&*}PzC<)h~Rk? zP4nKQZn|ERJq>NOTj6gjg=nW z`EJt7OggPJ4qm~sp1i<50J^eCOzohJ<2u$KSt^tKB|yeSXudSDTa2_Q~h@%C|Y`e^TxEBWsbO(DOL^(t@Y6XhtHnq;ft_;poB@kRVI9c|cQdl@y=d zy8%kHRjAZNc~9mvs$z6kXy3UZS@_@574XY?FAaG>q^HZBybOGc}Cx3T$aw)NyPL z`@F&$bTAv;*X=QXTYHg7B}Ig6kNJBwjm5bonP$j&HH~SQ8N^jrq_6Hd4)*u=XUqF! zA0_PqRrAX&w|DPi>AT5&8bv{J``~+zU!ki?nOKjppXX^3_;-8i^`2~w z9Ktk+d|$mLdkOh`LMaohoB!s;Z$Yr##jOu*4wBx(#M7tw%McV1;x`<%?W0xb9goC3)wTA z!_7)U8N=8ia=V(IS%-PP?W1#M7>7k#>W?lvC*y;`=wNhtaB_8UdDfx(^gx=joBcd+ zoCDfr2rJyr(EKw)bhDqD&jjKo`?>j9PJu8$!AWP(LC_h_D=9EE6sQeKbBhE#!g0)&~PAR$$>3v9GLb;a3Co-7$XoY_3xD&h@68A zSyTi7xh=9b$9DD53I(lU_ju{uIg~s{jp2n^p@#Hf&<}x)sH`#>Db){iW{B`-@}Ht9oZbAz8880=3+f9Fl zJ411De-_}oV#ao!s%3+pHA6l8w?@16VuQ0aYdgnhJg%91X?(#g&q886><+u* zla9r0%492g*94;i;5oJ?0;57%>ZulnhHAm=oNH;=j%>@dS#nLga)a$>EmFK2J<4>% zOCuwmYJ?2F=Co1}1(c5b4Xr0^WG5K{+VVW4jl z(+%HXJxiC)98tQ4Bc1l?%t2}YqUbt3=4jl&M4^$#KZ=I!d}i8aDa}r;s8C7rBb3=H ziV9U;Q=>fvrnaPDJyRc?ML}U0(%LBMo(12Qr4GwF_-0;9NL5E`$~l86=((fnx~&Lw z98pA|wCczU>m!giSmFI#P(zxpK^`exCyWyQ)Wn`)uLblbr|qD39I1z1n$M=a5_-WA zZcXUn6=fkU4PHZDen>T9@B(?;5tjKbx`WDBbCyB6D4;h@>lH^Cb4RUbZ@a!!1)#!T z^q8qhKDHItnSMR z!xB2y(5=i^GpK39ox) zLH*{ETKm>ksr}|+`^D+2BK*x|_;b@2E~?7&!Fb&Hsf(ZE?qH~Xz)LCG%Wra0u+P>< zsoW8`u|8($ozyn8xwX)C0}*V#-b-<|@7fzqy3;t-!m@f?AN_omDh6JlkQL5@vwtB)W$8@o7|IGJXa^`NG3|=Eu-`bp?6SKuWhR+!$~vu zPl@WjoqrmJRrVuT*KBqqc3YQwsI5B~NnTXvc+v1t-YZKJX^WbRn(+1iyXZT(sGYg! z0R6F_lz*ak-E>cF(WO_;&Bm10A*rn^hH;x$IV6|OgCIlgA#^@zT5PSE>>0&u7^TuV z*D58?OEgP0YPO`38igMk_1hLMwmSX4KI@6S_i2*#-lt90d#6U3;p86FVL~0^-*WMj zGjF2L_KGr`J$LDl-{**R#inXBT!)mG4g&qrb%zsoa}yVxRAT zTG=%etcIG?lcGQkr=%KclF`@5uio9a5!8IXf#T-p1(Nyo4NfW@d`koE^%mN-eK1iq zzxM3TbXPa(7A1pU;wuJi>pNWHmZuQLwFi*Qh@1w#El{W{t@04L-9be1EENi0=%^{w z|L*0D$;(DSkK)@dt@2WJE5~g8u~KLWHF=SLP`z-`me8soswbXo<$^?^>ABQPmDQ!h-x5uL(GHqJHtV6cNXD1R=~ag7B3c!CQ5T)wN~O zDc-16n>G|pGfT%6?r1X|+&e?_gumoT4h@o@S?6N2ZT(6GLzLDAp`Mh zeyR+#h3f8nr*fxar7ie%<%|b+NNI1M^geutR16GEuA%9W%a?%Qa*cYl1ha*zL84~+ z-<76>>X>IgoZTL1ZW=oiTn=6cSv!(;%w7 zA+ksBh_WHK#q~RZp)bUXVyZ&kRWXpa+heHoK}Gt{mV|gw<59@rg?za%C86A8lZ#NE z3-BAWm09^9tyA1(7nLjIFjJL9Dpm7|9{pb6?nw^L8pckx{k2g)UXdNu*~wcWu) zyTrIgjVqmz+EXA9*zF-qS!|L?%rCa<7dyRb*$tLuwamCiNfrNEr1aZ8gz~>Q6C$A? z0>KKJJ>DRf3;b4QJ(RtT}^7bR7hZFmQ3t4KD;NT*HA zkXVSPN7M3ImS$ZL_R>paa1~q!HTGni=&KdY+Zj+`7>>on5$Qx;L4(iAxzL@d?^r1jgDxu zQVM+3Kw!Jj2x|AE&9;s`!NtC$H8)@ zk{)^jgzA=4)%MMsdNc~Fzvv!~B?ANV?=8eOx6J?1`2c>1I~;i3bJqMvbf)jMaYxd) zdOT>=g_c&<7@i|H&?8=`{J^jqXu7LUb6425I>s|S%~c5v9$M-7)0z}7d`qp393UlP=4|vjkV~4z^~)P>7@%QoMP72%olp-d)~WVqunJ6;^j<;*tAK|O!xzRb`Gnx z$U|X;!vm_qIm3niP#R>A0=@1mNVP<5#f~e!c^l=tSIjoT6HyS0T*DX%pvom1O^;gq zJIIO+#UYOGAfuvhp^`=~cU%jVC>CX)VFt~#@_*FwM7f~=7T;~9}$A!I>L-%}{LwYF%Q zb*CkTf6%G+TCS3o8mR-1x4746nGW|FmclXdh`Qzi7*MiQeOW>2wje6^TTP7w=&g~5 ziArouBVAP8$r$`^e8Wj8Qy}w?3yCybu*N=(?mi7wL`kbP&=d<=%|vC`cw>r05O}p# zX>GgINz30F5mh*3_*z+QR;tTd_3MxK7+lDH(dF$J<$)7rp2vU9jnv50wh?RDS zsz#-`PN)K> z&!p=Ur_%MAbEy*g2v2abyh=ilfmh^9TVIKkCWv@U>n1`yqPO3n;Ot-jMUto2&S zanSffJOH;zv}=cy4=aG|x@AkB4-1GX!GK@^iS>lSf=r%J-w%kc?m@5>4lB)V1Az*0 zqMGB*OsOS+J45f+i~1A>4SI zSYSbK{Z!;AJcIdAYu@NVNiRLzA|kitXj=b!PFl1UDraMl$`b+E0W{YMaIO>ZB-_rM z_(AaZy59{kOe$~}d_B7a9r|z36*J%vrBibmPCleUXd%`EY`!VPGS%-1cd*V5H`8@b z+{RFHlNXcICY|raew(TM0hCvby4>pI%2bO20Dq7WK#q~Ejh?H~g4cB@NaTG&7 zJtGTlnWoy>ay*}lD9tMHv+NP&2K>?}CXs4}j8J04%XY==-$jXyxr#M4DdpTGGBkiu zzE#>bOl@bFpH`%$iyhSc zdnl(bi?q?hiXKU6HmJA57wzt15^8Fi0dN<16;ay^J~iN=8iY&xMIs}O)HZ`J4Z3J7 zE}Ha(ir))+RrP6VCN@jat>gRT;=tmZK!#8)I4S3jY~56Sp-V3^6YsfWnhF56WEVnl zAyP~Hy1@cQs_DWYu<#iY|94)irX=K$GvfMxvIPo_><|pA7i&^2hbV%D+W*wMT0p7k zPqu=U(m;nS4vTrVkH4gn5jAskIZ;U+jEE;hR9I2Mo#7LwJV5PK%9{Ntzdl=|06Q z9@46u(s*9F_V0MIQ#w>h3j%Hkf|r7i$X8{^?T#UAoXN{=;jcVBU!{ha;6t}%kLI9M z7R&lut|vb>W-azlwl+)o!((LL#jS-J8X7Vk7i)FQK>d&`$blwSLw#tgH#a;ZipNQ75yWnTIjM}`s3o&+ab>}p`riZC83pfIZv2Z%mN%E@F~6ohH?A3DdOYjOt2LRH^cQhZl>KK4Zuw4?ywzdq zi(24HksOn*x*Kr!21XA^%(Xy7dy1i1K{zy;b<<|SIwGvyl#>s~w=s7iv}Qd7-DXYl z;KZQFE_wbY_zWjz!s$6jK58f{seSAYZhGMnjzohOx}58FXqITU&r2~&_N<3??EGw$ z1M@R^WEYS^Y7$YSzDmdLyQ~(%zSCj)iZylf;bOidIHbF9 z2*>kNr}O;w`z~fJ=Gw?7O(&yK!z-=EW7`kKwXN84plUh`^}BN9y}?0grI$=&=0CI0 zz6f1pKc7aPESHYCtpOuYVV4LOjrqI{!cmUyOoMwEwlfq!;5pLoG=yhzB&A3frl<#_ z?6FBNo(CjoR)8F{9T@qNa-(M2>^LtrJ{xAJ@o=vVPQ^zj;_+#C4@pWhFkAI2`O12) zqc$t`rN>hR=m8CE)2z$`n*kKc2ulrL`+iwwhJYJ&?df6ik;F0OP*n1TZFIJ9B$`$y z_9VaZi`_(zdNQem#G&hDJr^+_axOAyp4}?;2;cW)O3R?@T)FZ%g@)Z2fX*-zowf2$ zx$knIidByV{P=s_ZypgH-%VW zXXsXQXw3vX+O?vh=t5Mma{o<7)5DXcT#)!6CU)qalrs_^^al0;%tah^masOvCL4D_ zsBhF#i%I!kU|e-KIK;&O?k7{Rb?b_rdZJvmsNuLz`oI3z`5evE5^R0r6|v1m7bBZW~kKO3L$^bD+<*@5roQ{hn_JC`a7do znL1q4r*RxxTN+?tpBNR$$jwKqPu?|so~J=U%MJO zRciAZHnI1!wegpv_frUMK6g-c=(uE;#OHp3_23kFGzmH>FrHdY(l6aWgbkswpq?hi zWMR}j`H}{5l#8v){qQ{5vx0-6Nc9||b{e^~eat{9Y}jcP1sjwcrgg&G{8-nKV`j{O zt4y?z1yAIpiB0s<8FJ#T66b|S7I?rZO8=G-%aY5|P3kChQ1wd@-g=$vxD!mIsqMks zL?75dw!&Zs5Jr!k?AB326ar1iPGpG(MF{}+(GfA=C z2zNwr_DsY^25<-=m-C{tvfT5EH2Z7!{9s0nWJ58T4WMa5$~MRrxaoOpTABf0ANBK(&(vIvSy^!@EizS(w(WKYeHDgaLc; zY-=b*6K|u_WX*?A@?or2n$-rXyxJg3)Jj{s??fH)Ac76m5Y2LvX12?to0N~LnmZjJ zHE@N6+e&~m(oiHn0p==@`6{6BK`odL&9=3pZ`~9gJKQmu)bk$etX$XM=HP!$2Z$+U zc6dPH*@l32QRC9hh#qKMcst0_zC;j-$0LajfY?6mpbm5t4xE*;lX|x{R>1Vy|MbeELs1)BHX%1s zKr|~N-?u1fecDh=`W7jBE!QMTfha2uB5U|yEY*RaCxQ}c<-W505t&|exZyx|7fviP zC3m&;gLZ_a`rh7x8B~;QmbtKXhk9cn7X<-(oQo!rKciR4rg@G=^@d3~wrQ-g+z(B= zc@v`>t@(O+Ca+OWx82mkCH)_BKq^-~cMxs6c^8p^|It4wG_kA}$v14*#GRZ2(y(68 zCR3VwKDrT}G%LFU^K{{&@+~^Csm*y|@9h*P{7@XlS%$?@F2HnUH(AjMG|;G0abwww zJ4#xs3|EnJR|sRcvh@kbX|^n$J*fLd+V{vzpH~L>4|J z-LBldhhZvbb1LVVjzu!qZf40Y^hlpm0`m;g+H7-DB5tRpq?~VRvaAh;%#%j}Jo=;O zpbeYYCGLPdkSmx_iWh8{)5bfvJ~J1;rdMX>%+?HPaq)TFv%P`dIXY@ z&Ps67Znuk|j zkJEWC9wS=E55^ipkGvA>`{H1Ph@02LMms1<#f%qe*zSg@sU&{9$4qZVO89&Aoe29k zLPG%TRdbg0P$QLiU&I5&Q185eAn?zL_f4w=peN3X`E;B5(`$7Lu#tn%%r=9{r?SU; za}k$fJDQQvjTcnOABP}NiK*_?$fqk5nrae&UxRdWdL{M1e3NksmW3}wH!5PKxukf@ ziPc^%k(@V4$7s2_N;M1S206gsq>+aG@n7XuNK?LH+b6&n*`6nv?CUHFm0n}+xM@QJ-!5omC7yu#4vCIh$a;# zgUzjauYP-_kx5dPj~^c&m#^f;h}nBGPP;KSTvOM)%bYox*RUIMP}P(O0ZpXqFQ!vq>`xn({?WFE@HE;AwG%u?N_&t!Q_K5{59p-n0z{z zUsE5LSYr^l?8gPlpW67@dT8XTIkd;qLjer^J<$RBm`e*xW1@EPicbBl|A|-4ZGE^V z7Pk+M@X$4?45HTgSC}Y;FC56xc85{dU@Mto-Wqz|rt0yL;42xn8CA?5*+3hSNRj zsqu>(478N~rZ?h|t;9&hL{ud+KLxAdVqJE-J7G2-0}2)7qL)DhAq4K(kD^W@zONw3 zP2k$M=uv7o1d0Xd7@T8Z?m?+$E{MvIuba3+?ckbzq%kA)(0$ID`&NO?jf>JQ$_{(_ zX8&S@0R|ZN_PIzMvK_C zvY-~yKye`tt&3>i6X<(F z_dRI(z33?8_B|S?r4QwMAZb(dJ!CRbp)qx0t-|zr@kEq#o5yo;Y~BTUv8cowVyVCm z(lo9_uv*^V;=b0V(cihj0$Y^ny^>9Zvodj=MAIOxJXUnyE>^N(a7f57gp@m)oY-i_N#-or+c)0v#>^W z@cOJ92d$&pd1vMCd7^!qxl=0x(OcZZcKmc}8LN{u6zB4#wUoO8)Jto*HwAaL#wViF zYDsr+=CX@;ynx4xceij}mW9KHGIsuWHDWLqoQXqRsYf8TM;Tz@!c%>H!ZZhyz$%i- zj5;SwimC0nGFML}kE-oVT^OA4O^>p#P5$`3U}N9V!{)g`x!IZxn5d;gcxZrcm}+Zn zDvXQ-!Dk*p?Ubd5Z1U7ua@JG z0ngclBK@YZi!|p$q^=WKUFXm$?KD5YiW&O`>ypd`-=mdHmkBKG-eQGC2b*U$Q4Qlc zd-ZyzDY-k-;xD~XeAzE>O?$_7kypj~E`{thm=Fk~kp01y;5eJ?4=Rb4W4vNaYlZG3 zB5lGcLi08ig>0M`?vBCKdwCV?Xuu*u1Q{OWoLa~Gt$mCeeLrFss6TVef|4y(NtBE^ zzC_M)K831F!|%5Xy0p9839WH?p{9tF zosDjLG)FCP{kp{}N1}D>>fBN3vxP2cGRf!12ThfPY6XA#K=%`xf32GqBW)hQc}lWM z1n1GHHojR#6oLia;d-rBjuJ}fhl#RpWO#v6h*L;-&FIZb;nCoh@5XI=<(8n#MGh9u z?hnjYd%Vl95fAGzw_oiaic)fUJm8f1HRZ{Pc^8sR7Mxy(n(z|zRg8J&f8gZBJDdgXDX zjT5|HsD=r(;a*%rZPT;h=5$1EvRzLM0je(6Ep-45szzMcO%q!ZE?|y#BYYoTp}H#^ zwcLkQ9oRBLFF<<22>j}0>2@5NIv*mgAc9B7yBFwSxSJXErtE$vc?#F!I%=-IH;QxS zR|enNAN-R7#n1eopXnn;gZ%vb`Spc=^ZD_G4;V;~Z^3`@kMS>le69b?<>QGT`9ZM* z3cu^&tJIf$XfWa%(3HjPzS790s6sd;J%pmPo{B+V)Xq_ge=kEO6?@-ha+^*VW?xn> zBGa9e2&2q1k;;pa?Dbu-?DdV&Y!dB3wfB$9Pnrh>BG;&?$7dJuF$Dx659Aa~7BhA!sPq}GoX z)~jhrtT18Cqpr(NBj%!QpXo#Ui8Wxa>(J_C7E8Cmk8~rTdFn!+evt1aQna-LpBm9c zai;RYsJ8Z_XxM0O1JlU8B%(rpfYJZ{pZ^1zuK)`G{{R3ViwFb&00000{{{d;LjnNU z2i2L|lG{WOhOf<|1Vv1D&#CS5h4F?ZRY};*y0{O)Knh4wNvPuS={ZRwY3$eq7bIKO zj4X}p%-8+*!GFI#e*F0T@zYlIO0nI1_pnu)r*D7ya6JC}=IyWhcgJ6Tk!wJXHqW0u z+3mO3nGWLoySPY^T?!@*38I9@c|i z8z}3cbJmn$HkDUd@2oBBWxCcTuh*ut&Xi#`mDg9VfBW!!Mar-G)qa0i?YqPN(Cy{0 z0&5k+O19+X;lr!9uYWr}eR1>T8MEy9r+1*j-mipF{Mbzf1m07k!W!~SnEB1_#nx`V zef9BJfzeiP_J6#2|6bsicOR>s2sSr&TZ0ar_M0bv4`vfAM)yYEOy&X5*$ylF0Ef2u z@xeS?&kO62k25w}hv~JRTSvI4vw46Wtb?3gn#T#-05UX?HMABuiHrba9){UeUU@ax z(Jt(@t38^sTBd8}yc$qmUe%MvnH-9Fa4?T(9f$7GN;@RtD=ou%pFGzYml=h(f?=qs zVTeyI%tDMsn`9N1!0EYF0Ely&hM4H!{n=W$L!6k4HJV!ANvy^6o*P=iNlOrW;-!{Q z(GpNFYKcR>z*zckqKuePMv@Q9U%sYzxTbh|&4V96gD=-IJ-|RO@B=0`y}f{3p(cK$ zo4zX#YCrnaNlc-C>QoLGtT6atlP46g}bi{Nk0{3WzY`nfm+y+S6GH3Zf$uX9=#CA36 z1~EUxI%6%=suolUGcw;qCXk0TNzalbs=iL}DCkW~&4h&D<6a` zKIJi@P@3rYwX)YGGdNZrST%B3=Xf*ZNiwaTLspR;#@1Op7Ly4j(P8yS^dZDAVm-H$hZlEZCH|yB1U`Wj_;vvI zt$5&4ma8;roW=ukq86u0W=Slv;h2tGEey^pAMM3B=W{H_R%$E%!)PsH_>I&yhjRIb z9tUFq18{jbfT56?&?V=27~?VB4{X!@GMQL|4oQ0+<1yXutm|?CTw9^+Bbi~0$3mDR zVJT{MA_fM%)%wb!Msn++%0xOZ1-*e*wHqWq5;vMAIh$NUJlP~64W$Z3=%i0lF(-69 z|5m{a0)1@)%0`ofV=5p$6t5ffA=4Li7~=wZx)T0H_-AKJleowzW@k%7xyV#zXSMlY zOd7?2M10ubXO&@Sk!Xp)E&H%7K`@+BdaZ2;!=rhi^tGftBBI2xs*>IpwNeJcq6|>$ z0$O8Y@Q~o(8uk#ifF$jQM#~I`mf4skK&vToK5%%5_TXV+52#(a1AR*FJ=S-~7LH;c zSjM5|C|g`)98ICrvz2`JA`u_Ld^4@{U|NpsrTG|1Vfc5N)Y6+DyWWN}uS9;XqJE0- z9GX)N$u_=VYIHW0E6JRO3X@U)M@%6>+pTU_2o?=r?o5&mH6ck6*Mej@dy_$iGu|Y< zVl0_9s#;q`Q6my+cN0BDBv1SUk z&O755NEoP9hB9V9ad47?6r^M-3cwdoNXPIrL?uzU` zRAq6fRmm~Xg{q~acgawzmXl<%@CoVIVYia%ez%goFL`@iLubg9L6Ef*c-u_iITJ&T zy!g2a1e6^jNe8oEGs{Oh@HrbZww0YLm%RlVnq{+ODK~}`L~z%^Ie}*w=de!T8pb)i z6ZnR40@I{)AhctN;kvpzD9>2_B$5#ji5r*&px{`=XZ-ZnL@|WwL8*G;!Y8>4*rHU` v&x7Q@#I64aq~eyjW;S9xm-%_3=CnO{=C%G!5F(tWa@G_kWm?hdV*|@9b;iP$! zB_zHqRhXn;C^5TH@iK=?m2XlbXT#OSr_ZMy;A1d&Qjj(gXq-HnxzY^GU~51G01MtA ALI3~& literal 0 HcmV?d00001 diff --git a/tests/test_vcf_examples.py b/tests/test_vcf_examples.py index 07545b1..909efc7 100644 --- a/tests/test_vcf_examples.py +++ b/tests/test_vcf_examples.py @@ -837,6 +837,7 @@ def test_duplicate_paths(self, tmp_path): "1kg_2020_chrM.vcf.gz", "field_type_combos.vcf.gz", "out_of_order_contigs.vcf.gz", + "chr_m_indels.vcf.gz", ], ) def test_by_validating(name, tmp_path): diff --git a/tests/test_vcf_utils.py b/tests/test_vcf_utils.py index 4800176..5c1e75f 100644 --- a/tests/test_vcf_utils.py +++ b/tests/test_vcf_utils.py @@ -29,6 +29,13 @@ def test_context_manager_error(self): with vcf_utils.IndexedVcf(data_path / "no-such-file.bcf"): pass + def test_indels_filtered(self): + with vcf_utils.IndexedVcf(data_path / "chr_m_indels.vcf.gz") as vfile: + # Hand-picked example that results in filtering + region = vcf_utils.Region("chrM", 300, 314) + pos = [var.POS for var in vfile.variants(region)] + assert pos == [307, 308, 309, 312, 313, 314] + # values computed using bcftools index -s @pytest.mark.parametrize( ("index_file", "expected"), @@ -58,6 +65,7 @@ def test_context_manager_error(self): ("1kg_2020_chr20_annotations.bcf.csi", {"chr20": 21}), ("NA12878.prod.chr20snippet.g.vcf.gz.tbi", {"20": 301778}), ("multi_contig.vcf.gz.tbi", {str(j): 933 for j in range(5)}), + ("chr_m_indels.vcf.gz.csi", {"chrM": 155}), ], ) def test_contig_record_counts(self, index_file, expected): @@ -82,6 +90,7 @@ def test_contig_record_counts(self, index_file, expected): ("1kg_2020_chr20_annotations.bcf.csi", ["chr20:60070-"]), ("NA12878.prod.chr20snippet.g.vcf.gz.tbi", ["20:60001-"]), ("multi_contig.vcf.gz.tbi", [f"{j}:1-" for j in range(5)]), + ("chr_m_indels.vcf.gz.csi", ["chrM:26-"]), ], ) def test_partition_into_one_part(self, index_file, expected): @@ -106,6 +115,7 @@ def test_partition_into_one_part(self, index_file, expected): ("1kg_2020_chr20_annotations.bcf.csi", 1, 21), ("NA12878.prod.chr20snippet.g.vcf.gz.tbi", 59, 301778), ("multi_contig.vcf.gz.tbi", 5, 5 * 933), + ("chr_m_indels.vcf.gz.csi", 1, 155), ], ) def test_partition_into_max_parts(self, index_file, num_expected, total_records): From 76980778f8f2d913f291d77bfb25693f11c2ca81 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 3 May 2024 13:00:47 +0100 Subject: [PATCH 2/2] Update du tests for added files --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index c6e8f45..6fd71f8 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -192,8 +192,8 @@ def test_5_chunk_1(self, n, expected): # It *might* work in CI, but it may well not either, as it's # probably dependent on a whole bunch of things. Expect to fail # at some point. - ("tests/data", 4960266), - ("tests/data/vcf", 4948129), + ("tests/data", 4973315), + ("tests/data/vcf", 4961178), ("tests/data/vcf/sample.vcf.gz", 1089), ], )