From 1895a6c669c0db9ce1ded6519441a323ddb25b4f Mon Sep 17 00:00:00 2001 From: Amandine PERRIN <amandine.perrin@pasteur.fr> Date: Fri, 17 Jul 2020 18:04:46 +0200 Subject: [PATCH] Tests for utils.py updated --- .gitlab-ci.yml | 1 + PanACoTA/utils.py | 33 +- .../exp_files/baseline/test_plot_dist.png | Bin 14698 -> 14693 bytes .../exp_files/res_H299_H561-ESCO00005.fna | 6 +- .../exp_files/res_test_write_discard.lst | 6 +- .../res_test_write_discard_1genome.lst | 2 + .../exp_files/res_test_write_info_qc.lst | 12 +- .../exp_files/res_test_write_lstinfo.lst | 12 +- .../test_files/list_genomes-multi-files.txt | 4 +- .../test_files/lstinfo-miss-1genome.lst | 6 + .../annotate/test_files/lstinfo-no-genome.lst | 6 + .../annotate/test_files/lstinfo-no-header.lst | 1 + .../test_files/lstinfo-not-all-filled.lst | 6 + .../test_files/lstinfo-not-int-nbcont.lst | 6 + .../test_files/lstinfo-wrong-header.lst | 3 + test/data/annotate/test_files/lstinfo.lst | 6 + test/test_unit/test_utils.py | 798 +++++++++++------- 17 files changed, 577 insertions(+), 331 deletions(-) create mode 100644 test/data/annotate/exp_files/res_test_write_discard_1genome.lst create mode 100644 test/data/annotate/test_files/lstinfo-miss-1genome.lst create mode 100644 test/data/annotate/test_files/lstinfo-no-genome.lst create mode 100644 test/data/annotate/test_files/lstinfo-no-header.lst create mode 100644 test/data/annotate/test_files/lstinfo-not-all-filled.lst create mode 100644 test/data/annotate/test_files/lstinfo-not-int-nbcont.lst create mode 100644 test/data/annotate/test_files/lstinfo-wrong-header.lst create mode 100644 test/data/annotate/test_files/lstinfo.lst diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c86ee27e..8a1bc6ad 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -83,6 +83,7 @@ unit-test-ubuntu18.04: # - py.test test/test_unit/test_pangenome -vx # - py.test test/test_unit/test_pangenome/test_protseqfunc.py -vx # - py.test test/test_unit/test_pangenome/test_mmseq_func.py -vx + - py.test test/test_unit/test_utils.py -vx - py.test test/test_unit/test_utils-pan.py -vx - py.test test/test_unit/test_corepers -vx - py.test test/test_unit/test_align/test_getseqs.py -vx diff --git a/PanACoTA/utils.py b/PanACoTA/utils.py index d40eb9c7..e49c0b7c 100755 --- a/PanACoTA/utils.py +++ b/PanACoTA/utils.py @@ -309,7 +309,7 @@ def run_cmd(cmd, error, eof=False, **kwargs): call.wait() retcode = call.returncode except OSError: - logger.error("error : {cmd} does not exist") + logger.error(f"error: {cmd} does not exist") if eof: sys.exit(1) else: @@ -399,14 +399,14 @@ def write_warning_skipped(skipped, do_format=False, prodigal_only=False, logfile "current error log " "(<output_directory>/PanACoTA-annotate_list_genomes[-date].log.err) to get more " "information on the problems. Here are those " - "genomes:\n{list_to_write}") + f"genomes:\n{list_to_write}") else: logger.info(f"WARNING: Some genomes could not be formatted. See {logfile}") - logger.warning(("Some genomes were annotated by {0}, but could not be formatted, " + logger.warning((f"Some genomes were annotated by {soft}, but could not be formatted, " "and are hence absent from your output database. Please look at " "'<output_directory>/PanACoTA-annotate_list_genomes[-date].log.err' and " ".details files to get more information about why they could not be " - "formatted.\n{1}").format(soft, list_to_write)) + f"formatted.\n{list_to_write}")) def write_genomes_info(genomes, kept_genomes, list_file, res_path, qc=False): @@ -442,9 +442,9 @@ def write_genomes_info(genomes, kept_genomes, list_file, res_path, qc=False): nb_disc = len(genomes) - len(kept_genomes) # Log number of genomes discarded. if not qc and nb_disc < 2: - logger.info("{} genome was discarded.".format(nb_disc)) + logger.info(f"{nb_disc} genome was discarded.") elif not qc: - logger.info("{} genomes were discarded.".format(nb_disc)) + logger.info(f"{nb_disc} genomes were discarded.") # Get input list file name (without path) _, name_lst = os.path.split(list_file) # if not QC, write discarded genomes to a file "discarded-[list_file].lst" @@ -665,6 +665,7 @@ def read_genomes(list_file, name, date, dbpath, tmp_path): "ignored when concatenating {}").format(file, genomes_inf)) # If there are files to concatenate, concatenate them if to_concat: + print(to_concat) genome_name = to_concat[0] + "-all.fna" concat_file = os.path.join(tmp_path, genome_name) to_concat = [os.path.join(dbpath, gname) for gname in to_concat] @@ -723,11 +724,11 @@ def read_genomes_info(list_file, name, date=None, logger=None): logger.info(f"Reading given information on your genomes in {list_file}") genomes = {} if name and date: - spegenus = "{}.{}".format(name, date) + spegenus = f"{name}.{date}" column_order = {} # Put the number of column corresponding to each field if not os.path.isfile(list_file): logger.error(f"ERROR: The info file {list_file} that you gave does not exist. " - "Please provide the right path/name for this file.\nEnding program.") + "Please provide the right path/name for this file.\nEnding program.") sys.exit(1) message_no_header = (f"ERROR: It seems that your info file {list_file} does not have a " "header, or this header does not have, at least, the required " @@ -736,6 +737,9 @@ def read_genomes_info(list_file, name, date=None, logger=None): with open(list_file, "r") as lff: for line in lff: line = line.strip() + # Ignore empty lines + if line == "": + continue # Header line: Just get column number corresponding to each field if "to_annotate" in line: column_headers = line.split("\t") @@ -745,7 +749,6 @@ def read_genomes_info(list_file, name, date=None, logger=None): if len(found) != 4: logger.error(message_no_header) sys.exit(1) - continue # If no header found, error message and exit if not column_order: logger.error(message_no_header) @@ -758,6 +761,8 @@ def read_genomes_info(list_file, name, date=None, logger=None): infos = line.strip().split() # Get genome name with its path to db_dir gpath = infos[column_order["to_annotate"]] + gfile = os.path.basename(gpath) + gname = os.path.splitext(gfile)[0] gsize = int(infos[column_order["gsize"]]) gl90 = int(infos[column_order["L90"]]) gcont = int(infos[column_order["nb_conts"]]) @@ -769,7 +774,7 @@ def read_genomes_info(list_file, name, date=None, logger=None): continue # If no value for at least 1 field, warning message and ignore genome except IndexError: - logger.error("ERROR: Check that all fields of {list_file} are filled in each " + logger.error(f"ERROR: Check that all fields of {list_file} are filled in each " "line (can be 'NA')") sys.exit(1) # Could we find genome file? @@ -1128,14 +1133,14 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): # - write header ("<contig name> <size>") to replicon file if prev_cont: cont = "\t".join([prev_cont, str(cont_size)]) + "\n" - sizes.append(cont) + sizes.append(cont.strip()) cor = "\t".join([prev_cont, prev_orig_name]) contigs.append(cor) grf.write(cont) grf.write(seq) prev_cont = ">" + gembase_name + "." + str(contig_num).zfill(4) - prev_orig_name = line + prev_orig_name = line.strip() contig_num += 1 cont_size = 0 seq = "" @@ -1145,7 +1150,7 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): cont_size += len(line.strip()) # Write last contig cont = "\t".join([prev_cont, str(cont_size)]) + "\n" - sizes.append(cont) + sizes.append(cont.strip()) cor = "\t".join([prev_cont, prev_orig_name]) contigs.append(cor) grf.write(cont) @@ -1227,7 +1232,7 @@ def write_list(list_names, fileout): """ with open(fileout, "w") as fo: for genome in list_names: - fo.write(genome + "\n") + fo.write(str(genome) + "\n") def list_to_str(list, sep='\t'): diff --git a/test/data/annotate/exp_files/baseline/test_plot_dist.png b/test/data/annotate/exp_files/baseline/test_plot_dist.png index 12028ada154d812cbdaaf18975867c4655446b21..c73ea07a1bef4986377ffbc35a205c06a35b250a 100755 GIT binary patch literal 14693 zcmeHucU)6xx9&z1#sZAvsDlVtC^kSqKsq)sNU_k7qV$f0A~h(Z=pdp&L^=*4ozRgI zf}&ugDV<P)^d12TkPvd$cE0<4=iGa~f9_xR{Lb)m29lk<_q*4-*0a`n){}>q^|W}l z?c0VR2oL7s`Kt)RIf)>fs(<2wcY+#Y3gOpg>{*P#Pw*4;lkEd|z18!gDHcKaZlM2h zWU6O4!JDVOFPL~=^RV~!yWwSrxZUvfboKCdb-H=L*Uk&;<l!zWDJ>~`;((*Kx2KYn z)W42PdU!cVg(A}D5##`ZIe*6BcH-=Se}9Zm5_@5V=i{ND-e=4FwC7=HyoS5u&3LEZ zM$aC)o0gRo7av*HW0J|zK2dI;_)JXhQtg(NxGTlfk9DW8TNf1$+NHfY{Kq!;&=V6Y z4T3#$JjXn-DYUl}OADv@RH)d2iJ;T_V`IC0l5+B|W*KaP^&1#QIP<}8<R@?V-;ui- z+z1l7>8}@|f4?~LZ)~7eLZ`S8IyyTI=a!?CrZeHMF|v5d^-}t1wEpY-d`T@qAA`<u z?QHfSQXBqMgFE$gXJ@C}n^LECv4JYT)+YzgdCe9!jMMu(S{KI}6+XJG%=IY*GwA~d zT8S_7=5f5w^37-~->2HI85_qIw<zWGS_y>m#Ts=AAxQkY-?#3<i!{bcwcg#bUB;y= zcYWHJ^l2>CDp@~dF*;7g->a<GK7mVpsnKdf#)G(QBv-GCPYj%|SfiJdk}Qb<hHm%e zvgm=Fh_L1+!O+Ne({%Ti1O+MalpwEs(gq`FTsDPV$EThsq#BSL5R{Ww=3Kv3pzmgc zMA3&m<_13hJel_vw{==)u|w;1Y_nXi3xT-UB-=U~uD*UFtijkX*(d!f+0fV`=0@H9 z^uU#Qr@k_`fR*_q7QIiZ+cMA%@5k#OGM$yN*l|r??)3W&TB{%-C#TUea4u%A<?VwF zLV>OU%hNH>CCtb7l2$L^SJa!<TX7p$UX^dP-cxO&bA9ElLV>eSt=Nq2$TH=b+~TWX ztf}@4g#cHdiIQ&ElkA*)ZM-XSxhuz>&8%za_U2{$vHQ4JuU)KNMv|{~IImz^O)zt| zq+PG#d)uYrn;-tr>>~v6jkl(xj67DBG<Pk$Kq+yGJ`=_@_USm``W%fppf<@QE~+X| zXQs7P`O!+;2b5F;=}lk5g_B9^V^WdHeuLHiimdPL?Iaw##Dl7qr0l21-e6%nu4s)t z6i>c*mF@6-N*||$@uK%SDBPawBvO$Awb?UZ$*w;yL?rr;$Hl1y2fWrxijR=2oKz*P z&!(`~Qn2$C6ZSKC<t;D6h2r&+l<emFEBq&4hI_G=yB(TdNKf^Zdx+`0coHF1a~k6X zR?TOu3HtRHzFi_NemdUk({0iI@?7}-pl@%TzWjM1fIhocgKdqI6|b5rGcTitF!$ru z7vhxt-1CWaO3*wurSaL(YclptFMNWuruDhstc@oueLNgB>SDz-Q}UZn6v+t*meSgP z>-=k-IQ$?naH7X1O4q`|!o70pb>i*ehZQS>fwKOKqnhf))^G35)-dOt1}c3tqlA(~ zFFhY+%n(|?XbC6nkuV#Ds|K}pucy*hV7X7j_|#5cij{2s`1Ei)!HT59oGq?cs25m5 zO;Ev{uadR>_(X(k?m>(h@T00msc+n-!yk|KE6T)Zi|VT`8@N}DJ%1#mswB*s)WQzk z;TFT>S59Z|5LQzOoGUdBt3XmQhDhpt(X47Jsb!Cp<#ngdY(r5Dx-}-HYC)^tG}Zgk zFU4vTEQ!ZFYS!%2Cj6(;WB2vb+B5Xi^;6Vn%6&Vv_Fplr^ma;Xh*DYnsEz-WsE9Sn zYmSq3=xj|2VNCIq;b3Eq<(ZXcd&4djd47FoII~MkCsQP=KX_#z($Z#ZNzi}vDUC_N zdHB$p!P3Vt$HKVz@~bx(GdW$<+ar;mKEFJ79CIvMT;Fmo(6dRp!2rUb%sIF#*Axv1 zBd#646;QgZl2!spR{mo#+7m5F$1SRS^P1x2a^Bjom(<zUoTxRycHf5H=5)D30KhV{ zGLk8i70*wYUiF_V?KT=9fj!|Ku(H+K;!U-co)44}0iwJc<&{f!J_a9a888)GPPL8J zZ7Z^C)SQ9+GoR^phhHt~?DZ1oQeW;opXrXHROaByzTDu&=#-{6w?>L!yZgP{bzM{& z8I{9-9LJ0764%p#Kr_lKhiI5c^0UN`PN=i}(^C52W7x0v_R8PZ=jPr20^Lx6k`@7v z>cYWd)?zHF3E&Nl5{IGMJ3K;t?p1S(V^TOpS%;RUv?w*oP@(S}MJ^z}E6?1axLMwu z7Kd27DZUpB<-4byju)W?y3uZp)Q?%Qm#t(yhidl+Oupn-3%c#w=T`1L-QkpE2@9&> zsd+IUwok_Pqx0cg1S0(tidV3cHMEysb=~S8@uSfaR#$Dj56(0zbB%*z@&U6W!tC2h zzOy}!L_gXf25)Q?TzcjkA%aOGsbdzwqjTxoIfVmf3q{DYy~XIwQ3oUjEPs0caF?u2 z{i8>}Ngt}B)dc$X-)iXGXyr#7c~Kx99R3io1VAH#sq~|5B==c*f4Q(Wsihu&oKP(T zc5H&v+p`UG(9`!`_dLT`o+&pX5Ft1T?M*I9`oUvM64&2&CMEj?pSb?!q(!ASW~M7o zK(!0b@t*0Dkg8d|nby$d?aus^wk>{GX<3T2?u8$vvRPCrwkjrYM%j1v1(m(A5_j^( zS=4L#{U?$z(^2@-3-5Pf$KxFAqjlv?X%%4S(_rVwLE_3_f?ebDbcfbt8uNoCxM&l2 zS-h5zzsS;fb0d#J|C2CYr6c8o0WNeZj(x(k$TrWC$)fiV$kqN!MpYZ{wo8d%!c-X& zW*zLApxz?8$TF(}>$gh8jcM4UqlD^3?dOsfl*vN4i`~ZhTF$_=!Tm-x!Vq~EkHvi2 z=Yhj@yj5SXQy*PylxhX=d!f#n(qD_kt;KqSb4(;+^;#4s60_IUkX6rJlZb1FQLf2T z;q@VI3?4FEP+mxxR+Nz9>QESVz^B`eMqFtbiBeBSFOnnV0qcpLWmzQg%i51FcUyUs z)GW4HLZDif(w{~JiD1xlcZ#wB;BPs%L{NDY=r%5_a@!NV1!UWdFE%UvSpDZxRyEV+ zZ3@3btop%CM93aEW?yN@U}a_Hj(w+&pjmgmawcDd+^j(DAM%NQ-P_KG&@QBM7JLa< z_^=n-b0gf2@qMb-IVW#1i54`WT5eTji<t-i(VPLSwD5Vb{yM?E(wPWhi-J{=<Ci~Q z3#u%KN0S1j=FvP(juy|jmZ7IOnHpx4N2^|H!QjE8ir!j&cS!cPj|{wP9O5+CuCLCz z<DkTtOp6wANW7&v7%-D(ID?iCHLN)gam-`I@fUpSK`t}IL7G(X)TOEZ3T%rSb5;e6 z7b#>_c=JNo%=6ZGIhRtcqq)}uS5mM=zO>y)?)C|Ik}D=_AFPiX<$nA}#nK;0Bl<Jv z%8Qp;lpPjF$uy?KY{VXkPiQ_5VKM9`Kfi=Tt`7j6ST$dH9L0sqD`_&;wV{T6wBlAZ zH1!p+H<%8ssw+AP@@|U{^@9!_)_Q!MMr}<FoR3RkuTS^`{1*Yn5h{OVeHo{=S6tzE zT2Z59FSb<`{;k*o9JLGv(vtj_#>dL~+#5q+Nfgx?$U6EV>m$MwP(XR{DvmyJg@Ouw zUOlC$S8^@ht6#$>o}0UqA*;rswLvW>pAMS*YSzY^Ag4lrlK~35m@746wB-;#aK!f4 zx{YVJ1)o{?4alKa=i=G-4EA4TsY$j19%Bi-*I{A!0|t*iEp@eCp$Op4F2St~6D6d& zI9uF04)?==g%%?{f^0fx=rpQ2iYId2*C8Z(MS>aMF|dNir-vUv(dZDy3gKA&IeT#T zpyM{&YMt;gFimdI=wack5^xe3$Iq#s&{EMkYRhNW#qt?fLUkzsdt0eX_a`V#kD;a4 z7}TbO6IL}rrRvOimtYc9Z85M}1k~!rP{vs}!84_sm%GPU`1YoIf!`THYH1sq_>sQ0 ziYH#4>dY}thq#T*S4e&(1F2Q6Wy7-jj5+}1@hCNhqg_Ylm6(<!Wn)UIi!r~vtI617 z<$04iERivt6JrylTGmDkT+s#qJ08ZxldThf$|<t@48h=7%y!8)sYP>TeMV;G?iQG~ zp?xDcRlEV|SF>Qb)r%f%x<?GuI!5Saox1Za>^?p@7;{Oo!nTyQ6iDQz`AN|I0!b@X zSF>>J8b&9<2w>D0eHuH+B01k_RFX`{`sycLg@F?1DlOsQlEq~Ljws_j`B}T%ec)K0 zd3iqAIK%7X!El2gQAl77h4C5D!I88Z9t$4#@$qq~o>xI~qbjw>Ji3!!j;}MiNi9&) z$lm@o^oU<pOi;=DBMLLSj~k6@sWYyWHgF<{wZu=~_bo$Pfo?mw;6|N{8>PgMI<y@@ zG~BNG$T@#{b~H}G!!qPsnDDC$5qtcXnibHB6*dt;YL$MOl$%x{t?5EqdNGk4D9K{3 z=%e**p-n^3*l{b<<t?Y7!|UL^W0|%A%drP+TYI;+<_TJjEA)Gwd#uKoyzbj;x7HS0 zV=%V>pEbGEa)xk4nAy{?_<ooE=<mDqKBmW7RywoLzg}UY7op}GLo?xS@y)P&w>P{2 zk<MUmyzOjHAx{^d`uaE6w4*Bk{LY-g0NxegKkblo4O=;&%U(TUR&qpNZOs-OjRe^# zk23P}9n0dfey>KTVva*q?OHIHrWGYZ&LPxPQc|kmY+2YC9yMVExh#-h!<c>~ppZ|o zLC?C+D;N)<<I)-XKJWYbQmZ(cL2D=}GkxVW>c;eOj4!p|oV&re$eKHP;*Q@=rc&xo zeS?iv-D}|u8<3jSl$`WPIOTe<&v0>l__Nl1%^w^h$FEL1xY1{O`PD<#sdAe-#Xh@q zpauBDUE<?t!htO(I<wGn#;nvi21;j_QuURV8*^3S3pldc8-7ep0tY7-)T{?Pg<ju* zvVFJ0E?oezR|V?EDRS`<|NAZshvqE8*w2%4&QP!^0t90}>^8QAEkX;H*ZL{RGX#<v z3d*Q;oTGk%4L5^N&jOY;8kxJrp!I>mfcG^>6VcE$9LY!ya4yyQ;Z$3{?<*sVCD=mK zh-URtEwH_{YH21Nsb%=Dq|>d>EtsZNtmb~X9@OP1spb-Nd6S@XB5tVtAFogC{cEAZ z&;F(Z{v90v%*nqY3jW(4`MysMatrVqd2~9H%C4b|fd3RcH!}=Gx>}EBB;u2NDOqpL zNFl83Q#=3~Guo-$Z}fwwukot}c$YT7JJza-?}HD2gPzX;9J*5oyfSq8z-O2K*qZO@ z-m^W@Zhd9V8BlSltbeQP6xs@heTIHs+1FX=GqWE+mS6hjdHC}=An~b*6kLefa+dy> zABkzf#V2%qZYM0qWcwzfCXaypiMsnd<~}$`E9lLAp>9gNRnK1moqYCbL0E}P<H30W zw7eMGZnb)L9-2}#VNgckP{E$rGt`gY=LeKMNG#vMprvM5*@9|W^#O7z%}jt<E&!jA zj#Kh+hQ^#??e_KEJ;V6>ayusl;>440AA(2F1+X1-Xl%zna(Vm|p5d5)vaf6XW4AZ5 z?$Ac?D|tJ{h~4zrKWv!>eO0=q3tGXVC3X<KI7K&N&xRvBi1R~LfB@*`(X@4+N8mXS zFr&~YH@Ak+i(R3$jyrwp=0B1J#I+9;IQCFHReXE5eF+i=O0uA(tn7_<zki?UCj4Ws zHU~s1jXk^O_33DaLEsK}*!Ty#V&HD6$R^Y(+YWsG`^EoQ8z>x7UwzAm5@%3Q<x|qo zSa`mfp*h^^kkZTtwIgs?HGjR$iw1ps{ZvLuGFrx>{NwuCQjRL`=Vz8d(xwFmrn;q^ zz6>XoLZ!M#Qb%o#Y)c?!D;)w*&`<T)3<Op(l<E>_`;Ky@bGJfY7rB~xbzC8LWG$~S z>ALPjIscJJTWatOP8VNUY1@%GdS760>um^($83DgE%c6XW+K#v@=!Abmo{+3x9Q`| zgeR<6&WmZnq7i#UfmKTMHS?<HCtGdc*B7G555>E}nWC`<fj1GPD-7am9!;9p^i^Z1 z>*ETYLR%c2jjuliCTmfOeHF(}6V~Nb6j}RZJk1%Lj$?I?7B=9mSjQB{o=rDxLuvzx zxrz6Q>x+$RgDW+$w2+mmDPX_Qg-}MK#Ov-8vAB$+Y21KAN`2paxfM&u2NwTagKz_F zv?{%)hO4nXCZ(=%xh4xOEZ@0l++2UfLRo^F5w1lELF53S<faKYmJ7st%Ghv(RLfGU zI>|X0S_71EN)B2iqlgKOy=k57^(Uhgzr09RAJ37qv_W#Tehrpn)4;&C8LtHI(pP!< zeYV#%jRPsQo9@Vpt6Yq5VB7x_w&bQ^HyTkc6i{LGS!W`)a=FWV0;TsPYgUKlRO*~C z^=_Z}?<|)rwUAX;s<F6eUA(l75oxYjLA+|VO$Ez=$T>iv$FE%WwdD<T$hOe48OA^8 z+zAi{tYRb7&mE2|=#ZPBcu7uK9~EEgX;3q|M-CNR@?aaWo9Ac~W3kb@^e)nSZ0cG! zBeh(IKf4$}iR}frCU0g{(GcVl#Uc0Ft&h>ioTA9BQM%>VRS?AWa*YTt3S8EF3{86| zc{+svz(Rej`M`w8uNtY4VKg{4|K}GPc_w8i;F)c|zGE;i#mZOnAsV_s2$bIZ`fjv6 zMQ`I<ID<*3Zu_PN+n}4u*@4#{BhomL%f<i4h5r35{=*S8QV_LNNejFp7T>HuAW?~d zbxbI~Pu=0Ye0+KuuNFL5LH+Z>U+;y#`L9;|A1?RT5^C+b5bc#78*+&gxhakeMK1XJ z97WXJJ#L3k;(o7-Z6sW2#%4&ky<xnUv&R~v2_bU)r%x&_HI3fk#@jXGp}k=*-{GF_ z=H}6^S&U)KaUhwd^i<Ag(@d(qn67rq>BEW(?`<O__Ib$qxi7Y-h)pCXe4mX=^f4-q z+KOnj7jO%v8W>x~C3;^C&Ll{-L^GCYI8ul{Gsak+7K9+Dj>7Y0@3qn?qi%$`Rk7&g zOHyt<{s_{Ox>-phi;Gt$ze3EkaF|%0V=!GVa5B|@d=p|F=E(~O#dIzLRn+U0G5Tf% z|MJ`npOZ(sm}$|gjrFB)VLb?lQ8tS-S}%~JDh-Ho23GA10HMBd7}$1(hZ7;krCK)~ z?^@;N)hI#c;6BBE1nc34^%S)R=Woad`m%gJPqYM2C{3EXKAnAI!+QA*HbezJu$^m% z=VL{rR#Ol5{SR1ua$tT4UqCo7h7>}6onRDr49%Gu+(&vFrNnoMci}EEms%BqaxX;K zc3VWZ>s{hiDNtJCKt9`Ex93SU0BC{zxbA(WtpdA9*6*T81Bp7iIub1vBJDr?aP-MJ z{4TvrdI^H$iU)q?(Dq?Wk%vE!AMulUl{*N;buSrv9T^8bFXr5G_eJ(XR5UFpLv0B9 z;YwLfq@V>x1xGr;6y&Uonq3nZXxhC7{mBGe(mIXZB|N^7=OLP=C9ckLTDISUSQEE; zhhCL1#RF{ICSJ>6%%rp?th7*Sm}0oKOH0jMh(<I-LHa(G(-n+q+qK2}Gk}IAu5qr# z&B`tw)mLzCFc@@xj)ir~-8j2g(JIQxEygWqAUw1(V2qn&N;ilMRFR73R98g$JgkVO z9Egd>CU6qwd_sQ^mZdlj?Jdh}qJ^wLiUTsP&W%s;)z=@W_!u?lgt`?jaJdseE0C@; zJ-yFE!TtJDE4nH#@Ck|4k6I8uR>6tl<Y!084(ff?vS8*{o0V$oty>Zq*ml%`|9urB zS<H1gThMoPs1GphegY>FYJU(Oq%*xSUMh#~Zc!!1$>sVl_k68SphG8n>M$@?{{@8n zUjYe7({WdM{x;jKMIBcMO)ZLG9L{ea>?AKVh<j9jSuF%+)plhd+9Nn;ZDJFWO8&_^ z)a)B>L*J{@5O!OnJ%dynvhacYSXm4J8w}Q2&xu&GV9(6HCDBCqRG)dzC&D<6Y+Qri zA)LJ4t}dFUDOzkVdRWUSP#Y53xf~eZ)^e9Xq;c|zq%aI*oUUFqj@3zg5ww)hnY;rD z6$4n*$h$pX!61(EslC|msnM3EwaB8O9#SgKiKK4*v8D*C4%!*q>Q~$8j?uEdX+9VA zq0rG+(fHsM-Q?gh%C${Mfe?hS#A(K)ZItSCT2RvqMk(N%OFkyQN^)&Pc!pC535^#; zpMSTpNcmtqq@UHoFKIJ%!70J<I7tYhM;hFjTeJk12cuRdpXwxZoq%ei;Ko0P&6&H) zsg`UzPuP|`Gf@e1`5KNN@T(#5=Ii~@r4;?~=1aWFCrk?uLC)DA^p#%n?Kuc}I9pZp zFF@$Hi*42Q9aM}{lvQNTGD_2;^u+Y_Hoj-r&iC8S<mD(IhR*@7+HqG)IFD*RSY*fg zI)^Fpc485NnPTF}FB1JM|Fr=HQ?G$NU&akoVB22nvMiTk0_Zy9O<66j;38X5*kG&> zs1Y@7uz4hgA(S#jw{OYVX%}@GbtdpZk0XZK;taQFc|RUzE;x1yTQN?J7~%os8Bo9p zLCiiw!JY8A_G}6#^5v3+B7*!@j%oqEaU_NWu-wvHaW!h6VH&bg{hgEdq8e)|JCYl^ zb~I~EUrWg*aUca((4h?^y;>n?)L;^v=+kAoRwumi1f8G)DeS}rlNFDk5dnThFI(sw z;@@RnaVZbBuweO51Nw;o8<ZR{orMmb6oHp39(!&+o>xBj31#lDq-ol<y}~HYzZuC^ zJycM$F`|wybBk_P&_5QlLaoVn|7X(fc(C)Z%+2=?xLm}Oysx5M%yy~j%c*tssm6rS ziV3{?0Fdqayvn(1T^vZy5k>Cc!{|&2ZMU}XIgv>}qH*o^c--iGRrK4iXc|s^YCw6d z$pJWO9ZMXM0zv-yJv1?aKzPaGHr7=(R!1r(|BL{DEO$r=rf1PjG_LWhsl=T+>i_kg zEu{qJSE%a>sp7-<@~zDMe?VBg2H_UWe9eFtN=942@9p}nC?gBR;46v^hprpcH&Q_d zE9ey4!D_!hgPUFeHJ42X^gG2d0X;2LjB>z6p#@pW47fa6j6xA=F;^JMV@Jl1wkXe# z@`A^k7*bDxbfR2?MQ?^h_rbWS7)om~%*KK+TRMd`>mchj`VmIhoLNheExPW&E_DjM ze2tE0%-u>2OB*tSj~j{j4aDdq#RDKLc9}0v{K;mL#9^%^9#(T(IS|K9n}QF^27I4H zhqnHpBY?!uUg=r9E7D!#;x<clKP9h#iFhQeUPB4%cvj}7&<r{N^H+o%DoZj>Mzmss zF$#s3g`aN7YTfAPb2Fq~L1c(C{=aH&foBw5JAf-rdT!g#mEtf`c!2zeh_N{T<uOy| z53?sMRn(RyTK7)QI7g}Cedq8y5914MsEW^6QyDWJIkL7Fi@{?%9s>tyKihL(c?1T^ zPf=>t>5!3DYIo*ds~~CBfbWh4WiZcQhl!s~mG7JkjLVMcmK6#-IgfmKv`0eQ{mns` z0j~_^U&TP>G}f5lfPzV)2H{GBFfIt>0;RK1*r9B*<?XNcbdpsRaT`l1*tc8uzB(7q z+bOiu#LC#xbPk9GM@=9KB7uO7wF>c<Bz|wJhDyE{QXNX}Ci)Hf^UFCK{D_jLfUlsB ziGK=r2^aJ=;Z>e3aE^j9Q)8FY6)2nF=MGwfUicYwL&)X7UTpdM1@b?`29{|;FilSM zZo}}Oa=c6__ejYrqZ|tt3UGu0R-tng$jEAy91Z6<Occ<GH##K<T8OY}0yp}B%(2(K zF!y)x<I!1bFU++8U%`!B&Ol7y7#TQV`mePC=Vj=Ej*@^MPeBRi=^R4-Fe=l^l>pcV zdZ^2B4yE7GSyFGYgYgaY8eQSo&kBb?3<2}kOVCSYtD+(|7fh<hc`^OuM07F$*UX|H zL9Zivr{?J@Xe$y4A<H?vZsh}Bqp0KWN3Vm2>ui#${&tr4^iU`|i-iHnrN-wHIjX<p z4lD2M(yM~Wb}Ho@c>nHA#Cn)JpG;x)PoVSvb&we~!@O#Z1NSdX`uyv-nA6jRkp|<y zC-E|;$&W;^6D`eqI)%899!H%dB{>*Sx)qdxSmKy&T|A6J%5E<Vqr#3TRk}&YQUcW$ zQ~)ju?HQu{<u5hEM7HB|FM9dhO!;QEN)Vwo!(0)%q|dhXj|CG7Z3N~YB}m(7LMAdx ze`q2I=7K3~vb0V8=mwM4DghaJz_%}ta`^4O9T#O@QO<Sz41E(4WP*}v6sL@_E^`l5 zf`ZQCN5Smw-`>>y0YUE^0dRnW!C<sV`GBuEY+RXU)OOgCP|6{g48T+<M-@;5(f#Q2 zH>dVDulR#ob?E^?%hRp9#BABD6@3`jqg<QS`taUPA(-ri`T!H&12h^IWD2%W3#`2t zSGzKI3!KP=FE3janQ@PhDEkPb=VDNKOn}m$MNfIoU=A2*jp+Fby89a|`j2=w3J zty3I7veJJB;cudFxmwMyK-{(bAnY`oU#JH9mTC=IY-WUaZNcORcT;*u1HcHEsep2A zii*?NBNdWENvl=bH7RTc5obJi5RPJ+Qq<WVw?cgI9GyZLn(M}Rno;0S8_>uhi2NZ= z{EjU8&vNe$&Zh$fG>^X|Nrs?Y{4tC}BL&rRVerxVYKlopKH1~I>LEcIfE-D<jWvG| ztbrg2lwR18NCh!WeUz{i8bQv#L)C`P2uro#<EdNU@cw632j2WQe9zxU{$F^j{|@JL z7oZYCb1K6K9fpFQL9%ML=n|@+`Nt$29gJqCJ^plWujLC=04A=!hC!uUI}dA)!oW6< z?g-8Sxet;!$RSbbsv_ugmLQd$ZV>dlzQH81m0{)ujKx~BbCgzbyxAPb>XR2roiGpn zTC9nMdFk3}?!sHhQF1V%FLCXaf%E#aOUvB)#$VVxCbL$kDZux>{-ga%^*U$D_*ZpX zCz!1Mn50Iv?h}3lSp;Rl<@;Qt9}6nWn@~Y!6)l1$mK^Clu(+9vPzY3DC+s2i9ZPDw z=GG=sJm!DIJRelB5->Q28#|Q@!W|8xfUCHy{9_=e3*~c>Eg7#l6R}sya(Q*Sde{7l zTjm$}tH-GXwzf=Fd{}OMF`4Z_+pzgFU-<N6(woFMnJi_ABff8+Q#O%vM3(t%bh#r7 zbh#bl^ls&fJ;COWi}hL$I(IRoW6eyBtkvIF?9Vr<X!`o|`WbU|*M|6sA&njPemiu3 zKUIB)*coQXj6gMY<?C9Hd%?;?9i1{)cZ(26Qui=$S9*WaHKb*>Gd)d7G;4aYlpycq z>T2L$5!oF->Q(V8Z@6y5gVSYd9@kx3Y-X0~OUF+*v?Q*LC<{;ndsrcnWYxg)%kUG& zdxzu!H1@W8XBhx!T#>>`-WQ>X{CtO#`&mqk+~~xo#>Ud1wZ$)gK*5jHdfD081q?qF z*J%iSaVE;7*uJ?<ff|(d`0?X20v|d%I(F<A)(Z%z{PgM5vnNl!#TL)PNk}3MMA_T8 z!nF1E6$H27thrRr!bcrj5)~<xJ~{HPJqIwAUSogYqoc<bZli>J9du>zgX5N++&TT0 zTiC$Yw@lu>KNFfG=oRPNKp04{KA8y$&A7(^mI$(%0IRVc7#KJvDQRSC8b_f}p5gI- zwzXY-o{1jU*!(BTLBaHtW8(*{jUancQ#p5zuP%%nJ96am)vI_h-FOi~cTQsB{w<q0 z+P|FSa_ud;aOKJsZ9P4C!7aRK$p(&c(Y@Rpl!FFdUPXhqM|j!`hfabO%__aKhDS!^ zZVjIw07>exqerix(rL#B)=5c8lxqKC85x;T->Tiacc+6~oiA`6^e!<^pT3-&bgUR~ zS6v$g;<wvMOW|bh2eDfkzoV)%@9&dW>>M19i;7+VF={(_rZ+WsWvZvp=2!7YFY4<x z*H|IC@u$e%zn=aK?N?TA?uoS^wcUI79#>K_Bdsq+>=O{s)d~wcb?Vd+IXOdVX=!-Q zCw+59K0c-L9)r0cg`EcBm#pB<o!O<OhN7aPivh$+kQw`|e*1c()y37d1CrbBUAqpG zsDzdlJ#<LJfNIas+&l-W&M0?JlkmH|=U*((bi1@BtA<(DVzF2`?6?kbbvWYK@#C^D zc=gI|)Nz51SBpp_I=i{a33BtcefjbdR60jwWv_uTyedqF!k;cc7S1LTEes6}wY9bJ zB4ZO1$M^HjfOPq?xp@KzkoVLb-T82Mc(|j#KewmHB<b|6vmjs-qN~8lK(IG8N{)uL z<6m`j7(m&48D!)}At4)%_4n_sZvW~03lO96ta<xDfWzh6!16rlciROOz0Si50zkQ| zqjiUKuD4iALPCO4WEXK=5{in-SDvR`mG<n<OiMe5_XdS4&%UULi0H<#oAh7UJ&_87 znLRx{?0`W~yMLcdi{gET#h4XX-vMb!KFI#OMGhZ2d-*d5_vFmXwM&;GK-7IxY1{tH z@^a&Q_wJb$+eg~BBswYa?w^EFP8TeH_qKbdZ0dJh(9&{&`Evk7V&}kso08Ce5eW%h z{Il5D3|b9SkB5hc0pEux31<4j(r<0&(=<5pYunn=#4-rxb|^lNkI$~GRN~CC7TBgV zgp7@iK~R5cZk7Y@EwO3fO;QasN>5LJomTsDdS=FFdD>7FCPfgF-S8A80n5&FJ%#6T zjPhgz`FQc*oDe+0%bmuHYe8UL)Wf%J-@Xjhd;DYWa2I#?=iY7~>Qg>^_%H=3TL|Az zjtzJ1-f#;(@B}$?aaX=Y4j``c*r%A+BegdqufItJv4JIg9C(rJsnedB;8URNngV_6 zzMfKaBF6xnFU3yi#XNiF2%B~P+PkB_{PN2w+(wXuY0-J1C@pS2p=VT@v8!wDwr$&V z8p6UXYJyDLO2@w8<Y7X82YX-nabXzvWC3(SN5R9u=d&|2KLht>t@O)&duX`4ss@go zIB^OL;@hsPfWd?GTi$Vfp+UGCbaRFl7P@B|NW-8c1OKVnbSD%-TGWDh@(6e%Fe2?h zv5hX=-jVV9wPjKlkHV!voU%S!@wY0IVczqZp!wGY4Z;ITU*GbFy;hUZWhw}6<-KNQ zMF6?(r3)7>I6FIEJ^7mf{PXiG4+seObqANv!tjN5>8}WjV1*leGXrf>j7Vu|X=H%d z>GlMVnxiSL3YS}$DC~e3v7htN&Q4MZ5tqeWj!#G!^R0?g<=!Ir;5@({;O*4|KVP=9 zONX(rQ2Wy%jhB}%e)Tkg+iL6R?7!X7(Q)MGpN>dNUx7e>ZS#&}p2QBpbmv#h$jQlJ zeknH|vTAQ@`*Uz`@ZgB5sw)0fjxph}ugIf=e*L$?L86fjn72dG2x8x~*q;C5mcGlM z@BGx>*O$}Zf8Eg7*o8Joj92!{pStLoIXY@PM<wAt9zJ>$V@WV(uF#AvEt98Sc!mPB zV%qX8s<<Lapgr5C;ND-`(pSrNfstc&PR_LhKmT@W1JoJhF|VVdq8~2HJlcKf`H3s= zO#Alj<ErNsRC=XxAa!=uHH>T>ky%qyqt<>9S*L+qJ)_$SXJKk;%IV=uC=Xim;><ev z{8f5-7T_9)=EA0qcs{AFRw06R^%I%=<s6^ttGaMLbj_4Ex6;-_NK>~xAAq8B5`gG= z!^o90XYQy7MFEB_L!IE#94{9D%&4x``OWZ!n*`Wj$X&1C%Q7zDvk)BkSMT1P{qX*M z=F697)Kc^z8x|+~kMsNZ`Z|LM?~kacZh%YEVcu;@KRdOxwSAu}r*4eh3Q3o4J2Ls( zRxtJ=lNA6$Y1pDy%CzchASgwpIyzcmbsb$@(@=F?wy;P-1L*zx^Lf`RuC}T%bL%5* z*tYA~ty{M$1=42=8X%RX{r>xAOdEWKBNHGwCp%jkK-$>e{<S51pWz_W)7tf)8F2p0 ukDOV%=~tBfK#&s}f4>1{`2WWqYz_zZ)--|cLVI+3Fc<XB=byE{`@aAUK>&;Z literal 14698 zcmeHuXIPVIx9+P9Vuf*>PeqzaQ&bp1Kx%LpVL$<uE?wzedJpQS=qNA-kxo#0M|zJH z484aQl+b%gAV89{Ugz6;pC5alb6w}xIcHBU6%&&8eV+BKa<6;cd3r-z{m}js`w;{= zr1AT2w-AJF8bNl~?%e~g1T{y$g&#k7{;Hw37al=-t)Ia2eeS;-dm;#z75aBq7ADgH zUX=5?YUHKsX6xl^<za)kSb4cSyLmY~+&SrE<KgMx=6X_6{G#|T=TF*udAZ9=Nc_he z#N9mXB+^~T(Fk%9(fI9(o?qhpi2rc3cM@x5<ItDWdq3q!?mh7|G+xEk{!YBZamuf! zAEahy$Hj-2_Zy`#G|yL@5KY%;{mK3PAv?9!5J9f)7MowNM{db19(a>Aw&#mS2zL1X z$Y>=ZyFpJLnp#`A%%w>79GMEb%o`I!@=nSvxRtH98+M?lpX<m4Kass&@XwhCDjWzB zy8Evup?^O)^IzCNxs*2S+1}IJtG_^vR9yW6|C@L!nru`@`*cJnt)M_$o!{TOcTzKl zHHy3od!fRSlF{4SD_vRUL=smDq_q@Tee`I#*hqP0;!6DEALnGgIBhJEF^ugE^2zqP z*ZDJ8&X)zZmp1Z@OKuq&#w-Od1-3E=k)X=bY#M@e{4Ams|5C{K?VYPf#Z6=7JSQn6 z0@GW(ZoMn$+Y$Vio^0I`YM+VhW}F3M*3f^gRW|pcD~>W>(%x24Pt9MS>CSYNs!ebt z$Rf*DLLIy85BHV&dNOzDax-aB9t&lp$!<K;i{EeT{`zdM6Ls9g`R!FH?f!n-1eaH> zGDCSe|NddhkG{z5ywSSAshr$`rsTlIxJ-PAJZ9%B7ms3n##-LFXw8DkZ+|>Le(^Ny z`ua9)p}WG3JYTa^U$r%x8&w<pWi3iaIZhNGRQ&!F4U1j$Gcz+oEvv}dS(BTLz1x+k zgD-U*ktgg-NJQ1LtR}I{HN5A$T^wm96|OV=Z}0k$gEw|2WeE<YBOCP_b#M+geR;;= z{3hLU3*O$|Rp0(NhZ8b!t(uq}U@@0hd=)r?Cz_(GD7b6I;}HrOx|!PFuQGhE(KKk~ zvvHM&P3nSS9mj#A=fAvd<WGR@o?jR&YyR_;st0`{rf_A0p)Ih^FsCN-`BPrZ3{|)d zmb)aN1}OT-*=)gFjZte?z%q9deFh!4FS}dH`^?7{nAO;hy}K{B`bBW9|8AskHcM)& zCsR3ENN%k~il>$qKk9}U3D8p5I^L8y_KMjh-cz$}jgN*?ntE_R+^<~%vpt1nP9+A@ z1_nHuFL-`A6_$1_Mm$c|&3uu-VwwgnlyS?tnr>|@;WeT}CO#aHxQ<KoXgZsV4GH7( zv0o07AnbVf4A{hY&Gww-;Pbk1yUZ~fmP}Y0j>Itid`7%U&J`m*a*LJYl5V55roq%+ z`n(+`!J#|DX1Ma++-5`%p3{9iTz2wx7=M!?VXN0`VNhDbfVE}LiP>4co*;7}c$?OG zv5_~by1IJU{o^6Ium3#aK{E5TneE9A53&9F;n9Fg#Yk7V%g}Q^1sPa-YZ=KrAvHo? zEdO>{(_wYqmE=PD74njaTb(qzz*iScUUjCb+r10j<v$r`=fR+orZCKLj7KA%uXTTs zl?G0h93)k<P;SB>yxs|Zt<0?y;3XEVnH;oA9QNwg;#Tl+E+2Hv<#m6zO9OYVpz^e! zdPEEO=YXX+Vt?x&NS2j`*wRK}iSV^odV{47x*E6q=ifFO6h`vO6?eA#%=P7?p4Y_V zR@*u4Zw{WZXJeZ>XE;MDByYF*Y<>+hgf;d}uLwq5euGO}-5ifBG!NWaX*6UFJ7gyH zS#EDH5eQuwS_zt@0<&|^j$XK3r|i&`rip9f^BwM-mNrl#quZ`8WNh@UkAeyFacPoX z(;ZDtg>~ftuwRS9g96I6@S^u@k90kUx<Ig(4SjVYmo(K9r|U3M?QPSMa>Xf#A?;td zu{{xEQ2=LB)>Rj{(Ud6Xc|wtfVXa}JBlr{y9&_?l2e?eO#6=55>uSrTcY0nht1exg z%`Ggb$AUfeaC(qU*6-V~hkAcS!@1UHVK6;#V^ITl%pzzdxPok-8s1gyJ*PIO6tHeI zM{*r1cWG1F7%p`v@!`n#p6L=MGZuY$@`BOS^Ur-pirv3{K%Kxzsd=k<zPK4D6eX&o zovEE<Q0P0~e{m|&QyWJ)M%cWXiXZYD5BFr!r)2$fIJo!|M|_4Zcuuvo&J_~sM@t}e z5t=?cI^YQ&V7ol_{t*9=&ydSnx<v3<F6B*2{*cYDdm_E3J6?BsmbnbcBsdon1KNK; zoSIK9B0*>L#G-}#$xeYfvA>*RtxB+P?;mnZ{i(`H{PIGuWUinl1H5o<y&5vE+wx~G zfgVW6eaD{$E>_v02}^Nv?BP<Ny`OG^QlJlnOYhh(OZsXNym5W`CR6V@1f19V8<lBq zy%vY7Is?e{!BIkmwX2#B$1IL*414O}C=e*O$B*q%hLG35Re4UK>*tp9u-;mqi6Tac z+R^3*xTPKSU_TY}kT{%n@KQ(z?+^0Gg;AXFwbj*T2~mvz+B}qQbNBb`1CF`*X!fyN zr4Zv3{XM{8qi&|EP1LTnHHYqE>zgf;9r5b+j;D{b=~_^OP;bkeG82HGnia0!!%i)- z%%NKZ&hN_siQ10ox4sG$!M!)xweBTt#h3@2)mU|7(cG4*Msn(?NeaGg0?g&G>+zS) zO4+rwK;XVgW1e|>MEmk7G&RW#+NV;J4<LPyRfl)kuPj#26`Z%IR|L-sAxqF4!85hx zCSEG8cc{i;uptFCi`BePNaEx?ZD~Vpg!5+AUis$R3|c=Sfxb3Xc#lD!!t3Dn1g@Ku z<*-QL;mtfQ1n?m50EYBTSNb*c5c<S<lk&XIyUj5Y?GM-w^i^io%H;)M_yeY2!y)_D zF&H;FSj$|jKd(vDU6QC>{*}3&iJ8d6FdV9Vmy%1`<j>h9c^l}52tfJV=Y`xZjXq~M z$${Ctf}?=xO>*%Iy}5>*^3y4%o~_aY5Pz%FsgaX9*sbo_Mkq2PLV;M4H5`~f6vh+z zh1I2`j@PP&g~1d-0S%lYW4$}P96NTOqi=I3mvh`afgZ-m7mwbeO<@V#B*}L)?jPq2 z@|dsI4}Gm0T{80-5Y||&Av0X_@fCm*R39HCYKSJi1luTSctDbs@f%5VpslmL3p_lH zlp3KM;#6AiXo;6Tht}bG#)2DRiiYOU5nu9%eegiBO)l39GDMPLq3_bDl0^OHoud-w z!=(>1neMB;i=TsGacCCtf=K0nqCQJa@)<m<fjX3I|1;)#w+>-0c}(9TexpZMCwMF_ zLVh+&h&YppPlhDxN!?0ttB-RFo-@(FrA8`mv0=!A0g!T@S?t`Y_F0IRYFCun81c18 zZ8R(}9T=;g@SJQh9>2Liw{Mg_m6$gY$d1f?kb|A<&8<J~45in&(%lNOQ4~1FIk!5h z0Tjy2>q+v|V8Wx|m2aO1f;UE_EZ>Df_EThRQJWzgJhm4q>>$#(rEIV7Idq~lV7qQ( zRAGaR#iAYsX`9~xlB_awCDLwvx^s@PJ#aTtX%!ZDdSf)u4UMs0(iID+>>1bNy2fs- z!jZ@Y&c9V(g!q{+?bb%mToP*baL`)ofG;_iRB0EyRJYL<iQO3wnz?4t1OcC@B{Lvo zR^!tOAaAokiqEIts%j=9!J}20Wc}sOQ*vXWY$#BJdT|l0sSV*gG5d~)<ZUc5Mrujh zBM==eu@?ow{FZ1To`(%^OIrUvJ3G4y_Md{nL<H}B3DrnQ!<E3G1u^C<+97u0^D9Q> z>jJ1vVCJX<*Xpxqy#%aB?JZPrAA1K#PSnuMTOW<d^*NFR03NxW<&WKk7KB5zZT2zM ztN=s5)U!JqSeCC1*d{tMh$0xx$tSP(_#pQ!I#DO&*1}sPUW@6Iw4h%B<=Fs2#++Nm z*=Qp6ZbbGlpp9tMc~|Vn{`pH4qXD@dZE{n3sM$3k>z#v=lM9zkEnaI=Fch!f{MG~! z%d@is3EW(cxjB9=cxycy;9GSkwINCm@+ZBZgh2A{h`jvmNz+Fzj467g=}x4A?`u+f ze*9(?;8xP6DVj~cFJXOtpajJYO#r~ebTh8|ljc$k#iE=1r;-c@0FR2f43*~#Ik<<f zNSkrgK~H0lXLOTJ!ZJur%U?g9<CFKwf~xvj(z(B=>FP1F(uNIs-RQ%Onxoqab>wZc z>>dKvg5G2(#<u5RRw*o`m?)B<w}+G0DDLvTI}?w2T#f2R(1p?mN%aQY%0YhV>XH1> zsHL_54#+{7I0qi%n@Xx*s^1m^BxaZn6)YNXkZ;|hB9a$btvTUFN_stsVpJlSN(z?q zoJUqXN#45&;^}*)P{(ZDENfX%dD*H2KPG~3SQSF}AjEGT)m;dw84>;QtOxZLuV zc7)82X4R18hXQH(xd#4_FN!@Tnj9n95#*ixPt($~6$FNhyzgS79X?owjf3+#)ZH#9 zKtc3o3v&khW$5vGIPaUMGPg(sTgbNbohb|+LQWRYni(4TrXECL`nO{QqlFcCuhF%( zD}^@&7}M$~a-9H})BgFGMQs2B{ol*8x=`ODDR%|7js79}11#UAk~1KD7SbcmdS`Rx zP#>4Ve9`qdNnt38?I+lR-#=ilS|}gtB4Am)?Yp4U-aKz!ds#<u<u@ASGSra<$A7$U zULr#dT5E}u5{0O7EgH==EEFQ1E37SUzysnNlDzlr(;;O61i8UeP`lc6J@%K^gLlzS zJ?0Wf2FDDh#(c_Og&PW^6||}WKSw68$jsSh4WFSR)$%iuqI7Nag9lGs${PH5X}PR0 z_`)$$_bX+MDcLPa3D*;2P5o`Yl{AFCY(LTZ*)DwY)~uZiWFK&dZ)EA+y&@e>J*UEW zaZtvmx~@sIn!}+7Fi<>$mSw<2p5d4cn$<#$@CufwTsfdoA9dy79veWdz5E&X#m%aJ zmOY|n*^!b~gfYZR$3OeGyKLFDW*1KUI4$i6{wg<{mFfYB0)>1iZb171$Wk)UT?L@l zag5vx2YwsgJ&(I4PCRPj{GwTcpeQ@+t_pq`b3nZEWM-m=W|{hTr`orEqP$a$$EGVy zBkq#Jtul46y`@rF4h?y?<v*29vs7I%POaL^>og7Ovlmx(3c9hI&oL1@*8Q*NmyZ3l zP=S|!69WGgApqRTe?b-eKmVKc6Vgt7dC*zsuh7CUJFf@F!GDTg4eA9VeeFb1d?K_o zZ{H6B63w}E2$IH}W(uiNCuB7W?ZV0$;T20IxleWMegL3~gXpV{k?{F<0pPrgD2hO{ z6>VW?N*BMShjI>%k&sH>$-6%Z40nW9gHw5P?D+|4F%-N)CQkHhSCYl=P(;_Z$t-2n zFwT4$%FgAnLsEkR^5U?dT3$WIwj~zrG|_aRjbW@02h8Q`CvpO_RW|YC#!+kNE59c` zg~duiC~sLbG%x0{2X>gm)(3A-LeWJ#zZY#u3WUa2Q^HkSd`{ymABd&fIOxjl0L35b z1CXtk?%*f8yY3O%I(ESap}(F9E?1N${&Vv9J_N}sUKww++u3}qFmBtPob>f$n4!bz zzE7tXrDq|DI+xNqRJm*b3ej8al%ovtcRGQO>-i}V@=N8t@vhgZVJK~a7Jqcxc-GF; z0DwFx|5b`(B#?CPZlmcwl#LUmd(b9os@Kp$t36rCv*QW}8tUA?SgQQ1sz3tpUL}f7 zQKEDTXvbl%mkx`H!FE)3LCb+4KdYqt{p7!`4JuAzHs5oh=oMPM^1)Pah@w|N1RPjt zk!q(vyjuZ>WwdY;UN+)z$x%YI#sM=oi`UHK02*3DP4DGLIpZ>Gu7CsXQ%SzI4bQo& zmOmWGRYLjox!)lfz#A!PQQA!?MFb_hU~nA0Nd)gV^Ko&Y;uPtWDEfjM*5t3daM4mo zMQB@!@(OE7OF^Q*9p?QNqa63mKek;Sf9^HccR@7C>)&b_q!%>?wGi9d4*a00-`7X} zi&awwg%ZZaLd~x(G#}AXM1wgK+PS6Gga6xrR#V!ZFF~n5ZDl$o5h$f_elkGewcf%I zs#5&`qetk7%=B{zAMS=o0sP81&*F2GCjbCqp8nM62xOvipfXSnB)qR37F$ajbV9*4 zE;3|2RX_G1Sj+oVhm&3;zmf+~nU`)`lr;uvp;_^UxW!LkOoY9NqwZ0hw39)ZIy|%F z>wgHz3t|5erWD8x<o@4J{y(*Wz3##;#7{t?_RsUCu?LU;S?Wlo4Qj~>KRYUn-~6q3 zDp{ZJFcL}??bxMR7Ub_ic|lyCp>Aw%=-9ShH^(Xs*x|P<qS6Y)xury^7ON}R_>s`~ zaRLR=q?X`&8|jAixSeTr;<wMlo9e=u$?=IH0jn_tA?(gwj}QB(Uqd>t_9h>P>sxZk zt7MDUoQGzTl8MPD5PA)W?>R&*m>$6?g(#V(xcd9*oQ0?;-ZaU07h>cF5u1gMAr4q< z{)+Oh&b2EIGr-`|pld+e>R$ds_F~ZOJU_ldWAORrvAmb+c-&wNKn?8Kg=86{pNAoU zWMLicb_6ts&h_+3bZ+4Z+_VjJ&T*19nh{F0e!#tXg?;%^(dWqZ<kzZ<LbDo~QJ*NS z*>+<NB*hl$oU~(47Qls^_k+->$3o4W+F5HqHr?^l`a*)@%BP_X1>XXUXmHKK3AgS3 zyBbTQtuiMNM63PO;)UOV^+m@M=S?e%APs6+WhUVimV&mKYCOW@4Z=!+GNZHxo~R^W z*ZKhjvG;&QSIUn12|ha}(e~!*Gf7}R(2g19;>#L$$A2I&ofxte1bmFSW&}nVh>b&- zBM8FtT&3_{W^#Zdun{;Rhu$3h@7xpR*0m1FQa1~1AZDaS6y`RM@f=9#ArwjhYb$)W zF`^ZaPmEJRl?Wh~FTj-RL+g%qXuIngTuNGH&~6wf#M+Ex#kL1cN}+`KBG3%zh|0hE z70RFV^1sf2CQA^dkx*C`8h;W1&)d6=la=EU&4Q7Nxch7z_5Ty#Nqy_DaogtGr>DLA zX&I$%gnKaFs5;Guq%>vd;FAFf$r_oI+TV=M_&Tz8{iy8<x&Fp1gJr7#0O*jiPV?Je zh)qI<BHVft#*Uwd&l^i#gsBRe!t`ZAb~c|2klpQup}m(1+FTaX`V^10zA(E;Gb}Ve z)Q3(0(Bah#G?RO{_)XJ^x2vycJk}3{z9)qpMLy`7a|`R^!7BuTW+1M$WANF!nUhcf zJb`+v1u&L9C<u_M=;#jt;V0lw(D@iT<nf<Lk3su<>&@k_azIB*0$Kz}<-E5!MwGw` zUR{Y4HLHfY0d4CVEf_%@<5dc8hVcj3qdXjd*GrjKpW$?Z#i|%I>%ux*jD@S802s3` zXOInUCKkO>&xNS8{`7y4tG|c;W@xqdHGZt(fLHg{HaJ}^Tutl9G*b4aKzxR@w6L9j zTl)KJo=+6O0si;k&D8c#iWi)2OY959O-pEvApvNFaS|sJL{-B?fZICAYqW3{!dM4p zP%j-n|3Nutp|OWcyX~@I)yAkbebpKQ3=6)rMLng8lfYJm1cFgsDNQ5kTg;89v%0u$ z9UWkY2b@b9hFTJ0cINGf19s58i;}m^OCt9nD&0jKd?|V{7RiZzcY?DD&$dNvQJ1iU z5FMCY$Swr|9zA=X^G41w^WV#d79YCQjQ9|rNx1g=BS?SB50HvrF$L9P##O6z6}ft| z6+D-+{3mxKmJi)I;g=#>nw*Ni4BTs^^xuv8`f-lS!HpztTwTiCS>xdUJ%qVNVKE4l zt+`yKivUNipj&K-rI1-latozfZVrXHPPJO;IOh^Ak8Uj@b6b`LmHiSotf#m=q?{?^ z>&o!%m}(8&mY;_4=X{YW<MlG&bVYc>fjx&jBIFR21CWm$KOc|~YF9QPaMbB$LW5yF z9-gc856CRP!42orV}LK1BcwB`rcNmL<>(hTixaamML2kGB+((2NQ#&2o?<2;$nhFf z3$W)#jwt^6V@@q-<7Wn?>7PVrH)7pNZ+Z;hBOa^&Q;}xmvHYG0!=}L72+|jY9)$ct zsWlM?G`02i!W;>(6Su6m|IP}`muCxKFvkT%$xNT%cLJW*z3uiOmPrE-!Q8BEY?Q9H zzd~D7THppH(Gy4B5uuY=`Ir_)0{MDFgjNWxWVUrbv})je>9?yd%?wwq1jL9_Nf<na zwNMsJ(}099nQ#aR6+tVKw3ofdL^O=E50NBl&O40A4+P9rkj$}l%t45Sp7T?v>ATV> zC3r13P)@}n<#$W<HO!8U)JW|BT2SDS!qRmBhXEzKk)l!b*l7czRVxi!o-Nq*)UC1t zvnaP9LAklS$3y%<?jMzA;hMbPk14o-NozVsol4tSOzP>C9(Dcj&q+MGstI1N(A&!| zB87lDoxH4rbLhsk2W9x%?M6~G_IZWA8e3_cYuEZ-IlikO{<99gvgL$Ib~%~(&~5v} z0UlR?Uh$7qIL_z;lngO0^PL@4;=W}6v5gIRw?`N?0$?^#$Wh#K9DER+`%iWNtV=<5 z|1X}T9EJXL+s$*aiYEjvFLNAqEwymX2nBI}2DKrQvBVOoS$Gw+me7hh1|FrB!q)SE zwT6Yc=I`ZX<v8iN1uD#Zefm#9BT8xD$(7*;N7q5$<%U(LVlmU4NR)28cKyzTHRTHh z&pZq80O&PtrRqUlur)v0$s@`s3+2}q>tyK^Q8c8htF2cl6drOG%^JdCS0L))CGCAn z!i1%IYZfEQybia7#n1=ZbCkGYP0&5{Jg{IEhj4ANaS3&2ipr8ru}0g>ZBeMg^Cdc~ zEmFP7KLNA=dpN!a^ii{9E-*3P&|By|_sY~;ABc3K0Lw@K4XyL&{sDc&$w|$O)U4dS z+t?SjJsK#>GW}Oz9*9s}trJ-4Jr{Q|)mUv8V)^mkf$@!Vu2t#G=CNvDoxxI#+ZC=s zY$z@OH*g>ECQd2GwBYf)0rtfnHj*Jh3=7}sKb;A|t0PMWftnCX4^=p_*zNF1^QF40 zV-JaOc726>-g=VWj;om%;k8L&6IbIAZb+08yZKU%D(<*5Mpygt=eXQ-a5l}e*ab2M z2Hy}!Isb70;j-F;<@u~Ra9*E<*^T(Zt{PvM7Wc-X?s*Ee#k>2|z)r&ft1!@TXup!s zd39bRL+e_3|C@ldU4O;Fh0DwDC>aU3^lNiVFx3?;vF%;bS{jg|k1XNbf^&1A$Q6bB zb%vqc+PQ@M)vEShjx8#Qgpa9bF^QSV`ei`o8ew782EnUQR9PT0eh91l-t*62;W*P9 zd(o*P*vyRKkH(Y`m_G;0+DHrDw(i!7(wtya6y^>sOTlX1@AV43MV*OK+;;9tRrlz( zBEAmmYXl160?reBpSs>n<ntARqC#ecbXB;m&gh6j(yyi6F3W^tcnZgW3+Igk+@r_} zlDw-1<7$s@Pc;h7iB~v!3-haG;7Zb3s3(QXXezVl6p)C}*T@mHd*e6(M%G~WE~MiJ zDCX5H=C<dzr=qqnc|p}1<^6Y_@p^nY1&R#y9HkRQr!JVq5Br!fD#|dMmW-G$)Fq3B zY)G@a>xBx`l04hws<szv)*eX&ok9tsWGs_z>eYD_9ecps6JrenBOsMjbd9Sd(GtJ4 z4lxxaOU}a_W008_<d)y1-EDN#*^8(g0Sapt&z!+ax(t?<x(xYH2Z6aMhThKxCbd<o zP3Lki^K}S?u(quSODTg+QGf(t{73)F6{X6V&9e1{bNKl$-m24n0j1GlEQ5}V4XjS5 z5EtqLfdrCt)RPR^844@LQ1uutT8|GomBU%u-`-uvo~<W_4g=6p&ce;mjAFlg9^m6t z5@Ew9TJx4+OrBAvq9>S)0?j-+091so7Sd-)j?(^)pT!BagMdamo}-=q6ow^-jzNlH zFh2Q2FaiiM72V|xNsuo6q2jg+wa`2$P0)EPV5Ju@1UC#6GHUfbotMN3!|fYoVUXj* z;ABuZsJuPUj!HhjJwswzWyw2iOp3ooK}h4^CSfGyY|FJSu_L0|d9;(#Huksyjmg%8 znARMngGk4n>v(0Q?o1uGqH_DrH)2{JYYtCKI}>rjD3;}i!`-1(5L(CHKO}w2)X|c( zQVrjuaa6=Oj?(l=`D@uS9+>t=2afIcLQ;s&1u)9vjk3esvVpXDR7jzD<gLi=I<<VG zx2MM&!kdAn|2|HGA*558`UBsKXP#ce2D@tmYqwd1qDvTsw!#QPDqwxOc4xI2Jxd<N z#iBcang>q<Lnpx6Y!o8G?h;`<TGcs@=J%9xfUqc4>KF-`U*(9SJ~VUi_};Ex{r40F zul(J}|2MON^gJkf+S4?mipe3{=|Dqe=jJv`VCl{On%qGmaRw$0jZR75#o?fxae>0W z2(K`iUj9(;3h3@^Mr-|{C(v4Z$Hrw1!ks+Y@8C*#f<PD>)%YwF!#LU?5V$2I{y&2; zn>gUOQ?B~WW@i>#zJK5tc~fe#Ey-lzuph^L=4)?rTrT;o4ATutL)HZyB`P`tg>m#~ z9_>s~+Yeh|ec2i!dT8@y{}VKEn<j}Can?7=8nX|x;wTEe{0|6%fOA5RTHnCInCp$& z<xh2&pf5Fpw2IFAVZL4USG3qA`-^CJbZLmDgMy)#7nG-=Tqr|`vidr}w#{ynEU>`^ z1m>0y@zc{I>`)O@!qG_qbn{s>xIbVK(iIN)*xwq}|Hd|efhNdV01=a0CKKF}LD66h z%yZj91=;*BR)k}B-D6atDF-@IDXS0vgn<!81Na>&pofONNoGJufe)1lK`MvocT6s{ zmoPE+34y?y0?w)x1O(W03qlCJLAL(p!iv2?cdim5@Kbl@c3!9AUsKXc0(fcV<50f; zRT9z9nbq~d9EcZ0iB%x#K(!r)_vGVaV3ex36L$XlIQd(N+S-co_(P~|vmh*K=`(Rl z0&ZH?hz!C(9B^^t0=XBDf}^9%d6xg$qz#ZePNnsZ3vfxN#TW1!BQ7Xq7hrS_`U|Jh zQMhAoB!Iaa3g1U^S|C6w2J5KJU3+!jn6VjO%eUz3meqMXOMi)kWwL0C7?Ihn`{SF> znvKs!)iyu4bIjTC!k@Jvf6~I<W;|4z&R{D&Z+O|@$QL<7&u7lRxJf%PVz1?&Ki8p| z*!<Nox|x^ry+A4Tp--NFgu;)iryf5!)wsd+@M=drd27$oMzEi9InO+UUR39^?LIs= zy42325iauZ`Dvl-F0Usy`*#uta@EznZry5qan8c!r~Qxgj2*_yM5;{1ubw{Keeq31 zemf5-eyxA2_FleRGcY<KthR;?ejkFxU5|GDi)`@7P#s$Wr19dz*P;;6Tw0BTc(4bQ zf@N4%$OMJ*e6y;lNi*NH%A@uUDse9|y?XT#;0L(+N!yOpzTqlQDx~Aw++3%AwR@nM zQEgO@ii^ws<B$Eq!orM+^C+!?Ox5_Uym|0GMN&#iOa1*)?3Kt{@YZ-)w`3W&(LCxL z=^74(`Dr~pdj5+1d`ba5?!TfRdq;kMFyLi1D378O5^@jm$V>yp6))$RF^Xjcvp>AD z(Es+Mm*QXV{B$tj=edOieFukZF)^{8&d%4RrI(^(VoH7kIR}Eg`H>y%8WH5cG8XM3 zVJ8SX-xJBk2tM7LH(%Vkb?cJ%te$_J`NYJ;*&p}DMm*kU*`1;F$J3`b9UXIK&bX(5 z@X`@9RUS1X@sc)w+|t#hHt@K`MzDuDI68KL)Gj(UHtVcz#&r3RtFEzeTtGm;b#>LB z=N1?HK)@6o9W7ijlph;=d;u)h<H^H=zLehIZ&Wqns<btox&^Y+2H9WoK8xH_d5My0 zUX(9~CnhJwE?m&EYfJ0|4$ljsau#$QKZ`z(yKzQ{KrVy>jgQ#PvGwhl#LK8CL412= zSJ$o24GptXc&x#hpC$PYadr(4=dG`Meg6D8J1gsV*9w!evN8bHsoIH`U+Z8PJzl^4 zmQADiz+#AV)Y%AyMc@d-)RZKC-tPc4^p9uHqR2~zzP=S89khYq_RT3QBvFV_jk1^X zRIhNnX#p)OD)q6F7^Bna@ltjXpzpn^ucvqXI)|tUA*2ovX(p2yQdUus5W&VVGdtS_ zda{i9d1rlned5HO`vMIOVFDppZEf1UJw0cHg|FiDjEu5=7kUG-R24oxzIf%}>aQ=K zsO{Ug&(Xz2KPad+LI6{MN=tjzc0vf^rj;oGfWOR9HE~_t-DUQjr@_Fwj~+c5=jJ*3 zP2%K_HjBWFAI2B7#Y<m1d-klOv-72w`?Y;Hm#tt0Ga#Lo;$!E8Er^3*K%}kZQDs`~ z{`JA>7f)u4+*3eQsL>TJJ9^n!@zkj+;xW$#uI=14v<NQyHa^ap^9N;buPT?)CA~Ib z2SDlZN`d82ccb1ue{$x^6%JlYOG_0O7nf@0)|{65eKzXGMs8)L5omDwKv#Ud@je@< z!wutQT;d`4TtU7EM~AAC0svR^3=KKS^YaP`3aaAtZrn(5BQOkFTU)n4D^%t^XAu4J zrTYP{2EEI?eFG(S-f%FHmtQ3(7d(Evhb`NZXTSUy0!O+(>|;}VyDYjv5cMW02j2$o z&PaXtIzB$$i@s`e!MOOhxVX5%8sEZXzi<0M{-p(iyUH0q_doOU&O`9bfe_|p`&1NZ zt`|I9-_$e@_TKBIc-hp+DL3uhIT_}8Fw0aso&*r2LE-56k4n=jFSkMOSZdpPq7{#^ z$Ti59<m2G<TpX5Dg7N|G-20qc3iht*?dzMHnW>Hfm5zc16v4nAGZuy_a`W=8m)N#S zCjO-T;^oUU_5(-lL6^Derzr0}77Aw$rUwVJ2J6g!`}_=7a6XJx0#v^3bYf@3-ES!= zY&Ykj+h}TQGcYwx%*n~|>eV-li})c79b-~_YG4=6^80z`2b~I|g`kJI)4vItfIRfZ z<>jv)L|{LIK0NKihhN!u?-CXf(X3vzx*_yaC+OmHVf6ZCeC4?IzRVLr`8+PKf|h-f z-+TUY@9ihPC$|7*<?;tDU-9<wnFaowq$N9IE&mg5Ms~IaNHbq)YHId$b-l6tB@6zH zj+#@c)U(GA&hF6HGVXrZwa0?Mti4A%Bs+XkV|IG__{Mej3{Z^fD&yU<61i^I`n`pG zQps`ruA{rd_Smfr3<N{;(1I<E_h+w@C#W=<n5d}weH9h>UQ9KEHl(HgfbG#oD<}fs zC}-s4{HJdk8fL1TOrcP$!7t(>_Hk4~dn#mBHOb6MPrr(*tlQ0sFE5t?7mYl=G&dI| zWL6*KUn^*}yNW)UAbq#tARi_q=+oP+t$@}f1(J$b|KkUozr{ghPfSgL-3*{L?1a6* zjaAb#G8{qBD98`twi-}3SmB1>zkmPr#(%O=CMGm+KA>B7-T~ccCqVeeKBIe>7A1D= z2CAwLci(?kv@%lT+dScM_QHkM8<N+5hJmxLw|7~5LPA&rC%<AAY+-oV^dZsmS?vA$ z_jh%lLUxw=3e0i==$gsN$*H<Ivg|<@yXThGY2ueJHv$7|fV2thzBzW1ot-@Z!UUW; z6TVJh0d>7ASt+p8>f;_dBcyv}?8h)Jfj%}a#gmbZsc+t#hB~BgXgCL_-aXr!O9ho? zLd26Z<u=Vn;e2zzWYBl?LJL2!Mi<h6><XDu*VD5kNEr`GQ+Xv&GS5QhhwO)z1sFJi zi@-?@T3cIBLz7Ey9^cWtn|HJ0;?b?GEed=cVl;3m8?IV0?C}h~a+3;QZHkZB6P69x z7DVPL5Y`|i*CrB)=+{k>{U^D#;y*Eon^QO(&I=SYHzPp;si~o%flCtz*}MYSOjk=Q zTtp{14>D~X^xJ~8ukSwnhGKoFs=J53Pr#8-_A|gG!^0@_0XhZ$`-T5o?7?KS;oKd@ TOF(weW6`*({aeAWmJj|1oq}dV diff --git a/test/data/annotate/exp_files/res_H299_H561-ESCO00005.fna b/test/data/annotate/exp_files/res_H299_H561-ESCO00005.fna index e9017c69..6cacdcc1 100755 --- a/test/data/annotate/exp_files/res_H299_H561-ESCO00005.fna +++ b/test/data/annotate/exp_files/res_H299_H561-ESCO00005.fna @@ -1,4 +1,4 @@ ->ESCO.0216.00005.0001 +>ESCO.0216.00005.0001 3480 AGCAGCACGCTTTTTAATCCGTCCACCGGACCTTCAACCGGCCCTTCGCTGATGGCATCG ATCACACTCAGCAGCTGCGTGGACTTCAGGTTGTCCTTCGCTTCGCGCGGGGTATGCCCC TTACTGCTGCCTTTACCCATTCGTCATGCTCCATAAACGATAAAACCGCCCGGAGGCGGT @@ -57,7 +57,7 @@ ATCATTCAGAGCCTGCATTTTCTCTTGAAAAACATGATCTGACATTCCACGGGATTTATC TGCATAGTCACGTTCAAGTTGCAGACGCTGATTGTTATATCCATGTTCAATCCGCAGTAA TTCCTGCTGGCGTTGCTGATTTTTATCGCCAACCCCATAACCAGCAATCTGAATATCATA CCCCTGCTGACGATTATCAATCGAAGCCTGCAATGAATCACGCCATGCTGTTATTTCGGC ->ESCO.0216.00005.0002 +>ESCO.0216.00005.0002 7080 AGCAGCACGCTTTTTAATCCATCCACCGGACCTTCAACCGGCCCTTCGCTGATGGCATCG ATCACACTCAGCAGCTGCGTGGACTTCAGGTTGTCCTTCGCTTCGCGCGGGGTATGCCCC TTACTGCTGCCTTTACCCATTCGTCATGCTCCATAAACGATAAAACCGCCCGGAGGCGGT @@ -176,7 +176,7 @@ ATCCACGAATCCAGCTCTGAATCCGGCACCTGAGCAGGCAGGAAAACTTCAATATGCAGC TCTGCCTGCCAGGTATCGCTGTCCAGCTCTTCGCCCGTGTATTCAGCGCCGGTGAGATAA ACGGCAATTGCCGGAAAATCTTCCTCATCAAAAACAGCGGGGCGACCATCAAAAAGCGTC GCCCCGGTGTCATGCTTCTCCAGTGCATCCAGTACGGCTGCACGGAGTTCAGTATGTTTC ->ESCO.0216.00005.0003 +>ESCO.0216.00005.0003 2583 ATCATACAGTCATTTGTTAATATCTGTCTGACAAGGTACCAAGCAGAAAATAGGGTTAAG ATATTTGTAGCCATAAATATCATATATACGTTAGCCTATTTTTTTGGGGTTTTATATTCC TTATTTATAGATCACTGGGATAATGTTTGGAAAAATATTATTTTGTTTTATGTGCTTACA diff --git a/test/data/annotate/exp_files/res_test_write_discard.lst b/test/data/annotate/exp_files/res_test_write_discard.lst index ed71bc47..9ae80dbc 100755 --- a/test/data/annotate/exp_files/res_test_write_discard.lst +++ b/test/data/annotate/exp_files/res_test_write_discard.lst @@ -1,3 +1,3 @@ -orig_name gsize nb_conts L90 -genome1 4564855 156 40 -genome2 6549 16 8 +orig_name to_annotate gsize nb_conts L90 +genome1 genome1 4564855 156 40 +genome2 genome2 6549 16 8 diff --git a/test/data/annotate/exp_files/res_test_write_discard_1genome.lst b/test/data/annotate/exp_files/res_test_write_discard_1genome.lst new file mode 100644 index 00000000..e645589e --- /dev/null +++ b/test/data/annotate/exp_files/res_test_write_discard_1genome.lst @@ -0,0 +1,2 @@ +orig_name to_annotate gsize nb_conts L90 +genome1 genome1 4564855 156 40 diff --git a/test/data/annotate/exp_files/res_test_write_info_qc.lst b/test/data/annotate/exp_files/res_test_write_info_qc.lst index 69e60005..5d303a89 100755 --- a/test/data/annotate/exp_files/res_test_write_info_qc.lst +++ b/test/data/annotate/exp_files/res_test_write_info_qc.lst @@ -1,6 +1,6 @@ -orig_name gsize nb_conts L90 -genome3 9876546 6 2 -genome1 4564855 156 40 -B2_A3_5.fasta-problems 456464645 5 1 -genome2 6549 16 8 -H299_H561.fasta 12656 3 1 +orig_name to_annotate gsize nb_conts L90 +H299_H561.fasta H299_H561.fasta 12656 3 1 +B2_A3_5.fasta-problems B2_A3_5.fasta-problems 456464645 5 1 +genome1 genome1 4564855 156 40 +genome2 genome2 6549 16 8 +genome3 genome3 9876546 6 2 diff --git a/test/data/annotate/exp_files/res_test_write_lstinfo.lst b/test/data/annotate/exp_files/res_test_write_lstinfo.lst index 4d6d57c6..750d1ce1 100755 --- a/test/data/annotate/exp_files/res_test_write_lstinfo.lst +++ b/test/data/annotate/exp_files/res_test_write_lstinfo.lst @@ -1,6 +1,6 @@ -gembase_name orig_name gsize nb_conts L90 -genome.0417.00001 genome3 9876546 6 2 -genome.0417.00008 genome1 4564855 156 40 -toto.0417.00006 B2_A3_5.fasta-problems 456464645 5 1 -toto.0417.00008 genome2 6549 16 8 -toto.0417.00010 H299_H561.fasta 12656 3 1 +gembase_name orig_name to_annotate gsize nb_conts L90 +toto.0417 H299_H561.fasta test/data/annotate/genomes/H299_H561.fasta 12656 3 1 +toto.0417 B2_A3_5.fasta-problems test/data/annotate/genomes/B2_A3_5.fasta-problems 456464645 5 1 +toto.0417 genome3 test/data/annotate/genomes/genome3 9876546 6 2 +toto.0417 genome2 test/data/annotate/genomes/genome2 6549 16 8 +toto.0417 genome1 test/data/annotate/genomes/genome1 4564855 156 40 diff --git a/test/data/annotate/test_files/list_genomes-multi-files.txt b/test/data/annotate/test_files/list_genomes-multi-files.txt index 8a409cef..1381fc6e 100755 --- a/test/data/annotate/test_files/list_genomes-multi-files.txt +++ b/test/data/annotate/test_files/list_genomes-multi-files.txt @@ -1,4 +1,4 @@ -A_H738.fasta B2_A3_5.fasta-split5N.fna-gembase.fna +A_H738.fasta B2_A3_5.fasta-split5N.fna-short-contig.fna H299_H561.fasta genome6.fasta genome.fna ::ABCD genome2.fasta::TOTO @@ -9,4 +9,4 @@ genome4.fasta :: TOTO. genome5.fasta :: TOTO.0114 -toto.fst toto.fasta genome.fst \ No newline at end of file +toto.fst toto.fasta genome.fst diff --git a/test/data/annotate/test_files/lstinfo-miss-1genome.lst b/test/data/annotate/test_files/lstinfo-miss-1genome.lst new file mode 100644 index 00000000..59693ccc --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-miss-1genome.lst @@ -0,0 +1,6 @@ +to_annotate toto L90 gsize nb_conts +test/data/annotate/genomes/genome1.fasta wrong 5 800 6 +data/annotate/genomes/A_H738-and-B2_A3_5.fna ignored 6 7000 78 + +test/data/annotate/genomes/genome7.fasta ignored_too 65 79705 80 + diff --git a/test/data/annotate/test_files/lstinfo-no-genome.lst b/test/data/annotate/test_files/lstinfo-no-genome.lst new file mode 100644 index 00000000..882948f9 --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-no-genome.lst @@ -0,0 +1,6 @@ +to_annotate toto L90 gsize nb_conts +data/annotate/genomes/genome1.fasta wrong 5 800 6 +test/annotate/genomes/A_H738-and-B2_A3_5.fna ignored 6 7000 78 + +test/data/genomes/genome7.fasta ignored_too 65 79705 80 + diff --git a/test/data/annotate/test_files/lstinfo-no-header.lst b/test/data/annotate/test_files/lstinfo-no-header.lst new file mode 100644 index 00000000..c1545b2b --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-no-header.lst @@ -0,0 +1 @@ +toto toti 4 5 5 diff --git a/test/data/annotate/test_files/lstinfo-not-all-filled.lst b/test/data/annotate/test_files/lstinfo-not-all-filled.lst new file mode 100644 index 00000000..b8f8bd9d --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-not-all-filled.lst @@ -0,0 +1,6 @@ +to_annotate toto L90 gsize nb_conts +test/data/annotate/genomes/genome1.fasta wrong 800 6 +test/data/annotate/genomes/A_H738-and-B2_A3_5.fna ignored 6 7000 78 + +test/data/annotate/genomes/genome7.fasta ignored_too 65 79705 80 + diff --git a/test/data/annotate/test_files/lstinfo-not-int-nbcont.lst b/test/data/annotate/test_files/lstinfo-not-int-nbcont.lst new file mode 100644 index 00000000..ac9accfb --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-not-int-nbcont.lst @@ -0,0 +1,6 @@ +to_annotate toto L90 gsize nb_conts +test/data/annotate/genomes/genome1.fasta wrong 5 800 6 +test/data/annotate/genomes/A_H738-and-B2_A3_5.fna ignored 6 7000 "78" + +test/data/annotate/genomes/genome7.fasta ignored_too 65 79705 80 + diff --git a/test/data/annotate/test_files/lstinfo-wrong-header.lst b/test/data/annotate/test_files/lstinfo-wrong-header.lst new file mode 100644 index 00000000..6f1387f3 --- /dev/null +++ b/test/data/annotate/test_files/lstinfo-wrong-header.lst @@ -0,0 +1,3 @@ +to_annotate nb_conts L90 +toto toti 4 5 5 + diff --git a/test/data/annotate/test_files/lstinfo.lst b/test/data/annotate/test_files/lstinfo.lst new file mode 100644 index 00000000..3938c159 --- /dev/null +++ b/test/data/annotate/test_files/lstinfo.lst @@ -0,0 +1,6 @@ +to_annotate toto L90 gsize nb_conts +test/data/annotate/genomes/genome1.fasta wrong 5 800 6 +test/data/annotate/genomes/A_H738-and-B2_A3_5.fna ignored 6 7000 78 + +test/data/annotate/genomes/genome7.fasta ignored_too 65 79705 80 + diff --git a/test/test_unit/test_utils.py b/test/test_unit/test_utils.py index 4a0240c5..c8a7f164 100755 --- a/test/test_unit/test_utils.py +++ b/test/test_unit/test_utils.py @@ -5,17 +5,40 @@ Unit tests for utils.py """ -import genomeAPCAT.utils as utils +import PanACoTA.utils as utils +import test.test_unit.utilities_for_tests as utilities import pytest import os import logging import shutil import matplotlib +import argparse matplotlib.use('AGG') # Define variables used by several tests -BASELINE_DIR = os.path.join("..", "data", "annotate", "exp_files", "baseline") +DATA_DIR = os.path.join("test", "data", "annotate") +BASELINE_DIR = os.path.abspath(os.path.join(DATA_DIR, "exp_files", "baseline")) +GENEPATH = os.path.join(DATA_DIR, "generated_by_unit-tests") +LOGFILE_BASE = "test_prokka" +LOGFILES = [LOGFILE_BASE + ext for ext in [".log", ".log.debug", ".log.details", ".log.err"]] + +@pytest.fixture(autouse=True) +def setup_teardown_module(): + """ + Remove log files at the end of this test module + """ + # Init logger to level detail (15) + utils.init_logger(LOGFILE_BASE, logging.DEBUG, 'test_utils', verbose=1) + os.mkdir(GENEPATH) + print("setup") + + yield + for f in LOGFILES: + if os.path.exists(f): + os.remove(f) + shutil.rmtree(GENEPATH) + print("teardown") # Start tests @@ -39,115 +62,130 @@ def test_plot_dist(): """ Plot a given distribution, and check that output is as expected """ + logger = logging.getLogger("test_utils") values = [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 10] limit = 3 res_dir = os.path.join("test", "data", "annotate") - os.makedirs(res_dir, exist_ok=True) # reffile = os.path.join("test", "data", "annotate", "exp_files", "res_plot_distr.png") title = "Distribution test" text = "Max L90 =" - myfig = utils.plot_distr(values, limit, title, text) + myfig = utils.plot_distr(values, limit, title, text, logger) return myfig -def test_skipped_prokka(capsys): +def test_skipped_prokka_prodigal(caplog): """ Test that when the list of skipped genomes (because of prokka run) is not empty, it writes the right message. """ - logfile_base = "test_prokka" - utils.init_logger(logfile_base, 0, '', verbose=1) + caplog.set_level(logging.DEBUG) skipped = ["toto", "genome", "genome2"] utils.write_warning_skipped(skipped) - out, err = capsys.readouterr() - assert ("Prokka had problems while annotating some genomes, or did not " + assert ("prokka had problems while annotating some genomes, or did not " "find any gene. Hence, they are not " - "formatted, and absent from your output database. Please look at their " - "Prokka logs (<output_directory>/tmp_files/<genome_name>-prokka.log) and " - "to the current error log (<output_directory>/<input_filename>.log.err)" - " to get more information, and run again to annotate and format them. " - "Here are the genomes (problem with prokka or no " - "gene found):") in err - assert ("\\n\\t- toto\\n\\t- genome\\n\\t- genome2" in err or - "\n\t- toto\n\t- genome\n\t- genome2" in err) - os.remove(logfile_base + ".log") - os.remove(logfile_base + ".log.details") - os.remove(logfile_base + ".log.err") + "formatted, and absent from your output database. Please look at the " + "current error log (<output_directory>/PanACoTA-annotate_list_genomes[-date].log.err) " + "to get more information on the problems. " + "Here are those genomes:") in caplog.text + assert "\n\t- toto\n\t- genome\n\t- genome2" in caplog.text + utils.write_warning_skipped(skipped, prodigal_only=True) + assert ("prodigal had problems while annotating some genomes, or did not " + "find any gene. Hence, they are not " + "formatted, and absent from your output database. Please look at the " + "current error log (<output_directory>/PanACoTA-annotate_list_genomes[-date].log.err) " + "to get more information on the problems. " + "Here are those genomes:") in caplog.text + assert "\n\t- toto\n\t- genome\n\t- genome2" in caplog.text -def test_skipped_format(capsys): +def test_skipped_format(caplog): """ Test that when the list of skipped genomes (format step could not run) is not empty, it writes the right message. """ - logfile_base = "test_prokka" - utils.init_logger(logfile_base, 0, '', verbose=1) + caplog.set_level(logging.DEBUG) skipped_format = ["toto", "genome", "genome2"] utils.write_warning_skipped(skipped_format, do_format=True) - out, err = capsys.readouterr() assert ("Some genomes were annotated by prokka, but could not be formatted, " - "and are hence absent from your output database. Please look at log " - "files to get more information about why they could not be ") in err - assert ("formatted.\n\t- toto\n\t- genome\n\t- genome2\n" in err or - "formatted.\\n\\t- toto\\n\\t- genome\\n\\t- genome2" in err) - os.remove(logfile_base + ".log") - os.remove(logfile_base + ".log.details") - os.remove(logfile_base + ".log.err") + "and are hence absent from your output database. Please look at " + "'<output_directory>/PanACoTA-annotate_list_genomes[-date].log.err' and .details " + "files to get more information about why they could not be formatted") in caplog.text + assert ("\n\t- toto\n\t- genome\n\t- genome2\n") in caplog.text + -def test_write_discarded(): +def test_write_discarded(caplog): """ Test that the list of discarded genomes is written as expected. """ + caplog.set_level(logging.DEBUG) gnames = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome1", "genome2", "genome3"] - gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] - genomes = {gnames[0]: ["toto.0417", gpaths[0], 12656, 3, 1], - gnames[1]: ["toto.0417", gpaths[1], 456464645, 5, 1], - gnames[2]: ["toto.0417", gpaths[2], 4564855, 156, 40], - gnames[3]: ["toto.0417", gpaths[3], 6549, 16, 8], - gnames[4]: ["toto.0417", gpaths[4], 9876546, 6, 2] + gpaths = [os.path.join(DATA_DIR, "genomes", name) for name in gnames] + genomes = {gnames[0]: ["toto.0417", gpaths[0], gpaths[0], 12656, 3, 1], + gnames[1]: ["toto.0417", gpaths[1], gpaths[1], 456464645, 5, 1], + gnames[2]: ["toto.0417", gpaths[2], gpaths[2], 4564855, 156, 40], + gnames[3]: ["toto.0417", gpaths[3], gpaths[3], 6549, 16, 8], + gnames[4]: ["toto.0417", gpaths[4], gpaths[4], 9876546, 6, 2] } kept_genomes = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome3"] - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - res_path = os.path.join("test", "data", "annotate") - utils.write_discarded(genomes, kept_genomes, list_file, res_path) - outfile = os.path.join("test", "data", "annotate", "discarded-list_genomes.lst") - exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_test_write_discard.lst") + list_file = os.path.join("titi", "toto", "list_genomes.txt") + utils.write_genomes_info(genomes, kept_genomes, list_file, GENEPATH, qc=False) + outfile = os.path.join(GENEPATH, "discarded-list_genomes.lst") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_write_discard.lst") + assert "2 genomes were discarded" in caplog.text + # There is no order in the discarded file. So, just check that the lines + # written are as expected. + assert utilities.compare_file_content(outfile, exp_file) + + +def test_write_discarded_1genome(caplog): + """ + Test that the list of discarded genomes is written as expected. + """ + caplog.set_level(logging.DEBUG) + gnames = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome1", "genome2", "genome3"] + gpaths = [os.path.join(DATA_DIR, "genomes", name) for name in gnames] + genomes = {gnames[0]: ["toto.0417", gpaths[0], gpaths[0], 12656, 3, 1], + gnames[1]: ["toto.0417", gpaths[1], gpaths[1], 456464645, 5, 1], + gnames[2]: ["toto.0417", gpaths[2], gpaths[2], 4564855, 156, 40], + gnames[3]: ["toto.0417", gpaths[3], gpaths[3], 6549, 16, 8], + gnames[4]: ["toto.0417", gpaths[4], gpaths[4], 9876546, 6, 2] + } + kept_genomes = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome3", "genome2"] + list_file = os.path.join("titi", "toto", "list_genomes.txt") + utils.write_genomes_info(genomes, kept_genomes, list_file, GENEPATH, qc=False) + outfile = os.path.join(GENEPATH, "discarded-list_genomes.lst") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_write_discard_1genome.lst") + assert "1 genome was discarded" in caplog.text # There is no order in the discarded file. So, just check that the lines # written are as expected. - with open(outfile, "r") as outf, open(exp_file, "r") as expf: - exp_lines = expf.readlines() - out_lines = outf.readlines() - assert set(out_lines) == set(exp_lines) - os.remove(outfile) + assert utilities.compare_file_content(outfile, exp_file) -def test_write_discarded_qc(): +def test_write_discarded_qc(caplog): """ Test that the list with information on all genomes when we run with QC only is written as expected """ + caplog.set_level(logging.DEBUG) gnames = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome1", "genome2", "genome3"] - gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] - genomes = {gnames[0]: ["toto.0417", gpaths[0], 12656, 3, 1], - gnames[1]: ["toto.0417", gpaths[1], 456464645, 5, 1], - gnames[2]: ["toto.0417", gpaths[2], 4564855, 156, 40], - gnames[3]: ["toto.0417", gpaths[3], 6549, 16, 8], - gnames[4]: ["toto.0417", gpaths[4], 9876546, 6, 2] + gpaths = [os.path.join(DATA_DIR, "genomes", name) for name in gnames] + genomes = {gnames[0]: ["toto.0417", gpaths[0], gpaths[0], 12656, 3, 1], + gnames[1]: ["toto.0417", gpaths[1], gpaths[1], 456464645, 5, 1], + gnames[2]: ["toto.0417", gpaths[2], gpaths[2], 4564855, 156, 40], + gnames[3]: ["toto.0417", gpaths[3], gpaths[3], 6549, 16, 8], + gnames[4]: ["toto.0417", gpaths[4], gpaths[4], 9876546, 6, 2] } kept_genomes = [] - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - res_path = os.path.join("test", "data", "annotate") - utils.write_discarded(genomes, kept_genomes, list_file, res_path, qc=True) - outfile = os.path.join("test", "data", "annotate", "info-genomes-list_genomes.lst") - exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_test_write_info_qc.lst") + list_file = os.path.join(DATA_DIR, "list_genomes.txt") + utils.write_genomes_info(genomes, kept_genomes, list_file, GENEPATH, qc=True) + outfile = os.path.join(GENEPATH, "ALL-GENOMES-info-list_genomes.lst") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_write_info_qc.lst") + assert ("Writing information on genomes in " + "test/data/annotate/generated_by_unit-tests/ALL-GENOMES-info-list_genomes.lst") in caplog.text # There is no order in the discarded file. So, just check that the lines # written are as expected. - with open(outfile, "r") as outf, open(exp_file, "r") as expf: - exp_lines = expf.readlines() - out_lines = outf.readlines() - assert set(out_lines) == set(exp_lines) - os.remove(outfile) + assert utilities.compare_file_content(outfile, exp_file) def test_write_discarded_empty(): @@ -157,40 +195,38 @@ def test_write_discarded_empty(): """ genomes = {} kept_genomes = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome3"] - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - res_path = os.path.join("test", "data", "annotate") - utils.write_discarded(genomes, kept_genomes, list_file, res_path) - outfile = os.path.join("test", "data", "annotate", "discarded-list_genomes.lst") + list_file = os.path.join(DATA_DIR, "list_genomes.txt") + utils.write_genomes_info(genomes, kept_genomes, list_file, GENEPATH) + outfile = os.path.join(GENEPATH, "discarded-list_genomes.lst") with open(outfile, "r") as outf: all_lines = outf.readlines() assert len(all_lines) == 1 - assert all_lines[0] == "orig_name\tgsize\tnb_conts\tL90\n" - os.remove(outfile) + assert all_lines[0] == "orig_name\tto_annotate\tgsize\tnb_conts\tL90\n" -def test_write_discarded_all_kept(): +def test_write_discarded_all_kept(caplog): """ Test that when all genomes are kept, the discarded lst file only contains the header line. """ + caplog.set_level(logging.DEBUG) gnames = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome1", "genome2", "genome3"] - gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] - genomes = {gnames[0]: ["toto.0417", gpaths[0], 12656, 3, 1], - gnames[1]: ["toto.0417", gpaths[1], 456464645, 5, 1], - gnames[2]: ["toto.0417", gpaths[2], 4564855, 156, 40], - gnames[3]: ["toto.0417", gpaths[3], 6549, 16, 8], - gnames[4]: ["toto.0417", gpaths[4], 9876546, 6, 2] + gpaths = [os.path.join(DATA_DIR, "genomes", name) for name in gnames] + genomes = {gnames[0]: ["toto.0417", gpaths[0], gpaths[0], 12656, 3, 1], + gnames[1]: ["toto.0417", gpaths[1], gpaths[1], 456464645, 5, 1], + gnames[2]: ["toto.0417", gpaths[2], gpaths[2], 4564855, 156, 40], + gnames[3]: ["toto.0417", gpaths[3], gpaths[3], 6549, 16, 8], + gnames[4]: ["toto.0417", gpaths[4], gpaths[4], 9876546, 6, 2] } kept_genomes = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome3", "genome2", "genome1"] - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - res_path = os.path.join("test", "data", "annotate") - utils.write_discarded(genomes, kept_genomes, list_file, res_path) - outfile = os.path.join("test", "data", "annotate", "discarded-list_genomes.lst") + list_file = os.path.join(DATA_DIR, "list_genomes.txt") + utils.write_genomes_info(genomes, kept_genomes, list_file, GENEPATH) + outfile = os.path.join(GENEPATH, "discarded-list_genomes.lst") with open(outfile, "r") as outf: all_lines = outf.readlines() assert len(all_lines) == 1 - assert all_lines[0] == "orig_name\tgsize\tnb_conts\tL90\n" - os.remove(outfile) + assert all_lines[0] == "orig_name\tto_annotate\tgsize\tnb_conts\tL90\n" + assert "0 genome was discarded" in caplog.text def test_write_lstinfo(): @@ -198,22 +234,18 @@ def test_write_lstinfo(): Test that lstinfo file is written as expected. """ gnames = ["H299_H561.fasta", "B2_A3_5.fasta-problems", "genome1", "genome2", "genome3"] - gpaths = [os.path.join("test", "data", "annotate", "genomes", name) for name in gnames] - genomes = {gnames[0]: ["toto.0417.00010", gpaths[0], 12656, 3, 1], - gnames[1]: ["toto.0417.00006", gpaths[1], 456464645, 5, 1], - gnames[2]: ["genome.0417.00008", gpaths[2], 4564855, 156, 40], - gnames[3]: ["toto.0417.00008", gpaths[3], 6549, 16, 8], - gnames[4]: ["genome.0417.00001", gpaths[4], 9876546, 6, 2] + gpaths = [os.path.join(DATA_DIR, "genomes", name) for name in gnames] + genomes = {gnames[0]: ["toto.0417", gpaths[0], gpaths[0], 12656, 3, 1], + gnames[1]: ["toto.0417", gpaths[1], gpaths[1], 456464645, 5, 1], + gnames[2]: ["toto.0417", gpaths[2], gpaths[2], 4564855, 156, 40], + gnames[3]: ["toto.0417", gpaths[3], gpaths[3], 6549, 16, 8], + gnames[4]: ["toto.0417", gpaths[4], gpaths[4], 9876546, 6, 2] } - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - outdir = os.path.join("test", "data", "annotate") - utils.write_lstinfo(list_file, genomes, outdir) - outfile = os.path.join(outdir, "LSTINFO-list_genomes.lst") - exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_test_write_lstinfo.lst") - with open(outfile, "r") as outf, open(exp_file, "r") as expf: - for line_out, line_exp in zip(outf, expf): - assert line_out == line_exp - os.remove(outfile) + list_file = os.path.join("toto", "list_genomes.txt") + utils.write_lstinfo(list_file, genomes, GENEPATH) + outfile = os.path.join(GENEPATH, "LSTINFO-list_genomes.lst") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_write_lstinfo.lst") + assert utilities.compare_order_content(outfile, exp_file) def test_write_lstinfo_nogenome(): @@ -222,15 +254,13 @@ def test_write_lstinfo_nogenome(): only header. """ genomes = {} - list_file = os.path.join("test", "data", "annotate", "list_genomes.txt") - outdir = os.path.join("test", "data", "annotate") - utils.write_lstinfo(list_file, genomes, outdir) - outfile = os.path.join(outdir, "LSTINFO-list_genomes.lst") + list_file = os.path.join("toto", "list_genomes.txt") + utils.write_lstinfo(list_file, genomes, GENEPATH) + outfile = os.path.join(GENEPATH, "LSTINFO-list_genomes.lst") with open(outfile, "r") as outf: all_lines = outf.readlines() assert len(all_lines) == 1 - assert all_lines[0] == "gembase_name\torig_name\tgsize\tnb_conts\tL90\n" - os.remove(outfile) + assert all_lines[0] == "gembase_name\torig_name\tto_annotate\tgsize\tnb_conts\tL90\n" def test_sort_gene(): @@ -241,7 +271,7 @@ def test_sort_gene(): genomes = ["genome.0417.00010", "toto.0417.00010", "genome1.0417.00002", "genome.0417.00015", "totn.0417.00010", "genome.0417.00009", "genome.0517.00001", "toto.0417.00011"] - sorted_genomes = sorted(genomes, key=utils.sort_genomes) + sorted_genomes = sorted(genomes, key=utils.sort_genomes_by_name) exp = ["genome.0517.00001", "genome.0417.00009", "genome.0417.00010", "genome.0417.00015", "genome1.0417.00002", "totn.0417.00010", "toto.0417.00010", "toto.0417.00011"] @@ -261,7 +291,7 @@ def test_sort_gene_tuple(): "name6": ["genome.0417.00009", "path/to/genome", 123456, 50, 3], "name7": ["genome.0517.00001", "path/to/genome", 123456, 50, 3], "name8": ["toto.0417.00011", "path/to/genome", 123456, 50, 3], } - sorted_genomes = sorted(genomes.items(), key=utils.sort_genomes) + sorted_genomes = sorted(genomes.items(), key=utils.sort_genomes_by_name) exp = [("name7", genomes["name7"]), ("name6", genomes["name6"]), ("name1", genomes["name1"]), ("name4", genomes["name4"]), ("name3", genomes["name3"]), ("name5", genomes["name5"]), @@ -275,18 +305,40 @@ def test_sort_gene_noformat(): genomes in alphabetical order """ # genomes = {genome_orig, [gembase, path, gsize, nbcont, L90]} - genomes = {"name1": ["genome.0417.00010", "path/to/genome", 123456, 50, 3], - "name2": ["toto.0417.00010", "path/to/genome", 123456, 50, 3], - "name3": ["genome1.0417.00002", "path/to/genome", 123456, 50, 3], - "name4": ["genome.0417.00015", "path/to/genome", 123456, 50, 3], - "name5": ["mygenome.0416", "path/to/genome", 123456, 50, 3], - "name6": ["genome.0417", "path/to/genome", 123456, 50, 3], - "name7": ["genome.0517.00001", "path/to/genome", 123456, 50, 3], + genomes = {"name1": ["genome", "path/to/genome", 123456, 50, 3], + "name2": ["toto", "path/to/genome", 123456, 50, 3], + "name3": ["genome1 ", "path/to/genome", 123456, 50, 3], + "name4": ["genome1bis", "path/to/genome", 123456, 50, 3], + "name5": ["mygenome", "path/to/genome", 123456, 50, 3], + "name6": ["agenome_nogembase", "path/to/genome", 123456, 50, 3], + "name7": ["agenome_nogembase2", "path/to/genome", 123456, 50, 3], "name8": ["toto.0417.00011", "path/to/genome", 123456, 50, 3], } - sorted_genomes = sorted(genomes.items(), key=utils.sort_genomes) - exp = [("name7", genomes["name7"]), ("name1", genomes["name1"]), - ("name4", genomes["name4"]), ("name6", genomes["name6"]), - ("name3", genomes["name3"]), ("name5", genomes["name5"]), + sorted_genomes = sorted(genomes.items(), key=utils.sort_genomes_by_name) + exp = [("name6", genomes["name6"]), ("name7", genomes["name7"]), + ("name1", genomes["name1"]), ("name3", genomes["name3"]), + ("name4", genomes["name4"]), ("name5", genomes["name5"]), + ("name2", genomes["name2"]), ("name8", genomes["name8"])] + assert sorted_genomes == exp + + +def test_sort_genome_l90(): + """ + Test that when genomes are not in the gembase format, it returns + genomes in alphabetical order + """ + # genomes = {genome_orig, [gembase, path, gsize, nbcont, L90]} + genomes = {"name1": ["genome", "path/to/genome", 123456, 1, 2], + "name2": ["toto", "path/to/genome", 123456, 100, 4], + "name3": ["genome1 ", "path/to/genome", 123456, 10, 2], + "name4": ["genome1bis", "path/to/genome", 123456, 11, 2], + "name5": ["mygenome", "path/to/genome", 123456, 8, 3], + "name6": ["agenome_nogembase", "path/to/genome", 123456, 3, 1], + "name7": ["agenome_nogembase2", "path/to/genome", 123456, 4, 1], + "name8": ["toto.0417.00011", "path/to/genome", 123456, 50, 5], } + sorted_genomes = sorted(genomes.items(), key=utils.sort_genomes_l90_nbcont) + exp = [("name6", genomes["name6"]), ("name7", genomes["name7"]), + ("name1", genomes["name1"]), ("name3", genomes["name3"]), + ("name4", genomes["name4"]), ("name5", genomes["name5"]), ("name2", genomes["name2"]), ("name8", genomes["name8"])] assert sorted_genomes == exp @@ -331,34 +383,33 @@ def test_sort_proteins_other_format(): assert sorted_prot == exp -def test_sort_proteins_error_format1(capsys): +def test_sort_proteins_error_format1(caplog): """ Test that when a protein name does not follow the format <alpha_num>_<num>, it gives an error. """ + caplog.set_level(logging.DEBUG) proteins = ["ESCO.0417.00010.i0001_12354", "ESCO.0617.00001.i0001_005", "ESCO.0517.00001.i0001_12354", "error-protein"] with pytest.raises(SystemExit): sorted(proteins, key=utils.sort_proteins) - out, err = capsys.readouterr() assert ("ERROR: Protein error-protein does not have the required format. It must contain, " "at least <alpha-num>_<num_only>, and at best " "<name>.<date>.<strain_num>.<contig_info>_<prot_num>. " - "Please change its name.") in err + "Please change its name.") in caplog.text -def test_read_genomes_nofile(capsys): +def test_read_genomes_nofile(caplog): """ Test that when the genome list file provided does not exist, it ends the program with an error message """ with pytest.raises(SystemExit): utils.read_genomes("toto.txt", "TOTO", "0417", "db/path", "tmppath") - out, err = capsys.readouterr() - assert "ERROR: Your list file " in err - assert "toto.txt" in err - assert "does not exist. Please provide a list file." in err - assert "Ending program." in err + assert "ERROR: Your list file " in caplog.text + assert "toto.txt" in caplog.text + assert "does not exist. Please provide a list file." in caplog.text + assert "Ending program." in caplog.text def test_read_genomes_wrongname(): @@ -368,26 +419,23 @@ def test_read_genomes_wrongname(): """ name = "ESCO" date = "0417" - dbpath = os.path.join("test", "data", "annotate", "genomes") + dbpath = os.path.join(DATA_DIR, "genomes") tmppath = "tmppath" - list_file = os.path.join("test", "data", "annotate", "test_files", - "list_genomes-wrongNames.txt") + list_file = os.path.join(DATA_DIR, "test_files", "list_genomes-wrongNames.txt") genomes = utils.read_genomes(list_file, name, date, dbpath, tmppath) assert genomes == {} -def test_read_genomes_ok(capsys): +def test_read_genomes_ok(caplog): """ Test that when the list file contains genomes existing, it returns the expected list of genomes """ - logfile_base = "test_utils" - utils.init_logger(logfile_base, 0, '', verbose=1) name = "ESCO" date = "0417" - dbpath = os.path.join("test", "data", "annotate", "genomes") + dbpath = os.path.join(DATA_DIR, "genomes") tmppath = "tmppath" - list_file = os.path.join("test", "data", "annotate", "test_files", "list_genomes.lst") + list_file = os.path.join(DATA_DIR, "test_files", "list_genomes.lst") genomes = utils.read_genomes(list_file, name, date, dbpath, tmppath) exp = {"A_H738.fasta": ["ESCO.0417"], "B2_A3_5.fasta-split5N.fna-short-contig.fna": ["ESCO.0417"], @@ -395,25 +443,19 @@ def test_read_genomes_ok(capsys): "genome3.fasta": ["ESCO.0512"], "genome4.fasta": ["TOTO.0417"], "genome5.fasta": ["TOTO.0114"]} assert exp == genomes - _, err = capsys.readouterr() - assert "genome.fst genome file does not exist. It will be ignored." in err - os.remove(logfile_base + ".log") - os.remove(logfile_base + ".log.details") - os.remove(logfile_base + ".log.err") + assert "genome.fst genome file does not exist. It will be ignored." in caplog.text -def test_read_genomes_errors(capsys): +def test_read_genomes_errors(caplog): """ Test that when the list file contains errors in name and date provided, it returns the expected errors, and the expected genome list. """ - logfile_base = "test_utils" - utils.init_logger(logfile_base, 0, '', verbose=1) name = "ESCO" date = "0417" - dbpath = os.path.join("test", "data", "annotate", "genomes") + dbpath = os.path.join(DATA_DIR, "genomes") tmppath = "tmppath" - list_file = os.path.join("test", "data", "annotate", "test_files", "list_genomes-errors.txt") + list_file = os.path.join(DATA_DIR, "test_files", "list_genomes-errors.txt") genomes = utils.read_genomes(list_file, name, date, dbpath, tmppath) exp = {"A_H738.fasta": ["ESCO.0417"], "B2_A3_5.fasta-split5N.fna-short-contig.fna": ["ESCO.0417"], @@ -421,57 +463,50 @@ def test_read_genomes_errors(capsys): "genome3.fasta": ["ESCO.0512"], "genome4.fasta": ["ESCO.0417"], "genome5.fasta": ["ESCO.0417"]} assert genomes == exp - _, err = capsys.readouterr() assert ("Invalid name/date given for genome A_H738.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default name (ESCO) and date (0417) will " - "be used.") in err - assert ( - "Invalid name abc given for genome B2_A3_5.fasta-split5N.fna-short-contig.fna. Only put " + "be used.") in caplog.text + assert ("Invalid name abc given for genome B2_A3_5.fasta-split5N.fna-short-contig.fna. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default name (ESCO) will " - "be used.") in err + "be used.") in caplog.text assert ("Invalid date 152 given for genome H299_H561.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default date (0417) will " - "be used.") in err + "be used.") in caplog.text assert ("Invalid date 1-03 given for genome genome2.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default date (0417) will " - "be used.") in err + "be used.") in caplog.text assert ("genome.fst genome file does not exist. " - "It will be ignored.") in err + "It will be ignored.") in caplog.text assert ("Invalid name a/b2 given for genome genome3.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default name (ESCO) will " - "be used.") in err + "be used.") in caplog.text assert ("Invalid name #esc given for genome genome5.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default name (ESCO) will " - "be used.") in err + "be used.") in caplog.text assert ("Invalid date 1_14 given for genome genome5.fasta. Only put " "4 alphanumeric characters in your date and name. For " "this genome, the default date (0417) will " - "be used.") in err - os.remove(logfile_base + ".log") - os.remove(logfile_base + ".log.details") - os.remove(logfile_base + ".log.err") + "be used.") in caplog.text -def test_read_genomes_multi_files(capsys): +def test_read_genomes_multi_files(caplog): """ Test that when the list file contains several filenames for 1 same genome, it returns the expected genome list, the expected errors (when some genome files do not exist) and the expected concatenated files. """ - logfile_base = "test_utils" - utils.init_logger(logfile_base, 0, '', verbose=1) name = "ESCO" date = "0417" - dbpath = os.path.join("test", "data", "annotate", "genomes") - tmppath = os.path.join("test", "data", "annotate") - list_file = os.path.join("test", "data", "annotate", "test_files", - "list_genomes-multi-files.txt") + tmppath = os.path.join(GENEPATH, "tmppath") + os.mkdir(tmppath) + dbpath = os.path.join(DATA_DIR, "genomes") + list_file = os.path.join(DATA_DIR, "test_files", "list_genomes-multi-files.txt") genomes = utils.read_genomes(list_file, name, date, dbpath, tmppath) exp = {"A_H738.fasta-all.fna": ["ESCO.0417"], "H299_H561.fasta-all.fna": ["ABCD.0417"], "genome2.fasta": ["TOTO.0417"], @@ -479,124 +514,284 @@ def test_read_genomes_multi_files(capsys): "genome5.fasta": ["TOTO.0114"]} assert exp == genomes # Check error messages - _, err = capsys.readouterr() assert ("genome.fna genome file does not exist. Its file will be ignored " - "when concatenating ['H299_H561.fasta', 'genome6.fasta', 'genome.fna']") in err - assert "genome.fst genome file does not exist. It will be ignored." in err + "when concatenating ['H299_H561.fasta', 'genome6.fasta', 'genome.fna']") in caplog.text + assert "genome.fst genome file does not exist. It will be ignored." in caplog.text assert ("toto.fst genome file does not exist. Its file will be ignored " - "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in err + "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in caplog.text assert ("toto.fasta genome file does not exist. Its file will be ignored " - "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in err + "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in caplog.text assert ("genome.fst genome file does not exist. Its file will be ignored " - "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in err + "when concatenating ['toto.fst', 'toto.fasta', 'genome.fst']") in caplog.text assert ("None of the genome files in ['toto.fst', 'toto.fasta', 'genome.fst'] exist. " - "This genome will be ignored.") in err + "This genome will be ignored.") in caplog.text # Check that files were concatenated as expected concat1 = os.path.join(tmppath, "A_H738.fasta-all.fna") exp_concat1 = os.path.join(dbpath, "A_H738-and-B2_A3_5.fna") concat2 = os.path.join(tmppath, "H299_H561.fasta-all.fna") exp_concat2 = os.path.join(dbpath, "H299_H561-and-genome6.fna") - with open(concat1, "r") as outf, open(exp_concat1, "r") as expf: - for line_out, line_exp in zip(outf, expf): - assert line_out == line_exp - with open(concat2, "r") as outf, open(exp_concat2, "r") as expf: - for line_out, line_exp in zip(outf, expf): - assert line_out == line_exp - os.remove(concat1) - os.remove(concat2) - os.remove(logfile_base + ".log") - os.remove(logfile_base + ".log.details") - os.remove(logfile_base + ".log.err") + assert utilities.compare_order_content(concat1, exp_concat1) + assert utilities.compare_order_content(concat2, exp_concat2) + + +def test_read_genomes_info_nofile(caplog): + """ + Read lstinfo file and get all genomes information + Check that when the file does not exist, it exits with appropriate error message + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + with pytest.raises(SystemExit): + utils.read_genomes_info("toto.txt", name) + assert 'Reading given information on your genomes in toto.txt' in caplog.text + assert ("ERROR: The info file toto.txt that you gave does not exist. " + "Please provide the right path/name for this file.") in caplog.text + assert "Ending program" in caplog.text + + +def test_read_genomes_info_wrongheader(caplog): + """ + Read lstinfo file and get all genomes information + When wrong header (not all required columns), exits and appropriate error message + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-wrong-header.lst") + with pytest.raises(SystemExit): + utils.read_genomes_info(lstinfo_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo-wrong-header.lst") in caplog.text + assert ("ERROR: It seems that your info file test/data/annotate/test_files/lstinfo-wrong-header.lst " + "does not have a header, " + "or this header does not have, at least, the required columns tab separated: ") in caplog.text + assert("to_annotate, gsize nb_conts and L90 (in any order).\nEnding program.") in caplog.text -def test_check_resdirlst(capsys): +def test_read_genomes_info_noheader(caplog): + """ + Read lstinfo file and get all genomes information + When no header, exits and appropriate error message + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-no-header.lst") + with pytest.raises(SystemExit): + utils.read_genomes_info(lstinfo_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo-no-header.lst") in caplog.text + assert ("ERROR: It seems that your info file test/data/annotate/test_files/lstinfo-no-header.lst " + "does not have a header, " + "or this header does not have, at least, the required columns tab separated: ") in caplog.text + assert("to_annotate, gsize nb_conts and L90 (in any order).\nEnding program.") in caplog.text + + +def test_read_genomes_info_not_int(caplog): + """ + Read lstinfo file and get all genomes information + When a nbcont column is not an int for 1 genome, writes error message and ignores the genome + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-not-int-nbcont.lst") + genomes = utils.read_genomes_info(lstinfo_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo-not-int-nbcont.lst") in caplog.text + assert 'Found 2 genomes in total' in caplog.text + assert ("For genome A_H738-and-B2_A3_5, at least one of your columns 'gsize', " + "'nb_conts' or 'L90' contains a non numeric value. This genome will be ignored") in caplog.text + exp = {"genome1.fasta": + ["genome1", "test/data/annotate/genomes/genome1.fasta", + "test/data/annotate/genomes/genome1.fasta", 800, 6, 5], + "genome7.fasta": + ["genome7", "test/data/annotate/genomes/genome7.fasta", + "test/data/annotate/genomes/genome7.fasta", 79705, 80, 65]} + assert genomes == exp + + +def test_read_genomes_info_not_all_filled(caplog): + """ + Read lstinfo file and get all genomes information + 1 column not filled for at least 1 genome: exits and write appropriate error message + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-not-all-filled.lst") + with pytest.raises(SystemExit): + utils.read_genomes_info(lstinfo_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo-not-all-filled.lst") in caplog.text + assert ("ERROR: Check that all fields of test/data/annotate/test_files/lstinfo-not-all-filled.lst " + "are filled in each line (can be 'NA')") in caplog.text + + +def test_read_genomes_info_no_path(caplog): + """ + Read lstinfo file and get all genomes information + When a genome in lstinfo does not have its corresponding file, write appropriate error message + and ignores genome + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-miss-1genome.lst") + genomes = utils.read_genomes_info(lstinfo_file, name) + assert ("data/annotate/genomes/A_H738-and-B2_A3_5.fna genome file does not exist. " + "This genome will be ignored") in caplog.text + assert 'Found 2 genomes in total' in caplog.text + exp = {"genome1.fasta": + ["genome1", "test/data/annotate/genomes/genome1.fasta", + "test/data/annotate/genomes/genome1.fasta", 800, 6, 5], + "genome7.fasta": + ["genome7", "test/data/annotate/genomes/genome7.fasta", + "test/data/annotate/genomes/genome7.fasta", 79705, 80, 65]} + assert genomes == exp + + +def test_read_genomes_info_no_genomes(caplog): + """ + Read lstinfo file and get all genomes information + When no genome in lstinfo correspond to existing paths, exits and appropriate error message + """ + caplog.set_level(logging.DEBUG) + name = "ESCO" + lstinfo_file = os.path.join(DATA_DIR, "test_files", "lstinfo-no-genome.lst") + with pytest.raises(SystemExit): + utils.read_genomes_info(lstinfo_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo-no-genome.lst") in caplog.text + assert ("no genome listed in test/data/annotate/test_files/lstinfo-no-genome.lst " + "were found.") in caplog.text + + +def test_read_genomes_info_ok(caplog): + """ + Read lstinfo file and get all genomes information and returns it as expected + """ + caplog.set_level(logging.DEBUG) + list_file = os.path.join(DATA_DIR, "test_files", "lstinfo.lst") + name = "ESCO" + genomes = utils.read_genomes_info(list_file, name) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo.lst") in caplog.text + assert 'Found 3 genomes in total' in caplog.text + exp = {"genome1.fasta": + ["genome1", "test/data/annotate/genomes/genome1.fasta", + "test/data/annotate/genomes/genome1.fasta", 800, 6, 5], + "A_H738-and-B2_A3_5.fna": + ["A_H738-and-B2_A3_5", "test/data/annotate/genomes/A_H738-and-B2_A3_5.fna", + "test/data/annotate/genomes/A_H738-and-B2_A3_5.fna", 7000, 78, 6], + "genome7.fasta": + ["genome7", "test/data/annotate/genomes/genome7.fasta", + "test/data/annotate/genomes/genome7.fasta", 79705, 80, 65]} + assert genomes == exp + + +def test_read_genomes_info_date_ok(caplog): + """ + Read lstinfo file and get all genomes information + """ + caplog.set_level(logging.DEBUG) + list_file = os.path.join(DATA_DIR, "test_files", "lstinfo.lst") + name = "ESCO" + date = "0720" + genomes = utils.read_genomes_info(list_file, name, date=date) + assert ("Reading given information on your genomes in " + "test/data/annotate/test_files/lstinfo.lst") in caplog.text + assert 'Found 3 genomes in total' in caplog.text + exp = {"test/data/annotate/genomes/genome1.fasta": + ["ESCO.0720", "test/data/annotate/genomes/genome1.fasta", + "test/data/annotate/genomes/genome1.fasta", 800, 6, 5], + "test/data/annotate/genomes/A_H738-and-B2_A3_5.fna": + ["ESCO.0720", "test/data/annotate/genomes/A_H738-and-B2_A3_5.fna", + "test/data/annotate/genomes/A_H738-and-B2_A3_5.fna", 7000, 78, 6], + "test/data/annotate/genomes/genome7.fasta": + ["ESCO.0720", "test/data/annotate/genomes/genome7.fasta", + "test/data/annotate/genomes/genome7.fasta", 79705, 80, 65]} + assert genomes == exp + + +def test_check_format(): + """ + test that format is ok or not + """ + assert utils.check_format("ESC1") + assert utils.check_format("1234") + assert not utils.check_format("12") + assert not utils.check_format("ESC*") + + +def test_check_resdirlst(caplog): """ Test that when the result directory already contains .lst files in LSTINFO, program ends with an error message. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "LSTINFO")) - open(os.path.join(resdir, "LSTINFO", "toto.lst"), "w").close() + os.makedirs(os.path.join(GENEPATH, "LSTINFO")) + open(os.path.join(GENEPATH, "LSTINFO", "toto.lst"), "w").close() with pytest.raises(SystemExit): - utils.check_out_dirs(resdir) - out, err = capsys.readouterr() + utils.check_out_dirs(GENEPATH) assert ("ERROR: Your output directory already has .lst files in the " "LSTINFO folder. Provide another result directory, or remove the " - "files in this one.") in err - shutil.rmtree(resdir) + "files in this one.") in caplog.text -def test_check_resdirprt(capsys): +def test_check_resdirprt(caplog): """ Test that when the result directory already contains .prt files in Proteins, program ends with an error message. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "Proteins")) - open(os.path.join(resdir, "Proteins", "toto.prt"), "w").close() + os.makedirs(os.path.join(GENEPATH, "Proteins")) + open(os.path.join(GENEPATH, "Proteins", "toto.prt"), "w").close() with pytest.raises(SystemExit): - utils.check_out_dirs(resdir) - out, err = capsys.readouterr() + utils.check_out_dirs(GENEPATH) assert ("ERROR: Your output directory already has .prt files in the " "Proteins folder. Provide another result directory, or remove the " - "files in this one.") in err - shutil.rmtree(resdir) + "files in this one.") in caplog.text -def test_check_resdirgen(capsys): +def test_check_resdirgen(caplog): """ Test that when the result directory already contains .gen files in Genes, program ends with an error message. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "Genes")) - open(os.path.join(resdir, "Genes", "toto.gen"), "w").close() + os.makedirs(os.path.join(GENEPATH, "Genes")) + open(os.path.join(GENEPATH, "Genes", "toto.gen"), "w").close() with pytest.raises(SystemExit): - utils.check_out_dirs(resdir) - out, err = capsys.readouterr() + utils.check_out_dirs(GENEPATH) assert ("ERROR: Your output directory already has .gen files in the " "Genes folder. Provide another result directory, or remove the " - "files in this one.") in err - shutil.rmtree(resdir) + "files in this one.") in caplog.text -def test_check_resdirrep(capsys): +def test_check_resdirrep(caplog): """ Test that when the result directory already contains .fna files in Replicons, program ends with an error message. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "Replicons")) - open(os.path.join(resdir, "Replicons", "toto.fna"), "w").close() + os.makedirs(os.path.join(GENEPATH, "Replicons")) + open(os.path.join(GENEPATH, "Replicons", "toto.fna"), "w").close() with pytest.raises(SystemExit): - utils.check_out_dirs(resdir) - out, err = capsys.readouterr() + utils.check_out_dirs(GENEPATH) assert ("ERROR: Your output directory already has .fna files in the " "Replicons folder. Provide another result directory, or remove the " - "files in this one.") in err - shutil.rmtree(resdir) + "files in this one.") in caplog.text -def test_check_resdirgff(capsys): +def test_check_resdirgff(caplog): """ Test that when the result directory already contains .fna files in Replicons, program ends with an error message. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir_gff") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "gff3")) - open(os.path.join(resdir, "gff3", "toto.gff"), "w").close() + os.makedirs(os.path.join(GENEPATH, "gff3")) + open(os.path.join(GENEPATH, "gff3", "toto.gff"), "w").close() with pytest.raises(SystemExit): - utils.check_out_dirs(resdir) - out, err = capsys.readouterr() + utils.check_out_dirs(GENEPATH) assert ("ERROR: Your output directory already has .gff files in the " "gff3 folder. Provide another result directory, or remove the " - "files in this one.") in err - shutil.rmtree(resdir) + "files in this one.") in caplog.text def test_check_resdirotherext(): @@ -604,12 +799,10 @@ def test_check_resdirotherext(): Test that when the result directory contains txt files in Replicons dir, there is no problem. """ - resdir = os.path.join("test", "data", "annotate", "test_check_resdir") # Create output directory with a lst file in LSTINFO - os.makedirs(os.path.join(resdir, "Replicons")) - open(os.path.join(resdir, "Replicons", "toto.txt"), "w").close() - utils.check_out_dirs(resdir) - shutil.rmtree(resdir) + os.makedirs(os.path.join(GENEPATH, "Replicons")) + open(os.path.join(GENEPATH, "Replicons", "toto.txt"), "w").close() + utils.check_out_dirs(GENEPATH) def test_check_resdirnodir(): @@ -619,19 +812,18 @@ def test_check_resdirnodir(): utils.check_out_dirs("totoresdir") -def test_run_cmd_error_noquit(capsys): +def test_run_cmd_error_noquit(caplog): """ Test that when we try to run a command which does not exist, it returns an error message, but does not exit the program (eof=False). """ cmd = "toto" error = "error trying to run toto" - assert utils.run_cmd(cmd, error) == -1 - out, err = capsys.readouterr() - assert "error trying to run toto: toto does not exist" in err + assert utils.run_cmd(cmd, error) == 1 + assert "toto does not exist" in caplog.text -def test_run_cmd_error_noquit_logger(capsys): +def test_run_cmd_error_noquit_logger(caplog): """ Test that when we try to run a command which does not exist, it returns an error message, but does not exit the program (eof=False). With a given logger where error is written. @@ -639,12 +831,11 @@ def test_run_cmd_error_noquit_logger(capsys): cmd = "toto" logger = logging.getLogger("default") error = "error trying to run toto" - assert utils.run_cmd(cmd, error, logger=logger) == -1 - out, err = capsys.readouterr() - assert "error trying to run toto: toto does not exist" in err + assert utils.run_cmd(cmd, error, logger=logger) == 1 + assert "toto does not exist" in caplog.text -def test_run_cmd_error_quit(capsys): +def test_run_cmd_error_quit(caplog): """ Test that when we try to run a command which does not exist, it returns an error message, and exits the program (eof=True) @@ -653,8 +844,7 @@ def test_run_cmd_error_quit(capsys): error = "error trying to run toto" with pytest.raises(SystemExit): utils.run_cmd(cmd, error, eof=True) - out, err = capsys.readouterr() - assert "error trying to run toto: toto does not exist" in err + assert "toto does not exist" in caplog.text def test_run_cmd_retcode_non0(caplog): @@ -680,30 +870,30 @@ def test_run_cmd_retcode_non0_quit(caplog): assert error in caplog.text -def test_run_cmd_error_stderrfile(): +def test_run_cmd_error_stderrfile(caplog): """ Test that when we try to run a command which does not exist, and direct its output to a file instead of stderr, we have the expected error written in the given error file. """ cmd = "toto" error = "error trying to run toto" - outfile = open(os.path.join("test", "data", "annotate", "stderr_run_cmd.txt"), "w") - assert utils.run_cmd(cmd, error, stderr=outfile) == -1 + outfile = open(os.path.join(GENEPATH, "stderr_run_cmd.txt"), "w") + assert utils.run_cmd(cmd, error, stderr=outfile) == 1 + assert "error: toto does not exist" in caplog.text outfile.close() - os.remove(os.path.join("test", "data", "annotate", "stderr_run_cmd.txt")) -def test_run_cmd_error_stdoutfile(): +def test_run_cmd_error_stdoutfile(caplog): """ Test that when we try to run a command which does not exist, and direct its output to a file instead of stderr, we have the expected error written in the given error file. """ cmd = "toto" error = "error trying to run toto" - outfile = open(os.path.join("test", "data", "annotate", "stdout_run_cmd.txt"), "w") - assert utils.run_cmd(cmd, error, stdout=outfile) == -1 + outfile = open(os.path.join(GENEPATH, "stdout_run_cmd.txt"), "w") + assert utils.run_cmd(cmd, error, stdout=outfile) == 1 + assert "error: toto does not exist" in caplog.text outfile.close() - os.remove(os.path.join("test", "data", "annotate", "stdout_run_cmd.txt")) def test_rename_contigs(): @@ -712,16 +902,20 @@ def test_rename_contigs(): and save the output sequence to the given res_path. Check that the output file is as expected. """ - gpath = os.path.join("test", "data", "annotate", "genomes", "H299_H561.fasta") + gpath = os.path.join(DATA_DIR, "genomes", "H299_H561.fasta") gembase_name = "ESCO.0216.00005" - res_path = os.path.join("test", "data", "annotate") - outfile = os.path.join(res_path, "H299_H561.fasta-short-contig.fna") - exp_file = os.path.join("test", "data", "annotate", "exp_files", "res_H299_H561-ESCO00005.fna") - utils.rename_genome_contigs(gembase_name, gpath, outfile) - with open(exp_file, "r") as expf, open(outfile, "r") as of: - for line_exp, line_seq in zip(expf, of): - assert line_exp == line_seq - os.remove(outfile) + outfile = os.path.join(GENEPATH, "H299_H561.fasta-short-contig.fna") + exp_file = os.path.join(DATA_DIR, "exp_files", "res_H299_H561-ESCO00005.fna") + contigs, sizes = utils.get_genome_contigs_and_rename(gembase_name, gpath, outfile) + print(sizes) + assert contigs == [">ESCO.0216.00005.0001\t>H561_S27 L001_R1_001_(paired)_contig_1", + ">ESCO.0216.00005.0002\t>H561_S28 L001_R1_001_(paired)_contig_2", + ">ESCO.0216.00005.0003\t>H561_S29 L001_R1_001_(paired)_contig_115"] + assert sizes == [">ESCO.0216.00005.0001\t3480", + ">ESCO.0216.00005.0002\t7080", + ">ESCO.0216.00005.0003\t2583"] + assert utilities.compare_order_content(outfile, exp_file) + def test_cat_nobar(capsys): @@ -730,37 +924,25 @@ def test_cat_nobar(capsys): contains what is expected (concatenation of content of all input files) """ import glob - list_files = glob.glob(os.path.join("test", "data", "annotate", "genomes", "*.fasta")) - outfile = "test_catfile.txt" + list_files = glob.glob(os.path.join(DATA_DIR, "genomes", "*.fasta")) + outfile = os.path.join(GENEPATH, "test_catfile.txt") utils.cat(list_files, outfile) - exp_file = os.path.join("test", "data", "annotate", "exp_files", - "res_test_cat_genomes_fasta.fst") - with open(exp_file, 'r') as expf, open(outfile, 'r') as outf: - lines_exp = expf.readlines() - lines_out = outf.readlines() - assert set(lines_exp) == set(lines_out) - _, err = capsys.readouterr() - assert "/{} (".format(len(list_files)) not in err - os.remove(outfile) + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_cat_genomes_fasta.fst") + assert utilities.compare_file_content(outfile, exp_file) -def test_cat_bar(): +def test_cat_bar(caplog): """ Check that when cat is called on a list of several files, the output file contains what is expected (concatenation of content of all input files) """ import glob - list_files = glob.glob(os.path.join("test", "data", "annotate", "genomes", "*.fasta")) - outfile = "test_catfile.txt" + list_files = glob.glob(os.path.join(DATA_DIR, "genomes", "*.fasta")) + outfile = os.path.join(GENEPATH, "test_catfile.txt") title = "test cat progressbar" utils.cat(list_files, outfile, title=title) - exp_file = os.path.join("test", "data", "annotate", "exp_files", - "res_test_cat_genomes_fasta.fst") - with open(exp_file, 'r') as expf, open(outfile, 'r') as outf: - lines_exp = expf.readlines() - lines_out = outf.readlines() - assert set(lines_exp) == set(lines_out) - os.remove(outfile) + exp_file = os.path.join(DATA_DIR, "exp_files", "res_test_cat_genomes_fasta.fst") + assert utilities.compare_file_content(outfile, exp_file) def test_detail(): @@ -779,15 +961,14 @@ def test_grep(): "123toto.txt other letters\n" "123toto:txt otherletters\n" "123toto.txt otherletters\n") - filein = "filein.txt" + filein = os.path.join(GENEPATH, "filein.txt") with open(filein, "w") as ff: ff.write(lines) - pattern = "[0-9]+toto\.txt [a-z]{6}" + pattern = "[0-9]+toto[.]txt [a-z]{6}" lines_grep = utils.grep(filein, pattern) exp = ["1toto.txt otherletters", "123toto.txt otherletters"] assert exp == lines_grep - os.remove(filein) def test_grep_count(): @@ -799,13 +980,12 @@ def test_grep_count(): "123toto.txt other letters\n" "123toto:txt otherletters\n" "123toto.txt otherletters\n") - filein = "filein.txt" + filein = os.path.join(GENEPATH, "filein.txt") with open(filein, "w") as ff: ff.write(lines) - pattern = "[0-9]+toto\.txt [a-z]{6}" + pattern = "[0-9]+toto[.]txt [a-z]{6}" lines_grep = utils.grep(filein, pattern, counts=True) assert lines_grep == 2 - os.remove(filein) def test_count_lines(): @@ -817,12 +997,11 @@ def test_count_lines(): "123toto.txt other letters\n" "123toto:txt otherletters\n" "123toto.txt otherletters\n") - filein = "filein.txt" + filein = os.path.join(GENEPATH, "filein.txt") with open(filein, "w") as ff: ff.write(lines) nbline = utils.count(filein) assert nbline == 5 - os.remove(filein) def test_count_words(): @@ -834,22 +1013,20 @@ def test_count_words(): "123toto.txt other letters\n" "123toto:txt otherletters\n" "123toto.txt otherletters\n") - filein = "filein.txt" + filein = os.path.join(GENEPATH, "filein.txt") with open(filein, "w") as ff: ff.write(lines) nbword = utils.count(filein, get="words") assert nbword == 10 - os.remove(filein) -def test_count_error(capsys): +def test_count_error(caplog): """ test that when we want to count something else than 'lines' or 'words', it returns an error """ with pytest.raises(SystemExit): utils.count("filein", get="letters") - _, err = capsys.readouterr() - assert "Choose what you want to count among ['lines', 'words']" in err + assert "Choose what you want to count among ['lines', 'words']" in caplog.text def test_save_bin(): @@ -860,13 +1037,12 @@ def test_save_bin(): obj2 = [1, 2, 5, "toto", "plop"] obj3 = "a string" objects = [obj1, obj2, obj3] - fileout = "filout.bin" + fileout = os.path.join(GENEPATH, "fileout.txt") utils.save_bin(objects, fileout) import _pickle as pickle with open(fileout, "rb") as binf: obj = pickle.load(binf) assert objects == obj - os.remove(fileout) def test_load_bin(): @@ -882,22 +1058,50 @@ def test_load_bin(): assert found == objects -def test_write_list(): +def test_list_to_str(): """ Test that the given list is returned as expected string """ inlist = [1, 2, "toto", {1: 2, "1": 5}, 1e-6] - tostr = utils.write_list(inlist) + tostr = utils.list_to_str(inlist, sep=" ") exp1 = "1 2 toto {1: 2, '1': 5} 1e-06\n" exp2 = "1 2 toto {'1': 5, 1: 2} 1e-06\n" assert tostr == exp1 or tostr == exp2 +def test_list_to_str_default(): + """ + Test that the given list is returned as expected string + """ + inlist = [1, 2, "toto", {1: 2, "1": 5}, 1e-6] + tostr = utils.list_to_str(inlist) + exp1 = "1\t2\ttoto\t{1: 2, '1': 5}\t1e-06\n" + exp2 = "1\t2\ttoto\t{'1': 5, 1: 2}\t1e-06\n" + assert tostr == exp1 or tostr == exp2 + + +def test_write_list(): + """ + test that given a list, it writes all its elements, 1 by line, in given outfile + """ + outfile = os.path.join(GENEPATH, "toto.txt") + inlist = [1, 2, "toto", {1: 2, "1": 5}, 1e-6] + utils.write_list(inlist, outfile) + with open(outfile, "r") as of: + lines = of.readlines() + assert len(lines) == 5 + assert "1\n" in lines + assert "2\n" in lines + assert "toto\n" in lines + assert "{1: 2, '1': 5}\n" in lines + assert "1e-06\n" in lines + + def test_remove_exits(): """ Test that a given file is removed if it exists """ - infile = "toto.txt" + infile = os.path.join(GENEPATH, "toto.txt") open(infile, "w").close() assert os.path.isfile(infile) utils.remove(infile) @@ -908,7 +1112,7 @@ def test_remove_not_exist(): """ Test that removing a file which does not exist brings no error """ - infile = "toto.txt" + infile = os.path.join(GENEPATH, "toto.txt") assert not os.path.isfile(infile) utils.remove(infile) assert not os.path.isfile(infile) -- GitLab