diff --git a/README.md b/README.md new file mode 100755 index 0000000000000000000000000000000000000000..d4d5f2291e35efa38068902d6f89d28768ac155e --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +DESCRIPTION + +\----------------------------------------------------------------------------------------------------------------------- + +These tests aim at checking that refactoring in the code and porting it to python 3 do not induce changes in what is crucial in the final result. + +The file virome\_assembly\_raw.fa is the concatenation of all other .fasta files. + +AUTHOR + +\---------------------------------------------------------------------------------------------------------------------- + +Véronique Legrand vlegrand@pasteur.fr + +Data for the tests were provided by Julian Garneau + +Explanation on what statistics should be carefully looked at was provided by Marc Monot diff --git a/non-regression-tests/run_HK97.sh b/non-regression-tests/run_HK97.sh index db8c1426c937c69c6c9104a41d4d6fb8966c2682..d9a33c0738cb0a666a406c54f1a3284d807da292 100755 --- a/non-regression-tests/run_HK97.sh +++ b/non-regression-tests/run_HK97.sh @@ -1,7 +1,8 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on HK97 genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/HK97_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/HK97_assembly.fasta || exit 1 python check_HK97_res.py || exit 2 diff --git a/non-regression-tests/run_HK97_long.sh b/non-regression-tests/run_HK97_long.sh index 5f569ef04df8da785eb7691e938b7ff44ff722fe..88b482eb62960ea39710c2e09fdbeb09b9987378 100755 --- a/non-regression-tests/run_HK97_long.sh +++ b/non-regression-tests/run_HK97_long.sh @@ -1,6 +1,8 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules + echo 
"running PhageTerm on HK97 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/HK97_assembly.fasta --nrt || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/HK97_assembly.fasta --nrt || exit 1 python check_HK97_class.py || exit 2 diff --git a/non-regression-tests/run_HK97_long_multiproc.sh b/non-regression-tests/run_HK97_long_multiproc.sh old mode 100644 new mode 100755 index 8a59e5d2671a8adbf831a549b317d7932756677e..3998511e1d1f686cb02bc8c2f30ec72d29134530 --- a/non-regression-tests/run_HK97_long_multiproc.sh +++ b/non-regression-tests/run_HK97_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on HK97 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/HK97_assembly.fasta --nrt +python $SCRIPT_PATH/PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/HK97_assembly.fasta --nrt python check_HK97_class.py diff --git a/non-regression-tests/run_Lamda_long.sh b/non-regression-tests/run_Lamda_long.sh index 07280d84b4f325ef1298be06cda7352d29460149..e73b552614506c7690ae7443f8bbeed2eca3b636 100755 --- a/non-regression-tests/run_Lamda_long.sh +++ b/non-regression-tests/run_Lamda_long.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on Lamda genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Lambda_assembly.fasta --nrt || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f 
"$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Lambda_assembly.fasta --nrt || exit 1 python check_Lamda_class.py || exit 2 diff --git a/non-regression-tests/run_Lamda_long_multiproc.sh b/non-regression-tests/run_Lamda_long_multiproc.sh old mode 100644 new mode 100755 index ea685076f0c5e27d6f68dfe03d82e023686edad4..cf5d6f1ce994d7229f8cfc5cea80133ab22c57ce --- a/non-regression-tests/run_Lamda_long_multiproc.sh +++ b/non-regression-tests/run_Lamda_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on Lamda genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Lambda_assembly.fasta --nrt +python $SCRIPT_PATH/PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Lambda_assembly.fasta --nrt python check_Lamda_class.py diff --git a/non-regression-tests/run_N1.sh b/non-regression-tests/run_N1.sh index 1f29f4498e3c51802455c5a2f02483cab830e4f3..949eb94affd4b0b942ac9a0de5e82b567f177114 100755 --- a/non-regression-tests/run_N1.sh +++ b/non-regression-tests/run_N1.sh @@ -1,7 +1,8 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on StaphN1 genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/Staph1N_assembly.fasta ||exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/Staph1N_assembly.fasta ||exit 1 python check_N1_res.py ||exit 1 diff --git a/non-regression-tests/run_P1.sh b/non-regression-tests/run_P1.sh index 6cf95711927ae7d43d2d0665d55a138e25741758..e6edcfb13e65261fa840a861bddd2d52ea027f47 100755 --- 
a/non-regression-tests/run_P1.sh +++ b/non-regression-tests/run_P1.sh @@ -1,7 +1,8 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on P1 genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/P1_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/P1_assembly.fasta || exit 1 python check_P1_res.py || exit 2 diff --git a/non-regression-tests/run_P1_long.sh b/non-regression-tests/run_P1_long.sh index b5a934b379085d8ed82254e190480505d058edbb..a11885042d81d26e5ac5264784dc44df86cc1a86 100755 --- a/non-regression-tests/run_P1_long.sh +++ b/non-regression-tests/run_P1_long.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on P1 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/P1_assembly.fasta --nrt || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/P1_assembly.fasta --nrt || exit 1 python check_P1_class.py || exit 2 diff --git a/non-regression-tests/run_P1_long_multiproc.sh b/non-regression-tests/run_P1_long_multiproc.sh old mode 100644 new mode 100755 index f86e6877861d295b2e4b75fb4ca25209b70c85b7..a1faf53bbdeb2d9eb73ea3f2685109512cbd18e5 --- a/non-regression-tests/run_P1_long_multiproc.sh +++ b/non-regression-tests/run_P1_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on P1 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r 
$DATA_PATH/P1_assembly.fasta --nrt +python $SCRIPT_PATH/PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/P1_assembly.fasta --nrt python check_P1_class.py diff --git a/non-regression-tests/run_StaphN1_long.sh b/non-regression-tests/run_StaphN1_long.sh index 16125f77fb8654f49fde53d43df9fde57bf2ca97..44823634f5fd5bb40d44e8d587d21ba6bef65365 100755 --- a/non-regression-tests/run_StaphN1_long.sh +++ b/non-regression-tests/run_StaphN1_long.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on Staph1N genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py --nrt -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Staph1N_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py --nrt -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Staph1N_assembly.fasta || exit 1 python check_Staph1N_class.py || exit 2 diff --git a/non-regression-tests/run_StaphN1_long_multiproc.sh b/non-regression-tests/run_StaphN1_long_multiproc.sh old mode 100644 new mode 100755 index 0a78f8e773a911ffbb6f5f74341216aa28a768ee..792ca822784caa9f7510645d0522f94dd02076ce --- a/non-regression-tests/run_StaphN1_long_multiproc.sh +++ b/non-regression-tests/run_StaphN1_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on Staph1N genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -c 4 --nrt -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Staph1N_assembly.fasta +python $SCRIPT_PATH/PhageTerm.py -c 4 --nrt -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/Staph1N_assembly.fasta python check_Staph1N_class.py diff --git 
a/non-regression-tests/run_T4.sh b/non-regression-tests/run_T4.sh index 279fa7f76fe7ee4949e274fe191c3bd3e1b9e092..07aafdeb049121cdeedc8a178033e32db4b5af62 100755 --- a/non-regression-tests/run_T4.sh +++ b/non-regression-tests/run_T4.sh @@ -1,7 +1,8 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on T4 genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/T4_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/T4_assembly.fasta || exit 1 #python check_T4_res.py diff --git a/non-regression-tests/run_T4_long.sh b/non-regression-tests/run_T4_long.sh index 13637d906996ee9bea0b2d9ee2bd0046c55bb313..f135e35231b20cc64e301356046f609cd34ffcd5 100755 --- a/non-regression-tests/run_T4_long.sh +++ b/non-regression-tests/run_T4_long.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on T4 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T4_assembly.fasta --nrt || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T4_assembly.fasta --nrt || exit 1 python check_T4_class.py || exit 2 diff --git a/non-regression-tests/run_T4_long_multiproc.sh b/non-regression-tests/run_T4_long_multiproc.sh old mode 100644 new mode 100755 index 2b304c61d8fc987294e494976e49a58c410b2fdb..c89c37d930a7d0b65d8281d4f9f1643a8c2e46b6 --- a/non-regression-tests/run_T4_long_multiproc.sh +++ b/non-regression-tests/run_T4_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on T4 genome and a dataset with a minimum 
50x coverage" -python ../PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T4_assembly.fasta --nrt +python $SCRIPT_PATH/PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T4_assembly.fasta --nrt python check_T4_class.py diff --git a/non-regression-tests/run_T7.sh b/non-regression-tests/run_T7.sh index 04e5d2d17582ba329dac00591fa6d1912a1fcf30..a9ea89499786abe05c1c582f2221978e376c884c 100755 --- a/non-regression-tests/run_T7.sh +++ b/non-regression-tests/run_T7.sh @@ -1,7 +1,8 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on T7 genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/T7_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/T7_assembly.fasta || exit 1 python check_T7_res.py || exit 2 diff --git a/non-regression-tests/run_T7_long.sh b/non-regression-tests/run_T7_long.sh index 701e3ad4bc3517b9eebca0451ba38c219d27551b..3aeeb83d61695ebc1d8da42bfd14beb1dcb01d85 100755 --- a/non-regression-tests/run_T7_long.sh +++ b/non-regression-tests/run_T7_long.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on T7 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T7_assembly.fasta --nrt || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T7_assembly.fasta --nrt || exit 1 python check_T7_class.py || exit 2 diff --git a/non-regression-tests/run_T7_long_multiproc.sh 
b/non-regression-tests/run_T7_long_multiproc.sh old mode 100644 new mode 100755 index b3a8e14f89ff3f93eb8e707a454ef13f7a18f542..5e06af353b504c887518f686ac15fce45174f95a --- a/non-regression-tests/run_T7_long_multiproc.sh +++ b/non-regression-tests/run_T7_long_multiproc.sh @@ -1,6 +1,7 @@ #/bin/bash DATA_PATH=./data +SCRIPT_PATH=../_modules echo "running PhageTerm on T7 genome and a dataset with a minimum 50x coverage" -python ../PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T7_assembly.fasta --nrt +python $SCRIPT_PATH/PhageTerm.py -c 4 -f "$DATA_PATH/R1_1M_READS_EACH_PHAGE(1).fastq.4" -p "$DATA_PATH/R2_1M_READS_EACH_PHAGE(1).fastq.4" -r $DATA_PATH/T7_assembly.fasta --nrt python check_T7_class.py diff --git a/non-regression-tests/run_lamda.sh b/non-regression-tests/run_lamda.sh index b2811bd9e8b225c378beca9d85898a564fd42138..7a89197784f1d28b53b88a733421566bcd88f09f 100755 --- a/non-regression-tests/run_lamda.sh +++ b/non-regression-tests/run_lamda.sh @@ -1,8 +1,9 @@ #/bin/bash DATA_PATH=./data REF_RES_PATH=./reference_results +SCRIPT_PATH=../_modules echo "running PhageTerm on lamda genome" -python ../PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/Lambda_assembly.fasta || exit 1 +python $SCRIPT_PATH/PhageTerm.py -f $DATA_PATH/R1_1M_READS_EACH_PHAGE.fastq.20 -p $DATA_PATH/R2_1M_READS_EACH_PHAGE.fastq.20 -r $DATA_PATH/Lambda_assembly.fasta || exit 1 python check_lamda_res.py || exit 2 diff --git a/non-regression-tests/run_nrt_long_multiproc.sh b/non-regression-tests/run_nrt_long_multiproc.sh old mode 100644 new mode 100755 index b1138a27e91b49754f1624fa2e2531c0c886203f..3ac2a6685c43de0c91643ec16c7a31c0a0b8733e --- a/non-regression-tests/run_nrt_long_multiproc.sh +++ b/non-regression-tests/run_nrt_long_multiproc.sh @@ -1,6 +1,6 @@ #/bin/bash ## VL: decided to write the checking of results in python scripts rather 
than shel script for portability. -echo "running non regression tests (short version)" +echo "running non regression tests (parallel version; requires 4 cores)" ./run_HK97_long_multiproc.sh || exit 1 ./run_Lamda_long_multiproc.sh || exit 1 ./run_StaphN1_long_multiproc.sh || exit 1 diff --git a/phageterm/IData_handling.py b/phageterm/IData_handling.py new file mode 100755 index 0000000000000000000000000000000000000000..a0559a34a787af250b8d1423a2164b722f13bb5a --- /dev/null +++ b/phageterm/IData_handling.py @@ -0,0 +1,340 @@ +## @file IData_handling.py +# +# VL: Gather here the classes and functions useful for handling input data. +from __future__ import print_function + +import gzip +from utilities import reverseComplement,changeCase +from time import gmtime, strftime +import datetime + +try: + import cPickle as pickle +except ImportError: # python 3.x + import pickle + + +## This class encapsulates the reference sequences, the host sequence if any and all useful information about the sequences. +# +# It is used both for searching the read extracts in the sequences and for exploiting the results +class refData: + def __init__(self,refseq_list,seed,hostseq): + self.refseq_list=refseq_list + self.seed=seed + self.hostseq=hostseq + if hostseq!="": + self.refseq_list.insert(0,hostseq) + self.nb_sequences=len(refseq_list) + + def getIdxSeq(self,refseq): + idx=-1 + found=False + for s in self.refseq_list: + idx += 1 + if s==refseq: + found=True + break + if not found: + raise RuntimeError("Couldn't find sequence in list of ref sequences.") + return idx + + + def IdxIsHostseq(self,idx_seq): + if (((self.hostseq == "") and (idx_seq <= self.nb_sequences - 1)) or ( + (self.hostseq != "") and (idx_seq >0))): + return False + return True + + def getSeqSizesList(self): + seq_sizes_list = [] + for seq in self.refseq_list: + seq_sizes_list.append(len(seq)) + return seq_sizes_list + + +## Base class for handling read extracts. +# +# This class should not be used directly. 
+class ReadExtracts: + def __init__(self,seed): + self.seed = seed + self.r_extracts_list = [] + self.nb_reads = 0 + self.nb_extracts=0 + + ## Returns the list of read extracts from the loaded dataset, the number of reads and the total number of extracts + def getRExtracts(self): + return self.r_extracts_list,self.nb_reads,self.nb_extracts + +## Class containing all the read extracts (PE reads) that must be mapped against a sequence. +class readExtractsPE(ReadExtracts): + def __init__(self,seed): + ReadExtracts.__init__(self,seed) + + + def addRead(self, whole_PE1,whole_PE2): + self.r_extracts_list.append(whole_PE1[:self.seed]) + self.r_extracts_list.append(whole_PE1[-self.seed:]) + self.r_extracts_list.append(whole_PE2[:self.seed]) + self.r_extracts_list.append(reverseComplement(whole_PE2)[:self.seed]) + self.r_extracts_list.append(reverseComplement(whole_PE2)[-self.seed:]) + self.nb_reads += 1 + self.nb_extracts += 5 # Number of extracts per read: 2 extracts for PE1 and 3 for PE2. + + + +## Class containing all the read extracts (single reads) that must be mapped against a sequence. +class readsExtractsS(ReadExtracts): + def __init__(self,seed): + ReadExtracts.__init__(self,seed) + + ## Adds a read to the list of extracts + # + # @param whole_read The read as extracted from the fastq file + # @param no_pair This parameter is only used to make the distinction between Single and paired. + # Note VL: I didn't use meta programming here because I thought that it would have a negative impact on performance. + # TODO: test it when all the rest works.
+ def addRead(self,whole_read,no_pair=""): + read_part = whole_read[:self.seed] + self.r_extracts_list.append(read_part) + self.r_extracts_list.append(whole_read[-self.seed:]) + self.r_extracts_list.append(reverseComplement(whole_read)[:self.seed]) + self.r_extracts_list.append(reverseComplement(whole_read)[-self.seed:]) + self.nb_reads+=1 + self.nb_extracts += 4 + +## use objects of this class to store read offset (PE1 and PE2) in files. +class ReadInfo: + def __init__(self, off_PE1,whole_read,seed,off_PE2=None): + self.offset1=off_PE1 + self.offset2=off_PE2 + self.corlen = len(whole_read) - seed + +## Gets the number of reads in the fastq file +# def getNbReads(fastq): +# with open(fastq) as f: +# for i, l in enumerate(f): +# pass +# nb_r=i+1 +# nb_r=nb_r/4 +# return nb_r + + + +## loads a chunk of reads for mapping on GPU. +# Yields a ReadExtracts object plus a dictionnary of ReadInfo. +# keeps in memory the parsing state of the file. +# @param ch_size is in number of reads +# @reset_ids indicates whether or not we want read index to be reset to 0 at the beginning of each chunk. +def getChunk(fastq,seed,paired,ch_siz,reset_ids=True): + new_chunk = False + d_rinfo=dict() + idx_read=0 + off2=None + filin = open(fastq) + line = filin.readline() + read_paired="" + if paired != "": + RE=readExtractsPE(seed) + filin_paired = open(paired) + line_paired = filin_paired.readline() + else: + RE=readsExtractsS(seed) + + start = False + num_line=0 + while line: + # Read sequence + read = line.split("\n")[0].split("\r")[0] + if paired != "": + read_paired = line_paired.split("\n")[0].split("\r")[0] + if (read[0] == '@' and num_line%4 == 0): # make sure we don't take into account a quality score instead of a read. 
+ start = True + off1=filin.tell() + line = filin.readline() + if paired != "": + off2=filin_paired.tell() + line_paired = filin_paired.readline() + continue + if (start == True): + start = False + readlen = len(read) + if readlen < seed: + line = filin.readline() + if paired !="": + line_paired = filin_paired.readline() # also skip PE2 in that case + continue + RE.addRead(read,read_paired) + d_rinfo[idx_read]=ReadInfo(off1,read,seed,off2) + if (idx_read>0 and ((idx_read+1)%(ch_siz)==0)): + yield RE,d_rinfo + if (reset_ids): + idx_read=0 + new_chunk=True + if paired != "": + RE = readExtractsPE(seed) + else: + RE = readsExtractsS(seed) + d_rinfo = dict() + if not new_chunk: + idx_read+=1 + else: + new_chunk=False + + line = filin.readline() + if paired!="": + line_paired = filin_paired.readline() + filin.close() + if paired !="": + filin_paired.close() + yield RE, d_rinfo + +## dumps a dictionnary of ReadInfo objects indexed on read index. +# +# @param d_rinfo dictionnary to dump +# @param fic filename (incl. full path) where to dump +def dump_d_rinfo(d_rinfo,fic): + with open(fic, 'wb') as fp: + pickle.dump(d_rinfo, fp, protocol=pickle.HIGHEST_PROTOCOL) + +## Loads a dictionnary of ReadInfo objects. +def load_d_rinfo(fic): + with open(fic, 'rb') as fp: + d_rinfo = pickle.load(fp) + return d_rinfo + + +## loads all extracts of reads into a list for processing on GPU. +# +# returns 1 or 2 readExtracts objects plus a dictionnary of ReadInfo. 
+def getAllReads(fastq,seed,paired): + d_rinfo=dict() + idx_read=0 + off2=None + filin = open(fastq) + line = filin.readline() + read_paired="" + + if paired != "": + RE=readExtractsPE(seed) + filin_paired = open(paired) + line_paired = filin_paired.readline() + else: + RE=readsExtractsS(seed) + + start = False + num_line=0 + while line: + # Read sequence + read = line.split("\n")[0].split("\r")[0] + if paired != "": + read_paired = line_paired.split("\n")[0].split("\r")[0] + if (read[0] == '@' and num_line%4 == 0): # make sure we don't take into account a quality score instead of a read. + start = True + off1=filin.tell() + line = filin.readline() + if paired != "": + off2=filin_paired.tell() + line_paired = filin_paired.readline() + continue + if (start == True): + start = False + readlen = len(read) + if readlen < seed: + line = filin.readline() + if paired !="": + line_paired = filin_paired.readline() # also skip PE2 in that case + continue + RE.addRead(read,read_paired) + d_rinfo[idx_read]=ReadInfo(off1,read,seed,off2) + idx_read+=1 + + line = filin.readline() + if paired!="": + line_paired = filin_paired.readline() + filin.close() + if paired !="": + filin_paired.close() + return RE,d_rinfo + +## use this class to retrieve reads from fastq file. +class ReadGetter: + ## constructor + # + # @param fastq Name of the fastq file that contains the read + # @param d_rinfo A dictionnary of ReadInfo objects that contains the offset indicating where the read starts in the file. + # @param paired The name of the file containing the PE2 (defaults to ""). 
+ def __init__(self,fastq,d_rinfo,paired=""): + self.filin=open(fastq) + self.d_rinfo=d_rinfo + self.paired=paired + if paired!="": + self.filinp=open(paired) + + def getOneRead(self,idx_read): + read_paired="" + self.filin.seek(self.d_rinfo[idx_read].offset1) + read=self.filin.readline() + if self.paired!="": + self.filinp.seek(self.d_rinfo[idx_read].offset2) + read_paired = self.filinp.readline() + return read,read_paired + + +### READS Functions +def totReads(filin): + """Verify and retrieve the number of reads in the fastq file before alignment""" + if filin.endswith('.gz'): + filein = gzip.open(filin, 'rb') + else: + filein = open(filin, 'r') + + line = 0 + while filein.readline(): + line += 1 + seq = float(round(line / 4)) + filein.close() + return seq + +### SEQUENCE parsing function +def genomeFastaRecovery(filin, limit_reference, edge, host_test = 0): + """Get genome sequence from fasta file""" + print("recovering genome from: ",filin) + print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())) + if filin == "": + return "", "", "" + + #infile = open(filin, 'r') + infile = gzip.open(filin, "rt") if filin.endswith(".gz") else open(filin, 'r') + name = [] + genome = [] + genome_line = "" + genome_rejected = 0 + for line in infile: + if line[0] == ">": + if name != []: + if len(genome_line) >= limit_reference: + genome.append(genome_line[-edge:] + genome_line + genome_line[:edge]) + else: + genome_rejected += 1 + name = name[:-1] + genome_line = "" + name.append(line[1:].split('\r')[0].split('\n')[0]) + else: + genome_line += changeCase(line).replace(' ', '').split('\r')[0].split('\n')[0] + + if len(genome_line) >= limit_reference: + genome.append(genome_line[-edge:] + genome_line + genome_line[:edge]) + genome_line = "" + else: + genome_rejected += 1 + name = name[:-1] + + infile.close() + + if host_test: + return "".join(genome) + else: + return genome, name, genome_rejected + close(filin) + diff --git a/phageterm/PhageTerm.py b/phageterm/PhageTerm.py new
file mode 100755 index 0000000000000000000000000000000000000000..7aaa7648bf126320534acf96753a37ed63bc25ef --- /dev/null +++ b/phageterm/PhageTerm.py @@ -0,0 +1,336 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- +##@file phageterm.py +# +# main program +## PhageTerm software +# +# Phageterm is a tool to determine phage termini and packaging strategy +# and other useful informations using raw sequencing reads. +# (This programs works with sequencing reads from a randomly +# sheared DNA library preparations as Illumina TruSeq paired-end or similar) +# +# ---------------------------------------------------------------------- +# Copyright (C) 2017 Julian Garneau +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# <http://www.gnu.org/licenses/gpl-3.0.html> +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# ---------------------------------------------------------------------- +# +# @author Julian Garneau <julian.garneau@usherbrooke.ca> +# @author Marc Monot <marc.monot@pasteur.fr> +# @author David Bikard <david.bikard@pasteur.fr> + + +### PYTHON Module +# Base +#import sys + + +from __future__ import print_function + +# Multiprocessing +import multiprocessing +import os +from multiprocessing import Manager + + +# Project + +from utilities import checkReportTitle +from functions_PhageTerm import * +from common_readsCoverage_processing import processCovValuesForSeq +from main_utils import setOptions,checkOptArgsConsistency + + +### MAIN +def main(): + + getopt=setOptions() + inRawDArgs, fParms, tParms, inDArgs=checkOptArgsConsistency(getopt) + + # For each fasta in file + DR = {"Headful (pac)":{}, "COS (5')":{}, "COS (3')":{}, "COS":{}, "DTR (short)":{}, "DTR (long)":{}, "Mu-like":{}, "UNKNOWN":{}, "NEW":{}} + results_pos = 0 + no_match = [] + draw = 0 # used when one wants to draw some graphs. + chk_handler = RCCheckpoint_handler(tParms.chk_freq, tParms.dir_chk, tParms.test_mode) + ## VL: keep this code just in case we want to try GPU implementation again later. + # if tParms.gpu!=0: + # ref_data = refData(inDArgs.refseq_liste, fParms.seed, inDArgs.hostseq) + # nb_extracts=inRawDArgs.tot_reads + # if (inRawDArgs.paired!=""): + # nb_extracts_per_read=7 + # else: + # nb_extracts_per_read=4 + # nb_extracts *= nb_extracts_per_read + # + # gpu_mapping_res_dir = tParms.gpu_mapping_res_dir + # wanted_gpu_nb_chunks = tParms.wanted_chunks + # mapper = GPU_chunkMapper() + # mapper.setRefData(ref_data) + # mapper.setFicDir(gpu_mapping_res_dir) + # nb_kmer_in_chunk = nb_extracts//wanted_gpu_nb_chunks + # doMapping(nb_kmer_in_chunk, mapper, inRawDArgs.fastq, "", ref_data, nb_extracts_per_read) + # if tParms.gpu_mapping_res_dir!=0: + # exit() # Consider that if we put results in files, it is because we are processing large datasets on a cluster. Otherwise, go on working. 
+ # + # if tParms.dir_cov_res!=None and tParms.gpu_mapping_res_dir!=None: # Process the mapping results produced by the GPU and put results in files + # if tParms.idx_chunk==None or tParms.idx_seq==None: + # print "Indicate index of chunk and sequence to process" + # exit(1) + # seq_info = seqInfo(inDArgs.refseq_liste[tParms.idx_seq],tParms.idx_seq, inDArgs.hostseq) + # fname=os.path.join(tParms.gpu_mapping_res_dir,base_fname_rinfo+str(tParms.idx_chunk)) + # d_rinfo=load_d_rinfo(fname) + # readsCoverageGPU_chunk(inRawDArgs.fastq, seq_info, tParms.idx_chunk, d_rinfo, fParms.edge, tParms.limit_coverage, fParms.virome, tParms.gpu_mapping_res_dir, + # tParms.dir_cov_res, logger=None) + # exit() # Consider that if we put results in files, it is because we are processing large datasets on a cluster. + + if tParms.multi_machine: + print("Running on cluster") + print(tParms.dir_cov_mm, tParms.seq_id, tParms.dir_seq_mm, tParms.DR_path) + if tParms.dir_cov_mm!=None and tParms.gpu_mapping_res_dir==None and tParms.dir_seq_mm==None: # perform mapping and readCoverage calculation and write results in file. + # In that case we are processing data in an embarrassingly parallel way on a cluster. 
+ position = [] + read_indices = list(range(int(inRawDArgs.tot_reads))) + part = chunks(read_indices, tParms.core) + for i in range(tParms.core): + position.append(next(part)[0]) + + position = position + [int(inRawDArgs.tot_reads)] + idx_refseq=chk_handler.getIdxSeq(tParms.core_id) + print("starting processing at sequence: ",idx_refseq) + for refseq in inDArgs.refseq_liste[idx_refseq:]: + readsCoverage(inRawDArgs, refseq, inDArgs, fParms,None,tParms.core_id, position[tParms.core_id], position[tParms.core_id + 1], + tParms,chk_handler,idx_refseq) + print("Processed: ", idx_refseq, " sequences") + idx_refseq+=1 + if tParms.core_id==0: + fname=os.path.join(tParms.dir_cov_mm,"nb_seq_processed.txt") + f=open(fname,"w") + f.write(str(idx_refseq)) + f.close() + exit() # Consider that if we put results in files, it is because we are processing large datasets on a cluster. + if tParms.dir_cov_mm!=None and tParms.seq_id!=None and tParms.dir_seq_mm!=None and tParms.DR_path!=None: + from _modules.seq_processing import sum_readsCoverage_for_seq + # in that case, we are processing all the results of readCoverage sequence by sequence in an embarrassingly parallel way on a cluster. + sum_readsCoverage_for_seq(tParms.dir_cov_mm, tParms.seq_id, tParms.nb_pieces, inDArgs, fParms, inRawDArgs, tParms.dir_seq_mm,tParms.DR_path) + exit() + if tParms.dir_seq_mm!=None and tParms.dir_cov_mm==None and tParms.seq_id==None and tParms.DR_path!=None: # report generation + from _modules.generate_report import loadDR,genReport + loadDR(tParms.DR_path, DR) + genReport(fParms, inDArgs, inRawDArgs, no_match, DR) + exit() + else: # mono machine original multi processing mode. 
+ ### COVERAGE + print("\nCalculating coverage values, please wait (may take a while)...\n") + start_run = time.time() + + if not fParms.test_run and tParms.core == 1: + print("If your computer has more than 1 processor, you can use the -c or --core option to speed up the process.\n\n") + + + for refseq in inDArgs.refseq_liste: + jobs = [] + manager = Manager() + return_dict = manager.dict() + position = [] + + read_indices = list(range(int(inRawDArgs.tot_reads))) + part = chunks(read_indices, tParms.core) + for i in range(tParms.core): + position.append(next(part)[0]) + + position = position + [int(inRawDArgs.tot_reads)] + + for i in range(0, tParms.core): + tParms.core_id=i + process = multiprocessing.Process(target=readsCoverage, args=(inRawDArgs, refseq, inDArgs, fParms,return_dict, i,position[i], position[i+1], + tParms, chk_handler,results_pos)) + jobs.append(process) + + for j in jobs: + j.start() + + for j in jobs: + j.join() + + # merging results + for core_id in range(tParms.core): + if core_id == 0: + termini_coverage = return_dict[core_id][0] + whole_coverage = return_dict[core_id][1] + paired_whole_coverage = return_dict[core_id][2] + phage_hybrid_coverage = return_dict[core_id][3] + host_hybrid_coverage = return_dict[core_id][4] + host_whole_coverage = return_dict[core_id][5] + list_hybrid = return_dict[core_id][6] + insert = return_dict[core_id][7].tolist() + paired_missmatch = return_dict[core_id][8] + reads_tested = return_dict[core_id][9] + else: + termini_coverage += return_dict[core_id][0] + whole_coverage += return_dict[core_id][1] + paired_whole_coverage += return_dict[core_id][2] + phage_hybrid_coverage += return_dict[core_id][3] + host_hybrid_coverage += return_dict[core_id][4] + host_whole_coverage += return_dict[core_id][5] + list_hybrid += return_dict[core_id][6] + insert += return_dict[core_id][7].tolist() + paired_missmatch += return_dict[core_id][8] + reads_tested += return_dict[core_id][9] + + termini_coverage = 
termini_coverage.tolist() + whole_coverage = whole_coverage.tolist() + paired_whole_coverage = paired_whole_coverage.tolist() + phage_hybrid_coverage = phage_hybrid_coverage.tolist() + host_hybrid_coverage = host_hybrid_coverage.tolist() + host_whole_coverage = host_whole_coverage.tolist() + list_hybrid = list_hybrid.tolist() + + + # Estimate fParms.virome run time + if fParms.virome: + end_run = time.time() + virome_run = int((end_run - start_run) * inDArgs.nbr_virome) + print("\n\nThe fasta file tested contains: " + str(inDArgs.nbr_virome) + " contigs (mean length: " + str( + inDArgs.mean_virome) + ")") + print("\nA complete run takes approximatively (" + str(tParms.core) + " core used) : " + EstimateTime( + virome_run) + "\n") + exit() + + # Contigs without any match + if sum(termini_coverage[0]) + sum(termini_coverage[1]) == 0: + no_match.append((checkReportTitle(inDArgs.refseq_name[results_pos]))) + continue + + s_stats=processCovValuesForSeq(refseq,inDArgs.hostseq,inDArgs.refseq_name,inDArgs.refseq_liste,fParms.seed,inRawDArgs.analysis_name,inRawDArgs.tot_reads,\ + results_pos,fParms.test_run, inRawDArgs.paired,fParms.edge,inRawDArgs.host,fParms.test, fParms.surrounding,\ + fParms.limit_preferred,fParms.limit_fixed,fParms.Mu_threshold,termini_coverage,whole_coverage,\ + paired_whole_coverage,phage_hybrid_coverage,host_hybrid_coverage, host_whole_coverage,insert,list_hybrid,reads_tested,DR) + + + results_pos += 1 + + + + ### EXPORT Data + if len(inDArgs.refseq_liste) == 1: + # Test No Match + if len(no_match) == 1: + print("\n\nERROR: No reads match, please check your reference file.") + exit() + + # Text report only + if fParms.workflow: + WorkflowReport(inRawDArgs.analysis_name, s_stats.P_class, s_stats.P_left, s_stats.P_right, s_stats.P_type, s_stats.P_orient, s_stats.ave_whole_cov) + else: + # Statistics + ExportStatistics(inRawDArgs.analysis_name, whole_coverage, paired_whole_coverage, termini_coverage, s_stats.phage_plus_norm, s_stats.phage_minus_norm, 
inRawDArgs.paired, fParms.test_run) + + # Sequence + ExportCohesiveSeq(inRawDArgs.analysis_name, s_stats.ArtcohesiveSeq, s_stats.P_seqcoh, fParms.test_run) + ExportPhageSequence(inRawDArgs.analysis_name, s_stats.P_left, s_stats.P_right, refseq, s_stats.P_orient, s_stats.Redundant, s_stats.Mu_like, \ + s_stats.P_class, s_stats.P_seqcoh, fParms.test_run) + + # Report + # TODO: just pass s_stat as argument; it will be cleaner. + CreateReport(inRawDArgs.analysis_name, fParms.seed, s_stats.added_whole_coverage, draw, s_stats.Redundant, s_stats.P_left, s_stats.P_right, s_stats.Permuted, \ + s_stats.P_orient, s_stats.termini_coverage_norm_close, \ + s_stats.picMaxPlus_norm_close, s_stats.picMaxMinus_norm_close, s_stats.gen_len, inRawDArgs.tot_reads, s_stats.P_seqcoh, s_stats.phage_plus_norm, \ + s_stats.phage_minus_norm, s_stats.ArtPackmode, s_stats.termini, s_stats.forward, s_stats.reverse, s_stats.ArtOrient, s_stats.ArtcohesiveSeq, \ + s_stats.termini_coverage_close, s_stats.picMaxPlus_close, s_stats.picMaxMinus_close, \ + s_stats.picOUT_norm_forw, s_stats.picOUT_norm_rev, s_stats.picOUT_forw, s_stats.picOUT_rev, s_stats.lost_perc, s_stats.ave_whole_cov, \ + s_stats.R1, s_stats.R2, s_stats.R3, inRawDArgs.host, len(inDArgs.hostseq), host_whole_coverage, \ + s_stats.picMaxPlus_host, s_stats.picMaxMinus_host, fParms.surrounding, s_stats.drop_cov, inRawDArgs.paired, insert, phage_hybrid_coverage,\ + host_hybrid_coverage, s_stats.added_paired_whole_coverage, s_stats.Mu_like, fParms.test_run, s_stats.P_class, s_stats.P_type, s_stats.P_concat) + + if (inRawDArgs.nrt==True): # non regression tests, dump phage class name into file for later checking. 
+ fnrt=open("nrt.txt","w") + fnrt.write(s_stats.P_class) + fnrt.close() + else: + # Test No Match + if len(no_match) == inDArgs.nbr_virome: + print("\n\nERROR: No reads match, please check your reference file.") + exit() + + # Report Resume + multiReport = SummaryReport(inRawDArgs.analysis_name, DR, no_match) + multiCohSeq = "" + multiPhageSeq = "" + multiWorkflow = "#analysis_name\tClass\tLeft\tPVal\tAdjPval\tRight\tPVal\tAdjPval\tType\tOrient\tCoverage\tComments\n" + + # No Match in workflow + if fParms.workflow: + for no_match_contig in no_match: + multiWorkflow += WorkflowReport(no_match_contig, "-", "-", "-", "-", "-", 0, 1) + + for DPC in DR: + for DC in DR[DPC]: + stat_dict = DR[DPC][DC] # splat this in everywhere + # Text report + if fParms.workflow: + multiWorkflow += WorkflowReport(phagename=DC, multi=1, **stat_dict) + # Sequence + idx_refseq=DR[DPC][DC]["idx_refseq_in_list"] + refseq=inDArgs.refseq_liste[idx_refseq] + multiCohSeq += ExportCohesiveSeq(DC, stat_dict["ArtcohesiveSeq"], stat_dict["P_seqcoh"], fParms.test_run, 1) + multiPhageSeq += ExportPhageSequence(DC, stat_dict["P_left"], stat_dict["P_right"], refseq, stat_dict["P_orient"], stat_dict["Redundant"], stat_dict["Mu_like"], stat_dict["P_class"], stat_dict["P_seqcoh"], fParms.test_run, 1) + + # Report + multiReport = CreateReport(phagename=DC, + draw=draw, + multi=1, + multiReport=multiReport, + **stat_dict) + + # Workflow + if not fParms.test: + if fParms.workflow: + filoutWorkflow = open(inRawDArgs.analysis_name + "_workflow.txt", "w") + filoutWorkflow.write(multiWorkflow) + filoutWorkflow.close() + + # Concatene Sequences + filoutCohSeq = open(inRawDArgs.analysis_name + "_cohesive-sequence.fasta", "w") + filoutCohSeq.write(multiCohSeq) + filoutCohSeq.close() + + filoutPhageSeq = open(inRawDArgs.analysis_name + "_sequence.fasta", "w") + filoutPhageSeq.write(multiPhageSeq) + filoutPhageSeq.close() + + # Concatene Report + doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % 
inRawDArgs.analysis_name, pagesize=letter, rightMargin=10,leftMargin=10, topMargin=5, bottomMargin=10) + doc.build(multiReport) + + + # Real virome run time + end_run = time.time() + virome_run = int(end_run-start_run) + print("\nThe fasta file tested contains: " + str(inDArgs.nbr_virome) + " contigs (mean length: " + str(inDArgs.mean_virome) + ")") + print("The run has taken (" + str(tParms.core) + " core used) : " + EstimateTime(virome_run) + "\n") + exit() + + + +if __name__ == '__main__': + main() + + + + + + + + + diff --git a/phageterm/SeqStats.py b/phageterm/SeqStats.py new file mode 100644 index 0000000000000000000000000000000000000000..e811c0588d9dbaadb1af7788c1ea52ed81081bba --- /dev/null +++ b/phageterm/SeqStats.py @@ -0,0 +1,93 @@ +##@file SeqStats.py +# +# Utility class to store results (statistics) for a sequence once all coverage results have been processed for it + +class SeqStats: + def __init__(self,P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, phage_plus_norm, phage_minus_norm, ArtcohesiveSeq,\ + P_seqcoh, Redundant, Mu_like, added_whole_coverage, Permuted, termini_coverage_norm_close, picMaxPlus_norm_close, \ + picMaxMinus_norm_close, gen_len, termini_coverage_close,ArtPackmode, termini, forward, reverse, ArtOrient, \ + picMaxPlus_close, picMaxMinus_close, picOUT_norm_forw, picOUT_norm_rev, picOUT_forw, picOUT_rev, \ + lost_perc, R1, R2, R3, picMaxPlus_host, picMaxMinus_host, drop_cov, added_paired_whole_coverage, P_concat): + self.P_class=P_class # TODO: some information about the meaning of these fields would be welcome. 
+ self.P_left=P_left + self.P_right=P_right + self.P_type=P_type + self.P_orient=P_orient + self.ave_whole_cov=ave_whole_cov + self.phage_plus_norm=phage_plus_norm + self.phage_minus_norm=phage_minus_norm + self.ArtcohesiveSeq=ArtcohesiveSeq + self.P_seqcoh=P_seqcoh + self.Redundant=Redundant + self.Mu_like=Mu_like + self.added_whole_coverage=added_whole_coverage + self.Permuted=Permuted + self.termini_coverage_norm_close=termini_coverage_norm_close + self.picMaxPlus_norm_close=picMaxPlus_norm_close + self.picMaxMinus_norm_close=picMaxMinus_norm_close + self.gen_len=gen_len + self.termini_coverage_close=termini_coverage_close + self.ArtPackmode=ArtPackmode + self.termini=termini + self.forward=forward + self.reverse=reverse + self.ArtOrient=ArtOrient + self.picMaxPlus_close=picMaxPlus_close + self.picMaxMinus_close=picMaxMinus_close + self.picOUT_norm_forw=picOUT_norm_forw + self.picOUT_norm_rev=picOUT_norm_rev + self.picOUT_forw=picOUT_forw + self.picOUT_rev=picOUT_rev + self.lost_perc=lost_perc + self.R1=R1 + self.R2=R2 + self.R3=R3 + self.picMaxPlus_host=picMaxPlus_host + self.picMaxMinus_host=picMaxMinus_host + self.drop_cov=drop_cov + self.added_paired_whole_coverage=added_paired_whole_coverage + self.P_concat=P_concat + + def toFile(self,ficname): #TODO: implement me + pass + +# types of the elements of the class +# <type 'str'> +# <type 'numpy.int64'> +# <type 'numpy.int64'> +# <type 'str'> +# <type 'str'> +# <type 'float'> +# <class 'pandas.core.frame.DataFrame'> +# <class 'pandas.core.frame.DataFrame'> +# <type 'str'> +# <type 'str'> +# <type 'int'> +# <type 'int'> +# <type 'list'> +# <type 'str'> +# <type 'list'> +# <type 'list'> +# <type 'list'> +# <type 'int'> +# <type 'list'> +# <type 'str'> +# <type 'str'> +# <type 'str'> +# <type 'str'> +# <type 'str'> +# <type 'list'> +# <type 'list'> +# <type 'list'> +# <type 'list'> +# <type 'list'> +# <type 'list'> +# <type 'float'> +# <type 'float'> +# <type 'float'> +# <type 'float'> +# <type 'str'> +# <type 
'str'> +# <type 'list'> +# <type 'list'> +# <type 'str'> diff --git a/phageterm/__init__.py b/phageterm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fc80254b619d488138a43632b617124a3d324702 --- /dev/null +++ b/phageterm/__init__.py @@ -0,0 +1 @@ +pass \ No newline at end of file diff --git a/phageterm/common_readsCoverage_processing.py b/phageterm/common_readsCoverage_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..68d9dae328c73c5d9c91804cc0f1627b7dee6d4a --- /dev/null +++ b/phageterm/common_readsCoverage_processing.py @@ -0,0 +1,695 @@ +## @file common_readsCoverage_processing.py +# +# VL: here I gathered functions that are common to both GPU and mono/multi CPU versions. +# These functions are called after the mapping is done and all the counters are filled from mapping output results. +from __future__ import print_function + +from time import gmtime, strftime +import heapq +import itertools + +import numpy as np +import pandas as pd +# Statistics +from scipy import stats +from statsmodels.sandbox.stats.multicomp import multipletests +from sklearn.tree import DecisionTreeRegressor + +from utilities import checkReportTitle +from SeqStats import SeqStats + +import os + + +k_no_match_for_contig=1 + +def wholeCov(whole_coverage, gen_len): + """Calculate the coverage for whole read alignments and its average""" + if gen_len == 0: + return whole_coverage, 1 + total_cov = sum(whole_coverage[0]) + sum(whole_coverage[1]) + ave_whole_cov = float(total_cov) / (2 * float(gen_len)) + added_whole_coverage = [x + y for x, y in zip(whole_coverage[0], whole_coverage[1])] + return added_whole_coverage, ave_whole_cov + +def testwholeCov(added_whole_coverage, ave_whole_cov, test): + """Return information about whole coverage.""" + if test: + return "" + if ave_whole_cov < 50: + print("\nWARNING: average coverage is under the limit of the software (50)") + elif ave_whole_cov < 200: + print("\nWARNING: average 
coverage is low (<200), Li's method is presumably unreliable\n") + drop_cov = [] + start_pos = last_pos = count_pos = 0 + for pos in range(len(added_whole_coverage)): + if added_whole_coverage[pos] < (ave_whole_cov / 1.5): + if pos == last_pos+1: + count_pos += 1 + last_pos = pos + else: + if count_pos > 100: + drop_cov.append( (start_pos,last_pos+1) ) + last_pos = start_pos = pos + count_pos = 0 + last_pos = pos + return drop_cov + +def maxPaired(paired_whole_coverage, whole_coverage): + """Max paired coverage using whole coverage, counter edge effect with paired-ends.""" + pwc = paired_whole_coverage[:] + wc = whole_coverage[:] + for i in range(len(pwc)): + for j in range(len(pwc[i])): + if pwc[i][j] < wc[i][j]: + pwc[i][j] = wc[i][j] + return pwc + +def replaceNormMean(norm_cov): + """Replace the values not normalised due to covLimit by mean.""" + nc_sum = nc_count = 0 + for nc in norm_cov: + if nc > 0: + nc_sum += nc + nc_count += 1 + if nc_count == 0: + mean_nc = 0 + else: + mean_nc = nc_sum / float(nc_count) + for i in range(len(norm_cov)): + if norm_cov[i] == 0: + norm_cov[i] = mean_nc + return norm_cov, mean_nc + +def normCov(termini_coverage, whole_coverage, covLimit, edge): + """Return the termini_coverage normalised by the whole coverage (% of coverage due to first base).""" + normalised_coverage = [len(termini_coverage[0])*[0], len(termini_coverage[0])*[0]] + termini_len = len(termini_coverage[0]) + mean_nc = [1,1] + for i in range(len(termini_coverage)): + for j in range(len(termini_coverage[i])): + if j < edge or j > termini_len-edge: + continue + if whole_coverage[i][j] >= covLimit: + if float(whole_coverage[i][j]) != 0: + normalised_coverage[i][j] = float(termini_coverage[i][j]) / float(whole_coverage[i][j]) + else: + normalised_coverage[i][j] = 0 + else: + normalised_coverage[i][j] = 0 + normalised_coverage[i], mean_nc[i] = replaceNormMean(normalised_coverage[i]) + return normalised_coverage, mean_nc + +def RemoveEdge(tableau, edge): + return 
tableau[edge:-edge] + +def usedReads(coverage, tot_reads): + """Retrieve the number of reads after alignment and calculate the percentage of reads lost.""" + used_reads = sum(coverage[0]) + sum(coverage[1]) + lost_reads = tot_reads - used_reads + lost_perc = (float(tot_reads) - float(used_reads))/float(tot_reads) * 100 + return used_reads, lost_reads, lost_perc + +### PEAK functions +def picMax(coverage, nbr_pic): + """COORDINATES (coverage value, position) of the nbr_pic largest coverage value.""" + if coverage == [[],[]] or coverage == []: + return "", "", "" + picMaxPlus = heapq.nlargest(nbr_pic, zip(coverage[0], itertools.count())) + picMaxMinus = heapq.nlargest(nbr_pic, zip(coverage[1], itertools.count())) + TopFreqH = max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))) + return picMaxPlus, picMaxMinus, TopFreqH + +def RemoveClosePicMax(picMax, gen_len, nbr_base): + """Remove peaks that are too close of the maximum (nbr_base around)""" + if nbr_base == 0: + return picMax[1:], [picMax[0]] + picMaxRC = picMax[:] + posMax = picMaxRC[0][1] + LimSup = posMax + nbr_base + LimInf = posMax - nbr_base + if LimSup < gen_len and LimInf >= 0: + PosOut = list(range(LimInf,LimSup)) + elif LimSup >= gen_len: + TurnSup = LimSup - gen_len + PosOut = list(range(posMax,gen_len))+list(range(0,TurnSup)) + list(range(LimInf,posMax)) + elif LimInf < 0: + TurnInf = gen_len + LimInf + PosOut = list(range(0,posMax))+list(range(TurnInf,gen_len)) + list(range(posMax,LimSup)) + picMaxOK = [] + picOUT = [] + for peaks in picMaxRC: + if peaks[1] not in PosOut: + picMaxOK.append(peaks) + else: + picOUT.append(peaks) + return picMaxOK, picOUT + +def addClosePic(picList, picClose, norm = 0): + """Add coverage value of close peaks to the top peak. 
Remove picClose in picList if exist.""" + if norm: + if picClose[0][0] >= 0.5: + return picList, [picClose[0]] + picListOK = picList[:] + cov_add = 0 + for cov in picClose: + cov_add += cov[0] + picListOK[cov[1]] = 0.01 + picListOK[picClose[0][1]] = cov_add + return picListOK, picClose + +def remove_pics(arr,n): + '''Removes the n highest values from the array''' + arr=np.array(arr) + pic_pos=arr.argsort()[-n:][::-1] + arr2=np.delete(arr,pic_pos) + return arr2 + +def gamma(X): + """Apply a gamma distribution.""" + X = np.array(X, dtype=np.int64) + v = remove_pics(X, 3) + + dist_max = float(max(v)) + if dist_max == 0: + return np.array([1.00] * len(X)) + + actual = np.bincount(v) + fit_alpha, fit_loc, fit_beta = stats.gamma.fit(v) + expected = stats.gamma.pdf(np.arange(0, dist_max + 1, 1), fit_alpha, loc=fit_loc, scale=fit_beta) * sum(actual) + + return stats.gamma.pdf(X, fit_alpha, loc=fit_loc, scale=fit_beta) + + +# STATISTICS +def test_pics_decision_tree(whole_coverage, termini_coverage, termini_coverage_norm, termini_coverage_norm_close): + """Fits a gamma distribution using a decision tree.""" + L = len(whole_coverage[0]) + res = pd.DataFrame({"Position": np.array(range(L)) + 1, "termini_plus": termini_coverage[0], + "SPC_norm_plus": termini_coverage_norm[0], "SPC_norm_minus": termini_coverage_norm[1], + "SPC_norm_plus_close": termini_coverage_norm_close[0], + "SPC_norm_minus_close": termini_coverage_norm_close[1], "termini_minus": termini_coverage[1], + "cov_plus": whole_coverage[0], "cov_minus": whole_coverage[1]}) + + res["cov"] = res["cov_plus"].values + res["cov_minus"].values + + res["R_plus"] = list(map(float, termini_coverage[0])) // np.mean(termini_coverage[0]) + res["R_minus"] = list(map(float, termini_coverage[1])) // np.mean(termini_coverage[1]) + + regr = DecisionTreeRegressor(max_depth=3, min_samples_leaf=100) + X = np.arange(L) + X = X[:, np.newaxis] + y = res["cov"].values + regr.fit(X, y) + + # Predict + y_1 = regr.predict(X) + res["covnode"] = 
y_1 + covnodes = np.unique(y_1) + thres = np.mean(whole_coverage[0]) / 2 + covnodes = [n for n in covnodes if n > thres] + + for node in covnodes: + X = res[res["covnode"] == node]["termini_plus"].values + res.loc[res["covnode"] == node, "pval_plus"] = gamma(X) + X = res[res["covnode"] == node]["termini_minus"].values + res.loc[res["covnode"] == node, "pval_minus"] = gamma(X) + + res.loc[res.pval_plus > 1, 'pval_plus'] = 1.00 + res.loc[res.pval_minus > 1, 'pval_minus'] = 1.00 + res = res.fillna(1.00) + + res['pval_plus_adj'] = multipletests(res["pval_plus"].values, alpha=0.01, method="bonferroni")[1] + res['pval_minus_adj'] = multipletests(res["pval_minus"].values, alpha=0.01, method="bonferroni")[1] + + res = res.fillna(1.00) + + res_plus = pd.DataFrame( + {"Position": res['Position'], "SPC_std": res['SPC_norm_plus'] * 100, "SPC": res['SPC_norm_plus_close'] * 100, + "pval_gamma": res['pval_plus'], "pval_gamma_adj": res['pval_plus_adj']}) + res_minus = pd.DataFrame( + {"Position": res['Position'], "SPC_std": res['SPC_norm_minus'] * 100, "SPC": res['SPC_norm_minus_close'] * 100, + "pval_gamma": res['pval_minus'], "pval_gamma_adj": res['pval_minus_adj']}) + + res_plus.sort_values("SPC", ascending=False, inplace=True) + res_minus.sort_values("SPC", ascending=False, inplace=True) + + res_plus.reset_index(drop=True, inplace=True) + res_minus.reset_index(drop=True, inplace=True) + + return res, res_plus, res_minus + +### SCORING functions +# Li's methodology +def ratioR1(TopFreqH, used_reads, gen_len): + """Calculate the ratio H/A (R1) = highest frequency/average frequency. For Li's methodology.""" + AveFreq = (float(used_reads)/float(gen_len)/2) + if AveFreq == 0: + return 0, 0 + R1 = float(TopFreqH)/float(AveFreq) + return R1, AveFreq + +def ratioR(picMax): + """Calculate the T1/T2 = Top 1st frequency/Second higher frequency. 
For Li's methodology.""" + T1 = picMax[0][0] + T2 = max(1,picMax[1][0]) + R = float(T1)/float(T2) + return round(R) + + +def packMode(R1, R2, R3): + """Make the prognosis about the phage packaging mode and termini type. For Li's methodology.""" + packmode = "OTHER" + termini = "" + forward = "" + reverse = "" + + if R1 < 30: + termini = "Absence" + if R2 < 3: + forward = "No Obvious Termini" + if R3 < 3: + reverse = "No Obvious Termini" + elif R1 > 100: + termini = "Fixed" + if R2 < 3: + forward = "Multiple-Pref. Term." + if R3 < 3: + reverse = "Multiple-Pref. Term." + else: + termini = "Preferred" + if R2 < 3: + forward = "Multiple-Pref. Term." + if R3 < 3: + reverse = "Multiple-Pref. Term." + + if R2 >= 3: + forward = "Obvious Termini" + if R3 >= 3: + reverse = "Obvious Termini" + + if R2 >= 3 and R3 >= 3: + packmode = "COS" + if R2 >= 3 and R3 < 3: + packmode = "PAC" + if R2 < 3 and R3 >= 3: + packmode = "PAC" + return packmode, termini, forward, reverse + +### PHAGE Information +def orientation(picMaxPlus, picMaxMinus): + """Return phage termini orientation.""" + if not picMaxPlus and not picMaxMinus: + return "NA" + if picMaxPlus and not picMaxMinus: + return "Forward" + if not picMaxPlus and picMaxMinus: + return "Reverse" + if picMaxPlus and picMaxMinus: + if picMaxPlus[0][0] > picMaxMinus[0][0]: + return "Forward" + elif picMaxMinus[0][0] > picMaxPlus[0][0]: + return "Reverse" + elif picMaxMinus[0][0] == picMaxPlus[0][0]: + return "NA" + + +def typeCOS(PosPlus, PosMinus, nbr_lim): + """ Return type of COS sequence.""" + if PosPlus < PosMinus and abs(PosPlus-PosMinus) < nbr_lim: + return "COS (5')", "Lambda" + else: + return "COS (3')", "HK97" + +def sequenceCohesive(Packmode, refseq, picMaxPlus, picMaxMinus, nbr_lim): + """Return cohesive sequence for COS phages.""" + if Packmode != 'COS': + return '', Packmode + PosPlus = picMaxPlus[0][1] + PosMinus = picMaxMinus[0][1] + + SC_class, SC_type = typeCOS(PosPlus, PosMinus, nbr_lim) + + if SC_class == "COS 
(5')": + if abs(PosMinus - PosPlus) < nbr_lim: + seqcoh = refseq[min(PosPlus, PosMinus):max(PosPlus, PosMinus) + 1] + return seqcoh, Packmode + else: + seqcoh = refseq[max(PosPlus, PosMinus) + 1:] + refseq[:min(PosPlus, PosMinus)] + return seqcoh, Packmode + + elif SC_class == "COS (3')": + if abs(PosMinus - PosPlus) < nbr_lim: + seqcoh = refseq[min(PosPlus, PosMinus) + 1:max(PosPlus, PosMinus)] + return seqcoh, Packmode + else: + seqcoh = refseq[max(PosPlus, PosMinus) + 1:] + refseq[:min(PosPlus, PosMinus)] + return seqcoh, Packmode + else: + return '', Packmode + +def selectSignificant(table, pvalue, limit): + """Return significant peaks over a limit""" + table_pvalue = table.loc[lambda df: df.pval_gamma_adj < pvalue, :] + table_pvalue_limit = table_pvalue.loc[lambda df: df.SPC > limit, :] + table_pvalue_limit.reset_index(drop=True, inplace=True) + return table_pvalue_limit + +def testMu(paired, list_hybrid, gen_len, used_reads, seed, insert, phage_hybrid_coverage, Mu_threshold, hostseq): + """Return Mu if enough hybrid reads compared to theory.""" + if hostseq == "": + return 0, -1, -1, "" + if paired != "" and len(insert) != 0: + insert_mean = sum(insert) / len(insert) + else: + insert_mean = max(100, seed+10) + Mu_limit = ((insert_mean - seed) / float(gen_len)) * used_reads/2 + test = 0 + Mu_term_plus = "Random" + Mu_term_minus = "Random" + picMaxPlus_Mu, picMaxMinus_Mu, TopFreqH_phage_hybrid = picMax(phage_hybrid_coverage, 1) + picMaxPlus_Mu = picMaxPlus_Mu[0][1] + picMaxMinus_Mu = picMaxMinus_Mu[0][1] + + # Orientation + if list_hybrid[0] > list_hybrid[1]: + P_orient = "Forward" + elif list_hybrid[1] > list_hybrid[0]: + P_orient = "Reverse" + else: + P_orient = "" + + # Termini + if list_hybrid[0] > ( Mu_limit * Mu_threshold ): + test = 1 + pos_to_check = range(picMaxPlus_Mu+1,gen_len) + range(0,100) + for pos in pos_to_check: + if phage_hybrid_coverage[0][pos] >= max(1,phage_hybrid_coverage[0][picMaxPlus_Mu]/4): + Mu_term_plus = pos + picMaxPlus_Mu = pos + 
else: + Mu_term_plus = pos + break + # Reverse + if list_hybrid[1] > ( Mu_limit * Mu_threshold ): + test = 1 + pos_to_check = range(0,picMaxMinus_Mu)[::-1] + range(gen_len-100,gen_len)[::-1] + for pos in pos_to_check: + if phage_hybrid_coverage[1][pos] >= max(1,phage_hybrid_coverage[1][picMaxMinus_Mu]/4): + Mu_term_minus = pos + picMaxMinus_Mu = pos + else: + Mu_term_minus = pos + break + return test, Mu_term_plus, Mu_term_minus, P_orient + +### DECISION Process +def decisionProcess(plus_significant, minus_significant, limit_fixed, gen_len, paired, insert, R1, list_hybrid, + used_reads, seed, phage_hybrid_coverage, Mu_threshold, refseq, hostseq): + """ .""" + P_orient = "NA" + P_seqcoh = "" + P_concat = "" + P_type = "-" + Mu_like = 0 + P_left = "Random" + P_right = "Random" + # 2 peaks sig. + if not plus_significant.empty and not minus_significant.empty: + # Multiple + if (len(plus_significant["SPC"]) > 1 or len(minus_significant["SPC"]) > 1): + if not (plus_significant["SPC"][0] > limit_fixed or minus_significant["SPC"][0] > limit_fixed): + Redundant = 1 + P_left = "Multiple" + P_right = "Multiple" + Permuted = "Yes" + P_class = "-" + P_type = "-" + return Redundant, Permuted, P_class, P_type, P_seqcoh, P_concat, P_orient, P_left, P_right, Mu_like + + dist_peak = abs(plus_significant['Position'][0] - minus_significant['Position'][0]) + dist_peak_over = abs(abs(plus_significant['Position'][0] - minus_significant['Position'][0]) - gen_len) + P_left = plus_significant['Position'][0] + P_right = minus_significant['Position'][0] + # COS + if (dist_peak <= 2) or (dist_peak_over <= 2): + Redundant = 0 + Permuted = "No" + P_class = "COS" + P_type = "-" + elif (dist_peak < 20 and dist_peak > 2) or (dist_peak_over < 20 and dist_peak_over > 2): + Redundant = 0 + Permuted = "No" + P_class, P_type = typeCOS(plus_significant["Position"][0], minus_significant["Position"][0], gen_len / 2) + P_seqcoh, packstat = sequenceCohesive("COS", refseq, [ + ((plus_significant["SPC"][0]), 
(plus_significant["Position"][0]) - 1)], [((minus_significant["SPC"][0]), + ( + minus_significant["Position"][ + 0]) - 1)], gen_len / 2) + # DTR + elif (dist_peak <= 1000) or (dist_peak_over <= 1000): + Redundant = 1 + Permuted = "No" + P_class = "DTR (short)" + P_type = "T7" + P_seqcoh, packstat = sequenceCohesive("COS", refseq, [ + ((plus_significant["SPC"][0]), (plus_significant["Position"][0]) - 1)], [((minus_significant["SPC"][0]), + ( + minus_significant["Position"][ + 0]) - 1)], gen_len / 2) + elif (dist_peak <= 0.1 * gen_len) or (dist_peak_over <= 0.1 * gen_len): + Redundant = 1 + Permuted = "No" + P_class = "DTR (long)" + P_type = "T5" + P_seqcoh, packstat = sequenceCohesive("COS", refseq, [ + ((plus_significant["SPC"][0]), (plus_significant["Position"][0]) - 1)], [((minus_significant["SPC"][0]), + ( + minus_significant["Position"][ + 0]) - 1)], gen_len / 2) + else: + Redundant = 1 + Permuted = "No" + P_class = "-" + P_type = "-" + # 1 peak sig. + elif not plus_significant.empty and minus_significant.empty or plus_significant.empty and not minus_significant.empty: + Redundant = 1 + Permuted = "Yes" + P_class = "Headful (pac)" + P_type = "P1" + if paired != "": + if R1 == 0 or len(insert) == 0: + P_concat = 1 + else: + P_concat = round((sum(insert) / len(insert)) / R1) - 1 + if not plus_significant.empty: + P_left = plus_significant['Position'][0] + P_right = "Distributed" + P_orient = "Forward" + else: + P_left = "Distributed" + P_right = minus_significant['Position'][0] + P_orient = "Reverse" + # 0 peak sig. 
    # ------------------------------------------------------------------
    # NOTE(review): the lines below are the tail of decisionProcess(),
    # whose `def` line lies above this chunk; kept verbatim.  Fallback
    # branch: no significant peak on either strand, so test for a
    # Mu-like (transposable) phage before declaring the genome
    # redundant and circularly permuted.
    # ------------------------------------------------------------------
    elif plus_significant.empty and minus_significant.empty:
        Mu_like, Mu_term_plus, Mu_term_minus, P_orient = testMu(paired, list_hybrid, gen_len, used_reads, seed, insert,
                                                                phage_hybrid_coverage, Mu_threshold, hostseq)
        if Mu_like:
            # Mu-like: termini are taken from the hybrid-read boundaries.
            Redundant = 0
            Permuted = "No"
            P_class = "Mu-like"
            P_type = "Mu"
            P_left = Mu_term_plus
            P_right = Mu_term_minus
        else:
            # No exploitable signal at all: headful-like redundant/permuted.
            Redundant = 1
            Permuted = "Yes"
            P_class = "-"
            P_type = "-"

    return Redundant, Permuted, P_class, P_type, P_seqcoh, P_concat, P_orient, P_left, P_right, Mu_like


# Processes coverage values for a sequence.
def processCovValuesForSeq(refseq,hostseq,refseq_name,refseq_liste,seed,analysis_name,tot_reads,results_pos,test_run, paired,edge,host,test, surrounding,limit_preferred,limit_fixed,Mu_threshold,\
                termini_coverage,whole_coverage,paired_whole_coverage,phage_hybrid_coverage,host_hybrid_coverage, host_whole_coverage,insert,list_hybrid,reads_tested,DR,DR_path=None):
    """Post-process the coverage arrays computed for one reference sequence
    and derive its termini/packaging classification.

    The function normalises the termini coverage, trims `edge` bases off
    every coverage track, extracts and filters coverage peaks, runs the
    statistical and LI analyses, and finally calls decisionProcess() to
    classify the phage.

    Parameters (as used below; types not declared in SOURCE):
      refseq / hostseq      -- phage and host sequences (hostseq may be "").
      refseq_name           -- list of sequence names, indexed by results_pos.
      refseq_liste          -- list of all reference sequences being analysed.
      termini_coverage, whole_coverage, paired_whole_coverage,
      phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage
                            -- per-strand coverage tracks from readsCoverage.
      DR                    -- dict of dicts keyed by P_class then report
                               title; filled only for multi-sequence runs.
      DR_path               -- when not None, per-sequence results are also
                               pickled under DR_path/<P_class>/ (multi-machine
                               cluster mode).

    Returns a SeqStats object summarising the analysis.
    """

    print("\n\nFinished calculating coverage values, the remainder should be completed rapidly\n",
          strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))

    # WHOLE Coverage : Average, Maximum and Minimum
    added_whole_coverage, ave_whole_cov = wholeCov(whole_coverage, len(refseq))
    added_paired_whole_coverage, ave_paired_whole_cov = wholeCov(paired_whole_coverage, len(refseq))
    added_host_whole_coverage, ave_host_whole_cov = wholeCov(host_whole_coverage, len(hostseq))

    drop_cov = testwholeCov(added_whole_coverage, ave_whole_cov, test_run)

    # NORM pic by whole coverage (1 base)
    # Paired-end data normalises against the paired whole coverage,
    # single-end against the plain whole coverage.
    if paired != "":
        #paired_whole_coverage_test = maxPaired(paired_whole_coverage, whole_coverage)
        termini_coverage_norm, mean_nc = normCov(termini_coverage, paired_whole_coverage, max(10, ave_whole_cov / 1.5),
                                                 edge)
    else:
        termini_coverage_norm, mean_nc = normCov(termini_coverage, whole_coverage, max(10, ave_whole_cov / 1.5), edge)

    # REMOVE edge: trim `edge` bases from both ends of every track so the
    # artificial pile-up at the sequence borders does not bias peak calling.
    termini_coverage[0] = RemoveEdge(termini_coverage[0], edge)
    termini_coverage[1] = RemoveEdge(termini_coverage[1], edge)
    termini_coverage_norm[0] = RemoveEdge(termini_coverage_norm[0], edge)
    termini_coverage_norm[1] = RemoveEdge(termini_coverage_norm[1], edge)
    whole_coverage[0] = RemoveEdge(whole_coverage[0], edge)
    whole_coverage[1] = RemoveEdge(whole_coverage[1], edge)
    paired_whole_coverage[0] = RemoveEdge(paired_whole_coverage[0], edge)
    paired_whole_coverage[1] = RemoveEdge(paired_whole_coverage[1], edge)
    added_whole_coverage = RemoveEdge(added_whole_coverage, edge)
    added_paired_whole_coverage = RemoveEdge(added_paired_whole_coverage, edge)
    added_host_whole_coverage = RemoveEdge(added_host_whole_coverage, edge)
    phage_hybrid_coverage[0] = RemoveEdge(phage_hybrid_coverage[0], edge)
    phage_hybrid_coverage[1] = RemoveEdge(phage_hybrid_coverage[1], edge)
    host_whole_coverage[0] = RemoveEdge(host_whole_coverage[0], edge)
    host_whole_coverage[1] = RemoveEdge(host_whole_coverage[1], edge)
    host_hybrid_coverage[0] = RemoveEdge(host_hybrid_coverage[0], edge)
    host_hybrid_coverage[1] = RemoveEdge(host_hybrid_coverage[1], edge)
    refseq = RemoveEdge(refseq, edge)
    if host != "":
        hostseq = RemoveEdge(hostseq, edge)
    gen_len = len(refseq)
    host_len = len(hostseq)
    if test == "DL":
        # Special test mode pins the genome length — presumably to make the
        # downstream statistics deterministic; TODO confirm intent.
        gen_len = 100000

    # READS Total, Used and Lost
    used_reads, lost_reads, lost_perc = usedReads(termini_coverage, reads_tested)

    # PIC Max: top-5 coverage peaks per strand (raw, normalised, host)
    picMaxPlus, picMaxMinus, TopFreqH = picMax(termini_coverage, 5)
    picMaxPlus_norm, picMaxMinus_norm, TopFreqH_norm = picMax(termini_coverage_norm, 5)
    picMaxPlus_host, picMaxMinus_host, TopFreqH_host = picMax(host_whole_coverage, 5)

    ### ANALYSIS

    ## Close Peaks: merge peaks closer than `surrounding` bases
    picMaxPlus, picOUT_forw = RemoveClosePicMax(picMaxPlus, gen_len, surrounding)
    picMaxMinus, picOUT_rev = RemoveClosePicMax(picMaxMinus, gen_len, surrounding)
    picMaxPlus_norm, picOUT_norm_forw = RemoveClosePicMax(picMaxPlus_norm, gen_len, surrounding)
    picMaxMinus_norm, picOUT_norm_rev = RemoveClosePicMax(picMaxMinus_norm, gen_len, surrounding)

    termini_coverage_close = termini_coverage[:]
    termini_coverage_close[0], picOUT_forw = addClosePic(termini_coverage[0], picOUT_forw)
    termini_coverage_close[1], picOUT_rev = addClosePic(termini_coverage[1], picOUT_rev)

    termini_coverage_norm_close = termini_coverage_norm[:]
    termini_coverage_norm_close[0], picOUT_norm_forw = addClosePic(termini_coverage_norm[0], picOUT_norm_forw, 1)
    termini_coverage_norm_close[1], picOUT_norm_rev = addClosePic(termini_coverage_norm[1], picOUT_norm_rev, 1)

    ## Statistical Analysis
    picMaxPlus_norm_close, picMaxMinus_norm_close, TopFreqH_norm = picMax(termini_coverage_norm_close, 5)
    phage_norm, phage_plus_norm, phage_minus_norm = test_pics_decision_tree(paired_whole_coverage, termini_coverage,
                                                                            termini_coverage_norm,
                                                                            termini_coverage_norm_close)
    # VL: comment that since the 2 different conditions lead to the execution of the same piece of code...
    # if paired != "":
    #     phage_norm, phage_plus_norm, phage_minus_norm = test_pics_decision_tree(paired_whole_coverage, termini_coverage,
    #                                                                             termini_coverage_norm,
    #                                                                             termini_coverage_norm_close)
    # else:
    #     phage_norm, phage_plus_norm, phage_minus_norm = test_pics_decision_tree(whole_coverage, termini_coverage,
    #                                                                             termini_coverage_norm,
    #                                                                             termini_coverage_norm_close)

    ## LI Analysis
    picMaxPlus_close, picMaxMinus_close, TopFreqH = picMax(termini_coverage_close, 5)

    R1, AveFreq = ratioR1(TopFreqH, used_reads, gen_len)
    R2 = ratioR(picMaxPlus_close)
    R3 = ratioR(picMaxMinus_close)

    ArtPackmode, termini, forward, reverse = packMode(R1, R2, R3)
    ArtOrient = orientation(picMaxPlus_close, picMaxMinus_close)
    ArtcohesiveSeq, ArtPackmode = sequenceCohesive(ArtPackmode, refseq, picMaxPlus_close, picMaxMinus_close,
                                                   gen_len / 2)

    ### DECISION Process

    # PEAKS Significativity: keep peaks whose frequency beats 1/gen_len
    plus_significant = selectSignificant(phage_plus_norm, 1.0 / gen_len, limit_preferred)
    minus_significant = selectSignificant(phage_minus_norm, 1.0 / gen_len, limit_preferred)

    # DECISION
    Redundant, Permuted, P_class, P_type, P_seqcoh, P_concat, P_orient, P_left, P_right, Mu_like = decisionProcess(
        plus_significant, minus_significant, limit_fixed, gen_len, paired, insert, R1, list_hybrid, used_reads,
        seed, phage_hybrid_coverage, Mu_threshold, refseq, hostseq)

    ### Results
    # NOTE(review): result recording below applies to multi-sequence runs;
    # indentation reconstructed from a flattened diff — confirm against the
    # upstream PhageTerm source.
    if len(refseq_liste) != 1:
        if P_class == "-":
            # Unclassified contig: label by peak randomness.
            if P_left == "Random" and P_right == "Random":
                P_class = "UNKNOWN"
            else:
                P_class = "NEW"
        DR[P_class][checkReportTitle(refseq_name[results_pos])] = {"analysis_name": analysis_name, "seed": seed,
                                                                   "added_whole_coverage": added_whole_coverage,
                                                                   "Redundant": Redundant, "P_left": P_left,
                                                                   "P_right": P_right, "Permuted": Permuted,
                                                                   "P_orient": P_orient,
                                                                   "termini_coverage_norm_close": termini_coverage_norm_close,
                                                                   "picMaxPlus_norm_close": picMaxPlus_norm_close,
                                                                   "picMaxMinus_norm_close": picMaxMinus_norm_close,
                                                                   "gen_len": gen_len, "tot_reads": tot_reads,
                                                                   "P_seqcoh": P_seqcoh,
                                                                   "phage_plus_norm": phage_plus_norm,
                                                                   "phage_minus_norm": phage_minus_norm,
                                                                   "ArtPackmode": ArtPackmode, "termini": termini,
                                                                   "forward": forward, "reverse": reverse,
                                                                   "ArtOrient": ArtOrient,
                                                                   "ArtcohesiveSeq": ArtcohesiveSeq,
                                                                   "termini_coverage_close": termini_coverage_close,
                                                                   "picMaxPlus_close": picMaxPlus_close,
                                                                   "picMaxMinus_close": picMaxMinus_close,
                                                                   "picOUT_norm_forw": picOUT_norm_forw,
                                                                   "picOUT_norm_rev": picOUT_norm_rev,
                                                                   "picOUT_forw": picOUT_forw,
                                                                   "picOUT_rev": picOUT_rev, "lost_perc": lost_perc,
                                                                   "ave_whole_cov": ave_whole_cov, "R1": R1, "R2": R2,
                                                                   "R3": R3, "host": host, "host_len": host_len,
                                                                   "host_whole_coverage": host_whole_coverage,
                                                                   "picMaxPlus_host": picMaxPlus_host,
                                                                   "picMaxMinus_host": picMaxMinus_host,
                                                                   "surrounding": surrounding, "drop_cov": drop_cov,
                                                                   "paired": paired, "insert": insert,
                                                                   "phage_hybrid_coverage": phage_hybrid_coverage,
                                                                   "host_hybrid_coverage": host_hybrid_coverage,
                                                                   "added_paired_whole_coverage": added_paired_whole_coverage,
                                                                   "Mu_like": Mu_like, "test_run": test_run,
                                                                   "P_class": P_class, "P_type": P_type,
                                                                   "P_concat": P_concat,
                                                                   "idx_refseq_in_list": results_pos}

        if DR_path!=None: # multi machine cluster mode.
            strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
            P_class_dir=os.path.join(DR_path,P_class)
            if os.path.exists(P_class_dir):
                if not os.path.isdir(P_class_dir):
                    raise RuntimeError("P_class_dir is not a directory")
            else:
                os.mkdir(P_class_dir)
            import pickle
            fic_name=os.path.join(P_class_dir,checkReportTitle(refseq_name[results_pos]))
            # Tuple order mirrors the DR dict above; consumers unpickle it
            # positionally.
            items_to_save=(analysis_name,seed,added_whole_coverage,Redundant,P_left,P_right,Permuted, \
                           P_orient,termini_coverage_norm_close,picMaxPlus_norm_close,picMaxMinus_norm_close, \
                           gen_len,tot_reads,P_seqcoh,phage_plus_norm,phage_minus_norm,ArtPackmode,termini, \
                           forward,reverse,ArtOrient,ArtcohesiveSeq,termini_coverage_close,picMaxPlus_close, \
                           picMaxMinus_close,picOUT_norm_forw,picOUT_norm_rev,picOUT_forw,picOUT_rev, \
                           lost_perc,ave_whole_cov,R1,R2,R3,host,host_len,host_whole_coverage,picMaxPlus_host, \
                           picMaxMinus_host,surrounding,drop_cov,paired, insert,phage_hybrid_coverage, \
                           host_hybrid_coverage,added_paired_whole_coverage,Mu_like,test_run,P_class,P_type,\
                           P_concat,results_pos)
            with open(fic_name,'wb') as f:
                pickle.dump(items_to_save,f)
            f.close()  # redundant after `with`, kept as in original

    return SeqStats(P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, phage_plus_norm, phage_minus_norm, ArtcohesiveSeq, P_seqcoh, Redundant, Mu_like, \
                    added_whole_coverage, Permuted, termini_coverage_norm_close, picMaxPlus_norm_close, picMaxMinus_norm_close, gen_len, termini_coverage_close, \
                    ArtPackmode, termini, forward, reverse, ArtOrient, picMaxPlus_close, picMaxMinus_close, picOUT_norm_forw, picOUT_norm_rev, picOUT_forw, picOUT_rev, \
                    lost_perc, R1, R2, R3, picMaxPlus_host, picMaxMinus_host, drop_cov, added_paired_whole_coverage, P_concat)
##@file debug_utils.py
#
# Contains utility classes for debugging and testing.
#
#@author vegrand@pasteur.fr

## Utility class for debugging.
#
# Contains the mapping results for 1 read.
# For map_start, map_end, map_rcpl_start, map_rcpl_end a value of 0 means
# that no match was found and a value of 1 means that a match was found.
class ReadMappingInfo:
    ##
    #
    # @param idx_read Number of the read in the processing (reads are processed in the same order as they are found in the fasta file).
    # @param map_start Read maps at its beginning (first seed characters) or not.
    # @param map_end Read maps at its end (last seed characters) or not.
    # @param map_rcpl_start Start of reverse complement maps or not.
    # @param map_rcpl_stop End of reverse complement maps or not (stored as map_rcpl_end).
    def __init__(self, idx_read, map_start, map_end, map_rcpl_start, map_rcpl_stop):
        self.idx_read = idx_read
        self.map_start = map_start
        self.map_end = map_end
        self.map_rcpl_start = map_rcpl_start
        # Attribute is named map_rcpl_end even though the parameter is
        # map_rcpl_stop; all readers must use map_rcpl_end.
        self.map_rcpl_end = map_rcpl_stop


## Aim of this class is to give the ability to compare the results of
## readsCoverage (original CPU version) and readsCoverageGPU.
class ReadMappingInfoLogger:
    def __init__(self):
        self.cnt_read = 0     # count only reads that were not rejected (readlen >= seed)
        self.l_rm_info = []   # completed ReadMappingInfo records
        self.cur_r_info = None  # ReadMappingInfo for the read currently being processed
        self.rw_lst = []      # raw working-structure snapshots recorded via add_rw()

    ## Records a raw working-structure snapshot (e.g. a checkpoint object).
    def add_rw(self, rw):
        self.rw_lst.append(rw)

    ## Starts logging a new read; archives the previous read's record first.
    #
    # @param numR_in_file Optional read index; defaults to the internal counter.
    def newRmInfo(self, numR_in_file=None):
        if self.cur_r_info is not None:
            self.l_rm_info.append(self.cur_r_info)
        idx_read = numR_in_file if numR_in_file is not None else self.cnt_read
        self.cur_r_info = ReadMappingInfo(idx_read, 0, 0, 0, 0)
        self.cnt_read += 1

    ## Records the mapping information (does it map or not and where) for the
    ## read that is currently being processed.
    def rMatch(self, akey):
        if self.cur_r_info is None:
            raise RuntimeError("Call newRmInfo() before calling rsMatch()")
        if akey == "mstart":
            self.cur_r_info.map_start = 1
        elif akey == "mend":
            self.cur_r_info.map_end = 1
        elif akey == "mrcplstart":
            self.cur_r_info.map_rcpl_start = 1
        elif akey == "mrcplend":
            self.cur_r_info.map_rcpl_end = 1
        else:
            raise RuntimeError("invalid key to indicate where read matches sequence")

    ## Returns the list of all ReadMappingInfo, including the in-progress one.
    #
    # The in-progress record is archived and cleared so that calling this
    # method (or flush) twice does not duplicate it — the original appended
    # cur_r_info on every call.
    def getMatchInfoList(self):
        if self.cur_r_info is not None:
            self.l_rm_info.append(self.cur_r_info)
            self.cur_r_info = None
        return self.l_rm_info

    ## Flushes all ReadMappingInfo to the given file, one record per line as
    ## "idx|map_start|map_end|map_rcpl_start|map_rcpl_end", preceded by the
    ## read count.
    #
    # Fixes from review: the original passed an int to file.write()
    # (TypeError), read the nonexistent attribute elm.map_rcpl_stop
    # (AttributeError — the constructor stores map_rcpl_end), wrote no
    # newlines, and could append cur_r_info a second time.
    def flush(self, filename):
        if self.cur_r_info is not None:
            self.l_rm_info.append(self.cur_r_info)
            self.cur_r_info = None
        with open(filename, "w") as f_debug:
            f_debug.write(str(self.cnt_read) + "\n")
            for elm in self.l_rm_info:
                f_debug.write("%d|%d|%d|%d|%d\n" % (elm.idx_read, elm.map_start, elm.map_end,
                                                    elm.map_rcpl_start, elm.map_rcpl_end))
/usr/bin/env python +# -*- coding: utf-8 -*- +## @file functions_PhageTerm.py +# +# This file is a part of PhageTerm software +# A tool to determine phage termini and packaging strategy +# and other useful informations using raw sequencing reads. +# (This programs works with sequencing reads from a randomly +# sheared DNA library preparations as Illumina TruSeq paired-end or similar) +# +# ---------------------------------------------------------------------- +# Copyright (C) 2017 Julian Garneau +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# <http://www.gnu.org/licenses/gpl-3.0.html> +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
### UTILITY function
def chunks(l, n):
    """Yield n successive chunks from l.

    The first n-1 chunks have round(len(l)/n) elements; the last chunk
    takes whatever remains.
    """
    size = int(1.0 * len(l) / n + 0.5)
    for idx in range(n - 1):
        yield l[idx * size:(idx + 1) * size]
    yield l[(n - 1) * size:]


##
# Initializes the working structure for readsCoverage.
#
# When p_res is None a fresh set of zeroed coverage tracks is created;
# otherwise the partially-computed state is restored from the checkpoint
# object p_res (resume after interruption).
def init_ws(p_res, refseq, hostseq):
    gen_len = len(refseq)
    host_len = len(hostseq)
    k = count_line = 0

    if p_res is None:
        # Fresh run: one zero row per strand, matching the original
        # np.array([length*[0], length*[0]]) construction.
        def _two_strand_zeros(length):
            return np.array([length * [0], length * [0]])

        termini_coverage = _two_strand_zeros(gen_len)
        whole_coverage = _two_strand_zeros(gen_len)
        paired_whole_coverage = _two_strand_zeros(gen_len)
        phage_hybrid_coverage = _two_strand_zeros(gen_len)
        host_hybrid_coverage = _two_strand_zeros(host_len)
        host_whole_coverage = _two_strand_zeros(host_len)
        list_hybrid = np.array([0, 0])
        insert = []
        paired_missmatch = 0
        read_match = 0
    else:
        # Resume from checkpoint: pull every track back out of the saved
        # intermediate results.
        interm = p_res.interm_res
        termini_coverage = interm.termini_coverage
        whole_coverage = interm.whole_coverage
        paired_whole_coverage = interm.paired_whole_coverage
        phage_hybrid_coverage = interm.phage_hybrid_coverage
        host_hybrid_coverage = interm.host_hybrid_coverage
        host_whole_coverage = interm.host_whole_coverage
        list_hybrid = interm.list_hybrid
        insert = interm.insert
        paired_missmatch = interm.paired_mismatch
        k = int(interm.reads_tested)
        #count_line=p_res.count_line-1 # do that because readsCoverage will start by incrementing it of 1
        read_match = p_res.read_match

    return (gen_len, host_len, termini_coverage, whole_coverage, paired_whole_coverage,
            phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage,
            list_hybrid, insert, paired_missmatch, k, count_line, read_match)  # TODO refactor that.
""" + + p_res=chk_handler.load(core_id,idx_refseq) + gen_len,host_len,termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,\ + host_whole_coverage, list_hybrid, insert, paired_missmatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq) + if logger!=None: + logger.add_rw(p_res) + test_read_seq = match = 0 + # Timer + if core_id == (tParms.core-1): + sys.stdout.write(" 0.0 %") + sys.stdout.flush() + + # Mapping + #filin = open(inRawDArgs.fastq) + filin = gzip.open(inRawDArgs.fastq, "rt") if inRawDArgs.fastq.endswith(".gz") else open(inRawDArgs.fastq) + line = filin.readline() + + if inRawDArgs.paired != "": + #filin_paired = open(inRawDArgs.paired) + filin_paired = gzip.open(inRawDArgs.paired, "rt") if inRawDArgs.paired.endswith(".gz") else open(inRawDArgs.paired) + line_paired = filin_paired.readline() + count_line_tmp=0 + while line and count_line!=count_line_tmp: + count_line_tmp += 1 + line = filin.readline() + while line: + count_line+=1 + if count_line//4 <= line_start: + test_read_seq = 0 + if count_line//4 > line_end: + break + + if test_read_seq: + k += 1 + # Read sequence + read = line.split("\n")[0].split("\r")[0] + line = filin.readline() + + if inRawDArgs.paired != "": + read_paired = line_paired.split("\n")[0].split("\r")[0] + line_paired = filin_paired.readline() + + readlen = len(read) + if readlen < fParms.seed: + if logger!=None: + print("CPU rejecting read",k) + continue + corlen = readlen-fParms.seed + + if logger!=None: + print("CPU processing read: ",k,read, reverseComplement(read)) + logger.newRmInfo(k) + + ### Match sense + (multiple, random pick one) + #print("read[:fParms.seed]=",read[:fParms.seed]) + matchPplus_start, matchPplus_end = applyCoverage(read[:fParms.seed], refseq) + + ## Phage + if matchPplus_start != -1: + if logger!=None: + print("CPU found: ",read[:fParms.seed]) + logger.rMatch("mstart") + match = 1 + termini_coverage[0][matchPplus_start]+=1 + position_end = 
matchPplus_end+corlen + + # whole coverage + for i in range(matchPplus_start, min(gen_len,position_end)): + whole_coverage[0][i]+=1 + + # Paired-read + if inRawDArgs.paired != "": + matchPplus_start_paired, matchPplus_end_paired = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], refseq) + insert_length = matchPplus_end_paired - matchPplus_start + if insert_length > 0 and insert_length < fParms.insert_max: + position_end = matchPplus_end_paired + insert.append(insert_length) + else: + paired_missmatch += 1 + # Paired hybrid + if inDArgs.hostseq != "" and matchPplus_start_paired == -1: + matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq) + if matchHplus_start != -1: + list_hybrid[0] += 1 + phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) + host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) + else: + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[:fParms.seed], inDArgs.hostseq) + if matchHminus_start != -1: + list_hybrid[0] += 1 + phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) + host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) + + # Single hybrid + elif inDArgs.hostseq != "": + matchPFplus_start, matchPFplus_end = applyCoverage(read[-fParms.seed:], refseq) + if matchPFplus_start == -1: + matchHplus_start, matchHplus_end = applyCoverage(read[-fParms.seed:], inDArgs.hostseq) + if matchHplus_start != -1: + list_hybrid[0] += 1 + phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) + host_hybrid_coverage[0] 
= hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) + else: + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq) + if matchHminus_start != -1: + list_hybrid[0] += 1 + phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) + host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) + + # whole coverage + for i in range(matchPplus_start, min(gen_len,position_end)): + paired_whole_coverage[0][i]+=1 + + ### if no match sense +, then test sense - + if not match: + matchPminus_start, matchPminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], refseq) + + ## Phage + if matchPminus_end != -1: + if logger != None: + print("CPU found: ",reverseComplement(read)[-fParms.seed:]," from ",reverseComplement(read)) + logger.rMatch("mrcplstart") + match = 1 + termini_coverage[1][matchPminus_end-1]+=1 + position_start = matchPminus_start-corlen + + # whole coverage + for i in range(max(0,position_start), matchPminus_end): + whole_coverage[1][i]+=1 + + # Paired-read + if inRawDArgs.paired != "": + matchPminus_start_paired, matchPminus_end_paired = applyCoverage(read_paired[:fParms.seed], refseq) + insert_length = matchPminus_end - matchPminus_start_paired + if insert_length > 0 and insert_length < fParms.insert_max: + position_start = matchPminus_start_paired + insert.append(insert_length) + else: + paired_missmatch += 1 + # Paired hybrid + if inDArgs.hostseq != "" and matchPminus_start_paired == -1: + matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq) + if matchHplus_start != -1: + list_hybrid[1] += 1 + phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, 
min(gen_len,matchPminus_end+corlen) ) + host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) + + else: + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], inDArgs.hostseq) + if matchHminus_start != -1: + list_hybrid[1] += 1 + phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) + host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) + + # Single hybrid + elif inDArgs.hostseq != "": + matchPRplus_start, matchPRplus_end = applyCoverage(reverseComplement(read)[:fParms.seed], refseq) + if matchPRplus_start == -1: + matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq) + if matchHplus_start != -1: + list_hybrid[1] += 1 + phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) + host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) + else: + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[:fParms.seed], inDArgs.hostseq) + if matchHminus_start != -1: + list_hybrid[1] += 1 + phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) + host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) + + # whole coverage + for i in range(max(0,position_start), matchPminus_end): + paired_whole_coverage[1][i]+=1 + + ### if no match on Phage, test Host + if not match: + 
matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq) + if matchHplus_start != -1: + for i in range(matchHplus_start, min(host_len,matchHplus_end+corlen)): + host_whole_coverage[0][i]+=1 + else: + matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq) + if matchHminus_end != -1: + for i in range(max(0,matchHminus_start-corlen), matchHminus_end): + host_whole_coverage[1][i]+=1 + + # TEST limit_coverage + read_match += match*readlen + + match = test_read_seq = 0 + # Timer + if core_id == (tParms.core-1): + if k%1000 == 0: + sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( float(read_match/gen_len) / tParms.limit_coverage * 100 )) + sys.stdout.flush() + + chk_handler.check(count_line,core_id,idx_refseq,termini_coverage,whole_coverage,paired_whole_coverage,\ + phage_hybrid_coverage, host_hybrid_coverage, \ + host_whole_coverage,list_hybrid,insert,paired_missmatch,k,read_match) # maybe time to create checkpoint + + else: + if line[0] == "@": + test_read_seq = 1 + + line = filin.readline() + if inRawDArgs.paired != "": + line_paired = filin_paired.readline() + + # TEST limit_coverage + if (read_match/gen_len) > tParms.limit_coverage: + line = 0 + + + if core_id == (tParms.core-1): + sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( 100 )) + sys.stdout.flush() + + # Close file + filin.close() + if inRawDArgs.paired != "": + filin_paired.close() + + + # Correct EDGE coverage + if sum(termini_coverage[0]) + sum(termini_coverage[1]) == 0 and not fParms.virome: + print("WARNING: No read Match, please check your fastq file") + + termini_coverage = correctEdge(termini_coverage, fParms.edge) + #paired_whole_coverage = correctEdge(whole_coverage, fParms.edge) #TODO: discuss with Julian and Max about the PE issue that Max reported. 
+ whole_coverage = correctEdge(whole_coverage, fParms.edge) + phage_hybrid_coverage = correctEdge(phage_hybrid_coverage, fParms.edge) + if inDArgs.hostseq != "": + host_whole_coverage = correctEdge(host_whole_coverage, fParms.edge) + host_hybrid_coverage = correctEdge(host_hybrid_coverage, fParms.edge) + + if return_dict!=None and tParms.dir_cov_mm==None: + return_dict[core_id] = [termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage, list_hybrid, np.array(insert), paired_missmatch, k] + elif return_dict==None and tParms.dir_cov_mm!=None: + insert = np.array(insert) + fic_name = os.path.join(tParms.dir_cov_mm, "coverage" + str(idx_refseq) + "_" + str(core_id)) + res=RCRes(termini_coverage,whole_coverage,paired_whole_coverage,\ + phage_hybrid_coverage, host_hybrid_coverage, \ + host_whole_coverage,list_hybrid,insert,paired_missmatch,k) + res.save(fic_name) + else: + print("Error: readsCoverage must be used either with directory name or return_dict") + chk_handler.end(core_id) + + return + + + +### IMAGE Functions +def GraphCov(termini_coverage, picMaxPlus, picMaxMinus, phagename, norm, draw, hybrid = 0): + """Produces a plot with termini coverage values.""" + # Remove old plot + plt.clf() + plt.cla() + plt.close() + # Create figure + plt.figure(1) + term_len = len(termini_coverage[0]) + term_range = list(range(term_len)) + # MOP: not totally sure what's going on here with the plot formatting + # but I refactored this out as it was getting complicated. + # Someone who understands the code better might put in more informative var names. 
### IMAGE Functions
def GraphCov(termini_coverage, picMaxPlus, picMaxMinus, phagename, norm, draw, hybrid = 0):
    """Produces a plot with termini coverage values.

    Two stacked subplots (strand + / strand -); peaks from picMaxPlus /
    picMaxMinus (lists of (value, position) pairs) are marked with red dots
    and annotated with their 1-based position.  norm=1 switches to a
    scatter plot with a fixed [0, 1.2] y-range and dashed guides at 0.5
    and 1.0; hybrid=1 scales the y-axis from the plus-strand maximum only.
    When draw is truthy the figure is also saved as <phagename>_TCov.png.
    Returns the matplotlib figure.
    """
    # Remove old plot
    plt.clf()
    plt.cla()
    plt.close()
    # Create figure
    plt.figure(1)
    term_len = len(termini_coverage[0])
    term_range = list(range(term_len))
    # MOP: not totally sure what's going on here with the plot formatting
    # but I refactored this out as it was getting complicated.
    # Someone who understands the code better might put in more informative var names.
    # zipper[0] holds the peak values, zipper[1] the peak positions.
    zipper = list(zip(*picMaxPlus))
    max_first_zipper = max(np.array(zipper[0]))
    if norm == 1:
        ylim = 1.2
    elif hybrid == 1:
        offset = 0.2*(max_first_zipper) + 1
        ylim = max_first_zipper + offset
    else:
        offset = 0.2*(max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))))
        ylim = max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))) + offset
    # Strand (+)
    plt.subplot(211)
    if norm == 1:
        plt.scatter(term_range, termini_coverage[0])
    else:
        plt.plot(termini_coverage[0], linewidth=2)
    plt.title('strand (+)')
    plt.ylabel('')
    # Axes
    axes = plt.gca()
    axes.set_ylim([0, ylim])
    # Maximum
    x_strandplus = np.array(list(zip(*picMaxPlus))[1])
    y_strandplus = np.array(list(zip(*picMaxPlus))[0])
    # Plot
    plt.plot(x_strandplus, y_strandplus, 'ro')
    if norm == 1:
        axes.axhline(y=0.5, xmin=0, xmax=1, color='grey', linestyle='dashed', linewidth=1.5)
        axes.axhline(y=1.0, xmin=0, xmax=1, color='grey', linestyle='dashed', linewidth=1.5)
    # Annotation: label each peak with its 1-based genome position.
    for i, j in zip(x_strandplus, y_strandplus):
        plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1))
    # Plot Option
    plt.margins(0.1)
    plt.locator_params(axis = 'x', nbins = 10)
    plt.locator_params(axis = 'y', nbins = 3)
    plt.xticks(rotation=75)
    # Strand (-)
    plt.subplot(212)
    if norm == 1:
        plt.scatter(term_range, termini_coverage[1])
    else:
        plt.plot(termini_coverage[1], linewidth=2)
    plt.title('strand (-)')
    plt.ylabel('')
    # Axes (hybrid mode recomputes the limit from the plus-strand maximum)
    if hybrid == 1:
        offset = 0.2*(max_first_zipper) + 1
        ylim = max_first_zipper + offset
    axes = plt.gca()
    axes.set_ylim([0, ylim])
    # Maximum
    x_strandminus = np.array(list(zip(*picMaxMinus))[1])
    y_strandminus = np.array(list(zip(*picMaxMinus))[0])
    # Plot
    plt.plot(x_strandminus, y_strandminus, 'ro')
    if norm == 1:
        axes.axhline(y=0.5, xmin=0, xmax=1, color='grey', linestyle='dashed', linewidth=1.5)
        axes.axhline(y=1.0, xmin=0, xmax=1, color='grey', linestyle='dashed', linewidth=1.5)
    # Annotation
    for i, j in zip(x_strandminus, y_strandminus):
        plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1))
    # Plot Option
    plt.margins(0.1)
    plt.locator_params(axis = 'x', nbins = 10)
    plt.locator_params(axis = 'y', nbins = 3)
    plt.xticks(rotation=75)
    # Plot Adjustments
    plt.tight_layout()
    # Draw graph
    if draw:
        plt.savefig("%s_TCov.png" % phagename, dpi=200)
    fig = plt.figure(1)
    return fig


def GraphWholeCov(added_whole_coverage, phagename, draw, P_left = "", P_right = "", pos_left = 0, pos_right = 0, graphZoom = 0, title = "WHOLE COVERAGE"):
    """Produces a plot with whole coverage values.

    Plots the summed per-base coverage; in zoom mode (graphZoom truthy)
    vertical dashed lines mark the left (red) and right (green) termini
    when P_left / P_right are numpy integers (i.e. real positions, not the
    "Random" sentinel strings).  When draw is truthy the figure is saved
    as <phagename>_plot_WCov.png.  Returns the matplotlib figure.
    """
    # Remove old plot
    plt.clf()
    plt.cla()
    plt.close()
    # Create figure
    offset = 0.2*(max(added_whole_coverage))
    ylim = max(added_whole_coverage) + offset
    # Cumulative both strands
    plt.figure(figsize=(15, 8))
    plt.plot(added_whole_coverage, linewidth=2)
    plt.title(title)
    # Axes
    axes = plt.gca()
    axes.set_ylim([0, ylim])
    # Plot Option
    plt.margins(0.1)
    plt.locator_params(axis = 'x', nbins = 10)
    plt.xticks(rotation=75)
    # Termini vertical dashed line display
    if graphZoom and isinstance(P_left, np.integer):
        plt.axvline(x=pos_left, ymin=0, ymax=ylim, color='red', zorder=2, linestyle='dashed', linewidth=1)
    if graphZoom and isinstance(P_right, np.integer):
        plt.axvline(x=pos_right, ymin=0, ymax=ylim, color='green', zorder=2, linestyle='dashed', linewidth=1)
    # Draw graph
    if draw:
        plt.savefig("%s_plot_WCov.png" % phagename, dpi=200)
    fig = plt.figure(1)
    return fig
def _logo_patch(ax, verts, codes, color, lw):
    """Add one unfilled PathPatch built from verts/codes to ax (logo helper)."""
    ax.add_patch(patches.PathPatch(Path(verts, codes), facecolor='none', edgecolor=color, lw=lw))


def GraphLogo(P_class, P_left, P_right, draw, phagename="phage"):
    """Produce the packaging-class logo figure.

    One branch per classification ("Headful (pac)", "COS (5')", "COS (3')",
    "COS", "DTR (short)", "DTR (long)", "Mu-like"); when P_class matches
    none of these, P_left == P_right == "Random" selects the UNKNOWN logo,
    anything else the NEW logo.

    Args:
        P_class: packaging classification string.
        P_left, P_right: termini labels used for the UNKNOWN/NEW fallback.
        draw: when truthy, also save the figure as "<phagename>_logo.png".
        phagename: basename for the saved PNG.  New defaulted parameter —
            the previous code referenced an undefined name `phagename` in
            plt.savefig() and raised NameError whenever draw was truthy;
            existing 4-argument callers are unaffected.

    Returns:
        The matplotlib figure containing the logo.
    """
    # Remove old plot
    plt.clf()
    plt.cla()
    plt.close()
    # Create figure
    plt.figure(figsize=(10, 10))
    #axes = plt.add_subplot(111)
    axes = plt.gca()
    axes.set_frame_on(False)
    axes.xaxis.set_visible(False)
    axes.yaxis.set_visible(False)
    # Cadre (outer frame)
    axes.add_artist(patches.Rectangle((0.1, 0.1), 0.8, 0.8, edgecolor='black', fill=False, linewidth=15))

    # Path-code templates shared by the DTR logos: a cubic Bezier arc and a
    # short closed tick mark.
    curve4 = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4]
    tick = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]

    if P_class == "Headful (pac)":
        # Texte
        axes.text(0.17, 0.7, r"Headful (pac)", fontsize=50, fontweight='bold')
        # PAC (blue line)
        axes.axhline(y=0.35, xmin=0.2, xmax=0.8, color='blue', linewidth=15)
        # PAC (red line)
        axes.axvline(x=0.19, ymin=0.30, ymax=0.40, color='red', linewidth=10)
        axes.axvline(x=0.42, ymin=0.30, ymax=0.40, color='red', linewidth=10)
        axes.axvline(x=0.65, ymin=0.30, ymax=0.40, color='red', linewidth=10)
        # PAC (Arrow)
        axes.axvline(x=0.19, ymin=0.45, ymax=0.55, color='red', linewidth=15)
        axes.arrow(0.19, 0.55, 0.07, 0, color='red', linewidth=15, head_width=0.07, head_length=0.1)

    elif P_class == "COS (5')":
        # Texte
        axes.text(0.3, 0.7, r"COS (5')", fontsize=50, fontweight='bold')
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.5, height=0.35, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.58, height=0.43, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor='white', facecolor='white', fill=True))
        axes.axhline(y=0.56, xmin=0.415, xmax=0.48, color='red', linewidth=16)
        axes.axhline(y=0.601, xmin=0.52, xmax=0.585, color='red', linewidth=16)

    elif P_class == "COS (3')":
        # Texte (same circle as COS (5') with the red overhangs swapped)
        axes.text(0.3, 0.7, r"COS (3')", fontsize=50, fontweight='bold')
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.5, height=0.35, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.58, height=0.43, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor='white', facecolor='white', fill=True))
        axes.axhline(y=0.601, xmin=0.415, xmax=0.48, color='red', linewidth=16)
        axes.axhline(y=0.56, xmin=0.52, xmax=0.585, color='red', linewidth=16)

    elif P_class == "COS":
        # Texte
        axes.text(0.4, 0.7, r"COS", fontsize=50, fontweight='bold')
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.5, height=0.35, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Ellipse(xy=(0.5, 0.4), width=0.58, height=0.43, edgecolor='blue', fill=False, lw=15))
        axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor='white', facecolor='white', fill=True))

    elif P_class == "DTR (short)":
        # Texte
        axes.text(0.22, 0.7, r"DTR (short)", fontsize=50, fontweight='bold')
        # Circular genome drawn as two blue Bezier arcs.
        _logo_patch(axes, [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5, 0.2)], curve4, 'blue', 15)
        _logo_patch(axes, [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5, 0.55)], curve4, 'blue', 15)
        # Red tick marks for the short repeats; the wide white stroke masks
        # the gap between the two arc ends.
        _logo_patch(axes, [(0.5, 0.55), (0.52, 0.545), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.56, 0.536), (0.58, 0.530), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.5, 0.50), (0.56, 0.480), (0, 0)], tick, 'white', 20)
        _logo_patch(axes, [(0.5, 0.50), (0.52, 0.495), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.56, 0.486), (0.58, 0.480), (0, 0)], tick, 'red', 15)

    elif P_class == "DTR (long)":
        # Texte
        axes.text(0.25, 0.7, r"DTR (long)", fontsize=50, fontweight='bold')
        _logo_patch(axes, [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5, 0.2)], curve4, 'blue', 15)
        _logo_patch(axes, [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5, 0.55)], curve4, 'blue', 15)
        # Four red ticks per end (longer repeat), white mask in between.
        _logo_patch(axes, [(0.5, 0.55), (0.52, 0.545), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.56, 0.536), (0.58, 0.530), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.62, 0.521), (0.64, 0.516), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.68, 0.507), (0.70, 0.501), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.5, 0.50), (0.65, 0.460), (0, 0)], tick, 'white', 25)
        _logo_patch(axes, [(0.5, 0.50), (0.52, 0.495), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.56, 0.486), (0.58, 0.480), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.62, 0.471), (0.64, 0.465), (0, 0)], tick, 'red', 15)
        _logo_patch(axes, [(0.68, 0.456), (0.70, 0.450), (0, 0)], tick, 'red', 15)

    elif P_class == "Mu-like":
        # Texte
        axes.text(0.33, 0.7, r"Mu-like", fontsize=50, fontweight='bold')
        # Blue phage genome flanked by green host DNA.
        axes.axhline(y=0.43, xmin=0.3, xmax=0.7, color='blue', linewidth=15)
        axes.axhline(y=0.47, xmin=0.3, xmax=0.7, color='blue', linewidth=15)
        axes.axhline(y=0.43, xmin=0.7, xmax=0.8, color='green', linewidth=15)
        axes.axhline(y=0.47, xmin=0.7, xmax=0.8, color='green', linewidth=15)
        axes.axhline(y=0.43, xmin=0.2, xmax=0.3, color='green', linewidth=15)
        axes.axhline(y=0.47, xmin=0.2, xmax=0.3, color='green', linewidth=15)

    elif P_left == "Random" and P_right == "Random":
        # Texte
        axes.text(0.25, 0.7, r"UNKNOWN", fontsize=50, fontweight='bold')
        axes.text(0.44, 0.3, r"?", fontsize=200, fontweight='bold')
    else:
        # Texte
        axes.text(0.4, 0.7, r"NEW", fontsize=50, fontweight='bold')
        axes.text(0.44, 0.3, r"!", fontsize=200, fontweight='bold')

    # Draw graph
    if draw:
        plt.savefig("%s_logo.png" % phagename, dpi=200)
    fig = plt.figure(1)
    return fig
length.""" + seq = "" + for i in range((len(sequence)//split)+1): + seq += "".join(map(str,sequence[i*split:(i+1)*split])) + '\n' + return seq + +def ExportStatistics(phagename, whole_coverage, paired_whole_coverage, termini_coverage, phage_plus_norm, phage_minus_norm, paired, test_run): + """Export peaks statistics.""" + if test_run: + return + export = pd.DataFrame() + # ORGANIZE Column + export["Position"] = list(phage_plus_norm.sort_values("Position")["Position"]) + if paired != "": + export["Coverage +"] = paired_whole_coverage[0] + else: + export["Coverage +"] = whole_coverage[0] + export["SPC +"] = termini_coverage[0] + export["T +"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])] + export["T + (close)"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC"])] + export["pvalue +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma"])] + export["padj +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma_adj"])] + if paired != "": + export["Coverage -"] = whole_coverage[1] + else: + export["Coverage -"] = paired_whole_coverage[1] + export["SPC -"] = termini_coverage[1] + export["T -"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])] + export["T - (close)"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])] + export["pvalue -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma"])] + export["padj -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma_adj"])] + filout = open(phagename + "_statistics.csv", "w") + filout.write(export.to_csv(index=False)) + filout.close() + return + +def ExportCohesiveSeq(phagename, ArtcohesiveSeq, P_seqcoh, test_run, multi = 0): + """Export cohesive sequence of COS phages.""" + if test_run: + return "" + if len(ArtcohesiveSeq) < 3 and 
len(P_seqcoh) < 3: + return "" + if len(ArtcohesiveSeq) < 20 and len(P_seqcoh) < 20: + export_text = "cohesive sequence" + if not multi: + filout = open(phagename + "_cohesive-sequence.fasta", "w") + else: + export_text = "direct terminal repeats sequence" + if not multi: + filout = open(phagename + "_direct-term-repeats.fasta", "w") + if P_seqcoh != '': + if not multi: + filout.write(">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60)) + else: + return ">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60) + if ArtcohesiveSeq != '': + if not multi: + filout.write(">" + phagename + " " + export_text + " (Analysis: Li)\n" + exportDataSplit(ArtcohesiveSeq, 60)) + filout.close() + return "" + +def ExportPhageSequence(phagename, P_left, P_right, refseq, P_orient, Redundant, Mu_like, P_class, P_seqcoh, test_run, multi = 0): + """Export the phage sequence reorganized and completed if needed.""" + if test_run: + return "" + seq_out = "" + # Mu-like + if Mu_like: + if P_orient == "Forward": + if P_right != "Random": + if P_left > P_right: + seq_out = refseq[P_right-1:P_left-1] + else: + seq_out = refseq[P_right-1:] + refseq[:P_left-1] + else: + seq_out = refseq[P_left-1:] + refseq[:P_left-1] + elif P_orient == "Reverse": + if P_left != "Random": + if P_left > P_right: + seq_out = reverseComplement(refseq[P_right-1:P_left-1]) + else: + seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_left-1])) + else: + seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_right-1]) ) + # COS + elif isinstance(P_left, np.integer) and isinstance(P_right, np.integer): + # Cos or DTR + if P_class == "COS (3')": + if abs(P_left-P_right) > len(refseq)/2: + seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)] + else: + seq_out = refseq[max(P_left,P_right)-1:] + refseq[:min(P_left,P_right)] + seq_out = seq_out + P_seqcoh + else: + # Genome + if 
abs(P_left-P_right) > len(refseq)/2: + seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)] + else: + seq_out = refseq[max(P_left,P_right):] + refseq[:min(P_left,P_right)-1] + # COS 5' + if P_class == "COS (5')": + seq_out = P_seqcoh + seq_out + # DTR + else: + seq_out = P_seqcoh + seq_out + P_seqcoh + # PAC + elif isinstance(P_left, np.integer) or isinstance(P_right, np.integer): + if P_orient == "Reverse": + seq_out = reverseComplement(refseq[:P_right]) + reverseComplement(refseq[P_right:]) + else: + seq_out = refseq[P_left-1:] + refseq[:P_left-1] + # Write Sequence + if multi: + return ">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60) + else: + filout = open(phagename + "_sequence.fasta", "w") + filout.write(">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60)) + filout.close() + return "" + +def CreateReport(phagename, seed, added_whole_coverage, draw, Redundant, P_left, P_right, Permuted, P_orient, termini_coverage_norm_close, picMaxPlus_norm_close, picMaxMinus_norm_close, gen_len, tot_reads, P_seqcoh, phage_plus_norm, phage_minus_norm, ArtPackmode, termini, forward, reverse, ArtOrient, ArtcohesiveSeq, termini_coverage_close, picMaxPlus_close, picMaxMinus_close, picOUT_norm_forw, picOUT_norm_rev, picOUT_forw, picOUT_rev, lost_perc, ave_whole_cov, R1, R2, R3, host, host_len, host_whole_coverage, picMaxPlus_host, picMaxMinus_host, surrounding, drop_cov, paired, insert, phage_hybrid_coverage, host_hybrid_coverage, added_paired_whole_coverage, Mu_like, test_run, P_class, P_type, P_concat, multi = 0, multiReport = 0, *args, **kwargs): + """Produce a PDF report.""" + if not multi: + doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % phagename, pagesize=letter, rightMargin=10,leftMargin=10, topMargin=5, bottomMargin=10) + report=[] + else: + report = multiReport + + styles=getSampleStyleSheet() + styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY)) + styles.add(ParagraphStyle(name='Center', 
alignment=TA_CENTER)) + styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT)) + styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT)) + + ### GENERAL INFORMATION + + # TITLE + ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>' + report.append(Paragraph(ptext, styles["Center"])) + report.append(Spacer(1, 15)) + + ## ZOOMED TERMINI GRAPH AND LOGO RESULT + + # LOGO SLECTION + + imgdata = io.BytesIO() + fig_logo = GraphLogo(P_class, P_left, P_right, draw) + fig_logo.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE_2 = Image(IMG.fileName, width=150, height=150, kind='proportional') + IMAGE_2.hAlign = 'CENTER' + + # Zoom on inter-termini seq + if isinstance(P_left, np.integer) and isinstance(P_right, np.integer) and not Mu_like: + Zoom_left = min(P_left-1000, P_right-1000) + Zoom_right = max(P_left+1000, P_right+1000) + imgdata = io.BytesIO() + if P_orient == "Reverse": + zoom_pos_left = P_right-max(0,Zoom_left) + zoom_pos_right = P_left-max(0,Zoom_left) + else: + zoom_pos_left = P_left-max(0,Zoom_left) + zoom_pos_right = P_right-max(0,Zoom_left) + + figZ_whole = GraphWholeCov(added_whole_coverage[max(0,Zoom_left):min(gen_len,Zoom_right)], phagename + "-zoom", draw, P_left, P_right, zoom_pos_left, zoom_pos_right, 1, "Zoom Termini") + figZ_whole.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [[IMAGE, IMAGE_2]] + t=Table(data, 1*[4*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + elif isinstance(P_left, np.integer) and P_orient == "Forward": + imgdata = io.BytesIO() + + if Mu_like: + figZL_whole = 
GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") + else: + figZL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, P_right, P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") + figZL_whole.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [[IMAGE, IMAGE_2]] + t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + elif isinstance(P_right, np.integer) and P_orient == "Reverse": + imgdata = io.BytesIO() + + if Mu_like: + figZR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") + else: + figZR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, P_left, P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") + figZR_whole.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [[IMAGE, IMAGE_2]] + t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + else: + data = [[IMAGE_2]] + t=Table(data, 1*[1.5*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), 
('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + # Warning coverage message + if ave_whole_cov < 50 and test_run == 0: + ptextawc = "- - - - - - - - - WARNING: Coverage (" + str(int(ave_whole_cov)) + ") is under the limit of the software, Please consider results carrefuly. - - - - - - - - -" + data = [[ptextawc]] + t=Table(data, 1*[5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('TEXTCOLOR',(0,0),(-1,-1),'RED'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + ## Statistics + ptext = '<u><font size=14>PhageTerm Method</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + if Redundant: + Ends = "Redundant" + else: + Ends = "Non Red." + + data = [["Ends", "Left (red)", "Right (green)", "Permuted", "Orientation", "Class", "Type"], [Ends, P_left, P_right, Permuted, P_orient, P_class, P_type]] + t=Table(data, 7*[1.10*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + # Seq cohesive or Direct terminal repeats + if P_seqcoh != "": + if len(P_seqcoh) < 20: + ptext = '<i><font size=12>*Sequence cohesive: ' + P_seqcoh + '</font></i>' + else: + ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(P_seqcoh)) + ' bp</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + + # Multiple / Multiple (Nextera) + if P_left == "Multiple" and P_right == "Multiple": + ptext = '<i><font size=12>*This results could be due to a non-random fragmented sequence (e.g. 
Nextera)</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + + + # Concatermer + elif P_class[:7] == "Headful" and paired != "": + ptext = '<i><font size=12>*concatemer estimation: ' + str(P_concat) + '</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + + # Mu hybrid + elif Mu_like: + if P_orient == "Forward": + Mu_termini = P_left + else: + Mu_termini = P_right + ptext = '<i><font size=12>*Mu estimated termini position with hybrid fragments: ' + str(Mu_termini) + '</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + + report.append(Spacer(1, 10)) + + # Results + imgdata = io.BytesIO() + figP_norm = GraphCov(termini_coverage_norm_close, picMaxPlus_norm_close[:1], picMaxMinus_norm_close[:1], phagename + "-norm", 1, draw) + figP_norm.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [["Strand", "Location", "T", "pvalue", "T (Start. Pos. Cov. 
/ Whole Cov.)"], ["+",phage_plus_norm["Position"][0],format(phage_plus_norm["SPC"][0]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][0], '0.2e'),IMAGE], ["",phage_plus_norm["Position"][1],format(phage_plus_norm["SPC"][1]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_plus_norm["Position"][2],format(phage_plus_norm["SPC"][2]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_plus_norm["Position"][3],format(phage_plus_norm["SPC"][3]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_plus_norm["Position"][4],format(phage_plus_norm["SPC"][4]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][4], '0.2e'),""], ["-",phage_minus_norm["Position"][0],format(phage_minus_norm["SPC"][0]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][0], '0.2e'),""], ["",phage_minus_norm["Position"][1],format(phage_minus_norm["SPC"][1]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_minus_norm["Position"][2],format(phage_minus_norm["SPC"][2]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_minus_norm["Position"][3],format(phage_minus_norm["SPC"][3]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_minus_norm["Position"][4],format(phage_minus_norm["SPC"][4]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][4], '0.2e'),""]] + t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 5)) + + ## Li's Analysis + ptext = '<u><font size=14>Li\'s 
Method</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + data = [["Packaging", "Termini", "Forward", "Reverse", "Orientation"], [ArtPackmode, termini, forward, reverse, ArtOrient]] + t=Table(data, 2*[1*inch] + 2*[2*inch] + 1*[1*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 5)) + + # Seq cohesive or Direct terminal repeats + if len(ArtcohesiveSeq) > 2: + if len(ArtcohesiveSeq) < 20: + ptext = '<i><font size=12>*Sequence cohesive: ' + ArtcohesiveSeq + '</font></i>' + else: + ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(ArtcohesiveSeq)) + ' bp</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + # Results + imgdata = io.BytesIO() + figP = GraphCov(termini_coverage_close, picMaxPlus_close[:1], picMaxMinus_close[:1], phagename, 0, draw) + figP.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [["Strand", "Location", "SPC", "R", "SPC"],["+",picMaxPlus_close[0][1]+1,picMaxPlus_close[0][0],R2,IMAGE],["",picMaxPlus_close[1][1]+1,picMaxPlus_close[1][0],"-",""],["",picMaxPlus_close[2][1]+1,picMaxPlus_close[2][0],"-",""],["",picMaxPlus_close[3][1]+1,picMaxPlus_close[3][0],"-",""],["",picMaxPlus_close[4][1]+1,picMaxPlus_close[4][0],"-",""],["-",picMaxMinus_close[0][1]+1,picMaxMinus_close[0][0],R3,""], ["",picMaxMinus_close[1][1]+1,picMaxMinus_close[1][0],"-",""], ["",picMaxMinus_close[2][1]+1,picMaxMinus_close[2][0],"-",""], ["",picMaxMinus_close[3][1]+1,picMaxMinus_close[3][0],"-",""], ["",picMaxMinus_close[4][1]+1,picMaxMinus_close[4][0],"-",""]] + t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], 
hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + + + # NEW PAGE + report.append(PageBreak()) + + # HOST RESULTS + if host != "": + # Host coverage + ptext = '<u><font size=14>Host Analysis</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + ptext = '<i><font size=10></font>Reads that does not match on the phage genome are tested on the host genome. These reads could come from Phage transduction but also Host DNA contamination.</i>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + data = [["Host Genome", str(host_len) + " bp"]] + t=Table(data, 2*[2.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'LEFT') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 5)) + + imgdata = io.BytesIO() + + figH = GraphCov(host_whole_coverage, picMaxPlus_host[:1], picMaxMinus_host[:1], "", 0, draw) + figH.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + data = [["Strand", "Location", "Coverage", "-", "Whole Coverage"],["+",picMaxPlus_host[0][1]+1,picMaxPlus_host[0][0],"-",IMAGE],["",picMaxPlus_host[1][1]+1,picMaxPlus_host[1][0],"-",""],["",picMaxPlus_host[2][1]+1,picMaxPlus_host[2][0],"-",""],["",picMaxPlus_host[3][1]+1,picMaxPlus_host[3][0],"-",""],["",picMaxPlus_host[4][1]+1,picMaxPlus_host[4][0],"-",""],["-",picMaxMinus_host[0][1]+1,picMaxMinus_host[0][0],"-",""], 
["",picMaxMinus_host[1][1]+1,picMaxMinus_host[1][0],"-",""], ["",picMaxMinus_host[2][1]+1,picMaxMinus_host[2][0],"-",""], ["",picMaxMinus_host[3][1]+1,picMaxMinus_host[3][0],"-",""], ["",picMaxMinus_host[4][1]+1,picMaxMinus_host[4][0],"-",""]] + t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 10)) + + # Hybrid coverage + ptext = '<u><font size=14>Hybrid Analysis</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + ptext = '<i><font size=10></font>Hybrid reads with one edge on the phage genome and the other edge on the host genome are detected. Phage Hybrid Coverages are used to detect Mu-like packaging mode. 
Host Hybrid Coverages could be used to detect Phage Transduction but also genome integration location of prophages.</i>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + picMaxPlus_phage_hybrid, picMaxMinus_phage_hybrid, TopFreqH_phage_hybrid = picMax(phage_hybrid_coverage, 5) + picMaxPlus_host_hybrid, picMaxMinus_host_hybrid, TopFreqH_host_hybrid = picMax(host_hybrid_coverage, 5) + + imgdataPH = io.BytesIO() + figPH = GraphCov(phage_hybrid_coverage, picMaxPlus_phage_hybrid[:1], picMaxMinus_phage_hybrid[:1], "", 0, draw, 1) + figPH.savefig(imgdataPH, format='png') + imgdataPH.seek(0) + IMGPH = ImageReader(imgdataPH) + IMAGEPH = Image(IMGPH.fileName, width=240, height=340, kind='proportional') + IMAGEPH.hAlign = 'CENTER' + + + imgdataHH = io.BytesIO() + figHH = GraphCov(host_hybrid_coverage, picMaxPlus_host_hybrid[:1], picMaxMinus_host_hybrid[:1], "", 0, draw, 1) + figHH.savefig(imgdataHH, format='png') + imgdataHH.seek(0) + IMGHH = ImageReader(imgdataHH) + IMAGEHH = Image(IMGHH.fileName, width=240, height=340, kind='proportional') + IMAGEHH.hAlign = 'CENTER' + + data = [["Phage Hybrid Coverage", "Host Hybrid Coverage"],[IMAGEPH,IMAGEHH]] + t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2.5*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 10)) + + # NEW PAGE + report.append(PageBreak()) + + + # DETAILED RESULTS + ptext = '<u><font size=14>Analysis Methodology</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + ptext = '<i><font size=10>PhageTerm software uses raw reads of a phage sequenced with a sequencing technology using random fragmentation and its genomic reference sequence to determine the termini position. 
The process starts with the alignment of NGS reads to the phage genome in order to calculate the starting position coverage (SPC), where a hit is given only to the position of the first base in a successfully aligned read (the alignment algorithm uses the lenght of the seed (default: 20) for mapping and does not accept gap or missmatch to speed up the process). Then the program apply 2 distinct scoring methods: i) a statistical approach based on the Gamma law; and ii) a method derived from LI and al. 2014 paper.</font></i>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + + # INFORMATION + ptext = '<u><font size=12>General set-up and mapping informations</font></u>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + + imgdata = io.BytesIO() + + if paired != "": + figP_whole = GraphWholeCov(added_paired_whole_coverage, phagename, draw) + else: + figP_whole = GraphWholeCov(added_whole_coverage, phagename, draw) + figP_whole.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + if host == "": + host_analysis = "No" + else: + host_analysis = "Yes" + + if paired == "": + sequencing_reads = "Single-ends Reads" + else: + sequencing_reads = "Paired-ends Reads" + + data = [["Phage Genome ", str(gen_len) + " bp",IMAGE], ["Sequencing Reads", int(tot_reads),""], ["Mapping Reads", str(int(100 - lost_perc)) + " %",""], ["OPTIONS","",""], ["Mapping Seed",seed,""], ["Surrounding",surrounding,""], ["Host Analysis ", host_analysis,""], ["","",""]] + t=Table(data, 1*[2.25*inch]+1*[1.80*inch]+1*[4*inch], 8*[0.25*inch], hAlign='LEFT', style=[('SPAN',(2,0),(2,-1)), ('FONT',(0,0),(0,2),'Helvetica-Bold'), ('FONT',(0,3),(0,3),'Helvetica-Oblique'), ('FONT',(0,4),(1,-1),'Helvetica-Oblique'), ('FONT',(2,0),(2,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-2),'LEFT'), 
('ALIGN',(2,0),(2,-1),'CENTER') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + + report.append(t) + report.append(Spacer(1, 5)) + + + # Img highest peaks of each side even if no significative + ptext = '<u><font size=12>Highest peak of each side coverage graphics</font></u>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + + imgdata = io.BytesIO() + + if Mu_like and isinstance(P_left, np.integer): + figHL_whole = GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") + else: + P_left = phage_plus_norm["Position"][0] + figHL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") + figHL_whole.savefig(imgdata, format='png') + imgdata.seek(0) + IMG = ImageReader(imgdata) + IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') + IMAGE.hAlign = 'CENTER' + + imgdata2 = io.BytesIO() + + if Mu_like and isinstance(P_right, np.integer): + figHR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") + else: + P_right = phage_minus_norm["Position"][0] + figHR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") + figHR_whole.savefig(imgdata2, format='png') + imgdata2.seek(0) + IMG2 = ImageReader(imgdata2) + IMAGE2 = Image(IMG2.fileName, width=275, height=340, kind='proportional') + IMAGE2.hAlign = 'CENTER' + + if Mu_like: + data = [["Hybrid Coverage Zoom (Left)", "Hybrid Coverage Zoom (Right)"],[IMAGE,IMAGE2]] + else: + data = [["Whole Coverage Zoom (Left)", "Whole Coverage Zoom (Right)"],[IMAGE,IMAGE2]] + 
t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + # Controls + ptext = '<u><font size=12>General controls information</font></u>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + if ave_whole_cov < 50: + ptextawc = "WARNING: Under the limit of the software (50)" + elif ave_whole_cov < 200: + ptextawc = "WARNING: Low (<200), Li's method could not be reliable" + else: + ptextawc = "OK" + + data = [["Whole genome coverage", int(ave_whole_cov), ptextawc]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + drop_perc = len([i for i in added_whole_coverage if i < (ave_whole_cov/2)]) / float(len(added_whole_coverage)) + if drop_perc < 1: + ptextdp = "OK" + else: + ptextdp = "Check your genome reference" + + data = [["Weak genome coverage", "%.1f %%" %drop_perc, ptextdp]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + if paired != "": + if len(insert) != 0: + insert_mean = sum(insert)/len(insert) + else: + insert_mean = "-" + data = [["Insert mean size", int(insert_mean), "Mean insert estimated from paired-end reads"]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + if lost_perc > 25: + ptextlp = "Warning: high percentage of reads lost" + else: + ptextlp = 
"OK" + + data = [["Reads lost during alignment", "%.1f %%" %lost_perc, ptextlp]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + # DETAILED SCORE + ptext = '<b><font size=14>i) PhageTerm method</font></b>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + ptext = '<i><font size=10>Reads are mapped on the reference to determine the starting position coverage (SPC) as well as the coverage (COV) in each orientation. These values are then used to compute the variable T = SPC/COV. The average value of T at positions along the genome that are not termini is expected to be 1/F, where F is the average fragment size. For the termini that depends of the packaging mode. Cos Phages: no reads should start before the terminus and therefore T=1. DTR phages: for N phages present in the sample, there should be N fragments that start at the terminus and N fragments that cover the edge of the repeat on the other side of the genome as a results T is expected to be 0.5. Pac phages: for N phages in the sample, there should be N/C fragments starting at the pac site, where C is the number of phage genome copies per concatemer. In the same sample N fragments should cover the pac site position, T is expected to be (N/C)/(N+N/C) = 1/(1+C). To assess whether the number of reads starting at a given position along the genome can be considered a significant outlier, PhageTerm first segments the genome according to coverage using a regression tree. A gamma distribution is fitted to SPC for each segment and an adjusted p-value is computed for each position. 
If several significant peaks are detected within a small sequence window (default: 20bp), their X values are merged.</font></i>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + # surrounding + if surrounding > 0: + data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_norm_forw)-1) + " / " + str(len(picOUT_norm_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + report.append(Spacer(1, 10)) + + # Li's Method + if not multi: + ptext = '<b><font size=14>ii) Li\'s method</font></b>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + ptext = '<i><font size=10>The second approach is based on the calculation and interpretation of three specific ratios R1, R2 and R3 as suggested in a previous publication from Li et al. 2014. The first ratio, is calculated as follow: the highest starting frequency found on either the forward or reverse strands is divided by the average starting frequency, R1 = (highest frequency/average frequency). Li’s et al. have proposed three possible interpretation of the R1 ratio. First, if R1 < 30, the phage genome does not have any termini, and is either circular or completely permuted and terminally redundant. The second interpretation for R1 is when 30 ≤ R1 ≤ 100, suggesting the presence of preferred termini with terminal redundancy and apparition of partially circular permutations. At last if R1 > 100 that is an indication that at least one fixed termini is present with terminase recognizing a specific site. The two other ratios are R2 and R3 and the calculation is done in a similar manner. 
R2 is calculated using the highest two frequencies (T1-F and T2-F) found on the forward strand and R3 is calculated using the highest two frequencies (T1-R and T2-R) found on the reverse strand. To calculate these two ratios, we divide the highest frequency by the second highest frequency T2. So R2 = (T1-F / T2-F) and R3 = (T1-R / T2-R). These two ratios are used to analyze termini characteristics on each strand taken individually. Li et al. suggested two possible interpretations for R2 and R3 ratios combine to R1. When R1 < 30 and R2 < 3, we either have no obvious termini on the forward strand, or we have multiple preferred termini on the forward strand, if 30 ≤ R1 ≤ 100. If R2 > 3, it is suggested that there is an obvious unique termini on the forward strand. The same reasoning is applicable for the result of R3. Combining the results for ratios found with this approach, it is possible to make the first prediction for the viral packaging mode of the analyzed phage. A unique obvious termini present at both ends (both R2 and R3 > 3) reveals the presence of a COS mode of packaging. The headful mode of packaging PAC is concluded when we have a single obvious termini only on one strand.</font></i>' + report.append(Paragraph(ptext, styles["Justify"])) + report.append(Spacer(1, 5)) + + if surrounding > 0: + data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_forw)-1) + " / " + str(len(picOUT_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + if R1 > 100: + ptextR1 = "At least one fixed termini is present with terminase recognizing a specific site." 
+ elif R1 > 30: + ptextR1 = "Presence of preferred termini with terminal redundancy and apparition of partially circular permutations." + else: + ptextR1 = "Phage genome does not have any termini, and is either circular or completely permuted and terminally redundant." + + data = [["R1 - highest freq./average freq.", int(R1), Paragraph(ptextR1, styles["Justify"])]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + if R2 < 3 and R1 < 30: + ptextR2 = "No obvious termini on the forward strand." + elif R2 < 3 : + ptextR2 = "Multiple preferred termini on the forward strand." + elif R2 >= 3: + ptextR2 = "Unique termini on the forward strand." + + data = [["R2 Forw - highest freq./second freq.", int(R2), Paragraph(ptextR2, styles["Justify"])]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + report.append(Spacer(1, 5)) + + if R3 < 3 and R1 < 30: + ptextR3 = "No obvious termini on the reverse strand." + elif R3 < 3 : + ptextR3 = "Multiple preferred termini on the reverse strand." + elif R3 >= 3: + ptextR3 = "Unique termini on the reverse strand." + + data = [["R3 Rev - highest freq./second freq.", int(R3), Paragraph(ptextR3, styles["Justify"])]] + t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + # CREDITS and TIME + ptext = '<font size=8>%s</font>' % "Please cite: Sci. Rep. 
DOI 10.1038/s41598-017-07910-5" + report.append(Paragraph(ptext, styles["Center"])) + ptext = '<font size=8>%s</font>' % "Garneau, Depardieu, Fortier, Bikard and Monot. PhageTerm: Determining Bacteriophage Termini and Packaging using NGS data." + report.append(Paragraph(ptext, styles["Center"])) + ptext = '<font size=8>Report generated : %s</font>' % time.ctime() + report.append(Paragraph(ptext, styles["Center"])) + + # CREATE PDF + if not multi: + doc.build(report) + else: + report.append(PageBreak()) + return report + return + +def SummaryReport(phagename, DR, no_match): + """ Create first page of multi reports.""" + report=[] + styles=getSampleStyleSheet() + styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY)) + styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER)) + styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT)) + styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT)) + + ### GENERAL INFORMATION + + # TITLE + ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>' + report.append(Paragraph(ptext, styles["Center"])) + report.append(Spacer(1, 15)) + + # No Match + if len(no_match) > 0: + ptext = '<u><font size=14>No Match ('+ str(len(no_match)) +')</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for contig in no_match: + P_comments = "No read match" + + data = [[contig, "-", "-", "-", "-", "-", 0, P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), 
('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + # COS Phages + count_COS = len(DR["COS (3')"]) + len(DR["COS (5')"]) + len(DR["COS"]) + ptext = '<u><font size=14>COS Phages ('+ str(count_COS) +')</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + if count_COS != 0: + + data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["COS (3')"]: + P_comments = "" + if int(DR["COS (3')"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["COS (3')"][DC]["P_class"], DR["COS (3')"][DC]["P_left"], DR["COS (3')"][DC]["P_right"], DR["COS (3')"][DC]["P_type"], DR["COS (3')"][DC]["P_orient"], int(DR["COS (3')"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["COS (5')"]: + P_comments = "" + if int(DR["COS (5')"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["COS (5')"][DC]["P_class"], DR["COS (5')"][DC]["P_left"], DR["COS (5')"][DC]["P_right"], DR["COS (5')"][DC]["P_type"], DR["COS (5')"][DC]["P_orient"], int(DR["COS (5')"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), 
('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["COS"]: + P_comments = "" + if int(DR["COS"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["COS"][DC]["P_class"], DR["COS"][DC]["P_left"], DR["COS"][DC]["P_right"], DR["COS"][DC]["P_type"], DR["COS"][DC]["P_orient"], int(DR["COS"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + report.append(Spacer(1, 5)) + + # DTR Phages + count_DTR = len(DR["DTR (short)"]) + len(DR["DTR (long)"]) + ptext = '<u><font size=14>DTR Phages ('+ str(count_DTR) +')</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + if count_DTR != 0: + + data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["DTR (short)"]: + P_comments = "" + if int(DR["DTR (short)"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["DTR (short)"][DC]["P_class"], DR["DTR (short)"][DC]["P_left"], DR["DTR (short)"][DC]["P_right"], DR["DTR (short)"][DC]["P_type"], DR["DTR (short)"][DC]["P_orient"], int(DR["DTR (short)"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), 
('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["DTR (long)"]: + P_comments = "" + if int(DR["DTR (long)"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["DTR (long)"][DC]["P_class"], DR["DTR (long)"][DC]["P_left"], DR["DTR (long)"][DC]["P_right"], DR["DTR (long)"][DC]["P_type"], DR["DTR (long)"][DC]["P_orient"], int(DR["DTR (long)"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + report.append(Spacer(1, 5)) + + # Headful Phages + count_Headful = len(DR["Headful (pac)"]) + ptext = '<u><font size=14>Headful Phages ('+ str(count_Headful) +')</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + if count_Headful != 0: + + data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["Headful (pac)"]: + P_comments = "" + if int(DR["Headful (pac)"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["Headful (pac)"][DC]["P_class"], DR["Headful (pac)"][DC]["P_left"], DR["Headful (pac)"][DC]["P_right"], DR["Headful (pac)"][DC]["P_type"], DR["Headful (pac)"][DC]["P_orient"], int(DR["Headful (pac)"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', 
style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + report.append(Spacer(1, 5)) + + # OTHERS Phages + count_Others = len(DR["Mu-like"]) + len(DR["UNKNOWN"]) + len(DR["NEW"]) + ptext = '<u><font size=14>Others Phages ('+ str(count_Others) +')</font></u>' + report.append(Paragraph(ptext, styles["Left"])) + report.append(Spacer(1, 10)) + + if count_Others != 0: + + data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["Mu-like"]: + P_comments = "" + if int(DR["Mu-like"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["Mu-like"][DC]["P_class"], DR["Mu-like"][DC]["P_left"], DR["Mu-like"][DC]["P_right"], DR["Mu-like"][DC]["P_type"], DR["Mu-like"][DC]["P_orient"], int(DR["Mu-like"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["NEW"]: + P_comments = "" + if int(DR["NEW"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["NEW"][DC]["P_class"], DR["NEW"][DC]["P_left"], DR["NEW"][DC]["P_right"], DR["NEW"][DC]["P_type"], DR["NEW"][DC]["P_orient"], int(DR["NEW"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', 
style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + for DC in DR["UNKNOWN"]: + P_comments = "" + if int(DR["UNKNOWN"][DC]["ave_whole_cov"]) < 50: + P_comments = "Low coverage" + + data = [[DC, DR["UNKNOWN"][DC]["P_class"], DR["UNKNOWN"][DC]["P_left"], DR["UNKNOWN"][DC]["P_right"], DR["UNKNOWN"][DC]["P_type"], DR["UNKNOWN"][DC]["P_orient"], int(DR["UNKNOWN"][DC]["ave_whole_cov"]), P_comments]] + t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) + report.append(t) + + report.append(Spacer(1, 5)) + + report.append(PageBreak()) + + return report + +def WorkflowReport(phagename, P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, multi = 0, phage_plus_norm=None, phage_minus_norm=None,*args, **kwargs): + """ Text report for each phage.""" + + P_comments = "" + if ave_whole_cov < 50: + P_comments = "WARNING: Low coverage" + + if ave_whole_cov == 0: + P_comments = "No read match" + + if not multi: + filoutWorkflow = open(phagename + "_workflow.txt", "w") + filoutWorkflow.write("#phagename\tClass\tLeft\tRight\tType\tOrient\tCoverage\tComments\n") + filoutWorkflow.write(phagename + "\t" + P_class + "\t" + str(P_left) + "\t" + str(P_right) + "\t" + P_type + "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n") + filoutWorkflow.close() + else: + pval_left_peak="-" + pval_adj_left_peak="-" + pval_right_peak="-" + pval_adj_right_peak="-" + if isinstance(P_left,np.int64): + # get pvalue and adjusted pvalue for this + peak + left_peak_infos=phage_plus_norm.loc[phage_plus_norm['Position']==P_left] + pval_left_peak=left_peak_infos["pval_gamma"] + 
pval_left_peak=pval_left_peak.values[0] + pval_adj_left_peak=left_peak_infos["pval_gamma_adj"] + pval_adj_left_peak =pval_adj_left_peak.values[0] + if isinstance(P_right,np.int64): + # get pvalue and adjusted pvalue for this + peak + right_peak_infos=phage_minus_norm.loc[phage_minus_norm['Position']==P_right] + pval_right_peak=right_peak_infos["pval_gamma"] + pval_right_peak=pval_right_peak.values[0] + pval_adj_right_peak=right_peak_infos["pval_gamma_adj"] + pval_adj_right_peak=pval_adj_right_peak.values[0] + return phagename + "\t" + P_class + "\t" + str(P_left) + "\t" +str(pval_left_peak)+ "\t" +str(pval_adj_left_peak)+\ + "\t" + str(P_right) + "\t" + str(pval_right_peak) + "\t" + str(pval_adj_right_peak)+ "\t" + P_type +\ + "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n" + return + +def EstimateTime(secondes): + """ Convert secondes into time.""" + conv = (86400,3600,60,1) + result = [0,0,0,0] + i=0 + while secondes>0: + result[i]= secondes//conv[i] # floor division: keeps the day/hour/min/sec fields integral under Python 3 (true division produced a float and broke the remainder computation) + secondes=secondes-result[i]*conv[i] + i+=1 + return str(result[0]) + " Days " + str(result[1]) + " Hrs " + str(result[2]) + " Min " + str(result[3]) + " Sec" + + + + + + diff --git a/phageterm/generate_report.py b/phageterm/generate_report.py new file mode 100644 index 0000000000000000000000000000000000000000..abd73fa1f3ae631fc39b9d2b0803c562f1cc7a28 --- /dev/null +++ b/phageterm/generate_report.py @@ -0,0 +1,158 @@ +from __future__ import print_function +import os +import pickle +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak +from reportlab.lib.pagesizes import letter, landscape +from _modules.functions_PhageTerm import SummaryReport,WorkflowReport,ExportCohesiveSeq,ExportPhageSequence,CreateReport + + +def loadDR(DR_path,DR): + for d in os.listdir(DR_path): # iterate over P_class subdirectories. + if not os.path.isdir(os.path.join(DR_path,d)): + err_str=DR_path+" should contain only directories." 
+ raise RuntimeError(err_str) + for fic_name in os.listdir(os.path.join(DR_path,d)): # iterate over all files for a given P_class + p=os.path.join(DR_path,d) + fname=os.path.join(p,fic_name) + with open(fname, 'rb') as f: + loaded_items=pickle.load(f) + # d is P_class name, fic_name is phagename. + dict_tmp=dict() + dict_tmp["phagename"]=loaded_items[0] + dict_tmp["seed"]=loaded_items[1] + dict_tmp["added_whole_coverage"]=loaded_items[2] + dict_tmp["Redundant"]=loaded_items[3] + dict_tmp["P_left"]=loaded_items[4] + print("P_left=",dict_tmp["P_left"],type(dict_tmp["P_left"])) + dict_tmp["P_right"] = loaded_items[5] + print("P_right=",dict_tmp["P_right"],type(dict_tmp["P_right"])) + dict_tmp["Permuted"]=loaded_items[6] + dict_tmp["P_orient"] =loaded_items[7] + dict_tmp["termini_coverage_norm_close"] =loaded_items[8] + dict_tmp["picMaxPlus_norm_close"] =loaded_items[9] + dict_tmp["picMaxMinus_norm_close"] =loaded_items[10] + dict_tmp["gen_len"] =loaded_items[11] + dict_tmp["tot_reads"] =loaded_items[12] + dict_tmp["P_seqcoh"] =loaded_items[13] + dict_tmp["phage_plus_norm"] =loaded_items[14] + dict_tmp["phage_minus_norm"] =loaded_items[15] + dict_tmp["ArtPackmode"] = loaded_items[16] + dict_tmp["termini"] = loaded_items[17] + dict_tmp["forward"] = loaded_items[18] + dict_tmp["reverse"] = loaded_items[19] + dict_tmp["ArtOrient"] = loaded_items[20] + dict_tmp["ArtcohesiveSeq"] = loaded_items[21] + dict_tmp["termini_coverage_close"] = loaded_items[22] + dict_tmp["picMaxPlus_close"] = loaded_items[23] + dict_tmp["picMaxMinus_close"] = loaded_items[24] + dict_tmp["picOUT_norm_forw"] = loaded_items[25] + dict_tmp["picOUT_norm_rev"] = loaded_items[26] + dict_tmp["picOUT_forw"] = loaded_items[27] + dict_tmp["picOUT_rev"] = loaded_items[28] + dict_tmp["lost_perc"] = loaded_items[29] + dict_tmp["ave_whole_cov"] = loaded_items[30] + dict_tmp["R1"] = loaded_items[31] + dict_tmp["R2"] = loaded_items[32] + dict_tmp["R3"] = loaded_items[33] + dict_tmp["host"] = loaded_items[34] + 
dict_tmp["host_len"] = loaded_items[35] + dict_tmp["host_whole_coverage"] = loaded_items[36] + dict_tmp["picMaxPlus_host"] = loaded_items[37] + dict_tmp["picMaxMinus_host"] = loaded_items[38] + dict_tmp["surrounding"] = loaded_items[39] + dict_tmp["drop_cov"] = loaded_items[40] + dict_tmp["paired"] = loaded_items[41] + dict_tmp["insert"] = loaded_items[42] + dict_tmp["phage_hybrid_coverage"] = loaded_items[43] + dict_tmp["host_hybrid_coverage"] = loaded_items[44] + dict_tmp["added_paired_whole_coverage"] = loaded_items[45] + dict_tmp["Mu_like"] = loaded_items[46] + dict_tmp["test_run"] = loaded_items[47] + dict_tmp["P_class"] = loaded_items[48] + dict_tmp["P_type"] = loaded_items[49] + dict_tmp["P_concat"] = loaded_items[50] + dict_tmp["idx_refseq_in_list"] = loaded_items[51] + DR [d][fic_name]=dict_tmp + f.close() + + + + +def genReport(fParms,inDArgs,inRawDArgs,no_match,DR): + # Test No Match + if len(no_match) == inDArgs.nbr_virome: + print("\n\nERROR: No reads match, please check your reference file.") + exit() + + # Report Resume + multiReport = SummaryReport(inRawDArgs.analysis_name, DR, no_match) + multiCohSeq = "" + multiPhageSeq = "" + multiWorkflow = "#phagename\tClass\tLeft\tRight\tType\tOrient\tCoverage\tComments\n" + + # No Match in workflow + if fParms.workflow: + for no_match_contig in no_match: + multiWorkflow += WorkflowReport(no_match_contig, "-", "-", "-", "-", "-", 0, 1) + + for DPC in DR: + for DC in DR[DPC]: + # Text report + if fParms.workflow: # phagename, P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, multi = 0 + multiWorkflow += WorkflowReport(DC, DR[DPC][DC]["P_class"], DR[DPC][DC]["P_left"], + DR[DPC][DC]["P_right"], + DR[DPC][DC]["P_type"], DR[DPC][DC]["P_orient"], + DR[DPC][DC]["ave_whole_cov"], 1,DR[DPC][DC]["phage_plus_norm"], + DR[DPC][DC]["phage_minus_norm"]) + + # Sequence + idx_refseq = DR[DPC][DC]["idx_refseq_in_list"] + refseq = inDArgs.refseq_liste[idx_refseq] + multiCohSeq += ExportCohesiveSeq(DC, 
DR[DPC][DC]["ArtcohesiveSeq"], DR[DPC][DC]["P_seqcoh"], fParms.test_run, 1) + multiPhageSeq += ExportPhageSequence(DC, DR[DPC][DC]["P_left"], DR[DPC][DC]["P_right"], refseq, + DR[DPC][DC]["P_orient"], DR[DPC][DC]["Redundant"], DR[DPC][DC]["Mu_like"], + DR[DPC][DC]["P_class"], DR[DPC][DC]["P_seqcoh"], fParms.test_run, 1) + + # Report + draw=0 # TODO VL: ask what is the use of this parameter that is alwayes 0... + multiReport = CreateReport(DC, DR[DPC][DC]["seed"], DR[DPC][DC]["added_whole_coverage"], draw, + DR[DPC][DC]["Redundant"], DR[DPC][DC]["P_left"], DR[DPC][DC]["P_right"], + DR[DPC][DC]["Permuted"], DR[DPC][DC]["P_orient"], + DR[DPC][DC]["termini_coverage_norm_close"], DR[DPC][DC]["picMaxPlus_norm_close"], + DR[DPC][DC]["picMaxMinus_norm_close"], DR[DPC][DC]["gen_len"], + DR[DPC][DC]["tot_reads"], DR[DPC][DC]["P_seqcoh"], DR[DPC][DC]["phage_plus_norm"], + DR[DPC][DC]["phage_minus_norm"], DR[DPC][DC]["ArtPackmode"], DR[DPC][DC]["termini"], + DR[DPC][DC]["forward"], DR[DPC][DC]["reverse"], DR[DPC][DC]["ArtOrient"], + DR[DPC][DC]["ArtcohesiveSeq"], DR[DPC][DC]["termini_coverage_close"], + DR[DPC][DC]["picMaxPlus_close"], DR[DPC][DC]["picMaxMinus_close"], + DR[DPC][DC]["picOUT_norm_forw"], DR[DPC][DC]["picOUT_norm_rev"], + DR[DPC][DC]["picOUT_forw"], DR[DPC][DC]["picOUT_rev"], DR[DPC][DC]["lost_perc"], + DR[DPC][DC]["ave_whole_cov"], DR[DPC][DC]["R1"], DR[DPC][DC]["R2"], + DR[DPC][DC]["R3"], DR[DPC][DC]["host"], DR[DPC][DC]["host_len"], + DR[DPC][DC]["host_whole_coverage"], DR[DPC][DC]["picMaxPlus_host"], + DR[DPC][DC]["picMaxMinus_host"], DR[DPC][DC]["surrounding"], DR[DPC][DC]["drop_cov"], + DR[DPC][DC]["paired"], DR[DPC][DC]["insert"], DR[DPC][DC]["phage_hybrid_coverage"], + DR[DPC][DC]["host_hybrid_coverage"], DR[DPC][DC]["added_paired_whole_coverage"], + DR[DPC][DC]["Mu_like"], fParms.test_run, DR[DPC][DC]["P_class"], + DR[DPC][DC]["P_type"], DR[DPC][DC]["P_concat"], 1, multiReport) + + # Workflow + if not fParms.test: + if fParms.workflow: + filoutWorkflow 
= open(inRawDArgs.analysis_name + "_workflow.txt", "w") + filoutWorkflow.write(multiWorkflow) + filoutWorkflow.close() + + # Concatene Sequences + filoutCohSeq = open(inRawDArgs.analysis_name + "_cohesive-sequence.fasta", "w") + filoutCohSeq.write(multiCohSeq) + filoutCohSeq.close() + + filoutPhageSeq = open(inRawDArgs.analysis_name + "_sequence.fasta", "w") + filoutPhageSeq.write(multiPhageSeq) + filoutPhageSeq.close() + + # Concatene Report + doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % inRawDArgs.analysis_name, pagesize=letter, rightMargin=10, + leftMargin=10, topMargin=5, bottomMargin=10) + doc.build(multiReport) diff --git a/phageterm/main_utils.py b/phageterm/main_utils.py new file mode 100755 index 0000000000000000000000000000000000000000..a888ded9ea2e6e232e7771ae1c57c28626518d43 --- /dev/null +++ b/phageterm/main_utils.py @@ -0,0 +1,493 @@ +##@file main_utils.py +# +# Contains utility functions and classes for the main program. +# Aim is to make main simpler and smaller and thus improve testability (by allowing separate/independant testing of small program "subparts"). + +# Note about main program's options. This is to be discussed and subject to change. +# -g + --mapping_res_dir : Assume we are on a cluster. Perform mapping only and save results to files +# --mapping_res_dir+ --cov_res_dir : assume we are on a cluster. Process mapping results stored in files and puts the readsCoverage results in other files. +# Will use a job array in that case. 
each Phageterm will process 1 chunk for 1 sequence +from __future__ import print_function + +from time import gmtime, strftime +import sys +import gzip +from optparse import OptionParser, OptionGroup +from utilities import checkReportTitle,changeCase +from IData_handling import totReads,genomeFastaRecovery + +usage = """\n\nUsage: %prog -f reads.fastq -r phage_sequence.fasta [--report_title analysis_name -p reads_paired -s seed_lenght -d surrounding -t installation_test -c nbr_core -g host.fasta (warning increase process time) -l limit_multi-fasta -v virome_time] +[--mm --dir_cov_mm path_to_coverage_results -c nb_cores --core_id idx_core -p reads_paired -s seed_lenght -d surrounding -l limit_multi-fasta] +[--mm --dir_cov_mm path_to_coverage_results --dir_seq_mm path_to_sequence_results --DR_path path_to_results --seq_id index_of_sequence --nb_pieces nbr_of_read_chunks -p reads_paired -s seed_lenght -d surrounding -l limit_multi-fasta] +[--mm --DR_path path_to_results --dir_seq_mm path_to_sequence_results -p reads_paired -s seed_lenght -d surrounding -l limit_multi-fasta] + + Program: PhageTerm - Analyze phage termini and packaging mode using reads from high-throughput sequenced phage data + Version: 4.1 (also py3_release_1) + Contact: Julian Garneau <julian.garneau@usherbrooke.ca> + Contact: David Bikard <david.bikard@pasteur.fr> + Contact: Marc Monot <marc.monot@pasteur.fr> + Contact: Veronique Legrand <vlegrand@pasteur.fr> + + You can perform a program test run upon installation using the "-t " option. + Arguments for the -t option can be : C5, C3, DS, DL, M , H or V + + Example of test commands : + PhageTerm.py -t C5 -> Test run for a 5\' cohesive end (e.g. Lambda) + PhageTerm.py -t C3 -> Test run for a 3\' cohesive end (e.g. HK97) + PhageTerm.py -t DS -> Test run for a Direct Terminal Repeats end short (e.g. T7) + PhageTerm.py -t DL -> Test run for a Direct Terminal Repeats end long (e.g. T5) + PhageTerm.py -t H -> Test run for a Headful packaging (e.g. 
"""
P1)
PhageTerm.py -t M -> Test run for a Mu-like packaging (e.g. Mu)
PhageTerm.py -t V -> Test run for a Virome data
"""


## checkFastaFile
#
# Checking input Fasta file (file existence and format).
def checkFastaFile(filin):
    """Check the sequence Fasta file given by the user.

    Supports plain and gzip-compressed (".gz") files.

    Returns:
        0 when the file looks like valid Fasta, 1 otherwise.
    Exits:
        With an error message when the file cannot be opened or read.
    """
    # BUG FIX: open() used to sit outside the try block, so the
    # "No such file" handler could never catch a failed open.
    try:
        infil = gzip.open(filin, "rt") if filin.endswith(".gz") else open(filin, 'r')
    except IOError:
        sys.exit('ERROR: No such file %s' % filin)
    first_line = 1
    try:
        for line in infil:
            # The very first line must be a Fasta header ('>').
            if first_line:
                if line[0] != '>':
                    return 1
                first_line = 0
                continue
            # Every other line must start with a nucleotide code,
            # an end-of-line character, or a new header.
            base = changeCase(line[0])
            if base not in ('A', 'T', 'C', 'G', 'N', '\n', '\r', '>'):
                return 1
        return 0
    except IOError:
        sys.exit('ERROR: No such file %s' % filin)
    finally:
        # BUG FIX: the original leaked the handle on the first-line early return.
        infil.close()


## setOptions
#
# Uses the OptionParser class. Defines all the options offered by phageterm and their default values if any.
# Also defines the usage message.
# Returns an optionParser object usable by the main program.
def setOptions():
    """Build and return the OptionParser describing every PhageTerm option."""
    getopt = OptionParser(usage=usage)

    optreads = OptionGroup(getopt, 'Raw reads file in fastq format')
    optreads.add_option('-f', '--fastq', dest='fastq', metavar='FILE',
                        help='Fastq reads from Illumina TruSeq')
    getopt.add_option_group(optreads)

    optref = OptionGroup(getopt, 'Phage genome in fasta format')
    optref.add_option('-r', '--ref', dest='reference', metavar='FILE',
                      help='Reference phage genome as contigs in fasta format')
    getopt.add_option_group(optref)

    optname = OptionGroup(getopt, 'Name of the phage being analyzed by the user')
    optname.add_option('--report_title', dest='analysis_name', metavar='STRING',
                       help='Manually enter the name of the analysis. Used as prefix for output file names. Default value is \"analysis_date_HHMM.')
    getopt.add_option_group(optname)

    # Typo fix in the two groups below: "Lenght" -> "Length".
    optseed = OptionGroup(getopt, 'Length of the seed used for reads in the mapping process')
    optseed.add_option('-s', '--seed', dest='seed', metavar='INT', type="int",
                       help='Manually enter the length of the seed used for reads in the mapping process.')
    getopt.add_option_group(optseed)

    optsurround = OptionGroup(getopt, 'Length of the surrounding region considered for peak value cumulation')
    optsurround.add_option('-d', '--surrounding', dest='surround', type="int", metavar='INT',
                           help='Manually enter the length of the surrounding used to merge very close peaks in the analysis process.')
    getopt.add_option_group(optsurround)

    optcore = OptionGroup(getopt,
                          'GPU and multicore options. Default is 1 core and no GPU.')
    optcore.add_option('-c', '--core', dest='core', metavar='INT', type="int",
                       help='Manually enter the number of core you want to use.')
    getopt.add_option_group(optcore)
    #optcore.add_option('-u', '--gpu', dest='gpu', action="store_true", default=False, # VL: Keep that for later use maybe.
    #                   help='use this flag if you want to use GPU for read mapping')
    #optcore.add_option("--dir_mapping_res",dest='gpu_mapping_res_dir',metavar='STRING',default=None, help="directory where to put mapping results produced by GPU")
    # optcore.add_option("--idx_chunk",dest='idx_chunk',metavar='INT',default=None,help="index of the chunk for which we want to compute coverage")
    # optcore.add_option("--nb_chunks", dest='nb_chunks',metavar='INT', type="int",default=None,help="Indicate number of chunks wanted for GPU mapping. If None, phageTerm will automatically compute it")

    optmm = OptionGroup(getopt, "options for multi machine (or cluster mode)")
    optmm.add_option("--core_id", dest='core_id', metavar='INT', type="int", default=None,
                     help="This option is used together with -c when running PhageTerm on a cluster in parallel multimachine mode.")
    optmm.add_option("--mm", dest='multi_machine_mode', action='store_true', default=False,
                     help="use this option to indicate that you want to use the cluster (or multi machine) mode.")
    optmm.add_option("--dir_cov_mm", dest='dir_cov_mm', metavar='STRING', default=None,
                     help="directory where to put coverage results produced by Phageterm")
    optmm.add_option("--dir_seq_mm", dest='dir_seq_mm', metavar='STRING', default=None,
                     help="directory where to put per sequence results produced by Phageterm")
    optmm.add_option("--nb_pieces", dest='nb_pieces', metavar='INT', default=None,
                     help="For per sequence processing after reads coverage has been done on the cluster")
    optmm.add_option("--DR_path", dest='DR_path', metavar='STRING', default=None,
                     help="Directory where to put content of DR dictionary (per sequence processing results)")
    optmm.add_option("--seq_id", dest='seq_id', metavar='INT', default=None,
                     help="index of the sequence for which we want to compute coverage")
    getopt.add_option_group(optmm)

    optchk = OptionGroup(getopt, "options related to checkpoints.")
    optchk.add_option("--chk_freq", dest='chk_freq', metavar='INT', default=0,
                      help="Frequency in minutes at which reads coverage (the longest step in phageTerm) intermediate results must be saved ")
    optchk.add_option("--dir_chk", dest='dir_chk', metavar='STRING', default="",
                      help="Directory where to put checkpoint files")
    getopt.add_option_group(optchk)

    opthost = OptionGroup(getopt, 'Host genome in fasta format')
    opthost.add_option('-g', '--host', dest='host', metavar='FILE',
                       help='Reference host genome as unique contig in fasta format')
    getopt.add_option_group(opthost)

    optpaired = OptionGroup(getopt, 'Use paired-end reads')
    optpaired.add_option('-p', '--paired', dest='paired', metavar='FILE',
                         help='Use paired-end reads to calculate real insert coverage')
    getopt.add_option_group(optpaired)

    optmean = OptionGroup(getopt, 'Defined phage mean coverage')
    optmean.add_option('-m', '--mean', dest='mean', metavar='INT', type="int",
                       help='Defined phage mean coverage')
    getopt.add_option_group(optmean)

    optlimit = OptionGroup(getopt, 'Limit minimum fasta size (Default: 500)')
    optlimit.add_option('-l', '--limit', dest='limit', metavar='INT', type="int",
                        help='Limit minimum fasta length')
    getopt.add_option_group(optlimit)

    optvirome = OptionGroup(getopt, 'Estimate execution time for a Virome')
    optvirome.add_option('-v', '--virome', dest='virome', metavar='INT', type="int",
                         help='Estimate execution time for a Virome')
    getopt.add_option_group(optvirome)

    opttest = OptionGroup(getopt, 'Perform a program test run upon installation')
    # BUG FIX: the help text omitted the supported 'V' (Virome) test mode.
    opttest.add_option('-t', '--test', dest='test', metavar='STRING',
                       help='Perform a program test run upon installation. If you want to perform a test run, use the "-t " option. Arguments for the -t option can be : C5, C3, DS, DL, H, M or V. C5 -> Test run for a 5\' cohesive end (e.g. Lambda); C3 -> Test run for a 3\' cohesive end (e.g. HK97); DS -> Test run for a short Direct Terminal Repeats end (e.g. T7); DL -> Test run for a long Direct Terminal Repeats end (e.g. T5); H -> Test run for a Headful packaging (e.g. P1); M -> Test run for a Mu-like packaging (e.g. Mu); V -> Test run for a Virome data')

    opttest.add_option('--nrt', dest='nrt', action='store_true', default=False,
                       help='dump phage Class name to special file for non regression testing')
    getopt.add_option_group(opttest)

    return getopt

## User Raw data handling.
#
# This class provides encapsulation for raw data provided by the user as arguments to phageterm (input file names, testing mode if so, analysis_name, host and paired).
# It also performs checkings on the input files and computes the number of reads.
class inputRawDataArgs:
    """Raw user inputs: file names, analysis name, host/paired files, test and nrt flags.

    A recognized ``test`` value replaces the user supplied files with the bundled
    test data sets. Input Fasta files are validated and read counts are computed.
    """

    def __init__(self, fastq, reference, host, analysis_name, paired, test, nrt):
        if test == "C5":
            print("\nPerforming a test run using test phage sequence with 5 prime cohesive overhang :")
            print("\npython PhageTerm.py -f test-data/COS-5.fastq -r test-data/COS-5.fasta --report_title TEST_cohesive_5_prime")
            fastq = "test-data/COS-5.fastq"
            reference = "test-data/COS-5.fasta"
            analysis_name = "Test-cohesive-5'"
        elif test == "C3":
            print("\nPerforming a test run using test phage sequence with 3 prime cohesive overhang:")
            print("\npython PhageTerm.py -f test-data/COS-3.fastq -r test-data/COS-3.fasta --report_title TEST_cohesive_3_prime")
            fastq = "test-data/COS-3.fastq"
            reference = "test-data/COS-3.fasta"
            analysis_name = "Test-cohesive-3'"
        elif test == "DS":
            print("\nPerforming a test run using test phage sequence with short direct terminal repeats (DTR-short) :")
            print("\npython PhageTerm.py -f test-data/DTR-short.fastq -r test-data/DTR-short.fasta --report_title TEST_short_direct_terminal_repeats")
            fastq = "test-data/DTR-short.fastq"
            reference = "test-data/DTR-short.fasta"
            analysis_name = "Test-short-direct-terminal-repeats"
        elif test == "DL":
            print("\nPerforming a test run using test phage sequence with long direct terminal repeats (DTR-long) :")
            print("\npython PhageTerm.py -f test-data/DTR-long.fastq -r test-data/DTR-long.fasta --report_title TEST_long_direct_terminal_repeats")
            fastq = "test-data/DTR-long.fastq"
            reference = "test-data/DTR-long.fasta"
            analysis_name = "Test-long-direct-terminal-repeats"
        elif test == "H":
            print("\nPerforming a test run using test phage sequence with headful packaging")
            print("\npython PhageTerm.py -f test-data/Headful.fastq -r test-data/Headful.fasta --report_title TEST_headful")
            fastq = "test-data/Headful.fastq"
            reference = "test-data/Headful.fasta"
            analysis_name = "Test-Headful"
        elif test == "M":
            print("\nPerforming a test run using test phage sequence with Mu-like packaging")
            print("\npython PhageTerm.py -f test-data/Mu-like_R1.fastq -p test-data/Mu-like_R2.fastq -r test-data/Mu-like.fasta --report_title TEST_Mu-like -g test-data/Mu-like_host.fasta")
            fastq = "test-data/Mu-like_R1.fastq"
            paired = "test-data/Mu-like_R2.fastq"
            reference = "test-data/Mu-like.fasta"
            host = "test-data/Mu-like_host.fasta"
            analysis_name = "Test-Mu-like"
        elif test == "V":
            print("\nPerforming a test run using virome data containing one example of each packaging mode")
            print("\npython PhageTerm.py -f test-data/Virome.fastq -r test-data/Virome.fasta --report_title TEST_Virome")
            fastq = "test-data/Virome.fastq"
            reference = "test-data/Virome.fasta"
            analysis_name = "Test-Virome"
        elif test == None:
            pass  # Not a test, normal use.
        else:
            # BUG FIX: 'V' was missing from the allowed list and the original
            # fell through with invalid file names (crashing later); abort now.
            sys.exit("Unrecognized test run argument ('{}')!\nAllowed options are {}.".format(test, "C5, C3, DS, DL, H, M or V"))

        if host == None:
            host = ""
        if paired == None:
            paired = ""
        # CHECK inputs
        if analysis_name != None:
            self.analysis_name = checkReportTitle(analysis_name)
        else:
            self.analysis_name = "NA"
        if checkFastaFile(reference):
            exit("ERROR in reference file")
        self.reference = reference
        if host != "":
            # BUG FIX: the error message used to say "reference" for a bad host file.
            if checkFastaFile(host):
                exit("ERROR in host file")
        self.host = host
        self.fastq = fastq
        self.paired = paired
        self.nrt = nrt
        if self.nrt == True:
            print("running nrt tests")

        # READS Number
        self.tot_reads = totReads(fastq)
        if paired != "":
            self.tot_reads_paired = totReads(paired)
            if self.tot_reads != self.tot_reads_paired:
                print("\nWARNING: Number of reads between the two reads files differ, using single reads only\n")
                self.paired = ""


## User functional parameters handling
#
# Here gather user input parameters and global variable that define how the data will be processed from a functional point of view
# (ex: seed length...)
class functionalParms:
    """Functional processing parameters: seed/surrounding lengths, size limits,
    mean coverage, virome flag, test mode and derived workflow constants."""

    def __init__(self, seed, surround, mean, limit, virome, test):
        # Seed length: default 20, hard floor at 15.
        if seed == None:
            seed = 20
        if seed < 15:
            seed = 15
        self.seed = seed
        # Surrounding region used to merge close peaks: default 20.
        if surround == None:
            surround = 20
        self.surrounding = surround

        # Minimum reference length kept (default 500).
        if limit == None:
            limit = 500
        self.limit_reference = limit

        # Virome execution-time estimation flag, coerced to 0/1.
        if virome == None:
            virome = 0
        if virome != 1:
            virome = 0
        self.virome = virome

        # Defined phage mean coverage (default 250).
        if mean == None:
            mean = 250
        self.mean = mean

        # Test-run bookkeeping.
        if test == None:
            self.test_run = 0
        else:
            self.test_run = 1
        self.test = test

        # Fixed internal constants.
        self.edge = 500
        self.insert_max = 1000
        self.limit_fixed = 35
        self.limit_preferred = 11
        self.Mu_threshold = 0.5
        self.draw = 0
        # Default workflow; InputDerivedDataArgs may switch it to 1 for multi-contig input.
        self.workflow = 0

        # Test-specific overrides, applied AFTER the defaults above.
        if test == "H" or test == "M" or test == "V":
            self.surrounding = 0
        if test == "V":
            # BUG FIX: this assignment used to be dead code because the
            # unconditional `self.workflow = 0` came after it.
            self.workflow = 1

## Derive other parameter from functional and raw parameters.
#
# Here, gather data derived from the rawInputData and updated according to the functional parameters.
# functional parameter workflow can also be updated.
class InputDerivedDataArgs:
    """Data derived from the raw inputs and adjusted by the functional parameters.

    Recovers the reference (and optional host) sequences and may flip
    ``fparms.workflow`` to virome mode when several contigs are kept.
    """

    def __init__(self, inputRaw, fparms):
        # REFERENCE sequence recovery and edge adds.
        self.refseq_liste, self.refseq_name, rejected = genomeFastaRecovery(
            inputRaw.reference, fparms.limit_reference, fparms.edge)
        self.nbr_virome = len(self.refseq_liste)
        if self.nbr_virome == 0:
            print("\nERROR: All the reference(s) sequence(s) are under the length limitation : " + str(
                fparms.limit_reference) + " (adapt your -l option)")
            exit()
        if self.nbr_virome > 1:
            fparms.workflow = 1
        total_len = len("".join(self.refseq_liste))
        self.mean_virome = total_len // self.nbr_virome
        if fparms.virome:
            # Virome time-estimation mode: swap in a single mock sequence of mean length.
            self.refseq_liste, self.refseq_name, rejected = ["N" * int(self.mean_virome)], ["Test_virome"], 0
        single_ref_with_host = (len(self.refseq_liste) == 1 and inputRaw.host != "")
        if single_ref_with_host:
            self.hostseq = genomeFastaRecovery(inputRaw.host, fparms.limit_reference, fparms.edge, 1)
            # NOTE(review): the message says "Host length < Phage length" but the
            # condition fires when the host entry is LONGER than the phage —
            # original behavior preserved here; confirm intent.
            if len(self.hostseq[0]) != 0 and len(self.hostseq[0]) > len(self.refseq_liste[0]):
                print("\nHost length < Phage length : removing host sequence.")
                self.hostseq = ""
        else:
            self.hostseq = ""
        if len(self.refseq_liste) > 1:
            print("\nWARNING: Host analysis impossible with multiple fasta input\n")

## Handling of technical parameters given by the user
#
# Here gather user input parameters and former global variable that define how the data will be processed from a technical point of view (ex: multicore,gpu...)
# VL: here keep parameters related to gpu processing just in case GPU code would be needed one day for evolutions but they are not used.
class technicalParms:
    """Technical processing parameters: multicore, multi-machine (cluster) mode
    and checkpoints. GPU parameters are kept for possible future use but unused."""

    def __init__(self, core, gpu, mean, gpu_mapping_res_dir, nb_chunks, dir_cov_mm, seq_id, idx_chunk,
                 core_id, dir_seq_mm, multi_machine_mode, DR_path, nb_pieces, chk_freq=0, dir_chk="", test_mode=False):
        self.chk_freq = chk_freq
        self.dir_chk = dir_chk
        self.multi_machine = multi_machine_mode
        self.core = core
        self.wanted_chunks = nb_chunks
        self.dir_cov_mm = dir_cov_mm
        self.DR_path = DR_path
        self.test_mode = test_mode  # used for testing the checkpoint implementation.
        # These may arrive as strings from the command line: convert when present.
        self.nb_pieces = int(nb_pieces) if nb_pieces != None else None
        self.idx_chunk = int(idx_chunk) if idx_chunk != None else None
        self.seq_id = int(seq_id) if seq_id != None else None
        self.core_id = core_id
        self.dir_seq_mm = dir_seq_mm
        if core == None:
            self.core = 1
        self.limit_coverage = max(50, mean * 2) / float(self.core)
        if gpu == True and self.core > 1:
            print("Choose either multicore or gpu!")
            exit(1)
        self.gpu = gpu
        if gpu == None:
            self.gpu = False
        self.gpu_mapping_res_dir = gpu_mapping_res_dir
        if self.gpu == True and (self.dir_cov_mm != None or self.dir_seq_mm != None):
            print("when -g is used it is either to perform mapping only or whole process, --dir-cov_res/--dir_seq_res and -g are thus mutually exclusive")
            exit(1)
        if self.gpu == True and self.core_id != None:
            print("Inconsistency in options. -u/--gpu cannot be used with --core_id")
            exit(1)
        if self.chk_freq != 0 and self.dir_chk == "":
            print("Inconsistency in options: if frequency for checkpoints is not NULL (you activated checkpoints), you must also indicate in which directory to put them.")
            exit(1)
        if self.chk_freq == 0 and self.dir_chk != "":
            print("Inconsistency in options: checkpoints are deactivated (frequency is 0) but you indicated directory for them!")
            exit(1)
        if self.multi_machine == True:
            if self.dir_cov_mm == None and self.dir_seq_mm == None and self.DR_path == None:
                # Typo fix: "proivide" -> "provide".
                print("Please provide path where to put results in multi machine mode")
                exit(1)
            elif self.dir_cov_mm != None and self.dir_seq_mm == None:  # step 1: mapping+readsCoverage.
                self.checkOptConsistencyS1()
            elif self.dir_cov_mm != None and self.dir_seq_mm != None:  # step 2: per-sequence processing
                self.checkOptConsistencyS2()
            elif self.dir_cov_mm == None and self.dir_seq_mm != None:  # step 3: final report generation
                self.checkOptConsistencyS3()
            else:
                print("inconsistencies in options; please read documentation")
                print(usage)
                exit(1)
        else:
            if self.dir_cov_mm != None or self.dir_seq_mm != None or self.DR_path != None:
                print("Inconsistency in options: please use --mm if you intend to use multi machine mode")
                exit(1)
            # BUG FIX: the original tested `(self.chk_freq!=0 or self.dir_chk)!=""`
            # (mis-parenthesised) and exited with status 0 on this error path.
            if self.chk_freq != 0 or self.dir_chk != "":
                print("checkpoints can only be used in multi-machine mode")
                exit(1)
        ## GPU stuff, in case we need it one day: legacy commented-out
        ## core_id/mapping-results consistency checks removed for clarity
        ## (see version control history).

    def checkOptConsistencyS1(self):
        """Option consistency for step 1 (mapping + reads coverage) in multi machine mode."""
        if self.core_id == None:
            print("Please indicate core_id when running mapping/coverage in multi machine mode")
            exit(1)
        if self.core_id >= self.core:
            print("--core_id must be >=0 and <nb_cores")
            exit(1)
        if self.core == 1:
            print("Warning : running on only 1 core!")
        # Message fix: these used to reference the nonexistent --dir_cov_res option.
        if self.DR_path != None:
            print("--DR_path is used at step 2 and step 3. It is incompatible with --dir_cov_mm (step 1)")
            exit(1)
        if self.seq_id != None:
            print("--seq_id is only used at step 2. It is incompatible with --dir_cov_mm (step 1)")
            exit(1)
        if self.nb_pieces != None:
            print("--nb_pieces is only used at step 2. It is incompatible with --dir_cov_mm (step 1)")
            exit(1)

    def checkOptConsistencyS2(self):
        """Option consistency for step 2 (per-sequence processing) in multi machine mode."""
        if self.DR_path == None:
            print("Please indicate DR_path when running per sequence processing in multi machine mode")
            exit(1)
        if self.seq_id == None:
            print("Please indicate index of sequence to process in multi machine mode.")
            exit(1)
        if self.nb_pieces == None:
            print(" Please indicate in how many number of packets the reads were mapped during step 1.")
            exit(1)
        if self.core_id != None:
            print("There is no need to specify --core_id doing step 2 in multi machine mode (per sequence processing of the results of step 1)")
            exit(1)
        if self.core != 1:
            print("There is no need to specify --core doing step 2 in multi machine mode (per sequence processing of the results of step 1)")
            exit(1)

    def checkOptConsistencyS3(self):
        """Option consistency for step 3 (final report generation) in multi machine mode."""
        if self.DR_path == None:
            print("Please indicate DR_path for generating final report.")
            exit(1)
        if self.seq_id != None:
            print("--seq_id is incompatible with step 3 (report generation)")
            exit(1)
        if self.nb_pieces != None:
            print("--nb_pieces is incompatible with step 3 (report generation)")
            exit(1)
        if self.core_id != None:
            print("--core_id is incompatible with step 3 (report generation)")
            exit(1)
        if self.core != 1:
            # Message fix: this check is about -c/--core, not --core_id.
            print("-c/--core is incompatible with step 3 (report generation)")
            exit(1)


## Checks options and arguments consistency and instantiates data structure for main.
#
# Consistency checkings and instantiation of technicalParms, inputDerivedDataArgs, functionalParms, inputRawDataArgs objects that are directly usable inside main.
def checkOptArgsConsistency(getopt):
    """Parse the command line, check option consistency, and build the four
    argument structures used by main.

    Args:
        getopt: the OptionParser returned by setOptions().
    Returns:
        (inRawDArgs, fParms, tParms, inDArgs) — raw inputs, functional
        parameters, technical parameters and derived data.
    """
    options, arguments = getopt.parse_args()
    if options.fastq == None and options.test == None:
        getopt.error('\tNo reads file provided.\n\t\t\tUse -h or --help for more details\n')

    if options.reference == None and options.test == None:
        getopt.error('\tNo fasta reference file provided.\n\t\t\tUse -h or --help for more details\n')

    if options.analysis_name == None and options.test == None:
        # BUG FIX: the default used to be assigned to a dead local variable,
        # leaving options.analysis_name at None.
        options.analysis_name = "Analysis"

    inRawDArgs = inputRawDataArgs(options.fastq, options.reference, options.host, options.analysis_name,
                                  options.paired, options.test, options.nrt)
    fParms = functionalParms(options.seed, options.surround, options.mean, options.limit, options.virome, options.test)
    tParms = technicalParms(options.core, None, fParms.mean, None, None,
                            options.dir_cov_mm, options.seq_id, None, options.core_id,
                            options.dir_seq_mm, options.multi_machine_mode,
                            options.DR_path, options.nb_pieces,
                            float(options.chk_freq), options.dir_chk, False)
    inDArgs = InputDerivedDataArgs(inRawDArgs, fParms)
    return inRawDArgs, fParms, tParms, inDArgs  # TODO: make a version that returns only 1 structure gathering only the useful information.


# --- next file in the original patch: phageterm/readsCoverage_res.py (new file) ---
##@file readsCoverage_res.py
# Compact structure to store partial results of readsCoverage for later processing; used in multi machine mode and for checkpoints.
#
#@author vlegrand@pasteur.fr
import numpy as np
import os
import time

# Checkpoint files are named: chk_<core_id>_<idx_seq>_<count_line>_<read_match>.npz
base_chk_fname = "chk_"
chk_fname_sep = "_"


## Utility visitor classes once used for testing the checkpoint implementation
# were kept here as commented-out code; removed for clarity (see VCS history).


def loadArr(arr_idx0, arr_val0, arr_idx1, arr_val1, arr2D):
    """Scatter two sparse (indexes, values) series into rows 0 and 1 of arr2D, in place."""
    for idx, val in zip(arr_idx0, arr_val0):
        arr2D[0][idx] = val
    for idx, val in zip(arr_idx1, arr_val1):
        arr2D[1][idx] = val


def loadRCRes(filename):
    """Rebuild an RCRes object from the sparse .npz representation written by RCRes.save().

    Args:
        filename: full path to the .npz file (extension included).
    Returns:
        A re-densified RCRes instance.
    """
    npzfile = np.load(filename)
    gen_len = int(npzfile['gen_len'])
    host_len = int(npzfile['host_len'])
    termini_coverage_idx0 = npzfile['termini_coverage_idx0']
    termini_coverage_val0 = npzfile['termini_coverage_val0']
    termini_coverage_idx1 = npzfile['termini_coverage_idx1']
    termini_coverage_val1 = npzfile['termini_coverage_val1']

    whole_coverage_idx0 = npzfile['whole_coverage_idx0']
    whole_coverage_val0 = npzfile['whole_coverage_val0']
    whole_coverage_idx1 = npzfile['whole_coverage_idx1']
    whole_coverage_val1 = npzfile['whole_coverage_val1']

    paired_whole_coverage_idx0 = npzfile['paired_whole_coverage_idx0']
    paired_whole_coverage_val0 = npzfile['paired_whole_coverage_val0']
    paired_whole_coverage_idx1 = npzfile['paired_whole_coverage_idx1']
    paired_whole_coverage_val1 = npzfile['paired_whole_coverage_val1']

    phage_hybrid_coverage_idx0 = npzfile['phage_hybrid_coverage_idx0']
    phage_hybrid_coverage_val0 = npzfile['phage_hybrid_coverage_val0']
    # BUG FIX: the original read the 'idx0' key twice and took row-1 values
    # from the 'idx1' key, corrupting phage_hybrid_coverage on reload.
    phage_hybrid_coverage_idx1 = npzfile['phage_hybrid_coverage_idx1']
    phage_hybrid_coverage_val1 = npzfile['phage_hybrid_coverage_val1']

    host_hybrid_coverage_idx0 = npzfile['host_hybrid_coverage_idx0']
    host_hybrid_coverage_val0 = npzfile['host_hybrid_coverage_val0']
    host_hybrid_coverage_idx1 = npzfile['host_hybrid_coverage_idx1']
    host_hybrid_coverage_val1 = npzfile['host_hybrid_coverage_val1']

    host_whole_coverage_idx0 = npzfile['host_whole_coverage_idx0']
    host_whole_coverage_val0 = npzfile['host_whole_coverage_val0']
    host_whole_coverage_idx1 = npzfile['host_whole_coverage_idx1']
    host_whole_coverage_val1 = npzfile['host_whole_coverage_val1']

    list_hybrid = npzfile['list_hybrid']
    insert = list(npzfile['insert'])
    paired_mismatch = npzfile['paired_mismatch']
    reads_tested = npzfile['reads_tested']

    # Re-densify the coverage arrays.
    termini_coverage = np.array([gen_len * [0], gen_len * [0]])
    whole_coverage = np.array([gen_len * [0], gen_len * [0]])
    paired_whole_coverage = np.array([gen_len * [0], gen_len * [0]])
    phage_hybrid_coverage = np.array([gen_len * [0], gen_len * [0]])
    host_hybrid_coverage = np.array([host_len * [0], host_len * [0]])
    host_whole_coverage = np.array([host_len * [0], host_len * [0]])
    loadArr(termini_coverage_idx0, termini_coverage_val0, termini_coverage_idx1, termini_coverage_val1, termini_coverage)
    loadArr(whole_coverage_idx0, whole_coverage_val0, whole_coverage_idx1, whole_coverage_val1, whole_coverage)
    loadArr(paired_whole_coverage_idx0, paired_whole_coverage_val0, paired_whole_coverage_idx1, paired_whole_coverage_val1, paired_whole_coverage)
    loadArr(phage_hybrid_coverage_idx0, phage_hybrid_coverage_val0, phage_hybrid_coverage_idx1, phage_hybrid_coverage_val1, phage_hybrid_coverage)
    loadArr(host_hybrid_coverage_idx0, host_hybrid_coverage_val0, host_hybrid_coverage_idx1, host_hybrid_coverage_val1, host_hybrid_coverage)
    loadArr(host_whole_coverage_idx0, host_whole_coverage_val0, host_whole_coverage_idx1, host_whole_coverage_val1, host_whole_coverage)

    res = RCRes(termini_coverage, whole_coverage, paired_whole_coverage,
                phage_hybrid_coverage, host_hybrid_coverage,
                host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested)
    return res


##
# Working structure for readsCoverage (encapsulating temporary results)
class RCWorkingS:
    def __init__(self, rc_res, cnt_line, read_match):
        self.interm_res = rc_res      # RCRes holding the partial coverage data
        self.count_line = cnt_line    # index of the last processed input line
        self.read_match = read_match  # number of matched reads so far


class RCRes:
    """Partial readsCoverage results, saved/loaded in sparse form via .npz files."""

    def __init__(self, termini_coverage, whole_coverage, paired_whole_coverage,
                 phage_hybrid_coverage, host_hybrid_coverage,
                 host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested):
        self.termini_coverage = termini_coverage
        self.whole_coverage = whole_coverage
        self.paired_whole_coverage = paired_whole_coverage
        self.phage_hybrid_coverage = phage_hybrid_coverage
        self.host_hybrid_coverage = host_hybrid_coverage
        self.host_whole_coverage = host_whole_coverage

        self.list_hybrid = list_hybrid
        self.insert = insert
        self.paired_mismatch = paired_mismatch
        self.reads_tested = reads_tested

        # Lengths are derived from the dense arrays themselves.
        self.gen_len = len(self.termini_coverage[0])
        self.host_len = len(self.host_hybrid_coverage[0])

    def save(self, filename):
        """Save every coverage array in sparse (indexes, values) form.

        numpy appends ".npz" to *filename* when missing.
        """
        def _sparse(row):
            # One row -> (non-zero indexes, corresponding values).
            idx = np.flatnonzero(row)
            return idx, row[idx]

        t_i0, t_v0 = _sparse(self.termini_coverage[0])
        t_i1, t_v1 = _sparse(self.termini_coverage[1])
        w_i0, w_v0 = _sparse(self.whole_coverage[0])
        w_i1, w_v1 = _sparse(self.whole_coverage[1])
        p_i0, p_v0 = _sparse(self.paired_whole_coverage[0])
        p_i1, p_v1 = _sparse(self.paired_whole_coverage[1])
        ph_i0, ph_v0 = _sparse(self.phage_hybrid_coverage[0])
        ph_i1, ph_v1 = _sparse(self.phage_hybrid_coverage[1])
        hh_i0, hh_v0 = _sparse(self.host_hybrid_coverage[0])
        hh_i1, hh_v1 = _sparse(self.host_hybrid_coverage[1])
        hw_i0, hw_v0 = _sparse(self.host_whole_coverage[0])
        hw_i1, hw_v1 = _sparse(self.host_whole_coverage[1])

        np.savez(filename, gen_len=np.array(self.gen_len), host_len=np.array(self.host_len),
                 termini_coverage_idx0=t_i0, termini_coverage_val0=t_v0,
                 termini_coverage_idx1=t_i1, termini_coverage_val1=t_v1,
                 whole_coverage_idx0=w_i0, whole_coverage_val0=w_v0,
                 whole_coverage_idx1=w_i1, whole_coverage_val1=w_v1,
                 paired_whole_coverage_idx0=p_i0, paired_whole_coverage_val0=p_v0,
                 paired_whole_coverage_idx1=p_i1, paired_whole_coverage_val1=p_v1,
                 phage_hybrid_coverage_idx0=ph_i0, phage_hybrid_coverage_val0=ph_v0,
                 phage_hybrid_coverage_idx1=ph_i1, phage_hybrid_coverage_val1=ph_v1,
                 host_hybrid_coverage_idx0=hh_i0, host_hybrid_coverage_val0=hh_v0,
                 host_hybrid_coverage_idx1=hh_i1, host_hybrid_coverage_val1=hh_v1,
                 host_whole_coverage_idx0=hw_i0, host_whole_coverage_val0=hw_v0,
                 host_whole_coverage_idx1=hw_i1, host_whole_coverage_val1=hw_v1,
                 list_hybrid=self.list_hybrid, insert=self.insert,
                 paired_mismatch=np.array(self.paired_mismatch),
                 reads_tested=self.reads_tested)


class RCCheckpoint:
    """One checkpoint: progress counters plus the RCRes partial results."""

    def __init__(self, count_line, core_id, idx_seq, termini_coverage, whole_coverage, paired_whole_coverage,
                 phage_hybrid_coverage, host_hybrid_coverage,
                 host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested, read_match):
        self.count_line = count_line
        self.core_id = core_id
        self.idx_seq = idx_seq
        self.read_match = read_match
        self.res = RCRes(termini_coverage, whole_coverage, paired_whole_coverage,
                         phage_hybrid_coverage, host_hybrid_coverage,
                         host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested)

    def save(self, dir_chk, core_id, idx_refseq):
        """Write this checkpoint to dir_chk, then delete older ones for the same core/sequence."""
        filename = base_chk_fname + str(self.core_id) + chk_fname_sep + str(self.idx_seq) + chk_fname_sep + \
                   str(self.count_line) + chk_fname_sep + str(self.read_match)
        full_fname = os.path.join(dir_chk, filename)
        self.res.save(full_fname)
        # Remove any previous checkpoint file for this core/sequence pair.
        list_f = os.listdir(dir_chk)
        sub_s = base_chk_fname + str(core_id) + chk_fname_sep + str(idx_refseq) + chk_fname_sep
        for f in list_f:
            if f != filename + ".npz" and sub_s in f:
                os.remove(os.path.join(dir_chk, f))


class RCCheckpoint_handler:
    """Creates, finds, loads and cleans up readsCoverage checkpoints.

    chk_freq is expressed in minutes (0 disables checkpoints); test_mode forces
    a checkpoint at every check() call.
    """

    def __init__(self, chk_freq, dir_chk, test_mode=False):
        self.chk_freq = chk_freq
        self.test_mode = test_mode
        self.start_t = 0
        self.dir_chk = dir_chk
        if self.test_mode == True:
            self.start_t = time.perf_counter_ns()
            if os.path.exists(dir_chk):
                if not os.path.isdir(dir_chk):
                    raise RuntimeError("dir_chk must point to a directory")
            else:
                os.mkdir(dir_chk)
        elif self.chk_freq != 0:
            if os.path.exists(dir_chk):
                if not os.path.isdir(dir_chk):
                    raise RuntimeError("dir_chk must point to a directory")
            else:
                raise RuntimeError("dir_chk must point to an existing directory")

    def getIdxSeq(self, core_id):
        """Return the sequence index of the newest checkpoint for core_id (0 when none)."""
        idx_seq = 0
        if self.chk_freq != 0 or self.test_mode == True:
            list_f = os.listdir(self.dir_chk)
            subfname = base_chk_fname + str(core_id) + chk_fname_sep
            chk_f = ""
            for fname in list_f:
                if subfname in fname:
                    chk_f = fname
                    break
            if chk_f != "":
                l = chk_f.split(chk_fname_sep)
                idx_seq = int(l[2])
        return idx_seq

    def load(self, core_id, idx_refseq):
        """Load the checkpoint for (core_id, idx_refseq); None when absent or disabled."""
        if self.chk_freq != 0 or self.test_mode == True:
            list_f = os.listdir(self.dir_chk)
            subfname = base_chk_fname + str(core_id) + chk_fname_sep + str(idx_refseq) + chk_fname_sep
            chk_f = ""
            for fname in list_f:
                if subfname in fname:
                    chk_f = fname
                    break
            if chk_f != "":
                interm_res = loadRCRes(os.path.join(self.dir_chk, chk_f))
                # Progress counters are encoded in the file name itself.
                l = chk_f.split(chk_fname_sep)
                cnt_line = int(l[-2])
                tmp = l[-1]  # get rid of the .npz extension
                l2 = tmp.split(".")
                read_match = int(l2[0])
                return RCWorkingS(interm_res, cnt_line, read_match)
            else:  # no checkpoint found for this sequence, start from beginning
                return None
        else:
            return None

    def check(self, count_line, core_id, idx_seq, termini_coverage, whole_coverage, paired_whole_coverage,
              phage_hybrid_coverage, host_hybrid_coverage,
              host_whole_coverage, list_hybrid, insert, paired_mismatch, reads_tested, read_match):
        """Write a checkpoint when the configured interval has elapsed (always in test mode)."""
        cur_t = time.perf_counter_ns()
        elapsed_s = (cur_t - self.start_t) * 1e-9  # ns -> s
        # BUG FIX: the original computed the seconds conversion into a
        # misspelled, unused variable and tested `elapsed_ns % chk_freq == 0`,
        # which almost never fired. chk_freq is in minutes.
        if self.test_mode == True or (self.chk_freq != 0 and elapsed_s >= self.chk_freq * 60):
            chkp = RCCheckpoint(count_line, core_id, idx_seq, termini_coverage, whole_coverage,
                                paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,
                                host_whole_coverage, list_hybrid, insert, paired_mismatch,
                                reads_tested, read_match)
            chkp.save(self.dir_chk, core_id, idx_seq)
            self.start_t = cur_t  # restart the interval timer

    def end(self, core_id):
        """Remove every checkpoint file of core_id once processing completed normally."""
        if self.test_mode == False and self.chk_freq != 0:
            list_f = os.listdir(self.dir_chk)
            sub_s = base_chk_fname + str(core_id) + chk_fname_sep
            for f in list_f:
                if sub_s in f:
                    os.remove(os.path.join(self.dir_chk, f))


# --- next file in the original patch: phageterm/seq_processing.py (new file) ---
##@file seq_processing.py
#
# This file contains functions that are used when running phageterm on multiple machines on a calculation cluster.
# @param DR_Path directory path where to put DR content.
from __future__ import print_function

from time import gmtime, strftime
import os
import numpy as np
from _modules.utilities import checkReportTitle
from _modules.readsCoverage_res import loadRCRes
from _modules.common_readsCoverage_processing import processCovValuesForSeq
#from SeqStats import SeqStats


def sum_readsCoverage_for_seq(dir_cov_res, idx_refseq, nb_pieces, inDArgs, fParms, inRawDArgs, dir_seq_res, DR_path):
    """Aggregate the per-piece readsCoverage results of one reference sequence
    and run its per-sequence processing (multi machine mode, step 2).

    Args:
        dir_cov_res: directory holding the coverage<idx>_<piece>.npz files from step 1.
        idx_refseq: index of the reference sequence to process.
        nb_pieces: number of pieces the reads were split into during step 1.
        inDArgs, fParms, inRawDArgs: structures built by checkOptArgsConsistency.
        dir_seq_res: directory where per sequence results are written.
        DR_path: directory where the DR dictionary content is written (created if absent).
    """
    if os.path.exists(DR_path):
        if not os.path.isdir(DR_path):
            raise RuntimeError("DR_path must point to a directory")
    else:
        os.mkdir(DR_path)
    DR = {"Headful (pac)": {}, "COS (5')": {}, "COS (3')": {}, "COS": {}, "DTR (short)": {}, "DTR (long)": {},
          "Mu-like": {}, "UNKNOWN": {}, "NEW": {}}
    print("going to load ", nb_pieces, " files for sequence ", idx_refseq)
    print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
    for i in range(0, nb_pieces):
        fic_name = os.path.join(dir_cov_res, "coverage" + str(idx_refseq) + "_" + str(i) + ".npz")
        print("loading file: ", fic_name)
        print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
        partial_res = loadRCRes(fic_name)
        if i == 0:
            # First piece: initialise the accumulators.
            termini_coverage = partial_res.termini_coverage
            whole_coverage = partial_res.whole_coverage
            paired_whole_coverage = partial_res.paired_whole_coverage
            phage_hybrid_coverage = partial_res.phage_hybrid_coverage
            host_hybrid_coverage = partial_res.host_hybrid_coverage
            host_whole_coverage = partial_res.host_whole_coverage
            list_hybrid = partial_res.list_hybrid
            insert = partial_res.insert
            paired_missmatch = partial_res.paired_mismatch
            reads_tested = partial_res.reads_tested
        else:
            # Subsequent pieces: element-wise accumulation.
            termini_coverage += partial_res.termini_coverage
            whole_coverage += partial_res.whole_coverage
            paired_whole_coverage += partial_res.paired_whole_coverage
            phage_hybrid_coverage += partial_res.phage_hybrid_coverage
            host_hybrid_coverage += partial_res.host_hybrid_coverage
            host_whole_coverage += partial_res.host_whole_coverage
            list_hybrid += partial_res.list_hybrid
            insert += partial_res.insert
            paired_missmatch += partial_res.paired_mismatch
            reads_tested += partial_res.reads_tested

    # Downstream processing expects plain lists, not numpy arrays.
    termini_coverage = termini_coverage.tolist()
    whole_coverage = whole_coverage.tolist()
    paired_whole_coverage = paired_whole_coverage.tolist()
    phage_hybrid_coverage = phage_hybrid_coverage.tolist()
    host_hybrid_coverage = host_hybrid_coverage.tolist()
    host_whole_coverage = host_whole_coverage.tolist()
    list_hybrid = list_hybrid.tolist()

    if sum(termini_coverage[0]) + sum(termini_coverage[1]) == 0:
        # NOTE(review): "no_natch" looks like a typo for "no_match" but is kept
        # as-is because downstream steps may look for this exact file name.
        no_match_file = "no_natch" + str(idx_refseq)
        f = open(os.path.join(dir_seq_res, no_match_file), "w")
        # BUG FIX: the original referenced an undefined name `seq_name`
        # (NameError); the sequence names live in inDArgs.refseq_name.
        f.write(checkReportTitle(inDArgs.refseq_name[idx_refseq]))
        f.close()

    print("finished sum, calling processCovValuesForSeq")
    print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
    # TODO: having so many values in input and returned is ugly and bad for readibility and maintanability. Group those who are related in structures.
    refseq = inDArgs.refseq_liste[idx_refseq]
    S_stats = processCovValuesForSeq(refseq, inDArgs.hostseq, inDArgs.refseq_name, inDArgs.refseq_liste, fParms.seed,
                                     inRawDArgs.analysis_name, inRawDArgs.tot_reads,
                                     idx_refseq, fParms.test_run, inRawDArgs.paired, fParms.edge, inRawDArgs.host,
                                     fParms.test, fParms.surrounding,
                                     fParms.limit_preferred, fParms.limit_fixed, fParms.Mu_threshold, termini_coverage,
                                     whole_coverage,
                                     paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,
                                     host_whole_coverage, insert, list_hybrid, reads_tested, DR, DR_path)
    # S_stats content is only used in the single-sequence case; this function targets
    # viromes processed over several machines, so only the DR content written by
    # processCovValuesForSeq matters here.
    print("exit sum_readsCoverage_for_seq")
    print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))


# --- next file in the original patch: phageterm/utilities.py (new file) ---
## @file utilities.py
#
# Gather here utility methods for phageterm. Used in both CPU and GPU version.
+#from string import maketrans +import re +import random +import sys + +import numpy as np +import datetime + +if sys.version_info < (3,): + import string + TRANSTAB = string.maketrans("ACGTN", "TGCAN") +else: + TRANSTAB = str.maketrans("ACGTN", "TGCAN") + +def checkReportTitle(report_title): + """Normalise report title (take out any special char)""" + default_title="Analysis_" + right_now=datetime.datetime.now() + default_title+=str(right_now.month) + default_title+=str(right_now.day) + default_title+="_" + default_title+=str(right_now.hour) + default_title+=str(right_now.minute) + titleNorm = "" + charok = list(range(48,58)) + list(range(65,91)) + list(range(97,123)) + [45,95] + for char in report_title: + if ord(char) in charok: + titleNorm += char + if len(titleNorm) > 1: + return titleNorm[:20] + else: + return default + +### SEQUENCE manipulation function +def changeCase(seq): + """Change lower case to UPPER CASE for a sequence string.""" + return seq.upper() + + +def reverseComplement(seq, transtab=str.maketrans('ATGCN', 'TACGN')): + """Reverse Complement a sequence.""" + return changeCase(seq).translate(transtab)[::-1] + +def longest_common_substring(read, refseq): + """Longest common substring between two strings.""" + m = [[0] * (1 + len(refseq)) for i in range(1 + len(read))] + longest, x_longest = 0, 0 + for x in range(1, 1 + len(read)): + for y in range(1, 1 + len(refseq)): + if read[x - 1] == refseq[y - 1]: + m[x][y] = m[x - 1][y - 1] + 1 + if m[x][y] > longest: + longest = m[x][y] + x_longest = x + else: + m[x][y] = 0 + return read[x_longest - longest: x_longest] + +def hybridCoverage(read, sequence, hybrid_coverage, start, end): + """Return hybrid coverage.""" + aligned_part_only = longest_common_substring(read, sequence[start:end]) + for i in range(start, min(len(sequence),start+len(aligned_part_only))): + hybrid_coverage[i]+=1 + return hybrid_coverage + +## Determines if readPart maps against Sequence. 
+# +# @param readPart A part of a read (seed characters usually) +# @param sequence (a contig) +# It choses randomly a mapping position amongst all mappings found. +# It returns 2 numbers: the start and stop position of the chosen mapping location. +def applyCoverage(readPart, sequence): + """Return a random match of a read onto the sequence. """ + position = [] + for pos in re.finditer(readPart,sequence): + position.append(pos) + if len(position) > 0: + match = random.choice(position) + return match.start(), match.end() + else: + return -1, -1 + +def correctEdge(coverage, edge): + """Correction of the Edge coverage. """ + correctCov = np.array([len(coverage[0])*[0], len(coverage[0])*[0]]) + End = len(coverage[0]) + covSta = range(edge) + covEnd = range(End-edge,End) + for i in range(len(coverage)): + for j in range(len(coverage[i])): + correctCov[i][j] = coverage[i][j] + for k in covSta: + correctCov[i][k+edge] += coverage[i][k+End-edge] + for l in covEnd: + correctCov[i][l-edge] += coverage[i][l-End+edge] + return correctCov + +# utility class for storing results of decisionProcess function +class DecisionProcessOutput: + def __init__(self, Redundant, Permuted, P_class, P_type, P_seqcoh, P_concat, + P_orient, P_left, P_right, Mu_like): + pass + diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000000000000000000000000000000000000..9310eefeb7f63412a12704e2abfa7199d38c0144 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,774 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
+ +[[package]] +name = "contourpy" +version = "1.1.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.8" +files = [ + {file = "contourpy-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc"}, + {file = "contourpy-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa"}, + {file = "contourpy-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa"}, + {file = "contourpy-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e"}, + {file = 
"contourpy-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a"}, + {file = "contourpy-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e"}, + {file = "contourpy-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f"}, + {file = "contourpy-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1"}, + {file = "contourpy-1.1.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a"}, + {file = "contourpy-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002"}, + {file = "contourpy-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027"}, + {file = 
"contourpy-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f"}, + {file = "contourpy-1.1.0.tar.gz", hash = "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21"}, +] + +[package.dependencies] +numpy = ">=1.16" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.2.0)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "wurlitzer"] + +[[package]] +name = "cycler" +version = "0.11.0" +description = "Composable style cycles" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, + {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, +] + +[[package]] +name = "fonttools" +version = "4.42.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.42.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ed1a13a27f59d1fc1920394a7f596792e9d546c9ca5a044419dca70c37815d7c"}, + {file = "fonttools-4.42.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9b1ce7a45978b821a06d375b83763b27a3a5e8a2e4570b3065abad240a18760"}, + {file = "fonttools-4.42.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f720fa82a11c0f9042376fd509b5ed88dab7e3cd602eee63a1af08883b37342b"}, + {file = "fonttools-4.42.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db55cbaea02a20b49fefbd8e9d62bd481aaabe1f2301dabc575acc6b358874fa"}, + {file = 
"fonttools-4.42.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a35981d90feebeaef05e46e33e6b9e5b5e618504672ca9cd0ff96b171e4bfff"}, + {file = "fonttools-4.42.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:68a02bbe020dc22ee0540e040117535f06df9358106d3775e8817d826047f3fd"}, + {file = "fonttools-4.42.1-cp310-cp310-win32.whl", hash = "sha256:12a7c247d1b946829bfa2f331107a629ea77dc5391dfd34fdcd78efa61f354ca"}, + {file = "fonttools-4.42.1-cp310-cp310-win_amd64.whl", hash = "sha256:a398bdadb055f8de69f62b0fc70625f7cbdab436bbb31eef5816e28cab083ee8"}, + {file = "fonttools-4.42.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:689508b918332fb40ce117131633647731d098b1b10d092234aa959b4251add5"}, + {file = "fonttools-4.42.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e36344e48af3e3bde867a1ca54f97c308735dd8697005c2d24a86054a114a71"}, + {file = "fonttools-4.42.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19b7db825c8adee96fac0692e6e1ecd858cae9affb3b4812cdb9d934a898b29e"}, + {file = "fonttools-4.42.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:113337c2d29665839b7d90b39f99b3cac731f72a0eda9306165a305c7c31d341"}, + {file = "fonttools-4.42.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:37983b6bdab42c501202500a2be3a572f50d4efe3237e0686ee9d5f794d76b35"}, + {file = "fonttools-4.42.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6ed2662a3d9c832afa36405f8748c250be94ae5dfc5283d668308391f2102861"}, + {file = "fonttools-4.42.1-cp311-cp311-win32.whl", hash = "sha256:179737095eb98332a2744e8f12037b2977f22948cf23ff96656928923ddf560a"}, + {file = "fonttools-4.42.1-cp311-cp311-win_amd64.whl", hash = "sha256:f2b82f46917d8722e6b5eafeefb4fb585d23babd15d8246c664cd88a5bddd19c"}, + {file = "fonttools-4.42.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:62f481ac772fd68901573956231aea3e4b1ad87b9b1089a61613a91e2b50bb9b"}, + {file = "fonttools-4.42.1-cp38-cp38-macosx_10_9_x86_64.whl", 
hash = "sha256:f2f806990160d1ce42d287aa419df3ffc42dfefe60d473695fb048355fe0c6a0"}, + {file = "fonttools-4.42.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db372213d39fa33af667c2aa586a0c1235e88e9c850f5dd5c8e1f17515861868"}, + {file = "fonttools-4.42.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d18fc642fd0ac29236ff88ecfccff229ec0386090a839dd3f1162e9a7944a40"}, + {file = "fonttools-4.42.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8708b98c278012ad267ee8a7433baeb809948855e81922878118464b274c909d"}, + {file = "fonttools-4.42.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c95b0724a6deea2c8c5d3222191783ced0a2f09bd6d33f93e563f6f1a4b3b3a4"}, + {file = "fonttools-4.42.1-cp38-cp38-win32.whl", hash = "sha256:4aa79366e442dbca6e2c8595645a3a605d9eeabdb7a094d745ed6106816bef5d"}, + {file = "fonttools-4.42.1-cp38-cp38-win_amd64.whl", hash = "sha256:acb47f6f8680de24c1ab65ebde39dd035768e2a9b571a07c7b8da95f6c8815fd"}, + {file = "fonttools-4.42.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb289b7a815638a7613d46bcf324c9106804725b2bb8ad913c12b6958ffc4ec"}, + {file = "fonttools-4.42.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:53eb5091ddc8b1199330bb7b4a8a2e7995ad5d43376cadce84523d8223ef3136"}, + {file = "fonttools-4.42.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46a0ec8adbc6ff13494eb0c9c2e643b6f009ce7320cf640de106fb614e4d4360"}, + {file = "fonttools-4.42.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cc7d685b8eeca7ae69dc6416833fbfea61660684b7089bca666067cb2937dcf"}, + {file = "fonttools-4.42.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:be24fcb80493b2c94eae21df70017351851652a37de514de553435b256b2f249"}, + {file = "fonttools-4.42.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:515607ec756d7865f23070682622c49d922901943697871fc292277cf1e71967"}, + {file = "fonttools-4.42.1-cp39-cp39-win32.whl", hash = 
"sha256:0eb79a2da5eb6457a6f8ab904838454accc7d4cccdaff1fd2bd3a0679ea33d64"}, + {file = "fonttools-4.42.1-cp39-cp39-win_amd64.whl", hash = "sha256:7286aed4ea271df9eab8d7a9b29e507094b51397812f7ce051ecd77915a6e26b"}, + {file = "fonttools-4.42.1-py3-none-any.whl", hash = "sha256:9398f244e28e0596e2ee6024f808b06060109e33ed38dcc9bded452fd9bbb853"}, + {file = "fonttools-4.42.1.tar.gz", hash = "sha256:c391cd5af88aacaf41dd7cfb96eeedfad297b5899a39e12f4c2c3706d0a3329d"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "scipy"] +lxml = ["lxml (>=4.0,<5)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.0.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "importlib-resources" +version = "6.0.1" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, + {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[[package]] +name = "joblib" 
+version = "1.3.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.5" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.7" +files = [ + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ef7afcd2d281494c0a9101d5c571970708ad911d028137cd558f02b851c08b4"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9eaa8b117dc8337728e834b9c6e2611f10c79e38f65157c4c38e9400286f5cb1"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec20916e7b4cbfb1f12380e46486ec4bcbaa91a9c448b97023fde0d5bbf9e4ff"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b42c68602539407884cf70d6a480a469b93b81b7701378ba5e2328660c847a"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa12042de0171fad672b6c59df69106d20d5596e4f87b5e8f76df757a7c399aa"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a40773c71d7ccdd3798f6489aaac9eee213d566850a9533f8d26332d626b82c"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:19df6e621f6d8b4b9c4d45f40a66839294ff2bb235e64d2178f7522d9170ac5b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:83d78376d0d4fd884e2c114d0621624b73d2aba4e2788182d286309ebdeed770"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e391b1f0a8a5a10ab3b9bb6afcfd74f2175f24f8975fb87ecae700d1503cdee0"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:852542f9481f4a62dbb5dd99e8ab7aedfeb8fb6342349a181d4036877410f525"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59edc41b24031bc25108e210c0def6f6c2191210492a972d585a06ff246bb79b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win32.whl", hash = "sha256:a6aa6315319a052b4ee378aa171959c898a6183f15c1e541821c5c59beaa0238"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:d0ef46024e6a3d79c01ff13801cb19d0cad7fd859b15037aec74315540acc276"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:11863aa14a51fd6ec28688d76f1735f8f69ab1fabf388851a595d0721af042f5"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ab3919a9997ab7ef2fbbed0cc99bb28d3c13e6d4b1ad36e97e482558a91be90"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fcc700eadbbccbf6bc1bcb9dbe0786b4b1cb91ca0dcda336eef5c2beed37b797"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfdd7c0b105af050eb3d64997809dc21da247cf44e63dc73ff0fd20b96be55a9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c6a5964640638cdeaa0c359382e5703e9293030fe730018ca06bc2010c4437"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbea0db94288e29afcc4c28afbf3a7ccaf2d7e027489c449cf7e8f83c6346eb9"}, + {file = 
"kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceec1a6bc6cab1d6ff5d06592a91a692f90ec7505d6463a88a52cc0eb58545da"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f91de7223d4c7b793867797bacd1ee53bfe7359bd70d27b7b58a04efbb9436c8"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:faae4860798c31530dd184046a900e652c95513796ef51a12bc086710c2eec4d"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0157420efcb803e71d1b28e2c287518b8808b7cf1ab8af36718fd0a2c453eb0"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:06f54715b7737c2fecdbf140d1afb11a33d59508a47bf11bb38ecf21dc9ab79f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fdb7adb641a0d13bdcd4ef48e062363d8a9ad4a182ac7647ec88f695e719ae9f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win32.whl", hash = "sha256:bb86433b1cfe686da83ce32a9d3a8dd308e85c76b60896d58f082136f10bffac"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c08e1312a9cf1074d17b17728d3dfce2a5125b2d791527f33ffbe805200a355"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:32d5cf40c4f7c7b3ca500f8985eb3fb3a7dfc023215e876f207956b5ea26632a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f846c260f483d1fd217fe5ed7c173fb109efa6b1fc8381c8b7552c5781756192"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ff5cf3571589b6d13bfbfd6bcd7a3f659e42f96b5fd1c4830c4cf21d4f5ef45"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:7269d9e5f1084a653d575c7ec012ff57f0c042258bf5db0954bf551c158466e7"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da802a19d6e15dffe4b0c24b38b3af68e6c1a68e6e1d8f30148c83864f3881db"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aba7311af82e335dd1e36ffff68aaca609ca6290c2cb6d821a39aa075d8e3ff"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763773d53f07244148ccac5b084da5adb90bfaee39c197554f01b286cf869228"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2270953c0d8cdab5d422bee7d2007f043473f9d2999631c86a223c9db56cbd16"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d099e745a512f7e3bbe7249ca835f4d357c586d78d79ae8f1dcd4d8adeb9bda9"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:74db36e14a7d1ce0986fa104f7d5637aea5c82ca6326ed0ec5694280942d1162"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e5bab140c309cb3a6ce373a9e71eb7e4873c70c2dda01df6820474f9889d6d4"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0f114aa76dc1b8f636d077979c0ac22e7cd8f3493abbab152f20eb8d3cda71f3"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:88a2df29d4724b9237fc0c6eaf2a1adae0cdc0b3e9f4d8e7dc54b16812d2d81a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win32.whl", hash = "sha256:72d40b33e834371fd330fb1472ca19d9b8327acb79a5821d4008391db8e29f20"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:2c5674c4e74d939b9d91dda0fae10597ac7521768fec9e399c70a1f27e2ea2d9"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3a2b053a0ab7a3960c98725cfb0bf5b48ba82f64ec95fe06f1d06c99b552e130"}, + {file = 
"kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd32d6c13807e5c66a7cbb79f90b553642f296ae4518a60d8d76243b0ad2898"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59ec7b7c7e1a61061850d53aaf8e93db63dce0c936db1fda2658b70e4a1be709"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da4cfb373035def307905d05041c1d06d8936452fe89d464743ae7fb8371078b"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2400873bccc260b6ae184b2b8a4fec0e4082d30648eadb7c3d9a13405d861e89"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1b04139c4236a0f3aff534479b58f6f849a8b351e1314826c2d230849ed48985"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:4e66e81a5779b65ac21764c295087de82235597a2293d18d943f8e9e32746265"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7931d8f1f67c4be9ba1dd9c451fb0eeca1a25b89e4d3f89e828fe12a519b782a"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:b3f7e75f3015df442238cca659f8baa5f42ce2a8582727981cbfa15fee0ee205"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:bbf1d63eef84b2e8c89011b7f2235b1e0bf7dacc11cac9431fc6468e99ac77fb"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4c380469bd3f970ef677bf2bcba2b6b0b4d5c75e7a020fb863ef75084efad66f"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win32.whl", hash = "sha256:9408acf3270c4b6baad483865191e3e582b638b1654a007c62e3efe96f09a9a3"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win_amd64.whl", hash = "sha256:5b94529f9b2591b7af5f3e0e730a4e0a41ea174af35a4fd067775f9bdfeee01a"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:11c7de8f692fc99816e8ac50d1d1aef4f75126eefc33ac79aac02c099fd3db71"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:53abb58632235cd154176ced1ae8f0d29a6657aa1aa9decf50b899b755bc2b93"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:88b9f257ca61b838b6f8094a62418421f87ac2a1069f7e896c36a7d86b5d4c29"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3195782b26fc03aa9c6913d5bad5aeb864bdc372924c093b0f1cebad603dd712"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc579bf0f502e54926519451b920e875f433aceb4624a3646b3252b5caa9e0b6"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a580c91d686376f0f7c295357595c5a026e6cbc3d77b7c36e290201e7c11ecb"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cfe6ab8da05c01ba6fbea630377b5da2cd9bcbc6338510116b01c1bc939a2c18"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d2e5a98f0ec99beb3c10e13b387f8db39106d53993f498b295f0c914328b1333"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a51a263952b1429e429ff236d2f5a21c5125437861baeed77f5e1cc2d2c7c6da"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3edd2fa14e68c9be82c5b16689e8d63d89fe927e56debd6e1dbce7a26a17f81b"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:74d1b44c6cfc897df648cc9fdaa09bc3e7679926e6f96df05775d4fb3946571c"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76d9289ed3f7501012e05abb8358bbb129149dbd173f1f57a1bf1c22d19ab7cc"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:92dea1ffe3714fa8eb6a314d2b3c773208d865a0e0d35e713ec54eea08a66250"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win32.whl", 
hash = "sha256:5c90ae8c8d32e472be041e76f9d2f2dbff4d0b0be8bd4041770eddb18cf49a4e"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win_amd64.whl", hash = "sha256:c7940c1dc63eb37a67721b10d703247552416f719c4188c54e04334321351ced"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9407b6a5f0d675e8a827ad8742e1d6b49d9c1a1da5d952a67d50ef5f4170b18d"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15568384086b6df3c65353820a4473575dbad192e35010f622c6ce3eebd57af9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0dc9db8e79f0036e8173c466d21ef18e1befc02de8bf8aa8dc0813a6dc8a7046"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cdc8a402aaee9a798b50d8b827d7ecf75edc5fb35ea0f91f213ff927c15f4ff0"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:955e8513d07a283056b1396e9a57ceddbd272d9252c14f154d450d227606eb54"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:346f5343b9e3f00b8db8ba359350eb124b98c99efd0b408728ac6ebf38173958"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9098e0049e88c6a24ff64545cdfc50807818ba6c1b739cae221bbbcbc58aad3"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00bd361b903dc4bbf4eb165f24d1acbee754fce22ded24c3d56eec268658a5cf"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7b8b454bac16428b22560d0a1cf0a09875339cab69df61d7805bf48919415901"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f1d072c2eb0ad60d4c183f3fb44ac6f73fb7a8f16a2694a91f988275cbf352f9"}, + {file = 
"kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:31a82d498054cac9f6d0b53d02bb85811185bcb477d4b60144f915f3b3126342"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6512cb89e334e4700febbffaaa52761b65b4f5a3cf33f960213d5656cea36a77"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win32.whl", hash = "sha256:9db8ea4c388fdb0f780fe91346fd438657ea602d58348753d9fb265ce1bca67f"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:59415f46a37f7f2efeec758353dd2eae1b07640d8ca0f0c42548ec4125492635"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5c7b3b3a728dc6faf3fc372ef24f21d1e3cee2ac3e9596691d746e5a536de920"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:620ced262a86244e2be10a676b646f29c34537d0d9cc8eb26c08f53d98013390"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:378a214a1e3bbf5ac4a8708304318b4f890da88c9e6a07699c4ae7174c09a68d"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf7be1207676ac608a50cd08f102f6742dbfc70e8d60c4db1c6897f62f71523"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ba55dce0a9b8ff59495ddd050a0225d58bd0983d09f87cfe2b6aec4f2c1234e4"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd32ea360bcbb92d28933fc05ed09bffcb1704ba3fc7942e81db0fd4f81a7892"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7139af55d1688f8b960ee9ad5adafc4ac17c1c473fe07133ac092310d76544"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dced8146011d2bc2e883f9bd68618b8247387f4bbec46d7392b3c3b032640126"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c9bf3325c47b11b2e51bca0824ea217c7cd84491d8ac4eefd1e409705ef092bd"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5794cf59533bc3f1b1c821f7206a3617999db9fbefc345360aafe2e067514929"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e368f200bbc2e4f905b8e71eb38b3c04333bddaa6a2464a6355487b02bb7fb09"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d706eba36b4c4d5bc6c6377bb6568098765e990cfc21ee16d13963fab7b3e7"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85267bd1aa8880a9c88a8cb71e18d3d64d2751a790e6ca6c27b8ccc724bcd5ad"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210ef2c3a1f03272649aff1ef992df2e724748918c4bc2d5a90352849eb40bea"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11d011a7574eb3b82bcc9c1a1d35c1d7075677fdd15de527d91b46bd35e935ee"}, + {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, +] + +[[package]] +name = "matplotlib" +version = "3.7.2" +description = "Python plotting package" +optional = false +python-versions = ">=3.8" +files = [ + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:2699f7e73a76d4c110f4f25be9d2496d6ab4f17345307738557d345f099e07de"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a8035ba590658bae7562786c9cc6ea1a84aa49d3afab157e414c9e2ea74f496d"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f8e4a49493add46ad4a8c92f63e19d548b2b6ebbed75c6b4c7f46f57d36cdd1"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71667eb2ccca4c3537d9414b1bc00554cb7f91527c17ee4ec38027201f8f1603"}, + {file = 
"matplotlib-3.7.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:152ee0b569a37630d8628534c628456b28686e085d51394da6b71ef84c4da201"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:070f8dddd1f5939e60aacb8fa08f19551f4b0140fab16a3669d5cd6e9cb28fc8"}, + {file = "matplotlib-3.7.2-cp310-cp310-win32.whl", hash = "sha256:fdbb46fad4fb47443b5b8ac76904b2e7a66556844f33370861b4788db0f8816a"}, + {file = "matplotlib-3.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:23fb1750934e5f0128f9423db27c474aa32534cec21f7b2153262b066a581fd1"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:30e1409b857aa8a747c5d4f85f63a79e479835f8dffc52992ac1f3f25837b544"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:50e0a55ec74bf2d7a0ebf50ac580a209582c2dd0f7ab51bc270f1b4a0027454e"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac60daa1dc83e8821eed155796b0f7888b6b916cf61d620a4ddd8200ac70cd64"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305e3da477dc8607336ba10bac96986d6308d614706cae2efe7d3ffa60465b24"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c308b255efb9b06b23874236ec0f10f026673ad6515f602027cc8ac7805352d"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60c521e21031632aa0d87ca5ba0c1c05f3daacadb34c093585a0be6780f698e4"}, + {file = "matplotlib-3.7.2-cp311-cp311-win32.whl", hash = "sha256:26bede320d77e469fdf1bde212de0ec889169b04f7f1179b8930d66f82b30cbc"}, + {file = "matplotlib-3.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:af4860132c8c05261a5f5f8467f1b269bf1c7c23902d75f2be57c4a7f2394b3e"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:a1733b8e84e7e40a9853e505fe68cc54339f97273bdfe6f3ed980095f769ddc7"}, + {file 
= "matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d9881356dc48e58910c53af82b57183879129fa30492be69058c5b0d9fddf391"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f081c03f413f59390a80b3e351cc2b2ea0205839714dbc364519bcf51f4b56ca"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cd120fca3407a225168238b790bd5c528f0fafde6172b140a2f3ab7a4ea63e9"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a2c1590b90aa7bd741b54c62b78de05d4186271e34e2377e0289d943b3522273"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d2ff3c984b8a569bc1383cd468fc06b70d7b59d5c2854ca39f1436ae8394117"}, + {file = "matplotlib-3.7.2-cp38-cp38-win32.whl", hash = "sha256:5dea00b62d28654b71ca92463656d80646675628d0828e08a5f3b57e12869e13"}, + {file = "matplotlib-3.7.2-cp38-cp38-win_amd64.whl", hash = "sha256:0f506a1776ee94f9e131af1ac6efa6e5bc7cb606a3e389b0ccb6e657f60bb676"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:6515e878f91894c2e4340d81f0911857998ccaf04dbc1bba781e3d89cbf70608"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:71f7a8c6b124e904db550f5b9fe483d28b896d4135e45c4ea381ad3b8a0e3256"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12f01b92ecd518e0697da4d97d163b2b3aa55eb3eb4e2c98235b3396d7dad55f"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7e28d6396563955f7af437894a36bf2b279462239a41028323e04b85179058b"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbcf59334ff645e6a67cd5f78b4b2cdb76384cdf587fa0d2dc85f634a72e1a3e"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:318c89edde72ff95d8df67d82aca03861240512994a597a435a1011ba18dbc7f"}, + {file = "matplotlib-3.7.2-cp39-cp39-win32.whl", hash = "sha256:ce55289d5659b5b12b3db4dc9b7075b70cef5631e56530f14b2945e8836f2d20"}, + {file = "matplotlib-3.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:2ecb5be2b2815431c81dc115667e33da0f5a1bcf6143980d180d09a717c4a12e"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdcd28360dbb6203fb5219b1a5658df226ac9bebc2542a9e8f457de959d713d0"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c3cca3e842b11b55b52c6fb8bd6a4088693829acbfcdb3e815fa9b7d5c92c1b"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebf577c7a6744e9e1bd3fee45fc74a02710b214f94e2bde344912d85e0c9af7c"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:936bba394682049919dda062d33435b3be211dc3dcaa011e09634f060ec878b2"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bc221ffbc2150458b1cd71cdd9ddd5bb37962b036e41b8be258280b5b01da1dd"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35d74ebdb3f71f112b36c2629cf32323adfbf42679e2751252acd468f5001c07"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:717157e61b3a71d3d26ad4e1770dc85156c9af435659a25ee6407dc866cb258d"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:20f844d6be031948148ba49605c8b96dfe7d3711d1b63592830d650622458c11"}, + {file = "matplotlib-3.7.2.tar.gz", hash = "sha256:a8cdb91dddb04436bd2f098b8fdf4b81352e68cf4d2c6756fcc414791076569b"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +importlib-resources = {version = ">=3.2.0", markers = "python_version < \"3.10\""} +kiwisolver = ">=1.0.1" +numpy = ">=1.20" +packaging = 
">=20.0" +pillow = ">=6.2.0" +pyparsing = ">=2.3.1,<3.1" +python-dateutil = ">=2.7" + +[[package]] +name = "numpy" +version = "1.25.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, +] + +[[package]] +name = "packaging" +version = "23.1" 
+description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] + +[[package]] +name = "pandas" +version = "2.1.0" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"}, + {file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"}, + {file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"}, + {file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"}, + {file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"}, + {file = 
"pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"}, + {file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"}, + {file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"}, + {file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"}, + {file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"}, + {file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml 
(>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + +[[package]] +name = "patsy" +version = "0.5.3" +description = "A Python package for describing statistical models and for building design matrices." 
+optional = false +python-versions = "*" +files = [ + {file = "patsy-0.5.3-py2.py3-none-any.whl", hash = "sha256:7eb5349754ed6aa982af81f636479b1b8db9d5b1a6e957a6016ec0534b5c86b7"}, + {file = "patsy-0.5.3.tar.gz", hash = "sha256:bdc18001875e319bc91c812c1eb6a10be4bb13cb81eb763f466179dca3b67277"}, +] + +[package.dependencies] +numpy = ">=1.4" +six = "*" + +[package.extras] +test = ["pytest", "pytest-cov", "scipy"] + +[[package]] +name = "pillow" +version = "10.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "Pillow-10.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f62406a884ae75fb2f818694469519fb685cc7eaff05d3451a9ebe55c646891"}, + {file = "Pillow-10.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d5db32e2a6ccbb3d34d87c87b432959e0db29755727afb37290e10f6e8e62614"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edf4392b77bdc81f36e92d3a07a5cd072f90253197f4a52a55a8cec48a12483b"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:520f2a520dc040512699f20fa1c363eed506e94248d71f85412b625026f6142c"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:8c11160913e3dd06c8ffdb5f233a4f254cb449f4dfc0f8f4549eda9e542c93d1"}, + {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a74ba0c356aaa3bb8e3eb79606a87669e7ec6444be352870623025d75a14a2bf"}, + {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5d0dae4cfd56969d23d94dc8e89fb6a217be461c69090768227beb8ed28c0a3"}, + {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22c10cc517668d44b211717fd9775799ccec4124b9a7f7b3635fc5386e584992"}, + {file = "Pillow-10.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:dffe31a7f47b603318c609f378ebcd57f1554a3a6a8effbc59c3c69f804296de"}, + {file = "Pillow-10.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = 
"sha256:9fb218c8a12e51d7ead2a7c9e101a04982237d4855716af2e9499306728fb485"}, + {file = "Pillow-10.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d35e3c8d9b1268cbf5d3670285feb3528f6680420eafe35cccc686b73c1e330f"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ed64f9ca2f0a95411e88a4efbd7a29e5ce2cea36072c53dd9d26d9c76f753b3"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b6eb5502f45a60a3f411c63187db83a3d3107887ad0d036c13ce836f8a36f1d"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c1fbe7621c167ecaa38ad29643d77a9ce7311583761abf7836e1510c580bf3dd"}, + {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cd25d2a9d2b36fcb318882481367956d2cf91329f6892fe5d385c346c0649629"}, + {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3b08d4cc24f471b2c8ca24ec060abf4bebc6b144cb89cba638c720546b1cf538"}, + {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737a602fbd82afd892ca746392401b634e278cb65d55c4b7a8f48e9ef8d008d"}, + {file = "Pillow-10.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3a82c40d706d9aa9734289740ce26460a11aeec2d9c79b7af87bb35f0073c12f"}, + {file = "Pillow-10.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc2ec7c7b5d66b8ec9ce9f720dbb5fa4bace0f545acd34870eff4a369b44bf37"}, + {file = "Pillow-10.0.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:d80cf684b541685fccdd84c485b31ce73fc5c9b5d7523bf1394ce134a60c6883"}, + {file = "Pillow-10.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76de421f9c326da8f43d690110f0e79fe3ad1e54be811545d7d91898b4c8493e"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ff539a12457809666fef6624684c008e00ff6bf455b4b89fd00a140eecd640"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ce543ed15570eedbb85df19b0a1a7314a9c8141a36ce089c0a894adbfccb4568"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:685ac03cc4ed5ebc15ad5c23bc555d68a87777586d970c2c3e216619a5476223"}, + {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d72e2ecc68a942e8cf9739619b7f408cc7b272b279b56b2c83c6123fcfa5cdff"}, + {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d50b6aec14bc737742ca96e85d6d0a5f9bfbded018264b3b70ff9d8c33485551"}, + {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:00e65f5e822decd501e374b0650146063fbb30a7264b4d2744bdd7b913e0cab5"}, + {file = "Pillow-10.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:f31f9fdbfecb042d046f9d91270a0ba28368a723302786c0009ee9b9f1f60199"}, + {file = "Pillow-10.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:1ce91b6ec08d866b14413d3f0bbdea7e24dfdc8e59f562bb77bc3fe60b6144ca"}, + {file = "Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:349930d6e9c685c089284b013478d6f76e3a534e36ddfa912cde493f235372f3"}, + {file = "Pillow-10.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3a684105f7c32488f7153905a4e3015a3b6c7182e106fe3c37fbb5ef3e6994c3"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4f69b3700201b80bb82c3a97d5e9254084f6dd5fb5b16fc1a7b974260f89f43"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f07ea8d2f827d7d2a49ecf1639ec02d75ffd1b88dcc5b3a61bbb37a8759ad8d"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:040586f7d37b34547153fa383f7f9aed68b738992380ac911447bb78f2abe530"}, + {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f88a0b92277de8e3ca715a0d79d68dc82807457dae3ab8699c758f07c20b3c51"}, + {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:c7cf14a27b0d6adfaebb3ae4153f1e516df54e47e42dcc073d7b3d76111a8d86"}, + {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3400aae60685b06bb96f99a21e1ada7bc7a413d5f49bce739828ecd9391bb8f7"}, + {file = "Pillow-10.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbc02381779d412145331789b40cc7b11fdf449e5d94f6bc0b080db0a56ea3f0"}, + {file = "Pillow-10.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9211e7ad69d7c9401cfc0e23d49b69ca65ddd898976d660a2fa5904e3d7a9baa"}, + {file = "Pillow-10.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:faaf07ea35355b01a35cb442dd950d8f1bb5b040a7787791a535de13db15ed90"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f72a021fbb792ce98306ffb0c348b3c9cb967dce0f12a49aa4c3d3fdefa967"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:76edb0a1fa2b4745fb0c99fb9fb98f8b180a1bbceb8be49b087e0b21867e77d3"}, + {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:368ab3dfb5f49e312231b6f27b8820c823652b7cd29cfbd34090565a015e99ba"}, + {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:608bfdee0d57cf297d32bcbb3c728dc1da0907519d1784962c5f0c68bb93e5a3"}, + {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5c6e3df6bdd396749bafd45314871b3d0af81ff935b2d188385e970052091017"}, + {file = "Pillow-10.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:7be600823e4c8631b74e4a0d38384c73f680e6105a7d3c6824fcf226c178c7e6"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:92be919bbc9f7d09f7ae343c38f5bb21c973d2576c1d45600fce4b74bafa7ac0"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8f8182b523b2289f7c415f589118228d30ac8c355baa2f3194ced084dac2dbba"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:38250a349b6b390ee6047a62c086d3817ac69022c127f8a5dc058c31ccef17f3"}, + {file = "Pillow-10.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:88af2003543cc40c80f6fca01411892ec52b11021b3dc22ec3bc9d5afd1c5334"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c189af0545965fa8d3b9613cfdb0cd37f9d71349e0f7750e1fd704648d475ed2"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7b031a6fc11365970e6a5686d7ba8c63e4c1cf1ea143811acbb524295eabed"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db24668940f82321e746773a4bc617bfac06ec831e5c88b643f91f122a785684"}, + {file = "Pillow-10.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:efe8c0681042536e0d06c11f48cebe759707c9e9abf880ee213541c5b46c5bf3"}, + {file = "Pillow-10.0.0.tar.gz", hash = "sha256:9c82b5b3e043c7af0d95792d0d20ccf68f61a1fec6b3530e718b688422727396"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" 
+description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2023.3" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, + {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, +] + +[[package]] +name = "reportlab" +version = "4.0.4" +description = "The Reportlab Toolkit" +optional = false +python-versions = ">=3.7,<4" +files = [ + {file = "reportlab-4.0.4-py3-none-any.whl", hash = "sha256:3dcda79ce04baf70721e2ec54854722644262cac2feec3d5c4c5e77015504cb0"}, + {file = "reportlab-4.0.4.tar.gz", hash = "sha256:7f70b3b56aff5f11cb4136c51a0f5a56fe6e4c8fbbac7b903076db99a8ef31c1"}, +] + +[package.dependencies] +pillow = ">=9.0.0" + +[package.extras] +accel = ["rl-accel (>=0.9.0,<1.1)"] +pycairo = ["freetype-py (>=2.3.0,<2.4)", "rlPyCairo (>=0.2.0,<1)"] +renderpm = ["rl-renderPM (>=4.0.3,<4.1)"] + +[[package]] +name = "scikit-learn" +version = "1.3.0" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.3.0.tar.gz", hash = "sha256:8be549886f5eda46436b6e555b0e4873b4f10aa21c07df45c4bc1735afbccd7a"}, + {file = "scikit_learn-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:981287869e576d42c682cf7ca96af0c6ac544ed9316328fd0d9292795c742cf5"}, + {file = 
"scikit_learn-1.3.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:436aaaae2c916ad16631142488e4c82f4296af2404f480e031d866863425d2a2"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e28d8fa47a0b30ae1bd7a079519dd852764e31708a7804da6cb6f8b36e3630"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80c08834a473d08a204d966982a62e11c976228d306a2648c575e3ead12111"}, + {file = "scikit_learn-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:552fd1b6ee22900cf1780d7386a554bb96949e9a359999177cf30211e6b20df6"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79970a6d759eb00a62266a31e2637d07d2d28446fca8079cf9afa7c07b0427f8"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:850a00b559e636b23901aabbe79b73dc604b4e4248ba9e2d6e72f95063765603"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee04835fb016e8062ee9fe9074aef9b82e430504e420bff51e3e5fffe72750ca"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d953531f5d9f00c90c34fa3b7d7cfb43ecff4c605dac9e4255a20b114a27369"}, + {file = "scikit_learn-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:151ac2bf65ccf363664a689b8beafc9e6aae36263db114b4ca06fbbbf827444a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a885a9edc9c0a341cab27ec4f8a6c58b35f3d449c9d2503a6fd23e06bbd4f6a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9877af9c6d1b15486e18a94101b742e9d0d2f343d35a634e337411ddb57783f3"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c470f53cea065ff3d588050955c492793bb50c19a92923490d18fcb637f6383a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fd6e2d7389542eae01077a1ee0318c4fec20c66c957f45c7aac0c6eb0fe3c612"}, + {file = "scikit_learn-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:3a11936adbc379a6061ea32fa03338d4ca7248d86dd507c81e13af428a5bc1db"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:998d38fcec96584deee1e79cd127469b3ad6fefd1ea6c2dfc54e8db367eb396b"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:ded35e810438a527e17623ac6deae3b360134345b7c598175ab7741720d7ffa7"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e8102d5036e28d08ab47166b48c8d5e5810704daecf3a476a4282d562be9a28"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7617164951c422747e7c32be4afa15d75ad8044f42e7d70d3e2e0429a50e6718"}, + {file = "scikit_learn-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d54fb9e6038284548072df22fd34777e434153f7ffac72c8596f2d6987110dd"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.5.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = 
"1.11.2" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = "<3.13,>=3.9" +files = [ + {file = "scipy-1.11.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2b997a5369e2d30c97995dcb29d638701f8000d04df01b8e947f206e5d0ac788"}, + {file = "scipy-1.11.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:95763fbda1206bec41157582bea482f50eb3702c85fffcf6d24394b071c0e87a"}, + {file = "scipy-1.11.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e367904a0fec76433bf3fbf3e85bf60dae8e9e585ffd21898ab1085a29a04d16"}, + {file = "scipy-1.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d690e1ca993c8f7ede6d22e5637541217fc6a4d3f78b3672a6fe454dbb7eb9a7"}, + {file = "scipy-1.11.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d2b813bfbe8dec6a75164523de650bad41f4405d35b0fa24c2c28ae07fcefb20"}, + {file = "scipy-1.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:afdb0d983f6135d50770dd979df50bf1c7f58b5b33e0eb8cf5c73c70600eae1d"}, + {file = "scipy-1.11.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d9886f44ef8c9e776cb7527fb01455bf4f4a46c455c4682edc2c2cc8cd78562"}, + {file = "scipy-1.11.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1342ca385c673208f32472830c10110a9dcd053cf0c4b7d4cd7026d0335a6c1d"}, + {file = "scipy-1.11.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b133f237bd8ba73bad51bc12eb4f2d84cbec999753bf25ba58235e9fc2096d80"}, + {file = "scipy-1.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aeb87661de987f8ec56fa6950863994cd427209158255a389fc5aea51fa7055"}, + {file = "scipy-1.11.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90d3b1364e751d8214e325c371f0ee0dd38419268bf4888b2ae1040a6b266b2a"}, + {file = "scipy-1.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:f73102f769ee06041a3aa26b5841359b1a93cc364ce45609657751795e8f4a4a"}, + {file = 
"scipy-1.11.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa4909c6c20c3d91480533cddbc0e7c6d849e7d9ded692918c76ce5964997898"}, + {file = "scipy-1.11.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ac74b1512d38718fb6a491c439aa7b3605b96b1ed3be6599c17d49d6c60fca18"}, + {file = "scipy-1.11.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8425fa963a32936c9773ee3ce44a765d8ff67eed5f4ac81dc1e4a819a238ee9"}, + {file = "scipy-1.11.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:542a757e2a6ec409e71df3d8fd20127afbbacb1c07990cb23c5870c13953d899"}, + {file = "scipy-1.11.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea932570b1c2a30edafca922345854ff2cd20d43cd9123b6dacfdecebfc1a80b"}, + {file = "scipy-1.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:4447ad057d7597476f9862ecbd9285bbf13ba9d73ce25acfa4e4b11c6801b4c9"}, + {file = "scipy-1.11.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b0620240ef445b5ddde52460e6bc3483b7c9c750275369379e5f609a1050911c"}, + {file = "scipy-1.11.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f28f1f6cfeb48339c192efc6275749b2a25a7e49c4d8369a28b6591da02fbc9a"}, + {file = "scipy-1.11.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:214cdf04bbae7a54784f8431f976704ed607c4bc69ba0d5d5d6a9df84374df76"}, + {file = "scipy-1.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10eb6af2f751aa3424762948e5352f707b0dece77288206f227864ddf675aca0"}, + {file = "scipy-1.11.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0f3261f14b767b316d7137c66cc4f33a80ea05841b9c87ad83a726205b901423"}, + {file = "scipy-1.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:2c91cf049ffb5575917f2a01da1da082fd24ed48120d08a6e7297dfcac771dcd"}, + {file = "scipy-1.11.2.tar.gz", hash = "sha256:b29318a5e39bd200ca4381d80b065cdf3076c7d7281c5e36569e99273867f61d"}, +] + +[package.dependencies] +numpy = ">=1.21.6,<1.28.0" + +[package.extras] +dev = ["click", 
"cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sklearn" +version = "0.0.post7" +description = "deprecated sklearn package, use scikit-learn instead" +optional = false +python-versions = "*" +files = [ + {file = "sklearn-0.0.post7.tar.gz", hash = "sha256:1c89020b364fdc3aa2839e0ae34e8f0b406669e4b5c2359dda3ac398f9c76874"}, +] + +[[package]] +name = "statsmodels" +version = "0.14.0" +description = "Statistical computations and models for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "statsmodels-0.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:16bfe0c96a53b20fa19067e3b6bd2f1d39e30d4891ea0d7bc20734a0ae95942d"}, + {file = "statsmodels-0.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a6a0a1a06ff79be8aa89c8494b33903442859add133f0dda1daf37c3c71682e"}, + {file = "statsmodels-0.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77b3cd3a5268ef966a0a08582c591bd29c09c88b4566c892a7c087935234f285"}, + {file = "statsmodels-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c64ebe9cf376cba0c31aed138e15ed179a1d128612dd241cdf299d159e5e882"}, + {file = 
"statsmodels-0.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb471f757fc45102a87e5d86e87dc2c8c78b34ad4f203679a46520f1d863b9da"}, + {file = "statsmodels-0.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:582f9e41092e342aaa04920d17cc3f97240e3ee198672f194719b5a3d08657d6"}, + {file = "statsmodels-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ebe885ccaa64b4bc5ad49ac781c246e7a594b491f08ab4cfd5aa456c363a6f6"}, + {file = "statsmodels-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b587ee5d23369a0e881da6e37f78371dce4238cf7638a455db4b633a1a1c62d6"}, + {file = "statsmodels-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef7fa4813c7a73b0d8a0c830250f021c102c71c95e9fe0d6877bcfb56d38b8c"}, + {file = "statsmodels-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:a6ad7b8aadccd4e4dd7f315a07bef1bca41d194eeaf4ec600d20dea02d242fce"}, + {file = "statsmodels-0.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3757542c95247e4ab025291a740efa5da91dc11a05990c033d40fce31c450dc9"}, + {file = "statsmodels-0.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:de489e3ed315bdba55c9d1554a2e89faa65d212e365ab81bc323fa52681fc60e"}, + {file = "statsmodels-0.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76e290f4718177bffa8823a780f3b882d56dd64ad1c18cfb4bc8b5558f3f5757"}, + {file = "statsmodels-0.14.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71054f9dbcead56def14e3c9db6f66f943110fdfb19713caf0eb0f08c1ec03fd"}, + {file = "statsmodels-0.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:d7fda067837df94e0a614d93d3a38fb6868958d37f7f50afe2a534524f2660cb"}, + {file = "statsmodels-0.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1c7724ad573af26139a98393ae64bc318d1b19762b13442d96c7a3e793f495c3"}, + {file = "statsmodels-0.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b0a135f3bfdeec987e36e3b3b4c53e0bb87a8d91464d2fcc4d169d176f46fdb"}, + 
{file = "statsmodels-0.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce28eb1c397dba437ec39b9ab18f2101806f388c7a0cf9cdfd8f09294ad1c799"}, + {file = "statsmodels-0.14.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b1c768dd94cc5ba8398121a632b673c625491aa7ed627b82cb4c880a25563f"}, + {file = "statsmodels-0.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:8d1e3e10dfbfcd58119ba5a4d3c7d519182b970a2aebaf0b6f539f55ae16058d"}, + {file = "statsmodels-0.14.0.tar.gz", hash = "sha256:6875c7d689e966d948f15eb816ab5616f4928706b180cf470fd5907ab6f647a4"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, + {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, +] +packaging = ">=21.3" +pandas = ">=1.0" +patsy = ">=0.5.2" +scipy = ">=1.4,<1.9.2 || >1.9.2" + +[package.extras] +build = ["cython (>=0.29.26)"] +develop = ["colorama", "cython (>=0.29.26)", "cython (>=0.29.28,<3.0.0)", "flake8", "isort", "joblib", "matplotlib (>=3)", "oldest-supported-numpy (>=2022.4.18)", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=7.0.0,<7.1.0)"] +docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"] + +[[package]] +name = "threadpoolctl" +version = "3.2.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"}, + {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"}, +] + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" 
+optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.9,<3.13" +content-hash = "9fbdb7b4f22806fd518d1cf09bd80d0327d6f00776bd242c61a0ac8e4b5e86c1" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..e2f7caf60c8bb61ba3d9f7a72aee65c6ef1ae3fd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "phageterm" +version = "4.1.1" +description = "Using sequencing bias to identify phages terminii and phage genome packaging." 
+authors = ["Marc Monot <marc.monot@pasteur.fr>", + "Julian Garneau <julian.garneau@unil.ch>", + "Veronique Legrand <veronique.legrand@pasteur.fr>"] +license = "AGPLv3+" +readme = "README.md" + +[tool.poetry.dependencies] +python = [">=3.9,<3.13"] +numpy = "^1.25.2" +matplotlib = "^3.7.2" +pandas = "^2.1.0" +reportlab = "^4.0.4" +scipy = "^1.11.2" +statsmodels = "^0.14.0" +sklearn = "^0.0.post7" +scikit-learn = "^1.3.0" + + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/unit-tests/test_functions_PhageTerm_for_multi.py b/unit-tests/test_functions_PhageTerm_for_multi.py index a7c723dd49a6f2f908cb432fb69d345af7c68dcb..7e84e1181ee9f542068cf86921298d32aaf641e0 100755 --- a/unit-tests/test_functions_PhageTerm_for_multi.py +++ b/unit-tests/test_functions_PhageTerm_for_multi.py @@ -1,4 +1,4 @@ -##@file test_functions_PhageTerm_for_GPU.py +##@file test_functions_PhageTerm_for_multi.py # # # Check that readsCoverage can write its result to a file and that they can be read again to retrieve results.