diff --git a/PanACoTA/align_module/__init__.py b/PanACoTA/align_module/__init__.py index 494d571b2adf7bf322b7fce86cf3059084074b7f..abc8c3e44e9fd9fc344377a67b01e2a5ab70c0ed 100755 --- a/PanACoTA/align_module/__init__.py +++ b/PanACoTA/align_module/__init__.py @@ -1,3 +1,35 @@ +# ############################################################################### +# This file is part of PanACoTA. # +# # +# Authors: Amandine Perrin # +# Copyright © 2018-2020 Institut Pasteur (Paris). # +# See the COPYRIGHT file for details. # +# # +# PanACoTA is software providing tools for large scale bacterial comparative # +# genomics. From a set of complete and/or draft genomes, you can: # +# - Do a quality control of your strains, to eliminate poor quality # +# genomes, which would not give any information for the comparative study # +# - Uniformly annotate all genomes # +# - Do a Pan-genome # +# - Do a Core or Persistent genome # +# - Align all Core/Persistent families # +# - Infer a phylogenetic tree from the Core/Persistent families # +# # +# PanACoTA is free software: you can redistribute it and/or modify it under the # +# terms of the GNU Affero General Public License as published by the Free # +# Software Foundation, either version 3 of the License, or (at your option) # +# any later version. # +# # +# PanACoTA is distributed in the hope that it will be useful, but WITHOUT ANY # +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # +# for more details. # +# # +# You should have received a copy of the GNU Affero General Public License # +# along with PanACoTA (COPYING file). # +# If not, see <https://www.gnu.org/licenses/>. 
# +# ############################################################################### + """ PanACoTA align submodule """ diff --git a/PanACoTA/align_module/alignment.py b/PanACoTA/align_module/alignment.py index 2ea7c08ae40f130c2d3cd6c6b0633a7aa768ca8f..a34c0c9b2bda3302b77c80d4b18bc35ef0c38e85 100755 --- a/PanACoTA/align_module/alignment.py +++ b/PanACoTA/align_module/alignment.py @@ -1,6 +1,38 @@ #!/usr/bin/env python3 # coding: utf-8 +# ############################################################################### +# This file is part of PanACoTA. # +# # +# Authors: Amandine Perrin # +# Copyright © 2018-2020 Institut Pasteur (Paris). # +# See the COPYRIGHT file for details. # +# # +# PanACoTA is software providing tools for large scale bacterial comparative # +# genomics. From a set of complete and/or draft genomes, you can: # +# - Do a quality control of your strains, to eliminate poor quality # +# genomes, which would not give any information for the comparative study # +# - Uniformly annotate all genomes # +# - Do a Pan-genome # +# - Do a Core or Persistent genome # +# - Align all Core/Persistent families # +# - Infer a phylogenetic tree from the Core/Persistent families # +# # +# PanACoTA is free software: you can redistribute it and/or modify it under the # +# terms of the GNU Affero General Public License as published by the Free # +# Software Foundation, either version 3 of the License, or (at your option) # +# any later version. # +# # +# PanACoTA is distributed in the hope that it will be useful, but WITHOUT ANY # +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # +# for more details. # +# # +# You should have received a copy of the GNU Affero General Public License # +# along with PanACoTA (COPYING file). # +# If not, see <https://www.gnu.org/licenses/>. 
# +# ############################################################################### + """ For a given family: diff --git a/PanACoTA/align_module/get_seqs.py b/PanACoTA/align_module/get_seqs.py index 15da584b579eb4b49a3c88db8ebc709d8b17b03f..dd7d849f15df5a11eae2c81835b05f0d05476d07 100755 --- a/PanACoTA/align_module/get_seqs.py +++ b/PanACoTA/align_module/get_seqs.py @@ -1,6 +1,38 @@ #!/usr/bin/env python3 # coding: utf-8 +# ############################################################################### +# This file is part of PanACoTA. # +# # +# Authors: Amandine Perrin # +# Copyright © 2018-2020 Institut Pasteur (Paris). # +# See the COPYRIGHT file for details. # +# # +# PanACoTA is software providing tools for large scale bacterial comparative # +# genomics. From a set of complete and/or draft genomes, you can: # +# - Do a quality control of your strains, to eliminate poor quality # +# genomes, which would not give any information for the comparative study # +# - Uniformly annotate all genomes # +# - Do a Pan-genome # +# - Do a Core or Persistent genome # +# - Align all Core/Persistent families # +# - Infer a phylogenetic tree from the Core/Persistent families # +# # +# PanACoTA is free software: you can redistribute it and/or modify it under the # +# terms of the GNU Affero General Public License as published by the Free # +# Software Foundation, either version 3 of the License, or (at your option) # +# any later version. # +# # +# PanACoTA is distributed in the hope that it will be useful, but WITHOUT ANY # +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # +# for more details. # +# # +# You should have received a copy of the GNU Affero General Public License # +# along with PanACoTA (COPYING file). # +# If not, see <https://www.gnu.org/licenses/>. 
# +# ############################################################################### + import sys import os import logging @@ -225,7 +257,7 @@ def extract_sequences(to_extract, fasf, files_todo=None, outf=None): # State machine variables previous_fp = None - + for line in fasf: if line[0] == '>': # Close previous file if needed diff --git a/PanACoTA/align_module/pan_to_pergenome.py b/PanACoTA/align_module/pan_to_pergenome.py index 7d5a48aa509a352dd3d597ced6c82066d0b5995b..86d20b50603783ef2e532204283d936c9c8edf8b 100755 --- a/PanACoTA/align_module/pan_to_pergenome.py +++ b/PanACoTA/align_module/pan_to_pergenome.py @@ -1,6 +1,38 @@ #!/usr/bin/env python3 # coding: utf-8 +# ############################################################################### +# This file is part of PanACoTA. # +# # +# Authors: Amandine Perrin # +# Copyright © 2018-2020 Institut Pasteur (Paris). # +# See the COPYRIGHT file for details. # +# # +# PanACoTA is software providing tools for large scale bacterial comparative # +# genomics. From a set of complete and/or draft genomes, you can: # +# - Do a quality control of your strains, to eliminate poor quality # +# genomes, which would not give any information for the comparative study # +# - Uniformly annotate all genomes # +# - Do a Pan-genome # +# - Do a Core or Persistent genome # +# - Align all Core/Persistent families # +# - Infer a phylogenetic tree from the Core/Persistent families # +# # +# PanACoTA is free software: you can redistribute it and/or modify it under the # +# terms of the GNU Affero General Public License as published by the Free # +# Software Foundation, either version 3 of the License, or (at your option) # +# any later version. # +# # +# PanACoTA is distributed in the hope that it will be useful, but WITHOUT ANY # +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # +# for more details. 
# +# # +# You should have received a copy of the GNU Affero General Public License # +# along with PanACoTA (COPYING file). # +# If not, see <https://www.gnu.org/licenses/>. # +# ############################################################################### + """ From the Persistent Genome file, group all persistent proteins per genome, in order to be able to extract them faster after. diff --git a/PanACoTA/align_module/post_align.py b/PanACoTA/align_module/post_align.py index 6b9a7e4a6f3948c51b530c0cba8f100a57294c33..c0149a34dba8b05415506fe07f1f95a4b3f61088 100755 --- a/PanACoTA/align_module/post_align.py +++ b/PanACoTA/align_module/post_align.py @@ -1,6 +1,38 @@ #!/usr/bin/env python3 # coding: utf-8 +# ############################################################################### +# This file is part of PanACoTA. # +# # +# Authors: Amandine Perrin # +# Copyright © 2018-2020 Institut Pasteur (Paris). # +# See the COPYRIGHT file for details. # +# # +# PanACoTA is software providing tools for large scale bacterial comparative # +# genomics. From a set of complete and/or draft genomes, you can: # +# - Do a quality control of your strains, to eliminate poor quality # +# genomes, which would not give any information for the comparative study # +# - Uniformly annotate all genomes # +# - Do a Pan-genome # +# - Do a Core or Persistent genome # +# - Align all Core/Persistent families # +# - Infer a phylogenetic tree from the Core/Persistent families # +# # +# PanACoTA is free software: you can redistribute it and/or modify it under the # +# terms of the GNU Affero General Public License as published by the Free # +# Software Foundation, either version 3 of the License, or (at your option) # +# any later version. # +# # +# PanACoTA is distributed in the hope that it will be useful, but WITHOUT ANY # +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # +# for more details. 
# +# # +# You should have received a copy of the GNU Affero General Public License # +# along with PanACoTA (COPYING file). # +# If not, see <https://www.gnu.org/licenses/>. # +# ############################################################################### + """ Concatenate all alignment files of all families. Then, group alignments by genome.