Skip to content
Snippets Groups Projects
Commit 66d300bc authored by Amandine  PERRIN's avatar Amandine PERRIN
Browse files

Sequences are now correctly formatted (for prokka cut and no cut, and for prodigal cut)

parent 1d87b4eb
No related branches found
No related tags found
No related merge requests found
...@@ -86,8 +86,6 @@ def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, prodigal_only, logger, q ...@@ -86,8 +86,6 @@ def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, prodigal_only, logger, q
del genomes[gen] del genomes[gen]
if not quiet: if not quiet:
bar.finish() bar.finish()
import sys
sys.exit(0)
def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, logger): def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, logger):
...@@ -126,8 +124,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l ...@@ -126,8 +124,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
bool bool
True if genome analysis went well, False otherwise True if genome analysis went well, False otherwise
""" """
print("***** GENOME genome analyzed:" + genome)
gpath, grespath = get_output_dir(prodigal_only, dbpath, tmp_path, genome, cut, pat) gpath, grespath = get_output_dir(prodigal_only, dbpath, tmp_path, genome, cut, pat)
# Open original sequence file # Open original sequence file
...@@ -145,7 +141,7 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l ...@@ -145,7 +141,7 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# Read each line of original sequence # Read each line of original sequence
for line in genf: for line in genf:
#### NEW CONTIG #### NEW CONTIG
# If line corresponding to a new contig # Line corresponding to a new contig
if line.startswith(">"): if line.startswith(">"):
# If not first contig, write info to output file (of needed) # If not first contig, write info to output file (of needed)
if cur_seq != "": if cur_seq != "":
...@@ -161,11 +157,13 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l ...@@ -161,11 +157,13 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# If prokka, contig name is 1st word, 1st 15 characters # If prokka, contig name is 1st word, 1st 15 characters
else: else:
cur_contig_name = line.split()[0][:15] cur_contig_name = line.split()[0][:15]
# Initialize for next contig
cur_seq = "" cur_seq = ""
# #### SEQUENCE LINE # #### SEQUENCE LINE
# If sequence line, keep it, all in upper case # If sequence line, keep it, all in upper case
else: else:
cur_seq += line.strip().upper() + "\n" # Add this line without \n to sequence (1 sequence per line)
cur_seq += line.strip().upper()
# LAST CONTIG # LAST CONTIG
if cur_contig_name != "": if cur_contig_name != "":
...@@ -189,7 +187,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l ...@@ -189,7 +187,6 @@ def analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, prodigal_only, l
# If we wrote a new sequence file, close it # If we wrote a new sequence file, close it
if grespath: if grespath:
gresf.close() gresf.close()
print(genomes)
return True return True
...@@ -277,16 +274,18 @@ def format_contig(cut, pat, cur_seq, cur_contig_name, contig_sizes, gresf, num, ...@@ -277,16 +274,18 @@ def format_contig(cut, pat, cur_seq, cur_contig_name, contig_sizes, gresf, num,
bool bool
True if contig has been written without problem, False if any problem True if contig has been written without problem, False if any problem
""" """
# "CUT" if cut: need to cut at each 'pat' -> write new header + seq in new file # "CUT" if cut: need to cut at each 'pat' -> write new header + seq in new file
if cut: if cut:
# Cut sequence and write header + sequence to res file # Cut sequence and write header + sequence to res file
num = split_contig(pat, cur_seq, cur_contig_name, contig_sizes, gresf, num) num = split_contig(pat, cur_seq, cur_contig_name, contig_sizes, gresf, num)
# PROKKA User does not want to cut, but will annotate with prokka # PROKKA User does not want to cut, but will annotate with prokka, so we still
# have to create a new sequence file
elif gresf: elif gresf:
gresf.write("{}_{}\n".format(cur_contig_name, num)) new_contig_name = "{}_{}\n".format(cur_contig_name, num)
gresf.write(cur_seq) gresf.write(new_contig_name)
contig_sizes[cur_contig_name] = len(cur_seq) gresf.write(cur_seq + "\n")
contig_sizes[new_contig_name] = len(cur_seq)
num += 1
# PRODIGAL No cut, and prodigal used -> no new file created, but check # PRODIGAL No cut, and prodigal used -> no new file created, but check
# contig unique names # contig unique names
else: else:
...@@ -333,9 +332,9 @@ def split_contig(pat, whole_seq, cur_contig_name, contig_sizes, gresf, num): ...@@ -333,9 +332,9 @@ def split_contig(pat, whole_seq, cur_contig_name, contig_sizes, gresf, num):
# we get empty contigs, if 2 occurrences of the pattern are side by side). # we get empty contigs, if 2 occurrences of the pattern are side by side).
if len(seq) == 0: if len(seq) == 0:
continue continue
cur_name = cur_contig_name + "_" + str(num) new_contig_name = "{}_{}\n".format(cur_contig_name, num)
contig_sizes[cur_name] = len(seq) contig_sizes[cur_name] = len(seq)
gresf.write(cur_name + "\n") gresf.write(new_contig_name)
gresf.write(seq + "\n") gresf.write(seq + "\n")
num += 1 num += 1
return num return num
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment