diff --git a/PanACoTA/utils.py b/PanACoTA/utils.py index e49c0b7c5f5111555de436c819db3dbacf2a023f..9d29116a8234a53b73e598628d80a440bf15128f 100755 --- a/PanACoTA/utils.py +++ b/PanACoTA/utils.py @@ -1098,10 +1098,10 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): Returns ------- tuple - - List of all contigs with their original and new name: (list of str) - ["contig1'\t'orig_name1", "contig2'\t'orig_name2" ...] + - Dict mapping each new contig name to its original name: (dict of str: str) + {">new_name": ">orig_name"} - List of all contigs with their size: (list of str) - ["contig1'\t'size1", "contig2'\t'size2" ...] + {">new_name": size} """ # Initialize variables @@ -1110,9 +1110,9 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): # contig size cont_size = 0 # List of contigs (str) [<name>\t<orig_name>] - contigs = [] + contigs = {} # List of contigs (str) with their sizes [<name>\t<size>] - sizes = [] + sizes = {} # Name of previous contig (to put to contigs, as we need to wait for the next # contig to know the size of the previous one) prev_cont = "" @@ -1133,12 +1133,10 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): # - write header ("<contig name> <size>") to replicon file if prev_cont: cont = "\t".join([prev_cont, str(cont_size)]) + "\n" - sizes.append(cont.strip()) - cor = "\t".join([prev_cont, prev_orig_name]) - contigs.append(cor) + sizes[prev_cont] = cont_size + contigs[prev_cont] = prev_orig_name grf.write(cont) grf.write(seq) - prev_cont = ">" + gembase_name + "." 
+ str(contig_num).zfill(4) prev_orig_name = line.strip() contig_num += 1 @@ -1150,9 +1148,8 @@ def get_genome_contigs_and_rename(gembase_name, gpath, outfile): cont_size += len(line.strip()) # Write last contig cont = "\t".join([prev_cont, str(cont_size)]) + "\n" - sizes.append(cont.strip()) - cor = "\t".join([prev_cont, prev_orig_name]) - contigs.append(cor) + sizes[prev_cont] = cont_size + contigs[prev_cont] = prev_orig_name grf.write(cont) grf.write(seq) return contigs, sizes diff --git a/test/test_unit/test_utils.py b/test/test_unit/test_utils.py index de78a587e5190aa0601c8b2ee6b9139f3f32b4f5..9856a54312792fc5b5fc4e7f0376aed3d117d3ef 100755 --- a/test/test_unit/test_utils.py +++ b/test/test_unit/test_utils.py @@ -907,12 +907,12 @@ def test_rename_contigs(): exp_file = os.path.join(DATA_DIR, "exp_files", "res_H299_H561-ESCO00005.fna") contigs, sizes = utils.get_genome_contigs_and_rename(gembase_name, gpath, outfile) print(sizes) - assert contigs == [">ESCO.0216.00005.0001\t>H561_S27 L001_R1_001_(paired)_contig_1", - ">ESCO.0216.00005.0002\t>H561_S28 L001_R1_001_(paired)_contig_2", - ">ESCO.0216.00005.0003\t>H561_S29 L001_R1_001_(paired)_contig_115"] - assert sizes == [">ESCO.0216.00005.0001\t3480", - ">ESCO.0216.00005.0002\t7080", - ">ESCO.0216.00005.0003\t2583"] + assert contigs == {">ESCO.0216.00005.0001":">H561_S27 L001_R1_001_(paired)_contig_1", + ">ESCO.0216.00005.0002":">H561_S28 L001_R1_001_(paired)_contig_2", + ">ESCO.0216.00005.0003":">H561_S29 L001_R1_001_(paired)_contig_115"} + assert sizes == {">ESCO.0216.00005.0001":3480, + ">ESCO.0216.00005.0002":7080, + ">ESCO.0216.00005.0003":2583} assert utilities.compare_order_content(outfile, exp_file)