diff --git a/PanACoTA/align_module/pan_to_pergenome.py b/PanACoTA/align_module/pan_to_pergenome.py
index 86d20b50603783ef2e532204283d936c9c8edf8b..1975c9cacaf3119a2472b57a0cbe2b6cec1de2a3 100755
--- a/PanACoTA/align_module/pan_to_pergenome.py
+++ b/PanACoTA/align_module/pan_to_pergenome.py
@@ -108,7 +108,7 @@ def get_per_genome(persgen, list_gen, dname, outdir):
 
     # Sort proteins by strain
     logger.info("Reading PersGenome and constructing lists of missing genomes in each family.")
-    all_prots, fam_genomes, several = proteins_per_strain(persgen)
+    all_prots, fam_genomes, several = proteins_per_strain(persgen, all_genomes)
     # Write output files
     write_getentry_files(all_prots, several, listdir, aldir, dname, all_genomes)
     write_missing_genomes(fam_genomes, several, all_genomes, aldir, dname)
@@ -139,7 +139,7 @@ def get_all_genomes(list_gen):
     return all_genomes
 
 
-def proteins_per_strain(persgen):
+def proteins_per_strain(persgen, all_genomes):
     """
     From the persistentGenome file, get all persistent proteins, and classify them
     according to the strain from which they are.
@@ -154,33 +154,36 @@ def proteins_per_strain(persgen):
     (all_prots, fam_genomes, several) : tuple
 
         * all_prots: dict, {strain: {member: fam_num}}
-        * fam_genomes: dict, {fam_num: [genomes having a member in fam]}
-        * several: dict, {fam_num: [genomes having several members in fam]}
+        * fam_genomes: dict, {fam_num: set(genomes having a member in fam)}
+        * several: dict, {fam_num: set(genomes having several members in fam)}
     """
     logger.info("Getting all persistent proteins and classify by strain.")
+    all_genomes = frozenset(all_genomes)
     all_prots = {}  # {strain: {member: fam_num}}
-    fam_genomes = {}  # {fam_num: [genomes having a member in fam]}
-    several = {}  # {fam_num: [genomes having several members in fam]}
+    fam_genomes = {}  # {fam_num: set(genomes having a member in fam)}
+    several = {}  # {fam_num: set(genomes having several members in fam)}
     with open(persgen, "r") as pgf:
         for line in pgf:
             fam_num = line.split()[0]
             members = line.strip().split()[1:]
-            fam_genomes[fam_num] = []
-            several[fam_num] = []
+            fam_genomes[fam_num] = set()
+            several[fam_num] = set()
             for mem in members:
                 # if format is ESCO.1512.00001.i0002_12124, strain is 3 first fields
                 # separated by '.'
                 if "." in mem and len(mem.split(".")) >= 3:
                     strain = ".".join(mem.split(".")[:3])
+                    if strain not in all_genomes:
+                        strain = "_".join(mem.split("_")[:-1])
                 # if format is not like this, it must be something_00001:
                 else:
                     strain = "_".join(mem.split("_")[:-1])
                 # if strain not already in fam_genomes, add it
                 if strain not in fam_genomes[fam_num]:
-                    fam_genomes[fam_num].append(strain)
+                    fam_genomes[fam_num].add(strain)
                 # If strain already in fam_genomes, it has several members: add it to several
                 elif strain not in several[fam_num]:
-                    several[fam_num].append(strain)
+                    several[fam_num].add(strain)
                 if strain not in all_prots:
                     all_prots[strain] = {}
                 if mem in all_prots[strain]:
@@ -221,9 +224,9 @@ def write_getentry_files(all_prots, several, listdir, aldir, dname, all_genomes)
             error.append(strain)
     if error:
         for gen in error:
-            logger.error(("There is not any protein for genome {} in any family! "
-                          "The program will close, please fix this problem to be able to "
-                          "run the alignments").format(gen))
+            logger.error(f"There is not any protein for genome {gen} in any family! "
+                         "The program will close, please fix this problem to be able to "
+                         "run the alignments")
         sys.exit(1)
 
 
diff --git a/test/test_unit/test_align/test_pan2pergenome.py b/test/test_unit/test_align/test_pan2pergenome.py
index 58e60c5c309e1904b6b8564310ea0d50549c9d6e..12b59dfd0dce9749ccc532c0fcda361224ae2352 100755
--- a/test/test_unit/test_align/test_pan2pergenome.py
+++ b/test/test_unit/test_align/test_pan2pergenome.py
@@ -100,27 +100,29 @@ def test_get_per_genome(caplog):
     assert set(fams) == set(exp_fams)
 
 
-def test_prot_per_strain():
+def test_prot_per_strain_gembase():
     """
-    Test parser of persistent genome file
+    Test parser of persistent genome file when genome names are in gembase format 
+    -> genome files = ESCO.0421.00001.prt and prot names = ESCO.0421.00001.0001i_00001
     """
     pers = os.path.join("test", "data", "persgenome", "exp_files", "exp_pers-floor-mixed.txt")
-    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers)
-    exp_several = {'1': ["GENO.1216.00002"],
-                   '3': [],
-                   '5': [],
-                   '8': [],
-                   '10': [],
-                   '11': [],
-                   '12': ["GENO.1216.00003"]}
+    all_genomes = ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"]
+    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers, all_genomes)
+    exp_several = {'1': {"GENO.1216.00002"},
+                   '3': set(),
+                   '5': set(),
+                   '8': set(),
+                   '10': set(),
+                   '11': set(),
+                   '12': {"GENO.1216.00003"}}
     assert several == exp_several
-    exp_fams = {'1': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"],
-                '3': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"],
-                '5': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"],
-                '8': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"],
-                '10': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"],
-                '11': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"],
-                '12': ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"]}
+    exp_fams = {'1': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"},
+                '3': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"},
+                '5': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"},
+                '8': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"},
+                '10': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"},
+                '11': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"},
+                '12': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"}}
     assert fams_genomes == exp_fams
     exp_prots = {"GEN4.1111.00001": {"GEN4.1111.00001.b0001_00001": '1',
                                      "GEN4.1111.00001.b0001_00009": '3',
@@ -157,18 +159,118 @@ def test_prot_per_strain():
     assert all_prots == exp_prots
 
 
+def test_prot_per_strain_completeg():
+    """
+    Test parser of persistent genome file when genome names are in gembase complete genomes format 
+    -> genome files = ESCO.0421.00001.C001.prt and prot names = ESCO.0421.00001.C001_00001
+    """
+    pers = os.path.join(TESTPATH, "pers_genome_complete-gembase.txt")
+    all_genomes = ["GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002", "GENO.1216.00003.C001"]
+    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers, all_genomes)
+    exp_several = {'1': {"GENO.1216.00002.C002"},
+                   '3': set(),
+                   '5': set(),
+                   '8': set(),
+                   '10': set(),
+                   '11': set(),
+                   '12': {"GENO.1216.00003.C001"}}
+    assert several == exp_several
+    exp_fams = {'1': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002", "GENO.1216.00003.C001"},
+                '3': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002", "GENO.1216.00003.C001"},
+                '5': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002", "GENO.1216.00003.C001"},
+                '8': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002"},
+                '10': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002"},
+                '11': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002"},
+                '12': {"GEN4.1111.00001.C001", "GEN4.1111.00001.P001", "GENO.1216.00002.C002", "GENO.1216.00003.C001"}}
+    assert fams_genomes == exp_fams
+    exp_prots = {"GEN4.1111.00001.C001": {"GEN4.1111.00001.C001_00001": '1',
+                                         "GEN4.1111.00001.C001_00009": '3',
+                                         "GEN4.1111.00001.C001_00002": '5',
+                                         "GEN4.1111.00001.C001_00007": '8',
+                                         "GEN4.1111.00001.C001_00004": '10',
+                                         "GEN4.1111.00001.C001_00005": '11',
+                                         "GEN4.1111.00001.C001_00008": '12'
+                                         },
+                 "GEN4.1111.00001.P001": {"GEN4.1111.00001.P001_00002": '1',
+                                         "GEN4.1111.00001.P001_00011": '3',
+                                         "GEN4.1111.00001.P001_00003": '5',
+                                         "GEN4.1111.00001.P001_00009": '8',
+                                         "GEN4.1111.00001.P001_00004": '10',
+                                         "GEN4.1111.00001.P001_00005": '11',
+                                         "GEN4.1111.00001.P001_00010": '12'
+                                         },
+                 "GENO.1216.00002.C002": {"GENO.1216.00002.C002_00001": '1',
+                                         "GENO.1216.00002.C002_00002": '1',
+                                         "GENO.1216.00002.C002_00010": '3',
+                                         "GENO.1216.00002.C002_00003": '5',
+                                         "GENO.1216.00002.C002_00008": '8',
+                                         "GENO.1216.00002.C002_00005": '10',
+                                         "GENO.1216.00002.C002_00006": '11',
+                                         "GENO.1216.00002.C002_00009": '12'
+                                         },
+                 "GENO.1216.00003.C001": {"GENO.1216.00003.C001_00003": '1',
+                                         "GENO.1216.00003.C001_01010": '3',
+                                         "GENO.1216.00003.C001_00010": '5',
+                                         "GENO.1216.00003.C001_00004": '12',
+                                         "GENO.1216.00003.C001_01000": '12'
+                                         }
+                 }
+    assert all_prots == exp_prots
+
+
+def test_prot_per_strain_noformat():
+    """
+    Test parser of persistent genome file when genome names are anything
+    -> genome files = my.genome-name.prt and prot names = my.genome-name_00001
+    """
+    pers = os.path.join(TESTPATH, "pers_genome_noformat.txt")
+    all_genomes = ["my.genome-name", "my-genome", "genome1", "genome3"]
+    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers, all_genomes)
+    exp_several = {'1': {"genome1"},
+                   '5': set(),
+                   '12':set(),
+                   '32': set()}
+    assert several == exp_several
+    exp_fams = {'1': {"my.genome-name", "my-genome", "genome1", "genome3"},
+                '5': {"my.genome-name", "my-genome", "genome3"},
+                '12': {"my.genome-name", "my-genome", "genome1"},
+                '32': {"my.genome-name", "my-genome", "genome1", "genome3"}}
+    assert fams_genomes == exp_fams
+    exp_prots = {"my.genome-name": {"my.genome-name_1": '1',
+                                     "my.genome-name_2": '5',
+                                     "my.genome-name_3": '12',
+                                     "my.genome-name_4": '32'
+                                     },
+                 "my-genome": {"my-genome_1": '1',
+                              "my-genome_2": '5',
+                              "my-genome_3": '12',
+                              "my-genome_50": '32'
+                              },
+                 "genome1": {"genome1_1": '1', "genome1_10": "1",
+                             "genome1_3": '12',
+                             "genome1_4": '32',
+                             },
+                 "genome3": {"genome3_1": '1',
+                             "genome3_3": '5',
+                             "genome3_4": '32',
+                            }
+                 }
+    assert all_prots == exp_prots
+
+
 def test_prot_per_strain_member_bis(caplog):
     """
     Test parser of persistent genome file when a same member is in 2 different families
     """
     caplog.set_level(logging.DEBUG)
     pers = os.path.join(TESTPATH, "pers_genome_member-bis.txt")
-    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers)
+    all_genomes = ["t", "j"]
+    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers, all_genomes)
     assert "problem: ESCO2_2 already exists, in family 5. Conflict with family 32" in caplog.text
-    exp_several = {'1': [], '5': [], '12': [], '32': []}
+    exp_several = {'1': set(), '5': set(), '12': set(), '32': set()}
     assert several == exp_several
-    exp_fams = {'1': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"], '5': ["ESCO_1", "ESCO2", "ESCO4"],
-                '12': ["ESCO_1", "ESCO2", "ESCO3"], '32': ["ESCO_1", "ESCO2", "ESCO3", "ESCO4"]}
+    exp_fams = {'1': {"ESCO_1", "ESCO2", "ESCO3", "ESCO4"}, '5': {"ESCO_1", "ESCO2", "ESCO4"},
+                '12': {"ESCO_1", "ESCO2", "ESCO3"}, '32': {"ESCO_1", "ESCO2", "ESCO3", "ESCO4"}}
     assert fams_genomes == exp_fams
     exp_prots = {"ESCO_1": {"ESCO_1_1": '1', "ESCO_1_2": '5', "ESCO_1_3": '12', "ESCO_1_4": '32'},
                  "ESCO2": {"ESCO2_1": '1', "ESCO2_2": '32', "ESCO2_3": '12'},
@@ -177,6 +279,34 @@ def test_prot_per_strain_member_bis(caplog):
     assert all_prots == exp_prots
 
 
+def test_prot_per_strain_gembase_member_bis(caplog):
+    """
+    Test parser of persistent genome file when a same member is in 2 different families, with gembase format
+    """
+    caplog.set_level(logging.DEBUG)
+    pers = os.path.join(TESTPATH, "pers_genome_gembase_member-bis.txt")
+    all_genomes = ["GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"]
+    all_prots, fams_genomes, several = p2p.proteins_per_strain(pers, all_genomes)
+    assert "problem: GEN4.1111.00001.0001i_00005 already exists, in family 12. Conflict with family 32" in caplog.text
+    exp_several = {'1': {"GENO.1216.00002"}, '5': set(), '12': set(), '32': {"GENO.1216.00003"}}
+    assert several == exp_several
+    exp_fams = {'1': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"}, 
+                '5': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"},
+                '12': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002"},
+                '32': {"GEN4.1111.00001", "GENO.0817.00001", "GENO.1216.00002", "GENO.1216.00003"}}
+    assert fams_genomes == exp_fams
+    exp_prots = {"GEN4.1111.00001": {"GEN4.1111.00001.0001b_00001": '1', "GEN4.1111.00001.0001b_00009": '5', 
+                                     "GEN4.1111.00001.0001i_00005": '12', "GEN4.1111.00001.0001i_00005": '32'},
+                 "GENO.0817.00001": {"GENO.0817.00001.0001b_00002": '1', "GENO.0817.00001.0002b_00011": '5', 
+                                     "GENO.0817.00001.0002i_00005": '12', "GENO.0817.00001.i0002_00010": '32'},
+                 "GENO.1216.00002": {"GENO.1216.00002.0001b_00001": '1', "GENO.1216.00002.i0001_00002": "1", 
+                                     "GENO.1216.00002.0002b_00010": '5', "GENO.1216.00002.0001i_00006": '12',
+                                     "GENO.1216.00002.b0002_00009": '32'},
+                 "GENO.1216.00003": {"GENO.1216.00003.i0001_00003": '1', "GENO.1216.00003.i0001_01010": '5', 
+                                     "GENO.1216.00003.i0001_00004": '32', "GENO.1216.00003.i0001_01000": '32'}}
+    assert all_prots == exp_prots
+
+
 def test_get_genomes():
     """
     Test parser of list of genomes