From 3d00a27da2dc1ae9cb8cb8169cdf8aa6ccfd87fb Mon Sep 17 00:00:00 2001 From: Remi PLANEL <rplanel@pasteur.fr> Date: Wed, 5 Jun 2024 11:30:19 +0200 Subject: [PATCH] Eleos special case --- .../df-wiki-cli/df_wiki_cli/content/main.py | 114 ++++++++++++------ 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py index 8bef84e4..1882aece 100644 --- a/packages/df-wiki-cli/df_wiki_cli/content/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py @@ -304,44 +304,82 @@ def system_operon_structure( in_exchangeables = False current_gene = {} exchangeables = [] - with open(def_path["path"]) as file: - for event, elem in ET.iterparse(file, events=("start", "end")): - if event == "start": - if ( - elem.tag == "gene" - and not in_exchangeables - and elem.attrib["presence"] != "forbidden" - ): - current_gene = { - "system": system, - "subsystem": subsystem, - "gene": elem.attrib["name"], - "version": def_path["version"], - "exchangeables": None, - } - - # get hmm definition to extract the size - base_path_list = def_path["path"].parts[0:-4] - profile_path = ( - Path(*base_path_list) - / f"profiles/{current_gene['gene']}.hmm" - ) - with open(profile_path) as profile_file: - for line in profile_file: - if line.startswith("LENG"): - size = int(re.split(r"\s+", line)[1]) - current_gene["size"] = size - system_genes.append(current_gene) - if elem.tag == "gene" and in_exchangeables: - exchangeables.append(elem.attrib["name"]) - if elem.tag == "exchangeables": - in_exchangeables = True - exchangeables = [] - elif event == "end": - if elem.tag == "exchangeables": - in_exchangeables = False - current_gene["exchangeables"] = ",".join(exchangeables) - exchangeables = [] + + # <model inter_gene_max_space="5" min_mandatory_genes_required="2" min_genes_required="2" vers="2.0"> + + # <gene name="Eleos__LeoA" presence="mandatory"> + # <exchangeables> + # <gene name="Eleos__LeoA2"/> + # </exchangeables> + # </gene> + + # <gene name="Eleos__LeoBC" presence="mandatory"/> + + # <gene name="Eleos__LeoB" presence="mandatory"/> + + # <gene name="Eleos__LeoC" presence="mandatory"/> + + # </model> + if system == 'Eleos': + eleos_struct = [ + { + "system": system, + "subsystem": "Eleos", + "gene": "Eleos__LeoA", + "version": "1.2.4", + "exchangeables": "Eleos__LeoA2", + "size": 559, + }, + { + "system": system, + "subsystem": "Eleos", + "gene": "Eleos__LeoBC", + "version": "1.2.4", + "exchangeables": None, + "size": 601, + }, + + ] + system_genes += eleos_struct + else: + with open(def_path["path"]) as file: + for event, elem in ET.iterparse(file, events=("start", "end")): + if event == "start": + if ( + elem.tag == "gene" + and not in_exchangeables + and elem.attrib["presence"] != "forbidden" + ): + current_gene = { + "system": system, + "subsystem": subsystem, + "gene": elem.attrib["name"], + "version": def_path["version"], + "exchangeables": None, + } + + # get hmm definition to extract the size + base_path_list = def_path["path"].parts[0:-4] + profile_path = ( + Path(*base_path_list) + / f"profiles/{current_gene['gene']}.hmm" + ) + with open(profile_path) as profile_file: + for line in profile_file: + if line.startswith("LENG"): + size = int(re.split(r"\s+", line)[1]) + current_gene["size"] = size + system_genes.append(current_gene) + if elem.tag == "gene" and in_exchangeables: + exchangeables.append(elem.attrib["name"]) + if elem.tag == "exchangeables": + in_exchangeables = True + exchangeables = [] + elif event == "end": + if elem.tag == "exchangeables": + in_exchangeables = False + current_gene["exchangeables"] = ",".join(exchangeables) + exchangeables = [] with open(output, "w") as f: fieldnames = [ -- GitLab