def _fetch_models(url: str, dest_dir: Path) -> None:
    """Download the model archive at *url* and unpack it into *dest_dir*.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        tarfile.TarError: if the downloaded payload is not a readable archive.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import tarfile

    response = requests.get(url, allow_redirects=True, timeout=60)
    response.raise_for_status()
    # Spool the archive through an anonymous temporary file: it is closed and
    # removed automatically, so no descriptor or stray file is left behind.
    with tempfile.TemporaryFile() as archive_file:
        archive_file.write(response.content)
        archive_file.seek(0)
        with tarfile.open(fileobj=archive_file) as archive:
            # Prefer the "data" extraction filter when the running Python
            # provides it, so archive members cannot escape dest_dir.
            if hasattr(tarfile, "data_filter"):
                archive.extractall(dest_dir, filter="data")
            else:
                archive.extractall(dest_dir)


def _read_model_genes(xml_path: Path, system: str) -> list:
    """Return one row per ``gene`` element found in the model file *xml_path*.

    Each row is a dict with the keys ``system``, ``subsystem`` (the file name
    without its extension), ``gene`` (the element's ``name`` attribute) and
    ``alternatives``: the comma-joined names of the genes nested in the
    element's ``exchangeables`` child, or ``None`` when it has no such child.

    Raises:
        KeyError: if a ``gene`` element has no ``name`` attribute.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import xml.etree.ElementTree as ET

    subsystem_name = xml_path.stem
    rows = []
    # Passing the path (not a text-mode handle) lets the parser honour the
    # encoding declared in the XML prolog.
    root = ET.parse(xml_path).getroot()
    # NOTE(review): iter("gene") also visits the genes nested inside
    # <exchangeables>, so each alternative additionally gets a row of its own.
    # This mirrors the previous output; confirm that it is intended.
    for gene in root.iter("gene"):
        alternatives = None
        exchangeables = gene.find("exchangeables")
        if exchangeables is not None:
            alternatives = ",".join(
                ex_gene.attrib["name"] for ex_gene in exchangeables.iter("gene")
            )
        rows.append(
            {
                "system": system,
                "subsystem": subsystem_name,
                "gene": gene.attrib["name"],
                "alternatives": alternatives,
            }
        )
    return rows


@app.command()
def system_operon_structure(
    version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4",
    output: Annotated[
        Path, typer.Option(help="CSV file the gene table is written to")
    ] = Path("/tmp/log"),
):
    """Write a CSV table of the genes declared by each defense-finder model.

    The models for ``version`` are looked up in
    ``<tmp>/defense-finder-models-v<version>``; when that directory holds no
    definitions yet, the release archive is downloaded from GitHub and
    extracted there first. Every ``*.xml`` file found two directory levels
    below ``definitions`` is parsed, and one row per gene is written to
    ``output`` with the columns ``system``, ``subsystem``, ``gene`` and
    ``alternatives``.

    Args:
        version: release number of the defense-finder models to read.
        output: destination of the CSV file.

    Raises:
        typer.Exit: with code 1 when no definitions directory is available
            even after the download.
    """
    # https://github.com/mdmparis/defense-finder-models/releases/download/1.2.4/defense-finder-models-v1.2.4.tar.gz
    df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz"
    df_model_dir = Path(tempfile.gettempdir()) / f"defense-finder-models-v{version}"
    df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions"

    # Reuse a previously extracted copy; only hit the network when needed.
    if not df_model_definitions_dir.is_dir():
        console.print(f"Download models: {df_model_url}")
        _fetch_models(df_model_url, df_model_dir)
    if not df_model_definitions_dir.is_dir():
        console.print(f"No model definitions found in {df_model_definitions_dir}")
        raise typer.Exit(code=1)

    system_genes = []
    # Sorting makes the row order reproducible instead of depending on the
    # order in which the filesystem happens to list entries. Plain files at
    # either directory level are skipped rather than crashing iterdir().
    for group_dir in sorted(
        p for p in df_model_definitions_dir.iterdir() if p.is_dir()
    ):
        for system_path in sorted(p for p in group_dir.iterdir() if p.is_dir()):
            system = system_path.name
            console.rule(system)
            for model_file in sorted(
                p for p in system_path.iterdir() if p.name.endswith(".xml")
            ):
                console.print(model_file.stem)
                system_genes.extend(_read_model_genes(model_file, system))

    # newline="" is what the csv module expects from the file it writes to.
    with open(output, "w", newline="") as f:
        fieldnames = ["system", "subsystem", "gene", "alternatives"]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(system_genes)