Skip to content
Snippets Groups Projects
Commit c9989ba1 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

Parse DefenseFinder models to get the list of genes per system and subsystem

parent 16410613
No related branches found
No related tags found
No related merge requests found
Pipeline #128038 failed
...@@ -16,6 +16,8 @@ from rich.console import Console ...@@ -16,6 +16,8 @@ from rich.console import Console
import re import re
import requests import requests
from Bio.PDB import PDBParser, MMCIFIO from Bio.PDB import PDBParser, MMCIFIO
import tarfile
import xml.etree.ElementTree as ET
console = Console() console = Console()
app = typer.Typer() app = typer.Typer()
...@@ -241,6 +243,79 @@ def systems( ...@@ -241,6 +243,79 @@ def systems(
ty.write(json_object) ty.write(json_object)
def _ensure_df_models(version, df_model_dir, df_model_definitions_dir):
    """Download and extract the DefenseFinder models unless already cached.

    The release archive is extracted into ``df_model_dir``; when
    ``df_model_definitions_dir`` already exists the download is skipped so
    repeated runs do not hit the network again.

    Raises:
        requests.HTTPError: if the release archive cannot be downloaded.
    """
    if df_model_definitions_dir.is_dir():
        console.print(f"Using cached models: {df_model_dir}")
        return

    # https://github.com/mdmparis/defense-finder-models/releases/download/1.2.4/defense-finder-models-v1.2.4.tar.gz
    df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz"
    console.print(f"Download models: {df_model_url}")
    response = requests.get(df_model_url, allow_redirects=True, timeout=60)
    # Fail loudly on a wrong version instead of trying to untar an error page.
    response.raise_for_status()

    # The archive only needs to live for the duration of the extraction; a
    # temporary directory guarantees it is removed afterwards.
    with tempfile.TemporaryDirectory() as download_dir:
        archive_path = Path(download_dir) / f"defense-finder-models-v{version}.tar.gz"
        archive_path.write_bytes(response.content)
        console.print("untar file")
        with tarfile.open(archive_path) as archive:
            archive.extractall(df_model_dir)


def _parse_subsystem_genes(xml_path, system, subsystem_name):
    """Return one row per component gene declared in a model definition file.

    Each row is a dict with the keys ``system``, ``subsystem``, ``gene`` and
    ``alternatives``. ``alternatives`` is a comma-separated string of the gene
    names found in the gene's ``<exchangeables>`` element, or ``None`` when the
    gene has no such element.
    """
    root = ET.parse(xml_path).getroot()

    # Genes nested inside <exchangeables> are alternatives of their parent
    # gene; remember them so they are not reported again as components.
    alternative_elements = {
        id(alternative)
        for exchangeables in root.iter("exchangeables")
        for alternative in exchangeables.iter("gene")
    }

    rows = []
    for gene in root.iter("gene"):
        if id(gene) in alternative_elements:
            continue
        exchangeables = gene.find("exchangeables")
        alternatives = None
        if exchangeables is not None:
            alternatives = ",".join(
                alternative.attrib["name"]
                for alternative in exchangeables.iter("gene")
            )
        rows.append(
            {
                "system": system,
                "subsystem": subsystem_name,
                "gene": gene.attrib["name"],
                "alternatives": alternatives,
            }
        )
    return rows


@app.command()
def system_operon_structure(
    version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4",
    output: Annotated[
        Path, typer.Option(help="CSV file the gene list is written to")
    ] = Path("/tmp/log"),
):
    """List the genes of every DefenseFinder system and subsystem as CSV."""
    # Models are cached in the system temporary directory, one folder per
    # version.
    tmp_root_dir = tempfile.gettempdir()
    df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-v{version}")
    df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions"
    _ensure_df_models(version, df_model_dir, df_model_definitions_dir)

    # Extract the gene list of each system and subsystem. Directory listings
    # are sorted because iterdir()/glob() order is arbitrary and the CSV should
    # be reproducible between runs.
    system_genes = []
    for category_path in sorted(df_model_definitions_dir.iterdir()):
        if not category_path.is_dir():
            continue
        for system_path in sorted(category_path.iterdir()):
            if not system_path.is_dir():
                continue
            system = system_path.name
            console.rule(system)
            for subsystem_path in sorted(system_path.glob("*.xml")):
                subsystem_name = subsystem_path.stem
                console.print(subsystem_name)
                system_genes.extend(
                    _parse_subsystem_genes(subsystem_path, system, subsystem_name)
                )

    # newline="" is required by the csv module to avoid blank lines / mangled
    # line endings on some platforms.
    with open(output, "w", newline="", encoding="utf-8") as f:
        fieldnames = ["system", "subsystem", "gene", "alternatives"]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(system_genes)
TMP_CIF = """ TMP_CIF = """
# #
loop_ loop_
...@@ -511,7 +586,7 @@ def markdown( ...@@ -511,7 +586,7 @@ def markdown(
with open(dst, "r+") as f: with open(dst, "r+") as f:
all_file = f.read() all_file = f.read()
if ( if (
re.search( re.search(
r"#{2}\s+Structure", r"#{2}\s+Structure",
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment