Skip to content
Snippets Groups Projects

List system ms

Merged Remi PLANEL requested to merge list-system-ms into main
5 files
+ 121
4
Compare changes
  • Side-by-side
  • Inline
Files
5
import typer
import sys
import re
import json
import pandas as pd
from pandas.errors import InvalidIndexError
import shutil
from typing_extensions import Annotated
from typing import Optional, List
@@ -111,3 +114,80 @@ def structure(
systemDir = output / system
systemDir.mkdir(parents=True, exist_ok=True)
shutil.copy2(f, systemDir)
@app.command()
def systems(
    dir: Annotated[
        Path,
        typer.Option(exists=False, file_okay=False, readable=True, dir_okay=True),
    ],
    pfam: Annotated[
        Path,
        typer.Option(
            exists=False,
            file_okay=True,
            writable=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Export the front matter of every system page in ``dir`` as a JSON list.

    For each ``*.md`` file directly inside ``dir`` the front matter is parsed,
    its ``tableColumns`` mapping is merged into the top-level metadata, the
    ``article`` sub-mapping is flattened into ``doi`` / ``abstract`` keys, and
    the comma-separated ``PFAM`` accessions are replaced by the matching rows
    of the Pfam table.

    Args:
        dir: Directory containing the markdown pages to scan (not recursive).
        pfam: CSV file with an ``AC`` column, used as the lookup index.
        output: File that receives the JSON document; it is overwritten.

    Raises:
        KeyError: If a page has an ``article`` entry without a ``doi`` key.
    """
    # The table is fully loaded here, so the file handle can be released
    # before the (potentially long) scan of the pages starts.
    with open(pfam, "r") as pf:
        pfam_df = pd.read_csv(pf, index_col="AC", keep_default_na=False)

    system_records = []
    for file in dir.iterdir():
        if file.suffix != ".md":
            continue
        console.rule(f"[bold blue]{file.name}", style="blue")
        with open(file) as f:
            metadata, _ = frontmatter.parse(f.read())

        # Not every page is guaranteed to declare a layout; dropping it must
        # not abort the whole export.
        metadata.pop("layout", None)

        # NOTE(review): nesting was reconstructed from a source whose
        # indentation was lost; pages without "tableColumns" are skipped
        # because the merge below needs that mapping -- confirm.
        if "tableColumns" not in metadata:
            continue
        table_data = metadata.pop("tableColumns")

        # Start from an empty list for every page so that a page without a
        # PFAM column neither fails nor inherits the previous page's entries.
        pfam_metadata = []
        if "PFAM" in table_data:
            accessions = [
                accession.strip() for accession in table_data["PFAM"].split(",")
            ]
            for accession in accessions:
                try:
                    # List-based .loc raises KeyError for an unknown label.
                    pfam_obj = pfam_df.loc[[accession]]
                except KeyError as err:
                    console.print(f"[bold red]{err}", style="red")
                    console.print(
                        f"[bold red]No pfam entry for {accession}",
                        style="red",
                    )
                    continue
                entry = pfam_obj.to_dict(orient="index")[accession]
                # The accession is the index, so it is absent from the row
                # values; put it back so each record is self-describing.
                entry["AC"] = accession
                pfam_metadata.append(entry)

        if "article" in table_data:
            article = table_data.pop("article")
            metadata["doi"] = article["doi"]
            if "abstract" in article:
                metadata["abstract"] = article["abstract"]

        # tableColumns keys win over top-level front-matter keys on conflict.
        sanitized_metadata = {**metadata, **table_data}
        sanitized_metadata["PFAM"] = pfam_metadata
        system_records.append(sanitized_metadata)

    # Write mode, not append: appending a second JSON array to an existing
    # file would leave it unparseable.
    with open(output, "w") as ty:
        ty.write(json.dumps(system_records, indent=2))
Loading