# "<word chars>.<digits>_<rest>": the ".<digits>" part is what gets dropped.
# Compiled once at import time instead of once per CSV row.
_REPLICON_VERSION_RE = re.compile(r"^(\w+)\.\d+(_.*)$")


def _strip_replicon_version(sys_id: str) -> str:
    """Return *sys_id* with the ``.<digits>`` part before the first ``_`` suffix removed.

    Identifiers that do not match ``<word>.<digits>_<rest>`` are returned
    unchanged.

    >>> _strip_replicon_version("NC_000913.3_CasFinder_1")
    'NC_000913_CasFinder_1'
    >>> _strip_replicon_version("no_version_here")
    'no_version_here'
    """
    return _REPLICON_VERSION_RE.sub(r"\1\2", sys_id)


@app.command()
def refseq(
    input: Annotated[
        Path,
        typer.Option(
            # The input is only read: it must already exist and be readable.
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Copy a CSV file, removing the version number from every ``sys_id`` value.

    Each row of ``input`` is written to ``output`` with the same columns, in
    the same order. Only the ``sys_id`` column is rewritten, e.g.
    ``NC_000913.3_CasFinder_1`` becomes ``NC_000913_CasFinder_1``. Every
    rewrite is echoed to the console.

    Args:
        input: CSV file to read; its header must contain a ``sys_id`` column.
        output: CSV file to write; overwritten if it already exists.

    Raises:
        typer.BadParameter: if ``input`` and ``output`` are the same file, or
            if ``input`` has no header or no ``sys_id`` column.
    """
    # Opening the output truncates it, so writing onto the input would wipe
    # the data before it is read.
    if Path(input).resolve() == Path(output).resolve():
        raise typer.BadParameter(
            "input and output must be different files", param_hint="--output"
        )

    # newline="" lets the csv module handle line endings itself (required by
    # the csv documentation for both readers and writers).
    with open(input, "r", newline="") as refseq_f:
        reader = csv.DictReader(refseq_f)
        fieldnames = reader.fieldnames
        # fieldnames is None for an empty file; check before touching output.
        if not fieldnames or "sys_id" not in fieldnames:
            raise typer.BadParameter(
                f"no 'sys_id' column found in {input}", param_hint="--input"
            )

        # The output is opened only once the input is known to be usable, so
        # a bad input never creates or truncates the output file.
        with open(output, "w", newline="") as out:
            writer = csv.DictWriter(out, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                sys_id = row["sys_id"]
                # DictReader fills missing trailing cells of a short row with
                # None; such rows are copied through untouched.
                if sys_id is not None:
                    result = _strip_replicon_version(sys_id)
                    console.print(f"[green]{sys_id} -> {result}")
                    row["sys_id"] = result
                writer.writerow(row)