From 5ae1603d5a6caa79fe2d5295904a9bda7cdc4b8b Mon Sep 17 00:00:00 2001
From: Remi  PLANEL <rplanel@pasteur.fr>
Date: Thu, 11 Jan 2024 16:19:07 +0100
Subject: [PATCH] script to remove replicon version

---
 .../df-wiki-cli/df_wiki_cli/content/main.py   | 42 +++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py
index 1affaa6b..e516f383 100644
--- a/packages/df-wiki-cli/df_wiki_cli/content/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py
@@ -13,6 +13,7 @@ from pydantic import BaseModel, ValidationError
 import frontmatter
 from enum import Enum
 from rich.console import Console
+import re
 
 console = Console()
 app = typer.Typer()
@@ -216,9 +217,9 @@ def systems(
                             if "article" in table_data:
                                 sanitizedMetadata["doi"] = table_data["article"]["doi"]
                                 if "abstract" in table_data["article"]:
-                                    sanitizedMetadata["abstract"] = table_data["article"][
-                                        "abstract"
-                                    ]
+                                    sanitizedMetadata["abstract"] = table_data[
+                                        "article"
+                                    ]["abstract"]
                                 del table_data["article"]
                             if "PFAM" in table_data:
                                 del table_data["PFAM"]
@@ -241,3 +242,38 @@ def pae2png(tsv_file, png_file):
     plt.tight_layout()
     plt.savefig(png_file, dpi=150, facecolor=None, transparent=True)
     plt.close()
+
+
+@app.command()
+def refseq(
+    input: Annotated[
+        Path,
+        typer.Option(exists=True, file_okay=True, dir_okay=False, readable=True),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    """Copy the INPUT CSV to OUTPUT, dropping the replicon version from `sys_id`.
+
+    A leading ``<word chars>.<digits>_<rest>`` id becomes ``<word chars>_<rest>``
+    (e.g. ``NC_000913.3_Abi_1`` -> ``NC_000913_Abi_1``); other ids are kept as is.
+    """
+    version_re = re.compile(r"^(\w+)\.\d+(_.*)$")
+    # csv needs newline=""; INPUT is opened first so a read error cannot
+    # truncate OUTPUT. An empty INPUT has no header (fieldnames is None).
+    with open(input, newline="") as refseq_f, open(output, "w", newline="") as out:
+        reader = csv.DictReader(refseq_f)
+        writer = csv.DictWriter(out, fieldnames=reader.fieldnames or [])
+        writer.writeheader()
+        for row in reader:
+            result = version_re.sub(r"\1\2", row["sys_id"])
+            console.print(f"[green]{row['sys_id']} ->  {result}")
+            row["sys_id"] = result
+            writer.writerow(row)
-- 
GitLab