Skip to content
Snippets Groups Projects
Commit 5ae1603d authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

script to remove replicon version

parent ca016a9e
No related branches found
No related tags found
2 merge requests!203Foldseek pages,!186Refactor facet autocomplete
Pipeline #120599 passed
This commit is part of merge request !186. Comments created here will be created in the context of that merge request.
......@@ -13,6 +13,7 @@ from pydantic import BaseModel, ValidationError
import frontmatter
from enum import Enum
from rich.console import Console
import re
# Shared rich console used by the commands below for terminal output.
console = Console()
# Typer application object; functions decorated with @app.command() are
# registered on it as CLI commands.
app = typer.Typer()
......@@ -216,9 +217,9 @@ def systems(
if "article" in table_data:
sanitizedMetadata["doi"] = table_data["article"]["doi"]
if "abstract" in table_data["article"]:
sanitizedMetadata["abstract"] = table_data["article"][
"abstract"
]
sanitizedMetadata["abstract"] = table_data[
"article"
]["abstract"]
del table_data["article"]
if "PFAM" in table_data:
del table_data["PFAM"]
......@@ -241,3 +242,38 @@ def pae2png(tsv_file, png_file):
plt.tight_layout()
plt.savefig(png_file, dpi=150, facecolor=None, transparent=True)
plt.close()
@app.command()
def refseq(
    input: Annotated[
        Path,
        typer.Option(
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            help="CSV file whose 'sys_id' column must be rewritten.",
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
            help="Destination CSV file (created or overwritten).",
        ),
    ],
):
    """Remove the replicon version from the 'sys_id' column of a CSV file.

    Every row of INPUT is copied to OUTPUT with the same columns. In the
    'sys_id' value, a '.<digits>' group sitting between a leading word and
    an underscore-led suffix is dropped (e.g. 'ABC_123.4_rest' becomes
    'ABC_123_rest'). Values that do not match that shape are copied as-is.
    Each mapping 'old -> new' is echoed to the console.

    Raises:
        typer.BadParameter: if INPUT and OUTPUT are the same file, or if
            INPUT has no header containing a 'sys_id' column.
    """
    # Opening the output in "w" mode truncates it, so writing onto the input
    # would destroy the data before a single row has been read.
    if input.resolve() == output.resolve():
        raise typer.BadParameter("output must be a different file than input")

    # Compiled once; the loop below applies it to every row.
    version_pattern = re.compile(r"^(\w+)\.\d+(_.*)$")

    # The csv module requires newline="" on both files so that it controls
    # line endings itself (avoids '\r\r\n' on Windows and keeps newlines
    # embedded in quoted fields intact).
    with open(input, "r", newline="") as refseq_f:
        reader = csv.DictReader(refseq_f)
        fieldnames = reader.fieldnames
        # Validate the header BEFORE touching the output file, so that a bad
        # input never leaves a truncated/empty output behind.
        if not fieldnames or "sys_id" not in fieldnames:
            raise typer.BadParameter(
                f"{input} has no 'sys_id' column in its header"
            )

        with open(output, "w", newline="") as out:
            writer = csv.DictWriter(out, fieldnames=fieldnames)
            writer.writeheader()
            for row in reader:
                original_id = row["sys_id"]
                # A row shorter than the header yields None for the missing
                # cells; copy such rows unchanged instead of crashing.
                if original_id is not None:
                    result = version_pattern.sub(r"\1\2", original_id)
                    console.print(f"[green]{original_id} -> {result}")
                    row["sys_id"] = result
                writer.writerow(row)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment