diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py
index 32333d1601245e3c17bec85923cd043c05e5f621..e027a26285db6afbffb0d795b57f24ecb1d5b34a 100644
--- a/packages/df-wiki-cli/df_wiki_cli/content/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py
@@ -1,6 +1,9 @@
 import typer
 import sys
 import re
+import json
+import pandas as pd
+from pandas.errors import InvalidIndexError
 import shutil
 from typing_extensions import Annotated
 from typing import Optional, List
@@ -111,3 +114,92 @@ def structure(
             systemDir = output / system
             systemDir.mkdir(parents=True, exist_ok=True)
             shutil.copy2(f, systemDir)
+
+
+@app.command()
+def systems(
+    dir: Annotated[
+        Path,
+        typer.Option(exists=False, file_okay=False, readable=True, dir_okay=True),
+    ],
+    pfam: Annotated[
+        Path,
+        typer.Option(
+            exists=False,
+            file_okay=True,
+            writable=True,
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    """Export the front matter of the system pages in DIR as one JSON array.
+
+    Every ``*.md`` file directly inside ``dir`` is parsed with ``frontmatter``.
+    A page that declares ``tableColumns`` yields one record: its remaining
+    front-matter keys merged with the table columns. The ``article`` column is
+    flattened into ``doi``/``abstract`` and the comma-separated ``PFAM``
+    accessions are replaced by the matching rows of the ``pfam`` CSV (indexed
+    on its ``AC`` column). Accessions absent from the CSV are reported on the
+    console and skipped; pages without ``tableColumns`` are skipped too.
+
+    ``output`` is overwritten, so it always holds a single JSON document.
+    """
+    with open(pfam, "r") as pf:
+        pfam_df = pd.read_csv(pf, index_col="AC", keep_default_na=False)
+
+    records = []
+    for file in dir.iterdir():
+        if file.suffix != ".md":
+            continue
+        console.rule(f"[bold blue]{file.name}", style="blue")
+        with open(file) as f:
+            metadata, _ = frontmatter.parse(f.read())
+        # pop() rather than del: a page may lack either key.
+        metadata.pop("layout", None)
+        table_data = metadata.pop("tableColumns", None)
+        if table_data is None:
+            continue
+
+        # Rebuilt for every page so one page never inherits another's PFAM list.
+        pfam_metadata = []
+        # Named "accession" so the loop does not shadow the ``pfam`` parameter.
+        for accession in table_data.get("PFAM", "").split(","):
+            accession = accession.strip()
+            if not accession:
+                continue
+            try:
+                pfam_obj = pfam_df.loc[[accession]]
+            except KeyError as err:
+                console.print(f"[bold red]{err}", style="red")
+                console.print(
+                    f"[bold red]No pfam entry for {accession}",
+                    style="red",
+                )
+                continue
+            pfam_to_dict = pfam_obj.to_dict(orient="index")
+            pfam_to_dict[accession]["AC"] = accession
+            pfam_metadata.append(pfam_to_dict[accession])
+
+        article = table_data.pop("article", None)
+        if article is not None:
+            metadata["doi"] = article["doi"]
+            if "abstract" in article:
+                metadata["abstract"] = article["abstract"]
+
+        sanitizedMetadata = {**metadata, **table_data}
+        sanitizedMetadata["PFAM"] = pfam_metadata
+        records.append(sanitizedMetadata)
+
+    # Mode "w", not "a": appending a second array on a re-run would leave a
+    # file that json.load() cannot parse. Written last so that a failure above
+    # does not truncate an existing export.
+    with open(output, "w") as ty:
+        ty.write(json.dumps(records, indent=2))
diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py
index ca966fb010773901ed165311c85b8ac49bb708c1..639d80b5764fa054f2ae69f5f7af5e680ab6b210 100644
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py
@@ -1,7 +1,7 @@
 import meilisearch
 from pathlib import Path
 import csv
-
+import json
 from typing import Annotated, List, Optional
 from pydantic import BaseModel, Field, BeforeValidator
 from enum import Enum
@@ -175,6 +175,46 @@ def update_structure(
     index.update_typo_tolerance({"enabled": False})
 
 
+def update_systems(
+    host: str,
+    key: str,
+    file: Path,
+    document: str,
+):
+    """Load the systems JSON export into a Meilisearch index and configure it.
+
+    Args:
+        host: Address handed to ``meilisearch.Client``.
+        key: API key handed to ``meilisearch.Client``.
+        file: JSON file with the documents to add; ``title`` is their primary
+            key.
+        document: Index name; lower-cased before use.
+    """
+    client = meilisearch.Client(host, key)
+    index = client.index(document.lower())
+    with open(file, "r") as jsonfile:
+        json_object = json.load(jsonfile)
+    tasks = index.add_documents_in_batches(json_object, primary_key="title")
+    for task in tasks:
+        print(task)
+    pagination_settings_task = index.update_pagination_settings(
+        {"maxTotalHits": 100000}
+    )
+    print(pagination_settings_task)
+    attr_task = index.update_filterable_attributes(
+        body=["title", "Sensor", "Activator", "Effector", "PFAM.AC"]
+    )
+    params = {
+        "maxValuesPerFacet": 1000000,
+        "sortFacetValuesBy": {"*": "count"},
+    }
+    index.update_faceting_settings(params)
+
+    print(attr_task)
+    index.update_sortable_attributes(["title", "Sensor", "Activator", "Effector"])
+    index.update_typo_tolerance({"enabled": False})
+
+
 def split_on_comma(str_val: str) -> List[str]:
     for val in str_val.split(","):
         yield val.strip()
diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py
index 83a141b0e92fa57a0511d6a5bdb39ad9d17ed9c2..858e189d803cc336741dc29fbd0128657cfd1cdc 100644
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py
@@ -2,7 +2,7 @@ import typer
 import meilisearch
 from typing_extensions import Annotated
 from pathlib import Path
-from df_wiki_cli.meilisearch import update_refseq, update_structure
+from df_wiki_cli.meilisearch import update_refseq, update_structure, update_systems
 from enum import Enum
 from types import SimpleNamespace
 
@@ -12,6 +12,7 @@ app = typer.Typer()
 class Documents(str, Enum):
     refseq = "refseq"
     structure = "structure"
+    systems = "systems"
 
 
 @app.callback()
@@ -54,6 +55,8 @@ def update(
         update_refseq(ctx.obj.host, ctx.obj.key, file, document)
     if document == "structure":
         update_structure(ctx.obj.host, ctx.obj.key, file, document)
+    if document == "systems":
+        update_systems(ctx.obj.host, ctx.obj.key, file, document)
 
 
 @app.command()
diff --git a/packages/df-wiki-cli/df_wiki_cli/pfam/__init__.py b/packages/df-wiki-cli/df_wiki_cli/pfam/__init__.py
index 0133e49091e568ae34ee158b3559081093ab977e..9e185b9acd73443233151f71beb7fc3b928c7406 100644
--- a/packages/df-wiki-cli/df_wiki_cli/pfam/__init__.py
+++ b/packages/df-wiki-cli/df_wiki_cli/pfam/__init__.py
@@ -26,7 +26,11 @@ def fetch_pfam(output):
             else:
                 kk, v = line.split(" ")
                 g, k = kk.split()
-                d[pfID][k] = v
+                if k == "AC":
+                    # Keep only the part before the first "." of the accession.
+                    d[pfID][k] = v.split(".")[0]
+                else:
+                    d[pfID][k] = v
 
     df = pd.DataFrame(d).T
     df.index.name = "ID"
diff --git a/packages/df-wiki-cli/pyproject.toml b/packages/df-wiki-cli/pyproject.toml
index 320115fc3a887382a669362e1118c0a5c2e4f373..236029026cd480779937ff757858d6cf7c8ad037 100644
--- a/packages/df-wiki-cli/pyproject.toml
+++ b/packages/df-wiki-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "df-wiki-cli"
-version = "0.1.2"
+version = "0.1.3"
 description = ""
 authors = ["Remi PLANEL <rplanel@pasteur.fr>"]
 readme = "README.md"