Skip to content
Snippets Groups Projects
Commit ac32dcc0 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

df-wiki-cli: can create a systems index in meilisearch

parent 47f443b8
No related branches found
No related tags found
1 merge request!161List system ms
Pipeline #118732 passed
This commit is part of merge request !161. Comments created here will be created in the context of that merge request.
import typer import typer
import sys import sys
import re import re
import json
import pandas as pd
from pandas.errors import InvalidIndexError
import shutil import shutil
from typing_extensions import Annotated from typing_extensions import Annotated
from typing import Optional, List from typing import Optional, List
...@@ -111,3 +114,80 @@ def structure( ...@@ -111,3 +114,80 @@ def structure(
systemDir = output / system systemDir = output / system
systemDir.mkdir(parents=True, exist_ok=True) systemDir.mkdir(parents=True, exist_ok=True)
shutil.copy2(f, systemDir) shutil.copy2(f, systemDir)
@app.command()
def systems(
    dir: Annotated[
        Path,
        typer.Option(exists=False, file_okay=False, readable=True, dir_okay=True),
    ],
    pfam: Annotated[
        Path,
        typer.Option(
            exists=False,
            file_okay=True,
            writable=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Build a JSON index of defense systems from markdown frontmatter.

    Reads every ``*.md`` file in *dir*, extracts its frontmatter metadata,
    enriches the ``PFAM`` field with rows looked up (by accession) in the
    *pfam* CSV table, and writes all records as one JSON array to *output*
    (the file later fed to the Meilisearch ``systems`` index).
    """
    with open(pfam, "r") as pf:
        # keep_default_na=False: empty cells stay "" instead of NaN, which
        # keeps the serialized JSON clean.
        pfam_df = pd.read_csv(pf, index_col="AC", keep_default_na=False)
    systems = []
    # "w", not "a": appending on re-runs would concatenate JSON arrays and
    # produce a file json.load() cannot parse.
    with open(output, "w") as ty:
        for file in dir.iterdir():
            if file.suffix != ".md":
                continue
            console.rule(f"[bold blue]{file.name}", style="blue")
            with open(file) as f:
                metadata, _ = frontmatter.parse(f.read())
            # "layout" is presentation-only; drop it if present (pop avoids
            # a KeyError on pages that never declared it).
            metadata.pop("layout", None)
            if "tableColumns" not in metadata:
                continue
            table_data = metadata["tableColumns"]
            # Reset per file so a page without a PFAM column does not
            # inherit the previous page's entries (and the first such page
            # does not hit an unbound name).
            pfam_metadata = []
            if "PFAM" in table_data:
                # Renamed loop variable: do not shadow the `pfam` Path param.
                accessions = [ac.strip() for ac in table_data["PFAM"].split(",")]
                for ac in accessions:
                    try:
                        # .loc[[ac]] keeps a DataFrame (not a Series) so
                        # to_dict(orient="index") yields {ac: {col: val}}.
                        row = pfam_df.loc[[ac]]
                        row_as_dict = row.to_dict(orient="index")
                        row_as_dict[ac]["AC"] = ac
                        pfam_metadata.append(row_as_dict[ac])
                    except KeyError as err:
                        console.print(f"[bold red]{err}", style="red")
                        console.print(
                            f"[bold red]No pfam entry for {ac}",
                            style="red",
                        )
                        continue
            del metadata["tableColumns"]
            if "article" in table_data:
                metadata["doi"] = table_data["article"]["doi"]
                if "abstract" in table_data["article"]:
                    metadata["abstract"] = table_data["article"]["abstract"]
                del table_data["article"]
            sanitizedMetadata = {**metadata, **table_data}
            sanitizedMetadata["PFAM"] = pfam_metadata
            systems.append(sanitizedMetadata)
        ty.write(json.dumps(systems, indent=2))
import csv
import json
from enum import Enum
from pathlib import Path
from typing import Annotated, Iterator, List, Optional

import meilisearch
from pydantic import BaseModel, Field, BeforeValidator
...@@ -175,6 +175,37 @@ def update_structure( ...@@ -175,6 +175,37 @@ def update_structure(
index.update_typo_tolerance({"enabled": False}) index.update_typo_tolerance({"enabled": False})
def update_systems(
    host: str,
    key: str,
    file: Path,
    document: str,
):
    """Push a systems JSON file into a Meilisearch index and configure it.

    Loads the JSON array from *file*, adds the documents in batches to the
    index named after *document* (lowercased), then applies pagination,
    faceting, filtering, sorting, and typo-tolerance settings.
    """
    client = meilisearch.Client(host, key)
    index = client.index(document.lower())
    with open(file, "r") as jsonfile:
        records = json.load(jsonfile)
    # "title" is the unique document key for the systems index.
    batch_tasks = index.add_documents_in_batches(records, primary_key="title")
    for batch_task in batch_tasks:
        print(batch_task)
    pagination_task = index.update_pagination_settings({"maxTotalHits": 100000})
    print(pagination_task)
    filterable_task = index.update_filterable_attributes(
        body=["title", "Sensor", "Activator", "Effector", "PFAM.AC"]
    )
    index.update_faceting_settings(
        {
            "maxValuesPerFacet": 1000000,
            "sortFacetValuesBy": {"*": "count"},
        }
    )
    print(filterable_task)
    index.update_sortable_attributes(["title", "Sensor", "Activator", "Effector"])
    # Exact matching only: system names are identifiers, not free text.
    index.update_typo_tolerance({"enabled": False})
def split_on_comma(str_val: str) -> Iterator[str]:
    """Yield each comma-separated token of *str_val*, stripped of whitespace.

    Annotated as ``Iterator[str]`` (was ``List[str]``): the body uses
    ``yield``, so callers receive a generator, not a list.
    """
    for val in str_val.split(","):
        yield val.strip()
...@@ -2,7 +2,7 @@ import typer ...@@ -2,7 +2,7 @@ import typer
import meilisearch import meilisearch
from typing_extensions import Annotated from typing_extensions import Annotated
from pathlib import Path from pathlib import Path
from df_wiki_cli.meilisearch import update_refseq, update_structure from df_wiki_cli.meilisearch import update_refseq, update_structure, update_systems
from enum import Enum from enum import Enum
from types import SimpleNamespace from types import SimpleNamespace
...@@ -12,6 +12,7 @@ app = typer.Typer() ...@@ -12,6 +12,7 @@ app = typer.Typer()
class Documents(str, Enum):
    """Names of the Meilisearch indexes this CLI knows how to update."""

    refseq = "refseq"
    structure = "structure"
    systems = "systems"
@app.callback() @app.callback()
...@@ -54,6 +55,8 @@ def update( ...@@ -54,6 +55,8 @@ def update(
update_refseq(ctx.obj.host, ctx.obj.key, file, document) update_refseq(ctx.obj.host, ctx.obj.key, file, document)
if document == "structure": if document == "structure":
update_structure(ctx.obj.host, ctx.obj.key, file, document) update_structure(ctx.obj.host, ctx.obj.key, file, document)
if document == "systems":
update_systems(ctx.obj.host, ctx.obj.key, file, document)
@app.command() @app.command()
......
...@@ -26,7 +26,10 @@ def fetch_pfam(output): ...@@ -26,7 +26,10 @@ def fetch_pfam(output):
else: else:
kk, v = line.split(" ") kk, v = line.split(" ")
g, k = kk.split() g, k = kk.split()
d[pfID][k] = v if (k == 'AC'):
d[pfID][k] = v.split(".")[0]
else:
d[pfID][k] = v
df = pd.DataFrame(d).T df = pd.DataFrame(d).T
df.index.name = "ID" df.index.name = "ID"
......
[tool.poetry] [tool.poetry]
name = "df-wiki-cli" name = "df-wiki-cli"
version = "0.1.2" version = "0.1.3"
description = "" description = ""
authors = ["Remi PLANEL <rplanel@pasteur.fr>"] authors = ["Remi PLANEL <rplanel@pasteur.fr>"]
readme = "README.md" readme = "README.md"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment