diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9bf28de799dde207684ecd505fe157c203a29150..e31e3afe0d3183417d3b9707ad0cd4385de71f9a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -212,9 +212,8 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update refseq --file data/refseq_res.csv - --document refseq # REF SEQ TAXO # - > df-wiki-cli @@ -238,9 +237,8 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update refseqtaxo --file data/refseqtaxo.csv - --document refseqtaxo # REFSEQ TAXO TYPE # - > df-wiki-cli @@ -264,9 +262,8 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update refseqtaxotype --file data/refseqtaxotype.csv - --document refseqtaxotype # SANITIZED REFSEQ - > df-wiki-cli @@ -290,9 +287,8 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update refseqsanitized --file data/refseq-sanitized.csv - --document refseqsanitized # SYSTEMS - > df-wiki-cli @@ -305,18 +301,29 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update systems --file data/list-systems.json - --document systems # STRUCTURE - > df-wiki-cli meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update structure --file data/all_predictions_statistics_clean.csv - --document structure + + # SYSTEM OPERON STRUCUTRE + - > + df-wiki-cli + content + system-operon-structure + --version "1.2.4" --output data/system-structures.csv + - > + df-wiki-cli + meilisearch + update system-operon-structure + --file data/system-structures.csv + # ARTICLES - > df-wiki-cli @@ -329,9 +336,8 @@ lint: meilisearch --host ${MEILI_HOST} --key "${MEILI_MASTER_KEY}" - update + update article --file zot-articles.json - --document article diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py index c0331f2801a4ba4c961960983a8a003c281a3883..ebbae3c6955c3fbbae04346fc17668d0953219fc 100644 --- a/packages/df-wiki-cli/df_wiki_cli/content/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py @@ -245,28 +245,36 @@ def systems( @app.command() def system_operon_structure( + output: Annotated[ + Path, + typer.Option( + file_okay=True, + dir_okay=False, + writable=True, + resolve_path=True, + ), + ] = "./system-structures.csv", version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4", ): # get defense finder model from github - # https://github.com/mdmparis/defense-finder-models/releases/download/1.2.4/defense-finder-models-v1.2.4.tar.gz df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz" _, tmp_path = tempfile.mkstemp() tmp_root_dir = tempfile.gettempdir() df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-v{version}") df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions" console.print(f"Download models: {df_model_url}") - # response = requests.get( - # df_model_url, - # allow_redirects=True, - # ) - # with open(tmp_path, mode="wb") as file: - # file.write(response.content) - - # console.print("untar file") - # with tarfile.open(tmp_path) as archive: - # archive.extractall(df_model_dir) + response = requests.get( + df_model_url, + allow_redirects=True, + ) + with open(tmp_path, mode="wb") as file: + file.write(response.content) + + console.print("untar file") + with tarfile.open(tmp_path) as archive: + archive.extractall(df_model_dir) # # extract foreach system and subsystem list genes # set the order system_genes = [] @@ -290,29 +298,23 @@ def system_operon_structure( "system": system, "subsystem": susbsystem_name, "gene": child.attrib["name"], - "alternatives": None, + "exchangeables": None, } system_genes.append(current_gene) if child.tag == "exchangeables": - alternatives = [] + exchangeables = [] for ex_gene in child.iter(): - # console.rule("exchangeables") - # console.print(ex_gene.attrib) if ex_gene.tag == "gene": - console.print() - alternatives.append(ex_gene.attrib["name"]) - current_gene["alternatives"] = ",".join(alternatives) + exchangeables.append(ex_gene.attrib["name"]) + current_gene["exchangeables"] = ",".join(exchangeables) current_gene = {} - print(current_gene) - - # print(child.tag, child.attrib) - # print(system_genes) - with open("/tmp/log", "w") as f: - fieldnames = ["system", "subsystem", "gene", "alternatives"] + with open(output, "w") as f: + fieldnames = ["id", "system", "subsystem", "gene", "exchangeables"] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() - for gene in system_genes: + for id, gene in enumerate(system_genes): # gene["alternatives"] = ",".join(gene["alternatives"]) + gene["id"] = id writer.writerow(gene) diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py index 1084a28f158147cceef25e3ae88df81254bb84a9..ea904573e1405d88f16d25c13a8c8e37357b55e2 100644 --- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py @@ -2,6 +2,10 @@ import typer import meilisearch from typing_extensions import Annotated from pathlib import Path + +from df_wiki_cli.meilisearch.update import main as update_main + +# from df_wiki_cli.meilisearch import update from df_wiki_cli.meilisearch import ( update_refseqtaxo, update_articles, @@ -11,6 +15,7 @@ from df_wiki_cli.meilisearch import ( update_structure, update_systems, ) + from enum import Enum from types import SimpleNamespace from rich.console import Console @@ -19,6 +24,7 @@ from rich.console import Console console = Console() app = typer.Typer() +app.add_typer(update_main.app, name="update") class Documents(str, Enum): @@ -52,39 +58,39 @@ def main( # ctx.obj = SimpleNamespace() -@app.command() -def update( - ctx: typer.Context, - file: Annotated[ - Path, - typer.Option( - exists=False, - file_okay=True, - writable=True, - ), - ], - document: Annotated[ - Documents, typer.Option(case_sensitive=False) - ] = Documents.refseq, - content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv", -): - - if document == "refseqtaxo": - update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document) - if document == "refseq": - update_refseq(ctx.obj.host, ctx.obj.key, file, document) - if document == "structure": - update_structure(ctx.obj.host, ctx.obj.key, file, document) - if document == "systems": - update_systems(ctx.obj.host, ctx.obj.key, file, document) - if document == "article": - update_articles(ctx.obj.host, ctx.obj.key, file, document) - if document == "refseqtaxotype": - update_refseqtaxotype(ctx.obj.host, ctx.obj.key, file, document) - if document == "refseqsanitized": - update_refseq(ctx.obj.host, ctx.obj.key, file, document) - if document == "refseqtypecount": - update_refseqtypecount(ctx.obj.host, ctx.obj.key, file, document) +# @app.command() +# def update( +# ctx: typer.Context, +# file: Annotated[ +# Path, +# typer.Option( +# exists=False, +# file_okay=True, +# writable=True, +# ), +# ], +# document: Annotated[ +# Documents, typer.Option(case_sensitive=False) +# ] = Documents.refseq, +# content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv", +# ): + +# if document == "refseqtaxo": +# update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document) +# if document == "refseq": +# update_refseq(ctx.obj.host, ctx.obj.key, file, document) +# if document == "structure": +# update_structure(ctx.obj.host, ctx.obj.key, file, document) +# if document == "systems": +# update_systems(ctx.obj.host, ctx.obj.key, file, document) +# if document == "article": +# update_articles(ctx.obj.host, ctx.obj.key, file, document) +# if document == "refseqtaxotype": +# update_refseqtaxotype(ctx.obj.host, ctx.obj.key, file, document) +# if document == "refseqsanitized": +# update_refseq(ctx.obj.host, ctx.obj.key, file, document) +# if document == "refseqtypecount": +# update_refseqtypecount(ctx.obj.host, ctx.obj.key, file, document) @app.command() diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/__init__.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py new file mode 100644 index 0000000000000000000000000000000000000000..6310e37e037896e51abfd2d02495416db3c90d90 --- /dev/null +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py @@ -0,0 +1,573 @@ +import typer +from rich.console import Console +import meilisearch +from pathlib import Path +import csv +import json +from typing import Annotated, List, Optional +from pydantic import BaseModel, Field, BeforeValidator +from enum import Enum + +console = Console() + +app = typer.Typer() + + +def emptyStringToNone(val: str) -> None | int: + if val == "": + return None + return int(float(val)) + + +class RefSeqCsv(BaseModel): + sys_id: str + Assembly: str + replicon: str + type: str + subtype: str + sys_beg: str + sys_end: str + protein_in_syst: Optional[List[str]] + genes_count: Annotated[int | None, BeforeValidator(emptyStringToNone)] + name_of_profiles_in_sys: Optional[List[str]] + accession_in_sys: Optional[List[str]] + Superkingdom: str + phylum: str + class_: str = Field(..., alias="class") + order: str + family: str + genus: str + species: str + + +class RefSeqTaxo(BaseModel): + index: int + Assembly: str + Superkingdom: str + phylum: str + class_: str = Field(..., alias="class") + order: str + family: str + genus: str + species: str + size: int + + +class RefSeqTaxoType(RefSeqTaxo): + type: str + + +class RefSeqTypeCount(BaseModel): + type: str + size: int + + +class StructureTypes(str, Enum): + Validated = "Validated" + DF = "DF" + na = "na" + + +def na_to_none(v: Optional[float]) -> Optional[float]: + if v == "na": + return None + else: + return v + + +NaFloat = Annotated[Optional[float], BeforeValidator(na_to_none)] + + +class StrucutreStatistics(BaseModel): + id: int + System_name_ok: str + System: str + gene_name: str + subtype: str + proteins_in_the_prediction: Optional[List[str]] + prediction_type: str + batch: int + nb_sys: str + type: StructureTypes + system_number_of_genes: int + system_genes: Optional[List[str]] + pdb: str + pae_table: str + plddt_table: str + fasta_file: str + completed: bool + iptm_ptm: NaFloat = Field(..., alias="iptm+ptm") + pDockQ: Optional[NaFloat] + plddts: Optional[NaFloat] + Foldseek_name: Optional[str] + + +class SystemOperonStructure(BaseModel): + id: int + system: str + subsystem: str + gene: str + exchangeables: Optional[List[str]] + + +@app.command() +def refseqtaxo( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "refseqtaxo" + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for row in csvreader: + doc = RefSeqTaxo(**row) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="Assembly") + for task in tasks: + console.print(task) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes( + body=[ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + ] + ) + index.update_sortable_attributes( + [ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + "size", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + +@app.command() +def refseqtaxotype( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "refseqtaxotype" + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for row in csvreader: + doc = RefSeqTaxoType(**row) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="index") + for task in tasks: + console.print(task) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes( + body=[ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + ] + ) + index.update_sortable_attributes( + [ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + "type", + "size", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + +@app.command() +def refseqtypecount( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "refseqtypecount" + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for row in csvreader: + doc = RefSeqTypeCount(**row) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="type") + for task in tasks: + console.print(task) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes(body=["type"]) + index.update_sortable_attributes( + [ + "type", + "size", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + +@app.command() +def refseq( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "refseq" + update_refseq(client, document, file) + + +@app.command() +def refseqsanitized( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "refseqsanitized" + update_refseq(client, document, file) + + +@app.command() +def structure( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "structure" + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for id, row in enumerate(csvreader): + row["proteins_in_the_prediction"] = split_on_comma( + row["proteins_in_the_prediction"] + ) + row["system_genes"] = split_on_comma(row["system_genes"]) + doc = StrucutreStatistics(**row, id=id) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="id") + for task in tasks: + print(task) + pagination_settings_task = index.update_pagination_settings( + {"maxTotalHits": 100000} + ) + print(pagination_settings_task) + attr_task = index.update_filterable_attributes( + body=[ + "System", + "gene_name", + "subtype", + "completed", + "prediction_type", + "plddts", + "iptm+ptm", + "proteins_in_the_prediction", + "system_genes", + "pDockQ", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + print(attr_task) + index.update_sortable_attributes( + [ + "System_name_ok", + "System", + "gene_name", + "subtype", + "completed", + "plddts", + "nb_sys", + "completed", + "prediction_type", + "system_number_of_genes", + "iptm+ptm", + "pDockQ", + ] + ) + index.update_typo_tolerance({"enabled": False}) + + +@app.command() +def systems( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "systems" + index = client.index(document.lower()) + with open(file, "r") as jsonfile: + json_object = json.load(jsonfile) + tasks = index.add_documents_in_batches(json_object, primary_key="title") + for task in tasks: + print(task) + pagination_settings_task = index.update_pagination_settings( + {"maxTotalHits": 100000} + ) + print(pagination_settings_task) + attr_task = index.update_filterable_attributes( + body=[ + "title", + "Sensor", + "Activator", + "Effector", + "PFAM.AC", + "PFAM.DE", + "contributors", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + print(attr_task) + index.update_sortable_attributes(["title", "Sensor", "Activator", "Effector"]) + index.update_typo_tolerance({"enabled": False}) + + +@app.command() +def articles( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "article" + index = client.index(document.lower()) + with open(file, "r") as jsonfile: + json_object = json.load(jsonfile) + for obj in json_object: + obj["ms_id"] = obj["id"].replace("/", "_") + tasks = index.add_documents_in_batches(json_object, primary_key="ms_id") + for task in tasks: + print(task) + + pagination_settings_task = index.update_pagination_settings( + {"maxTotalHits": 100000} + ) + print(pagination_settings_task) + attr_task = index.update_filterable_attributes( + body=[ + "DOI", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + print(attr_task) + + +@app.command() +def system_operon_structure( + ctx: typer.Context, + file: Annotated[ + Path, + typer.Option( + exists=False, + file_okay=True, + writable=False, + ), + ], +): + client = meilisearch.Client(ctx.obj.host, ctx.obj.key) + document = "systemoperonstruct" + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for row in csvreader: + row["exchangeables"] = split_on_comma(row["exchangeables"]) + doc = SystemOperonStructure(**row) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="id") + for task in tasks: + console.print(task) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes( + body=[ + "system", + "subsystem", + "gene", + "exchangeables", + ] + ) + index.update_sortable_attributes( + [ + "system", + "subsystem", + "gene", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + +def split_on_comma(str_val: Optional[str]) -> Optional[List[str]]: + if str_val is not None and str_val != "": + for val in str_val.split(","): + yield val.strip() + else: + return None + + +def update_refseq(client, document, file): + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + for row in csvreader: + row["protein_in_syst"] = split_on_comma(row["protein_in_syst"]) + row["name_of_profiles_in_sys"] = split_on_comma( + row["name_of_profiles_in_sys"] + ) + row["accession_in_sys"] = split_on_comma(row["accession_in_sys"]) + doc = RefSeqCsv(**row) + documents.append(doc.model_dump(by_alias=True)) + tasks = index.add_documents_in_batches(documents, primary_key="sys_id") + for task in tasks: + console.print(task) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes( + body=[ + "replicon", + "Assembly", + "type", + "subtype", + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] + ) + index.update_sortable_attributes( + [ + "replicon", + "Assembly", + "type", + "subtype", + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + index.update_typo_tolerance( + { + "enabled": False + # "minWordSizeForTypos": {"oneTypo": 50, "twoTypos": 100} + } + ) diff --git a/packages/df-wiki-cli/pyproject.toml b/packages/df-wiki-cli/pyproject.toml index e32dd2b316028b3045a12c04313ebc8e9e8dffa4..421d54c8eed046710ccf2afb2185c9251bfe71be 100644 --- a/packages/df-wiki-cli/pyproject.toml +++ b/packages/df-wiki-cli/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "df-wiki-cli" -version = "0.1.8" +version = "0.1.9" description = "" authors = ["Remi PLANEL <rplanel@pasteur.fr>"] readme = "README.md" diff --git a/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh new file mode 100755 index 0000000000000000000000000000000000000000..789c7832ae9be07202b749f8064f0fe53058f854 --- /dev/null +++ b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +DATA_DIR=../../../data +CONTENT_DIR=../../../content +PUBLIC_DIR=../../../public +# REFSEQ +df-wiki-cli meilisearch delete-all-documents refseq +df-wiki-cli meilisearch update refseq --file ${DATA_DIR}/refseq_res.csv + + +# REF SEQ TAXO +df-wiki-cli content refseq-group-per-assembly --input ${DATA_DIR}/refseq_res.csv --output /tmp/refseqtaxo.csv +df-wiki-cli meilisearch delete-all-documents refseqtaxo +df-wiki-cli meilisearch update refseqtaxo --file /tmp/refseqtaxo.csv + +# REFSEQ TAXO TYPE + +df-wiki-cli content refseq-group-per-assembly-and-type --input ${DATA_DIR}/refseq_res.csv --output /tmp/refseqtaxotype.csv +df-wiki-cli meilisearch delete-all-documents refseqtaxotype +df-wiki-cli meilisearch update refseqtaxotype --file /tmp/refseqtaxotype.csv + + +# SANITIZED REFSEQ +df-wiki-cli content refseq-sanitized-hits --input ${DATA_DIR}/refseq_res.csv --output /tmp/refseq-sanitized.csv + +df-wiki-cli meilisearch delete-all-documents refseqsanitized +df-wiki-cli meilisearch update refseqsanitized --file /tmp/refseq-sanitized.csv + +# systems +df-wiki-cli content systems --dir ${CONTENT_DIR}/3.defense-systems/ --pfam ${PUBLIC_DIR}/pfam-a-hmm.csv --output /tmp/list-systems.json +df-wiki-cli meilisearch update systems --file /tmp/list-systems.json + +# STRUCTURE +df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics_clean.csv + +# ARTICLES +# df-wiki-cli meilisearch delete-all-documents article +# df-wiki-cli meilisearch + + +# system operon structure +df-wiki-cli content system-operon-structure --version "1.2.4" --output ${DATA_DIR}/system-structures.csv +df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv + diff --git a/scripts/fill-local-meilisearch.sh b/scripts/fill-local-meilisearch.sh deleted file mode 100755 index cfd0aa4b422465f855d16ca9c92c81e4b4be5814..0000000000000000000000000000000000000000 --- a/scripts/fill-local-meilisearch.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# REFSEQ -df-wiki-cli meilisearch delete-all-documents refseq -df-wiki-cli meilisearch update --file ../data/refseq_res.csv --document refseq - - -# REF SEQ TAXO -df-wiki-cli content refseq-group-per-assembly --input ../data/refseq_res.csv --output /tmp/refseqtaxo.csv -df-wiki-cli meilisearch delete-all-documents refseqtaxo -df-wiki-cli meilisearch update --file /tmp/refseqtaxo.csv --document refseqtaxo - -# REFSEQ TAXO TYPE - -df-wiki-cli content refseq-group-per-assembly-and-type --input ../data/refseq_res.csv --output /tmp/refseqtaxotype.csv -df-wiki-cli meilisearch delete-all-documents refseqtaxotype -df-wiki-cli meilisearch update --file /tmp/refseqtaxotype.csv --document refseqtaxotype - - -# SANITIZED REFSEQ -df-wiki-cli content refseq-sanitized-hits --input ../data/refseq_res.csv --output /tmp/refseq-sanitized.csv - -df-wiki-cli meilisearch delete-all-documents refseqsanitized -df-wiki-cli meilisearch update --file /tmp/refseq-sanitized.csv --document refseqsanitized - -# systems -df-wiki-cli content systems --dir ../content/3.defense-systems/ --pfam ../public/pfam-a-hmm.csv --output /tmp/list-systems.json -df-wiki-cli meilisearch update --file /tmp/list-systems.json --document systems - -# STRUCTURE -df-wiki-cli meilisearch update --file ../data/all_predictions_statistics_clean.csv --document structure - -# ARTICLES -# df-wiki-cli meilisearch delete-all-documents article -# df-wiki-cli meilisearch