From 8fbae2649c84b79961f9e102c4aeb47647a65661 Mon Sep 17 00:00:00 2001 From: Remi PLANEL <rplanel@pasteur.fr> Date: Tue, 9 Apr 2024 15:56:30 +0200 Subject: [PATCH] wip --- components/content/SystemOperonStructure.vue | 69 +++++++++++++++++++ content/3.defense-systems/abia.md | 4 ++ content/3.defense-systems/avs.md | 3 + .../df-wiki-cli/df_wiki_cli/content/main.py | 6 +- .../df_wiki_cli/meilisearch/update/main.py | 40 ++++++++--- .../scripts/fill-local-meilisearch.sh | 14 ++-- 6 files changed, 119 insertions(+), 17 deletions(-) create mode 100644 components/content/SystemOperonStructure.vue diff --git a/components/content/SystemOperonStructure.vue b/components/content/SystemOperonStructure.vue new file mode 100644 index 00000000..bfa0430b --- /dev/null +++ b/components/content/SystemOperonStructure.vue @@ -0,0 +1,69 @@ +<script setup lang="ts"> +import * as d3 from "d3"; + +const client = useMeiliSearchRef() +const { page } = useContent(); + +const pending = ref<boolean>(false) +const msIndexName = ref<'systemoperonstruct'>("systemoperonstruct") +const msResponse = ref() +const systemName = computed(() => { + const toValPage = toValue(page) + return toValPage?.system ?? toValPage?.title ?? undefined +}) + + +const systemGroupedBySubtype = computed(() => { + return d3.groups(sanitizedHits.value, d => d.subsystem) +}) + +const sanitizedHits = computed(() => { + const toValMsResponse = toValue(msResponse) + console.log(toValMsResponse) + if (toValMsResponse?.hits?.length > 0) { + return toValMsResponse.hits.map(hit => { + console.log(hit) + return { ...hit, gene: hit.gene.split("__")[1] } + }) + } else { + return [] + } + +}) + +onMounted(() => { + fetchOperonStructure() +}) + +async function fetchOperonStructure() { + try { + pending.value = true + const data = await client.index(toValue(msIndexName)).search("", { + facets: ["*"], + filter: [`system = '${toValue(systemName)}'`], + limit: 500000, + }) + msResponse.value = data + + } catch (error) { + throw createError(`Cannot get hits on refseq for system: ${toValue(systemName)} `) + } finally { + pending.value = false + } +} + + + + +</script> + +<template> + + <v-card flat> + <v-list-item v-for="s in sanitizedHits" :key="s.id"> + {{ s.gene }} - {{ s.subsystem }} + </v-list-item> + <pre>{{ systemGroupedBySubtype }}</pre> + </v-card> + +</template> \ No newline at end of file diff --git a/content/3.defense-systems/abia.md b/content/3.defense-systems/abia.md index fcac9a83..e1c75307 100644 --- a/content/3.defense-systems/abia.md +++ b/content/3.defense-systems/abia.md @@ -57,6 +57,10 @@ The AbiA_small system in *Alicyclobacillus sp. SO9* (GCF_016406125.1, NZ_CP06633 ## Structure + +::system-operon-structure +:: + ### pDockQ matrix ::pdockq-matrix diff --git a/content/3.defense-systems/avs.md b/content/3.defense-systems/avs.md index 24899ded..479c6527 100644 --- a/content/3.defense-systems/avs.md +++ b/content/3.defense-systems/avs.md @@ -79,6 +79,9 @@ Proportion of genome encoding the Avs system for the 14 phyla with more than 50 ## Structure +::system-operon-structure +:: + ### matrix diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py index ebbae3c6..9202e8a3 100644 --- a/packages/df-wiki-cli/df_wiki_cli/content/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py @@ -254,15 +254,15 @@ def system_operon_structure( resolve_path=True, ), ] = "./system-structures.csv", - version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4", + version: Annotated[str, typer.Option(help="Defense finder model")] = "v1.2.4", ): # get defense finder model from github - df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz" + df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-{version}.tar.gz" _, tmp_path = tempfile.mkstemp() tmp_root_dir = tempfile.gettempdir() - df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-v{version}") + df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-{version}") df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions" console.print(f"Download models: {df_model_url}") response = requests.get( diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py index 6310e37e..24a6290b 100644 --- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py @@ -1,4 +1,5 @@ import typer +import re from rich.console import Console import meilisearch from pathlib import Path @@ -80,10 +81,7 @@ NaFloat = Annotated[Optional[float], BeforeValidator(na_to_none)] class StrucutreStatistics(BaseModel): id: int - System_name_ok: str - System: str - gene_name: str - subtype: str + system: str proteins_in_the_prediction: Optional[List[str]] prediction_type: str batch: int @@ -99,7 +97,6 @@ class StrucutreStatistics(BaseModel): iptm_ptm: NaFloat = Field(..., alias="iptm+ptm") pDockQ: Optional[NaFloat] plddts: Optional[NaFloat] - Foldseek_name: Optional[str] class SystemOperonStructure(BaseModel): @@ -321,11 +318,38 @@ def structure( row["proteins_in_the_prediction"] ) row["system_genes"] = split_on_comma(row["system_genes"]) + # get info from systemoperonstruct + + search_subtype = re.split(r"-\d+$", row["system"])[0] + # search_subtype = row["system"].split("-")[0] + console.print(search_subtype) + res = client.index("systemoperonstruct").search( + "", + { + "filter": f"subsystem = '{search_subtype}'", + }, + ) + if len(res["hits"]) < 1: + + # raise RuntimeError("do not find hits for " + search_subtype) + console.print(f"[red] no hits for {search_subtype}") + # else: + # console.print(res["hits"]) + # group data per subtype + + # create a set of list of gene + + # compare with the one defined in system_genes= + + # if it is the same, set subtype + doc = StrucutreStatistics(**row, id=id) + # console.print(doc) + documents.append(doc.model_dump(by_alias=True)) - tasks = index.add_documents_in_batches(documents, primary_key="id") - for task in tasks: - print(task) + # tasks = index.add_documents_in_batches(documents, primary_key="id") + # for task in tasks: + # print(task) pagination_settings_task = index.update_pagination_settings( {"maxTotalHits": 100000} ) diff --git a/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh index 789c7832..2fabe8ea 100755 --- a/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh +++ b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh @@ -3,6 +3,13 @@ DATA_DIR=../../../data CONTENT_DIR=../../../content PUBLIC_DIR=../../../public + + +# system operon structure +df-wiki-cli content system-operon-structure --version "1.2.3" --output ${DATA_DIR}/system-structures.csv +df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv + + # REFSEQ df-wiki-cli meilisearch delete-all-documents refseq df-wiki-cli meilisearch update refseq --file ${DATA_DIR}/refseq_res.csv @@ -31,14 +38,9 @@ df-wiki-cli content systems --dir ${CONTENT_DIR}/3.defense-systems/ --pfam ${PUB df-wiki-cli meilisearch update systems --file /tmp/list-systems.json # STRUCTURE -df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics_clean.csv +df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics.csv # ARTICLES # df-wiki-cli meilisearch delete-all-documents article # df-wiki-cli meilisearch - -# system operon structure -df-wiki-cli content system-operon-structure --version "1.2.4" --output ${DATA_DIR}/system-structures.csv -df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv - -- GitLab