From 8fbae2649c84b79961f9e102c4aeb47647a65661 Mon Sep 17 00:00:00 2001
From: Remi  PLANEL <rplanel@pasteur.fr>
Date: Tue, 9 Apr 2024 15:56:30 +0200
Subject: [PATCH] wip

---
 components/content/SystemOperonStructure.vue  | 69 +++++++++++++++++++
 content/3.defense-systems/abia.md             |  4 ++
 content/3.defense-systems/avs.md              |  3 +
 .../df-wiki-cli/df_wiki_cli/content/main.py   |  6 +-
 .../df_wiki_cli/meilisearch/update/main.py    | 40 ++++++++---
 .../scripts/fill-local-meilisearch.sh         | 14 ++--
 6 files changed, 119 insertions(+), 17 deletions(-)
 create mode 100644 components/content/SystemOperonStructure.vue

diff --git a/components/content/SystemOperonStructure.vue b/components/content/SystemOperonStructure.vue
new file mode 100644
index 00000000..bfa0430b
--- /dev/null
+++ b/components/content/SystemOperonStructure.vue
@@ -0,0 +1,69 @@
+<script setup lang="ts">
+import * as d3 from "d3";
+
+const client = useMeiliSearchRef()
+const { page } = useContent();
+
+const pending = ref<boolean>(false)
+const msIndexName = ref<'systemoperonstruct'>("systemoperonstruct")
+const msResponse = ref()
+// System to query: the page's `system` field, falling back to its `title`.
+const systemName = computed(() => {
+    const { system, title } = toValue(page) ?? {}
+    return system ?? title ?? undefined
+})
+
+// Hits of the last search with the `<prefix>__` part of each gene name removed.
+// Declared before `systemGroupedBySubtype`, which reads it.
+const sanitizedHits = computed(() => {
+    const hits = toValue(msResponse)?.hits ?? []
+    return hits.map(hit => {
+        // Keep the raw value when `gene` is missing or has no "__" separator
+        // instead of throwing or rendering `undefined`.
+        const [, geneName] = String(hit.gene ?? "").split("__")
+        return { ...hit, gene: geneName ?? hit.gene }
+    })
+})
+
+// Hits grouped by `subsystem`, as [subsystem, hits[]] pairs (d3.groups).
+const systemGroupedBySubtype = computed(() => {
+    return d3.groups(sanitizedHits.value, d => d.subsystem)
+})
+
+
+onMounted(() => {
+    fetchOperonStructure()
+})
+
+// Load the operon-structure documents of the current system into `msResponse`.
+async function fetchOperonStructure() {
+    // Escape single quotes so the name cannot terminate the quoted filter value.
+    const escapedName = String(toValue(systemName)).replace(/'/g, "\\'")
+    try {
+        pending.value = true
+        msResponse.value = await client.index(toValue(msIndexName)).search("", {
+            facets: ["*"],
+            filter: [`system = '${escapedName}'`],
+            limit: 500000,
+        })
+    } catch (error) {
+        throw createError(`Cannot get hits on ${toValue(msIndexName)} for system: ${toValue(systemName)} `)
+    } finally {
+        pending.value = false
+    }
+}
+
+
+
+</script>
+
+<template>
+
+    <v-card flat>
+        <v-list-item v-for="s in sanitizedHits" :key="s.id">
+            {{ s.gene }} - {{ s.subsystem }}
+        </v-list-item>
+        <pre>{{ systemGroupedBySubtype }}</pre>
+    </v-card>
+
+</template>
\ No newline at end of file
diff --git a/content/3.defense-systems/abia.md b/content/3.defense-systems/abia.md
index fcac9a83..e1c75307 100644
--- a/content/3.defense-systems/abia.md
+++ b/content/3.defense-systems/abia.md
@@ -57,6 +57,10 @@ The AbiA_small system in *Alicyclobacillus sp. SO9* (GCF_016406125.1, NZ_CP06633
 
 ## Structure
 
+
+::system-operon-structure
+::
+
 ### pDockQ matrix
 
 ::pdockq-matrix
diff --git a/content/3.defense-systems/avs.md b/content/3.defense-systems/avs.md
index 24899ded..479c6527 100644
--- a/content/3.defense-systems/avs.md
+++ b/content/3.defense-systems/avs.md
@@ -79,6 +79,9 @@ Proportion of genome encoding the Avs system for the 14 phyla with more than 50
 
 ## Structure
 
+::system-operon-structure
+::
+
 
 ### matrix
 
diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py
index ebbae3c6..9202e8a3 100644
--- a/packages/df-wiki-cli/df_wiki_cli/content/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py
@@ -254,15 +254,15 @@ def system_operon_structure(
             resolve_path=True,
         ),
     ] = "./system-structures.csv",
-    version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4",
+    version: Annotated[str, typer.Option(help="Defense finder model")] = "v1.2.4",
 ):
 
     # get defense finder model from github
 
-    df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz"
+    df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-{version}.tar.gz"
     _, tmp_path = tempfile.mkstemp()
     tmp_root_dir = tempfile.gettempdir()
-    df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-v{version}")
+    df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-{version}")
     df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions"
     console.print(f"Download models: {df_model_url}")
     response = requests.get(
diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
index 6310e37e..24a6290b 100644
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
@@ -1,4 +1,5 @@
 import typer
+import re
 from rich.console import Console
 import meilisearch
 from pathlib import Path
@@ -80,10 +81,7 @@ NaFloat = Annotated[Optional[float], BeforeValidator(na_to_none)]
 
 class StrucutreStatistics(BaseModel):
     id: int
-    System_name_ok: str
-    System: str
-    gene_name: str
-    subtype: str
+    system: str
     proteins_in_the_prediction: Optional[List[str]]
     prediction_type: str
     batch: int
@@ -99,7 +97,6 @@ class StrucutreStatistics(BaseModel):
     iptm_ptm: NaFloat = Field(..., alias="iptm+ptm")
     pDockQ: Optional[NaFloat]
     plddts: Optional[NaFloat]
-    Foldseek_name: Optional[str]
 
 
 class SystemOperonStructure(BaseModel):
@@ -321,11 +318,38 @@ def structure(
                 row["proteins_in_the_prediction"]
             )
             row["system_genes"] = split_on_comma(row["system_genes"])
+            # get info from systemoperonstruct
+
+            search_subtype = re.split(r"-\d+$", row["system"])[0]
+            # search_subtype = row["system"].split("-")[0]
+            console.print(search_subtype)
+            res = client.index("systemoperonstruct").search(
+                "",
+                {
+                    "filter": f"subsystem = '{search_subtype}'",
+                },
+            )
+            if len(res["hits"]) < 1:
+
+                # raise RuntimeError("do not find hits for " + search_subtype)
+                console.print(f"[red] no hits for {search_subtype}")
+            # else:
+            #     console.print(res["hits"])
+            # group data per subtype
+
+            # create a set of list of gene
+
+            # compare with the one defined in system_genes=
+
+            # if it is the same, set subtype
+
             doc = StrucutreStatistics(**row, id=id)
+            # console.print(doc)
+
             documents.append(doc.model_dump(by_alias=True))
-        tasks = index.add_documents_in_batches(documents, primary_key="id")
-        for task in tasks:
-            print(task)
+        # tasks = index.add_documents_in_batches(documents, primary_key="id")
+        # for task in tasks:
+        #     print(task)
     pagination_settings_task = index.update_pagination_settings(
         {"maxTotalHits": 100000}
     )
diff --git a/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh
index 789c7832..2fabe8ea 100755
--- a/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh
+++ b/packages/df-wiki-cli/scripts/fill-local-meilisearch.sh
@@ -3,6 +3,13 @@
 DATA_DIR=../../../data
 CONTENT_DIR=../../../content
 PUBLIC_DIR=../../../public
+
+
+# system operon structure
+df-wiki-cli content system-operon-structure --version "1.2.3" --output ${DATA_DIR}/system-structures.csv
+df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv
+
+
 # REFSEQ
 df-wiki-cli meilisearch delete-all-documents refseq
 df-wiki-cli meilisearch update refseq --file ${DATA_DIR}/refseq_res.csv
@@ -31,14 +38,9 @@ df-wiki-cli content systems --dir ${CONTENT_DIR}/3.defense-systems/ --pfam ${PUB
 df-wiki-cli meilisearch update systems --file /tmp/list-systems.json
 
 # STRUCTURE 
-df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics_clean.csv
+df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics.csv
 
 # ARTICLES
 # df-wiki-cli meilisearch delete-all-documents article
 # df-wiki-cli meilisearch
 
-
-# system operon structure
-df-wiki-cli content system-operon-structure --version "1.2.4" --output ${DATA_DIR}/system-structures.csv
-df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv
-
-- 
GitLab