Compare revisions

Remi PLANEL · Remi PLANEL · Remi PLANEL · Remi PLANEL · Remi PLANEL · Remi PLANEL
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -192,18 +192,20 @@ lint:
    MEILI_HOST: "http://localhost:7700"
  script:
    # - rm data/list-systems.json
+    ###### REFSEQ
+    # delete all document refseq
    - >
      df-wiki-cli
      meilisearch 
      --host ${MEILI_HOST} 
      --key ${MEILI_MASTER_KEY}
      delete-all-documents refseq
+    # update refseq index
    - >
-      df-wiki-cli 
-      content systems 
-      --dir content/3.defense-systems/ 
-      --pfam public/pfam-a-hmm.csv 
-      --output data/list-systems.json
+      df-wiki-cli meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      index-update refseq sys_id
    - >
      df-wiki-cli
      meilisearch 
@@ -211,23 +213,92 @@ lint:
      --key ${MEILI_MASTER_KEY}
      update
      --file data/refseq_res.csv
-      --document refseq
+      --document refseq 
+    # REF SEQ TAXO #
+    - >
+      df-wiki-cli 
+      content 
+      refseq-group-per-assembly
+      --input data/refseq_res.csv
+      --output data/refseqtaxo.csv
+    - >
+      df-wiki-cli
+      meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      delete-all-documents refseqtaxo
+    - >
+      df-wiki-cli meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      index-update refseqtaxo Assembly
    - >
      df-wiki-cli
      meilisearch 
      --host ${MEILI_HOST} 
      --key ${MEILI_MASTER_KEY}
      update
-      --file data/refseq_res.csv
+      --file data/refseqtaxo.csv
      --document refseqtaxo
+    # REFSEQ TAXO TYPE #
+    - >
+      df-wiki-cli 
+      content 
+      refseq-group-per-assembly-and-type      
+      --input data/refseq_res.csv
+      --output data/refseqtaxotype.csv
+    - >
+      df-wiki-cli
+      meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      delete-all-documents refseqtaxotype
+    - >
+      df-wiki-cli meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      index-update refseqtaxotype index
    - >
      df-wiki-cli
      meilisearch 
      --host ${MEILI_HOST} 
      --key ${MEILI_MASTER_KEY}
      update
-      --file data/all_predictions_statistics_clean.csv
-      --document structure
+      --file data/refseqtaxotype.csv
+      --document refseqtaxotype
+    # SANITIZED REFSEQ
+    - >
+      df-wiki-cli 
+      content 
+      refseq-sanitized-hits
+      --input data/refseq_res.csv
+      --output data/refseq-sanitized.csv
+    - >
+      df-wiki-cli
+      meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      delete-all-documents refseqsanitized
+    - >
+      df-wiki-cli meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      index-update refseqsanitized index
+    - >
+      df-wiki-cli
+      meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      update
+      --file data/refseq-sanitized.csv
+      --document refseqsanitized 
+    # SYSTEMS
+    - >
+      df-wiki-cli 
+      content systems 
+      --dir content/3.defense-systems/ 
+      --pfam public/pfam-a-hmm.csv 
+      --output data/list-systems.json
    - >
      df-wiki-cli
      meilisearch 
@@ -236,6 +307,16 @@ lint:
      update
      --file data/list-systems.json
      --document systems
+    # STRUCTURE
+    - >
+      df-wiki-cli
+      meilisearch 
+      --host ${MEILI_HOST} 
+      --key ${MEILI_MASTER_KEY}
+      update
+      --file data/all_predictions_statistics_clean.csv
+      --document structure
+    # ARTICLES
    - > 
      df-wiki-cli
      meilisearch 
@@ -279,12 +360,13 @@ update-meilisearch-index:prod:
  stage: get-meili-key
  variables:
    MEILI_HOST: "http://localhost:7700"
+    MEILI_HOST_NO_PROTOCOLE: "localhost:7700"
  script:
    - > 
      df-wiki-cli 
      meilisearch 
      --key "${MEILI_MASTER_KEY}" 
-      --host ${MEILI_HOST} 
+      --host "${MEILI_HOST}"
      get-env-var
      --output build.env
  artifacts:
@@ -297,6 +379,7 @@ set-meili-env:dev:
  extends: .set-meili-env
  variables:
    MEILI_HOST: "https://${MEILI_HOST_DEV}"
+    MEILI_HOST_NO_PROTOCOLE: ${MEILI_HOST_DEV}
  rules:
    - if: $CI_COMMIT_BRANCH != "main"  

@@ -304,6 +387,8 @@ set-meili-env:prod:
  extends: .set-meili-env
  variables:
    MEILI_HOST: "https://${MEILI_HOST_PROD}"
+    MEILI_HOST_NO_PROTOCOLE: ${MEILI_HOST_PROD}
+
  rules:
    - if: $CI_COMMIT_BRANCH == "main"  

@@ -335,7 +420,6 @@ sync-zotero:
    - *docker-login
  script:
    - echo $MEILI_HOST
-    - echo $MEILI_API_KEY
    - >
      docker buildx build --pull -t "$CI_REGISTRY_IMAGE/$IMAGE_NAME:$CI_COMMIT_SHORT_SHA" 
      --build-arg "BASE_URL=$BASE_URL" 
@@ -369,7 +453,7 @@ build:prod:wiki:
    # - get-pfam
  variables:
    BASE_URL: /wiki/
-    HOST_URL: https://${HOST_PROD}
+    HOST_URL: "https://${HOST_PROD}"

  rules:
    - if: $CI_COMMIT_BRANCH == "main"  
@@ -411,7 +495,7 @@ build:prod:wiki:
 deploy:dev:
  extends: .deploy
  rules:
-    - if: $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH == "rework-references"
+    - if: $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH == "distri-system-section"
  needs:
    - "build:dev:wiki"
  when: manual
@@ -436,7 +520,7 @@ deploy:prod:
  variables:
    NODE_ENV: "production"
    KUBE_NAMESPACE: "defense-finder-prod"
-    PUBLIC_URL: "${HOST_PROD}"
+    PUBLIC_URL: "https://${HOST_PROD}"
    CI_DEBUG_TRACE: "false"
    ENV: "production"
  environment:
@@ -520,7 +604,7 @@ create-structures-archives:prod:
  variables:
    NODE_ENV: "production"
    KUBE_NAMESPACE: "defense-finder-prod"
-    PUBLIC_URL: "${HOST_PROD}"
+    PUBLIC_URL: "https://${HOST_PROD}"
    CI_DEBUG_TRACE: "false"
    ENV: "production"
  environment:

--- a/components/Nav/Navbar.vue
+++ b/components/Nav/Navbar.vue
@@ -36,12 +36,11 @@ const sections = ref([
    href: runtimeConfig.public.defenseFinderWebservice,
  },
  { id: "wiki", label: "Wiki", to: '/', },
-  { id: "refseq", label: "RefSeq DB", to: '/refseq/' },
-  { id: "structure", label: "Structures DB", to: '/structure/' },
+  { id: "refseq", label: "RefSeq DB", to: '/refseq' },
+  { id: "structure", label: "Structures DB", to: '/structure' },
  { id: "help", label: "Help", to: '/help/defensefinder' },
 ]);

-
 function toggleDrawer() {
  emit('update:drawer', !props.drawer)
 }
@@ -63,7 +62,7 @@ function toggleDrawer() {
          {{ section.label }}
        </v-btn>
        <v-btn @click="toggleTheme" color="primary" :icon="theme.global.current.value.dark ? 'md:light_mode' : 'md:dark_mode'
-          "></v-btn>
+    "></v-btn>
      </template>
      <template v-else>
        <v-menu>

--- a/components/content/MolstarPdbePlugin.vue
+++ b/components/content/MolstarPdbePlugin.vue
@@ -17,7 +17,6 @@ export interface Props {
 }

 const { mobile } = useDisplay()
-// const selectedPdb = ref('')
 const refinedDataUrls = computed(() => {

    function refinedUrl(url: string) {
@@ -105,7 +104,6 @@ useHead({
 })

 const pdbeMolstarComponent = ref(null)
-// const selectedPdb = ref("/wiki/avs/AVAST_I,AVAST_I__Avs1A,0,V-plddts_85.07081.pdb")
 const selectedPdb: Ref<string | null> = ref(null)
 const structureToDownload: Ref<string | null> = ref(null)
 const selectedPaePath = computed(() => {
@@ -151,9 +149,6 @@ function setSelectedPdbToFirst() {
    }
 }

-// const moleculeFormat = computed(() => {
-//     return toValue(selectedPdb)?.split(".")?.[-1]?.toLowerCase() ?? "pdb"
-// })
 const moleculeFormat: Ref<string> = ref("pdb")
 </script>

@@ -191,7 +186,8 @@ const moleculeFormat: Ref<string> = ref("pdb")
        <v-card flat :rounded="false">
            <v-toolbar>
                <v-toolbar-title>Structures</v-toolbar-title>
-                <v-select v-model="selectedPdb" label="Select PDB" :items="refinedDataUrls" hide-details="auto"></v-select>
+                <v-select v-model="selectedPdb" label="Select PDB" :items="refinedDataUrls"
+                    hide-details="auto"></v-select>
                <v-spacer></v-spacer>

                <v-btn :disabled="!selectedPdb" icon="md:download" :href="structureToDownload"></v-btn>
@@ -207,15 +203,12 @@ const moleculeFormat: Ref<string> = ref("pdb")
                            class="d-flex align-center justify-center flex-wrap text-center mx-auto px-4 my-3"
                            :height="computedHeight" :width="computedWidth" style="position:relative;">
                            <pdbe-molstar ref="pdbeMolstarComponent" :custom-data-url="selectedPdb" alphafold-view
-                                sequence-panel="true" landscape="false" :custom-data-format="moleculeFormat"></pdbe-molstar>
+                                sequence-panel="true" landscape="false"
+                                :custom-data-format="moleculeFormat"></pdbe-molstar>
                        </v-sheet>
                    </v-col>
                    <v-col v-if="moleculeFormat === 'cif'" :cols="mobile ? 12 : undefined">
                        <v-img :src="selectedPaePath"></v-img>
-
-                        <!-- <PlotFigure v-if="sanitizedPaeData?.length > 0 && paeError === null" defer
-                                :options="plotPaeOptions"></PlotFigure>
-                            <v-alert v-else type="warning" variant="tonal">{{ paeError }}</v-alert> -->
                        <v-card flat color="transparent">
                            <v-card-title>Model Confidence</v-card-title>
                            <v-card-text>

--- a/components/content/RefseqDb.vue
+++ b/components/content/RefseqDb.vue
@@ -18,7 +18,12 @@ const itemValue = ref("id");
 const { width } = useDisplay();
 const dbName = ref("refseq")
 const taxonomyFacet = ref<Record<string, any> | undefined>(undefined)
-
+const cellPlotMargin = ref({
+    marginLeft: 150,
+    marginBottom: 200,
+    marginTop: 0,
+    marginRight: 50
+})


 onBeforeMount(async () => {
@@ -150,7 +155,7 @@ const availableTaxo: Ref<string[]> = ref([
    "Superkingdom"
 ]);

-const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog'])
+const scaleTypes = ref<string[]>(['linear', 'sqrt', 'symlog'])
 const selectedTaxoRank = ref<"species" | "genus" | "family" | "order" | "class" | "phylum" | "Superkingdom">("Superkingdom");

 const headers = ref([
@@ -190,7 +195,18 @@ const computedWidth = computed(() => {
    return Math.max(currentWidth, 550);
 });

+const cellPlotComputedDimension = computed(() => {
+    const { marginLeft, marginBottom, marginRight, marginTop } = toValue(cellPlotMargin)
+    const toValWidth = toValue(width)
+    const widthFixCell = countSystem.value * 50 + marginLeft + marginRight
+    const heigthFix = countClade.value * 50 + marginTop + marginBottom
+    if (widthFixCell > toValWidth) {
+        return { width: toValWidth - marginLeft - marginRight, height: undefined }
+    } else {
+        return { width: widthFixCell, height: heigthFix }
+    }

+})

 const allHits: Ref<Record<string, any> | undefined> = ref(undefined)

@@ -201,24 +217,26 @@ async function getAllHits(params: { index: string, params: Record<string, any>,
        selectedTaxoRank.value = "Superkingdom"

    }
-    if (params.index === toValue(dbName)) {
-
-        pendingAllHits.value = true
-        try {
-            const { data, error } = await useAsyncMeiliSearch({
-                ...params,
-                params: {
-                    ...params.params,
-                    'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)]
-                }
-            })
-            allHits.value = data.value
-        } finally {
-            pendingAllHits.value = false
+    // if (params.index === toValue(dbName)) {

-        }
+    pendingAllHits.value = true
+    try {
+        const { data, error } = await useAsyncMeiliSearch({
+            ...params,
+            index: "refseqsanitized",
+            params: {
+                ...params.params,
+
+                'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)]
+            }
+        })
+        allHits.value = data.value
+    } finally {
+        pendingAllHits.value = false

    }
+
+    // }
 }


@@ -358,29 +376,51 @@ const sortedCellDomain = computed(() => {
    }
 })
 const binPlotOptions = computed(() => {
+    const { marginLeft, marginBottom } = toValue(cellPlotMargin)
+    const { height } = toValue(cellPlotComputedDimension)
+
    return {
-        marginLeft: 150,
-        marginBottom: 200,
+        marginLeft,
+        marginBottom,
        padding: 0,
        grid: true,
+        aspectRatio: height ? undefined : 1,
        x: { tickRotate: 90, label: "Systems", domain: toValue(sortedCellDomain) },
-        // y: { tickFormat: 's' },
        color: { scheme: "plasma", legend: true, label: `Proportion per ${selectedTaxoRank.value}`, domain: [0, 100] },
    }
 })

+const countSystem = computed(() => {
+    const toValueAllHits = toValue(allHits)
+    const data = toValueAllHits?.hits ?? []
+    const setSystem = new Set(data.map(d => d.type))
+    return setSystem.size
+})
+
+const countClade = computed(() => {
+    const toValueAllHits = toValue(allHits)
+    const data = toValueAllHits?.hits ?? []
+    const setSystem = new Set(data.map(d => d[selectedTaxoRank.value]))
+    return setSystem.size
+})

 const binPlotGroup = computed(() => {
    return Plot.group(
        {
            label: (d) => d.fill,
            fill: {
+                /**
+                 * 
+                 * @param I is the list of element index that are par of the same group (cell)
+                 * @param X is the list of all elements 
+                 */
                reduceIndex: function (I, X) {
                    const toValTaxonomyFacet = toValue(taxonomyFacet)
                    if (toValTaxonomyFacet !== undefined) {
                        const clade = X[I[0]][selectedTaxoRank.value]
                        const system = X[I[0]].type
-                        // Get the list of item for this group
+                        // Get the list of all the items for this group (same cell)
+                        // and group them per type and assembly
                        const itemsPerGroup = d3.rollup(I.map(index => {
                            return X[index]
                        }), D => D.length, d => d.type, d => d.Assembly)
@@ -408,22 +448,24 @@ const binPlotGroup = computed(() => {

 const binPlotDataOptions = computed(() => {
    const toValueAllHits = toValue(allHits)
+    const toValBinPlotGroup = toValue(binPlotGroup)
    const data = toValueAllHits?.hits ?? []
+    const plotCellMark = Plot.cell(data, toValBinPlotGroup)
+    const { width, height } = toValue(cellPlotComputedDimension)
+
+    const dim = height ? { width, height } : { width }
+
    return toValueAllHits?.hits?.length > 0 ? {
        ...binPlotOptions.value,
-        width: width.value,
-        title: "Proportion of genomes with defense system X for a given clade",
+        ...dim,
+        title: `Proportion of genomes with defense system X per ${selectedTaxoRank.value} taxonomic rank`,
        color: {
            ...binPlotOptions.value.color,
            type: scaleType.value,
            tickFormat: '~s',
            ticks: scaleType.value === 'symlog' ? 3 : 5,
        },
-        marks: [
-            Plot.cell(data,
-                toValue(binPlotGroup)
-            ),
-        ],
+        marks: [plotCellMark],
    } : null
 })


--- a/content/2.general-concepts/3.defense-systems-effectors.md
+++ b/content/2.general-concepts/3.defense-systems-effectors.md
@@ -11,7 +11,7 @@ contributors:

 Most of the anti-phage defense systems of bacteria can be described as a combination of two main components. 
 First, a sensing component that detects phage infection to trigger the immune response 
-(see [defense-systems_trigger](/general-concepts/defense-systems_trigger/)). 
+(see [defense-systems_trigger](/general-concepts/defense-systems_trigger)). 
 Second, an effector component that mediates the immune response following the detection of phage infection.

 The effector components of anti-phage systems are very diverse, and can be arbitrarily distributed in broad categories :ref{doi=10.1038/s41579-023-00934-x} :

--- a/data/refseq_res.csv
+++ b/data/refseq_res.csv
--- a/data/refseq_res_sm.csv
+++ b/data/refseq_res_sm.csv
--- a/deploy/df-wiki/values.yaml
+++ b/deploy/df-wiki/values.yaml
@@ -35,6 +35,7 @@ securityContext:
  runAsNonRoot: true
  runAsUser: 101
  runAsGroup: 101
+  allowPrivilegeEscalation: false
  # capabilities:
  #   drop:
  #   - ALL

--- a/deploy/meilisearch/Chart.yaml
+++ b/deploy/meilisearch/Chart.yaml
@@ -25,6 +25,6 @@ appVersion: "1.16.0"

 dependencies:
  - name: meilisearch
-    version: 0.3.0
+    version: 0.5.0
    repository: "https://meilisearch.github.io/meilisearch-kubernetes"

--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,9 +5,9 @@ services:
      context: .
      target: dev
      args:
-        BASE_URL: /wiki/
+        BASE_URL: /wiki
        MEILI_HOST: http://localhost:7700
-        MEILI_API_KEY: f9cc073016cbb392365aae86517878cb3f3408bb85c1fafd06e27f73ccb35e3d
+        MEILI_API_KEY: f5f5f1bc48e6379fc2509f5bf0aed1fce96c1bbf86e0a194c605b258d7cfe890
        HOST_URL: http://localhost:8082
    container_name: nuxt
    environment:
@@ -31,7 +31,7 @@ services:
      - main

  meilisearch:
-    image: getmeili/meilisearch:v1.5
+    image: getmeili/meilisearch:v1.7
    # command:
    #   - meilisearch
    #   - --http-addr 

--- a/nuxt.config.ts
+++ b/nuxt.config.ts
@@ -19,11 +19,12 @@ export default defineNuxtConfig({
          "data-domaim": "defense-finder.dev.pasteur.cloud",
          src: "https://plausible.pasteur.cloud/js/script.js"
        }
-
-
      ]
    }
  },
+  router: {
+    strict: true
+  },
  site: {
    url: 'https://defensefinder.mdmlab.fr',
    name: 'DefenseFinder webservice and knowledge base',

--- a/packages/df-wiki-cli/data/test/refseq.csv
+++ b/packages/df-wiki-cli/data/test/refseq.csv
+sys_id,Assembly,replicon,type,subtype,sys_beg,sys_end,protein_in_syst,genes_count,name_of_profiles_in_sys,accession_in_sys,Superkingdom,phylum,class,order,family,genus,species
+GCF_001602115_NZ_CP014352_AbiE_1,GCF_001602115.1,NZ_CP014352,AbiE,AbiE,GCF_001602115.1_NZ_CP014352_01750,GCF_001602115.1_NZ_CP014352_01751,"GCF_001602115.1_NZ_CP014352_01750, GCF_001602115.1_NZ_CP014352_01751",2.0,"AbiEii__AbiEii, AbiEii__AbiEi_1","WP_062819585.1, WP_062819586.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_Thoeris_II_1,GCF_001602115.1,NZ_CP014352,Thoeris,Thoeris_II,GCF_001602115.1_NZ_CP014352_00483,GCF_001602115.1_NZ_CP014352_00484,"GCF_001602115.1_NZ_CP014352_00483, GCF_001602115.1_NZ_CP014352_00484",2.0,"Thoeris__ThsB_Global, Thoeris_II__ThsA_new_petit","WP_062818945.1, WP_082815879.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_IIG_5,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_00058,GCF_001602115.1_NZ_CP014352_00058,GCF_001602115.1_NZ_CP014352_00058,1.0,RM_Type_IIG__Type_IIG,WP_062818741.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_I_1,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_00060,GCF_001602115.1_NZ_CP014352_00062,"GCF_001602115.1_NZ_CP014352_00060, GCF_001602115.1_NZ_CP014352_00061, GCF_001602115.1_NZ_CP014352_00062",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062818743.1, WP_062818744.1, WP_062818745.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_IIG_6,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_00069,GCF_001602115.1_NZ_CP014352_00069,GCF_001602115.1_NZ_CP014352_00069,1.0,RM_Type_IIG__Type_IIG,WP_062818749.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_I_2,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_00475,GCF_001602115.1_NZ_CP014352_00477,"GCF_001602115.1_NZ_CP014352_00475, GCF_001602115.1_NZ_CP014352_00476, GCF_001602115.1_NZ_CP014352_00477",3.0,"RM__Type_I_MTases, RM__Type_I_S, RM__Type_I_REases","WP_062818938.1, WP_082815877.1, WP_062818940.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_I_3,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_02756,GCF_001602115.1_NZ_CP014352_02758,"GCF_001602115.1_NZ_CP014352_02756, GCF_001602115.1_NZ_CP014352_02757, GCF_001602115.1_NZ_CP014352_02758",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062820191.1, WP_062820192.1, WP_062820839.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_IIG_7,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_03005,GCF_001602115.1_NZ_CP014352_03005,GCF_001602115.1_NZ_CP014352_03005,1.0,RM_Type_IIG__Type_IIG,WP_198401420.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_I_4,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_03420,GCF_001602115.1_NZ_CP014352_03422,"GCF_001602115.1_NZ_CP014352_03420, GCF_001602115.1_NZ_CP014352_03421, GCF_001602115.1_NZ_CP014352_03422",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062818743.1, WP_062818744.1, WP_062818745.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_RM_Type_IIG_8,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_03427,GCF_001602115.1_NZ_CP014352_03427,GCF_001602115.1_NZ_CP014352_03427,1.0,RM_Type_IIG__Type_IIG,WP_157773967.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014353,GCF_001602115.1,NZ_CP014353,No system found,,,,,,,,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_001602115_NZ_CP014352_CAS_Class1-Subtype-I-G_1,GCF_001602115.1,NZ_CP014352,Cas,CAS_Class1-Subtype-I-G,GCF_001602115.1_NZ_CP014352_01614,GCF_001602115.1_NZ_CP014352_01620,"GCF_001602115_NZ_CP014352_01614, GCF_001602115_NZ_CP014352_01615, GCF_001602115_NZ_CP014352_01616, GCF_001602115_NZ_CP014352_01617, GCF_001602115_NZ_CP014352_01618, GCF_001602115_NZ_CP014352_01619, GCF_001602115_NZ_CP014352_01620",7.0,"DEDDh_I_II_III_IV_V_VI_1, csb1gr7_I-G_1, csb2gr5_I-G_1, cas3_I-G_3, csb3_I-G_1, cas1_I_II_III_IV_V_VI_1, cas2_I_II_III_IV_V_VI_3","WP_198401461.1, WP_062819507.1, WP_062819508.1, WP_062819509.1, WP_062819510.1, WP_062819511.1, WP_062819512.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
+GCF_000830055_NZ_CP010781_Gabija_1,GCF_000830055.1,NZ_CP010781,Gabija,Gabija,GCF_000830055.1_NZ_CP010781_01480,GCF_000830055.1_NZ_CP010781_01481,"GCF_000830055.1_NZ_CP010781_01480, GCF_000830055.1_NZ_CP010781_01481",2.0,"Gabija__GajA, Gabija__GajB_2","WP_005115822.1, WP_000073989.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
+GCF_000830055_NZ_CP010781_Gao_Qat_1,GCF_000830055.1,NZ_CP010781,Gao_Qat,Gao_Qat,GCF_000830055.1_NZ_CP010781_00952,GCF_000830055.1_NZ_CP010781_00955,"GCF_000830055.1_NZ_CP010781_00952, GCF_000830055.1_NZ_CP010781_00953, GCF_000830055.1_NZ_CP010781_00954, GCF_000830055.1_NZ_CP010781_00955",4.0,"Gao_Qat__QatA, Gao_Qat__QatB, Gao_Qat__QatC, Gao_Qat__QatD","WP_000269396.1, WP_000537345.1, WP_041152179.1, WP_000937120.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
+GCF_000830055_NZ_CP010781_RosmerTA_1,GCF_000830055.1,NZ_CP010781,RosmerTA,RosmerTA,GCF_000830055.1_NZ_CP010781_00651,GCF_000830055.1_NZ_CP010781_00652,"GCF_000830055.1_NZ_CP010781_00651, GCF_000830055.1_NZ_CP010781_00652",2.0,"RosmerTA__RmrA_2634932349, RosmerTA__RmrT_2634932349","WP_000482796.1, WP_001182927.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
+GCF_000830055_NZ_CP010781_RM_Type_II_1,GCF_000830055.1,NZ_CP010781,RM,RM_Type_II,GCF_000830055.1_NZ_CP010781_03697,GCF_000830055.1_NZ_CP010781_03698,"GCF_000830055.1_NZ_CP010781_03697, GCF_000830055.1_NZ_CP010781_03698",2.0,"RM_Type_II__Type_II_REases, RM_Type_II__Type_II_MTases","WP_001062713.1, WP_000862934.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
+GCF_000830055_NZ_CP010782,GCF_000830055.1,NZ_CP010782,No system found,,,,,,,,Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
+GCF_000830055_NZ_CP010781_CAS_Class1-Subtype-I-F_1,GCF_000830055.1,NZ_CP010781,Cas,CAS_Class1-Subtype-I-F,GCF_000830055.1_NZ_CP010781_02755,GCF_000830055.1_NZ_CP010781_02760,"GCF_000830055_NZ_CP010781_02755, GCF_000830055_NZ_CP010781_02756, GCF_000830055_NZ_CP010781_02757, GCF_000830055_NZ_CP010781_02758, GCF_000830055_NZ_CP010781_02759, GCF_000830055_NZ_CP010781_02760",6.0,"cas6f_I_II_III_IV_V_VI_3, cas7f_I-F_2, cas5f_I-F_4, cas8f_I-F_8, cas3f_I-F_1, cas1_I-F_1","WP_001104789.1, WP_001097003.1, WP_001215684.1, WP_000841022.1, WP_000637362.1, WP_000436801.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
--- a/packages/df-wiki-cli/df_wiki_cli/content/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py
@@ -244,7 +244,7 @@ def pae2png(tsv_file, png_file):
    plt.close()


-@app.command()
+@app.command(help="Remove version from sys_id")
 def refseq(
    input: Annotated[
        Path,
@@ -276,3 +276,189 @@ def refseq(
            console.print(f"[green]{row['sys_id']} ->  {result}")
            row["sys_id"] = result
            writer.writerow(row)
+
+
+@app.command(
+    help='Remove "No system found" hits if the are not the only hit for an assembly'
+)
+def refseq_sanitized_hits(
+    input: Annotated[
+        Path,
+        typer.Option(
+            exists=False,
+            file_okay=True,
+            writable=True,
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    df = pd.read_csv(input)
+    df_final = _sanitized_refseq_hits(df)
+    df_final.reset_index().to_csv(output, index=False)
+    return df_final
+
+
+@app.command(help="Group hits per assembly and types (from 'sanitized-hits')")
+def refseq_group_per_assembly_and_type(
+    input: Annotated[
+        Path,
+        typer.Option(
+            exists=False,
+            file_okay=True,
+            writable=True,
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    df = pd.read_csv(input)
+    df_final = _sanitized_refseq_hits(df)
+    df_final_grouped = df_final.groupby(
+        [
+            "Assembly",
+            "type",
+            "Superkingdom",
+            "phylum",
+            "class",
+            "order",
+            "family",
+            "genus",
+            "species",
+        ],
+        as_index=False,
+    ).size()
+    df_final_grouped.reset_index().to_csv(output, index=False)
+
+
+@app.command()
+def refseq_group_per_assembly(
+    input: Annotated[
+        Path,
+        typer.Option(
+            exists=False,
+            file_okay=True,
+            writable=True,
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    df = pd.read_csv(input)
+
+    df["Assembly"] = df["Assembly"].apply(remove_version)
+    df_grouped = df.groupby(
+        [
+            "Assembly",
+            "Superkingdom",
+            "phylum",
+            "class",
+            "order",
+            "family",
+            "genus",
+            "species",
+        ],
+        as_index=False,
+    ).size()
+    df_grouped.reset_index().to_csv(output, index=False)
+
+
+@app.command()
+def refseq_type_count(
+    input: Annotated[
+        Path,
+        typer.Option(
+            exists=False,
+            file_okay=True,
+            writable=True,
+            help="csv file with type and taxo (No system found removed when other system are founded in the same assembly)",
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Option(
+            file_okay=True,
+            dir_okay=False,
+            writable=True,
+            resolve_path=True,
+        ),
+    ],
+):
+    df = pd.read_csv(input)
+    grouped_per_type = df.groupby(
+        ["type"],
+        as_index=False,
+    ).size()
+    grouped_per_type.reset_index().to_csv(output, index=False)
+
+
+def remove_version(assembly):
+    return assembly.split(".")[0]
+
+
+def _sanitized_refseq_hits(df):
+    df["Assembly"] = df["Assembly"].apply(remove_version)
+    # Lower type namesmc
+    # df["type"] = df["type"].apply(lambda x: x.lower())
+
+    # Get all row with no system type
+    df_no_system = df.loc[df["type"] == "No system found"]
+    # unique assembly with no sys
+    serie_assembly_with_no_sys = df_no_system["Assembly"].unique()
+    # filter assembly to have those with no sys
+    df_with_no_sys = df[df["Assembly"].isin(serie_assembly_with_no_sys)]
+    # Group them by assembly, type, taxo
+    no_sys_assembly_by_size = df_with_no_sys.groupby(
+        [
+            "Assembly",
+            "type",
+            "Superkingdom",
+            "phylum",
+            "class",
+            "order",
+            "family",
+            "genus",
+            "species",
+        ],
+        as_index=False,
+    ).size()
+
+    # count each occurrence
+    df_again_per_assembly = no_sys_assembly_by_size.groupby(
+        "Assembly",
+        as_index=False,
+    ).size()
+    # filter to keep only size > 1 (when == 1 it means that there is only "no system found for an assembly")
+    # so we should keep it
+    df_size_sup_1 = df_again_per_assembly[df_again_per_assembly["size"] > 1]
+    assembly_where_should_remove_no_sys_found = df_size_sup_1["Assembly"].unique()
+
+    # Construct new dataset to remove entries with no system found
+    # while found system on other replicon that belongs to the
+    # same assembly
+    df_filtered_assembly_only_with_sys = df[
+        (df["type"] != "No system found")
+        | ~df.Assembly.isin(assembly_where_should_remove_no_sys_found)
+    ]
+    return df_filtered_assembly_only_with_sys
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py
@@ -38,6 +38,28 @@ class RefSeqCsv(BaseModel):
    species: str


+class RefSeqTaxo(BaseModel):
+    index: int
+    Assembly: str
+    Superkingdom: str
+    phylum: str
+    class_: str = Field(..., alias="class")
+    order: str
+    family: str
+    genus: str
+    species: str
+    size: int
+
+
+class RefSeqTaxoType(RefSeqTaxo):
+    type: str
+
+
+class RefSeqTypeCount(BaseModel):
+    type: str
+    size: int
+
+
 class StructureTypes(str, Enum):
    Validated = "Validated"
    DF = "DF"
@@ -84,28 +106,57 @@ def update_refseqtaxo(host: str, key: str, file: Path, document: str):
    documents = []
    with open(file, "r") as csvfile:
        csvreader = csv.DictReader(csvfile)
-        assembly = {}
        for row in csvreader:
-            assembly_id = row["Assembly"]
-            assembly[row["Assembly"]] = {
-                k: row[k]
-                for k in (
-                    "Superkingdom",
-                    "phylum",
-                    "class",
-                    "order",
-                    "family",
-                    "genus",
-                    "species",
-                    "Assembly",
-                )
-                if k in row
-            }
-            assembly[assembly_id]["Assembly"] = assembly[assembly_id]["Assembly"].split('.')[0]
-        for item in assembly.values():
-            documents.append(item)
+            doc = RefSeqTaxo(**row)
+            documents.append(doc.model_dump(by_alias=True))
    tasks = index.add_documents_in_batches(documents, primary_key="Assembly")
-    print(tasks)
+    for task in tasks:
+        console.print(task)
+    index.update_pagination_settings({"maxTotalHits": 1000000})
+    index.update_filterable_attributes(
+        body=[
+            "Superkingdom",
+            "phylum",
+            "class",
+            "order",
+            "family",
+            "genus",
+            "species",
+            "Assembly",
+        ]
+    )
+    index.update_sortable_attributes(
+        [
+            "Superkingdom",
+            "phylum",
+            "class",
+            "order",
+            "family",
+            "genus",
+            "species",
+            "Assembly",
+            "size",
+        ]
+    )
+    params = {
+        "maxValuesPerFacet": 1000000,
+        "sortFacetValuesBy": {"*": "count"},
+    }
+    index.update_faceting_settings(params)
+
+
+def update_refseqtaxotype(host: str, key: str, file: Path, document: str):
+    client = meilisearch.Client(host, key)
+    index = client.index(document.lower())
+    documents = []
+    with open(file, "r") as csvfile:
+        csvreader = csv.DictReader(csvfile)
+        for row in csvreader:
+            doc = RefSeqTaxoType(**row)
+            documents.append(doc.model_dump(by_alias=True))
+    tasks = index.add_documents_in_batches(documents, primary_key="index")
+    for task in tasks:
+        console.print(task)
    index.update_pagination_settings({"maxTotalHits": 1000000})
    index.update_filterable_attributes(
        body=[
@@ -129,6 +180,35 @@ def update_refseqtaxo(host: str, key: str, file: Path, document: str):
            "genus",
            "species",
            "Assembly",
+            "type",
+            "size",
+        ]
+    )
+    params = {
+        "maxValuesPerFacet": 1000000,
+        "sortFacetValuesBy": {"*": "count"},
+    }
+    index.update_faceting_settings(params)
+
+
+def update_refseqtypecount(host: str, key: str, file: Path, document: str):
+    client = meilisearch.Client(host, key)
+    index = client.index(document.lower())
+    documents = []
+    with open(file, "r") as csvfile:
+        csvreader = csv.DictReader(csvfile)
+        for row in csvreader:
+            doc = RefSeqTypeCount(**row)
+            documents.append(doc.model_dump(by_alias=True))
+    tasks = index.add_documents_in_batches(documents, primary_key="type")
+    for task in tasks:
+        console.print(task)
+    index.update_pagination_settings({"maxTotalHits": 1000000})
+    index.update_filterable_attributes(body=["type"])
+    index.update_sortable_attributes(
+        [
+            "type",
+            "size",
        ]
    )
    params = {
@@ -342,7 +422,6 @@ def update_articles(
    print(attr_task)


-
 def split_on_comma(str_val: str) -> List[str]:
    for val in str_val.split(","):
        yield val.strip()
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py
@@ -6,6 +6,8 @@ from df_wiki_cli.meilisearch import (
    update_refseqtaxo,
    update_articles,
    update_refseq,
+    update_refseqtaxotype,
+    update_refseqtypecount,
    update_structure,
    update_systems,
 )
@@ -21,6 +23,9 @@ app = typer.Typer()

 class Documents(str, Enum):
    refseqtaxo = "refseqtaxo"
+    refseqtaxotype = "refseqtaxotype"
+    refseqtypecount = "refseqtypecount"
+    refseqsanitized = "refseqsanitized"
    refseq = "refseq"
    structure = "structure"
    systems = "systems"
@@ -63,6 +68,7 @@ def update(
    ] = Documents.refseq,
    content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv",
 ):
+
    if document == "refseqtaxo":
        update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document)
    if document == "refseq":
@@ -73,6 +79,12 @@ def update(
        update_systems(ctx.obj.host, ctx.obj.key, file, document)
    if document == "article":
        update_articles(ctx.obj.host, ctx.obj.key, file, document)
+    if document == "refseqtaxotype":
+        update_refseqtaxotype(ctx.obj.host, ctx.obj.key, file, document)
+    if document == "refseqsanitized":
+        update_refseq(ctx.obj.host, ctx.obj.key, file, document)
+    if document == "refseqtypecount":
+        update_refseqtypecount(ctx.obj.host, ctx.obj.key, file, document)


 @app.command()
@@ -97,6 +109,12 @@ def index_update(ctx: typer.Context, index: str, primary_key: str):
    console.print(task)


+@app.command()
+def index_delete(ctx: typer.Context, index: str):
+    client = meilisearch.Client(ctx.obj.host, ctx.obj.key)
+    client.index(index).delete()
+
+
 @app.command()
 def task(ctx: typer.Context, id: str):
    client = meilisearch.Client(ctx.obj.host, ctx.obj.key)
@@ -119,7 +137,9 @@ def get_env_var(
    keys = client.get_keys()

    api_key = [res.key for res in keys.results if res.name == "Default Search API Key"]
+    print(ctx.obj.host)
    if len(api_key) == 1:
        with open(output, "a") as outfile:
-            outfile.write(f"MEILI_HOST={ctx.obj.host}\n")
-            outfile.write(f"MEILI_API_KEY={api_key[0]}\n")
+            print(ctx.obj.host)
+            outfile.write(f'MEILI_HOST="{ctx.obj.host}"\n')
+            outfile.write(f'MEILI_API_KEY="{api_key[0]}"\n')
--- a/packages/df-wiki-cli/pyproject.toml
+++ b/packages/df-wiki-cli/pyproject.toml
 [tool.poetry]
 name = "df-wiki-cli"
-version = "0.1.6"
+version = "0.1.7"
 description = ""
 authors = ["Remi  PLANEL <rplanel@pasteur.fr>"]
 readme = "README.md"

--- a/scripts/fill-local-meilisearch.sh
+++ b/scripts/fill-local-meilisearch.sh
+#!/bin/bash 
+# REFSEQ
+df-wiki-cli meilisearch delete-all-documents refseq
+df-wiki-cli meilisearch update --file ../data/refseq_res.csv --document refseq
+
+
+# REF SEQ TAXO
+df-wiki-cli content refseq-group-per-assembly --input ../data/refseq_res.csv --output /tmp/refseqtaxo.csv
+df-wiki-cli meilisearch delete-all-documents refseqtaxo
+df-wiki-cli meilisearch update --file /tmp/refseqtaxo.csv --document refseqtaxo
+
+# REFSEQ TAXO TYPE
+
+df-wiki-cli content refseq-group-per-assembly-and-type --input ../data/refseq_res.csv --output /tmp/refseqtaxotype.csv
+df-wiki-cli meilisearch delete-all-documents refseqtaxotype
+df-wiki-cli meilisearch  update --file /tmp/refseqtaxotype.csv --document refseqtaxotype
+
+
+# SANITIZED REFSEQ
+df-wiki-cli content refseq-sanitized-hits --input ../data/refseq_res.csv --output /tmp/refseq-sanitized.csv
+
+df-wiki-cli meilisearch delete-all-documents refseqsanitized
+df-wiki-cli meilisearch update --file /tmp/refseq-sanitized.csv --document refseqsanitized 
+
+# systems
+df-wiki-cli content systems --dir ../content/3.defense-systems/ --pfam ../public/pfam-a-hmm.csv --output /tmp/list-systems.json
+df-wiki-cli meilisearch update --file /tmp/list-systems.json --document systems
+
+# STRUCTURE 
+df-wiki-cli meilisearch update --file ../data/all_predictions_statistics_clean.csv --document structure
+
+# ARTICLES
+# df-wiki-cli meilisearch delete-all-documents article
+# df-wiki-cli meilisearch
No results found