Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mdm-lab/wiki
  • hvaysset/wiki
  • jsousa/wiki
  • tclabby/wiki
4 results
Show changes
Commits on Source (17)
Showing
with 550 additions and 95 deletions
......@@ -192,18 +192,20 @@ lint:
MEILI_HOST: "http://localhost:7700"
script:
# - rm data/list-systems.json
###### REFSEQ
# delete all document refseq
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
delete-all-documents refseq
# update refseq index
- >
df-wiki-cli
content systems
--dir content/3.defense-systems/
--pfam public/pfam-a-hmm.csv
--output data/list-systems.json
df-wiki-cli meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
index-update refseq sys_id
- >
df-wiki-cli
meilisearch
......@@ -211,23 +213,92 @@ lint:
--key ${MEILI_MASTER_KEY}
update
--file data/refseq_res.csv
--document refseq
--document refseq
# REF SEQ TAXO #
- >
df-wiki-cli
content
refseq-group-per-assembly
--input data/refseq_res.csv
--output data/refseqtaxo.csv
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
delete-all-documents refseqtaxo
- >
df-wiki-cli meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
index-update refseqtaxo Assembly
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
update
--file data/refseq_res.csv
--file data/refseqtaxo.csv
--document refseqtaxo
# REFSEQ TAXO TYPE #
- >
df-wiki-cli
content
refseq-group-per-assembly-and-type
--input data/refseq_res.csv
--output data/refseqtaxotype.csv
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
delete-all-documents refseqtaxotype
- >
df-wiki-cli meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
index-update refseqtaxotype index
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
update
--file data/all_predictions_statistics_clean.csv
--document structure
--file data/refseqtaxotype.csv
--document refseqtaxotype
# SANITIZED REFSEQ
- >
df-wiki-cli
content
refseq-sanitized-hits
--input data/refseq_res.csv
--output data/refseq-sanitized.csv
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
delete-all-documents refseqsanitized
- >
df-wiki-cli meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
index-update refseqsanitized index
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
update
--file data/refseq-sanitized.csv
--document refseqsanitized
# SYSTEMS
- >
df-wiki-cli
content systems
--dir content/3.defense-systems/
--pfam public/pfam-a-hmm.csv
--output data/list-systems.json
- >
df-wiki-cli
meilisearch
......@@ -236,6 +307,16 @@ lint:
update
--file data/list-systems.json
--document systems
# STRUCTURE
- >
df-wiki-cli
meilisearch
--host ${MEILI_HOST}
--key ${MEILI_MASTER_KEY}
update
--file data/all_predictions_statistics_clean.csv
--document structure
# ARTICLES
- >
df-wiki-cli
meilisearch
......@@ -279,12 +360,13 @@ update-meilisearch-index:prod:
stage: get-meili-key
variables:
MEILI_HOST: "http://localhost:7700"
MEILI_HOST_NO_PROTOCOLE: "localhost:7700"
script:
- >
df-wiki-cli
meilisearch
--key "${MEILI_MASTER_KEY}"
--host ${MEILI_HOST}
--host "${MEILI_HOST}"
get-env-var
--output build.env
artifacts:
......@@ -297,6 +379,7 @@ set-meili-env:dev:
extends: .set-meili-env
variables:
MEILI_HOST: "https://${MEILI_HOST_DEV}"
MEILI_HOST_NO_PROTOCOLE: ${MEILI_HOST_DEV}
rules:
- if: $CI_COMMIT_BRANCH != "main"
......@@ -304,6 +387,8 @@ set-meili-env:prod:
extends: .set-meili-env
variables:
MEILI_HOST: "https://${MEILI_HOST_PROD}"
MEILI_HOST_NO_PROTOCOLE: ${MEILI_HOST_PROD}
rules:
- if: $CI_COMMIT_BRANCH == "main"
......@@ -335,7 +420,6 @@ sync-zotero:
- *docker-login
script:
- echo $MEILI_HOST
- echo $MEILI_API_KEY
- >
docker buildx build --pull -t "$CI_REGISTRY_IMAGE/$IMAGE_NAME:$CI_COMMIT_SHORT_SHA"
--build-arg "BASE_URL=$BASE_URL"
......@@ -369,7 +453,7 @@ build:prod:wiki:
# - get-pfam
variables:
BASE_URL: /wiki/
HOST_URL: https://${HOST_PROD}
HOST_URL: "https://${HOST_PROD}"
rules:
- if: $CI_COMMIT_BRANCH == "main"
......@@ -411,7 +495,7 @@ build:prod:wiki:
deploy:dev:
extends: .deploy
rules:
- if: $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH == "rework-references"
- if: $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH == "distri-system-section"
needs:
- "build:dev:wiki"
when: manual
......@@ -436,7 +520,7 @@ deploy:prod:
variables:
NODE_ENV: "production"
KUBE_NAMESPACE: "defense-finder-prod"
PUBLIC_URL: "${HOST_PROD}"
PUBLIC_URL: "https://${HOST_PROD}"
CI_DEBUG_TRACE: "false"
ENV: "production"
environment:
......@@ -520,7 +604,7 @@ create-structures-archives:prod:
variables:
NODE_ENV: "production"
KUBE_NAMESPACE: "defense-finder-prod"
PUBLIC_URL: "${HOST_PROD}"
PUBLIC_URL: "https://${HOST_PROD}"
CI_DEBUG_TRACE: "false"
ENV: "production"
environment:
......
......@@ -36,12 +36,11 @@ const sections = ref([
href: runtimeConfig.public.defenseFinderWebservice,
},
{ id: "wiki", label: "Wiki", to: '/', },
{ id: "refseq", label: "RefSeq DB", to: '/refseq/' },
{ id: "structure", label: "Structures DB", to: '/structure/' },
{ id: "refseq", label: "RefSeq DB", to: '/refseq' },
{ id: "structure", label: "Structures DB", to: '/structure' },
{ id: "help", label: "Help", to: '/help/defensefinder' },
]);
function toggleDrawer() {
emit('update:drawer', !props.drawer)
}
......@@ -63,7 +62,7 @@ function toggleDrawer() {
{{ section.label }}
</v-btn>
<v-btn @click="toggleTheme" color="primary" :icon="theme.global.current.value.dark ? 'md:light_mode' : 'md:dark_mode'
"></v-btn>
"></v-btn>
</template>
<template v-else>
<v-menu>
......
......@@ -17,7 +17,6 @@ export interface Props {
}
const { mobile } = useDisplay()
// const selectedPdb = ref('')
const refinedDataUrls = computed(() => {
function refinedUrl(url: string) {
......@@ -105,7 +104,6 @@ useHead({
})
const pdbeMolstarComponent = ref(null)
// const selectedPdb = ref("/wiki/avs/AVAST_I,AVAST_I__Avs1A,0,V-plddts_85.07081.pdb")
const selectedPdb: Ref<string | null> = ref(null)
const structureToDownload: Ref<string | null> = ref(null)
const selectedPaePath = computed(() => {
......@@ -151,9 +149,6 @@ function setSelectedPdbToFirst() {
}
}
// const moleculeFormat = computed(() => {
// return toValue(selectedPdb)?.split(".")?.[-1]?.toLowerCase() ?? "pdb"
// })
const moleculeFormat: Ref<string> = ref("pdb")
</script>
......@@ -191,7 +186,8 @@ const moleculeFormat: Ref<string> = ref("pdb")
<v-card flat :rounded="false">
<v-toolbar>
<v-toolbar-title>Structures</v-toolbar-title>
<v-select v-model="selectedPdb" label="Select PDB" :items="refinedDataUrls" hide-details="auto"></v-select>
<v-select v-model="selectedPdb" label="Select PDB" :items="refinedDataUrls"
hide-details="auto"></v-select>
<v-spacer></v-spacer>
<v-btn :disabled="!selectedPdb" icon="md:download" :href="structureToDownload"></v-btn>
......@@ -207,15 +203,12 @@ const moleculeFormat: Ref<string> = ref("pdb")
class="d-flex align-center justify-center flex-wrap text-center mx-auto px-4 my-3"
:height="computedHeight" :width="computedWidth" style="position:relative;">
<pdbe-molstar ref="pdbeMolstarComponent" :custom-data-url="selectedPdb" alphafold-view
sequence-panel="true" landscape="false" :custom-data-format="moleculeFormat"></pdbe-molstar>
sequence-panel="true" landscape="false"
:custom-data-format="moleculeFormat"></pdbe-molstar>
</v-sheet>
</v-col>
<v-col v-if="moleculeFormat === 'cif'" :cols="mobile ? 12 : undefined">
<v-img :src="selectedPaePath"></v-img>
<!-- <PlotFigure v-if="sanitizedPaeData?.length > 0 && paeError === null" defer
:options="plotPaeOptions"></PlotFigure>
<v-alert v-else type="warning" variant="tonal">{{ paeError }}</v-alert> -->
<v-card flat color="transparent">
<v-card-title>Model Confidence</v-card-title>
<v-card-text>
......
......@@ -18,7 +18,12 @@ const itemValue = ref("id");
const { width } = useDisplay();
const dbName = ref("refseq")
const taxonomyFacet = ref<Record<string, any> | undefined>(undefined)
// Margins shared by the cell (heat-map) plot: they are read both when sizing
// the plot (cellPlotComputedDimension) and when building its options
// (binPlotOptions), so the two stay in sync.
// NOTE(review): values are presumably pixels, as expected by Plot margins — confirm.
const cellPlotMargin = ref({
    marginLeft: 150,
    marginBottom: 200,
    marginTop: 0,
    marginRight: 50
})
onBeforeMount(async () => {
......@@ -150,7 +155,7 @@ const availableTaxo: Ref<string[]> = ref([
"Superkingdom"
]);
const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog'])
const scaleTypes = ref<string[]>(['linear', 'sqrt', 'symlog'])
const selectedTaxoRank = ref<"species" | "genus" | "family" | "order" | "class" | "phylum" | "Superkingdom">("Superkingdom");
const headers = ref([
......@@ -190,7 +195,18 @@ const computedWidth = computed(() => {
return Math.max(currentWidth, 550);
});
// Size of the cell plot.
// Each system (column) and each clade (row) is given 50 units, plus the shared
// margins. When that natural width does not fit in the available display
// width, only a (reduced) width is returned and the height is left undefined.
const cellPlotComputedDimension = computed(() => {
    const margins = toValue(cellPlotMargin)
    const availableWidth = toValue(width)
    const naturalWidth = countSystem.value * 50 + margins.marginLeft + margins.marginRight
    const naturalHeight = countClade.value * 50 + margins.marginTop + margins.marginBottom
    const overflows = naturalWidth > availableWidth
    return overflows
        ? { width: availableWidth - margins.marginLeft - margins.marginRight, height: undefined }
        : { width: naturalWidth, height: naturalHeight }
})
const allHits: Ref<Record<string, any> | undefined> = ref(undefined)
......@@ -201,24 +217,26 @@ async function getAllHits(params: { index: string, params: Record<string, any>,
selectedTaxoRank.value = "Superkingdom"
}
if (params.index === toValue(dbName)) {
pendingAllHits.value = true
try {
const { data, error } = await useAsyncMeiliSearch({
...params,
params: {
...params.params,
'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)]
}
})
allHits.value = data.value
} finally {
pendingAllHits.value = false
// if (params.index === toValue(dbName)) {
}
pendingAllHits.value = true
try {
const { data, error } = await useAsyncMeiliSearch({
...params,
index: "refseqsanitized",
params: {
...params.params,
'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)]
}
})
allHits.value = data.value
} finally {
pendingAllHits.value = false
}
// }
}
......@@ -358,29 +376,51 @@ const sortedCellDomain = computed(() => {
}
})
const binPlotOptions = computed(() => {
const { marginLeft, marginBottom } = toValue(cellPlotMargin)
const { height } = toValue(cellPlotComputedDimension)
return {
marginLeft: 150,
marginBottom: 200,
marginLeft,
marginBottom,
padding: 0,
grid: true,
aspectRatio: height ? undefined : 1,
x: { tickRotate: 90, label: "Systems", domain: toValue(sortedCellDomain) },
// y: { tickFormat: 's' },
color: { scheme: "plasma", legend: true, label: `Proportion per ${selectedTaxoRank.value}`, domain: [0, 100] },
}
})
// Number of distinct system types (`type` field) among the fetched hits.
const countSystem = computed(() => {
    const hits = toValue(allHits)?.hits ?? []
    const distinctTypes = new Set()
    for (const hit of hits) {
        distinctTypes.add(hit.type)
    }
    return distinctTypes.size
})
// Number of distinct clades among the fetched hits, at the currently
// selected taxonomic rank.
const countClade = computed(() => {
    const hits = toValue(allHits)?.hits ?? []
    const distinctClades = new Set()
    for (const hit of hits) {
        // The rank is read per hit, exactly like the original mapping did.
        distinctClades.add(hit[selectedTaxoRank.value])
    }
    return distinctClades.size
})
const binPlotGroup = computed(() => {
return Plot.group(
{
label: (d) => d.fill,
fill: {
/**
*
* @param I is the list of element index that are par of the same group (cell)
* @param X is the list of all elements
*/
reduceIndex: function (I, X) {
const toValTaxonomyFacet = toValue(taxonomyFacet)
if (toValTaxonomyFacet !== undefined) {
const clade = X[I[0]][selectedTaxoRank.value]
const system = X[I[0]].type
// Get the list of item for this group
// Get the list of all the items for this group (same cell)
// and group them per type and assembly
const itemsPerGroup = d3.rollup(I.map(index => {
return X[index]
}), D => D.length, d => d.type, d => d.Assembly)
......@@ -408,22 +448,24 @@ const binPlotGroup = computed(() => {
const binPlotDataOptions = computed(() => {
const toValueAllHits = toValue(allHits)
const toValBinPlotGroup = toValue(binPlotGroup)
const data = toValueAllHits?.hits ?? []
const plotCellMark = Plot.cell(data, toValBinPlotGroup)
const { width, height } = toValue(cellPlotComputedDimension)
const dim = height ? { width, height } : { width }
return toValueAllHits?.hits?.length > 0 ? {
...binPlotOptions.value,
width: width.value,
title: "Proportion of genomes with defense system X for a given clade",
...dim,
title: `Proportion of genomes with defense system X per ${selectedTaxoRank.value} taxonomic rank`,
color: {
...binPlotOptions.value.color,
type: scaleType.value,
tickFormat: '~s',
ticks: scaleType.value === 'symlog' ? 3 : 5,
},
marks: [
Plot.cell(data,
toValue(binPlotGroup)
),
],
marks: [plotCellMark],
} : null
})
......
......@@ -11,7 +11,7 @@ contributors:
Most of the anti-phage defense systems of bacteria can be described as a combination of two main components.
First, a sensing component that detects phage infection to trigger the immune response
(see [defense-systems_trigger](/general-concepts/defense-systems_trigger/)).
(see [defense-systems_trigger](/general-concepts/defense-systems_trigger)).
Second, an effector component that mediates the immune response following the detection of phage infection.
The effector components of anti-phage systems are very diverse, and can be arbitrarily distributed in broad categories :ref{doi=10.1038/s41579-023-00934-x} :
......
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
......@@ -35,6 +35,7 @@ securityContext:
runAsNonRoot: true
runAsUser: 101
runAsGroup: 101
allowPrivilegeEscalation: false
# capabilities:
# drop:
# - ALL
......
......@@ -25,6 +25,6 @@ appVersion: "1.16.0"
dependencies:
- name: meilisearch
version: 0.3.0
version: 0.5.0
repository: "https://meilisearch.github.io/meilisearch-kubernetes"
......@@ -5,9 +5,9 @@ services:
context: .
target: dev
args:
BASE_URL: /wiki/
BASE_URL: /wiki
MEILI_HOST: http://localhost:7700
MEILI_API_KEY: f9cc073016cbb392365aae86517878cb3f3408bb85c1fafd06e27f73ccb35e3d
MEILI_API_KEY: f5f5f1bc48e6379fc2509f5bf0aed1fce96c1bbf86e0a194c605b258d7cfe890
HOST_URL: http://localhost:8082
container_name: nuxt
environment:
......@@ -31,7 +31,7 @@ services:
- main
meilisearch:
image: getmeili/meilisearch:v1.5
image: getmeili/meilisearch:v1.7
# command:
# - meilisearch
# - --http-addr
......
......@@ -19,11 +19,12 @@ export default defineNuxtConfig({
"data-domaim": "defense-finder.dev.pasteur.cloud",
src: "https://plausible.pasteur.cloud/js/script.js"
}
]
}
},
router: {
strict: true
},
site: {
url: 'https://defensefinder.mdmlab.fr',
name: 'DefenseFinder webservice and knowledge base',
......
sys_id,Assembly,replicon,type,subtype,sys_beg,sys_end,protein_in_syst,genes_count,name_of_profiles_in_sys,accession_in_sys,Superkingdom,phylum,class,order,family,genus,species
GCF_001602115_NZ_CP014352_AbiE_1,GCF_001602115.1,NZ_CP014352,AbiE,AbiE,GCF_001602115.1_NZ_CP014352_01750,GCF_001602115.1_NZ_CP014352_01751,"GCF_001602115.1_NZ_CP014352_01750, GCF_001602115.1_NZ_CP014352_01751",2.0,"AbiEii__AbiEii, AbiEii__AbiEi_1","WP_062819585.1, WP_062819586.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_Thoeris_II_1,GCF_001602115.1,NZ_CP014352,Thoeris,Thoeris_II,GCF_001602115.1_NZ_CP014352_00483,GCF_001602115.1_NZ_CP014352_00484,"GCF_001602115.1_NZ_CP014352_00483, GCF_001602115.1_NZ_CP014352_00484",2.0,"Thoeris__ThsB_Global, Thoeris_II__ThsA_new_petit","WP_062818945.1, WP_082815879.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_IIG_5,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_00058,GCF_001602115.1_NZ_CP014352_00058,GCF_001602115.1_NZ_CP014352_00058,1.0,RM_Type_IIG__Type_IIG,WP_062818741.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_I_1,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_00060,GCF_001602115.1_NZ_CP014352_00062,"GCF_001602115.1_NZ_CP014352_00060, GCF_001602115.1_NZ_CP014352_00061, GCF_001602115.1_NZ_CP014352_00062",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062818743.1, WP_062818744.1, WP_062818745.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_IIG_6,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_00069,GCF_001602115.1_NZ_CP014352_00069,GCF_001602115.1_NZ_CP014352_00069,1.0,RM_Type_IIG__Type_IIG,WP_062818749.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_I_2,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_00475,GCF_001602115.1_NZ_CP014352_00477,"GCF_001602115.1_NZ_CP014352_00475, GCF_001602115.1_NZ_CP014352_00476, GCF_001602115.1_NZ_CP014352_00477",3.0,"RM__Type_I_MTases, RM__Type_I_S, RM__Type_I_REases","WP_062818938.1, WP_082815877.1, WP_062818940.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_I_3,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_02756,GCF_001602115.1_NZ_CP014352_02758,"GCF_001602115.1_NZ_CP014352_02756, GCF_001602115.1_NZ_CP014352_02757, GCF_001602115.1_NZ_CP014352_02758",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062820191.1, WP_062820192.1, WP_062820839.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_IIG_7,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_03005,GCF_001602115.1_NZ_CP014352_03005,GCF_001602115.1_NZ_CP014352_03005,1.0,RM_Type_IIG__Type_IIG,WP_198401420.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_I_4,GCF_001602115.1,NZ_CP014352,RM,RM_Type_I,GCF_001602115.1_NZ_CP014352_03420,GCF_001602115.1_NZ_CP014352_03422,"GCF_001602115.1_NZ_CP014352_03420, GCF_001602115.1_NZ_CP014352_03421, GCF_001602115.1_NZ_CP014352_03422",3.0,"RM__Type_I_REases, RM__Type_I_S, RM__Type_I_MTases","WP_062818743.1, WP_062818744.1, WP_062818745.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_RM_Type_IIG_8,GCF_001602115.1,NZ_CP014352,RM,RM_Type_IIG,GCF_001602115.1_NZ_CP014352_03427,GCF_001602115.1_NZ_CP014352_03427,GCF_001602115.1_NZ_CP014352_03427,1.0,RM_Type_IIG__Type_IIG,WP_157773967.1,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014353,GCF_001602115.1,NZ_CP014353,No system found,,,,,,,,Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_001602115_NZ_CP014352_CAS_Class1-Subtype-I-G_1,GCF_001602115.1,NZ_CP014352,Cas,CAS_Class1-Subtype-I-G,GCF_001602115.1_NZ_CP014352_01614,GCF_001602115.1_NZ_CP014352_01620,"GCF_001602115_NZ_CP014352_01614, GCF_001602115_NZ_CP014352_01615, GCF_001602115_NZ_CP014352_01616, GCF_001602115_NZ_CP014352_01617, GCF_001602115_NZ_CP014352_01618, GCF_001602115_NZ_CP014352_01619, GCF_001602115_NZ_CP014352_01620",7.0,"DEDDh_I_II_III_IV_V_VI_1, csb1gr7_I-G_1, csb2gr5_I-G_1, cas3_I-G_3, csb3_I-G_1, cas1_I_II_III_IV_V_VI_1, cas2_I_II_III_IV_V_VI_3","WP_198401461.1, WP_062819507.1, WP_062819508.1, WP_062819509.1, WP_062819510.1, WP_062819511.1, WP_062819512.1",Bacteria,Actinomycetota,Actinomycetes,Propionibacteriales,Propionibacteriaceae,Acidipropionibacterium,Acidipropionibacterium acidipropionici
GCF_000830055_NZ_CP010781_Gabija_1,GCF_000830055.1,NZ_CP010781,Gabija,Gabija,GCF_000830055.1_NZ_CP010781_01480,GCF_000830055.1_NZ_CP010781_01481,"GCF_000830055.1_NZ_CP010781_01480, GCF_000830055.1_NZ_CP010781_01481",2.0,"Gabija__GajA, Gabija__GajB_2","WP_005115822.1, WP_000073989.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
GCF_000830055_NZ_CP010781_Gao_Qat_1,GCF_000830055.1,NZ_CP010781,Gao_Qat,Gao_Qat,GCF_000830055.1_NZ_CP010781_00952,GCF_000830055.1_NZ_CP010781_00955,"GCF_000830055.1_NZ_CP010781_00952, GCF_000830055.1_NZ_CP010781_00953, GCF_000830055.1_NZ_CP010781_00954, GCF_000830055.1_NZ_CP010781_00955",4.0,"Gao_Qat__QatA, Gao_Qat__QatB, Gao_Qat__QatC, Gao_Qat__QatD","WP_000269396.1, WP_000537345.1, WP_041152179.1, WP_000937120.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
GCF_000830055_NZ_CP010781_RosmerTA_1,GCF_000830055.1,NZ_CP010781,RosmerTA,RosmerTA,GCF_000830055.1_NZ_CP010781_00651,GCF_000830055.1_NZ_CP010781_00652,"GCF_000830055.1_NZ_CP010781_00651, GCF_000830055.1_NZ_CP010781_00652",2.0,"RosmerTA__RmrA_2634932349, RosmerTA__RmrT_2634932349","WP_000482796.1, WP_001182927.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
GCF_000830055_NZ_CP010781_RM_Type_II_1,GCF_000830055.1,NZ_CP010781,RM,RM_Type_II,GCF_000830055.1_NZ_CP010781_03697,GCF_000830055.1_NZ_CP010781_03698,"GCF_000830055.1_NZ_CP010781_03697, GCF_000830055.1_NZ_CP010781_03698",2.0,"RM_Type_II__Type_II_REases, RM_Type_II__Type_II_MTases","WP_001062713.1, WP_000862934.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
GCF_000830055_NZ_CP010782,GCF_000830055.1,NZ_CP010782,No system found,,,,,,,,Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
GCF_000830055_NZ_CP010781_CAS_Class1-Subtype-I-F_1,GCF_000830055.1,NZ_CP010781,Cas,CAS_Class1-Subtype-I-F,GCF_000830055.1_NZ_CP010781_02755,GCF_000830055.1_NZ_CP010781_02760,"GCF_000830055_NZ_CP010781_02755, GCF_000830055_NZ_CP010781_02756, GCF_000830055_NZ_CP010781_02757, GCF_000830055_NZ_CP010781_02758, GCF_000830055_NZ_CP010781_02759, GCF_000830055_NZ_CP010781_02760",6.0,"cas6f_I_II_III_IV_V_VI_3, cas7f_I-F_2, cas5f_I-F_4, cas8f_I-F_8, cas3f_I-F_1, cas1_I-F_1","WP_001104789.1, WP_001097003.1, WP_001215684.1, WP_000841022.1, WP_000637362.1, WP_000436801.1",Bacteria,Pseudomonadota,Gammaproteobacteria,Moraxellales,Moraxellaceae,Acinetobacter,Acinetobacter baumannii
......@@ -244,7 +244,7 @@ def pae2png(tsv_file, png_file):
plt.close()
@app.command()
@app.command(help="Remove version from sys_id")
def refseq(
input: Annotated[
Path,
......@@ -276,3 +276,189 @@ def refseq(
console.print(f"[green]{row['sys_id']} -> {result}")
row["sys_id"] = result
writer.writerow(row)
@app.command(
    help='Remove "No system found" hits if they are not the only hit for an assembly'
)
def refseq_sanitized_hits(
    input: Annotated[
        Path,
        typer.Option(
            # The input is only read: it must exist and be readable, it does
            # not need to be writable.
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Write a sanitized copy of a RefSeq hits CSV.

    Args:
        input: CSV file of RefSeq hits (must contain at least the
            ``Assembly`` and ``type`` columns).
        output: Destination CSV file.

    Returns:
        The sanitized DataFrame (as produced by ``_sanitized_refseq_hits``).
    """
    df = pd.read_csv(input)
    df_final = _sanitized_refseq_hits(df)
    # reset_index() materialises the original row index as an "index" column
    # in the written CSV.
    df_final.reset_index().to_csv(output, index=False)
    return df_final
@app.command(help="Group hits per assembly and types (from 'sanitized-hits')")
def refseq_group_per_assembly_and_type(
    input: Annotated[
        Path,
        typer.Option(
            # The input is only read: it must exist and be readable, it does
            # not need to be writable.
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Count sanitized hits per (assembly, system type, taxonomy) and write them as CSV.

    Args:
        input: CSV file of RefSeq hits; it is sanitized with
            ``_sanitized_refseq_hits`` before grouping.
        output: Destination CSV file. It holds one row per group with the
            grouping columns, a ``size`` column (number of hits in the group)
            and an ``index`` column added by ``reset_index()``.
    """
    group_columns = [
        "Assembly",
        "type",
        "Superkingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
    ]
    df = pd.read_csv(input)
    df_final = _sanitized_refseq_hits(df)
    df_final_grouped = df_final.groupby(group_columns, as_index=False).size()
    df_final_grouped.reset_index().to_csv(output, index=False)
@app.command()
def refseq_group_per_assembly(
    input: Annotated[
        Path,
        typer.Option(
            # The input is only read: it must exist and be readable, it does
            # not need to be writable.
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Count hits per assembly (with its taxonomy) and write them as CSV.

    The assembly version suffix is removed before grouping.

    Args:
        input: CSV file of RefSeq hits.
        output: Destination CSV file. It holds one row per group with the
            grouping columns, a ``size`` column (number of rows in the group)
            and an ``index`` column added by ``reset_index()``.
    """
    group_columns = [
        "Assembly",
        "Superkingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
    ]
    df = pd.read_csv(input)
    df["Assembly"] = df["Assembly"].apply(remove_version)
    df_grouped = df.groupby(group_columns, as_index=False).size()
    df_grouped.reset_index().to_csv(output, index=False)
@app.command()
def refseq_type_count(
    input: Annotated[
        Path,
        typer.Option(
            # The input is only read: it must exist and be readable, it does
            # not need to be writable.
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            help='CSV file with type and taxonomy ("No system found" rows removed when other systems are found in the same assembly)',
        ),
    ],
    output: Annotated[
        Path,
        typer.Option(
            file_okay=True,
            dir_okay=False,
            writable=True,
            resolve_path=True,
        ),
    ],
):
    """Count the number of rows per system type and write the counts as CSV.

    Args:
        input: CSV file containing a ``type`` column.
        output: Destination CSV file with ``type``, ``size`` and an ``index``
            column added by ``reset_index()``.
    """
    df = pd.read_csv(input)
    grouped_per_type = df.groupby(["type"], as_index=False).size()
    grouped_per_type.reset_index().to_csv(output, index=False)
def remove_version(assembly):
    """Return the part of ``assembly`` that precedes its first ``.``.

    A value without any ``.`` is returned unchanged.
    """
    accession, _dot, _version = assembly.partition(".")
    return accession
def _sanitized_refseq_hits(df):
df["Assembly"] = df["Assembly"].apply(remove_version)
# Lower type namesmc
# df["type"] = df["type"].apply(lambda x: x.lower())
# Get all row with no system type
df_no_system = df.loc[df["type"] == "No system found"]
# unique assembly with no sys
serie_assembly_with_no_sys = df_no_system["Assembly"].unique()
# filter assembly to have those with no sys
df_with_no_sys = df[df["Assembly"].isin(serie_assembly_with_no_sys)]
# Group them by assembly, type, taxo
no_sys_assembly_by_size = df_with_no_sys.groupby(
[
"Assembly",
"type",
"Superkingdom",
"phylum",
"class",
"order",
"family",
"genus",
"species",
],
as_index=False,
).size()
# count each occurrence
df_again_per_assembly = no_sys_assembly_by_size.groupby(
"Assembly",
as_index=False,
).size()
# filter to keep only size > 1 (when == 1 it means that there is only "no system found for an assembly")
# so we should keep it
df_size_sup_1 = df_again_per_assembly[df_again_per_assembly["size"] > 1]
assembly_where_should_remove_no_sys_found = df_size_sup_1["Assembly"].unique()
# Construct new dataset to remove entries with no system found
# while found system on other replicon that belongs to the
# same assembly
df_filtered_assembly_only_with_sys = df[
(df["type"] != "No system found")
| ~df.Assembly.isin(assembly_where_should_remove_no_sys_found)
]
return df_filtered_assembly_only_with_sys
......@@ -38,6 +38,28 @@ class RefSeqCsv(BaseModel):
species: str
class RefSeqTaxo(BaseModel):
    """Row of a per-assembly taxonomy CSV: an assembly, its lineage and a count."""

    index: int
    Assembly: str
    Superkingdom: str
    phylum: str
    # "class" is a reserved word in Python, so the attribute is named
    # ``class_`` and mapped to the "class" key through the alias.
    class_: str = Field(..., alias="class")
    order: str
    family: str
    genus: str
    species: str
    size: int
class RefSeqTaxoType(RefSeqTaxo):
    """``RefSeqTaxo`` row extended with a ``type`` field."""

    type: str
class RefSeqTypeCount(BaseModel):
    """A ``type`` value together with its ``size`` (count)."""

    type: str
    size: int
class StructureTypes(str, Enum):
Validated = "Validated"
DF = "DF"
......@@ -84,28 +106,57 @@ def update_refseqtaxo(host: str, key: str, file: Path, document: str):
documents = []
with open(file, "r") as csvfile:
csvreader = csv.DictReader(csvfile)
assembly = {}
for row in csvreader:
assembly_id = row["Assembly"]
assembly[row["Assembly"]] = {
k: row[k]
for k in (
"Superkingdom",
"phylum",
"class",
"order",
"family",
"genus",
"species",
"Assembly",
)
if k in row
}
assembly[assembly_id]["Assembly"] = assembly[assembly_id]["Assembly"].split('.')[0]
for item in assembly.values():
documents.append(item)
doc = RefSeqTaxo(**row)
documents.append(doc.model_dump(by_alias=True))
tasks = index.add_documents_in_batches(documents, primary_key="Assembly")
print(tasks)
for task in tasks:
console.print(task)
index.update_pagination_settings({"maxTotalHits": 1000000})
index.update_filterable_attributes(
body=[
"Superkingdom",
"phylum",
"class",
"order",
"family",
"genus",
"species",
"Assembly",
]
)
index.update_sortable_attributes(
[
"Superkingdom",
"phylum",
"class",
"order",
"family",
"genus",
"species",
"Assembly",
"size",
]
)
params = {
"maxValuesPerFacet": 1000000,
"sortFacetValuesBy": {"*": "count"},
}
index.update_faceting_settings(params)
def update_refseqtaxotype(host: str, key: str, file: Path, document: str):
client = meilisearch.Client(host, key)
index = client.index(document.lower())
documents = []
with open(file, "r") as csvfile:
csvreader = csv.DictReader(csvfile)
for row in csvreader:
doc = RefSeqTaxoType(**row)
documents.append(doc.model_dump(by_alias=True))
tasks = index.add_documents_in_batches(documents, primary_key="index")
for task in tasks:
console.print(task)
index.update_pagination_settings({"maxTotalHits": 1000000})
index.update_filterable_attributes(
body=[
......@@ -129,6 +180,35 @@ def update_refseqtaxo(host: str, key: str, file: Path, document: str):
"genus",
"species",
"Assembly",
"type",
"size",
]
)
params = {
"maxValuesPerFacet": 1000000,
"sortFacetValuesBy": {"*": "count"},
}
index.update_faceting_settings(params)
def update_refseqtypecount(host: str, key: str, file: Path, document: str):
client = meilisearch.Client(host, key)
index = client.index(document.lower())
documents = []
with open(file, "r") as csvfile:
csvreader = csv.DictReader(csvfile)
for row in csvreader:
doc = RefSeqTypeCount(**row)
documents.append(doc.model_dump(by_alias=True))
tasks = index.add_documents_in_batches(documents, primary_key="type")
for task in tasks:
console.print(task)
index.update_pagination_settings({"maxTotalHits": 1000000})
index.update_filterable_attributes(body=["type"])
index.update_sortable_attributes(
[
"type",
"size",
]
)
params = {
......@@ -342,7 +422,6 @@ def update_articles(
print(attr_task)
def split_on_comma(str_val: str) -> List[str]:
    """Lazily yield each comma-separated piece of ``str_val``, stripped of
    surrounding whitespace."""
    yield from (piece.strip() for piece in str_val.split(","))
......@@ -6,6 +6,8 @@ from df_wiki_cli.meilisearch import (
update_refseqtaxo,
update_articles,
update_refseq,
update_refseqtaxotype,
update_refseqtypecount,
update_structure,
update_systems,
)
......@@ -21,6 +23,9 @@ app = typer.Typer()
class Documents(str, Enum):
refseqtaxo = "refseqtaxo"
refseqtaxotype = "refseqtaxotype"
refseqtypecount = "refseqtypecount"
refseqsanitized = "refseqsanitized"
refseq = "refseq"
structure = "structure"
systems = "systems"
......@@ -63,6 +68,7 @@ def update(
] = Documents.refseq,
content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv",
):
if document == "refseqtaxo":
update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document)
if document == "refseq":
......@@ -73,6 +79,12 @@ def update(
update_systems(ctx.obj.host, ctx.obj.key, file, document)
if document == "article":
update_articles(ctx.obj.host, ctx.obj.key, file, document)
if document == "refseqtaxotype":
update_refseqtaxotype(ctx.obj.host, ctx.obj.key, file, document)
if document == "refseqsanitized":
update_refseq(ctx.obj.host, ctx.obj.key, file, document)
if document == "refseqtypecount":
update_refseqtypecount(ctx.obj.host, ctx.obj.key, file, document)
@app.command()
......@@ -97,6 +109,12 @@ def index_update(ctx: typer.Context, index: str, primary_key: str):
console.print(task)
@app.command()
def index_delete(ctx: typer.Context, index: str):
    """Delete the Meilisearch index named INDEX and print the resulting task."""
    client = meilisearch.Client(ctx.obj.host, ctx.obj.key)
    task = client.index(index).delete()
    # Show the task returned by the server so the deletion can be followed up,
    # consistently with the other index commands.
    console.print(task)
@app.command()
def task(ctx: typer.Context, id: str):
client = meilisearch.Client(ctx.obj.host, ctx.obj.key)
......@@ -119,7 +137,9 @@ def get_env_var(
keys = client.get_keys()
api_key = [res.key for res in keys.results if res.name == "Default Search API Key"]
print(ctx.obj.host)
if len(api_key) == 1:
with open(output, "a") as outfile:
outfile.write(f"MEILI_HOST={ctx.obj.host}\n")
outfile.write(f"MEILI_API_KEY={api_key[0]}\n")
print(ctx.obj.host)
outfile.write(f'MEILI_HOST="{ctx.obj.host}"\n')
outfile.write(f'MEILI_API_KEY="{api_key[0]}"\n')
[tool.poetry]
name = "df-wiki-cli"
version = "0.1.6"
version = "0.1.7"
description = ""
authors = ["Remi PLANEL <rplanel@pasteur.fr>"]
readme = "README.md"
......
#!/bin/bash
# Rebuild the Meilisearch indexes from the repository data files.
# Paths are relative to the directory the script is run from.

# Stop at the first failing command (or failing pipeline stage / unset
# variable): without this, a failed generation step would still be followed
# by `delete-all-documents`, leaving an index empty.
set -euo pipefail

# REFSEQ
df-wiki-cli meilisearch delete-all-documents refseq
df-wiki-cli meilisearch update --file ../data/refseq_res.csv --document refseq
# REF SEQ TAXO
df-wiki-cli content refseq-group-per-assembly --input ../data/refseq_res.csv --output /tmp/refseqtaxo.csv
df-wiki-cli meilisearch delete-all-documents refseqtaxo
df-wiki-cli meilisearch update --file /tmp/refseqtaxo.csv --document refseqtaxo
# REFSEQ TAXO TYPE
df-wiki-cli content refseq-group-per-assembly-and-type --input ../data/refseq_res.csv --output /tmp/refseqtaxotype.csv
df-wiki-cli meilisearch delete-all-documents refseqtaxotype
df-wiki-cli meilisearch update --file /tmp/refseqtaxotype.csv --document refseqtaxotype
# SANITIZED REFSEQ
df-wiki-cli content refseq-sanitized-hits --input ../data/refseq_res.csv --output /tmp/refseq-sanitized.csv
df-wiki-cli meilisearch delete-all-documents refseqsanitized
df-wiki-cli meilisearch update --file /tmp/refseq-sanitized.csv --document refseqsanitized
# systems
df-wiki-cli content systems --dir ../content/3.defense-systems/ --pfam ../public/pfam-a-hmm.csv --output /tmp/list-systems.json
df-wiki-cli meilisearch update --file /tmp/list-systems.json --document systems
# STRUCTURE
df-wiki-cli meilisearch update --file ../data/all_predictions_statistics_clean.csv --document structure
# ARTICLES
# df-wiki-cli meilisearch delete-all-documents article
# df-wiki-cli meilisearch