diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5c2af36c8c4560953db0d04004a5332940e3573d..5d3ff5a35d10bbac0aa7d21cae59061a1eba4e7b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -197,6 +197,12 @@ lint: MEILI_HOST: "http://localhost:7700" script: # - rm data/list-systems.json + - > + df-wiki-cli + meilisearch + --host ${MEILI_HOST} + --key ${MEILI_MASTER_KEY} + delete-all-documents refseq - > df-wiki-cli content systems diff --git a/components/content/RefseqDb.vue b/components/content/RefseqDb.vue index b4ed606d8f64e22f5f611aad9862134b5c14ed49..b5e33f8794361fd0ff9163a2a47703851710ce87 100644 --- a/components/content/RefseqDb.vue +++ b/components/content/RefseqDb.vue @@ -1,4 +1,6 @@ <script setup lang="ts"> +import * as d3 from "d3"; + import * as Plot from "@observablehq/plot"; import PlotFigure from "~/components/PlotFigure"; import { useDisplay } from "vuetify"; @@ -107,6 +109,9 @@ const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog']) const selectedTaxoRank = ref("Superkingdom"); const headers = ref([ + { title: "Assembly", key: "Assembly" }, + + { title: "Replicon", key: "replicon" }, { title: "System", key: "type", @@ -149,10 +154,7 @@ const allHits: Ref<Record<string, any> | undefined> = ref(undefined) const pendingAllHits = ref(false) async function getAllHits(params: { index: string, params: Record<string, any>, query: string }) { - console.log(params.index) if (params.index === toValue(dbName)) { - console.log("get all hits in function ") - console.log(params) pendingAllHits.value = true try { const { data, error } = await useAsyncMeiliSearch(params) @@ -205,6 +207,8 @@ const computedSystemDistribution = computed(() => { }) const computedDistriSystemOptions = computed(() => { + // const toValNormalizePerAssembly = toValue(normalizePerAssembly) + return { ...defaultBarPlotOptions.value, marginBottom: 100, @@ -221,6 +225,15 @@ const computedDistriSystemOptions = computed(() => { }, ), + // Plot.barY( + // toValNormalizePerAssembly, + // Plot.groupX({ y: "sum" }, { + // x: 'system', tip: true, + // sort: { x: "-y" }, + // }), + + + // ), ], }; }); @@ -228,7 +241,8 @@ const computedDistriSystemOptions = computed(() => { // Taxo distri const computedTaxonomyDistribution = computed(() => { - if (toValue(msResult)?.facetDistribution?.[selectedTaxoRank.value]) { + const toValNormalizePerAssembly = toValue(normalizePerAssembly) + if (toValNormalizePerAssembly?.length > 0) { return Object.entries(toValue(msResult).facetDistribution[selectedTaxoRank.value]).map(([key, value]) => { return { [selectedTaxoRank.value]: key, @@ -248,6 +262,7 @@ const computedDistriTaxoOptions = computed(() => { width: computedWidth.value, marks: [ Plot.barY( + toValue(computedTaxonomyDistribution), { y: "count", @@ -307,7 +322,93 @@ const binPlotDataOptions = computed(() => { } : null + // const toValNormalizePerAssembly = toValue(normalizePerAssembly) + // return toValNormalizePerAssembly?.length > 0 ? { + // ...binPlotOptions.value, + // width: width.value, + + // color: { + // ...binPlotOptions.value.color, + // type: scaleType.value, + // tickFormat: '~s', + // ticks: scaleType.value === 'symlog' ? 3 : 5, + // }, + // marks: [ + // Plot.cell( + // toValNormalizePerAssembly ?? [], + // Plot.group({ + // fill: "count" + // }, { + // x: "system", + // y: "rank", + // tip: true, + // inset: 0.5, + // sort: { y: "fill" } + // })), + // ] + + // } : null + +}) + +const systemPerAssemblyPerRank = computed(() => { + const toValueAllHits = toValue(allHits) + console.log("all hits dans system per...") + console.log(toValueAllHits) + if (toValueAllHits && toValueAllHits?.hits?.length > 0) { + return d3.rollup(toValueAllHits.hits, D => D.length, d => d[toValue(selectedTaxoRank)], d => d.Assembly, d => d.type) + } +}) + +const assemblyPerRank = computed(() => { + const toValueAllHits = toValue(allHits) + + if (toValueAllHits && toValueAllHits?.hits?.length > 0) { + return d3.rollup(toValueAllHits.hits, D => D.length, d => d[toValue(selectedTaxoRank)], d => d.Assembly) + } +}) + +const ranks = computed(() => { + + const toValAssemblyPerRank = toValue(assemblyPerRank) + console.log("dans le ranks computed") + console.log(toValAssemblyPerRank) + if (toValAssemblyPerRank && toValAssemblyPerRank?.size > 0) { + console.log(toValAssemblyPerRank.keys()) + + return [...toValAssemblyPerRank.keys()] + } }) + +const normalizePerAssembly = computed(() => { + const toValRanks = toValue(ranks) + const toValAssemblyPerRank = toValue(assemblyPerRank) + const toValSystemPerAssemblyPerRank = toValue(systemPerAssemblyPerRank) + const normalizedSystemCount = [] + + console.log("dans computed normalize per assembly") + console.log(toValRanks) + console.log(toValAssemblyPerRank) + console.log(toValSystemPerAssemblyPerRank) + if (toValRanks && toValAssemblyPerRank && toValSystemPerAssemblyPerRank) { + for (const rank of toValRanks) { + // get list assembly for this rank + console.log(rank) + const assemblies = [...toValAssemblyPerRank.get(rank).keys()] + for (const assembly of assemblies) { + // get list + const countAssemblyPerRank = toValAssemblyPerRank.get(rank).get(assembly) + const systems = toValSystemPerAssemblyPerRank.get(rank).get(assembly).keys() + for (const system of systems) { + const countSystem = toValSystemPerAssemblyPerRank.get(rank).get(assembly).get(system) + normalizedSystemCount.push({ rank, assembly, system, frequency: countSystem / countAssemblyPerRank }) + } + } + } + } + return normalizedSystemCount +}) + const scaleType = ref("linear") const systemsDistributionPlot = ref<ComponentPublicInstance | null>(null) const taxonomicDistributionPlot = ref<ComponentPublicInstance | null>(null) @@ -466,7 +567,6 @@ async function downloadPng(component: ComponentPublicInstance | null, filename: </v-expansion-panel-text> </v-expansion-panel> </v-expansion-panels> - <ServerDbTable title="RefSeq" :sortBy="sortBy" :autocomplete-meili-facets-props="computedAutocompleteMeiliFacetsProps" :data-table-server-props="dataTableServerProps" @refresh:search="(params) => getAllHits(params)"> diff --git a/data/refseq_res.csv b/data/refseq_res.csv index 733fbf04e103186d499d6201405d79378c3ed802..becb696192d60bc89a646284da9d6d86ba120069 100644 --- a/data/refseq_res.csv +++ b/data/refseq_res.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f19dff4f92dd3ca79a66fe6835f71261b9a5264f3df176dd005490dd74aa552d -size 64961046 +oid sha256:1a7d382a7f767718dc48aa49ae3dd9b0159fdffd96e48946c7a167bcbc516deb +size 68772089 diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py index ee09705cf6a667bf650c0267c240a94f86dcedff..17b7865777a7117790fa90495b7ddd602bc903f7 100644 --- a/packages/df-wiki-cli/df_wiki_cli/content/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py @@ -270,6 +270,8 @@ def refseq( writer = csv.DictWriter(out, fieldnames=fieldnames) writer.writeheader() for row in reader: + if row["sys_id"] == "": + row["sys_id"] = f'{row["Assembly"]}_{row["replicon"]}' result = re.sub(r"^(\w+)\.\d+(_.*)$", r"\1\2", row["sys_id"]) console.print(f"[green]{row['sys_id']} -> {result}") row["sys_id"] = result diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py index 3479787cd2b5187332b02065e26547b9b5854ccd..2963d14df8773cba9b7dc759ec247fe984bdfca7 100644 --- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py @@ -11,15 +11,22 @@ from rich.console import Console console = Console() +def emptyStringToNone(val: str) -> None | int: + if val == "": + return None + return int(float(val)) + + class RefSeqCsv(BaseModel): sys_id: str + Assembly: str replicon: str type: str subtype: str sys_beg: str sys_end: str protein_in_syst: List[str] - genes_count: int + genes_count: Annotated[int | None, BeforeValidator(emptyStringToNone)] name_of_profiles_in_sys: List[str] accession_in_sys: List[str] Superkingdom: str