diff --git a/components/content/RefseqDb.vue b/components/content/RefseqDb.vue index 08ca0836885ee5a808d2cccdb5dd6a0ee4119728..91e0d266785346d91290ff0292c9e589d7830f62 100644 --- a/components/content/RefseqDb.vue +++ b/components/content/RefseqDb.vue @@ -25,29 +25,51 @@ onBeforeMount(async () => { index: toValue(dbName), query: "", params: { facets: ["*"], filter: [], - limit: 500000, - sort: ["type:asc"] + page: 1, + hitsPerPage: 25, + } }) autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution - allHitsDb.value = toValue(data)?.hits - + // allHitsDb.value = toValue(data)?.hits + const { data: taxo } = await useAsyncMeiliSearch({ + index: toValue("refseqtaxo"), query: "", params: { + facets: ["*"], + filter: [], + page: 1, + hitsPerPage: 25, + } + }) + taxonomyFacet.value = toValue(taxo)?.facetDistribution }) -const allHitsDb = ref<Record<string, any>[] | undefined>(undefined) +const taxonomyFacet = ref<Record<string, any> | undefined>(undefined) +// const allHitsDb = ref<Record<string, any>[] | undefined>(undefined) onMounted(async () => { + const { data } = await useAsyncMeiliSearch({ index: toValue(dbName), query: "", params: { facets: ["*"], filter: [], - // page: 1, - // hitsPerPage: 25, - limit: 500000, - sort: ["type:asc"] + page: 1, + hitsPerPage: 25, + // limit: 500000, + // sort: ["type:asc"] } }) + autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution - allHitsDb.value = toValue(data)?.hits + // allHitsDb.value = toValue(data)?.hits + + const { data: taxo } = await useAsyncMeiliSearch({ + index: toValue("refseqtaxo"), query: "", params: { + facets: ["*"], + filter: [], + page: 1, + hitsPerPage: 25, + } + }) + taxonomyFacet.value = toValue(taxo)?.facetDistribution }) @@ -104,12 +126,13 @@ const availableTaxo: Ref<string[]> = ref([ "genus", "family", "order", + "class", "phylum", "Superkingdom" ]); const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog']) -const selectedTaxoRank = ref("Superkingdom"); +const selectedTaxoRank = ref<"species" | "genus" | "family" | "order" | "class" | "phylum" | "Superkingdom">("Superkingdom"); const headers = ref([ { @@ -152,12 +175,25 @@ const computedWidth = computed(() => { const allHits: Ref<Record<string, any> | undefined> = ref(undefined) + const pendingAllHits = ref(false) async function getAllHits(params: { index: string, params: Record<string, any>, query: string }) { + if (!params?.params?.filter || params?.params?.filter?.length === 0) { + selectedTaxoRank.value = "Superkingdom" + + } if (params.index === toValue(dbName)) { + pendingAllHits.value = true try { - const { data, error } = await useAsyncMeiliSearch(params) + console.log(params.params.filter) + const { data, error } = await useAsyncMeiliSearch({ + ...params, + params: { + ...params.params, + 'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)] + } + }) allHits.value = data.value console.log(error.value) } finally { @@ -303,15 +339,16 @@ const binPlotGroup = computed(() => { fill: { label: `Normalize by ${selectedTaxoRank.value} clade`, reduceIndex: function (I, X) { - const toValAssemblyPerRank = toValue(assemblyPerRank) - if (toValAssemblyPerRank?.size > 0) { + const toValTaxonomyFacet = toValue(taxonomyFacet) + if (toValTaxonomyFacet !== undefined) { const clade = X[I[0]][selectedTaxoRank.value] const system = X[I[0]].type // Get the list of item for this group const itemsPerGroup = d3.rollup(I.map(index => { return X[index] }), D => D.length, d => d.type, d => d.Assembly) - const countForClade = toValAssemblyPerRank.get(clade).size + const countForClade = toValTaxonomyFacet[selectedTaxoRank.value][clade] + console.log(`${itemsPerGroup.get(system).size} / ${countForClade}`) const frequency = (itemsPerGroup.get(system).size / countForClade) * 100 return frequency } @@ -349,15 +386,6 @@ const binPlotDataOptions = computed(() => { } : null }) -const assemblyPerRank = computed(() => { - const toValueAllHits = toValue(allHitsDb) - - if (toValueAllHits && toValueAllHits?.length > 0) { - return d3.rollup(toValueAllHits, D => D.length, d => d[toValue(selectedTaxoRank)], d => d.Assembly) - } -}) - - const scaleType = ref("linear") const systemsDistributionPlot = ref<ComponentPublicInstance | null>(null) diff --git a/data/refseq_res.csv b/data/refseq_res.csv index 85812c477a9bac3da1201f81cff06a539ec0c2ec..becb696192d60bc89a646284da9d6d86ba120069 100644 --- a/data/refseq_res.csv +++ b/data/refseq_res.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4801eea1e299738e8d01b233091f81404410799a6901da74b11131d12b390158 -size 73983918 +oid sha256:1a7d382a7f767718dc48aa49ae3dd9b0159fdffd96e48946c7a167bcbc516deb +size 68772089 diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py index 2963d14df8773cba9b7dc759ec247fe984bdfca7..1111225625f07cbc4584ae571646350cbe00a998 100644 --- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/__init__.py @@ -78,6 +78,66 @@ class StrucutreStatistics(BaseModel): Foldseek_name: Optional[str] +def update_refseqtaxo(host: str, key: str, file: Path, document: str): + client = meilisearch.Client(host, key) + index = client.index(document.lower()) + documents = [] + with open(file, "r") as csvfile: + csvreader = csv.DictReader(csvfile) + assembly = {} + for row in csvreader: + assembly_id = row["Assembly"] + assembly[row["Assembly"]] = { + k: row[k] + for k in ( + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + ) + if k in row + } + assembly[assembly_id]["Assembly"] = assembly[assembly_id]["Assembly"].split('.')[0] + for item in assembly.values(): + documents.append(item) + tasks = index.add_documents_in_batches(documents, primary_key="Assembly") + print(tasks) + index.update_pagination_settings({"maxTotalHits": 1000000}) + index.update_filterable_attributes( + body=[ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + ] + ) + index.update_sortable_attributes( + [ + "Superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + "Assembly", + ] + ) + params = { + "maxValuesPerFacet": 1000000, + "sortFacetValuesBy": {"*": "count"}, + } + index.update_faceting_settings(params) + + def update_refseq( host: str, key: str, @@ -104,10 +164,12 @@ def update_refseq( index.update_filterable_attributes( body=[ "replicon", + "Assembly", "type", "subtype", "Superkingdom", "phylum", + "class", "order", "family", "genus", @@ -117,10 +179,12 @@ def update_refseq( index.update_sortable_attributes( [ "replicon", + "Assembly", "type", "subtype", "Superkingdom", "phylum", + "class", "order", "family", "genus", diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py index e7e3037766ce72d268bfdae830e4e52de3d34b03..f32ac8c3af221457efe03522b7e0bdf206f0c610 100644 --- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py +++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/main.py @@ -2,7 +2,12 @@ import typer import meilisearch from typing_extensions import Annotated from pathlib import Path -from df_wiki_cli.meilisearch import update_refseq, update_structure, update_systems +from df_wiki_cli.meilisearch import ( + update_refseq, + update_structure, + update_systems, + update_refseqtaxo, +) from enum import Enum from types import SimpleNamespace from rich.console import Console @@ -14,6 +19,7 @@ app = typer.Typer() class Documents(str, Enum): + refseqtaxo = "refseqtaxo" refseq = "refseq" structure = "structure" systems = "systems" @@ -55,6 +61,8 @@ def update( ] = Documents.refseq, content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv", ): + if document == "refseqtaxo": + update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document) if document == "refseq": update_refseq(ctx.obj.host, ctx.obj.key, file, document) if document == "structure":