Skip to content
Snippets Groups Projects
Commit da592497 authored by Remi PLANEL
Browse files

use taxo facets to normalize heatmap frequencies

parent 70cc101c
No related branches found
No related tags found
1 merge request: !222 Refseq no sys
Pipeline #123624 waiting for manual action with stages
in 7 minutes
...@@ -25,29 +25,51 @@ onBeforeMount(async () => { ...@@ -25,29 +25,51 @@ onBeforeMount(async () => {
index: toValue(dbName), query: "", params: { index: toValue(dbName), query: "", params: {
facets: ["*"], facets: ["*"],
filter: [], filter: [],
limit: 500000, page: 1,
sort: ["type:asc"] hitsPerPage: 25,
} }
}) })
autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution
allHitsDb.value = toValue(data)?.hits // allHitsDb.value = toValue(data)?.hits
const { data: taxo } = await useAsyncMeiliSearch({
index: toValue("refseqtaxo"), query: "", params: {
facets: ["*"],
filter: [],
page: 1,
hitsPerPage: 25,
}
})
taxonomyFacet.value = toValue(taxo)?.facetDistribution
}) })
const allHitsDb = ref<Record<string, any>[] | undefined>(undefined) const taxonomyFacet = ref<Record<string, any> | undefined>(undefined)
// const allHitsDb = ref<Record<string, any>[] | undefined>(undefined)
onMounted(async () => { onMounted(async () => {
const { data } = await useAsyncMeiliSearch({ const { data } = await useAsyncMeiliSearch({
index: toValue(dbName), query: "", params: { index: toValue(dbName), query: "", params: {
facets: ["*"], facets: ["*"],
filter: [], filter: [],
// page: 1, page: 1,
// hitsPerPage: 25, hitsPerPage: 25,
limit: 500000, // limit: 500000,
sort: ["type:asc"] // sort: ["type:asc"]
} }
}) })
autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution autocompleteMeiliFacetsProps.value.facetDistribution = toValue(data)?.facetDistribution
allHitsDb.value = toValue(data)?.hits // allHitsDb.value = toValue(data)?.hits
const { data: taxo } = await useAsyncMeiliSearch({
index: toValue("refseqtaxo"), query: "", params: {
facets: ["*"],
filter: [],
page: 1,
hitsPerPage: 25,
}
})
taxonomyFacet.value = toValue(taxo)?.facetDistribution
}) })
...@@ -104,12 +126,13 @@ const availableTaxo: Ref<string[]> = ref([ ...@@ -104,12 +126,13 @@ const availableTaxo: Ref<string[]> = ref([
"genus", "genus",
"family", "family",
"order", "order",
"class",
"phylum", "phylum",
"Superkingdom" "Superkingdom"
]); ]);
const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog']) const scaleTypes = ref<string[]>(['linear', 'sqrt', 'log', 'symlog'])
const selectedTaxoRank = ref("Superkingdom"); const selectedTaxoRank = ref<"species" | "genus" | "family" | "order" | "class" | "phylum" | "Superkingdom">("Superkingdom");
const headers = ref([ const headers = ref([
{ {
...@@ -152,12 +175,25 @@ const computedWidth = computed(() => { ...@@ -152,12 +175,25 @@ const computedWidth = computed(() => {
const allHits: Ref<Record<string, any> | undefined> = ref(undefined) const allHits: Ref<Record<string, any> | undefined> = ref(undefined)
const pendingAllHits = ref(false) const pendingAllHits = ref(false)
async function getAllHits(params: { index: string, params: Record<string, any>, query: string }) { async function getAllHits(params: { index: string, params: Record<string, any>, query: string }) {
if (!params?.params?.filter || params?.params?.filter?.length === 0) {
selectedTaxoRank.value = "Superkingdom"
}
if (params.index === toValue(dbName)) { if (params.index === toValue(dbName)) {
pendingAllHits.value = true pendingAllHits.value = true
try { try {
const { data, error } = await useAsyncMeiliSearch(params) console.log(params.params.filter)
const { data, error } = await useAsyncMeiliSearch({
...params,
params: {
...params.params,
'attributesToRetrieve': ['type', 'Assembly', ...toValue(availableTaxo)]
}
})
allHits.value = data.value allHits.value = data.value
console.log(error.value) console.log(error.value)
} finally { } finally {
...@@ -303,15 +339,16 @@ const binPlotGroup = computed(() => { ...@@ -303,15 +339,16 @@ const binPlotGroup = computed(() => {
fill: { fill: {
label: `Normalize by ${selectedTaxoRank.value} clade`, label: `Normalize by ${selectedTaxoRank.value} clade`,
reduceIndex: function (I, X) { reduceIndex: function (I, X) {
const toValAssemblyPerRank = toValue(assemblyPerRank) const toValTaxonomyFacet = toValue(taxonomyFacet)
if (toValAssemblyPerRank?.size > 0) { if (toValTaxonomyFacet !== undefined) {
const clade = X[I[0]][selectedTaxoRank.value] const clade = X[I[0]][selectedTaxoRank.value]
const system = X[I[0]].type const system = X[I[0]].type
// Get the list of item for this group // Get the list of item for this group
const itemsPerGroup = d3.rollup(I.map(index => { const itemsPerGroup = d3.rollup(I.map(index => {
return X[index] return X[index]
}), D => D.length, d => d.type, d => d.Assembly) }), D => D.length, d => d.type, d => d.Assembly)
const countForClade = toValAssemblyPerRank.get(clade).size const countForClade = toValTaxonomyFacet[selectedTaxoRank.value][clade]
console.log(`${itemsPerGroup.get(system).size} / ${countForClade}`)
const frequency = (itemsPerGroup.get(system).size / countForClade) * 100 const frequency = (itemsPerGroup.get(system).size / countForClade) * 100
return frequency return frequency
} }
...@@ -349,15 +386,6 @@ const binPlotDataOptions = computed(() => { ...@@ -349,15 +386,6 @@ const binPlotDataOptions = computed(() => {
} : null } : null
}) })
const assemblyPerRank = computed(() => {
const toValueAllHits = toValue(allHitsDb)
if (toValueAllHits && toValueAllHits?.length > 0) {
return d3.rollup(toValueAllHits, D => D.length, d => d[toValue(selectedTaxoRank)], d => d.Assembly)
}
})
const scaleType = ref("linear") const scaleType = ref("linear")
const systemsDistributionPlot = ref<ComponentPublicInstance | null>(null) const systemsDistributionPlot = ref<ComponentPublicInstance | null>(null)
......
source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
...@@ -78,6 +78,66 @@ class StrucutreStatistics(BaseModel): ...@@ -78,6 +78,66 @@ class StrucutreStatistics(BaseModel):
Foldseek_name: Optional[str] Foldseek_name: Optional[str]
def update_refseqtaxo(host: str, key: str, file: Path, document: str):
    """Load per-assembly taxonomy from a CSV file into a Meilisearch index.

    One document is produced per assembly, keyed by the assembly accession
    with its version suffix removed (everything after the first ``.``).
    After the documents are queued, the index pagination, filterable,
    sortable and faceting settings are updated.

    Args:
        host: URL of the Meilisearch server.
        key: API key passed to the Meilisearch client.
        file: Path to the CSV file; it must contain an ``Assembly`` column.
            Taxonomy rank columns that are absent from the file are skipped.
        document: Index name; it is lower-cased before use.

    Raises:
        KeyError: if a row has no ``Assembly`` column.
    """
    # Single source of truth for the fields stored in each document and
    # declared as filterable / sortable on the index.
    taxonomy_fields = (
        "Superkingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
        "Assembly",
    )

    client = meilisearch.Client(host, key)
    index = client.index(document.lower())

    # De-duplicate on the unversioned accession, i.e. on the very value that
    # is sent as the primary key, so the in-memory map and the index agree.
    # When several rows share an accession the last row wins.
    assemblies = {}
    # newline="" lets the csv module handle embedded newlines itself.
    with open(file, "r", newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            assembly_id = row["Assembly"].split(".")[0]
            record = {k: row[k] for k in taxonomy_fields if k in row}
            record["Assembly"] = assembly_id
            assemblies[assembly_id] = record
    documents = list(assemblies.values())

    tasks = index.add_documents_in_batches(documents, primary_key="Assembly")
    print(tasks)

    index.update_pagination_settings({"maxTotalHits": 1000000})
    index.update_filterable_attributes(body=list(taxonomy_fields))
    index.update_sortable_attributes(list(taxonomy_fields))
    index.update_faceting_settings(
        {
            "maxValuesPerFacet": 1000000,
            "sortFacetValuesBy": {"*": "count"},
        }
    )
def update_refseq( def update_refseq(
host: str, host: str,
key: str, key: str,
...@@ -104,10 +164,12 @@ def update_refseq( ...@@ -104,10 +164,12 @@ def update_refseq(
index.update_filterable_attributes( index.update_filterable_attributes(
body=[ body=[
"replicon", "replicon",
"Assembly",
"type", "type",
"subtype", "subtype",
"Superkingdom", "Superkingdom",
"phylum", "phylum",
"class",
"order", "order",
"family", "family",
"genus", "genus",
...@@ -117,10 +179,12 @@ def update_refseq( ...@@ -117,10 +179,12 @@ def update_refseq(
index.update_sortable_attributes( index.update_sortable_attributes(
[ [
"replicon", "replicon",
"Assembly",
"type", "type",
"subtype", "subtype",
"Superkingdom", "Superkingdom",
"phylum", "phylum",
"class",
"order", "order",
"family", "family",
"genus", "genus",
......
...@@ -2,7 +2,12 @@ import typer ...@@ -2,7 +2,12 @@ import typer
import meilisearch import meilisearch
from typing_extensions import Annotated from typing_extensions import Annotated
from pathlib import Path from pathlib import Path
from df_wiki_cli.meilisearch import update_refseq, update_structure, update_systems from df_wiki_cli.meilisearch import (
update_refseq,
update_structure,
update_systems,
update_refseqtaxo,
)
from enum import Enum from enum import Enum
from types import SimpleNamespace from types import SimpleNamespace
from rich.console import Console from rich.console import Console
...@@ -14,6 +19,7 @@ app = typer.Typer() ...@@ -14,6 +19,7 @@ app = typer.Typer()
class Documents(str, Enum): class Documents(str, Enum):
refseqtaxo = "refseqtaxo"
refseq = "refseq" refseq = "refseq"
structure = "structure" structure = "structure"
systems = "systems" systems = "systems"
...@@ -55,6 +61,8 @@ def update( ...@@ -55,6 +61,8 @@ def update(
] = Documents.refseq, ] = Documents.refseq,
content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv", content_type: Annotated[str, typer.Option(help="Content-Type header")] = "text/csv",
): ):
if document == "refseqtaxo":
update_refseqtaxo(ctx.obj.host, ctx.obj.key, file, document)
if document == "refseq": if document == "refseq":
update_refseq(ctx.obj.host, ctx.obj.key, file, document) update_refseq(ctx.obj.host, ctx.obj.key, file, document)
if document == "structure": if document == "structure":
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment