Skip to content
Snippets Groups Projects
Commit 6bf7b037 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

WIP: generate system stat and distri

parent b5962ec2
No related branches found
No related tags found
1 merge request!231System distribution plot edit article
<script setup lang="ts">
import { toValue } from '@vueuse/core';
import * as d3 from "d3";
import * as Plot from "@observablehq/plot";
import { useDisplay } from "vuetify";
const { width } = useDisplay();
const systemHits = ref(undefined)
const refseqTaxo = ref(undefined)
const marginRight = ref(50)
const selectedTaxoRank = ref("phylum")
const taxoRanks: Ref<string[]> = ref([
"species",
"genus",
"family",
"order",
"class",
"phylum",
"Superkingdom"
]);
const { page } = useContent();
// get the structures
const msIndexName = ref<string>("refseqsanitized")
onBeforeMount(() => {
fetchSystemHits()
fetchRefSeqTaxo()
})
onMounted(() => {
fetchSystemHits()
fetchRefSeqTaxo()
})
const computedWidth = computed(() => {
const screenWidth = toValue(width) > 1280 ? 1280 : toValue(width)
return Math.max(screenWidth, 550);
});
const computedDistribution = computed(() => {
const toValSelectedTaxoRank = toValue(selectedTaxoRank)
const toValSystemHits = toValue(systemHits)
const toValRefseqTaxo = toValue(refseqTaxo)
if (toValSystemHits?.hits && toValSelectedTaxoRank && toValRefseqTaxo?.facetDistribution) {
console.log(toValRefseqTaxo)
const toValFacetsPerRank = toValRefseqTaxo.facetDistribution?.[toValSelectedTaxoRank]
// group per selected taxo rank and accession
const itemsPerGroup = d3.rollup(toValSystemHits.hits, D => D.length, d => d[toValSelectedTaxoRank], d => d.Assembly)
console.log(itemsPerGroup)
if (toValSelectedTaxoRank === "order"){
console.log(itemsPerGroup.get("Oscillatoriales"))
}
const distribution = []
for (const [taxo, values] of itemsPerGroup.entries()) {
console.log(toValFacetsPerRank[taxo])
if (toValFacetsPerRank[taxo] && toValFacetsPerRank[taxo] > 0) {
distribution.push({ taxo, size: (values.size / toValFacetsPerRank[taxo]) * 100 })
}
}
return distribution
}
return []
})
// const totalGenome = computed(() => {
// refseqTaxo?.estimatedTotalHits
// })
const systemStatistics = computed(() => {
const toValSystemHits = toValue(systemHits)
const toValRefseqTaxo = toValue(refseqTaxo)
let statistics: Record<string, number | undefined> = { totalGenome: undefined, genomeWithSystem: undefined, speciesWithSystem: undefined, percentGenome: undefined }
if (toValSystemHits?.facetDistribution) {
statistics = {
...statistics,
genomeWithSystem: Object.entries(toValSystemHits.facetDistribution.Assembly)?.length,
speciesWithSystem: Object.entries(toValSystemHits.facetDistribution.species)?.length,
}
}
if (statistics.genomeWithSystem !== undefined && toValRefseqTaxo?.estimatedTotalHits) {
statistics = {
...statistics,
totalGenome: toValRefseqTaxo.estimatedTotalHits,
percentGenome: (statistics.genomeWithSystem / toValRefseqTaxo.estimatedTotalHits) * 100
}
}
return statistics
})
const distributionOptions = computed(() => {
return {
marginBottom: 100,
marginRight: marginRight.value,
y: { label: `% of genomes encoding ${toValue(page)?.title ?? 'the system'}` },
x: { label: selectedTaxoRank.value, tickRotate: 45 },
width: computedWidth.value - marginRight.value,
marks: [
Plot.barY(
toValue(computedDistribution),
{
y: "size",
x: "taxo",
tip: true,
sort: { x: "-y" },
},
),
],
};
})
console.log(computedDistribution)
// =================================================
// ASYNC PART
// =================================================
async function fetchSystemHits() {
const { data, error } = await useAsyncMeiliSearch({
index: toValue(msIndexName),
query: "",
params: {
facets: ["*"],
filter: [`type='${toValue(page).title}'`],
limit: 500000,
}
})
systemHits.value = data.value
if (error.value) {
throw createError(`Cannot get hits on refseq for system: ${page.title}`)
}
}
async function fetchRefSeqTaxo() {
const { data, error } = await useAsyncMeiliSearch({
index: "refseqtaxo",
query: "",
params: {
facets: ["*"],
filter: [],
limit: 1,
}
})
refseqTaxo.value = data.value
if (error.value) {
throw createError(`Cannot get refseq taxo: ${page.title}`)
}
}
</script>
<template>
<v-card flat>
<v-select v-model="selectedTaxoRank" :items="taxoRanks" density="compact" label="Select taxonomic rank"
hide-details="auto" class="mx-2"></v-select>
<v-card-text>
Among the {{ d3.format(",")(systemStatistics.totalGenome) }} complete genomes of RefSeq, the {{ page.title
}} is
detected in {{ d3.format(",")(systemStatistics.genomeWithSystem) }} genomes ({{
d3.format(".2f")(systemStatistics.percentGenome) }} %).
The system was detected in {{ d3.format(",")(systemStatistics.speciesWithSystem) }} different species.
</v-card-text>
<PlotFigure ref="systemsDistributionPlot" :options="unref(distributionOptions)" defer></PlotFigure>
</v-card>
</template>
\ No newline at end of file
......@@ -60,6 +60,10 @@ Proportion of genome encoding the AbiA system for the 14 phyla with more than 50
::article-system-distribution-plot
::
## Structure
### AbiA_large
##### Example 1
......
......@@ -73,6 +73,10 @@ The system was detected in 366 different species.
Proportion of genome encoding the Avs system for the 14 phyla with more than 50 genomes in the RefSeq database.
::article-system-distribution-plot
::
## Structure
### AVAST_I
##### Example 1
......
......@@ -70,6 +70,9 @@ The system was detected in 6137 different species.
Proportion of genome encoding the RM system for the 14 phyla with more than 50 genomes in the RefSeq database.
::article-system-distribution-plot
::
## Structure
### Experimentaly determined structure
......
......@@ -341,6 +341,7 @@ def refseq_group_per_assembly_and_type(
"species",
],
as_index=False,
dropna=False
).size()
df_final_grouped.reset_index().to_csv(output, index=False)
......@@ -380,6 +381,7 @@ def refseq_group_per_assembly(
"species",
],
as_index=False,
dropna=False,
).size()
df_grouped.reset_index().to_csv(output, index=False)
......@@ -406,10 +408,7 @@ def refseq_type_count(
],
):
df = pd.read_csv(input)
grouped_per_type = df.groupby(
["type"],
as_index=False,
).size()
grouped_per_type = df.groupby(["type"], as_index=False, dropna=False).size()
grouped_per_type.reset_index().to_csv(output, index=False)
......@@ -442,12 +441,14 @@ def _sanitized_refseq_hits(df):
"species",
],
as_index=False,
dropna=False,
).size()
# count each occurrence
df_again_per_assembly = no_sys_assembly_by_size.groupby(
"Assembly",
as_index=False,
dropna=False
).size()
# filter to keep only size > 1 (when == 1 it means that there is only "no system found for an assembly")
# so we should keep it
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment