Skip to content
Snippets Groups Projects
Commit 6bf7b037 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

WIP: generate system stat and distri

parent b5962ec2
No related branches found
No related tags found
1 merge request!231System distribution plot edit article
<script setup lang="ts">
import { toValue } from '@vueuse/core';
import * as d3 from "d3";
import * as Plot from "@observablehq/plot";
import { useDisplay } from "vuetify";
// Display width taken from Vuetify's useDisplay(); drives the plot width below.
const { width } = useDisplay();
// Search responses. Both stay undefined until the matching fetch function assigns them.
const systemHits = ref(undefined)
const refseqTaxo = ref(undefined)
// Right margin of the plot; it is also subtracted from the computed plot width.
const marginRight = ref(50)
// Taxonomic rank currently bound to the v-select; used as the grouping key of the distribution.
const selectedTaxoRank = ref("phylum")
// Ranks offered in the v-select.
// NOTE(review): "Superkingdom" is capitalised unlike the other ranks — confirm it matches the index field name.
const taxoRanks: Ref<string[]> = ref([
"species",
"genus",
"family",
"order",
"class",
"phylum",
"Superkingdom"
]);
// Current content page; its title is used as the system name in the search filter and the plot label.
const { page } = useContent();
// Name of the search index queried by fetchSystemHits.
const msIndexName = ref<string>("refseqsanitized")
// Load the data once per mount. The original registered the same two fetches in
// both onBeforeMount and onMounted, which issued every request twice; a single
// hook is enough, and onBeforeMount is the earlier of the two.
onBeforeMount(() => {
  fetchSystemHits()
  fetchRefSeqTaxo()
})
// Width available to the plot: the display width clamped to the [550, 1280] range.
const computedWidth = computed(() => {
  const cappedWidth = Math.min(toValue(width), 1280)
  return Math.max(cappedWidth, 550)
})
/**
 * Distribution of the system across the taxa of the selected rank.
 *
 * Each entry is `{ taxo, size }`, where `size` is the number of distinct
 * `Assembly` values among the system hits of that taxon, divided by the facet
 * count of the same taxon in the taxonomy response, times 100.
 *
 * Returns an empty array while either response is missing, or when the
 * taxonomy response has no facet for the selected rank.
 */
const computedDistribution = computed(() => {
  const rank = toValue(selectedTaxoRank)
  const hits = toValue(systemHits)?.hits
  const facetsPerRank = toValue(refseqTaxo)?.facetDistribution?.[rank]
  // Guarding on facetsPerRank matters: indexing an undefined facet map used to throw.
  if (!hits || !rank || !facetsPerRank) {
    return []
  }
  // Nested rollup: taxon -> (assembly -> number of hits); only the inner map size is used.
  const assembliesPerTaxon = d3.rollup(hits, D => D.length, d => d[rank], d => d.Assembly)
  const distribution = []
  for (const [taxo, assemblies] of assembliesPerTaxon.entries()) {
    const totalGenomes = facetsPerRank[taxo]
    // Skip taxa that are unknown to the taxonomy facets (also avoids dividing by zero).
    if (totalGenomes && totalGenomes > 0) {
      distribution.push({ taxo, size: (assemblies.size / totalGenomes) * 100 })
    }
  }
  return distribution
})
// const totalGenome = computed(() => {
// refseqTaxo?.estimatedTotalHits
// })
/**
 * Summary figures shown in the card text.
 *
 * - `genomeWithSystem` / `speciesWithSystem`: number of distinct keys in the
 *   `Assembly` / `species` facets of the system hits.
 * - `totalGenome`: `estimatedTotalHits` of the taxonomy response.
 * - `percentGenome`: `genomeWithSystem / totalGenome * 100`.
 *
 * Every field stays undefined until the data it depends on is available.
 */
const systemStatistics = computed(() => {
  const systemFacets = toValue(systemHits)?.facetDistribution
  const totalGenome = toValue(refseqTaxo)?.estimatedTotalHits
  let statistics: Record<string, number | undefined> = { totalGenome: undefined, genomeWithSystem: undefined, speciesWithSystem: undefined, percentGenome: undefined }
  if (systemFacets) {
    statistics = {
      ...statistics,
      // A missing facet counts as zero. The former `Object.entries(x)?.length`
      // threw on an undefined facet because the optional chain came too late.
      genomeWithSystem: Object.keys(systemFacets.Assembly ?? {}).length,
      speciesWithSystem: Object.keys(systemFacets.species ?? {}).length,
    }
  }
  // The truthiness test on totalGenome also prevents a division by zero.
  if (statistics.genomeWithSystem !== undefined && totalGenome) {
    statistics = {
      ...statistics,
      totalGenome,
      percentGenome: (statistics.genomeWithSystem / totalGenome) * 100
    }
  }
  return statistics
})
/**
 * Options object handed to the PlotFigure component: a bar chart of
 * `computedDistribution` with one bar per taxon, sorted by descending value.
 *
 * The stray `console.log(computedDistribution)` that followed this definition
 * was removed: it ran once during setup and printed the ref object itself.
 */
const distributionOptions = computed(() => {
  const rightMargin = marginRight.value
  const bars = Plot.barY(toValue(computedDistribution), {
    y: "size",
    x: "taxo",
    tip: true,
    sort: { x: "-y" },
  })
  return {
    marginBottom: 100,
    marginRight: rightMargin,
    y: { label: `% of genomes encoding ${toValue(page)?.title ?? 'the system'}` },
    x: { label: selectedTaxoRank.value, tickRotate: 45 },
    // Subtract the margin so the figure does not exceed the computed width.
    width: computedWidth.value - rightMargin,
    marks: [bars],
  }
})
// =================================================
// ASYNC PART
// =================================================
/**
 * Fetches the hits of the current system from the index named by
 * `msIndexName`, filtered on `type` equal to the page title and with all
 * facets requested, then stores the response in `systemHits`.
 *
 * @throws an error created with `createError` when the search reports an error.
 */
async function fetchSystemHits() {
  // `page` is a ref, so the title must be read from its value. The error
  // message previously used `page.title`, which is always undefined.
  const systemName = toValue(page).title
  const { data, error } = await useAsyncMeiliSearch({
    index: toValue(msIndexName),
    query: "",
    params: {
      facets: ["*"],
      filter: [`type='${systemName}'`],
      limit: 500000,
    }
  })
  systemHits.value = data.value
  if (error.value) {
    throw createError(`Cannot get hits on refseq for system: ${systemName}`)
  }
}
/**
 * Fetches the facets of the "refseqtaxo" index and stores the response in
 * `refseqTaxo`. Only one hit is requested; the computed values read the
 * response's `facetDistribution` and `estimatedTotalHits`.
 *
 * @throws an error created with `createError` when the search reports an error.
 */
async function fetchRefSeqTaxo() {
  const { data, error } = await useAsyncMeiliSearch({
    index: "refseqtaxo",
    query: "",
    params: {
      facets: ["*"],
      filter: [],
      limit: 1,
    }
  })
  refseqTaxo.value = data.value
  if (error.value) {
    // `page` is a ref: read the title from its value (`page.title` is always undefined).
    throw createError(`Cannot get refseq taxo: ${toValue(page)?.title}`)
  }
}
</script>
<template>
<v-card flat>
<!--
  Rank selector, summary sentence built from systemStatistics, and the distribution plot.
  NOTE(review): the statistics are undefined until both fetches resolve, so the
  d3.format calls below presumably render "NaN" during loading; confirm and add a fallback if so.
-->
<v-select v-model="selectedTaxoRank" :items="taxoRanks" density="compact" label="Select taxonomic rank"
hide-details="auto" class="mx-2"></v-select>
<v-card-text>
Among the {{ d3.format(",")(systemStatistics.totalGenome) }} complete genomes of RefSeq, the {{ page.title
}} is
detected in {{ d3.format(",")(systemStatistics.genomeWithSystem) }} genomes ({{
d3.format(".2f")(systemStatistics.percentGenome) }} %).
The system was detected in {{ d3.format(",")(systemStatistics.speciesWithSystem) }} different species.
</v-card-text>
<PlotFigure ref="systemsDistributionPlot" :options="unref(distributionOptions)" defer></PlotFigure>
</v-card>
</template>
\ No newline at end of file
...@@ -60,6 +60,10 @@ Proportion of genome encoding the AbiA system for the 14 phyla with more than 50 ...@@ -60,6 +60,10 @@ Proportion of genome encoding the AbiA system for the 14 phyla with more than 50
::article-system-distribution-plot
::
## Structure ## Structure
### AbiA_large ### AbiA_large
##### Example 1 ##### Example 1
......
...@@ -73,6 +73,10 @@ The system was detected in 366 different species. ...@@ -73,6 +73,10 @@ The system was detected in 366 different species.
Proportion of genome encoding the Avs system for the 14 phyla with more than 50 genomes in the RefSeq database. Proportion of genome encoding the Avs system for the 14 phyla with more than 50 genomes in the RefSeq database.
::article-system-distribution-plot
::
## Structure ## Structure
### AVAST_I ### AVAST_I
##### Example 1 ##### Example 1
......
...@@ -70,6 +70,9 @@ The system was detected in 6137 different species. ...@@ -70,6 +70,9 @@ The system was detected in 6137 different species.
Proportion of genome encoding the RM system for the 14 phyla with more than 50 genomes in the RefSeq database. Proportion of genome encoding the RM system for the 14 phyla with more than 50 genomes in the RefSeq database.
::article-system-distribution-plot
::
## Structure ## Structure
### Experimentally determined structure ### Experimentally determined structure
......
...@@ -341,6 +341,7 @@ def refseq_group_per_assembly_and_type( ...@@ -341,6 +341,7 @@ def refseq_group_per_assembly_and_type(
"species", "species",
], ],
as_index=False, as_index=False,
dropna=False
).size() ).size()
df_final_grouped.reset_index().to_csv(output, index=False) df_final_grouped.reset_index().to_csv(output, index=False)
...@@ -380,6 +381,7 @@ def refseq_group_per_assembly( ...@@ -380,6 +381,7 @@ def refseq_group_per_assembly(
"species", "species",
], ],
as_index=False, as_index=False,
dropna=False,
).size() ).size()
df_grouped.reset_index().to_csv(output, index=False) df_grouped.reset_index().to_csv(output, index=False)
...@@ -406,10 +408,7 @@ def refseq_type_count( ...@@ -406,10 +408,7 @@ def refseq_type_count(
], ],
): ):
df = pd.read_csv(input) df = pd.read_csv(input)
grouped_per_type = df.groupby( grouped_per_type = df.groupby(["type"], as_index=False, dropna=False).size()
["type"],
as_index=False,
).size()
grouped_per_type.reset_index().to_csv(output, index=False) grouped_per_type.reset_index().to_csv(output, index=False)
...@@ -442,12 +441,14 @@ def _sanitized_refseq_hits(df): ...@@ -442,12 +441,14 @@ def _sanitized_refseq_hits(df):
"species", "species",
], ],
as_index=False, as_index=False,
dropna=False,
).size() ).size()
# count each occurrence # count each occurrence
df_again_per_assembly = no_sys_assembly_by_size.groupby( df_again_per_assembly = no_sys_assembly_by_size.groupby(
"Assembly", "Assembly",
as_index=False, as_index=False,
dropna=False
).size() ).size()
# filter to keep only size > 1 (when == 1 it means that there is only "no system found for an assembly") # filter to keep only size > 1 (when == 1 it means that there is only "no system found for an assembly")
# so we should keep it # so we should keep it
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment