From 7fa1c73b010989a19ee7b6bce939a2f51d9f2a8b Mon Sep 17 00:00:00 2001
From: Remi  PLANEL <rplanel@pasteur.fr>
Date: Fri, 26 Apr 2024 17:53:35 +0200
Subject: [PATCH] operon structure: take gene size into account

---
 components/OperonStructure.vue                | 98 ++++++++++++++-----
 .../df-wiki-cli/df_wiki_cli/content/main.py   | 23 ++++-
 .../df_wiki_cli/meilisearch/update/main.py    |  1 +
 packages/df-wiki-cli/pyproject.toml           |  2 +-
 types/structure.ts                            |  2 +
 5 files changed, 98 insertions(+), 28 deletions(-)

diff --git a/components/OperonStructure.vue b/components/OperonStructure.vue
index 2fa2077f..da343553 100644
--- a/components/OperonStructure.vue
+++ b/components/OperonStructure.vue
@@ -14,7 +14,7 @@ const structureBasket = useStructuresBasket()
 const props = withDefaults(defineProps<Props>(), {
     genes: null
 });
-const height = ref<number>(120)
+const height = ref<number>(200)
 const svgRef = ref<SVGElement | null>(null)
 const margin = ref<PlotMargin>({
     marginTop: 10,
@@ -35,16 +35,36 @@ const domain = computed(() => {
     return genes?.map(d => { return d.gene })
 })
 
+const innerPadding = ref<number>(0.5) // gap added after every gene, expressed in the same units as a gene's size
+
+const totalGeneLength = computed(() => { // total extent of the operon: each gene's size plus one innerPadding per gene
+    const genes = toValue(computedGenes) // NOTE(review): genesWithCoord guards this value against null; confirm it cannot be null here
+    return genes.reduce((acc, curr) => {
+        return acc + (curr?.size ?? 10) + innerPadding.value // a gene without a size counts as 10
+    }, 0)
+})
+
+
+const domainGenes = computed(() => { // input domain used by xScaleGenes: from 0 up to the summed gene length
+    return [0, totalGeneLength.value]
+
+})
 const xScale = computed(() => {
     return d3.scaleBand()
-        .paddingInner(0.1)
+        .paddingInner(0)
         .domain(toValue(domain))
         .range([0, computedPlotWidth.value])
 })
 
 
+const xScaleGenes = computed(() => { // linear scale mapping gene-length units onto the horizontal plot width
+    return d3.scaleLinear()
+        .domain(toValue(domainGenes))
+        .range([0, computedPlotWidth.value]) // domain and range both start at 0, so the scale converts lengths (size) as well as positions
+})
+
 const yScale = ref(d3.scaleBand()
-    .domain(['img', 'gene'])
+    .domain(['img', 'buff', 'buff2', 'gene'])
     .range([toValue(margin).marginTop, toValue(height)]));
 const gbContainer = ref(null)
 const computedContainerWidth = computed(() => {
@@ -60,9 +80,15 @@ const computedPlotWidth = computed(() => {
 const computedGenes = computed<StructureOperonGene[]>(() => {
     const genes = toValue(props.genes)
     if (genes !== null) {
+        let currentSumSize = 0
         return genes.map(d => {
+            const size = d?.size ?? 10
+            const position = currentSumSize
+            currentSumSize = position + size + innerPadding.value
             return {
                 ...d,
+                size,
+                position
             }
         })
     }
@@ -83,14 +109,14 @@ const structureVersion = computed(() => {
 
 const genesWithCoord = computed<StructureOperonGeneWithCoordinate[]>(() => {
     const genes = toValue(computedGenes)
-    const xScaleVal = toValue(xScale)
+    const xScaleVal = toValue(xScaleGenes)
     const yScaleVal = toValue(yScale)
     if (genes !== null) {
         return genes.map(d => {
             return {
                 ...d,
-                width: xScaleVal.bandwidth(),
-                x: xScaleVal(d.gene),
+                width: xScaleVal(d.size),
+                x: xScaleVal(d.position),
                 y: yScaleVal('gene'),
                 height: yScaleVal.bandwidth()
             }
@@ -127,8 +153,6 @@ function draw() {
 
         gx.call(g => g.select(".domain")
             .remove())
-
-
             .selectAll("text")
             .attr("transform", 'rotate(20)')
             .attr("text-anchor", "start")
@@ -143,19 +167,24 @@ function draw() {
 function drawGenes(genesGroup: d3.Selection<SVGElement, any, SVGElement, any>) {
     const data = toValue(genesWithCoord)
     const genesSelection = genesGroup
-        .selectAll("g.gene") // get all "existing" lines in svg
+        .selectAll("g.operon-item") // get all "existing" lines in svg
         .data<StructureOperonGeneWithCoordinate>(data) // sync them with our data
         .join(
             enter => {
-
-
-                const gGene = enter.append("g")
-                    .classed("gene", true);
-
-                gGene.append("path")
+                const gOperonItem = enter.append("g")
+                    .classed("operon-item", true);
+
+                // gene grp
+                const gGene = gOperonItem.append("g")
+                    .classed("gene-grp", true)
+                gGene
+                    .append("path")
                     .classed("gene", true)
 
-                gGene.append("g").classed("img", true).append("image")
+                // img group
+                gOperonItem
+                    .append("g").classed("img", true)
+                    .append("image")
                     .on("mouseover", function (event) {
                         const target = d3.select(event.srcElement.parentElement)
                         target
@@ -170,32 +199,51 @@ function drawGenes(genesGroup: d3.Selection<SVGElement, any, SVGElement, any>) {
                             // .attr("stroke", null)
                             .attr("cursor", "unset")
                     })
-                gGene.append("text")
+
+
+                gOperonItem.append("text")
                     // .attr("fill", "white")
                     .classed("gene-label", true)
                     .attr("fill", "currentColor")
                     .attr("dominant-baseline", "middle")
-                gGene.append("title")
-                return gGene
+
+
+                gOperonItem.append("line")
+                gOperonItem.append("title")
+                return gOperonItem
             },
             update => update,
             exit => exit.remove()
         )
-    genesSelection.attr("transform", d => `translate(${d.x}, 0)`)
-    genesSelection.select("g.img").select("image")
+    genesSelection.select("g.gene-grp").attr("transform", d => `translate(${d.x}, 0)`)
+    genesSelection.select("g.img")
+        .attr("transform", d => `translate(${xScale.value(d.gene)})`)
+        .select("image")
         .attr("transform", d => `translate(0, ${toValue(yScale)("img")})`)
         .attr("href", d => d?.structImg ?? null)
-        .attr("width", d => d.width)
-        .attr("height", d => d.height)
+        .attr("width", toValue(xScale).bandwidth())
+        .attr("height", toValue(yScale).step() * 3)
+        .attr("preserveAspectRatio", "xMidYMid meet")
         .on("click", function (event) {
             const data = d3.select<SVGElement, StructureOperonGeneWithCoordinate>(this).data()
             structureBasket.set(data.map(s => s?.structPath ?? ''))
 
         })
-    genesSelection.select("path.gene")
+
+
+    genesSelection.select("g.gene-grp").select("path.gene")
         .attr("transform", d => `translate(0, ${d.y})`)
-        .attr("fill", d => color(d.gene))
+        .attr("fill", d => color(d.system))
         .attr("d", d => drawGene(d).toString())
+
+    genesSelection.select("line")
+        //  x1="0" y1="80" x2="100" y2="20" stroke="black"
+        .attr("x1", d => xScale.value(d.gene) + xScale.value.bandwidth() / 2)
+        .attr("y1", d => yScale.value("buff2") + yScale.value.bandwidth() / 3)
+        .attr("x2", d => xScaleGenes.value(d.position) + xScaleGenes.value(d.size / 2))
+        .attr("y2", d => yScale.value("gene") - 2)
+        .attr("stroke", "currentColor")
+
 }
 
 function drawGene({ width, height }) {
diff --git a/packages/df-wiki-cli/df_wiki_cli/content/main.py b/packages/df-wiki-cli/df_wiki_cli/content/main.py
index c3373ef5..fda38afe 100644
--- a/packages/df-wiki-cli/df_wiki_cli/content/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/content/main.py
@@ -316,6 +316,18 @@ def system_operon_structure(
                                 "version": def_path["version"],
                                 "exchangeables": None,
                             }
+
+                            # get hmm definition to extract the size
+                            base_path_list = def_path["path"].parts[0:-4]
+                            profile_path = (
+                                Path(*base_path_list)
+                                / f"profiles/{current_gene['gene']}.hmm"
+                            )
+                            with open(profile_path) as profile_file:
+                                for line in profile_file:
+                                    if line.startswith("LENG"):
+                                        size = int(re.split(r"\s+", line)[1])
+                                        current_gene["size"] = size
                             system_genes.append(current_gene)
                         if elem.tag == "gene" and in_exchangeables:
                             exchangeables.append(elem.attrib["name"])
@@ -329,7 +341,15 @@ def system_operon_structure(
                             exchangeables = []
 
     with open(output, "w") as f:
-        fieldnames = ["id", "system", "subsystem", "version", "gene", "exchangeables"]
+        fieldnames = [
+            "id",
+            "system",
+            "subsystem",
+            "version",
+            "gene",
+            "size",
+            "exchangeables",
+        ]
         writer = csv.DictWriter(f, fieldnames=fieldnames)
         writer.writeheader()
         for id, gene in enumerate(system_genes):
@@ -727,7 +747,6 @@ def find_model_definition(system, subsystem, list_paths):
         parts = path.parts
         if path.stem == subsystem and parts[-2] == system:
             console.rule(f"{system} - {subsystem}")
-            console.print(p)
             found_path = {"path": path, "version": p["version"]}
             break
 
diff --git a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
index de579e18..a219b235 100644
--- a/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
+++ b/packages/df-wiki-cli/df_wiki_cli/meilisearch/update/main.py
@@ -110,6 +110,7 @@ class SystemOperonStructure(BaseModel):
     subsystem: str
     version: str
     gene: str
+    size: int
     exchangeables: Optional[List[str]]
 
 
diff --git a/packages/df-wiki-cli/pyproject.toml b/packages/df-wiki-cli/pyproject.toml
index db1e9ac2..fd210805 100644
--- a/packages/df-wiki-cli/pyproject.toml
+++ b/packages/df-wiki-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "df-wiki-cli"
-version = "0.2.1"
+version = "0.2.2"
 description = ""
 authors = ["Remi  PLANEL <rplanel@pasteur.fr>"]
 readme = "README.md"
diff --git a/types/structure.ts b/types/structure.ts
index e7d08dcb..e622d92c 100644
--- a/types/structure.ts
+++ b/types/structure.ts
@@ -5,6 +5,8 @@ export interface StructureOperonGene {
     version: string
     system: string
     exchangeables: string[]
+    size: number
+    position: number
 
 }
 
-- 
GitLab