Skip to content
Snippets Groups Projects
Commit 8fbae264 authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

wip

parent b1583515
No related branches found
No related tags found
No related merge requests found
Pipeline #128175 failed
<script setup lang="ts">
import * as d3 from "d3";
// MeiliSearch client used by fetchOperonStructure() to query the index.
const client = useMeiliSearchRef();
// Content page currently displayed; the system name is derived from it.
const { page } = useContent();
// True while the search request is in flight.
const pending = ref(false);
// Name of the MeiliSearch index that is queried.
const msIndexName = ref<"systemoperonstruct">("systemoperonstruct");
// Last raw search response; stays undefined until a request succeeds.
const msResponse = ref();
/**
 * Name of the system shown on the current page.
 *
 * Uses the page's `system` field when it is set, otherwise falls back to the
 * page `title`; evaluates to `undefined` when neither is available.
 */
const systemName = computed(() => {
  const currentPage = toValue(page);

  const explicitSystem = currentPage?.system;
  if (explicitSystem !== null && explicitSystem !== undefined) {
    return explicitSystem;
  }

  const pageTitle = currentPage?.title;
  if (pageTitle !== null && pageTitle !== undefined) {
    return pageTitle;
  }

  return undefined;
});
/**
 * Sanitized hits grouped by their `subsystem` field, as produced by
 * `d3.groups` (an array of `[subsystem, hits[]]` pairs).
 */
const systemGroupedBySubtype = computed(() => {
  const hits = sanitizedHits.value;
  return d3.groups(hits, (hit) => hit.subsystem);
});
/**
 * Hits of the last search response with a shortened `gene` name.
 *
 * The `gene` field is split on "__" and the second segment is kept.
 * When `gene` is not a string or contains no "__" separator, the original
 * value is kept as-is instead of throwing or collapsing to `undefined`.
 *
 * Evaluates to an empty array while there is no response or no hits.
 */
const sanitizedHits = computed(() => {
  const hits = toValue(msResponse)?.hits;
  if (!Array.isArray(hits) || hits.length === 0) {
    return [];
  }

  return hits.map((hit) => {
    const rawGene = hit?.gene;
    // Only strings can be split; anything else is passed through untouched.
    const shortGene =
      typeof rawGene === "string" ? rawGene.split("__")[1] : undefined;
    return { ...hit, gene: shortGene ?? rawGene };
  });
});
// Start loading the operon structure once the component is mounted.
// The promise is intentionally not awaited or returned from the hook.
onMounted(() => {
  void fetchOperonStructure();
});
/**
 * Query the MeiliSearch index named by `msIndexName` for every document whose
 * `system` attribute equals the current `systemName`, and store the raw
 * response in `msResponse`.
 *
 * `pending` is true for the duration of the request. When no system name is
 * available, no request is sent and `msResponse` is reset to `undefined`.
 *
 * @throws the error built by `createError` when the search request fails.
 */
async function fetchOperonStructure(): Promise<void> {
  const indexName = toValue(msIndexName);
  const name = toValue(systemName);

  // Without a name the filter would be the literal `system = 'undefined'`.
  if (name === undefined || name === null || name === "") {
    msResponse.value = undefined;
    return;
  }

  // Escape backslashes and single quotes so the name cannot terminate the
  // quoted filter value early.
  const escapedName = String(name).replace(/\\/g, "\\\\").replace(/'/g, "\\'");

  try {
    pending.value = true;
    msResponse.value = await client.index(indexName).search("", {
      facets: ["*"],
      filter: [`system = '${escapedName}'`],
      limit: 500000,
    });
  } catch (error: unknown) {
    // Keep the underlying cause visible; the rethrown error only carries a summary.
    console.error(error);
    throw createError(`Cannot get hits on ${indexName} for system: ${name}`);
  } finally {
    pending.value = false;
  }
}
</script>
<!--
  Lists every sanitized hit as "gene - subsystem", then dumps the hits grouped
  by subsystem inside a <pre> element.
-->
<template>
  <v-card flat>
    <v-list-item v-for="hit in sanitizedHits" :key="hit.id">
      {{ hit.gene }} - {{ hit.subsystem }}
    </v-list-item>
    <pre>{{ systemGroupedBySubtype }}</pre>
  </v-card>
</template>
\ No newline at end of file
...@@ -57,6 +57,10 @@ The AbiA_small system in *Alicyclobacillus sp. SO9* (GCF_016406125.1, NZ_CP06633 ...@@ -57,6 +57,10 @@ The AbiA_small system in *Alicyclobacillus sp. SO9* (GCF_016406125.1, NZ_CP06633
## Structure ## Structure
::system-operon-structure
::
### pDockQ matrix ### pDockQ matrix
::pdockq-matrix ::pdockq-matrix
......
...@@ -79,6 +79,9 @@ Proportion of genome encoding the Avs system for the 14 phyla with more than 50 ...@@ -79,6 +79,9 @@ Proportion of genome encoding the Avs system for the 14 phyla with more than 50
## Structure ## Structure
::system-operon-structure
::
### matrix ### matrix
......
...@@ -254,15 +254,15 @@ def system_operon_structure( ...@@ -254,15 +254,15 @@ def system_operon_structure(
resolve_path=True, resolve_path=True,
), ),
] = "./system-structures.csv", ] = "./system-structures.csv",
version: Annotated[str, typer.Option(help="Defense finder model")] = "1.2.4", version: Annotated[str, typer.Option(help="Defense finder model")] = "v1.2.4",
): ):
# get defense finder model from github # get defense finder model from github
df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-v{version}.tar.gz" df_model_url = f"https://github.com/mdmparis/defense-finder-models/releases/download/{version}/defense-finder-models-{version}.tar.gz"
_, tmp_path = tempfile.mkstemp() _, tmp_path = tempfile.mkstemp()
tmp_root_dir = tempfile.gettempdir() tmp_root_dir = tempfile.gettempdir()
df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-v{version}") df_model_dir = Path(f"{tmp_root_dir}/defense-finder-models-{version}")
df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions" df_model_definitions_dir = df_model_dir / "defense-finder-models" / "definitions"
console.print(f"Download models: {df_model_url}") console.print(f"Download models: {df_model_url}")
response = requests.get( response = requests.get(
......
import typer import typer
import re
from rich.console import Console from rich.console import Console
import meilisearch import meilisearch
from pathlib import Path from pathlib import Path
...@@ -80,10 +81,7 @@ NaFloat = Annotated[Optional[float], BeforeValidator(na_to_none)] ...@@ -80,10 +81,7 @@ NaFloat = Annotated[Optional[float], BeforeValidator(na_to_none)]
class StrucutreStatistics(BaseModel): class StrucutreStatistics(BaseModel):
id: int id: int
System_name_ok: str system: str
System: str
gene_name: str
subtype: str
proteins_in_the_prediction: Optional[List[str]] proteins_in_the_prediction: Optional[List[str]]
prediction_type: str prediction_type: str
batch: int batch: int
...@@ -99,7 +97,6 @@ class StrucutreStatistics(BaseModel): ...@@ -99,7 +97,6 @@ class StrucutreStatistics(BaseModel):
iptm_ptm: NaFloat = Field(..., alias="iptm+ptm") iptm_ptm: NaFloat = Field(..., alias="iptm+ptm")
pDockQ: Optional[NaFloat] pDockQ: Optional[NaFloat]
plddts: Optional[NaFloat] plddts: Optional[NaFloat]
Foldseek_name: Optional[str]
class SystemOperonStructure(BaseModel): class SystemOperonStructure(BaseModel):
...@@ -321,11 +318,38 @@ def structure( ...@@ -321,11 +318,38 @@ def structure(
row["proteins_in_the_prediction"] row["proteins_in_the_prediction"]
) )
row["system_genes"] = split_on_comma(row["system_genes"]) row["system_genes"] = split_on_comma(row["system_genes"])
# get info from systemoperonstruct
search_subtype = re.split(r"-\d+$", row["system"])[0]
# search_subtype = row["system"].split("-")[0]
console.print(search_subtype)
res = client.index("systemoperonstruct").search(
"",
{
"filter": f"subsystem = '{search_subtype}'",
},
)
if len(res["hits"]) < 1:
# raise RuntimeError("do not find hits for " + search_subtype)
console.print(f"[red] no hits for {search_subtype}")
# else:
# console.print(res["hits"])
# group data per subtype
# create a set of list of gene
# compare with the one defined in system_genes=
# if it is the same, set subtype
doc = StrucutreStatistics(**row, id=id) doc = StrucutreStatistics(**row, id=id)
# console.print(doc)
documents.append(doc.model_dump(by_alias=True)) documents.append(doc.model_dump(by_alias=True))
tasks = index.add_documents_in_batches(documents, primary_key="id") # tasks = index.add_documents_in_batches(documents, primary_key="id")
for task in tasks: # for task in tasks:
print(task) # print(task)
pagination_settings_task = index.update_pagination_settings( pagination_settings_task = index.update_pagination_settings(
{"maxTotalHits": 100000} {"maxTotalHits": 100000}
) )
......
...@@ -3,6 +3,13 @@ ...@@ -3,6 +3,13 @@
DATA_DIR=../../../data DATA_DIR=../../../data
CONTENT_DIR=../../../content CONTENT_DIR=../../../content
PUBLIC_DIR=../../../public PUBLIC_DIR=../../../public
# system operon structure
df-wiki-cli content system-operon-structure --version "1.2.3" --output ${DATA_DIR}/system-structures.csv
df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv
# REFSEQ # REFSEQ
df-wiki-cli meilisearch delete-all-documents refseq df-wiki-cli meilisearch delete-all-documents refseq
df-wiki-cli meilisearch update refseq --file ${DATA_DIR}/refseq_res.csv df-wiki-cli meilisearch update refseq --file ${DATA_DIR}/refseq_res.csv
...@@ -31,14 +38,9 @@ df-wiki-cli content systems --dir ${CONTENT_DIR}/3.defense-systems/ --pfam ${PUB ...@@ -31,14 +38,9 @@ df-wiki-cli content systems --dir ${CONTENT_DIR}/3.defense-systems/ --pfam ${PUB
df-wiki-cli meilisearch update systems --file /tmp/list-systems.json df-wiki-cli meilisearch update systems --file /tmp/list-systems.json
# STRUCTURE # STRUCTURE
df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics_clean.csv df-wiki-cli meilisearch update structure --file ${DATA_DIR}/all_predictions_statistics.csv
# ARTICLES # ARTICLES
# df-wiki-cli meilisearch delete-all-documents article # df-wiki-cli meilisearch delete-all-documents article
# df-wiki-cli meilisearch # df-wiki-cli meilisearch
# system operon structure
df-wiki-cli content system-operon-structure --version "1.2.4" --output ${DATA_DIR}/system-structures.csv
df-wiki-cli meilisearch update system-operon-structure --file ${DATA_DIR}/system-structures.csv
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment