Skip to content
Snippets Groups Projects
Commit 31cefa7d authored by Kenzo-Hugo Hillion's avatar Kenzo-Hugo Hillion ♻️
Browse files

continue method to create kegg annotations

parent 192809a2
No related branches found
No related tags found
1 merge request: !16 gene annotations from csv file
Pipeline #77711 passed
......@@ -7,3 +7,4 @@
# Python
__pycache__
*.egg-info/
notebooks/
\ No newline at end of file
......@@ -5,14 +5,13 @@ import inject
import pandas as pd
# from app.core.models.annotations import KeggOrthologyAnnotation
from app.core.repositories.annotations import KeggOrthologyAnnotationsRepo
from app.core.repositories.experiment import ExperimentsRepo
from app.core.repositories.gene import GenesRepo
from app.core.repositories.kegg import KeggOrthologysRepo
# from app.core.schemas.entities.annotations import KeggOrthologyAnnotationCreate
from app.core.schemas.tasks.db import DbOperation
from app.core.schemas.entities.annotations import KeggOrthologyAnnotationCreate
# Module-scoped logger: records carry this module's dotted name (so they can
# be filtered per module) and still propagate to the root logger's handlers.
logger = logging.getLogger(__name__)
......@@ -21,7 +20,8 @@ class CreateGeneAnnotationsUseCase:
BASE_OPERATION_NAME = "create_gene_annotations"
ID_COL = "entry_id"
@inject.autoparams("repo")
@inject.autoparams("genes_repo")
@inject.autoparams("experiments_repo")
def __init__(
self,
annotations_df: pd.DataFrame,
......@@ -39,27 +39,52 @@ class CreateGeneAnnotationsUseCase:
self.df = annotations_df
self.experiment_id = experiment_id
self._experiments_repo = experiments_repo
self._genes_repo = genes_repo
self.batch_size = batch_size
self.db_operations: List[DbOperation] = []
self.to_create = []
def _create_unknown_annotations(self, col=None, **kwargs):
    """Record that the annotation column has no dedicated creation method.

    Logs a warning and appends exactly one ``creation_skipped`` operation to
    ``self.db_operations``.

    Args:
        col: name of the offending column. When omitted, the second column
            of ``self.df`` is used, which is the column ``create_annotations``
            dispatches on.
        **kwargs: accepted and ignored so this fallback can be called with
            the same keyword arguments as the real creation methods.
    """
    if col is None:
        col = self.df.columns[1]
    message = f"{col} is not a valid column name for annotations. Skipped."
    logger.warning(message)
    # One operation per skip; the message is wrapped in a dict for ``extra``.
    self.db_operations.append(
        DbOperation(name="creation_skipped", extra={"message": message})
    )


# Misspelled pre-rename name, kept so call sites that still use it resolve.
_create_unkown_annotations = _create_unknown_annotations
@inject.autoparams("kegg_orthology_annotations_repo")
@inject.autoparams("kegg_orthologys_repo")
def _create_kegg_annotations(
    self,
    kegg_orthology_annotations_repo: KeggOrthologyAnnotationsRepo,
    kegg_orthologys_repo: KeggOrthologysRepo,
    **kwargs,
):
    """Create one KEGG orthology annotation per distinct ``kegg`` value.

    The first column of ``self.df`` is read as gene identifiers and the
    ``kegg`` column as KEGG identifiers. Both are resolved to database ids
    through the repositories, then the rows are grouped by ``kegg`` and one
    ``KeggOrthologyAnnotationCreate`` is built and handed to
    ``kegg_orthology_annotations_repo.create`` for each group.

    Args:
        kegg_orthology_annotations_repo: repository whose ``create`` is
            called once per annotation (supplied by ``inject``).
        kegg_orthologys_repo: repository whose ``get_entries`` resolves the
            KEGG identifiers (supplied by ``inject``).
        **kwargs: forwarded unchanged to every repository call.

    Returns:
        The ``DbOperation`` counting the created annotations. The same
        object is also appended to ``self.db_operations``.

    Raises:
        KeyError: if a KEGG identifier from the dataframe is absent from the
            entries returned by ``get_entries``. The gene lookup is
            presumably a dict too and would fail the same way for an
            unknown gene -- confirm against ``GenesRepo.get_gene_db_ids``.
    """
    db_operation = DbOperation(name="create-kegg-annotations")

    # Resolve every distinct gene identifier (first column) to its db id.
    gene_ids = self.df.iloc[:, 0]
    gene_db_dict = self._genes_repo.get_gene_db_ids(list(set(gene_ids)), **kwargs)

    # Resolve every distinct KEGG identifier to its db id.
    keggs = kegg_orthologys_repo.get_entries(list(set(self.df["kegg"])), **kwargs)
    kegg_db_dict = {kegg.kegg_id: kegg.id for kegg in keggs}

    # One annotation per KEGG entry, carrying all genes listed for it.
    for kegg_id, subdf in self.df.groupby("kegg"):
        annotation = KeggOrthologyAnnotationCreate(
            experiment_id=self.experiment_id,
            kegg_id=kegg_db_dict[kegg_id],
            genes=[gene_db_dict[gene_id] for gene_id in subdf.iloc[:, 0]],
        )
        kegg_orthology_annotations_repo.create(annotation, **kwargs)
        db_operation.created += 1

    self.db_operations.append(db_operation)
    # Returned only after the work is done, so callers that used the return
    # value still get the operation, now with its final count.
    return db_operation
def create_annotations(self, **kwargs):
    """Dispatch annotation creation based on the dataframe's second column.

    The name of the second column of ``self.df`` selects the method
    ``_create_<column>_annotations``. When no such method exists,
    ``_create_unknown_annotations`` is used instead.

    Args:
        **kwargs: forwarded unchanged to the selected creation method.

    Returns:
        ``self.db_operations``, the list the creation methods append to.
    """
    col = self.df.columns[1]
    # Pass the fallback as a bound method, not a call result, so it only
    # runs when no dedicated creation method exists for this column.
    creator = getattr(
        self, f"_create_{col}_annotations", self._create_unknown_annotations
    )
    creator(**kwargs)
    return self.db_operations
This diff is collapsed.
......@@ -31,6 +31,8 @@ flake8 = "^4.0.1"
invoke = "^1.6.0"
pytest-cov = "^3.0.0"
snakeviz = "^2.1.1"
jupyter = "^1.0.0"
importlib-resources = "^5.4.0"
[tool.poetry.scripts]
create_test_db = "app.cli.db.create_test_db:run"
......
gene,kegg
gene1,K00035
gene2,K00022
gene2,K00029
gene3,K00029
gene4,K02229
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment