diff --git a/backend/Pipfile b/backend/Pipfile index cd50a168c87f471513965cc8b4ebabb33e1353d7..1151d901669ba46a5c07cb1f61a542dc3ab14240 100644 --- a/backend/Pipfile +++ b/backend/Pipfile @@ -29,6 +29,7 @@ factory-boy = "*" pytest-factoryboy = "*" pylint = "*" mock = "*" +snakeviz = "*" [packages] certifi = "*" diff --git a/backend/Pipfile.lock b/backend/Pipfile.lock index ec5e1461226bad3a88519e7350e477bf42002710..f67025225f0afe55e0c5d900b32309c53465422d 100644 --- a/backend/Pipfile.lock +++ b/backend/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "615d076688d5e77a4beecac7c374bd91fb8d8190f6cc17992bd7dd52b525207d" + "sha256": "f81cb1460e5f4b94a712ee3af4bd701917a32360529825962b4e146f0efbc9a4" }, "pipfile-spec": 6, "requires": { @@ -978,6 +978,14 @@ "index": "pypi", "version": "==1.13.0" }, + "snakeviz": { + "hashes": [ + "sha256:5e30f144edb17d875b46cb5f82bd3e67fb5018e534ecc1a94e092ef3ce932c25", + "sha256:80acc9c204aeb1e089f209a4c79bb5940dc40b6536a5184c1778a3f448634885" + ], + "index": "pypi", + "version": "==2.0.1" + }, "terminado": { "hashes": [ "sha256:4804a774f802306a7d9af7322193c5390f1da0abb429e082a10ef1d46e6fb2c2", diff --git a/backend/scripts/populate_db/import_igc_data.py b/backend/scripts/populate_db/import_igc_data.py index 58a2ca441f5b4f3c69f66705f457adbc0f1af603..aa6b92ba3598d592f21637d77e16f365bc10cd0f 100755 --- a/backend/scripts/populate_db/import_igc_data.py +++ b/backend/scripts/populate_db/import_igc_data.py @@ -11,7 +11,7 @@ from slugify import slugify from metagenedb.common.utils.parsers import IGCLineParser -logging.basicConfig() +logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(name)s:%(message)s') logger = logging.getLogger() @@ -88,7 +88,7 @@ class ImportIGCGenes(object): gene_dict.pop('functions') return gene_dict - def load_annotation_file_to_db_in_chunks(self, chunk_size=1000): + def load_annotation_file_to_db_in_chunks(self, chunk_size=1000, test=False): with open(self.annotation_file, 'r') as file: while True: chunk_genes = list(islice(file, chunk_size)) @@ -104,6 +104,8 @@ class ImportIGCGenes(object): self.skipped_genes += len(genes) self.processed_genes += len(chunk_genes) logger.info("%s Genes processed so far...", self.processed_genes) + if test is True: + break logger.info("[DONE] %s/%s Genes created.", self.created_genes, self.total_genes) logger.info("[DONE] %s/%s Genes updated.", self.updated_genes, self.total_genes) logger.info("[DONE] %s/%s Genes skipped.", self.skipped_genes, self.total_genes) @@ -117,8 +119,10 @@ def parse_arguments(): # Common arguments for analysis and annotations parser.add_argument('annotation', help='IGC annotation file') parser.add_argument('--url', help='base URL of the instance.', default='http://localhost/') + parser.add_argument('--chunk_size', type=int, default=1000, help='How many genes to handle and create in the same time.') parser.add_argument('--skip_taxonomy', action='store_true', help='Skip taxonomy information from genes.') parser.add_argument('--skip_functions', action='store_true', help='Skip functions information from genes.') + parser.add_argument('--test', action='store_true', help='Run only on first chunk.') parser.add_argument('-v', '--verbose', action='store_true') try: @@ -133,7 +137,7 @@ def run(): logger.setLevel(logging.INFO) import_igc_genes = ImportIGCGenes(args.annotation, args.url, skip_tax=args.skip_taxonomy, skip_functions=args.skip_functions) - import_igc_genes.load_annotation_file_to_db_in_chunks() + import_igc_genes.load_annotation_file_to_db_in_chunks(chunk_size=args.chunk_size, test=args.test) if __name__ == "__main__":