diff --git a/.gitignore b/.gitignore index 2af12653142f280d031421a0d3e547f9901e3955..57d294a81975142edbe2d9d0503df4044c1477a9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ __pycache__/ # For settings configuration web/metagenedb/settings/__init__.py + +# Jupyter notebook +.ipynb_checkpoints/ diff --git a/notebooks/.ipynb_checkpoints/Load IGC Genes-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/Load IGC Genes-checkpoint.ipynb deleted file mode 100644 index 347c68000fbcc3ad743d2ef0ec86140580405aec..0000000000000000000000000000000000000000 --- a/notebooks/.ipynb_checkpoints/Load IGC Genes-checkpoint.ipynb +++ /dev/null @@ -1,222 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Proof of concept\n", - "The idea here is to perform loading of all the genes in the annotation file from IGC" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "# Timer decorator\n", - "\n", - "def timer(function):\n", - " def wrapper(*args,**kwargs):\n", - " start_time = time.time()\n", - " func = function(*args,**kwargs)\n", - " print(\"\\n[{}] --> EXECUTED TIME: {} seconds\".format(function.__name__, time.time() - start_time))\n", - " return func\n", - " return wrapper" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Input files" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "IGC_annotation_path = \"/home/khillion/Pasteur/gitlab/metagenedb/dev_data/IGC_sample.annotation_OF.summary\"" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "from itertools import islice\n", - "\n", - "from metagenedb.apps.catalog.models import Gene\n", - "\n", - "\n", - "def insert_gene(raw_line):\n", - " gene_info = raw_line.rstrip().split('\\t')\n", - " gene = Gene(gene_id=gene_info[1],\n", - " gene_length=gene_info[2],\n", - " taxonomic_genus=gene_info[6],\n", - " taxonomic_phylum=gene_info[5])\n", - " gene.full_clean()\n", - " gene.save()\n", - "\n", - "\n", - "def insert_gene_list(chunk_genes):\n", - " for i in chunk_genes:\n", - " insert_gene(i)\n", - "\n", - "\n", - "@timer\n", - "def load_annotation_file_to_db_in_chunks(annotation_file, chunk_size=100000):\n", - " loaded_genes = 0\n", - " with open(annotation_file, 'r') as file:\n", - " while True:\n", - " chunk_genes = list(islice(file, chunk_size))\n", - " if not chunk_genes:\n", - " break\n", - " loaded_genes += len(chunk_genes)\n", - " # genes = format_gene_list(chunk_genes)\n", - " insert_gene_list(chunk_genes)\n", - " # Gene.objects.bulk_create(genes)\n", - " print(f\"{loaded_genes} genes loaded so far...\")\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(0, {'catalog.Gene': 0})" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Gene.objects.all().delete()\n", - "# load_annotation_file_to_db_in_chunks(IGC_annotation_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1000 genes loaded so far...\n" - ] - } - ], - "source": [ - "load_annotation_file_to_db_in_chunks(IGC_annotation_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import metagenedb.settings" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['django.middleware.security.SecurityMiddleware',\n", - " 'django.contrib.sessions.middleware.SessionMiddleware',\n", - " 'django.middleware.common.CommonMiddleware',\n", - " 'django.middleware.csrf.CsrfViewMiddleware',\n", - " 'django.contrib.auth.middleware.AuthenticationMiddleware',\n", - " 'django.contrib.messages.middleware.MessageMiddleware',\n", - " 'django.middleware.clickjacking.XFrameOptionsMiddleware',\n", - " 'corsheaders.middleware.CorsMiddleware']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metagenedb.settings.MIDDLEWARE" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['metagenedb.apps.catalog',\n", - " 'django.contrib.admin',\n", - " 'django.contrib.auth',\n", - " 'django.contrib.contenttypes',\n", - " 'django.contrib.sessions',\n", - " 'django.contrib.messages',\n", - " 'django.contrib.staticfiles',\n", - " 'rest_framework',\n", - " 'django_extensions',\n", - " 'corsheaders']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metagenedb.settings.INSTALLED_APPS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Django Shell-Plus", - "language": "python", - "name": "django_extensions" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/Load IGC Genes.ipynb b/notebooks/Load IGC Genes.ipynb index 0018d568a0952be75b9567522bf1d047ed5ad079..26e53bf9200d1256e05b475461e78007aa9476ea 100644 --- a/notebooks/Load IGC Genes.ipynb +++ b/notebooks/Load IGC Genes.ipynb @@ -106,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 81, "metadata": { "scrolled": true }, @@ -117,7 +117,7 @@ "(1000, {'catalog.Gene': 1000})" ] }, - "execution_count": 79, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -212,6 +212,74 @@ "metagenedb.settings.INSTALLED_APPS" ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from metagenedb.apps.catalog.models import KeggOrthology" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<QuerySet []>" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "KeggOrthology.objects.all()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "a_kegg = KeggOrthology.objects.create_kegg(function_id=\"K01824\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "a_kegg.save()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'requests'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-4-95039fbd75c1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'requests'" + ] + } + ], + "source": [ + "import requests" + ] + }, { "cell_type": "code", "execution_count": null, @@ -236,7 +304,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.8" } }, "nbformat": 4, diff --git a/notebooks/bioservices_kegg.ipynb b/notebooks/bioservices_kegg.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..409faca1c305acbd14d53fbdd84b4c03849eacdd --- /dev/null +++ b/notebooks/bioservices_kegg.ipynb @@ -0,0 +1,501 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import requests\n", + "\n", + "import django\n", + "from django.core.exceptions import ValidationError" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from metagenedb.apps.catalog.models import KeggOrthology, Gene\n", + "logging.basicConfig(level=logging.INFO)\n", + "_LOGGER = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "all_ko = requests.get(\"http://rest.kegg.jp/list/ko\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def _parse_ko(line):\n", + " \"\"\"\n", + " Parse line from kegg KO list to return organized dict\n", + " \"\"\"\n", + " content = line.split('\\t')\n", + " function_id = content[0].split(':')[1]\n", + " names = content[1].split(';')\n", + " if '[EC:' in names[1]:\n", + " ec_number = names[1].split('[EC:')[1].rstrip(']')\n", + " else:\n", + " ec_number = ''\n", + " return({\n", + " 'function_id': function_id,\n", + " 'name': names[0],\n", + " 'long_name': names[1].lstrip(),\n", + " 'ec_number': ec_number\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def create_kegg_ko(line):\n", + " kegg_ko = _parse_ko(line)\n", + " obj_kegg = KeggOrthology(\n", + " function_id=kegg_ko.get('function_id'),\n", + " name=kegg_ko.get('name'),\n", + " long_name=kegg_ko.get('long_name'),\n", + " ec_number=kegg_ko.get('ec_number')\n", + " )\n", + " obj_kegg.full_clean()\n", + " obj_kegg.save()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:1900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:2900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:3900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:4900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:5900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:6900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:7900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:8900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:9900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:10900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:11900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:12900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:13900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:14900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15700/22833 KEGG KO inserted so far...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:15800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:15900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:16900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:17900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:18900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:19900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:20900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:21900/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22000/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22100/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22200/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22300/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22400/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22500/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22600/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22700/22833 KEGG KO inserted so far...\n", + "INFO:__main__:22800/22833 KEGG KO inserted so far...\n", + "INFO:__main__:[DONE] 22833/22833 KEGG KO inserted.\n" + ] + } + ], + "source": [ + "inserted_kegg = 0\n", + "total_kegg = len(all_ko.text.splitlines())\n", + "for line in all_ko.text.splitlines():\n", + " try:\n", + " create_kegg_ko(line)\n", + " inserted_kegg += 1\n", + " except ValidationError as e:\n", + " _LOGGER.warning(f\"{e.__dict__} for function_id: {obj_kegg.function_id}. Insertion skipped.\")\n", + " if inserted_kegg % 100 == 0:\n", + " _LOGGER.info(f\"{inserted_kegg}/{total_kegg} KEGG KO inserted so far...\")\n", + "_LOGGER.info(f\"[DONE] {inserted_kegg}/{total_kegg} KEGG KO inserted.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "DoesNotExist", + "evalue": "KeggOrthology matching query does not exist.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mDoesNotExist\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-8-9d02e2a9c6da>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKeggOrthology\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobjects\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"K2348713123\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Pasteur/venv/metagenedb/lib/python3.6/site-packages/django/db/models/manager.py\u001b[0m in \u001b[0;36mmanager_method\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcreate_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_queryset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Pasteur/venv/metagenedb/lib/python3.6/site-packages/django/db/models/query.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 406\u001b[0m raise self.model.DoesNotExist(\n\u001b[1;32m 407\u001b[0m \u001b[0;34m\"%s matching query does not exist.\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 408\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_meta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobject_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 409\u001b[0m )\n\u001b[1;32m 410\u001b[0m raise self.model.MultipleObjectsReturned(\n", + "\u001b[0;31mDoesNotExist\u001b[0m: KeggOrthology matching query does not exist." + ] + } + ], + "source": [ + "test = KeggOrthology.objects.get(function_id=\"K2348713123\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "test.full_clean()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test2 = KeggOrthology(function_id=\"K212123487\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "{'name': ['This field cannot be blank.'], 'long_name': ['This field cannot be blank.'], 'function_id': ['Function with this Function id already exists.']}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-6-70bd25b4d8bf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_clean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Pasteur/venv/metagenedb/lib/python3.6/site-packages/django/db/models/base.py\u001b[0m in \u001b[0;36mfull_clean\u001b[0;34m(self, exclude, validate_unique)\u001b[0m\n\u001b[1;32m 1201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1202\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1203\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValidationError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclean_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexclude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValidationError\u001b[0m: {'name': ['This field cannot be blank.'], 'long_name': ['This field cannot be blank.'], 'function_id': ['Function with this Function id already exists.']}" + ] + } + ], + "source": [ + "test2.full_clean()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(45668,\n", + " {'catalog.Gene_functions': 0,\n", + " 'catalog.KeggOrthology': 22834,\n", + " 'catalog.Function': 22834})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "KeggOrthology.objects.all().delete()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "MultipleObjectsReturned", + "evalue": "get() returned more than one KeggOrthology -- it returned 2!", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mMultipleObjectsReturned\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-4-8c1de85f296c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mKeggOrthology\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobjects\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"K00001\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Pasteur/venv/metagenedb/lib/python3.6/site-packages/django/db/models/manager.py\u001b[0m in \u001b[0;36mmanager_method\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcreate_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_queryset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mmanager_method\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Pasteur/venv/metagenedb/lib/python3.6/site-packages/django/db/models/query.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 410\u001b[0m raise self.model.MultipleObjectsReturned(\n\u001b[1;32m 411\u001b[0m \u001b[0;34m\"get() returned more than one %s -- it returned %s!\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 412\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_meta\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobject_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 413\u001b[0m )\n\u001b[1;32m 414\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mMultipleObjectsReturned\u001b[0m: get() returned more than one KeggOrthology -- it returned 2!" + ] + } + ], + "source": [ + "KeggOrthology.objects.get(function_id=\"K00001\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1000" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Gene.objects.all().count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}