diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e90e2dbc036cfcfa2c11b04ce42a0c378d96c724..e9bc60d76507eeb81b90bcd6833a3677f5cdd842 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,26 +16,34 @@ test-centos7: - python3.6 -m pip install virtualenv - virtualenv -p python3.6 venv - . venv/bin/activate - - pip3.6 install -r requirements-working.txt + - yum install -y git + - pip3.6 install -r requirements.txt - yum install -y httpd httpd-devel gcc - pip3.6 install mod_wsgi - python3.6 setup.py test -test-python36: - image: python:3.6 +.test-python: stage: test script: - apt-get update && apt install -y libblas-dev liblapack-dev python3-dev - - pip install -r requirements-working.txt + - pip install -r requirements.txt - python setup.py test +test-python36: + extends: ".test-python" + image: python:3.6 + test-python37: + extends: ".test-python" image: python:3.7 - stage: test - script: - - apt-get update && apt install -y libblas-dev liblapack-dev python3-dev - - pip install -r requirements-working.txt - - python setup.py test + +test-python38: + extends: ".test-python" + image: python:3.8 + +test-python39: + extends: ".test-python" + image: python:3.9 pages: image: python:3.6 diff --git a/doc/source/conf.py b/doc/source/conf.py index 76161be99de1ec4682ec01eb4601236b051e6b94..b14f546a2953d8a21ae25bf41d9eb7e53067520a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,197 +1,197 @@ -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/stable/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - -# -- Project information ----------------------------------------------------- - -project = 'JASS' -copyright = '2018, Hugues Aschard, Vi' -author = 'Hugues Aschard, Vi' - -# The short X.Y version -version = '' -# The full version, including alpha/beta/rc tags -release = '' - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode', - 'sphinx.ext.autosummary', - # The Napoleon extension allows for nicer argument formatting. - 'sphinx.ext.napoleon', - 'sphinxarg.ext', - 'sphinxcontrib.bibtex', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. 
-language = 'python' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path . -exclude_patterns = [] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} -html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] } - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'JASSdoc' - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'JASS.tex', 'JASS Documentation', - 'Hugues Aschard, Vi', 'manual'), -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'jass', 'JASS Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'JASS', 'JASS Documentation', - author, 'JASS', 'One line description of project.', - 'Miscellaneous'), -] - - -# -- Extension configuration -------------------------------------------------i - -autoclass_content = "both" # include both class docstring and __init__ -autodoc_default_flags = [ - # Make sure that any autodoc declarations show the right members - "members", - "inherited-members", - "private-members", - "show-inheritance", -] -autosummary_generate = True # Make _autosummary files and include them -napoleon_numpy_docstring = False # Force consistency, leave only Google -napoleon_use_rtype = False # More legible - -# The suffix of source filenames. -autosummary_generate = True - -exclude_patterns = ['_build'] - -# Edit on gitlab link -html_context = { - "display_gitlab": True, # Integrate Gitlab - "gitlab_user": "statistical-genetics", # Username - "gitlab_host": "gitlab.pasteur.fr", - "gitlab_repo": "jass", # Repo name - "gitlab_version": "master", # Version - "conf_py_path": "/doc/source/", # Path in the checkout to the docs root -} - -bibtex_bibfiles = [ - 'refs.bib', -] +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/stable/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + +# -- Project information ----------------------------------------------------- + +project = 'JASS' +copyright = '2018, Hugues Aschard, Vi' +author = 'Hugues Aschard, Vi' + +# The short X.Y version +version = '' +# The full version, including alpha/beta/rc tags +release = '' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', + # The Napoleon extension allows for nicer argument formatting. + 'sphinx.ext.napoleon', + 'sphinxarg.ext', + 'sphinxcontrib.bibtex', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'python' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} +html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] } + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'JASSdoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'JASS.tex', 'JASS Documentation', + 'Hugues Aschard, Vi', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'jass', 'JASS Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'JASS', 'JASS Documentation', + author, 'JASS', 'One line description of project.', + 'Miscellaneous'), +] + + +# -- Extension configuration -------------------------------------------------i + +autoclass_content = "both" # include both class docstring and __init__ +autodoc_default_flags = [ + # Make sure that any autodoc declarations show the right members + "members", + "inherited-members", + "private-members", + "show-inheritance", +] +autosummary_generate = True # Make _autosummary files and include them +napoleon_numpy_docstring = False # Force consistency, leave only Google +napoleon_use_rtype = False # More legible + +# The suffix of source filenames. 
+autosummary_generate = True + +exclude_patterns = ['_build'] + +# Edit on gitlab link +html_context = { + "display_gitlab": True, # Integrate Gitlab + "gitlab_user": "statistical-genetics", # Username + "gitlab_host": "gitlab.pasteur.fr", + "gitlab_repo": "jass", # Repo name + "gitlab_version": "master", # Version + "conf_py_path": "/doc/source/", # Path in the checkout to the docs root +} + +bibtex_bibfiles = [ + 'reference.bib', +] diff --git a/doc/source/install.rst b/doc/source/install.rst index 3803021cad039bc09a447fd6deef0be9c2e60446..c712e739dde45a160ee83f827f4cc91bb2db1d5f 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -1,45 +1,145 @@ -Installation -============ - -You can use JASS locally either using the command line interface in a terminal, or by running a web server. Deployment in a public server is also later discussed in this document. - -Basic installation ------------------- - -You need **python3** to install and use JASS. JASS can be installed like any other python package, for instance with pip: - -.. code-block:: shell - - pip3 install git+https://gitlab.pasteur.fr/statistical-genetics/jass - -Installing it this way will automatically import and setup all of the dependencies required to run JASS. This is pretty much all you need to do to use JASS on the command line, or to run a local personal web server. To deploy JASS on a public web server, please refer to the "Public server deployment" section. - -Some python packages require additional non-python software that you might need to install, e.g. on Ubuntu, with: - -.. code-block:: shell - - sudo apt install libfreetype6-dev #(required by matplotlib) - sudo apt install libhdf5-dev #(required by tables) - sudo apt install rabbitmq-server #(required by celery) - -Run JASS as a web application (optional) ----------------------------------------- - -To run locally JASS as a web application, you need to launch two servers in two different processes, the `celery` task management server and the web server. The web server handles the HTTP requests, and sends all computation requests to the task management server. The command lines below show how to launch the two servers. Please note that you should of course not use this for any use beyond tests and personnal use, we provide further instructions below to deploy JASS on shared/public servers. - -.. code-block:: shell - - # launch celery to process tasks - celery -A jass worker - ## and in ANOTHER TERMINAL - # launch the web server - jass serve - -By default, the Jass server will listen to requests on the port 8080 of your machine. You can control the host and port that the JASS standalone webserver listens to through two environment variables, ``JASS_HOST`` and ``JASS_PORT``, that you just have to set before to launch the web server. - -Public server deployment (optional) ------------------------------------ - -Ansible playbooks are available in the `ansible folder of the source repository <https://gitlab.pasteur.fr/statistical-genetics/jass/tree/master/ansible>`_. These playbooks automate the initial configuration of the system (for a server using CentOS), and the initial installation or the update of the code from the repository. - -In this specific deployment, the JASS web application is hosted by an NGINX server, connected through the uWSGI library. It communicates with a celery service that handles the user-launched tasks. Many other deployment options are of course possible, use whichever suits your infrastructure! 
+Installation
+============
+
+You can use JASS locally either through the command line interface in a terminal, or by running a web server. Deployment on a public server is also discussed later in this document.
+
+Basic installation
+------------------
+
+You need **python3** to install and use JASS. As of April 2021, JASS runs on python 3.x with x < 9.
+
+.. warning::
+    The installation of JASS fails with python 3.9 on Windows and on Linux: several packages (including tables) are not yet compatible with python 3.9, and neither is celery.
+
+A step-by-step procedure for installing JASS in an Anaconda environment for development purposes is detailed below.
+
+**1. Create a dedicated directory and clone the code onto your machine (only the first time)**
+
+In this example, the directory $HOME/DEVELOP_JASS is used.
+In a terminal window, type the following instructions:
+
+.. code-block:: shell
+
+    cd $HOME/DEVELOP_JASS
+    git clone https://gitlab.pasteur.fr/statistical-genetics/jass
+
+**2. Change directory to the JASS main directory**
+
+.. code-block:: shell
+
+    cd $HOME/DEVELOP_JASS/jass
+
+**3. Create a dedicated Anaconda virtual environment for JASS**
+
+.. note::
+
+    Check that you are in the **base** environment of Anaconda before creating the new virtual environment
+    (here named Dev_Jass_Pyt36, with python 3.6).
+
+.. code-block:: shell
+
+    conda create --name Dev_Jass_Pyt36 python=3.6
+
+**4. Activate the new environment**
+
+.. code-block:: shell
+
+    conda activate Dev_Jass_Pyt36
+
+**5. Install the JASS dependencies**
+
+.. code-block:: shell
+
+    pip install -r requirements.txt
+
+**6. Install JASS in development mode**
+
+.. code-block:: shell
+
+    pip install -e .
+
+**7. Verify the installation**
+
+.. code-block:: shell
+
+    pip freeze
+
+Installing it this way will automatically import and set up all of the dependencies required to run JASS.
+
+This is pretty much all you need to do to use JASS on the command line, or to run a local personal web server. To deploy JASS on a public web server, please refer to the "Public server deployment" section.
+
+Additional software installation on Linux
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some python packages require additional non-python software that you might need to install, e.g. on Ubuntu, with:
+
+.. code-block:: shell
+
+    sudo apt install libfreetype6-dev #(required by matplotlib)
+    sudo apt install libhdf5-dev #(required by tables)
+    sudo apt install rabbitmq-server #(required by celery)
+
+Additional software installation on Windows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. tip:: For Windows, an easy option is to install the free software package manager called Chocolatey.
+
+To obtain a working RabbitMQ installation on Windows, we recommend installing Chocolatey by running the following command as Administrator in a PowerShell:
+
+.. code-block:: shell
+
+    Set-ExecutionPolicy Bypass -Scope Process -Force; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))
+
+Then, you can install RabbitMQ with the "choco install" command:
+
+.. code-block:: shell
+
+    choco install rabbitmq
+
+Run JASS as a web application (optional)
+----------------------------------------
+
+To run JASS locally as a web application, you need to launch two servers in two separate processes: the `celery` task management server and the web server. The web server handles the HTTP requests and sends all computation requests to the task management server.
+
+Launching the two servers on Linux
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The command lines below show how to launch the two servers. Please note that you should not use this setup for anything beyond tests and personal use; we provide further instructions below to deploy JASS on shared/public servers.
+
+.. code-block:: shell
+
+    # launch celery to process tasks
+    celery -A jass worker
+    ## and in ANOTHER TERMINAL
+    # launch the web server
+    jass serve
+
+By default, the JASS server listens for requests on port 8080 of your machine. You can control the host and port that the standalone JASS web server listens on through two environment variables, ``JASS_HOST`` and ``JASS_PORT``, which you simply set before launching the web server.
+
+Launching the celery server on Windows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To launch the celery server on Windows, use the following command in a terminal:
+
+.. code-block:: shell
+
+    celery -A jass worker --pool=solo
+
+.. warning::
+    The command recommended for Linux crashes on Windows because the billiard library does not handle the default prefork pool correctly on that platform.
+    The ``--pool=solo`` part is required on Windows, as it is the only celery pool option that works there.
+
+Launching the web server on Windows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The web server is launched in another terminal, exactly as on Linux:
+
+.. code-block:: shell
+
+    jass serve
+
+Public server deployment (optional)
+-----------------------------------
+
+Ansible playbooks are available in the `ansible folder of the source repository <https://gitlab.pasteur.fr/statistical-genetics/jass/tree/master/ansible>`_. These playbooks automate the initial configuration of the system (for a server using CentOS), and the initial installation or update of the code from the repository.
+
+In this specific deployment, the JASS web application is hosted by an NGINX server, connected through the uWSGI library. It communicates with a celery service that handles the user-launched tasks. Many other deployment options are of course possible; use whichever suits your infrastructure!
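The documentation above starts the two processes by hand. As a purely illustrative convenience (not part of the JASS package), the sketch below wraps the two documented commands, ``celery -A jass worker`` and ``jass serve``, in one hypothetical Python helper, adding ``--pool=solo`` automatically on Windows and forwarding the ``JASS_HOST``/``JASS_PORT`` variables described above. It assumes both entry points are on the PATH, i.e. that the environment created earlier is activated.

.. code-block:: python

    # launch_jass.py: hypothetical helper script, not shipped with JASS.
    # It simply runs the two documented commands, `celery -A jass worker`
    # and `jass serve`, as subprocesses, adding --pool=solo on Windows.
    import os
    import platform
    import subprocess
    import sys


    def main() -> int:
        worker_cmd = ["celery", "-A", "jass", "worker"]
        if platform.system() == "Windows":
            # The prefork pool is unusable on Windows (see the warning above).
            worker_cmd.append("--pool=solo")

        env = os.environ.copy()
        # Defaults documented above; override by exporting the variables first.
        env.setdefault("JASS_HOST", "127.0.0.1")
        env.setdefault("JASS_PORT", "8080")

        worker = subprocess.Popen(worker_cmd, env=env)
        server = subprocess.Popen(["jass", "serve"], env=env)
        try:
            return server.wait()
        except KeyboardInterrupt:
            return 0
        finally:
            # Stop both processes when the web server exits or on Ctrl+C.
            for proc in (worker, server):
                proc.terminate()
                proc.wait()


    if __name__ == "__main__":
        sys.exit(main())

This is roughly equivalent to running the two commands manually in two terminals, which remains the recommended approach for anything beyond a quick local test.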
diff --git a/jass/__main__.py b/jass/__main__.py index 1cc759fa2d04ef5b75b88e57b4dc26fbf21bf29e..1713af147cb1cd4ed3daecf11c25d2a6b4d66d92 100644 --- a/jass/__main__.py +++ b/jass/__main__.py @@ -1,283 +1,425 @@ -#!/usr/bin/env python3 -import os -import argparse - -from .server import get_jass_app -from .config import config - -from .models.phenotype import get_available_phenotypes -from .models.inittable import create_inittable_file -from .models.worktable import create_worktable_file -from .models.plots import create_global_plot, create_quadrant_plot - - -def serve(args): - app = get_jass_app() - app.run(host=config["HOST"], port=config["PORT"]) - - -def w_list_phenotypes(args): - phenotypes = get_available_phenotypes(args.init_table_path) - print([phenotype.id for phenotype in phenotypes]) - -def compute_worktable(args): - init_table_path = args.init_table_path - selected_phenotypes = args.phenotypes - worktable_path = args.worktable_path - remove_nan = not (args.keep_nans) - significance_treshold = float(args.significance_treshold) - post_filtering = bool(args.post_filtering) - custom_loadings=args.custom_loadings - - if args.omnibus: - strategy = 'jass.models.stats:omnibus_stat' - elif args.sumz: - strategy = 'jass.models.stats:sumz_stat' - elif args.fisher_test: - strategy = "jass.models.stats:fisher_test" - elif args.meta_analysis: - strategy = "jass.models.stats:meta_analysis" - else: - strategy = args.strategy - - create_worktable_file( - selected_phenotypes, init_table_path, worktable_path, - remove_nan, strategy, significance_treshold=significance_treshold, - post_filtering=post_filtering,chunk_size=int(args.chunk_size), - custom_loadings=custom_loadings - ) - -def w_create_worktable(args): - compute_worktable(args) - -def w_create_project_data(args): - compute_worktable(args) - worktable_path = args.worktable_path - manhattan_plot_path = args.manhattan_plot_path - quadrant_plot_path = args.quadrant_plot_path - create_global_plot(worktable_path, manhattan_plot_path) - create_quadrant_plot(worktable_path, quadrant_plot_path, significance_treshold = float(args.significance_treshold)) - - -def w_create_inittable(args): - input_data_path = args.input_data_path - init_covariance_path = args.init_covariance_path - regions_map_path = args.regions_map_path - description_file_path = args.description_file_path - init_table_path = args.init_table_path - create_inittable_file( - input_data_path, - regions_map_path, - description_file_path, - init_table_path, - init_covariance_path - ) - -def w_plot_manhattan(args): - worktable_path = args.worktable_path - plot_path = args.plot_path - create_global_plot(worktable_path, plot_path) - - -def w_plot_quadrant(args): - worktable_path = args.worktable_path - plot_path = args.plot_path - significance_treshold= float(args.significance_treshold) - create_quadrant_plot(worktable_path, plot_path, significance_treshold=significance_treshold) - - -def get_parser(): - parser = argparse.ArgumentParser(prog="jass") - subparsers = parser.add_subparsers(dest="action") - subparsers.required = True - - parser_serve = subparsers.add_parser("serve", help="run JASS web server") - parser_serve.set_defaults(func=serve) - - parser_list_phe = subparsers.add_parser( - "list-phenotypes", help="list phenotypes available in a data file" - ) - parser_list_phe.add_argument( - "--init-table-path", - default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), - help="path to the initial data file, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", - ) - 
parser_list_phe.set_defaults(func=w_list_phenotypes) - - parser_create_pd = subparsers.add_parser( - "create-project-data", - help="compute joint statistics and generate plots for a given set of phenotypes", - ) - parser_create_pd.add_argument( - "--init-table-path", - default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), - help="path to the initial data file, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", - ) - parser_create_pd.add_argument( - "--phenotypes", nargs="+", required=True, help="list of selected phenotypes" - ) - parser_create_pd.add_argument( - "--worktable-path", required=True, help="path to the worktable file to generate" - ) - parser_create_pd.add_argument("--keep-nans", action="store_true", default=False) - parser_create_pd.add_argument( - "--manhattan-plot-path", - required=True, - help="path to the genome-wide manhattan plot to generate", - ) - parser_create_pd.add_argument( - "--quadrant-plot-path", - required=True, - help="path to the quadrant plot to generate", - ) - parser_create_pd.add_argument( - "--significance-treshold", - default=5*10**-8, - help="The treshold at which a p-value is considered significant", - ) - parser_create_pd.add_argument( - "--post-filtering", - default=True, - help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)", - ) - - parser_create_pd.add_argument( - "--custom-loadings", - required=False, - help="path toward a csv file containing custom loading for sumZ tests", - ) - - parser_create_pd.add_argument( - "--chunk-size", - required=False, - default=50, - help="Number of region to load in memory at once", - ) - - strategies = parser_create_pd.add_mutually_exclusive_group() - strategies.add_argument("--omnibus", action="store_true", default=True) - strategies.add_argument("--sumz", action="store_true", default=False) - strategies.add_argument("--fisher_test", action="store_true", default=False) - strategies.add_argument("--meta_analysis", action="store_true", default=False) - strategies.add_argument("--strategy") - - parser_create_pd.set_defaults(func=w_create_project_data) - - parser_create_it = subparsers.add_parser( - "create-inittable", help="import data into an initial data file" - ) - parser_create_it.add_argument( - "--input-data-path", - required=True, - help="path to the GWAS data file (ImpG format) to import. 
the path must be specify between quotes", - ) - parser_create_it.add_argument( - "--init-covariance-path", - required=False, - help="path to the covariance file to import", - ) - parser_create_it.add_argument( - "--regions-map-path", - required=True, - help="path to the genome regions map (BED format) to import", - ) - parser_create_it.add_argument( - "--description-file-path", - required=True, - help="path to the GWAS studies metadata file", - ) - parser_create_it.add_argument( - "--init-table-path", - default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), - help="path to the initial data file to produce, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", - ) - parser_create_it.set_defaults(func=w_create_inittable) - - parser_create_wt = subparsers.add_parser( - "create-worktable", - help="compute joint statistics for a given set of phenotypes", - ) - parser_create_wt.add_argument( - "--init-table-path", - default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), - help="path to the initial data table, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", - ) - parser_create_wt.add_argument( - "--phenotypes", nargs="+", required=True, help="list of selected phenotypes" - ) - parser_create_wt.add_argument( - "--worktable-path", required=True, help="path to the worktable file to generate" - ) - parser_create_wt.add_argument( - "--significance-treshold", - default=5*10**-8, - help="threshold at which a p-value is considered significant" - ) - parser_create_wt.add_argument( - "--post-filtering", - default=True, - help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)" - ) - - parser_create_wt.add_argument( - "--custom-loadings", - required=False, - help="path toward a csv file containing custom loading for sumZ tests", - ) - - parser_create_wt.add_argument( - "--chunk-size", - required=False, - default=50, - help="Number of region to load in memory at once", - ) - parser_create_wt.add_argument("--keep-nans", action="store_true", default=False) - strategies = parser_create_wt.add_mutually_exclusive_group() - strategies.add_argument("--omnibus", action="store_true", default=True) - strategies.add_argument("--sumz", action="store_true", default=False) - strategies.add_argument("--fisher_test", action="store_true", default=False) - strategies.add_argument("--meta_analysis", action="store_true", default=False) - strategies.add_argument("--strategy") - parser_create_wt.set_defaults(func=w_create_worktable) - - parser_create_mp = subparsers.add_parser( - "plot-manhattan", - help="generate genome-wide manhattan plot for a given set of phenotypes", - ) - parser_create_mp.add_argument( - "--worktable-path", - required=True, - help="path to the worktable file containing the data", - ) - parser_create_mp.add_argument( - "--plot-path", required=True, help="path to the manhattan plot file to generate" - ) - parser_create_mp.set_defaults(func=w_plot_manhattan) - - parser_create_mp = subparsers.add_parser( - "plot-quadrant", help="generate a quadrant plot for a given set of phenotypes" - ) - parser_create_mp.add_argument( - "--worktable-path", - required=True, - help="path to the worktable file containing the data", - ) - parser_create_mp.add_argument( - "--plot-path", required=True, help="path to the quadrant plot file to generate" - ) - parser_create_mp.add_argument( - "--significance-treshold", - default=5*10**-8, - help="threshold at which a p-value is considered significant" - ) - 
parser_create_mp.set_defaults(func=w_plot_quadrant) - return parser - -def main(): - parser = get_parser() - args = parser.parse_args() - args.func(args) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import argparse + +from jass.server import get_jass_app +from jass.config import config + +from jass.models.phenotype import get_available_phenotypes +from jass.models.inittable import create_inittable_file +from jass.models.worktable import create_worktable_file +from jass.models.plots import create_global_plot, create_quadrant_plot + +def absolute_path_of_the_file(fileName, output_file = False): + """ + Builds the absolute path of the file : fileName + This makes the execution of JASS functions more robust and flexible + """ + + # Build an absolute path if possible + absoluteFilePath = os.path.abspath(fileName) + + # Test if the file name is a pattern + is_a_pattern = (os.path.basename(fileName).find("*") > 0) + + if (is_a_pattern or output_file) : + # Test if the directory path exist + Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) + + if (Directory_path_exist == False): + # Test the path using the Jass data directory + absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + + Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) + + if (Directory_path_exist == False): + Message = "The directory of the file {} does not exist".format(fileName) + raise NameError(Message) + else: + # Test if the file path exist + File_path_exist = os.path.exists(absoluteFilePath) + + if (File_path_exist == False): + # Test the path using the Jass data directory + absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + File_path_exist = os.path.exists(absoluteFilePath) + + if (File_path_exist == False): + Message = "The file {} does not exist".format(fileName) + raise NameError(Message) + + # Test if it is realy a file + Is_a_file = os.path.isfile(absoluteFilePath) + + if (not Is_a_file) : + Message = "{} is not a file".format(fileName) + raise NameError(Message) + + return absoluteFilePath + + +def serve(args): + app = get_jass_app() + app.run(host=config["HOST"], port=config["PORT"]) + + +def w_list_phenotypes(args): + phenotypes = get_available_phenotypes(args.init_table_path) + print([phenotype.id for phenotype in phenotypes]) + + +def compute_worktable(args): + + csv_file_path = args.csv_file_path + if (csv_file_path is not None): + csv_file_path = absolute_path_of_the_file(csv_file_path, True) + init_table_path = absolute_path_of_the_file(args.init_table_path) + worktable_path = absolute_path_of_the_file(args.worktable_path, True) + selected_phenotypes = args.phenotypes + remove_nan = not (args.keep_nans) + significance_treshold = float(args.significance_treshold) + post_filtering = bool(args.post_filtering) + custom_loadings = args.custom_loadings + chromosome = args.chromosome_number + pos_Start = args.start_position + pos_End = args.end_position + + if args.omnibus: + strategy = 'jass.models.stats:omnibus_stat' + elif args.sumz: + strategy = 'jass.models.stats:sumz_stat' + elif args.fisher_test: + strategy = "jass.models.stats:fisher_test" + elif args.meta_analysis: + strategy = "jass.models.stats:meta_analysis" + else: + strategy = args.strategy + + create_worktable_file( + phenotype_ids = selected_phenotypes, + init_file_path = init_table_path, + project_hdf_path = worktable_path, + remove_nan = remove_nan, + stat = strategy, + optim_na = True, + 
csv_file = csv_file_path, + chunk_size = int(args.chunk_size), + significance_treshold = significance_treshold, + post_filtering = post_filtering, + delayed_gen_csv_file = False, + chromosome = chromosome, + pos_Start = pos_Start, + pos_End = pos_End, + custom_loadings = custom_loadings + ) + +def w_create_worktable(args): + compute_worktable(args) + +def w_create_project_data(args): + compute_worktable(args) + worktable_path = absolute_path_of_the_file(args.worktable_path, True) + manhattan_plot_path = absolute_path_of_the_file(args.manhattan_plot_path, True) + quadrant_plot_path = absolute_path_of_the_file(args.quadrant_plot_path, True) + create_global_plot(worktable_path, manhattan_plot_path) + create_quadrant_plot(worktable_path, + quadrant_plot_path, + significance_treshold = float(args.significance_treshold)) + + +def w_create_inittable(args): + input_data_path = absolute_path_of_the_file(args.input_data_path) + init_covariance_path = absolute_path_of_the_file(args.init_covariance_path) + regions_map_path = absolute_path_of_the_file(args.regions_map_path) + description_file_path = absolute_path_of_the_file(args.description_file_path) + init_table_path = absolute_path_of_the_file(args.init_table_path, True) + + create_inittable_file( + input_data_path, + regions_map_path, + description_file_path, + init_table_path, + init_covariance_path + ) + +def w_plot_manhattan(args): + worktable_path = absolute_path_of_the_file(args.worktable_path) + plot_path = absolute_path_of_the_file(args.plot_path) + create_global_plot(worktable_path, plot_path) + + +def w_plot_quadrant(args): + worktable_path = absolute_path_of_the_file(args.worktable_path) + plot_path = absolute_path_of_the_file(args.plot_path) + significance_treshold = float(args.significance_treshold) + create_quadrant_plot(worktable_path, + plot_path, + significance_treshold=significance_treshold) + + +def get_parser(): + parser = argparse.ArgumentParser(prog="jass") + subparsers = parser.add_subparsers(dest="action") + subparsers.required = True + + parser_serve = subparsers.add_parser("serve", help="run JASS web server") + parser_serve.set_defaults(func=serve) + + # ------- list-phenotypes ------- + + parser_list_phe = subparsers.add_parser( + "list-phenotypes", help="list phenotypes available in a data file" + ) + parser_list_phe.add_argument( + "--init-table-path", + default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + help="path to the initial data file, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", + ) + parser_list_phe.set_defaults(func=w_list_phenotypes) + + # ------- create-project-data ------- + + parser_create_pd = subparsers.add_parser( + "create-project-data", + help="compute joint statistics and generate plots for a given set of phenotypes", + ) + parser_create_pd.add_argument( + "--init-table-path", + default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + help="path to the initial data file, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", + ) + parser_create_pd.add_argument( + "--phenotypes", nargs="+", required=True, help="list of selected phenotypes" + ) + parser_create_pd.add_argument( + "--worktable-path", required=True, help="path to the worktable file to generate" + ) + parser_create_pd.add_argument("--keep-nans", action="store_true", default=False) + parser_create_pd.add_argument( + "--manhattan-plot-path", + required=True, + help="path to the genome-wide manhattan plot to generate", + ) + parser_create_pd.add_argument( + "--quadrant-plot-path", + required=True, + 
help="path to the quadrant plot to generate", + ) + parser_create_pd.add_argument( + "--significance-treshold", + default=5*10**-8, + help="The treshold at which a p-value is considered significant", + ) + parser_create_pd.add_argument( + "--post-filtering", + default=True, + help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)", + ) + + parser_create_pd.add_argument( + "--custom-loadings", + required=False, + help="path toward a csv file containing custom loading for sumZ tests", + ) + + parser_create_pd.add_argument( + "--chunk-size", + required=False, + default=50, + help="Number of region to load in memory at once", + ) + + parser_create_pd.add_argument( + "--csv-file-path", + required=False, + help="path to the results file in csv format" + ) + + parser_create_pd.add_argument( + "--chromosome-number", + required=False, + help="option used only for local analysis: chromosome number studied" + ) + + parser_create_pd.add_argument( + "--start-position", + required=False, + help="option used only for local analysis: start position of the region studied" + ) + + parser_create_pd.add_argument( + "--end-position", + required=False, + help="option used only for local analysis: end position of the region studied" + ) + + strategies = parser_create_pd.add_mutually_exclusive_group() + strategies.add_argument("--omnibus", action="store_true", default=True) + strategies.add_argument("--sumz", action="store_true", default=False) + strategies.add_argument("--fisher_test", action="store_true", default=False) + strategies.add_argument("--meta_analysis", action="store_true", default=False) + strategies.add_argument("--strategy") + + parser_create_pd.set_defaults(func=w_create_project_data) + + # ------- create-inittable ------- + + parser_create_it = subparsers.add_parser( + "create-inittable", help="import data into an initial data file" + ) + parser_create_it.add_argument( + "--input-data-path", + required=True, + help="path to the GWAS data file (ImpG format) to import. 
the path must be specify between quotes", + ) + parser_create_it.add_argument( + "--init-covariance-path", + required=False, + help="path to the covariance file to import", + ) + parser_create_it.add_argument( + "--regions-map-path", + required=True, + help="path to the genome regions map (BED format) to import", + ) + parser_create_it.add_argument( + "--description-file-path", + required=True, + help="path to the GWAS studies metadata file", + ) + parser_create_it.add_argument( + "--init-table-path", + default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + help="path to the initial data file to produce, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", + ) + parser_create_it.set_defaults(func=w_create_inittable) + + # ------- create-worktable ------- + + parser_create_wt = subparsers.add_parser( + "create-worktable", + help="compute joint statistics for a given set of phenotypes", + ) + parser_create_wt.add_argument( + "--init-table-path", + default=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + help="path to the initial data table, default is the configured path (JASS_DATA_DIR/initTable.hdf5)", + ) + parser_create_wt.add_argument( + "--phenotypes", nargs="+", required=True, help="list of selected phenotypes" + ) + parser_create_wt.add_argument( + "--worktable-path", required=True, help="path to the worktable file to generate" + ) + parser_create_wt.add_argument( + "--significance-treshold", + default=5*10**-8, + help="threshold at which a p-value is considered significant" + ) + parser_create_wt.add_argument( + "--post-filtering", + default=True, + help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)" + ) + + parser_create_wt.add_argument( + "--custom-loadings", + required=False, + help="path toward a csv file containing custom loading for sumZ tests", + ) + + parser_create_wt.add_argument( + "--csv-file-path", + required=False, + help="path to the results file in csv format" + ) + + parser_create_wt.add_argument( + "--chunk-size", + required=False, + default=50, + help="Number of region to load in memory at once", + ) + + parser_create_wt.add_argument("--keep-nans", action="store_true", default=False) + + parser_create_wt.add_argument( + "--chromosome-number", + required=False, + help="option used only for local analysis: chromosome number studied" + ) + + parser_create_wt.add_argument( + "--start-position", + required=False, + help="option used only for local analysis: start position of the region studied" + ) + + parser_create_wt.add_argument( + "--end-position", + required=False, + help="option used only for local analysis: end position of the region studied" + ) + + strategies = parser_create_wt.add_mutually_exclusive_group() + strategies.add_argument("--omnibus", action="store_true", default=True) + strategies.add_argument("--sumz", action="store_true", default=False) + strategies.add_argument("--fisher_test", action="store_true", default=False) + strategies.add_argument("--meta_analysis", action="store_true", default=False) + strategies.add_argument("--strategy") + + parser_create_wt.set_defaults(func=w_create_worktable) + + # ------- plot-manhattan ------- + + parser_create_mp = subparsers.add_parser( + "plot-manhattan", + help="generate genome-wide manhattan plot for a given set of phenotypes", + ) + parser_create_mp.add_argument( + "--worktable-path", + required=True, + help="path to the worktable file containing the data", + ) + parser_create_mp.add_argument( + 
"--plot-path", + required=True, + help="path to the manhattan plot file to generate" + ) + parser_create_mp.set_defaults(func=w_plot_manhattan) + + # ------- plot-quadrant ------- + + parser_create_mp = subparsers.add_parser( + "plot-quadrant", help="generate a quadrant plot for a given set of phenotypes" + ) + parser_create_mp.add_argument( + "--worktable-path", + required=True, + help="path to the worktable file containing the data", + ) + parser_create_mp.add_argument( + "--plot-path", + required=True, + help="path to the quadrant plot file to generate" + ) + parser_create_mp.add_argument( + "--significance-treshold", + default=5*10**-8, + help="threshold at which a p-value is considered significant" + ) + parser_create_mp.set_defaults(func=w_plot_quadrant) + + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/jass/controllers/default_controller.py b/jass/controllers/default_controller.py index af5997fbb22d9264ad3b15368b4fd3386ee36bc8..f447e63262de6c024fabcc61ff85c18c1d35abae 100644 --- a/jass/controllers/default_controller.py +++ b/jass/controllers/default_controller.py @@ -1,133 +1,172 @@ -from typing import List, Dict -import os - -from six import iteritems -from flask import send_file, abort -import connexion - -from jass.models.phenotype import Phenotype, get_available_phenotypes -from jass.models.project import Project, create_project -from jass.config import config - -PHENOTYPES = get_available_phenotypes( - os.path.join(config["DATA_DIR"], "initTable.hdf5") -) # FIXME part of the config - - -def phenotypes_get(): - """ - phenotypes_get - Gets the list of available phenotypes - - :rtype: List[Phenotype] - """ - return PHENOTYPES - - -def projects_post(phenotypeID): - """ - projects_post - Create a new project from a selection of phenotypes - :param phenotypeID: IDs of the phenotypes selected for the project - :type phenotypeID: List[str] - - :rtype: str - """ - return create_project(phenotypeID, PHENOTYPES) - - -def projects_project_id_summary_statistics(projectID): - """ - projects_project_id_summary_statistics - Retrieve project summary statistics - """ - return Project(id=projectID).get_project_summary_statistics() - - -def projects_project_id_genome_get(projectID, threshold=None): - """ - projects_project_id_genome_get - Retrieve genome data for a given project - :param projectID: project ID - :type projectID: str - - :rtype: str - """ - return Project(id=projectID).get_project_genomedata() - - -def projects_project_id_global_manhattan_plot_get(projectID): - try: - return send_file( - Project(id=projectID).get_global_manhattan_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["global_manhattan"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_quadrant_plot_get(projectID): - try: - return send_file( - Project(id=projectID).get_quadrant_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["quadrant_plot_status"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_genome_full_get(projectID): - try: - 
return send_file( - Project(id=projectID).get_csv_path(), mimetype="text/csv" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["worktable"] == Project.CREATING: - return ( - "CSV is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_local_manhattan_data_get(projectID, chromosome, region): - return Project(id=projectID).get_project_local_manhattan_data(chromosome, region) - - -def projects_project_id_local_heatmap_data_get(projectID, chromosome, region): - return Project(id=projectID).get_project_local_heatmap_data(chromosome, region) - - -def projects_project_idget(projectID): - """ - projects_project_idget - Retrieve a project definition - :param projectID: project ID - :type projectID: str - - :rtype: Phenotype - """ - return Project(id=projectID) +# -*- coding: utf-8 -*- + +""" +default_controller ensures the connection between the web interface and the Python JASS-analysis module +""" + +from jass.config import config +from jass.models.project import Project, create_project +from jass.models.phenotype import Phenotype, get_available_phenotypes +import connexion +from flask import send_file, abort +from six import iteritems +import os +from typing import List, Dict + +PHENOTYPES = get_available_phenotypes( + os.path.join(config["DATA_DIR"], "initTable.hdf5") +) # FIXME part of the config + + +def phenotypes_get(): + """ + phenotypes_get + Gets the list of available phenotypes + + :rtype: List[Phenotype] + """ + return PHENOTYPES + + +def projects_post(phenotypeID): + """ + projects_post + Create a new project from a selection of phenotypes + :param phenotypeID: IDs of the phenotypes selected for the project + :type phenotypeID: List[str] + + :rtype: str + """ + return create_project(phenotypeID, PHENOTYPES) + + +def projects_project_id_csv_status_get(projectID): + """ + projects_project_id_csv_status_get + Retrieve the generation status of the genome full csv file + :param projectID: project ID + :type projectID: str + + :rtype: str + """ + return Project(id=projectID).get_csv_file_generation() + + +def projects_project_id_summary_statistics(projectID): + """ + projects_project_id_summary_statistics + Retrieve project summary statistics + """ + return Project(id=projectID).get_project_summary_statistics() + + +def projects_project_id_genome_get(projectID, threshold=None): + """ + projects_project_id_genome_get + Retrieve genome data for a given project + :param projectID: project ID + :type projectID: str + + :rtype: str + """ + return Project(id=projectID).get_project_genomedata() + + +def projects_project_id_global_manhattan_plot_get(projectID): + """ + projects_project_id_global_manhattan_plot_get + Gets the global Manhattan plot stored in the Project folder to display it on the Web interface + """ + try: + return send_file( + Project(id=projectID).get_global_manhattan_plot_path(), mimetype="image/png" + ) + except FileNotFoundError: + status = Project(id=projectID).status + if status == Project.DOES_NOT_EXIST: + abort(404) + elif status["global_manhattan"] == Project.CREATING: + return ( + "Plot is not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +def projects_project_id_quadrant_plot_get(projectID): + """ + projects_project_id_quadrant_plot_get + Gets the quadrant plot stored in the Project folder to display it on the Web interface + """ + try: + return send_file( + 
Project(id=projectID).get_quadrant_plot_path(), mimetype="image/png" + ) + except FileNotFoundError: + status = Project(id=projectID).status + if status == Project.DOES_NOT_EXIST: + abort(404) + elif status["quadrant_plot_status"] == Project.CREATING: + return ( + "Plot is not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +def projects_project_id_genome_full_get(projectID): + """ + projects_project_id_genome_full_get + Downloads the file genome_full.csv stored in the Project folder + """ + try: + return send_file( + Project(id=projectID).get_csv_path(), + mimetype="text/csv", + as_attachment=True, + attachment_filename="genome_full.csv" + ) + except FileNotFoundError: + status = Project(id=projectID).status + if status == Project.DOES_NOT_EXIST: + abort(404) + elif status["worktable"] == Project.CREATING: + return ( + "CSV is not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +def projects_project_id_local_manhattan_data_get(projectID, chromosome, region): + """ + projects_project_id_local_manhattan_data_get + Return the SumStatTab dataframe of the Project for a given chromosome and region for the Manhattan plot + """ + return Project(id=projectID).get_project_local_manhattan_data(chromosome, region) + + +def projects_project_id_local_heatmap_data_get(projectID, chromosome, region): + """ + projects_project_id_local_heatmap_data_get + Return the SumStatTab dataframe of the Project for a given chromosome and region for the Heatmap plot + """ + return Project(id=projectID).get_project_local_heatmap_data(chromosome, region) + + +def projects_project_idget(projectID): + """ + projects_project_idget + Retrieve a project definition + :param projectID: project ID + :type projectID: str + + :rtype: Phenotype + """ + return Project(id=projectID) diff --git a/jass/models/plots.py b/jass/models/plots.py index 4e46050cb37ca981af7bb5586446f09edb664bc4..719f606351c87acf37585825bb30106e4a8aab03 100755 --- a/jass/models/plots.py +++ b/jass/models/plots.py @@ -1,33 +1,43 @@ # -*- coding: utf-8 -*- """ -Created on Tue Mar 28 09:57:33 2017 +This software allows to plot and store graphs which can be displayed on the web interface. 
-@author: vguillem -@author: hmenager -@author: hjulienn -@author: clasry +@author: vguillem, hmenager, hjulienn, clasry """ -from pandas import DataFrame, read_hdf -# create (or open) an hdf5 file and opens in append mode +import logging import numpy as np -import matplotlib +# Keep the following order: 1) importing matplotlib, (2) configuring it to use AGG, (3) importing matplotlib submodules +import matplotlib matplotlib.use("AGG") import matplotlib.pyplot as plt from matplotlib import colors import matplotlib.patches as mpatches +import os +from pandas import DataFrame, read_hdf + + def replaceZeroes(df): - ids = np.where((df !=0) & np.isfinite(df)) + """ + replaceZeroes + replace null values of df with the smallest non-zero value + """ + ids = np.where((df != 0) & np.isfinite(df)) min_nonzero = np.min(df.values[ids]) df.values[df.values == 0] = min_nonzero return df def create_global_plot(work_file_path: str, global_plot_path: str): + """ + create_global_plot + generate genome-wide manhattan plot for a given set of phenotypes + """ df = read_hdf(work_file_path, "SumStatTab") - df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes(df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes( + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) df["-log10(Joint p-value)"] = -np.log10(df.JASS_PVAL) df["ind"] = range(len(df)) @@ -63,8 +73,9 @@ def create_global_plot(work_file_path: str, global_plot_path: str): "#bcbd22", "#17becf", "#1f77b4", - "#ff7f0e", + "#ff7f0e" ] + for num, (name, group) in enumerate(df_grouped): group = DataFrame(group) group["absolute_position"] = group["position"] + m @@ -99,21 +110,33 @@ def create_global_plot(work_file_path: str, global_plot_path: str): fig.savefig(global_plot_path, dpi=300) fig.clf() + # Update Jass_progress + progress_path = os.path.join(os.path.dirname( + work_file_path), "JASS_progress.txt") + JASS_progress = 99 + file_progress = open(progress_path, "w") + file_progress.write(str(JASS_progress)) + file_progress.close() + print("------ progress -----") def create_quadrant_plot(work_file_path: str, - quadrant_plot_path: str, significance_treshold=5*10**-8): + quadrant_plot_path: str, significance_treshold=5*10**-8): """ - Create a "quadrant" plot that represent the joint test pvalue versus the univariate test pvalue for the most significant SNPs by genomic region. The plot use a logarithmic scale. + create_quadrant_plot + Create a "quadrant" plot that represent the joint test pvalue versus the univariate \ + test pvalue for the most significant SNPs by genomic region. The plot use a logarithmic scale. 
:param work_file_path: path to the worktable :type work_file_path: str + :param quadrant_plot_path: path to the file where to store the plot results :type quadrant_plot_path: str """ df = read_hdf(work_file_path, "Regions") - df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes(df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes( + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) #df["-log10(Joint p-value)"] = -np.log10(df.JASS_PVAL) @@ -126,26 +149,27 @@ def create_quadrant_plot(work_file_path: str, pv_t["color"] = "grey" # blue: significant pvalues for and univariate tests - cond = df.signif_status=="Both" + cond = df.signif_status == "Both" pv_t.loc[cond.values, "color"] = "#3ba3ec" b = cond.sum() # red: significant pvalues for test only - cond = df.signif_status=="Joint" + cond = df.signif_status == "Joint" pv_t.loc[cond.values, "color"] = "#f77189" r = cond.sum() # green: significant pvalues for univariate test only - cond = df.signif_status=="Univariate" + cond = df.signif_status == "Univariate" pv_t.loc[cond.values, "color"] = "#50b131" c = cond.sum() # grey: non significant pvalues - cond = df.signif_status=="None" + cond = df.signif_status == "None" a = cond.sum() fig, ax = plt.subplots(figsize=(10, 5)) plt.subplot(121) - plt.scatter(pv_t.UNIVARIATE_MIN_PVAL, pv_t.JASS_PVAL, c=pv_t.color.tolist(), alpha=0.6, s=10) + plt.scatter(pv_t.UNIVARIATE_MIN_PVAL, pv_t.JASS_PVAL, + c=pv_t.color.tolist(), alpha=0.6, s=10) plt.axis([0, pv_t.UNIVARIATE_MIN_PVAL.max(), 0, pv_t.JASS_PVAL.max()]) # axes abcisse et ordonnée à 8 @@ -168,13 +192,16 @@ def create_quadrant_plot(work_file_path: str, red_patch = mpatches.Patch( color='#f77189', label='{} Significant pvalues for joint test only'.format(r)) # grey: non significant pvalues - grey_patch = mpatches.Patch(color='grey', label='{} Non significant pvalues'.format(a)) + grey_patch = mpatches.Patch( + color='grey', label='{} Non significant pvalues'.format(a)) lgd = plt.legend(handles=[red_patch, blue_patch, green_patch, grey_patch], - bbox_to_anchor=(0.95, -0.25), loc='lower center', ncol=2, mode="expand", borderaxespad=0.) + bbox_to_anchor=(0.95, -0.25), loc='lower center', + ncol=2, mode="expand", borderaxespad=0.) 
plt.subplot(122) - plt.scatter(pv_t.UNIVARIATE_MIN_PVAL, pv_t.JASS_PVAL, c=pv_t.color.tolist(), alpha=0.6, s=10) + plt.scatter(pv_t.UNIVARIATE_MIN_PVAL, pv_t.JASS_PVAL, + c=pv_t.color.tolist(), alpha=0.6, s=10) # axes abcisse et ordonnee à 8 plt.axvline(treshold, color="grey", linestyle="--") plt.axhline(treshold, color="grey", linestyle="--") @@ -185,24 +212,35 @@ def create_quadrant_plot(work_file_path: str, plt.axis([0, alim, 0, alim]) # légendes abcisse et ordonnee plt.xlabel('-log10(P) for univariate tests', fontsize=12) - #plt.show() - plt.savefig(quadrant_plot_path, dpi=600, bbox_extra_artists=(lgd,), bbox_inches='tight') + # plt.show() + plt.savefig(quadrant_plot_path, dpi=600, + bbox_extra_artists=(lgd,), bbox_inches='tight') plt.clf() nb_omnibus = r nb_total = r + b + c + # Update Jass_progress + progress_path = os.path.join(os.path.dirname( + work_file_path), "JASS_progress.txt") + JASS_progress = 100 + file_progress = open(progress_path, "w") + file_progress.write(str(JASS_progress)) + file_progress.close() + print("------ progress -----") + return (nb_omnibus, nb_total) def create_qq_plot(work_file_path: str, qq_plot_path: str): df = read_hdf(work_file_path, "SumStatTab") - df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes(df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']] = replaceZeroes( + df[['JASS_PVAL', 'UNIVARIATE_MIN_PVAL']]) pvalue = -np.log10(df.JASS_PVAL) # Cast values between 0 and 1, 0 and 1 excluded x = -np.log10(np.arange(1, pvalue.shape[0] + 1) / (pvalue.shape[0] + 2)) - y = np.sort(pvalue[:, 0]) + y = pvalue.sort_values() plt.scatter(x[::-1], y, s=5) lambda_value = np.median(y) / np.median(x) x_1 = np.linspace(0, 6) diff --git a/jass/models/project.py b/jass/models/project.py index 775c40e457df2c5ff3344cb53b6d638526ade1a3..86d84fd67007687a84e205385ae732fdc8a05f81 100644 --- a/jass/models/project.py +++ b/jass/models/project.py @@ -1,247 +1,404 @@ -# coding: utf-8 - -from __future__ import absolute_import -from typing import List, Dict -import os, sys -import hashlib -import traceback - -from celery import Celery - -from .base_model_ import Model -from ..util import deserialize_model -from .phenotype import Phenotype -from .worktable import ( - create_worktable_file, - get_worktable_summary, - get_worktable_genomedata, - get_worktable_local_manhattan_data, - get_worktable_local_heatmap_data, -) -from .plots import create_global_plot, create_quadrant_plot -from jass.config import config - -app = Celery("tasks", broker="pyamqp://guest@localhost//") - - -class Project(Model): - - DOES_NOT_EXIST = "DOES_NOT_EXIST" - - CREATING = "CREATING" - - READY = "READY" - - ERROR = "ERROR" - - def __init__(self, id: str = None, phenotypes: List[Phenotype] = None): - """ - Project - a project (list of phenotypes) - - :param id: project ID. - :type id: str - """ - self.swagger_types = {"id": str, "status": str, "phenotypes": List[Phenotype]} - - self.attribute_map = { - "id": "id", - "status": "status", - "phenotypes": "phenotypes", - } - - self._id = id - self._phenotypes = phenotypes - if self._id is None: - self._id = self.get_id() - - @classmethod - def from_dict(cls, dikt) -> "Project": - """ - Returns the dict as a model - - :param dikt: A dict. - :type: dict - :return: The Project. - :rtype: Project - """ - return deserialize_model(dikt, cls) - - @property - def id(self) -> str: - """ - Gets the id of this Project. - - :return: The id of this Project. 
- :rtype: str - """ - return self._id - - @id.setter - def id(self, id: str): - """ - Lists the id of this Project. - - :param id: The id of this Project. - :type id: str - """ - - self._id = id - - @property - def phenotypes(self) -> List[Phenotype]: - """ - Gets the phenotypes list for this project. - - :return: The phenotypes. - :rtype: str - """ - return self._phenotypes - - @phenotypes.setter - def cohort(self, phenotypes: List[Phenotype]): - """ - Lists the phenotypes list for this project. - - :param phenotypes: The phenotypes. - :type phenotypes: str - """ - - self._phenotypes = phenotypes - - def get_folder_path(self): - return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) - - def get_worktable_path(self): - return os.path.join(self.get_folder_path(), "workTable.hdf5") - - def get_csv_path(self): - return os.path.join(self.get_folder_path(), "workTable.csv") - - def get_project_summary_statistics(self): - return get_worktable_summary(self.get_worktable_path()) - - def get_project_genomedata(self): - return get_worktable_genomedata(self.get_worktable_path()) - - def get_project_local_manhattan_data(self, chromosome: str, region: str): - return get_worktable_local_manhattan_data( - self.get_worktable_path(), chromosome, region - ) - - def get_project_local_heatmap_data(self, chromosome: str, region: str): - return get_worktable_local_heatmap_data( - self.get_worktable_path(), chromosome, region - ) - - def get_id(self): - m = hashlib.md5() - for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: - m.update(str(phenotype_id).encode("utf-8")) - return m.hexdigest() - - def get_global_manhattan_plot_path(self): - return os.path.join(self.get_folder_path(), "Manhattan_Plot_Omnibus.png") - - def get_quadrant_plot_path(self): - return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") - - @property - def status(self): - if not os.path.exists(self.get_folder_path()): - return Project.DOES_NOT_EXIST - else: - worktable_status = get_file_status(self.get_worktable_path()) - global_manhattan_status = get_file_status( - self.get_global_manhattan_plot_path() - ) - quadrant_plot_status = get_file_status(self.get_quadrant_plot_path()) - return { - # WARNING: project status is hacked so that everything is ready - # only once the final step has completed. 
- # This avoids the apparent "corrupted hdf5" file situation - # "worktable": worktable_status, - # "global_manhattan": global_manhattan_status, - "worktable": quadrant_plot_status, - "global_manhattan": quadrant_plot_status, - "quadrant_plot_status": quadrant_plot_status, - } - - -def get_file_building_tb_path(file_path): - return file_path + ".log" - - -def get_file_status(file_path): - if os.path.exists(file_path): - return Project.READY - elif os.path.exists(get_file_building_tb_path(file_path)): - return Project.ERROR - else: - return Project.CREATING - - -@app.task -def create_project_global_plot(worktable_path, plot_path): - try: - create_global_plot(worktable_path, plot_path) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - log_path = get_file_building_tb_path(plot_path) - log_fh = open(log_path, "w") - traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) - log_fh.close() - - -@app.task -def create_project_quadrant_plot(worktable_path, plot_path): - try: - create_quadrant_plot(worktable_path, plot_path) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - log_path = get_file_building_tb_path(plot_path) - log_fh = open(log_path, "w") - traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) - log_fh.close() - - -@app.task -def create_project_data( - phenotype_ids, init_table_path, worktable_path, global_plot_path, quadrant_plot_path, csv_file=None -): - try: - create_worktable_file(phenotype_ids, init_table_path, worktable_path, False, csv_file=csv_file) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - log_path = get_file_building_tb_path(worktable_path) - log_fh = open(log_path, "w") - traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) - log_fh.close() - return - create_project_global_plot.delay(worktable_path, global_plot_path) - create_project_quadrant_plot.delay(worktable_path, quadrant_plot_path) - - -def create_project(phenotype_ids: List[str], available_phenotypes: List[Phenotype]): - available_phenotype_ids = [phenotype.id for phenotype in available_phenotypes] - unavailable_requested_ids = set(phenotype_ids).difference( - set(available_phenotype_ids) - ) - if len(unavailable_requested_ids) > 0: - raise Exception() # FIXME with a nice exception - phenotypes = [ - phenotype for phenotype in available_phenotypes if phenotype.id in phenotype_ids - ] - project = Project(phenotypes=phenotypes) - folder_path = project.get_folder_path() - # if project does not exist - if project.status == Project.DOES_NOT_EXIST: - os.makedirs(folder_path) - create_project_data.delay( - phenotype_ids, - os.path.join(config["DATA_DIR"], "initTable.hdf5"), - project.get_worktable_path(), - project.get_global_manhattan_plot_path(), - project.get_quadrant_plot_path(), - csv_file=project.get_csv_path() - ) - return project +# -*- coding: utf-8 -*- +""" +compute joint statistics and generate plots for a given set of phenotypes +""" +from __future__ import absolute_import +from typing import List, Dict +import os, sys +import shutil +import hashlib +import traceback + +from celery import Celery + +from jass.models.base_model_ import Model +from jass.util import deserialize_model +from jass.models.phenotype import Phenotype +from jass.models.worktable import ( + create_worktable_file, + get_worktable_summary, + get_worktable_genomedata, + get_worktable_local_manhattan_data, + get_worktable_local_heatmap_data, + create_genome_full_csv +) +from 
jass.models.plots import create_global_plot, create_quadrant_plot +from jass.config import config + +app = Celery("tasks", broker="pyamqp://guest@localhost//") + + +class Project(Model): + + DOES_NOT_EXIST = "DOES_NOT_EXIST" + + CREATING = "CREATING" + + READY = "READY" + + ERROR = "ERROR" + + def __init__(self, id: str = None, phenotypes: List[Phenotype] = None): + """ + Project - a project (list of phenotypes) + + :param id: project ID. + :type id: str + """ + self.swagger_types = {"id": str, + "status": str, + "phenotypes": List[Phenotype], + "progress": str} + + self.attribute_map = { + "id": "id", + "status": "status", + "phenotypes": "phenotypes", + "progress": "progress", + } + + self._id = id + self._phenotypes = phenotypes + if self._id is None: + self._id = self.get_id() + + @classmethod + def from_dict(cls, dikt) -> "Project": + """ + Returns the dict as a model + + :param dikt: A dict. + :type: dict + :return: The Project. + :rtype: Project + """ + return deserialize_model(dikt, cls) + + @property + def id(self) -> str: + """ + Gets the id of this Project. + + :return: The id of this Project. + :rtype: str + """ + return self._id + + @id.setter + def id(self, id: str): + """ + Lists the id of this Project. + + :param id: The id of this Project. + :type id: str + """ + + self._id = id + + @property + def phenotypes(self) -> List[Phenotype]: + """ + Gets the phenotypes list for this project. + + :return: The phenotypes. + :rtype: str + """ + return self._phenotypes + + @phenotypes.setter + def cohort(self, phenotypes: List[Phenotype]): + """ + Lists the phenotypes list for this project. + + :param phenotypes: The phenotypes. + :type phenotypes: str + """ + + self._phenotypes = phenotypes + + def get_folder_path(self): + """ + get_folder_path + Gets the path of the folder where the project data are stored + """ + return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) + + def get_worktable_path(self): + """ + get_worktable_path + Gets the path of the file workTable.hdf5 + """ + return os.path.join(self.get_folder_path(), "workTable.hdf5") + + def get_csv_path(self): + """ + get_csv_path + Gets the path of the file genome_full.csv + """ + return os.path.join(self.get_folder_path(), "workTable.csv") + + def get_progress_path(self): + """ + get_progress_path + Gets the path of the file containing the current progress percentage of \ + the analysis performed within the project + """ + return os.path.join(self.get_folder_path(), "JASS_progress.txt") + + def get_csv_lock_path(self): + """ + get_csv_lock_path + Gets the path of the lock set-on when the csv file is not available yet + """ + return os.path.join(self.get_folder_path(), "the_lock.txt") + + def get_project_summary_statistics(self): + return get_worktable_summary(self.get_worktable_path()) + + def get_project_genomedata(self): + return get_worktable_genomedata(self.get_worktable_path()) + + def get_project_local_manhattan_data(self, chromosome: str, region: str): + return get_worktable_local_manhattan_data( + self.get_worktable_path(), chromosome, region + ) + + def get_project_local_heatmap_data(self, chromosome: str, region: str): + return get_worktable_local_heatmap_data( + self.get_worktable_path(), chromosome, region + ) + + def get_id(self): + m = hashlib.md5() + for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: + m.update(str(phenotype_id).encode("utf-8")) + return m.hexdigest() + + def get_global_manhattan_plot_path(self): + return os.path.join(self.get_folder_path(), 
"Manhattan_Plot_Omnibus.png") + + def get_quadrant_plot_path(self): + return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") + + @property + def status(self): + """ + status + Gets the status of the project + """ + if not os.path.exists(self.get_folder_path()): + return Project.DOES_NOT_EXIST + else: + worktable_status = get_file_status(self.get_worktable_path()) + global_manhattan_status = get_file_status( + self.get_global_manhattan_plot_path() + ) + quadrant_plot_status = get_file_status(self.get_quadrant_plot_path()) + return { + # WARNING: project status is hacked so that everything is ready + # only once the final step has completed. + # This avoids the apparent "corrupted hdf5" file situation + # "worktable": worktable_status, + # "global_manhattan": global_manhattan_status, + "worktable": quadrant_plot_status, + "global_manhattan": quadrant_plot_status, + "quadrant_plot_status": quadrant_plot_status, + } + + @property + def progress(self): + """ + progress + Gets the percentage of completion of the phenotype analysis + """ + JASS_progress = 0 + progress_path = self.get_progress_path() + if os.path.exists(progress_path): + file_progress = open(progress_path, "r") + JASS_progress = file_progress.read() + file_progress.close() + return JASS_progress + + def get_csv_file_generation(self): + """ + csv_file_generation + Gets the status of the genome_full csv file generation + """ + the_lock_path = self.get_csv_lock_path() + csv_file = self.get_csv_path() + csv_file_status = Project.CREATING + if (not os.path.isfile(the_lock_path)): + if(os.path.isfile(csv_file)): + csv_file_status = Project.READY + else : + csv_file_status = Project.ERROR + print("csv_file_generation:csv_file_status={}".format(csv_file_status)) + return csv_file_status + +def get_file_building_tb_path(file_path): + return file_path + ".log" + + +def get_file_status(file_path): + if os.path.exists(file_path): + return Project.READY + elif os.path.exists(get_file_building_tb_path(file_path)): + return Project.ERROR + else: + return Project.CREATING + + +@app.task +def create_project_global_plot(worktable_path, plot_path): + try: + create_global_plot(worktable_path, plot_path) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + log_path = get_file_building_tb_path(plot_path) + log_fh = open(log_path, "w") + traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) + log_fh.close() + + +@app.task +def create_project_quadrant_plot(worktable_path, plot_path): + try: + create_quadrant_plot(worktable_path, plot_path) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + log_path = get_file_building_tb_path(plot_path) + log_fh = open(log_path, "w") + traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) + log_fh.close() + + +@app.task +def create_project_csv_file(worktable_path, csv_file, Nchunk): + try: + create_genome_full_csv(worktable_path, csv_file, Nchunk=Nchunk) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + log_path = get_file_building_tb_path(plot_path) + log_fh = open(log_path, "w") + traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) + log_fh.close() + + +@app.task +def create_project_data( + phenotype_ids, + init_table_path, + worktable_path, + remove_nan = False, + stat = "jass.models.stats:omnibus_stat", + csv_file = None, + chunk_size = 50, + significance_treshold = 5*10**-8, + post_filtering = True, + delayed_gen_csv_file = False, + chromosome = 
None, + start = None, + end = None, + custom_loadings = None, + global_plot_path = None, + quadrant_plot_path = None + ): + + try: + Nchunk = create_worktable_file( + phenotype_ids = phenotype_ids, + init_file_path = init_table_path, + project_hdf_path = worktable_path, + remove_nan = remove_nan, + stat = stat, + optim_na = True, + csv_file = csv_file, + chunk_size = chunk_size, + significance_treshold = significance_treshold, + post_filtering = post_filtering, + delayed_gen_csv_file = delayed_gen_csv_file, + chromosome = chromosome, + pos_Start = start, + pos_End = end, + custom_loadings = custom_loadings + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + log_path = get_file_building_tb_path(worktable_path) + log_fh = open(log_path, "w") + traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) + log_fh.close() + return + + if (global_plot_path is not None): + create_project_global_plot.delay(worktable_path, global_plot_path) + if (quadrant_plot_path is not None): + create_project_quadrant_plot.delay(worktable_path, quadrant_plot_path) + if (delayed_gen_csv_file and (csv_file is not None)): + create_project_csv_file.delay(worktable_path, csv_file, Nchunk=Nchunk) + + +def create_project(phenotype_ids: List[str], available_phenotypes: List[Phenotype]): + available_phenotype_ids = [phenotype.id for phenotype in available_phenotypes] + unavailable_requested_ids = set(phenotype_ids).difference( + set(available_phenotype_ids) + ) + if len(unavailable_requested_ids) > 0: + raise Exception() # FIXME with a nice exception + phenotypes = [ + phenotype for phenotype in available_phenotypes if phenotype.id in phenotype_ids + ] + project = Project(phenotypes=phenotypes) + folder_path = project.get_folder_path() + # if project does not exist + if project.status == Project.DOES_NOT_EXIST: + os.makedirs(folder_path) + create_project_data.delay( + phenotype_ids=phenotype_ids, + init_table_path=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + worktable_path=project.get_worktable_path(), + global_plot_path=project.get_global_manhattan_plot_path(), + quadrant_plot_path=project.get_quadrant_plot_path(), + csv_file=project.get_csv_path(), + delayed_gen_csv_file=True + ) + return project + + +def create_project_local( + phenotype_ids: List[str], + available_phenotypes: List[Phenotype], + ip: str, + chromosome: str, + start: str = None, + end: str = None + ): + available_phenotype_ids = [phenotype.id for phenotype in available_phenotypes] + unavailable_requested_ids = set(phenotype_ids).difference( + set(available_phenotype_ids) + ) + if len(unavailable_requested_ids) > 0: + raise Exception() # FIXME with a nice exception + phenotypes = [ + phenotype for phenotype in available_phenotypes if phenotype.id in phenotype_ids + ] + ip = ip.replace('.', '_') + id_project = "local_{}".format(ip) + project = Project(phenotypes=phenotypes, id=id_project) + folder_path = project.get_folder_path() + # If the folder exists, it is deleted with the files it contains + if os.path.exists(folder_path): + shutil.rmtree(folder_path) + # The folder is created + os.makedirs(folder_path) + create_project_data.delay( + phenotype_ids=phenotype_ids, + init_table_path=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + worktable_path=project.get_worktable_path(), + csv_file=project.get_csv_path(), + chromosome=chromosome, + start=start, + end=end + ) + return project + \ No newline at end of file diff --git a/jass/models/stats.py b/jass/models/stats.py old mode 100755 new mode 
100644 index d224900d40c19833f5fede53312437c2919ee867..ff2649b5b1d8d6e5c0021c9de22f58cba6b3e0e1 --- a/jass/models/stats.py +++ b/jass/models/stats.py @@ -1,213 +1,248 @@ -# -*- coding: utf-8 -*- -import numpy as np -import scipy.stats as spst - - -def make_stat_computer_nopattern(cov, stat_func, **kwargs): - """ - Create the function that computes the joint statistics - if no NaN value is in z. - - :param cov: covariance matrix - :type cov: pandas.core.frame.DataFrame - :param stat_func: function that computes the joint statistics - :type stat_func: function - :return: the function used to compute joint statistics with z as input - :rtype: function - """ - # invcov is only computed once - invcov = np.linalg.pinv(cov, rcond=0.001)#np.linalg.inv(cov) - - def compute(z): - return stat_func(z, cov, invcov,**kwargs) - return compute - - -def make_stat_computer_pattern(cov, stat_func): - """ - Create the function that computes the joint statistics - if NaN values are in z. It uses a covariance matrix corresponding - to the pattern of non-NaN values in z. - - :param cov: covariance matrix - :type cov: pandas.core.frame.DataFrame - :param stat_func: function that computes the joint statistics - :type stat_func: function - :return: the function used to compute joint statistics with z as input - :rtype: function - """ - if not stat_func.can_use_pattern: - raise ValueError("this computation strategy cannot be used with patterns") - # invcov_bypattern is a dictionary of invcovs where the key is the - # corresponding pattern of non-NaN values in z - invcov_bypattern = {} - - def compute(z): - z_na_bool = 1 - z.iloc[0,].isnull() - pattern_code = np.dot(z_na_bool, 10 ** np.arange((len(z_na_bool) - 1), -1, -1)) - z_na_bool = z_na_bool.astype(bool) - if pattern_code in invcov_bypattern: - invcov = invcov_bypattern[pattern_code] - else: - mini_cov = cov.loc[z_na_bool, z_na_bool] - invcov = np.linalg.pinv(mini_cov, rcond=0.001) - invcov_bypattern[pattern_code] = invcov - z = z.loc[:, z_na_bool] - - return stat_func(z, None, invcov) - - return compute - - -def make_stat_computer_nan_dumb(cov, stat_func): - # invert covariance for each line - if not (stat_func.can_use_pattern): - raise ValueError("this computation strategy cannot be used with patterns") - - def compute(z): - z_na_pattern = np.abs((np.isnan(z)).astype(int) - 1) - z_na_bool = [bool(val) for val in z_na_pattern] - mini_cov = cov.loc[z_na_bool, z_na_bool] - invcov = np.linalg.pinv(mini_cov, rcond=0.001) - z = z.dropna() - return stat_func(z, None, invcov) - - return compute - -def omnibus_stat(z, cov, invcov): - """ - joint statistics "omnibus" strategy - - note that the omnibus statistics uses invcov but not cov, - all statistics use the same signature as a simple way to - allow extensibility - - omnibus can be used in the "NaN processing" function - - :param z: z-scores for the phenotypes - :type z: pandas.core.series.Series - :param cov: covariance matrix - :type cov: DataFrame - :param invcov: inverted covariance matrix - :type invcov: numpy.ndarray - :return: the joint statistics - :rtype: float - """ - try: - p = np.linalg.matrix_rank(invcov) - #print("DF_K: {0} DF_K': {1}".format(invcov.shape[0], p)) - # stat = np.sum(np.multiply(z, z.dot(invcov))) - stat = np.einsum("ij,jk,ki->i", z, invcov, z.T) - return spst.chi2.sf(stat, df=p) - except ValueError: - print(z.head()) - print(invcov.shape) - print("Error in omnibus stat") - - -omnibus_stat.can_use_pattern = True - - -def fisher_test(z, cov, invcov): - """ - fisher combination of univariate 
p-values corrected with bonferoni - - This test is potentially strongly conservative if the phenotypes are strongly correlated - - :param z: z-scores for the phenotypes - :type z: pandas.core.series.Series - :param cov: covariance matrix - :type cov: DataFrame - :param invcov: inverted covariance matrix - :type invcov: numpy.ndarray - :return: the joint statistics - :rtype: float - """ - try: - p = (~z.isnull()).sum(1) - print(z) - print(cov) - print(invcov) - p_val = 2*spst.norm.sf(np.abs(z)) - stat = -2 * np.log(np.nansum(p_val, axis=1)) - return spst.chi2.sf(stat, df=p) - except ValueError: - print(z.head()) - print(invcov.shape) - print("Error in Fisher stat") - -fisher_test.can_use_pattern = False - -def meta_analysis(z, cov, invcov, **kwargs): - """ - Meta analysis using global sample size to weight z-score - - :param z: z-scores matrix for the phenotypes ((N_snp,N_phenotype) format) - :type z: pandas.core.DataFrame - :param cov: covariance matrix - :type cov: DataFrame - :param invcov: inverted covariance matrix - :type invcov: numpy.ndarray - :return: the joint statistics - :rtype: numpy.ndarray float64 - :param samp_size: Number of sample (will be used as weights in the meta analysis) - :type samp_size : pandas.Series - """ - - Effective_sample_size = kwargs.get('samp_size', None) - if Effective_sample_size is None: - raise Error('no sample size available to perform meta_analysis') - else: - loading = Effective_sample_size.loc[z.columns]**0.5 - - M_loadings = np.full(z.shape, loading**2) - M_loadings[np.isnan(z)] = 0 - - z = np.nan_to_num(z) # fill na with zero - - numi = loading.dot(z.transpose()) - deno = np.sqrt(np.sum(M_loadings, axis=1)) - # print(loading) - # fill na with 0 = don't take the missing GWAS into account in the test - stat = numi / deno - - return spst.chi2.sf(stat, df=1) - -meta_analysis.can_use_pattern = False - -def sumz_stat(z, cov, invcov, **kwargs): - """ - joint statistics "sumZ" strategy - - :param z: z-scores matrix for the phenotypes ((N_snp,N_phenotype) format) - :type z: pandas.core.DataFrame - :param cov: covariance matrix - :type cov: DataFrame - :param invcov: inverted covariance matrix - :type invcov: numpy.ndarray - :return: the joint statistics - :rtype: numpy.ndarray float64 - """ - loading = kwargs.get('loadings', None) - - if loading is None: - p = z.shape[1] - loading = np.ones(p) - else: - loading = loading.loc[z.columns] - - print(loading) - print(cov) - M_loadings = np.full(z.shape, loading) - M_loadings[np.isnan(z)] = 0 - - z = np.nan_to_num(z) - numi = np.square(loading.dot(z.transpose())) - deno = np.einsum('ij,jk,ki->i', M_loadings, cov, M_loadings.T) - - # fill na with 0 = don't take the missing GWAS into account in the test - stat = numi / deno - return spst.chi2.sf(stat, df=1) - - -sumz_stat.can_use_pattern = False +# -*- coding: utf-8 -*- +import numpy as np +import scipy.stats as spst + + +def make_stat_computer_nopattern(cov, stat_func, **kwargs): + """ + Create the function that computes the joint statistics + if no NaN value is in z. 
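+    The pseudo-inverse of the covariance matrix is computed once, when the computer is created, \
+    and is reused by every subsequent call to the returned function.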
+ + :param cov: covariance matrix + :type cov: pandas.core.frame.DataFrame + :param stat_func: function that computes the joint statistics + :type stat_func: function + :return: the function used to compute joint statistics with z as input + :rtype: function + """ + # invcov is only computed once + invcov = np.linalg.pinv(cov, rcond=0.001)#np.linalg.inv(cov) + + def compute(z): + return stat_func(z, cov, invcov,**kwargs) + return compute + + +def make_stat_computer_pattern(cov, stat_func): + """ + Create the function that computes the joint statistics if NaN values are in z + and if the number of selected phenotypes is less than or equal to 16. + It uses a covariance matrix corresponding + to the pattern of non-NaN values in z. + This function is implemented using the currying technique: + the first part which declares the data structure and the stat function + is called only once while the second part (compute) is called for each pattern. + :param cov: covariance matrix + :type cov: pandas.core.frame.DataFrame + :param stat_func: function that computes the joint statistics + :type stat_func: function + :return: the function used to compute joint statistics with z as input + :rtype: function + """ + if not stat_func.can_use_pattern: + raise ValueError("this computation strategy cannot be used with patterns") + + # invcov_bypattern is a dictionary of invcovs where the key is the + # corresponding pattern of non-NaN values in z + invcov_bypattern = {} + + def compute(z, pattern_code): + z_na_bool = z.iloc[0,].notnull() + if pattern_code in invcov_bypattern: + invcov = invcov_bypattern[pattern_code] + else: + mini_cov = cov.loc[z_na_bool, z_na_bool] + invcov = np.linalg.pinv(mini_cov, rcond=0.001) + invcov_bypattern[pattern_code] = invcov + z = z.loc[:, z_na_bool] + + return stat_func(z, None, invcov) + + return compute + + +def make_stat_computer_pattern_big(cov, stat_func): + """ + Create the function that computes the joint statistics if NaN values are in z + and if the number of selected phenotypes is greater than or equal to 17. + It uses a covariance matrix corresponding + to the pattern of non-NaN values in z. + This function is implemented using the currying technique: + the first part which declares the data structure and the stat function + is called only once while the second part (compute) is called for each pattern. 
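+    Inverted sub-covariance matrices are cached in a dictionary keyed by the missing-value pattern code, \
+    so the pseudo-inversion is performed only once per distinct pattern.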
+ :param cov: covariance matrix + :type cov: numpy.ndarray + :param stat_func: function that computes the joint statistics + :type stat_func: function + :return: the function used to compute joint statistics with z as input + :rtype: function + """ + if not stat_func.can_use_pattern: + raise ValueError("this computation strategy cannot be used with patterns") + # invcov_bypattern is a dictionary of invcovs where the key is the + # corresponding pattern of non-NaN values in z + invcov_bypattern = {} + + def compute(z, pattern_code, Num): + if pattern_code in invcov_bypattern: + invcov = invcov_bypattern[pattern_code] + else: + mini_cov = (cov.take(Num,axis=1)).take(Num,axis=0) + invcov = np.linalg.pinv(mini_cov, rcond=0.001) + invcov_bypattern[pattern_code] = invcov + + return stat_func(z, None, invcov) + + return compute + + +def make_stat_computer_nan_dumb(cov, stat_func): + # invert covariance for each line + if not (stat_func.can_use_pattern): + raise ValueError("this computation strategy cannot be used with patterns") + + def compute(z): + z_na_pattern = np.abs((np.isnan(z)).astype(int) - 1) + z_na_bool = [bool(val) for val in z_na_pattern] + mini_cov = cov.loc[z_na_bool, z_na_bool] + invcov = np.linalg.pinv(mini_cov, rcond=0.001) + z = z.dropna() + return stat_func(z, None, invcov) + + return compute + +def omnibus_stat(z, cov, invcov): + """ + joint statistics "omnibus" strategy + + note that the omnibus statistics uses invcov but not cov, + all statistics use the same signature as a simple way to + allow extensibility + + omnibus can be used in the "NaN processing" function + + :param z: z-scores for the phenotypes + :type z: pandas.dataframe or numpy.ndarray + :param cov: covariance matrix + :type cov: pandas.dataframe or numpy.ndarray + :param invcov: inverted covariance matrix + :type invcov: numpy.ndarray + :return: the joint statistics + :rtype: float + """ + try: + p = np.linalg.matrix_rank(invcov) + stat = np.einsum("ij,jk,ki->i", z, invcov, z.T) + return spst.chi2.sf(stat, df=p) + except ValueError: + print("{}".format(z[:5])) + print(invcov.shape) + print("Error in omnibus stat") + + +omnibus_stat.can_use_pattern = True + + +def fisher_test(z, cov, invcov): + """ + fisher combination of univariate p-values corrected with bonferoni + + This test is potentially strongly conservative if the phenotypes are strongly correlated + + :param z: z-scores for the phenotypes + :type z: pandas.core.series.Series + :param cov: covariance matrix + :type cov: DataFrame + :param invcov: inverted covariance matrix + :type invcov: numpy.ndarray + :return: the joint statistics + :rtype: float + """ + try: + p = (~z.isnull()).sum(1) + print(z) + print(cov) + print(invcov) + p_val = 2*spst.norm.sf(np.abs(z)) + stat = -2 * np.log(np.nansum(p_val, axis=1)) + return spst.chi2.sf(stat, df=p) + except ValueError: + print(z.head()) + print(invcov.shape) + print("Error in Fisher stat") + +fisher_test.can_use_pattern = False + +def meta_analysis(z, cov, invcov, **kwargs): + """ + Meta analysis using global sample size to weight z-score + + :param z: z-scores matrix for the phenotypes ((N_snp,N_phenotype) format) + :type z: pandas.core.DataFrame + :param cov: covariance matrix + :type cov: DataFrame + :param invcov: inverted covariance matrix + :type invcov: numpy.ndarray + :return: the joint statistics + :rtype: numpy.ndarray float64 + :param samp_size: Number of sample (will be used as weights in the meta analysis) + :type samp_size : pandas.Series + """ + + Effective_sample_size = 
kwargs.get('samp_size', None)
+    if Effective_sample_size is None:
+        raise ValueError('no sample size available to perform meta_analysis')
+    else:
+        loading = Effective_sample_size.loc[z.columns]**0.5
+
+    M_loadings = np.full(z.shape, loading**2)
+    M_loadings[np.isnan(z)] = 0
+
+    z = np.nan_to_num(z)  # fill na with zero
+
+    numi = loading.dot(z.transpose())
+    deno = np.sqrt(np.sum(M_loadings, axis=1))
+    # print(loading)
+    # fill na with 0 = don't take the missing GWAS into account in the test
+    stat = numi / deno
+
+    return spst.chi2.sf(stat, df=1)
+
+meta_analysis.can_use_pattern = False
+
+def sumz_stat(z, cov, invcov, **kwargs):
+    """
+    joint statistics "sumZ" strategy
+
+    :param z: z-scores matrix for the phenotypes ((N_snp,N_phenotype) format)
+    :type z: pandas.core.DataFrame
+    :param cov: covariance matrix
+    :type cov: DataFrame
+    :param invcov: inverted covariance matrix
+    :type invcov: numpy.ndarray
+    :return: the joint statistics
+    :rtype: numpy.ndarray float64
+    """
+    loading = kwargs.get('loadings', None)
+
+    if loading is None:
+        p = z.shape[1]
+        loading = np.ones(p)
+    else:
+        loading = loading.loc[z.columns]
+
+    print(loading)
+    print(cov)
+    M_loadings = np.full(z.shape, loading)
+    M_loadings[np.isnan(z)] = 0
+
+    z = np.nan_to_num(z)
+    numi = np.square(loading.dot(z.transpose()))
+    deno = np.einsum('ij,jk,ki->i', M_loadings, cov, M_loadings.T)
+
+    # fill na with 0 = don't take the missing GWAS into account in the test
+    stat = numi / deno
+    return spst.chi2.sf(stat, df=1)
+
+
+sumz_stat.can_use_pattern = False
\ No newline at end of file
diff --git a/jass/models/worktable.py b/jass/models/worktable.py
old mode 100755
new mode 100644
index a249eb9bc8cd3f765d440d7bdda7587fbcb18aa4..ebda182f5d4ec489ad291347f0ec2ba5ea13210a
--- a/jass/models/worktable.py
+++ b/jass/models/worktable.py
@@ -1,450 +1,841 @@
-# -*- coding: utf-8 -*-
-"""
-This contains all functions for accessing the "worktable" hdf5 file.
-All functions either create or read a worktable at a specific path location.
-@author: vguillem, hmenager, hjulienne -""" -import logging -import os -import importlib - -from typing import List - -# from dask.dataframe import read_hdf as dask_read_hdf -from pandas import HDFStore, DataFrame, concat, read_hdf, read_csv, Series, Index - -# create (or open) an hdf5 file and opens in append mode -import numpy as np -import scipy.stats as spst -import tables -import warnings - -warnings.filterwarnings("ignore", category=tables.NaturalNameWarning) - -from ..config import config -from .stats import ( - make_stat_computer_nopattern, - make_stat_computer_pattern, - make_stat_computer_nan_dumb, -) - -def choose_stat_function(smart_na_computation, optim_na, function_name, stat_function, sub_cov, **kwargs): - if smart_na_computation: - #If stat is sumz use normal computer even with na - if function_name == "omnibus_stat": - if optim_na: - stat_compute = make_stat_computer_pattern(sub_cov, stat_function) - else: - stat_compute = make_stat_computer_nan_dumb(sub_cov, stat_function) - else: - if function_name == "meta_analysis": - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, **kwargs) - elif function_name=="sumz_stat": - loading_file = kwargs.get('loadings', None) - if loading_file is None: - #Default loadings would be one for every phenotypes - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) - else: - loadings = read_csv(loading_file, index_col=0) - loadings = loadings.iloc[:,0] - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function,loadings=loadings ) - else: - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) - else: - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) - - - return stat_compute - -def add_signif_status_column(region_sub_tab, significance_treshold=5*10**-8): - - - region_sub_tab["signif_status"] = "" - # blue: significant pvalues for omnibus and univariate tests - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] - region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Both" - - # red: significant pvalues for omnibus test only - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold ) & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] - region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Joint" - - # green: significant pvalues for univariate test only - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] - region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Univariate" - - # grey: non significant pvalues - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] - region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "None" - - return region_sub_tab - -def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10**-8): - - # Select the most significant SNP for the joint test for each region - region_sub_tab = sum_stat_tab.sort_values("JASS_PVAL").groupby("Region").first()#.reset_index() - - # add minimum univariate p-value - univar = sum_stat_tab.groupby("Region").min().UNIVARIATE_MIN_PVAL - region_sub_tab.loc[univar.index, "UNIVARIATE_MIN_PVAL"] = univar.values - # Tag SNPs depending on which test is significant - region_sub_tab.reset_index(inplace=True) - region_sub_tab = add_signif_status_column(region_sub_tab, significance_treshold) 
- - # reorder columns - region_sub_tab = region_sub_tab[['Region', "MiddlePosition", "snp_ids","CHR", "position", "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "signif_status"] + phenotype_ids] - - return region_sub_tab - -def post_computation_filtering(worktable_chunk, significant_treshold = 5*10**-8): - """ - Remove SNPs that seems aberrant: SNPs with a very low p-value that are isolated - in their region - - :param worktable_chunk: pandas DataFrame worktable part - :type worktable_chunk: pandas DataFrame - :param significant_treshold: threshold at which a p-value is considered significant - :type significant_treshold: float - """ - - def count_nearly_significant(rsnp): - return((rsnp.JASS_PVAL < (significant_treshold*20)).sum()) - - res = worktable_chunk.groupby("Region").apply(count_nearly_significant) - - # select region with only one SNP that is significant which is - # suspect - reg = res.loc[res==1].index - - for reg_aberant in reg: - aberant_SNP = worktable_chunk.loc[worktable_chunk.Region==reg_aberant].sort_values("JASS_PVAL").index[0] - worktable_chunk.drop(aberant_SNP, inplace=True) - - return worktable_chunk - -def compute_pleiotropy_index(W,significance_treshold): - - N_significatif = (2.0 * spst.norm.sf(W.fillna(0, inplace=False).abs())<significance_treshold).sum(1) - N_pheno = (~W.isnull()).sum(1) - #pleiotropy index is not meaningful for too few phenotype - S = N_significatif/N_pheno - S.loc[N_pheno < 4] = np.nan - return S - -def create_worktable_file( - phenotype_ids: List[str], - init_file_path: str, - project_hdf_path: str, - remove_nan: bool, - stat: str = "jass.models.stats:omnibus_stat", - optim_na: bool = True, - csv_file: str = None, - chunk_size: int = 50, - significance_treshold = 5*10**-8, - post_filtering=True, - **kwargs -): - """ - Create a worktable file from an initial data table by specifying the - selected phenotypes and the computation strategy - - :param phenotype_ids: the list of IDs for the phenotypes to select - :type phenotype_ids: list - :param init_file_path: path to the initial data table - :type init_file_path: str - :param project_hdf_path: path to the worktable file that will be produced - :type project_hdf_path: str - :param remove_nan: boolean to control the JOST computation strategy: - - if True any SNP which has a NaN value for one of its \ - phenotypes will be removed. - - if False SNPs will be removed only if all phenotype \ - values are NaN and JOST will be performed the "smart" \ - way to compensate these missing values. - :type remove_nan: bool - :param optim_na: boolean to control if we use a smart gestion of z NA \ - values. Should always be set to True except for performance tests. - :type optim_na: bool - :param chunk_size: the size of the chunks of the initial data - to process together. 
default is 50 and should probably not - be touched for anything else than experimentation - :type chunk_size: int - :param significant_treshold: threshold at which a p-value is considered significant - :type significant_treshold: float - : - """ - # read data by chunks to optimize memory usage - # select only rows (SNPs) where there are no missing data - how_dropna = "any" if remove_nan else "all" - if os.path.exists(project_hdf_path): - os.remove(project_hdf_path) - hdf_work = HDFStore(project_hdf_path) - - # subset of phenotypes that have been selected - phenolist = read_hdf(init_file_path, "PhenoList") - phenolist = phenolist.loc[phenotype_ids] - hdf_work.put( - "PhenoList", phenolist - ) - - # subset of covariance matrix for the selected phenotypes - cov = read_hdf(init_file_path, "COV") - sub_cov = cov.loc[phenotype_ids, phenotype_ids] - hdf_work.put( - "COV", sub_cov, format="table", data_columns=True - ) # Covariance matrix - - regions = read_hdf(init_file_path, "Regions").index.tolist() - sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Region": 10, "CHR": 5,"Ref_allele" : 70, "Alt_allele":70} - #['Region', "MiddlePosition", "snp_ids","CHR", "position", "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "signif_status"] - region_sub_table_min_itemsizes = {"Region": 10, "index": 10, "CHR": 5, "snp_ids": 80, "signif_status":20,"Ref_allele" : 70, "Alt_allele":70} - - smart_na_computation = not (remove_nan) - module_name, function_name = stat.split(":") - stat_module = importlib.import_module(module_name) - stat_fn = getattr(stat_module, function_name) - - stat_compute = choose_stat_function(smart_na_computation, optim_na, function_name, stat_fn, sub_cov, samp_size=phenolist['Effective_sample_size'], **kwargs) - - Nchunk = len(regions) // chunk_size + 1 - - Nsnp_total = 0 - Nsnp_jassed = 0 - - for chunk in range(Nchunk): - binf = chunk * chunk_size - bsup = (chunk+1) * chunk_size - sum_stat_tab = read_hdf(init_file_path, 'SumStatTab', columns=[ - 'Region', 'CHR', 'position', 'snp_ids', 'Ref_allele', 'Alt_allele', 'MiddlePosition'] + phenotype_ids, - where='Region >= {0} and Region < {1}'.format(binf, bsup)) - print("Regions {0} to {1}\r".format(binf, bsup)) - - # Remake row index unique: IMPORTANT for assignation with .loc at line 98 - sum_stat_tab.dropna( - axis=0, subset=phenotype_ids, how=how_dropna, inplace=True - ) - sum_stat_tab.reset_index(drop=True, inplace=True) - - if sum_stat_tab.shape[0]==0: - print("No data available for region {0} to region {1}".format(binf, bsup)) - continue # skip region if no data are available - - N_pheno = len(phenotype_ids) - Nsnp_total = Nsnp_total + sum_stat_tab.shape[0] - - if remove_nan or stat.split(":")[-1] != "omnibus_stat": - sum_stat_tab['JASS_PVAL'] = stat_compute(sum_stat_tab[phenotype_ids]) - else: - # Sort SumStatTab by missing patterns - patterns_missing, frequent_pattern = compute_frequent_missing_pattern(sum_stat_tab[phenotype_ids]) - - # Apply the statistic computation by missing patterns - for pattern in frequent_pattern: - bool_serie = (patterns_missing == pattern) - try: - sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute( - sum_stat_tab.loc[bool_serie, phenotype_ids] - ) - except ValueError: - print("worktable") - - sum_stat_tab.index = Index(patterns_missing) - # Keep_only frequent_pattern - sum_stat_tab = sum_stat_tab.loc[frequent_pattern] - # drop pattern index : - sum_stat_tab.reset_index(drop=True, inplace=True) - - Nsnp_jassed = Nsnp_jassed + sum_stat_tab.shape[0] - sum_stat_tab.sort_values(by=["Region", 
"CHR"], inplace=True) - - sum_stat_tab["UNIVARIATE_MIN_PVAL"] = DataFrame( - 2.0 * spst.norm.sf(sum_stat_tab[phenotype_ids].fillna(0, inplace=False).abs()), - index=sum_stat_tab.index, - ).min(axis=1) - - sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"]*(1-np.isnan(sum_stat_tab[phenotype_ids]).astype(int)).sum(1) - sum_stat_tab.loc[sum_stat_tab.UNIVARIATE_MIN_QVAL>1 , "UNIVARIATE_MIN_QVAL"] = 1 - - #Computing pleiotropy - sum_stat_tab["PLEIOTROPY_INDEX"] = compute_pleiotropy_index(sum_stat_tab[phenotype_ids], significance_treshold) - - sum_stat_tab = sum_stat_tab[ - ["Region", "CHR", "snp_ids", "position", 'Ref_allele', 'Alt_allele', "MiddlePosition", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "UNIVARIATE_MIN_QVAL","PLEIOTROPY_INDEX" ] - + phenotype_ids] - - if post_filtering: - sum_stat_tab = post_computation_filtering(sum_stat_tab) - - hdf_work.append( - "SumStatTab", sum_stat_tab, min_itemsize=sum_stat_tab_min_itemsizes - ) - - if csv_file is not None: - with open(csv_file, 'a') as f: - sum_stat_tab.to_csv(f, header=f.tell()==0) - - region_sub_table = get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=significance_treshold) - - hdf_work.append( - "Regions", - region_sub_table - ,min_itemsize=region_sub_table_min_itemsizes - ) - hdf_work.close() - - print("{1} SNPs treated on {0} SNPs".format(Nsnp_jassed, Nsnp_total)) - - RegionSubTable = read_hdf(project_hdf_path, "Regions") - - pval_min = RegionSubTable["UNIVARIATE_MIN_PVAL"] - jost_min = RegionSubTable["JASS_PVAL"] - summaryTable = DataFrame( - np.array( - [ - [ - sum((jost_min < significance_treshold) & (pval_min < significance_treshold)), - sum((jost_min < significance_treshold) & (pval_min > significance_treshold)), - ], - [ - sum((jost_min > significance_treshold) & (pval_min < significance_treshold)), - sum((jost_min > significance_treshold) & (pval_min > significance_treshold)), - ], - ] - ) - ) - summaryTable.columns = ["PhenoSignif", "NoPhenoSignif"] - summaryTable.index = ["JOSTSignif", "NoJOSTSignif"] - - hdf_work = HDFStore(project_hdf_path) - hdf_work.put( - "summaryTable", summaryTable, format="table", data_columns=True - ) # Summary Table (contigency table) - hdf_work.close() - - -def compute_frequent_missing_pattern(sum_stat_tab): - """ - Compute the frequency of missing pattern in the dataset - determine the number of pattern that should be taken into account to - cover 99 percent of the snps - """ - N_pheno = sum_stat_tab.shape[1] - patterns_missing = Series(np.dot((1- sum_stat_tab.isnull()), 10**np.arange((N_pheno-1), -1, -1))) - pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) - n_pattern = pattern_frequency.shape[0]#(pattern_frequency.cumsum() < 0.99).sum() + 1 - #n_pattern = min(5000, n_pattern) - print("Number of pattern {}".format(n_pattern)) - #print("Covering {}/1 of snps ".format(pattern_frequency.iloc[:n_pattern].sum())) - frequent_pattern = pattern_frequency.index.tolist() #[:n_pattern] - return patterns_missing, frequent_pattern - - -def stringize_dataframe_region_chr(dataframe: DataFrame): - """ - Reformat Region and Chromosome numbers as strings in a DataFrame. 
- This will eventually be deprecated once the js code accepts integers instead - of strings - - :param project_hdf_path: path to the worktable file - :type project_hdf_path: str - :param frame: name of the frame to be read - :type frame: str - :return: The dataframe with converted Region and CHR columns - :rtype: pandas.DataFrame - """ - dataframe["Region"] = dataframe["Region"].apply(lambda x: "Region" + str(x)) - dataframe["CHR"] = dataframe["CHR"].apply(lambda x: "chr" + str(x)) - return dataframe - - -def get_worktable_summary(project_hdf_path: str): - """ - Read and return the summaryTable dataframe from a worktable file - - :param project_hdf_path: path to the worktable file - :type project_hdf_path: str - :return: The dataframe as a dictionary with one entry per line number - :rtype: dict - """ - summary_dataframe = read_hdf(project_hdf_path, "summaryTable") - lines = {} - for index, row in summary_dataframe.iterrows(): - lines[index] = { - "PhenoSignif": int(row["PhenoSignif"]), - "NoPhenoSignif": int(row["NoPhenoSignif"]), - } - return lines - - -def get_worktable_genomedata(project_hdf_path: str): - """ - Read and return the Regions dataframe from a worktable file - - :param project_hdf_path: path to the worktable file - :type project_hdf_path: str - :return: The dataframe as a CSV formatted text - :rtype: str - """ - region_subtable = stringize_dataframe_region_chr(read_hdf(project_hdf_path, "Regions")) - region_subtable.rename(index=str, columns={'JASS_PVAL':'JOSTmin'}, inplace=True) - return region_subtable.to_csv(index=False) - - -def get_worktable_local_manhattan_data( - project_hdf_path: str, chromosome: str, region: str -): - """ - Read and return the SumStatTab dataframe from a worktable file - for a given chromosome and region for the Manhattan plot - - :param project_hdf_path: path to the worktable file - :type project_hdf_path: str - :param chromosome: chromosome number in "string form", e.g. chr12 - :type chromosome: str - :param region: region number in "string form", e.g. Region213 - :type region: str - :return: The dataframe subset corresponding to the chromosome and region, as a CSV formatted text - :rtype: str - """ - region_int = region[6:] - chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"], - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) - dataframe = stringize_dataframe_region_chr(dataframe) - dataframe = dataframe.sort_values("position") - return dataframe.to_csv(index=False) - - -def get_worktable_local_heatmap_data( - project_hdf_path: str, chromosome: str, region: str -): - """ - Read and return the SumStatTab dataframe from a worktable file - for a given chromosome and region for the Heatmap plot - - :param project_hdf_path: path to the worktable file - :type project_hdf_path: str - :param chromosome: chromosome number in "string form", e.g. chr12 - :type chromosome: str - :param region: region number in "string form", e.g. 
Region213 - :type region: str - :return: The dataframe subset corresponding to the chromosome and region, \ - pivoted and as a CSV formatted text - :rtype: str - """ - region_int = region[6:] - chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) - dataframe = stringize_dataframe_region_chr(dataframe) - dataframe = dataframe.sort_values("position") - dataframe.drop( ["Region", "CHR", "position", "JASS_PVAL", "MiddlePosition", "UNIVARIATE_MIN_PVAL", "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"], - axis=1, - inplace=True, - ) - dataframe.rename(columns={"snp_ids": "ID"}, inplace=True) - column_order = list(dataframe.ID) - pivoted_dataframe = dataframe.pivot_table(columns="ID") - pivoted_dataframe = pivoted_dataframe.reindex_axis(column_order, axis=1) - # TODO rework the selection to return 5000 snps in total, around the - # region ;) - return pivoted_dataframe.to_csv(index_label="ID") +# -*- coding: utf-8 -*- +""" +This contains all functions for accessing the "worktable" hdf5 file. +All functions either create or read a worktable at a specific path location. + +@author: vguillem, hmenager, hjulienne +""" +import math + +from jass.models.stats import ( + make_stat_computer_nopattern, + make_stat_computer_pattern, + make_stat_computer_pattern_big, + make_stat_computer_nan_dumb, +) + +from jass.config import config +import logging +import os +import importlib + +from typing import List + +from pandas import HDFStore, DataFrame, concat, read_hdf, read_csv, Series, Index + +import numpy as np +import scipy.stats as spst +import tables +import warnings + +warnings.filterwarnings("ignore", category=tables.NaturalNameWarning) + + +def signif(x, digit): + """ + signif + Round a number x to represent it with <digit> digits + + :param x: the number to round + :type x: float + + :param digit: the number of digits + :type digit: int + + :return: the rounded number + :rtype: float + + example: + >>> signif(1.2345678, 1) + 1.0 + + >>> signif(1.2345678, 3) + 1.23 + + >>> signif(1.2345678, 5) + 1.2346 + """ + if x == 0: + return 0 + + return round(x, digit - int(math.floor(math.log10(abs(x)))) - 1) + + +def choose_stat_function(smart_na_computation, optim_na, big, function_name, stat_function, sub_cov, **kwargs): + if smart_na_computation: + # If stat is sumz use normal computer even with na + if function_name == "omnibus_stat": + if optim_na: + if big: + stat_compute = make_stat_computer_pattern_big(sub_cov, stat_function) + else: + stat_compute = make_stat_computer_pattern(sub_cov, stat_function) + else: + stat_compute = make_stat_computer_nan_dumb(sub_cov, stat_function) + else: + if function_name == "meta_analysis": + stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, **kwargs) + elif function_name == "sumz_stat": + loading_file = kwargs.get('loadings', None) + if loading_file is None: + # Default loadings would be one for every phenotypes + stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) + else: + loadings = read_csv(loading_file, index_col=0) + loadings = loadings.iloc[:, 0] + stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, loadings=loadings) + else: + stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) + else: + stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) + + return stat_compute + + +def add_signif_status_column(region_sub_tab, significance_treshold=5*10**-8): + + region_sub_tab["signif_status"] = "" + + # 
blue: significant pvalues for omnibus and univariate tests + cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( + region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] + region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Both" + + # red: significant pvalues for omnibus test only + cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( + region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Joint" + + # green: significant pvalues for univariate test only + cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( + region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] + region_sub_tab.loc[region_sub_tab.index[cond], + "signif_status"] = "Univariate" + + # grey: non significant pvalues + cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( + region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "None" + + return region_sub_tab + + +def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10**-8): + + # Select the most significant SNP for the joint test for each region + region_sub_tab = sum_stat_tab.sort_values( + "JASS_PVAL").groupby("Region").first() # .reset_index() + + # add minimum univariate p-value + univar = sum_stat_tab.groupby("Region").min().UNIVARIATE_MIN_PVAL + region_sub_tab.loc[univar.index, "UNIVARIATE_MIN_PVAL"] = univar.values + + # Tag SNPs depending on which test is significant + region_sub_tab.reset_index(inplace=True) + region_sub_tab = add_signif_status_column( + region_sub_tab, significance_treshold) + + # reorder columns + region_sub_tab = region_sub_tab[['Region', "MiddlePosition", "snp_ids", "CHR", "position", + "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", + "signif_status"] + phenotype_ids] + + return region_sub_tab + + +def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): + """ + Remove SNPs that seems aberrant: SNPs with a very low p-value that are isolated + in their region + + :param worktable_chunk: pandas DataFrame worktable part + :type worktable_chunk: pandas DataFrame + :param significant_treshold: threshold at which a p-value is considered significant + :type significant_treshold: float + """ + + def count_nearly_significant(rsnp): + return((rsnp.JASS_PVAL < (significant_treshold*20)).sum()) + + res = worktable_chunk.groupby("Region").apply(count_nearly_significant) + + # select region with only one SNP that is significant which is + # suspect + reg = res.loc[res == 1].index + + for reg_aberant in reg: + aberant_SNP = worktable_chunk.loc[worktable_chunk.Region == reg_aberant].sort_values( + "JASS_PVAL").index[0] + worktable_chunk.drop(aberant_SNP, inplace=True) + + return worktable_chunk + + +def compute_pleiotropy_index(W, significance_treshold): + + N_significatif = (2.0 * spst.norm.sf(W.fillna(0, + inplace=False).abs()) < significance_treshold).sum(1) + N_pheno = (~W.isnull()).sum(1) + # pleiotropy index is not meaningful for too few phenotype + S = N_significatif/N_pheno + S.loc[N_pheno < 4] = np.nan + return S + + +def create_worktable_file( + phenotype_ids: List[str], + init_file_path: str, + project_hdf_path: str, + remove_nan: bool, + stat: str = "jass.models.stats:omnibus_stat", + optim_na: bool = True, + csv_file: str = None, + chunk_size: int = 50, + significance_treshold=5*10**-8, + post_filtering=True, + 
delayed_gen_csv_file=False, + chromosome: str = None, + pos_Start: str = None, + pos_End: str = None, + **kwargs + ): + + """ + Create a worktable file from an initial data table by specifying the + selected phenotypes and the computation strategy + + :param phenotype_ids: the list of IDs for the phenotypes to select + :type phenotype_ids: list + :param init_file_path: path to the initial data table + :type init_file_path: str + :param project_hdf_path: path to the worktable file that will be produced + :type project_hdf_path: str + :param remove_nan: boolean to control the JOST computation strategy: + - if True any SNP which has a NaN value for one of its \ + phenotypes will be removed. + - if False SNPs will be removed only if all phenotype \ + values are NaN and JOST will be performed the "smart" \ + way to compensate these missing values. + :type remove_nan: bool + :param optim_na: boolean to control if we use a smart gestion of z NA \ + values. Should always be set to True except for performance tests. + :type optim_na: bool + :param chunk_size: the size of the chunks of the initial data + to process together. default is 50 and should probably not + be touched for anything else than experimentation + :type chunk_size: int + :param significant_treshold: threshold at which a p-value is considered significant + :type significant_treshold: float + :param post_filtering: Remove SNPs that seems aberrant + :type post_filtering: bool + :param delayed_gen_csv_file: generate the csv_file asynchronously (used in the web interface) + :type delayed_gen_csv_file: bool + :param chromosome: Chromosome number selected for local analysis + :type chromosome: str + :param pos_Start: start of the position of the studied region (base point) for local analysis + :type pos_Start: str + :param pos_End: end of the position of the studied region (base point) for local analysis + :type pos_End: str + + :return: Number of chunks used to write workTable file. \ + This information is useful for reading the file + :rtype: int + """ + # number of phenotypes beyond which we change the algorithm + K_NB_PHENOTYPES_BIG = 18 + + # Upper bound of the chromosome length (bp) + K_POS_MAX = 250000000 + + # Minimum and maximum limit of regions for each chromosome (multiples of 50) + Min_pos_chr = [ 0, 100, 250, 400, 500, 600, 700, 800, 900, 1000, 1050, + 1150, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1650] + Max_pos_chr = [150, 300, 400, 550, 650, 750, 850, 950, 1050, 1100, 1200, + 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1700, 1750] + + N_pheno = len(phenotype_ids) + + # Controls the number of phenotypes + if (N_pheno > 64): + print("ERROR: {} phenotypes are selected. \nThe current version of JASS cannot analyze more than 64 phenotypes" \ + .format(N_pheno)) + raise ValueError("Maximum number of phenotypes exceeded") + elif (N_pheno >= 20): + print("WARNING: {} phenotypes are selected. 
The computation will be very long!".format(N_pheno)) + + if (chromosome is None): + local_analysis = False + print("============== Whole genome analysis ===============") + else: + local_analysis = True + print("============== Local analysis ===============") + if not(chromosome.isdigit()): + print("ERROR: when performing a local analysis, the chromosome number (between 1 and 22) is mandatory") + raise ValueError("create_worktable_file: the required argument chromosome is not a number") + else: + num_Chr = int(chromosome) + + if ((pos_Start is None) and (pos_End is None)): + chromosome_full = True + print("------ Chromosome : {} ------".format(num_Chr)) + else: + chromosome_full = False + if ((pos_Start is None) or (not pos_Start.isdigit())): + pos_Start = 0 + if ((pos_End is None) or (not pos_End.isdigit())): + pos_End = K_POS_MAX + print("------ Chromosome : {} ({} - {}) ------".format(num_Chr, pos_Start, pos_End)) + + print("Phenotypes = {}".format(phenotype_ids)) + + # Initialization of Jass_progress + progress_path = os.path.join(os.path.dirname( + project_hdf_path), "JASS_progress.txt") + JASS_progress = 0 + file_progress = open(progress_path, "w") + file_progress.write(str(JASS_progress)) + file_progress.close() + + # select only rows (SNPs) where there are no missing data + how_dropna = "any" if remove_nan else "all" + if os.path.exists(project_hdf_path): + os.remove(project_hdf_path) + hdf_work = HDFStore(project_hdf_path) + + if csv_file is not None: + if os.path.exists(csv_file): + os.remove(csv_file) + + if delayed_gen_csv_file: + # setting a lock to generate the csv_file asynchronously + the_lock_path = os.path.join(os.path.dirname(project_hdf_path), "the_lock.txt") + the_lock = "The lock is set on : workTable.csv is not yet available" + file_lock = open(the_lock_path, "w") + file_lock.write(the_lock) + file_lock.close() + + # subset of phenotypes that have been selected + phenolist = read_hdf(init_file_path, "PhenoList") + phenolist = phenolist.loc[phenotype_ids] + hdf_work.put( + "PhenoList", phenolist + ) + + # subset of covariance matrix for the selected phenotypes + cov = read_hdf(init_file_path, "COV") + sub_cov = cov.loc[phenotype_ids, phenotype_ids] + hdf_work.put( + "COV", sub_cov, format="table", data_columns=True + ) # Covariance matrix + + regions = read_hdf(init_file_path, "Regions").index.tolist() + sum_stat_tab_min_itemsizes = {"snp_ids": 50, "Region": 10, "CHR": 5} + region_sub_table_min_itemsizes = { + "Region": 10, "index": 10, "CHR": 5, "snp_ids": 50, "signif_status": 20} + + smart_na_computation = not (remove_nan) + module_name, function_name = stat.split(":") + stat_module = importlib.import_module(module_name) + stat_fn = getattr(stat_module, function_name) + + if (N_pheno < K_NB_PHENOTYPES_BIG): + big = False + sub_cov_matrix = sub_cov + else: + big = True + sub_cov_matrix = sub_cov.to_numpy() + + stat_compute = choose_stat_function(smart_na_computation, + optim_na, + big, + function_name, + stat_fn, + sub_cov_matrix, + samp_size=phenolist['Effective_sample_size'], + **kwargs) + + # read data by chunks to optimize memory usage + if (not local_analysis): + Nchunk = len(regions) // chunk_size + 1 + start_value = 0 + else: + chunk_size = 50 + Nchunk = Max_pos_chr[num_Chr - 1] // chunk_size + start_value = Min_pos_chr[num_Chr - 1] // chunk_size + + # selection criterion in the case of a partial analysis by chromosome and position + if (chromosome_full): + Local_criteria = "(CHR == {})".format(chromosome) + else: + Local_criteria = "(CHR == {}) and (position 
>= {}) and (position <= {})"\ + .format(chromosome, pos_Start, pos_End) + + Nsnp_total = 0 + Nsnp_jassed = 0 + + for chunk in range(start_value, Nchunk): + + # the chunk index starts at zero and we take into account the 2 plot stages + JASS_progress = round((chunk + 1) * 100 / (Nchunk + 2)) + + binf = chunk * chunk_size + bsup = (chunk+1) * chunk_size + + sum_stat_tab = read_hdf(init_file_path, 'SumStatTab', columns=[ + 'Region', 'CHR', 'position', 'snp_ids', 'Ref_allele', 'Alt_allele', 'MiddlePosition'] + phenotype_ids, + where='Region >= {0} and Region < {1}'.format(binf, bsup)) + + print("Regions {0} to {1}\r".format(binf, bsup)) + + if(local_analysis): + # Data extraction in the case of a partial analysis + sum_stat_tab = sum_stat_tab.query(Local_criteria) + + # Remake row index unique: IMPORTANT for assignation with .loc + sum_stat_tab.dropna( + axis=0, subset=phenotype_ids, how=how_dropna, inplace=True + ) + sum_stat_tab.reset_index(drop=True, inplace=True) + + if sum_stat_tab.shape[0] == 0: + print( + "No data available for region {0} to region {1}".format(binf, bsup)) + continue # skip region if no data are available + + Nsnp_total = Nsnp_total + sum_stat_tab.shape[0] + + if remove_nan or stat.split(":")[-1] != "omnibus_stat": + sum_stat_tab['JASS_PVAL'] = stat_compute( + sum_stat_tab[phenotype_ids]) + else: + if not big: + # Algorithm optimized for a small number of phenotypes + + # Sort SumStatTab by missing patterns + patterns_missing, frequent_pattern = compute_frequent_missing_pattern( + sum_stat_tab[phenotype_ids]) + + sum_stat_tab["patterns_missing"] = patterns_missing + z1 = sum_stat_tab[phenotype_ids] + + # Apply the statistic computation by missing patterns + for pattern in frequent_pattern: + bool_serie = (patterns_missing == pattern) + Selection_criteria = sum_stat_tab["patterns_missing"] == pattern + + try: + sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute(z1[Selection_criteria], pattern) + except ValueError: + print("worktable") + + else: + # Algorithm optimized for a high number of phenotypes + + # Sort SumStatTab by missing patterns + patterns_missing, frequent_pattern, dico_index_y = \ + compute_frequent_missing_pattern_Big(sum_stat_tab[phenotype_ids]) + + sum_stat_tab["index"] = sum_stat_tab.index.tolist() + sum_stat_tab["patterns_missing"] = patterns_missing + + # In our case, the "apply" function transforms all numeric columns into float 64-bit encoded columns. + # However, the mantissa of a float is encoded on 52 bits while we use an integer code using 64 bits. + # Beyond 52 phenotypes, the automatic transformation integer-float induces a false pattern code. + # Transforming our code into a string keeps the coding accuracy beyond 52 phenotypes up to 64 phenotypes. 
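+                # Illustration of the precision issue described above: float64 carries a 52-bit
+                # mantissa, so for instance float(2**53) == float(2**53 + 1) evaluates to True.
+                # An integer pattern code covering more than 52 phenotypes can therefore collide
+                # after an implicit int-to-float cast, which is why the code is kept as a string below.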
+ sum_stat_tab = sum_stat_tab.astype({"patterns_missing": str}) + + Liste_colonnes = ["index", "patterns_missing"] + phenotype_ids + dico_z = {} + dico_index_x = {} + + sum_stat_tab[Liste_colonnes].apply(lambda x: store_pattern(dico_z, dico_index_x, *x), axis=1) + + Retour_omnibus_bypattern = {} + + # Apply the statistic computation by missing patterns + for pattern in frequent_pattern: + try: + Retour_omnibus_bypattern[pattern] = stat_compute( + np.array(dico_z[pattern]), + pattern, + dico_index_y[pattern] + ) + except ValueError: + print("worktable") + + Retour_omnibus = [0.0 for i in range(sum_stat_tab.shape[0])] + + for pattern in frequent_pattern: + for ligne, indice in enumerate(dico_index_x[pattern]): + Retour_omnibus[int(indice)] = (Retour_omnibus_bypattern[pattern])[int(ligne)] + + sum_stat_tab["JASS_PVAL"] = Retour_omnibus + + Nsnp_jassed = Nsnp_jassed + sum_stat_tab.shape[0] + sum_stat_tab.sort_values(by=["Region", "CHR"], inplace=True) + + sum_stat_tab["UNIVARIATE_MIN_PVAL"] = DataFrame( + 2.0 * + spst.norm.sf(sum_stat_tab[phenotype_ids].fillna( + 0, inplace=False).abs()), + index=sum_stat_tab.index, + ).min(axis=1) + + sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"] * \ + (1-np.isnan(sum_stat_tab[phenotype_ids]).astype(int)).sum(1) + sum_stat_tab.loc[sum_stat_tab.UNIVARIATE_MIN_QVAL > + 1, "UNIVARIATE_MIN_QVAL"] = 1 + + # Computing pleiotropy + sum_stat_tab["PLEIOTROPY_INDEX"] = compute_pleiotropy_index( + sum_stat_tab[phenotype_ids], significance_treshold) + + sum_stat_tab = sum_stat_tab[ + ["Region", "CHR", "snp_ids", "position", 'Ref_allele', 'Alt_allele', "MiddlePosition", + "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"] + + phenotype_ids] + + if post_filtering: + sum_stat_tab = post_computation_filtering(sum_stat_tab) + + hdf_work.append( + "SumStatTab", sum_stat_tab, min_itemsize=sum_stat_tab_min_itemsizes + ) + + if ((csv_file is not None) and (not delayed_gen_csv_file)): + with open(csv_file, 'a') as f: + sum_stat_tab.to_csv(f, header=f.tell()==0) + + region_sub_table = get_region_summary( + sum_stat_tab, phenotype_ids, significance_treshold=significance_treshold) + + hdf_work.append( + "Regions", + region_sub_table, min_itemsize=region_sub_table_min_itemsizes + ) + + file_progress = open(progress_path, "w") + file_progress.write(str(JASS_progress)) + file_progress.close() + + hdf_work.close() + + print("{1} SNPs treated on {0} SNPs".format(Nsnp_jassed, Nsnp_total)) + + RegionSubTable = read_hdf(project_hdf_path, "Regions") + + pval_min = RegionSubTable["UNIVARIATE_MIN_PVAL"] + jost_min = RegionSubTable["JASS_PVAL"] + summaryTable = DataFrame( + np.array( + [ + [ + sum((jost_min < significance_treshold) & + (pval_min < significance_treshold)), + sum((jost_min < significance_treshold) & + (pval_min > significance_treshold)), + ], + [ + sum((jost_min > significance_treshold) & + (pval_min < significance_treshold)), + sum((jost_min > significance_treshold) & + (pval_min > significance_treshold)), + ], + ] + ) + ) + summaryTable.columns = ["PhenoSignif", "NoPhenoSignif"] + summaryTable.index = ["JOSTSignif", "NoJOSTSignif"] + + hdf_work = HDFStore(project_hdf_path) + hdf_work.put( + "summaryTable", summaryTable, format="table", data_columns=True + ) # Summary Table (contigency table) + hdf_work.close() + + return Nchunk + + +def binary_code(*args): + """ + binary_code + Generates the binary code of each pattern ensuring compatibility between Linux and Windows + """ + Chaine = "" + for valeur in args: + Chaine += 
"{}".format(valeur) + return int(Chaine, 2) + + +def binary_code_Big(dico_index_y, *args): + """ + binary_code + Generates the binary code of each pattern ensuring compatibility between Linux and Windows + """ + Chaine = "" + for valeur in args: + Chaine += "{}".format(valeur) + + Codage = int(Chaine, 2) + + if (not (Codage in dico_index_y)): + dico_index_y[Codage] = [] + for indice, valeur in enumerate(args): + if (valeur == 1): + dico_index_y[Codage].append(indice) + + return Codage + + +def store_pattern(dico_z, dico_index_x, *colonne): + """ + store_pattern + Reorders z-values by pattern and store their index in the original dataframe + """ + Index = int(colonne[0]) + Codage = int(colonne[1]) + + if (not (Codage in dico_z)): + dico_z[Codage] = [] + dico_index_x[Codage] = [] + + dico_index_x[Codage].append(Index) + + new_line = [] + for valeur in colonne[2:]: + if not math.isnan(valeur): + new_line.append(valeur) + + dico_z[Codage].append(new_line) + + +def compute_frequent_missing_pattern(sum_stat_tab): + """ + Compute the frequency of missing pattern in the dataset + """ + Pheno_is_present = 1- sum_stat_tab.isnull() + + # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes + patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply(lambda x: binary_code(*x), axis=1) + + pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) + n_pattern = pattern_frequency.shape[0] + print("Number of pattern {}".format(n_pattern)) + frequent_pattern = pattern_frequency.index.tolist() + return patterns_missing, frequent_pattern + + +def compute_frequent_missing_pattern_Big(sum_stat_tab): + """ + Compute the frequency of missing pattern in the dataset + + """ + dico_index_y = {} + + Pheno_is_present = 1- sum_stat_tab.isnull() + + # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes + patterns_missing = Pheno_is_present[Pheno_is_present.columns] \ + .apply(lambda x: binary_code_Big(dico_index_y, *x), axis=1) + + + pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) + n_pattern = pattern_frequency.shape[0] + print("Number of pattern {}".format(n_pattern)) + frequent_pattern = pattern_frequency.index.tolist() + + return patterns_missing, frequent_pattern, dico_index_y + + +def stringize_dataframe_region_chr(dataframe: DataFrame): + """ + Reformat Region and Chromosome numbers as strings in a DataFrame. 
+    This will eventually be deprecated once the JS code accepts integers instead
+    of strings
+
+    :param dataframe: the dataframe whose Region, CHR and JASS_PVAL columns are reformatted
+    :type dataframe: pandas.DataFrame
+    :return: The dataframe with converted Region and CHR columns
+    :rtype: pandas.DataFrame
+    """
+    dataframe["Region"] = dataframe["Region"].apply(
+        lambda x: "Region" + str(x))
+    dataframe["CHR"] = dataframe["CHR"].apply(lambda x: "chr" + str(x))
+    dataframe["JASS_PVAL"] = dataframe["JASS_PVAL"].apply(
+        lambda x: str(signif(x, 4)))
+
+    return dataframe
+
+
+def get_worktable_summary(project_hdf_path: str):
+    """
+    Read and return the summaryTable dataframe from a worktable file
+
+    :param project_hdf_path: path to the worktable file
+    :type project_hdf_path: str
+    :return: The dataframe as a dictionary with one entry per row of the summary table
+    :rtype: dict
+    """
+    summary_dataframe = read_hdf(project_hdf_path, "summaryTable")
+    lines = {}
+    for index, row in summary_dataframe.iterrows():
+        lines[index] = {
+            "PhenoSignif": int(row["PhenoSignif"]),
+            "NoPhenoSignif": int(row["NoPhenoSignif"]),
+        }
+    return lines
+
+
+def get_worktable_genomedata(project_hdf_path: str):
+    """
+    Read and return the Regions dataframe from a worktable file
+
+    :param project_hdf_path: path to the worktable file
+    :type project_hdf_path: str
+    :return: The dataframe as a CSV formatted text
+    :rtype: str
+    """
+    region_subtable = stringize_dataframe_region_chr(
+        read_hdf(project_hdf_path, "Regions"))
+
+    region_subtable.rename(index=str, columns={
+                           'JASS_PVAL': 'JOSTmin'}, inplace=True)
+
+    region_subtable['PVALmin'] = region_subtable['UNIVARIATE_MIN_PVAL']
+    region_subtable['PVALmin'] = region_subtable['PVALmin'].apply(
+        lambda x: str(signif(x, 4)))
+
+    return region_subtable.to_csv(index=False)
+
+
+def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = None, region: str = None):
+    """
+    Read and return the SumStatTab dataframe from a worktable file
+    for a given chromosome and region for the Manhattan plot
+
+    :param project_hdf_path: path to the worktable file
+    :type project_hdf_path: str
+    :param chromosome: chromosome number in "string form", e.g. chr12
+    :type chromosome: str
+    :param region: region number in "string form", e.g. Region213
+    :type region: str
+    :return: The dataframe subset corresponding to the chromosome and region, as a CSV formatted text
+    :rtype: str
+    """
+    if ((chromosome is None) and (region is None)):
+        # Local analysis: the file project_hdf_path contains only useful information.
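+        # (a local-analysis worktable is already restricted to the requested chromosome
+        # and position range when it is created)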
+ # No data filter is needed + dataframe = read_hdf(project_hdf_path, "SumStatTab", + columns=["Region", "CHR", "position", + "snp_ids", "JASS_PVAL"]) + else: + # Genome full analysis + region_int = region[6:] + chromosome_int = chromosome[3:] + dataframe = read_hdf(project_hdf_path, "SumStatTab", + columns=["Region", "CHR", "position", + "snp_ids", "JASS_PVAL"], + where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + + dataframe = stringize_dataframe_region_chr(dataframe) + dataframe = dataframe.sort_values("position") + + return dataframe.to_csv(index=False) + + +def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = None, region: str = None): + """ + Read and return the SumStatTab dataframe from a worktable file + for a given chromosome and region for the Heatmap plot + + :param project_hdf_path: path to the worktable file + :type project_hdf_path: str + :param chromosome: chromosome number in "string form", e.g. chr12 + :type chromosome: str + :param region: region number in "string form", e.g. Region213 + :type region: str + :return: The dataframe subset corresponding to the chromosome and region, \ + pivoted and as a CSV formatted text + :rtype: str + """ + if ((chromosome is None) and (region is None)): + # Local analysis : the file project_hdf_path contains only useful information. + # No data filter is needed + dataframe = read_hdf(project_hdf_path, "SumStatTab") + else: + # Genome full analysis + region_int = region[6:] + chromosome_int = chromosome[3:] + dataframe = read_hdf(project_hdf_path, "SumStatTab", + where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + + dataframe = stringize_dataframe_region_chr(dataframe) + dataframe = dataframe.sort_values("position") + dataframe.drop(["Region", "CHR", "position", "JASS_PVAL", "MiddlePosition", "UNIVARIATE_MIN_PVAL", + "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"], + axis=1, + inplace=True, + ) + dataframe.rename(columns={"snp_ids": "ID"}, inplace=True) + column_order = list(dataframe.ID) + pivoted_dataframe = dataframe.pivot_table(columns="ID") + pivoted_dataframe = pivoted_dataframe.reindex(column_order, axis=1) + # TODO rework the selection to return 5000 snps in total, around the + # region ;) + + return pivoted_dataframe.to_csv(index_label="ID") + + +def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35): + """ + create_genome_full_csv + Write the genome_full.csv file + + :param project_hdf_path: path to the worktable file + :type project_hdf_path: str + + :param csv_file: path to the genome_full file + :type csv_file: str + + :param chunk_size: the size of the chunks of the initial data + :type chunk_size: int + + :return: csv_file is available. 
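+        (True when the file exists on disk once generation has finished, False otherwise)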
+ :rtype: bool + """ + + # path of the lock that indicates that the csv file is not available + the_lock_path = os.path.join(os.path.dirname(project_hdf_path), + "the_lock.txt") + if (os.path.isfile(the_lock_path)): + # The lock is set on + if (os.path.isfile(csv_file)): + # An error occurred: the csv file must not exist if the lock is set + # The existing csv file is deleted + os.remove(csv_file) + + for chunk in range(Nchunk): + print("indice de boucle = {}".format(chunk)) + binf = chunk * chunk_size + bsup = (chunk + 1) * chunk_size + + # read workTable.hdf5 + df_for_csv = read_hdf(project_hdf_path, "SumStatTab", + where='Region >= {0} and Region < {1}'.format(binf, bsup)) + + # append the data to the csv file + with open(csv_file, 'a') as f: + df_for_csv.to_csv(f, header=f.tell() == 0) + + # The lock is deleted + os.remove(the_lock_path) + + if (os.path.isfile(csv_file)): + The_file_is_available = True + else: + The_file_is_available = False + + return The_file_is_available + diff --git a/jass/static/chromo_heatmap_manhattan.html b/jass/static/chromo_heatmap_manhattan.html index 71e80fac7a325f99cf6481933008b4b9fcf1b42b..a21806c484ab3f76d33304263cb4f4ed04678e04 100644 --- a/jass/static/chromo_heatmap_manhattan.html +++ b/jass/static/chromo_heatmap_manhattan.html @@ -1,1564 +1,1543 @@ -<html> -<head> - - <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css"> - <link rel="stylesheet" href="css/style.css"> - - - <style> - - #sortable1, #sortable2 { - border: 1px solid #eee; - width: 142px; - min-height: 20px; - list-style-type: none; - margin: 0; - padding: 5px 0 0 0; - float: left; - margin-right: 10px; - } - #sortable1 li, #sortable2 li { - margin: 0 5px 5px 5px; - padding: 5px; - font-size: 1.2em; - width: 120px; - } - .gtitle {font-size:20px!important;} - </style> - <!-- Plotly.js --> - - - - <script src="js/plotly-latest.min.js"></script> <!--https://cdn.plot.ly/plotly-latest.min.js--> - <!--<script src="js/jquery.min.js"></script> http://ajax.googleapis.com/ajax/libs/ - - jquery/1.8.1/jquery.min.js--> - <script src="https://code.jquery.com/jquery-3.3.1.js"></script> - <script src="js/jquery-ui.js"></script> <!--https://code.jquery.com/ui/1.12.1/jquery-ui.js--> - - <script src="js/snap.svg-min.js"></script> - <script src="js/Chromosome.js"></script> - <script src="js/colorScale.js"></script> - <script src="js/jquery.blockUI.js"></script> - - - - - - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"> - <link rel="stylesheet" href="https://cdn.datatables.net/1.10.19/css/dataTables.bootstrap.min.css"> - <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.15/css/jquery.dataTables.min.css"> - - <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/select/1.3.0/css/select.dataTables.min.css"> - - - - <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" ></script> - - <!--<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" ></script>--> - <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.19/js/jquery.dataTables.min.js"></script> - <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.19/js/dataTables.bootstrap.min.js"></script> - <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/select/1.3.0/js/dataTables.select.min.js"></script> - - - <!-- datatables buttons pdf csv copy --> - <script 
src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.1.3/jszip.min.js"></script> - <script src="js/pdfmake.min.js"></script> - <script src="js/vfs_fonts.js"></script> - <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.html5.min.js"></script> - <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.print.min.js"></script> - <script src="https://cdn.datatables.net/buttons/1.3.1/js/dataTables.buttons.min.js"></script> - <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/1.3.1/css/buttons.dataTables.min.css"> - <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.flash.min.js"></script> - - - - - - - <style> - .header { - margin:0 0 30px 0; - padding: 5px 0 0 0 ; - height:50; - background-image: url('img/bkg_part2_wthLabel.jpg'); - background-color: transparent; - background-size:contain; - object-fit: contain; - background-repeat:no-repeat; - } - #image-top {height:80!important} - </style> -</head> - - -<body onload="init()" style="margin-left: 3em;"><!-- margin down--> - - <div class="header" > </div> - <div id="divWholeStatPlot" ><!-- Plotly chart will be drawn inside this DIV --> - </div> - <center><p>Click on any point to zoom in and obtain the distribution of Zscore for single GWAS</p></center> - <div id ="summaryStat" hidden="true"><button id="exportGlobal" >Export Region Results</button> - <button id="exportCsv">Export All SNP Results</button> - <button id="fullImg">Manhattan Plot</button> - <button id="quadrantImg">Quadrant plot</button> - <button id="directLink">Share Direct Link</button> - <button data-toggle="collapse" data-target="#collapseExample" aria-expanded="false" aria-controls="collapseExample">JASS only significant region(s)</button> - <textarea id= "directLinkText" hidden="true"></textarea> - </div> - - - - <div class="collapse" id="collapseExample"> - <div id='divContainer' width ="400px"> - <table id ="pheTable" style="width :500px" class="display dataTable"></table> - </div> - </div> - - - <svg id="svg" width="100%" height="150" ></svg> <!-- style="background-color:powderblue;" --> - <select id="ChrList" onChange="dodraww()" hidden="true"></select> - <div id="divHeatPlot" ><!-- Plotly chart will be drawn inside this DIV --></div> - <select id="HeatList" onChange="redrawHeatmap()" hidden="true"></select> - <div id="exportHeatPlot" hidden="true"><button id="exportHeatmap" >Export</button></div> - <div id="divLocalStatPlot" ><!-- Plotly chart will be drawn inside this DIV --></div> - <div id="exportLocalStatPlot" hidden="true" ><button id="exportLocal" >Export SNPs From The Region</button></div> - <script> - - var annotations = []; - var heatmapPhenotype; - - - var plotSize; - var plotHeatMapSize; - var mapWidth; - var mapHeight =40; - var paper; - - var phenotypesCount; - - var syzeChr = { "chr1" : 248956422, "chr10" :133797422, - "chr11": 135086622 ,"chr12":133275309 , - "chr13": 114364328,"chr14": 107043718, - "chr15": 101991189,"chr16": 90338345, - "chr17": 83257441,"chr18": 80373285, - "chr19": 58617616,"chr2": 242193529, - "chr20": 64444167,"chr21": 46709983, - "chr22": 50818468,"chr3": 198295559, - "chr4": 190214555,"chr5": 181538259, - "chr6": 170805979,"chr7": 159345973, - "chr8": 145138636,"chr9": 138394717, - "chrX": 156040895, "chrY":57227415}; - - var chrOrder = ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY"]; - - var chrDict; - var sel; // hash index-> 
chromosomeId - - // idProjet <- list of phenotypes - var idProject; - var selectedRegion; - var selectedChr; - // PhenotypesDict <- Json object list of Phenotypes object - var PhenotypesDict; - - function init(){ - - idProject = sessionStorage.getItem("id"); - console.log(idProject); - var phe = sessionStorage.getItem("phenotypes"); - PhenotypesDict = JSON.parse(phe); - phenotypesCount =PhenotypesDict.length; - - console.log(PhenotypesDict+" "+phenotypesCount); - - - console.log("width "+$(window).width()); - var plotSize = $(window).width(); - plotHeatMapSize = $(window).width()-50; // !!!!!!!! A modifier si changement de disposition des plots heatmap et stat jointe locale afin qu'ils restent caler en abscisse - - //mapWidth = plotHeatMapSize-50; - mapWidth = plotHeatMapSize/2; - var chrsize; - //var conv=mapWidth/247249719; - paper = Snap("#svg"); - - - - //var chrNum=0; - - - - chrDict = new Array(); - - - - var chromosomes =[]; - var chrOrder = ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY"]; - - sel = document.getElementById('ChrList'); - var fragment = document.createDocumentFragment(); - - chrOrder.forEach(function(chr, index) { - var opt = document.createElement('option'); - opt.innerHTML = chr; - opt.value = chr; - fragment.appendChild(opt); - }); - sel.appendChild(fragment); - - - //var heatColors = $('#HeatList'); - var heatColors = document.getElementById('HeatList'); - var fragmentHeat = document.createDocumentFragment(); - var colors = ["Picnic","Basic","Custom1","Custom2","Blues","Blackbody","Bluered","Earth","Electric","Greens","Greys","Hot","Jet","Labelled","Portland","Reds","Viridis","YlOrRd","YlGnBu"]; - colors.forEach(function(colorsHeatScale, index) { - var opt = document.createElement('option'); - opt.innerHTML = colorsHeatScale; - opt.value = colorsHeatScale; - - fragmentHeat.appendChild(opt); - }); - //fragmentHeat.selectedIndex= 3; - heatColors.appendChild(fragmentHeat); - makeplot(); - } - - - - function makeplot() { - //console.log(Plotly.d3);Heatmap - //lien vers l'image omnibus - // http://hub17.hosting.pasteur.fr/api/projects/a75aec97af8ac7f28d55056e4a4c01af/globalmanhattan - - - Plotly.d3.tsv("data/cytoBand.txt", function(data){ processDataBand(data) } ); - //Plotly.d3.csv("ttpn2.csv", function(data){ processData2(data) } ); - - // summary Stat - /*var desiredText = "get the full plot png"; - var desiredLink = "/api/projects/"+idProject+"/globalmanhattan"; - $('<a target="_blank" href="'+desiredLink+'">'+desiredText+'</a>').appendTo($('#summaryStat'));*/ - - //Plotly.d3.csv("data/FileJOSTmin.csv", function(error,data){ - console.log("/api/projects/"+idProject+"/genome"); - Plotly.d3.csv("/api/projects/"+idProject+"/genome", function(error,data){ - if (error) { - $('#divWholeStatPlot').html('<p>An error has occurred</p>'); - return console.warn(error) - - } - else{ - processDataJOSTmin(data) - } - } ); - - - - //Plotly.d3.csv("BIGZ.csv", function(data){ processData(data,"Heatmap") } ); - //Plotly.d3.csv("BIGP.csv", function(data){ processData(data,"Manhattan") } ); - - - //console.log("Test"); - }; - - function unpack(rows, key) { - return rows.map(function(row) { return row[key]; }); - - } - function unpackLog(rows, key) { - return rows.map(function(row) { return -Math.log10(row[key]); }); - - } - function processDataJOSTmin(rows) { - - $('#summaryStat').show(); - /*var chromosomes = 
["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY"];*/ - var chromosomes = ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"]; - - //return ("chr"+row.CHR === chr); - //name: row.CHR, - console.log(rows); - var data = chromosomes.map(function(chr) { - var rowsFiltered = rows.filter(function(row) { - return (row.CHR === chr); - }); - //console.log(rowsFiltered); - return { - name: chr, - x: unpack(rowsFiltered, 'Region'), - y: unpackLog(rowsFiltered, 'JOSTmin'), - text: unpack(rowsFiltered, 'MiddlePosition'), - mode: 'markers', - showlegend : false, - type:'scatter' - } - - }); - - - - // fill the region table - processShowGJASSPVAL(rows); - - - //mode: 'markers', - //type:'histogram' - - //mode: 'markers', - //type:'scatter' - - // width: 700, - // height: 1500, - //type: 'category' - // remove toggleSpikelines in modeBarButtonsToRemove - - var layout = { - title: "Joint test association results by region", - width: plotHeatMapSize, - hovermode:'closest', - showLink: false, - modeBarButtonsToRemove: [], - - font:{ - size:8 - }, - margin: { - l: 80, - r: 50, - b: 60, - t: 30, - pad: 4 - }, - xaxis: { - showgrid : false, - showticklabels :true, - ticks: '', - side: 'bottom', - type: 'category', - range : [0,'1704'] - }, - yaxis: { - title: "-log(Pvalue)", - ticks: '', - ticksuffix: ' ', - - //type : 'log' - }, - - hovermode: 'closest' - }; - Plotly.setPlotConfig({ - modeBarButtonsToRemove: ['zoom2d','pan2d','select2d','lasso2d','zoomIn2d','zoomOut2d','autoScale2d','resetScale2d','hoverClosestCartesian','hoverCompareCartesian'] - , displaylogo: false - }); - Plotly.plot('divWholeStatPlot', data, layout); - - var suggestiveline = -Math.log10(1e-5); - var suggestiveline_color = "orange"; - var suggestiveline_width = 1; - var genomewideline = -Math.log10(5e-8); - var genomewideline_color = "green"; - - - - - var datmp = data[0]; - var tbx = datmp.x; - var d = tbx[0]; - - var datmp2 = data[data.length-1]; - var tbx2 = datmp2.x - var e = tbx2[tbx2.length-1]; - - console.log("datmp " +datmp); - console.log("d "+d +" e "+e); - console.log(tbx2); - //var d ='rs3107146'; - //var e = 'rs4824114'; - var trace1 = { - x: [d,e], - y: [suggestiveline, suggestiveline], - mode: 'lines+markers+text', - name: '1e-5', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - showlegend: true, - marker: { - color: suggestiveline_color - } - }; - - var trace2 = { - x: [d, e], - y: [genomewideline, genomewideline], - mode: 'lines+markers+text', - name: '5e-8', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - showlegend: true, - marker: { - color: genomewideline_color - } - }; - - Plotly.addTraces('divWholeStatPlot', trace1); - Plotly.addTraces('divWholeStatPlot', trace2); - - - - - divWholeStatPlot.on('plotly_click', function(data){ - var pts = ''; - - var infotext = data.points.map(function(d){ - //console.log(d.pointNumber+' ' +d.data.text[d.pointNumber]);//text[d.pointNumber] - - return (d.data.name+': x= '+d.x+', y= '+d.y.toPrecision(3)+' pos:' +d); - }); - //console.log(infotext); - //alert('Closest point clicked:\n\n'+pts); - - //console.log(data); - console.log(data.points[0].x); - console.log(data.points[0]); - console.log(data.points[0].text); - - //var selectedChr ; - 
data.points.map(function(d){ - //chr d.data.name - //pos d.data.text[d.pointNumber]) - selectedChr =d.data.name; - console.log("!!! "+d.data.text[d.pointNumber]); - dodrawChr(d.data.name,d.data.text[d.pointNumber]); - sel.value = d.data.name; - }); - - selectedRegion = data.points[0].x; - - var updaterange = data.points[0].x.split("Region"); - console.log(updaterange[1]); - // region 840 -> 850 - var tmp= Number(updaterange[1])+10; - updaterange = "Region"+tmp; - console.log("updaterange "+updaterange); - //var update = { - // title: 'some new title', // updates the title - //var xpos = data.points[0].x-5; - var update = { - shapes: [ - // 1st highlight during Feb 4 - Feb 6 - { - type: 'rect', - // x-reference is assigned to the x-values - xref: 'x', - // y-reference is assigned to the plot paper [0,1] - yref: 'paper', - x0: data.points[0].x, - y0: 0, - x1: updaterange, - y1: 1, - fillcolor: '#d3d3d3', - opacity: 0.3, - line: { - width: 0 - } - } - ] - - }; - - Plotly.relayout(divWholeStatPlot,update); - - selectRegion(selectedChr,selectedRegion); - - }); - - - } - - function selectRegion(chromosome,region){ - - // empty the local heatmap and local manhattan plot - Plotly.purge(divLocalStatPlot); - Plotly.purge(divHeatPlot); - - selectedRegion = region; - selectedChr = chromosome; - - //$('#exportLocalStatPlot').empty(); - $.blockUI({ css: { - border: 'none', - padding: '15px', - backgroundColor: '#000', - '-webkit-border-radius': '10px', - '-moz-border-radius': '10px', - opacity: .5, - color: '#fff'} }); - - console.log("/api/projects/"+idProject+"/manhattan/"+selectedChr+"/"+selectedRegion); - Plotly.d3.csv("/api/projects/"+idProject+"/manhattan/"+selectedChr+"/"+selectedRegion, function(data){ processManhattanJASSPVAL(data) } ); - } - - function processManhattanJASSPVAL(rows) { - - // appel de la heatmap quand le manhattan local est chargé - $('#exportLocalStatPlot').show(); - console.log("rows.length "+rows.length); - console.log("/api/projects/"+idProject+"/heatmap/"+selectedChr+"/"+selectedRegion); - Plotly.d3.csv("/api/projects/"+idProject+"/heatmap/"+selectedChr+"/"+selectedRegion, function(data){ processData(data,"Heatmap") } ); - - var regions = []; - for (var i=0; i<2000; i++) { - regions[i] = "Region"+i; - //console.log(regions[i]); - } - - // create link to data table page to export zone of interrest - - sessionStorage.setItem("selectedRegion",selectedRegion); - sessionStorage.setItem("selectedChr",selectedChr); - - var desiredText = "Export data"; - var desiredLink = "export.html"; - //$('<a target="_blank" href="'+desiredLink+'">'+desiredText+'</a>').appendTo($('#exportLocalStatPlot')); - - /*var button = document.createElement("BUTTON"); - button.setAttribute("id","exportLocal"); - buttonText = document.createTextNode("Export Local Plot"); - button.appendChild(buttonText); - exportLocalStatPlot.appendChild(button);*/ - //$('#exportLocal').appendTo($('#exportLocalStatPlot')); - - - //var rpair ="rpair"; - //var rimpair = "rimpair"; - //var regions = ["pairs","impairs"]; - //var data = regions.map(function(region) { - /*var group ="one"; - var data = regions.map(function(chr) { - - var rowsFiltered = rows.filter(function(row) { - var ronum = row.Region.substring(6, row.length); - var bool =true; - if (ronum%2 == 0){ - bool =true; - group = "one"; - } - else{ - bool = false; - group = "two"; - } - //console.log(bool+" "+row.Region); - //return bool; - // return (row.Region === region); - //return (row.CHR === chr); - return bool; - }); - //console.log(rowsFiltered); - return { 
- name: chr, - x: unpack(rowsFiltered, 'snp_ids'), - y: unpackLog(rowsFiltered, 'JASS_PVAL'), - text: unpack(rowsFiltered, 'Region'), - mode: 'markers', - type:'scatter' - } - - });*/ - - - var data = regions.map(function(region) { - var rowsFiltered = rows.filter(function(row) { - return (row.Region === region); - }); - return { - name: region, - x: unpack(rowsFiltered, 'snp_ids'), - y: unpackLog(rowsFiltered, 'JASS_PVAL'), - text: unpack(rowsFiltered, 'Region'), - mode: 'markers', - type:'scatter' - } - - }); - //mode: 'markers', - //type:'histogram' - - //mode: 'markers', - //type:'scatter' - - - //type: 'category' - /*var layout = { - - title: "Local joined statistic by SNP", - - - width: plotSize, - margin: { - l: 90, - r: 50, - b: 10, - t: 100, - pad: 4 - }, - xaxis: { - showticklabels :true, - ticks: '', - side: 'top', - type: 'category', - textposition: 'down', - range : [0,'10000'] - - - }, - yaxis: { - title: "-log(Pvalue)", - autosize: true, - ticks: '', - textposition: 'down', - fixedrange:true, - - - } - - - - };*/ - //title: "Local joined statistic by SNP", - var titleplot = "Joint test association results for locus "+selectedRegion+" on "+selectedChr; - var layout = { - - title: titleplot, - width: plotSize, - hovermode:'closest', - font:{ - size:8 - }, - margin: { - l: 90, - r: 50, - b: 10, - t: 100, - pad: 4 - }, - xaxis: { - showticklabels :false, - ticks: '', - side: 'top', - type: 'category', - - range : [-0.5,rows.length] - }, - yaxis: { - title: "-log(Pvalue)", - ticks: '', - ticksuffix: ' ', - - fixedrange:true, - - //type : 'log' - }, - - hovermode: 'closest' - }; - Plotly.plot('divLocalStatPlot', data, layout); - - var suggestiveline = -Math.log10(1e-5); - var suggestiveline_color = "orange"; - var suggestiveline_width = 1; - var genomewideline = -Math.log10(5e-8); - var genomewideline_color = "green"; - - var datmp = rows[0]; - - - - var d = datmp.snp_ids; - var datmp2 = rows[rows.length-1]; - var e= datmp2.snp_ids; - - - console.log("d "+d +" e "+e); - //var d ='rs3107146'; - //var e = 'rs4824114'; - var trace1 = { - x: [d,e], - y: [suggestiveline, suggestiveline], - mode: 'lines+markers+text', - name: '1e-5', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - showlegend: true, - marker: { - color: suggestiveline_color - } - }; - - var trace2 = { - x: [d, e], - y: [genomewideline, genomewideline], - mode: 'lines+markers+text', - name: '5e-8', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - showlegend: true, - - marker: { - color: genomewideline_color - } - }; - - Plotly.addTraces('divLocalStatPlot', trace1); - Plotly.addTraces('divLocalStatPlot', trace2); - - Plotly.setPlotConfig({ - modeBarButtonsToRemove: ['zoom2d','pan2d','select2d','lasso2d','zoomIn2d','zoomOut2d','autoScale2d','resetScale2d','hoverClosestCartesian','hoverCompareCartesian'] - , displaylogo: false - }); - - divLocalStatPlot.on('plotly_relayout', - function(eventdata){ - /*console.log( 'ZOOM!' 
+ '\n\n' + - 'Event data:' + '\n' + - JSON.stringify(eventdata) + '\n\n' + - 'x-axis start:' + eventdata['xaxis.range[0]'] + '\n' + - 'x-axis end:' + eventdata['xaxis.range[1]'] ); - */ - var update = { - - 'xaxis.range': [eventdata['xaxis.range[0]'],eventdata['xaxis.range[1]']], - - } - - Plotly.relayout('divHeatPlot', update); - }); - - } - - - - function processData(allRows,plotType) { - - - $.unblockUI(); - //$('#exportHeatPlot').show(); - console.log("type of Plot "+ plotType); - - var x = [], y = []; - - - - - var cols =[]; - var lines = []; - - for(var valeur in allRows[0]) { - //console.log(valeur); - if ((valeur != "")&&(valeur!='ID')){ - cols.push(valeur); - } - } - - - //Manhattan - /*for (var i=0; i<allRows.length; i++) { - row = allRows[i]; - lines[i]= row['YLAB']; - //console.log(row); - //console.log(row['YLAB']); - //console.log(row.getElementById['Y1']); - - //var arrayOfStrings = row.split(" "); - //console.log(row); - x.push( row['YLAB'] ); - y.push( -Math.log10(row['Y1']) ); - - - - - //y.push(row['Y1']); - }*/ - var arr = []; - var row = allRows[0]; - - /*console.log(row[cols[3]]); - - console.log(allRows.length); - console.log(cols.length); -*/ - console.log(allRows[0]); - for (var i=0; i<allRows.length; i++) { - row = allRows[i]; - - // Creates an empty line - arr.push([]); - //console.log('!!!! ROW '+row['YLAB']) - if(plotType == "Heatmap"){ - lines[i] = row['ID']; - } - // Adds cols to the empty line: - arr[i].push( new Array(cols)); - for(var j=0; j < cols.length; j++){ - // Initializes: - //arr[i][j] = Math.random() * random(10000); - // arr[i][j] = -Math.log10(row[cols[j]]); - if(plotType == "Heatmap"){ - arr[i][j] = row[cols[j]]; - } - else{ - if(row[cols[j]] == 0){ - console.log("ZEROOOOOOO !") - arr[i][j] = 32; - } - else{ - arr[i][j] = -Math.log10(row[cols[j]]); - } - } - } - } - /*for(var i=0; i < cols.length; j++){ - var col = cols[i]; - // Creates an empty line - arr.push([]); - - // Adds cols to the empty line: - arr[i].push( new Array(cols.length)); - for(var j=1; j < allRows.length; j++){ - // Initializes: - //arr[i][j] = Math.random() * random(10000); - arr[i][j] = -Math.log10(allRows[cols[j]]); - } - - }*/ - - - //console.log(arr.length); - //console.log(lines.length); - //console.log(arr); - console.log(cols); - console.log(lines); - - var customDict = {'Custom1':'','Custom2':''}; - var heatL = document.getElementById('HeatList'); - var colorsc = heatL.options[heatL.selectedIndex].value; - - if (colorsc in customDict){ - colorsc = colorScale[colorsc]; - } - console.log(cols[0]); - console.log(lines[0]); - var data = [ - { - z: arr, - x: cols, - y: lines, - type: 'heatmap', - colorscale :colorsc, - transforms: [{ - type: 'filter', - target: 'y', - operation: '!=', - value: 'PVALmin' - }] - - } - ]; - - //colorscale : [[0.0, 'rgb(0,0,0)'], [0.1111111111111111, 'rgb(215,48,39)'], [0.2222222222222222, 'rgb(244,109,67)'], [0.3333333333333333, 'rgb(253,174,97)'], [0.4444444444444444, 'rgb(254,224,144)'], [0.5555555555555556, 'rgb(224,243,248)'], [0.6666666666666666, 'rgb(171,217,233)'], [0.7777777777777778, 'rgb(116,173,209)'], [0.8888888888888888, 'rgb(69,117,180)'], [1.0, 'rgb(49,54,149)']] - - - //console.log( 'X',x, 'Y',y, 'SD',standard_deviation ); - var res = -Math.log10(1e-5); - var res2 = -Math.log10(5e-8); - console.log("!!"+res); - console.log("!!"+res2); - if (plotType == "Heatmap"){ - - // hide PVALMIN - /*data = [ - { - z: arr, - x: cols, - y: lines, - type: 'heatmap', - colorscale :colorsc, - transforms: [{ - type: 'filter', - target: 'y', - 
operation: '=', - value: 'PVLAmin' - }] - - } - ];*/ - - makeHeatmap(data); - } - else if(plotType == "Manhattan"){ - var d =cols[0]; - var e = cols[cols.length-1]; - - console.log("col "+cols[1]+" "+cols.length); - console.log(arr[0]) - //var d ="rs17685809"; - //var e ="rs1543534"; - - //var d = "IND1"; - //var e = "IND10000"; - - makeManhattanly( cols, arr[0] ,d, e); - } - } - - /*function filtrerParID(obj) { - console.log(obj) - if(obj !="PVLAMIN") return true; - else return false; - }*/ - - function makeHeatmap(data ){ - - - - console.log(data); - //data = data.filter(phenotype !="PVLAMIN"); // hide PVALMIN - //data = data.filter(filtrerParID); - //console.log(data); - - - // create recombinaison zone - //["rs609922","rs6137776","rs7352682","rs2225265","rs613664","rs6081904"]; - var heatmapHeight; - if(phenotypesCount<10){ - heatmapHeight = phenotypesCount * 50; - } - else{ - heatmapHeight = phenotypesCount * 30; - } - var sizeLetterSnps = 65; - var borderHeatmap = 70; - heatmapHeight = phenotypesCount * 50 + (phenotypesCount*2 +sizeLetterSnps+borderHeatmap); - - var layout = { - title : "Z score Heatmap", - autosize: false, - height: heatmapHeight, - width: plotHeatMapSize, - font:{ - size:8 - }, - - margin: { - l: 90, - r: 50, - b: 10, - t: 30, - pad: 4 - }, - - xaxis: { - showticklabels :true, - ticks: '', - side: 'bottom', - fixedrange:true, - type: 'category' - , - rangeslider: { - thickness :0.07, - } - }, - yaxis: { - - ticks: '', - fixedrange:true, - ticksuffix: ' ' - } - - - }; - //Plotly.newPlot('divHeatPlot', traces, - // {title: 'HeatM'}); - Plotly.newPlot('divHeatPlot', data,layout); - divHeatPlot.on('plotly_relayout', - function(eventdata){ - console.log( 'ZOOM!' + '\n\n' + - 'Event data:' + '\n' + - JSON.stringify(eventdata) + '\n\n' + - 'x-axis start:' + eventdata['xaxis.range[0]'] + '\n' + - 'x-axis end:' + eventdata['xaxis.range[1]'] ); - - var update = { - - 'xaxis.range': [eventdata['xaxis.range[0]'],eventdata['xaxis.range[1]']], - - } - - //Plotly.relayout('divLocalStatPlot', update); - }); - heatmapPhenotype = divHeatPlot; - } - //Change the heatmap color scale - function redrawHeatmap(){ - //redraw the heatmap with new colorscale - console.log(heatmapPhenotype); - var heatL = document.getElementById('HeatList'); - //var update = { - //marker: {color: 'red'} - //}; - - var customDict = {'Custom1':'','Custom2':''}; - - var colorsc = heatL.options[heatL.selectedIndex].value; - - if (colorsc in customDict){ - colorsc = colorScale[colorsc]; - } - var data_update = { - colorscale : colorsc - }; - console.log(data_update); - //var layout_update = { - // title: 'some new title', // updates the title - //}; - Plotly.restyle(heatmapPhenotype, data_update , 0); - - } - - function makeManhattanly( x, y ,d,e){ - //var plotDiv = document.getElementById("plot"); - - var layout = { - - title: "Joint test association results for locus Region474 on chr4", - - - width: plotSize, - margin: { - l: 90, - r: 50, - b: 10, - t: 100, - pad: 4 - }, - xaxis: { - showticklabels :true, - ticks: '', - side: 'top', - type: 'category', - textposition: 'down', - range : [0,'10000'] - - - }, - yaxis: { - title: "-log(Pvalue)", - autosize: true, - ticks: '', - textposition: 'down', - fixedrange:true, - - - } - - - - }; - - var suggestiveline = -Math.log10(1e-5); - var suggestiveline_color = "orange"; - var suggestiveline_width = 1; - var genomewideline = -Math.log10(5e-8); - var genomewideline_color = "green"; - - //var d =cols[0]; - //var e = cols[ncols-1]; - var trace1 = { - x: [d,e], - y: 
[suggestiveline, suggestiveline], - mode: 'lines+markers+text', - name: '1e-5', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - marker: { - color: suggestiveline_color - } - }; - - var trace2 = { - x: [d, e], - y: [genomewideline, genomewideline], - mode: 'lines+markers+text', - name: '5e-8', - // text: ['Text A', 'Text B', 'Text C'], - textposition: 'top', - type: 'scatter', - marker: { - color: genomewideline_color - } - }; - var traces = [{ - x: x, - y: y, - mode: 'markers', - type:'scatter', - name: '-log(Pvalue)' - - }, - trace1,trace2 - ]; - /* - var traces = [{ - x: x, - y: y - - } - - ];*/ - Plotly.newPlot('divLocalStatPlot', traces,layout); - //console.log(divHeatPlot); - //console.log(divLocalStatPlot); - divLocalStatPlot.on('plotly_relayout', - function(eventdata){ - /*console.log( 'ZOOM!' + '\n\n' + - 'Event data:' + '\n' + - JSON.stringify(eventdata) + '\n\n' + - 'x-axis start:' + eventdata['xaxis.range[0]'] + '\n' + - 'x-axis end:' + eventdata['xaxis.range[1]'] ); - */ - var update = { - - 'xaxis.range': [eventdata['xaxis.range[0]'],eventdata['xaxis.range[1]']], - - } - - Plotly.relayout('divHeatPlot', update); - }); - - - - // anottate point - /*divLocalStatPlot.on('plotly_click', function(data){ - console.log('plotly_click'); - var pts = ''; - for(var i=0; i < data.points.length; i++){ - //annotate_text = 'x = '+data.points[i].x + - // 'y = '+data.points[i].y.toPrecision(4); - annotate_text = data.points[i].x + " "+data.points[i].y.toPrecision(4); - annotation = { - text: annotate_text, - x: data.points[i].x, - y: parseFloat(data.points[i].y.toPrecision(4)) - } - - //annotations = self.layout.annotations || []; - - annotations.push(annotation); - Plotly.relayout('divLocalStatPlot',{annotations: annotations}) - } - });*/ - - }; - - - - - - - function processDataBand(allRows) { - //var chrNum=0; - var chr =""; - var chrTab =[]; - var chrtmp=""; - var centroPosd=""; - var centroPosf=""; - for (var i=1; i<allRows.length; i++) { - row = allRows[i]; - - chr = row['chr']; - if (( chrtmp == "" )||( chr ==chrtmp)){ - chrTab.push(row); - if(row['id'] == "acen"){ - if(centroPosd ==""){ - centroPosd = row['inf']; - } - else{ - centroPosf = row['sup']; - } - } - } - - else{ - //console.log(chrtmp+" "+chrTab.length); - var chrom = new Chromosome(chrtmp,paper,syzeChr[chrtmp],centroPosd,centroPosf,chrTab,mapWidth,mapHeight); - //console.log("ii"+chrom+" "+typeof(chrom)); - //console.log(chrom); - //console.log(chr); - chrDict[chrtmp+""] = chrom; - //chrom.drawMapH(); - //createchr(chrtmp,chrTab,centroPosd,centroPosf); - chrTab = []; - chrTab.push(row); - centroPosd =""; - centroPosf =""; - - - // Translation - /*chrNum++; - var ydecal =70*chrNum; - var t = new Snap.Matrix(); - t.translate(100, ydecal); - chrom.graphicChr.transform(t); - */ - } - chrtmp = row['chr']; - - - - } - //chrNumber,snap,mapWidth,mapLength,chrsize,centroB,centroE,tabBand,mapWidth,mapLength - var chrom = new Chromosome(chrtmp,paper,syzeChr[chr],centroPosd,centroPosf,chrTab,mapWidth,mapHeight); - //console.log("chr "+chr); - //console.log("ii"+chrom+" "+typeof(chrom)); - - //chrom.drawMapH(); - chrDict[chr] = chrom; - - //doDrawAllChr(); - //dodrawChr('chr1',1); - } - - - - function dodraww(){ - - - - - //var sel = document.getElementById('ChrList'); - var chrid = sel.options[sel.selectedIndex].value; - //var chrSize = syzeChr[chrid]; - dodrawChr(chrid,1); - - /*var conv=mapWidth/chrSize; - console.log(mapWidth+" "+chrSize+" "+ conv); - chrom = 
chrDict[sel.options[sel.selectedIndex].value]; - chrom.drawMapH(conv); - - var t = new Snap.Matrix(); - t.translate(50, 50); - //c.transform(t); - - - chrom.graphicChr.animate({ transform: t }, 1000, mina.bounce );*/ - - } - function dodrawChr(chrid,pos){ - - $('#ChrList').show(); - $('#HeatList').show(); - console.log(chrid+" "+mapWidth); - paper.clear(); - - var chrSize =syzeChr[chrid]; - console.log(syzeChr[chrid]); - var conv=mapWidth/chrSize; - - console.log(mapWidth+" "+chrSize+" "+ conv); - //chrom = chrDict[sel.options[sel.selectedIndex].value]; - chrom = chrDict[chrid]; - console.log(chrDict); - console.log(chrom); - - chrom.drawMapH(conv); - - var t = new Snap.Matrix(); - t.translate(50, 50); - //c.transform(t); - - - chrom.graphicChr.animate({ transform: t }, 1000, mina.bounce ); - if(pos !=-1){ - chrom.addZoom(pos); - } - } - function doDrawAllChr(){ - - var totalSize =3088286401; - var mapW =mapWidth-200; - var conv=mapW/totalSize; - - var xdecal =0; - for (var i=0; i<chrOrder.length; i++) { - var chrSize =syzeChr[chrOrder[i]]; - var mapChrWidth = mapW*chrSize/totalSize; - console.log(chrOrder[i]); - var chrom = chrDict[chrOrder[i]]; - chrom.arcP =1; - - var t = new Snap.Matrix(); - chrom.drawMapH(conv); - - console.log(mapW+" "+mapChrWidth+" " +xdecal); - t.translate(xdecal,50 ); - //c.transform(t); - chrom.graphicChr.animate({ transform: t }, 1000, mina.bounce ); - xdecal +=mapChrWidth+10; - - } - } - - - $(function(){ - // Variable to get ids for the checkboxes - var idCounter=1; - - $("#exportCsv").click(function(){ - var desiredLink = "/api/projects/"+idProject+"/genome_full"; - console.log("fullImg "+desiredLink); - window.open(desiredLink,'_blank'); - - }); - $("#fullImg").click(function(){ - var desiredLink = "/api/projects/"+idProject+"/globalmanhattan"; - console.log("fullImg "+desiredLink); - window.open(desiredLink,'_blank'); - - }); - $("#quadrantImg").click(function(){ - var desiredLink = "/api/projects/"+idProject+"/quadrant"; - console.log("quadrant "+desiredLink); - window.open(desiredLink,'_blank'); - - }); - $("#directLink").click(function(){ - console.log(PhenotypesDict); - var phe = ""; - for (var i=0; i<PhenotypesDict.length-1; i++) { - - //console.log(variable.id); - phe+=PhenotypesDict[i].id+","; - } - phe+=PhenotypesDict[PhenotypesDict.length-1].id; - console.log(phe); - var desiredLink = "http://jass.pasteur.fr/directLink.html?phenotypes="+phe; - console.log("quadrant "+desiredLink); - $("#directLinkText").val(desiredLink); - $('#directLinkText').show(); - //window.open(desiredLink,'_blank'); - - }); - - $("#exportGlobal").click(function(){ - - - sessionStorage.setItem("typeExport","globalStat"); - - - window.open('export.html','_blank'); - console.log("exportGlobal"); - }); - - $("#exportHeatmap").click(function(){ - - - sessionStorage.setItem("typeExport","heatmap"); - sessionStorage.setItem("selectedChr",selectedChr); - - window.open('export.html','_blank'); - console.log("exportHeatmap"); - }); - - $("#exportLocal").click(function(){ - - sessionStorage.setItem("selectedRegion",selectedRegion); - sessionStorage.setItem("selectedChr",selectedChr); - sessionStorage.setItem("typeExport","localStat"); - - var desiredText = "Export data"; - var desiredLink = "export.html"; - window.open('export.html','_blank'); - console.log("exportLocal"); - }); - - - }); - - - - - function afac(){ - - /* d3.queue() - .defer(d3.csv, "/data/cities.csv") - .defer(d3.tsv, "/data/animals.tsv") - .await(analyze); - - function analyze(error, cities, animals) { - if(error) { 
console.log(error); } - - console.log(cities[0]); - console.log(animals[0]); - }*/ - } - - - - function processShowGJASSPVAL(rows) { - console.log("processShowGJASSPVAL rows.length "+rows.length); - console.log(rows[0]); - - var columns =["CHR","JOSTmin","MiddlePosition","PVALmin","Region","signif_status","snp_ids"]; - - tabulate(rows,columns); - } - - function tabulate(data, columns) { - //console.log(data); - - //var table = document.createElement("table"); - var tablediv = $('#pheTable'); - //table.id="pheTable"; - var thead = document.createElement("thead"); - var tr = document.createElement("tr"); - for (var i=0; i<columns.length; i++) { - if ((columns[i] != "ID")&&(columns[i] != "linkRef")){ - var th = document.createElement("th"); - th.innerHTML =columns[i]; - tr.appendChild(th); - } - } - - thead.appendChild(tr); - tablediv.append(thead); - - var tbody = document.createElement("tbody"); - for (var i=0; i<data.length; i++) { - - var tr = document.createElement("tr"); - - for (var j=0; j<columns.length; j++) { - - var td = document.createElement("td"); - td.innerHTML=data[i][columns[j]]; - if(columns[j] == "JASS_PVAL"){ - var val= -Math.log10(data[i][columns[j]]); - td.innerHTML= val; - //idCounter ++; - tr.append(td); - } - - else { - td.innerHTML=data[i][columns[j]]; - tr.append(td); - } - - } - - tbody.appendChild(tr); - } - - - - tablediv.append(tbody); - - - - var table = $('#pheTable').dataTable( { - - aoColumns: [ { },{ },{ },{ },{ },{ },{ }], - dom: 'Bfrtip', - select: { - style: 'single' - }, - buttons: [ - 'copy', 'csv', 'pdf' - ] - - - } ); - console.log(table); - - - /*var filteredData = table.api() - .column( 5 ) - .data() - .filter( function ( value, index ) { - return value == "Joint" ? true : false; - } );*/ - table.api().column(5).search("Joint").draw(); - - $('#pheTable').on('click', 'tr', function () { - var datarow = table.api().row( this ).data(); - console.log( 'You clicked on '+datarow[4]+'\'s row' ); - - - selectedRegion = datarow[4]; - selectedChr = datarow[0]; - var pos = datarow[2]; - - dodrawChr(selectedChr,pos); - sel.value = selectedChr; - - var wholePlot = document.getElementById('divWholeStatPlot'); - - var dataWhole = wholePlot.data; - - var result - dataWhole.filter(function(d){ - console.log(d); - if (d.name = selectedChr){ - - d.x.forEach(function(valeur){ - //console.log(valeur); - if (valeur == selectedRegion){ - result = d; - //word => valeur == selectedRegion; - - } - - - }); - } - //console.log(d.x[0]); - - //console.log(d.chr) - //word => d.chr == selectedChr; - }); - console.log(result); - /* - dataWhole.map.forEach(function(valeur, clé) { - console.log(clé + " = " + valeur); - }); - var infotext = dataWhole.map(function(d){ - //console.log(d.pointNumber+' ' +d.data.text[d.pointNumber]);//text[d.pointNumber] - //data.points[0] =region - console.log(d); - return (d.data.name+': x= '+d.x+', y= '+d.y.toPrecision(3)+' pos:' +d); - }); - console.log(infotext);*/ - - selectRegion(selectedChr,selectedRegion); - - } ); - - /*table.on( 'select', function ( e, dt, type, indexes ) { - var rowData = table.rows( indexes ).data().toArray(); - events.prepend( '<div><b>'+type+' selection</b> - '+JSON.stringify( rowData )+'</div>' ); - - } );*/ - - } - - - </script> -</body> - -</html> - +<html> +<head> + + <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css"> + <link rel="stylesheet" href="css/style.css"> + + + <style> + + #sortable1, #sortable2 { + border: 1px solid #eee; + width: 142px; + min-height: 20px; + list-style-type: 
none; + margin: 0; + padding: 5px 0 0 0; + float: left; + margin-right: 10px; + } + #sortable1 li, #sortable2 li { + margin: 0 5px 5px 5px; + padding: 5px; + font-size: 1.2em; + width: 120px; + } + .gtitle {font-size:20px!important;} + </style> + <!-- Plotly.js --> + + + <script src="js/plotly-latest.min.js"></script> <!--https://cdn.plot.ly/plotly-latest.min.js--> + <!--<script src="js/jquery.min.js"></script> http://ajax.googleapis.com/ajax/libs/ + + jquery/1.8.1/jquery.min.js--> + <script src="https://code.jquery.com/jquery-3.3.1.js"></script> + <script src="js/jquery-ui.js"></script> <!--https://code.jquery.com/ui/1.12.1/jquery-ui.js--> + + <script src="js/snap.svg-min.js"></script> + <script src="js/Chromosome.js"></script> + <script src="js/colorScale.js"></script> + <script src="js/jquery.blockUI.js"></script> + + + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"> + <link rel="stylesheet" href="https://cdn.datatables.net/1.10.19/css/dataTables.bootstrap.min.css"> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.15/css/jquery.dataTables.min.css"> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/select/1.3.0/css/select.dataTables.min.css"> + + + <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" ></script> + + <!--<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" ></script>--> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.19/js/jquery.dataTables.min.js"></script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.19/js/dataTables.bootstrap.min.js"></script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/select/1.3.0/js/dataTables.select.min.js"></script> + + <!-- datatables buttons pdf csv copy --> + <script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.1.3/jszip.min.js"></script> + <script src="js/pdfmake.min.js"></script> + <script src="js/vfs_fonts.js"></script> + <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.html5.min.js"></script> + <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.print.min.js"></script> + <script src="https://cdn.datatables.net/buttons/1.3.1/js/dataTables.buttons.min.js"></script> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/1.3.1/css/buttons.dataTables.min.css"> + <script src="https://cdn.datatables.net/buttons/1.3.1/js/buttons.flash.min.js"></script> + + <script src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-MML-AM_CHTML" type="text/javascript"></script> + + <!--<script defer language="javascript" src="js/create_genome_full_csv.js" type="text/javascript"> </script>--> + + + <style> + .header { + margin:0 0 30px 0; + padding: 5px 0 0 0 ; + height:50; + background-image: url('img/bkg_part2_wthLabel.jpg'); + background-color: transparent; + background-size:contain; + object-fit: contain; + background-repeat:no-repeat; + } + + #image-top {height:80!important} + + #exportCsv { + justify-content: center; + vertical-align:center; + background-image:none; + background-repeat: no-repeat; + background-position:center; + } + </style> +</head> + + +<body onload="init()" style="margin-left: 3em;"><!-- margin down--> + + <div class="header" > </div> + <div id="divWholeStatPlot" ><!-- Plotly chart will be drawn inside this DIV --> + </div> + + <center><p>Click on any point to zoom in and obtain the 
distribution of Zscore for single GWAS</p></center> + + <div id ="summaryStat" hidden="true"><button id="exportGlobal" >Export Region Results</button> + <button id="exportCsv">Export All SNP Results</button> + <button id="fullImg">Manhattan Plot</button> + <button id="quadrantImg">Quadrant plot</button> + <button id="directLink">Share Direct Link</button> + <button data-toggle="collapse" data-target="#collapseExample" aria-expanded="false" aria-controls="collapseExample">JASS only significant region(s)</button> + <textarea id= "directLinkText" hidden="true"></textarea> + </div> + + <div class="collapse" id="collapseExample"> + <div id='divContainer' width ="400px"> + <table id ="pheTable" style="width :500px" class="display dataTable"></table> + </div> + </div> + + <svg id="svg" width="100%" height="150" ></svg> <!-- style="background-color:powderblue;" --> + <select id="ChrList" onChange="dodraww()" hidden="true"></select> + <div id="divHeatPlot" ><!-- Plotly chart will be drawn inside this DIV --></div> + <select id="HeatList" onChange="redrawHeatmap()" hidden="true"></select> + <div id="exportHeatPlot" hidden="true"><button id="exportHeatmap" >Export</button></div> + <div id="divLocalStatPlot" ><!-- Plotly chart will be drawn inside this DIV --></div> + <div id="exportLocalStatPlot" hidden="true" ><button id="exportLocal" >Export SNPs From The Region</button></div> + + <script> + + // variables non utilisées + var annotations = []; + + + var heatmapPhenotype; + var plotSize; + var plotHeatMapSize; + var mapWidth; + var mapHeight =40; + var paper; + var phenotypesCount; + + var default_color = "red"; + var couleur = ['red', 'blue']; + + var syzeChr = { "chr1" : 248956422, "chr2" : 242193529, + "chr3" : 198295559, "chr4" : 190214555, + "chr5" : 181538259, "chr6" : 170805979, + "chr7" : 159345973, "chr8" : 145138636, + "chr9" : 138394717, "chr10" : 133797422, + "chr11" : 135086622, "chr12" : 133275309, + "chr13" : 114364328, "chr14" : 107043718, + "chr15" : 101991189, "chr16" : 90338345, + "chr17" : 83257441, "chr18" : 80373285, + "chr19" : 58617616, "chr20" : 64444167, + "chr21" : 46709983, "chr22" : 50818468, + "chrX" : 156040895, "chrY" : 57227415}; + + var chrOrder = ["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", + "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", + "chr20", "chr21", "chr22", "chrX", "chrY"]; + + var chromosomes = [ "chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9", + "chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19", + "chr20","chr21","chr22"]; + + var chrDict; + var sel; // hash index-> chromosomeId + + // idProjet <- list of phenotypes + var idProject; + var selectedRegion; + var selectedChr; + // PhenotypesDict <- Json object list of Phenotypes object + var PhenotypesDict; + + var table_lines = new Array(); + + + function init(){ + + console.log("Entrée dans la fonction init() " ); //__Modif__CN__ + + idProject = sessionStorage.getItem("id"); + console.log(idProject); + var phe = sessionStorage.getItem("phenotypes"); + + PhenotypesDict = JSON.parse(phe); + phenotypesCount =PhenotypesDict.length; + + console.log("phe =" + phe); + console.log(PhenotypesDict+" "+phenotypesCount); + console.log("width "+$(window).width()); + + plotSize = $(window).width(); + plotHeatMapSize = $(window).width()-50; // !!!!!!!! 
A modifier si changement de disposition des plots heatmap et stat jointe locale afin qu'ils restent caler en abscisse + mapWidth = plotHeatMapSize/2; + var chrsize; + + paper = Snap("#svg"); + + chrDict = new Array(); + + var chromosomes =[]; + + sel = document.getElementById('ChrList'); + var fragment = document.createDocumentFragment(); + + + chrOrder.forEach(function(chr, index) { + var opt = document.createElement('option'); + opt.innerHTML = chr; + opt.value = chr; + fragment.appendChild(opt); + }); + + sel.appendChild(fragment); + + var heatColors = document.getElementById('HeatList'); + var fragmentHeat = document.createDocumentFragment(); + var colors = [ "Picnic","Basic","Custom1","Custom2","Blues","Blackbody","Bluered", + "Earth","Electric","Greens","Greys","Hot","Jet","Labelled","Portland", + "Reds","Viridis","YlOrRd","YlGnBu"]; + + + colors.forEach(function(colorsHeatScale, index) { + var opt = document.createElement('option'); + opt.innerHTML = colorsHeatScale; + opt.value = colorsHeatScale; + fragmentHeat.appendChild(opt); + }); + + heatColors.appendChild(fragmentHeat); + + + makeplot(); + + // management of the exportCsv button: + // if the genome_full.csv file does not exist yet, + // it is generated asynchronously and the button is set insensitive + var exportCsvButton = document.getElementById('exportCsv'); + var styleCsvButton = window.getComputedStyle(exportCsvButton,null); + var theCSSbackgd = styleCsvButton.getPropertyValue("background-color"); + var theCSScolTxt = styleCsvButton.getPropertyValue("color"); + var Init_button = true; + var getCsvFileStatus = function(){ + $.get("/api/projects/" + idProject + "/csv_status", function( data ) { + var csv_file_gen_status = data; + + if(csv_file_gen_status != "READY"){ + if (Init_button == true) { + Init_button = false; + // Deactivate the button and update the feedback color. + exportCsvButton.style.backgroundImage="url(./img/busy.gif)"; + exportCsvButton.disabled = true; + exportCsvButton.style.backgroundColor = "rgb(200,200,200)"; + exportCsvButton.style.color = "rgb(255,255,255)"; + } + setTimeout(getCsvFileStatus, 10000); + } + else if (Init_button == false) { + // Reactivate the button. 
+ exportCsvButton.style.backgroundImage="none"; + exportCsvButton.disabled = false; + // Reset the feedback color on the button + exportCsvButton.style.backgroundColor = theCSSbackgd; + exportCsvButton.style.color = theCSScolTxt; + exportCsvButton.onmouseover = function(){ + this.style.backgroundColor = "rgb(59,127,224)"; + this.style.color = "rgb(255,255,255)"; + }; + exportCsvButton.onmouseout = function(){ + this.style.backgroundColor = theCSSbackgd; + this.style.color = theCSScolTxt; + }; + } + }); + }; + getCsvFileStatus(); + console.log("Sortie de la fonction init() " ); //__Modif__CN__ + } + +// ------------------------------------------------------------------------------------------------------------------------ + + function makeplot() { + + console.log("Entrée dans la fonction makeplot() " ); //__Modif__CN__ + + Plotly.d3.tsv("data/cytoBand.txt", + function(data){ processDataBand(data) } ); + + console.log("/api/projects/" + idProject + "/genome"); + + Plotly.d3.csv("/api/projects/" + idProject + "/genome", + function(error, data){ + if (error) { + $('#divWholeStatPlot').html('<p>An error has occurred</p>'); + return console.warn(error) + } + else{ + processDataJOSTmin(data) + } + } + ); + + console.log("Sortie de la fonction makeplot() " ); //__Modif__CN__ + + }; + +// --------------------------------------------------------------------------------------------------------------------------- + + function unpack(rows, key) { + return rows.map(function(row) { return row[key]; }); + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function unpackLog(rows, key) { + return rows.map(function(row) { return -Math.log10(row[key]); }); + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function processDataJOSTmin(rows) { + + console.log("Entrée dans la fonction processDataJOSTmin(rows) " ); //__Modif__CN__ + + $('#summaryStat').show(); + + + + console.log(rows); + + + var data = chromosomes.map(function(chr) { + + var rowsFiltered = rows.filter(function(row) { + return (row.CHR === chr); + }); + + return { + name: chr, + x: unpack(rowsFiltered, 'Region'), + y: unpackLog(rowsFiltered, 'JOSTmin'), + text: unpack(rowsFiltered, 'MiddlePosition'), + mode: 'markers', + showlegend : false, + type:'scatter', + marker: { + color: default_color + } + } + + }); + + + // fill the region table + processShowGJASSPVAL(rows); + + // Debut modif__CN__ + var marker_offset = 0.04; + + // ================================================================ + + function offset_signal(signal, marker_offset) { + + var result = 0; + if (Math.abs(signal) > marker_offset) { + if (signal > 0) { + result = signal - marker_offset; + } + else { + result = signal + marker_offset; + } + } + return result; + } + + // ================================================================ + + var k = 1; + + for (i = 0; i < data.length; i++) { + + k = 1-k; + data[i].marker.color = couleur[k]; + + for (j=0; j<data[i].x.length; j++){ + + var dict = { + "type": 'line', + "xref": 'x', + "yref": 'y', + "x0": data[i].x[j], + "y0": 0, + "x1": data[i].x[j], + "y1": offset_signal( data[i].y[j], marker_offset), + "line": { + color: data[i].marker.color, + width: 0.5 + } + }; + + table_lines.push(dict); + } + } + + var update = { + shapes: table_lines + }; + + var layout = { + title: "Joint test association results by region", + width: plotHeatMapSize, + hovermode:'closest', 
+ showLink: false, + modeBarButtonsToRemove: [], + font:{ + size:8 + }, + margin: { + l: 80, + r: 50, + b: 60, + t: 30, + pad: 4 + }, + xaxis: { + showgrid : false, + showticklabels :true, + tickmode: "array", + tickvals: ['Region67', 'Region206', 'Region339', 'Region461', + 'Region577', 'Region688', 'Region793', 'Region890', + 'Region974', 'Region1053', 'Region1138', 'Region1221', + 'Region1293', 'Region1352', 'Region1405', 'Region1457', + 'Region1507', 'Region1555', 'Region1598', 'Region1637', + 'Region1668', 'Region1692'], + ticktext: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', + '12', '13', '14', '15', '16', '17', '18', '19', '20', + '21', '22'], + tickfont: { + family: 'Arial, sans-serif', + size: 12, + color: 'black' + }, + title: "chromosome", + side: 'bottom', + type: 'category', + range : [0,'1704'] + }, + yaxis: { + title: "-log(Pvalue)", + type: 'log', + autorange: true, + tickfont: { + family: 'Arial, sans-serif', + size: 10, + color: 'black' + } + }, + }; + + Plotly.setPlotConfig({ + modeBarButtonsToRemove: [ + 'zoom2d', + 'pan2d', + 'select2d', + 'lasso2d', + 'zoomIn2d', + 'zoomOut2d', + 'autoScale2d', + 'resetScale2d', + 'hoverClosestCartesian', + 'hoverCompareCartesian' + ], + displaylogo: false + }); + + Plotly.plot('divWholeStatPlot', data, layout); + + var suggestiveline = -Math.log10(1e-5); + var suggestiveline_color = "orange"; + var suggestiveline_width = 1; + var genomewideline = -Math.log10(5e-8); + var genomewideline_color = "green"; + + var datmp = data[0]; + var tbx = datmp.x; + var d = tbx[0]; + + var datmp2 = data[data.length-1]; + var tbx2 = datmp2.x + var e = tbx2[tbx2.length-1]; + + console.log("datmp " +datmp); + console.log("d "+d +" e "+e); + console.log(tbx2); + + var trace1 = { + x: [d, e], + y: [suggestiveline, suggestiveline], + mode: 'lines+markers+text', + name: '1e-5', + textposition: 'top', + type: 'scatter', + showlegend: true, + marker: { + color: suggestiveline_color + } + }; + + var trace2 = { + x: [d, e], + y: [genomewideline, genomewideline], + mode: 'lines+markers+text', + name: '5e-8', + textposition: 'top', + type: 'scatter', + showlegend: true, + marker: { + color: genomewideline_color + } + }; + + console.log("trace2.marker.color " + trace2.marker.color); // __Modif__CN__ + + Plotly.addTraces('divWholeStatPlot', trace1); + Plotly.addTraces('divWholeStatPlot', trace2); + + // Apply the new layout to the plot + Plotly.relayout('divWholeStatPlot',update); + +// ================================================================ + + divWholeStatPlot.on('plotly_click', function(data){ + + console.log("entrée dans la fonction plotly_click de data " ); //__Modif__CN__ + + // ================================================================ + + var infotext = data.points.map(function(d){ + return (d.data.name+': x= '+d.x+', y= '+d.y.toPrecision(3)+' pos:' +d); + }); + + // ================================================================ + + console.log("infotext " + infotext); //__Modif__CN__ + + console.log("data.points[0].x " + data.points[0].x); + console.log("data.points[0] " + data.points[0]); + console.log("data.points[0].text " + data.points[0].text); + + // ================================================================ + + data.points.map(function(d){ + selectedChr =d.data.name; + console.log("!!! 
"+d.data.text[d.pointNumber]); + dodrawChr(d.data.name, d.data.text[d.pointNumber]); + sel.value = d.data.name; + }); + + // ================================================================ + + selectedRegion = data.points[0].x; + + console.log("selectedRegion " + selectedRegion); //__Modif__CN__ + + var updaterange = data.points[0].x.split("Region"); + + console.log("updaterange " + updaterange); //__Modif__CN__ + + console.log(updaterange[1]); + // region 840 -> 850 + var tmp= Number(updaterange[1])+10; + + console.log("tmp " + tmp); //__Modif__CN__ + + updaterange = "Region"+tmp; + console.log("updaterange "+updaterange); + + var select_rectangle = { + "type": 'rect', + // x-reference is assigned to the x-values + "xref": 'x', + // y-reference is assigned to the plot paper [0,1] + "yref": 'paper', + "x0": data.points[0].x, + "y0": 0, + "x1": updaterange, + "y1": 1, + "fillcolor": '#d3d3d3', + "opacity": 0.3, + "line": { + width: 0 + } + }; + console.log("dernier élément table_lines.type : " + table_lines[table_lines.length - 1].type); + if (table_lines[table_lines.length - 1].type == 'rect') { + console.log("ca marche"); + table_lines.pop(); + } + table_lines.push(select_rectangle); + + var update = { + shapes: table_lines + }; + + console.log("update qui ne fonctionne pas " + update); //__Modif__CN__ + + // Apply the new layout to the plot + Plotly.relayout(divWholeStatPlot,update); + + selectRegion(selectedChr,selectedRegion); + + console.log("Sortie de la fonction plotly_click de data " ); //__Modif__CN__ + + }); + +// ================================================================ + + console.log("Sortie de la fonction processDataJOSTmin(rows) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function selectRegion(chromosome, region){ + + console.log("Entrée dans la fonction selectRegion(chromosome, region) " ); //__Modif__CN__ + + // empty the local heatmap and local manhattan plot + Plotly.purge(divLocalStatPlot); + Plotly.purge(divHeatPlot); + + selectedRegion = region; + + console.log("selectedRegion "+ selectedRegion); //__Modif__CN__ + + selectedChr = chromosome; + + console.log("selectedChr "+ selectedChr); //__Modif__CN__ + // Block the GUI while waiting for the update + $.blockUI({ + css: { + border: 'none', + padding: '15px', + backgroundColor: '#000', + '-webkit-border-radius': '10px', + '-moz-border-radius': '10px', + opacity: .5, + color: '#fff' + } + }); + + console.log("/api/projects/"+idProject+"/manhattan/"+selectedChr+"/"+selectedRegion); + Plotly.d3.csv("/api/projects/" + idProject + "/manhattan/" + selectedChr + "/"+selectedRegion, + function(data){ processManhattanJASSPVAL(data) } ); + + console.log("Sortie de la fonction selectRegion(chrosome, region) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function processManhattanJASSPVAL(rows) { + + console.log("Entrée dans la fonction processManhattanJASSPVAL(rows) " ); //__Modif__CN__ + + // call heatmap when local manhattan is loaded + $('#exportLocalStatPlot').show(); + console.log("rows.length "+rows.length); + console.log("/api/projects/"+idProject+"/heatmap/"+selectedChr+"/"+selectedRegion); + Plotly.d3.csv("/api/projects/" + idProject + "/heatmap/" + selectedChr + "/" + selectedRegion, + function(data){ processData(data,"Heatmap") } ); + + var regions = []; + for (var i = 0; i < 2000; 
i++) { + regions[i] = "Region" + i; + } + + // create link to data table page to export zone of interrest + + sessionStorage.setItem("selectedRegion", selectedRegion); + sessionStorage.setItem("selectedChr", selectedChr); + + var desiredText = "Export data"; + + console.log("desiredText " + desiredText); //__Modif__CN__ + + var desiredLink = "export.html"; + + console.log("desiredLink " + desiredLink); //__Modif__CN__ + + console.log("selectedChr " + selectedChr ); //__Modif__CN__ + + var string_number_chr = selectedChr.slice(3, selectedChr.length); //__Modif__CN__ + console.log("string_number_chr "+ string_number_chr); //__Modif__CN__ + var number_chr = parseInt(string_number_chr, 10); + console.log("number_chr "+ number_chr); //__Modif__CN__ + var number_color = (number_chr + 1) % 2; + console.log("number_color "+ number_color); //__Modif__CN__ + + // ================================================================ + + var data = regions.map(function(region) { + + // ================================================================ + + var rowsFiltered = rows.filter(function(row) { + return (row.Region === region); + }); + + // ================================================================ + + + return { + name: region, + x: unpack(rowsFiltered, 'snp_ids'), + y: unpackLog(rowsFiltered, 'JASS_PVAL'), + text: unpack(rowsFiltered, 'Region'), + mode: 'markers', + type:'scatter', + marker: { + color: couleur[number_color] + } + } + //__Modif__CN__ + }); + + // ================================================================ + + var titleplot = "Joint test association results for locus " + + selectedRegion + " on " + selectedChr; + + console.log("titleplot " + titleplot); //__Modif__CN__ + + var layout = { + title: titleplot, + width: plotSize, + hovermode: 'closest', + font:{ + size:8 + }, + margin: { + l: 90, + r: 50, + b: 10, + t: 100, + pad: 4 + }, + xaxis: { + showticklabels :false, + ticks: '', + side: 'top', + type: 'category', + range : [-0.5,rows.length] + }, + yaxis: { + title: "-log(Pvalue)", + ticks: '', + ticksuffix: ' ', + fixedrange:true, + }, + }; + + console.log("layout " + layout); //__Modif__CN__ + + Plotly.plot('divLocalStatPlot', data, layout); + + var suggestiveline = -Math.log10(1e-5); + var suggestiveline_color = "orange"; + var suggestiveline_width = 1; + + var genomewideline = -Math.log10(5e-8); + var genomewideline_color = "green"; + + var datmp = rows[0]; + var d = datmp.snp_ids; + var datmp2 = rows[rows.length-1]; + var e= datmp2.snp_ids; + + console.log("d "+d +" e "+e); + + var trace1 = + { + x: [d,e], + y: [suggestiveline, suggestiveline], + mode: 'lines+markers+text', + name: '1e-5', + textposition: 'top', + type: 'scatter', + showlegend: true, + marker: { + color: suggestiveline_color + } + }; + + console.log("trace1 " + trace1); //__Modif__CN__ + + var trace2 = + { + x: [d, e], + y: [genomewideline, genomewideline], + mode: 'lines+markers+text', + name: '5e-8', + textposition: 'top', + type: 'scatter', + showlegend: true, + marker: { + color: genomewideline_color + } + }; + + console.log("trace2 " + trace2); //__Modif__CN__ + + Plotly.addTraces('divLocalStatPlot', trace1); + Plotly.addTraces('divLocalStatPlot', trace2); + Plotly.setPlotConfig({ + modeBarButtonsToRemove: ['zoom2d', + 'pan2d', + 'select2d', + 'lasso2d', + 'zoomIn2d', + 'zoomOut2d', + 'autoScale2d', + 'resetScale2d', + 'hoverClosestCartesian', + 'hoverCompareCartesian'], + displaylogo: false + }); + + // ================================================================================ + 
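+                        // Descriptive note (added comment): the 'plotly_relayout' handler below keeps the
+                        // local views in sync. When the user zooms or pans the local Manhattan plot, the new
+                        // x-axis range is copied onto the Z-score heatmap ('divHeatPlot') so both plots show
+                        // the same SNP window.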
divLocalStatPlot.on('plotly_relayout', + function(eventdata){ + var update = { + 'xaxis.range': [eventdata['xaxis.range[0]'], + eventdata['xaxis.range[1]']] + } + // Apply the new layout to the plot + Plotly.relayout('divHeatPlot', update); + }); + + console.log("Sortie de la fonction processManhattanJASSPVAL(rows) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function processData(allRows, plotType) { + + console.log("Entrée dans la fonction processData(allRows, plotType) " ); //__Modif__CN__ + + $.unblockUI(); + + console.log("type of Plot "+ plotType); + + var x = [], y = []; + var cols =[]; + var lines = []; + + for(var valeur in allRows[0]) { + if ((valeur != "")&&(valeur!='ID')){ + cols.push(valeur); + } + } + + var arr = []; + var row = allRows[0]; + + console.log(allRows[0]); + + for (var i=0; i<allRows.length; i++) { + row = allRows[i]; + // Creates an empty line + arr.push([]); + if(plotType == "Heatmap"){ + lines[i] = row['ID']; + } + // Adds cols to the empty line: + arr[i].push( new Array(cols)); + for(var j=0; j < cols.length; j++){ + if(plotType == "Heatmap"){ + arr[i][j] = row[cols[j]]; + } + else{ + if(row[cols[j]] == 0){ + console.log("ZEROOOOOOO !") + arr[i][j] = 32; + } + else{ + arr[i][j] = -Math.log10(row[cols[j]]); + } + } + } + } + + console.log(cols); + + console.log(lines); + + var customDict = {'Custom1':'','Custom2':''}; + var heatL = document.getElementById('HeatList'); + var colorsc = heatL.options[heatL.selectedIndex].value; + if (colorsc in customDict){ + colorsc = colorScale[colorsc]; + } + + console.log(cols[0]); + + console.log(lines[0]); + + var data = [ + { + z: arr, + x: cols, + y: lines, + type: 'heatmap', + colorscale :colorsc, + transforms: [{ + type: 'filter', + target: 'y', + operation: '!=', + value: 'PVALmin' + }] + } + ]; + + var res = -Math.log10(1e-5); + var res2 = -Math.log10(5e-8); + + console.log("!!"+res); + + console.log("!!"+res2); + + if (plotType == "Heatmap"){ + makeHeatmap(data); + } + else if (plotType == "Manhattan"){ + var d =cols[0]; + var e = cols[cols.length-1]; + + console.log("col "+cols[1]+" "+cols.length); + + console.log(arr[0]) + + makeManhattanly( cols, arr[0] ,d, e); + } + + console.log("Sortie de la fonction processData(allRows, plotType) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function makeHeatmap(data ){ + + console.log("Entrée dans la fonction makeHeatmap(data) " ); //__Modif__CN__ + + console.log(data); + + var heatmapHeight; + if(phenotypesCount < 10){ + heatmapHeight = phenotypesCount * 50; + } + else{ + heatmapHeight = phenotypesCount * 30; + } + var sizeLetterSnps = 65; + var borderHeatmap = 70; + heatmapHeight = phenotypesCount * 50 + (phenotypesCount*2 +sizeLetterSnps+borderHeatmap); + + var layout = { + title : "Z score Heatmap", + autosize: false, + height: heatmapHeight, + width: plotHeatMapSize, + font:{ + size:8 + }, + margin: { + l: 90, + r: 50, + b: 10, + t: 30, + pad: 4 + }, + xaxis: { + showticklabels :true, + ticks: '', + side: 'bottom', + fixedrange:true, + type: 'category' + , + rangeslider: { + thickness :0.07, + } + }, + yaxis: { + ticks: '', + fixedrange:true, + ticksuffix: ' ' + } + }; + + Plotly.newPlot('divHeatPlot', data, layout); + + // ============================================================================================== + 
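+            // Descriptive note (added comment): the heatmap's own 'plotly_relayout' handler below only logs
+            // the zoom event; the x-range object it builds is never applied to another plot. Range
+            // synchronisation is driven from the local Manhattan plot instead.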
divHeatPlot.on('plotly_relayout', + function(eventdata){ + console.log( 'ZOOM!' + '\n\n' + + 'Event data:' + '\n' + + JSON.stringify(eventdata) + '\n\n' + + 'x-axis start:' + eventdata['xaxis.range[0]'] + '\n' + + 'x-axis end:' + eventdata['xaxis.range[1]'] ); + + var update = { + 'xaxis.range': [eventdata['xaxis.range[0]'], + eventdata['xaxis.range[1]']], + } + } ); + heatmapPhenotype = divHeatPlot; + + // ============================================================================================== + + console.log("Sortie de la fonction makeHeatmap(data) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + //Change the heatmap color scale + function redrawHeatmap(){ + + console.log("Entrée dans la fonction redrawHeatmap() " ); //__Modif__CN__ + + //redraw the heatmap with new colorscale + + console.log(heatmapPhenotype); + + var heatL = document.getElementById('HeatList'); + var customDict = {'Custom1':'','Custom2':''}; + var colorsc = heatL.options[heatL.selectedIndex].value; + if (colorsc in customDict){ + colorsc = colorScale[colorsc]; + } + var data_update = { + colorscale : colorsc + }; + + console.log(data_update); + + Plotly.restyle(heatmapPhenotype, data_update , 0); + + console.log("Sortie de la fonction redrawHeatmap() " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function makeManhattanly( x, y ,d, e){ + /* + Create a "zoom" plot for the region of the selected point + on the main plot. + + x: x-coordinate of the point selected by the user + y: y-coordinate of the point selected by the user + d: first position of the selected region + e: last position of the selected region + */ + + console.log("Entrée dans la fonction makeManhattanly( x, y d, e) " ); //__Modif__CN__ + + var layout = { + title: "Joint test association results for locus Region474 on chr4", + width: plotSize, + margin: { + l: 90, + r: 50, + b: 10, + t: 100, + pad: 4 + }, + xaxis: { + showticklabels :true, + ticks: '', + side: 'top', + type: 'category', + textposition: 'down', + range : [0,'10000'] + }, + yaxis: { + title: "-log(Pvalue)", + autosize: true, + ticks: '', + textposition: 'down', + fixedrange:true, + } + }; + + var suggestiveline = -Math.log10(1e-5); + var suggestiveline_color = "orange"; + var suggestiveline_width = 1; + var genomewideline = -Math.log10(5e-8); + var genomewideline_color = "green"; + + var trace1 = { + x: [d,e], + y: [suggestiveline, suggestiveline], + mode: 'lines+markers+text', + name: '1e-5', + textposition: 'top', + type: 'scatter', + marker: { + color: suggestiveline_color + } + }; + + var trace2 = { + x: [d, e], + y: [genomewideline, genomewideline], + mode: 'lines+markers+text', + name: '5e-8', + textposition: 'top', + type: 'scatter', + marker: { + color: genomewideline_color + } + }; + + var traces = [ + { + x: x, + y: y, + mode: 'markers', + type:'scatter', + name: '-log(Pvalue)' + }, + trace1, + trace2 + ]; + + Plotly.newPlot('divLocalStatPlot', traces, layout); + + divLocalStatPlot.on('plotly_relayout', + function(eventdata){ + /* + Update the zoom plot with the range chosen by the user + eventdata: new limits of the interval + */ + var update = { + 'xaxis.range': [eventdata['xaxis.range[0]'], + eventdata['xaxis.range[1]']], + } + // Apply the new layout to the plot + Plotly.relayout('divHeatPlot', update); + }); + + console.log("Sortie de la fonction 
makeManhattanly( x, y d, e) " ); //__Modif__CN__ + + }; + +// --------------------------------------------------------------------------------------------------------------------------- + + function processDataBand(allRows) { + /* + Groups the data by chromosomes to draw the main plot + and to draw the chromosome selected by the user + allrows: the rows of the data + */ + console.log("Entrée dans la fonction processDataBand(allRows) " ); //__Modif__CN__ + + var chr =""; + var chrTab =[]; + var chrtmp=""; + var centroPosd=""; + var centroPosf=""; + + for (var i = 1; i <allRows.length; i++) { + row = allRows[i]; + chr = row['chr']; + if (( chrtmp == "" )||( chr ==chrtmp)){ + chrTab.push(row); + if(row['id'] == "acen"){ + if(centroPosd ==""){ + centroPosd = row['inf']; + } + else{ + centroPosf = row['sup']; + } + } + } + else{ + var chrom = new Chromosome( chrtmp, + paper, + syzeChr[chrtmp], + centroPosd, + centroPosf, + chrTab, + mapWidth, + mapHeight); + chrDict[chrtmp + ""] = chrom; + chrTab = []; + chrTab.push(row); + centroPosd =""; + centroPosf =""; + } + chrtmp = row['chr']; + } + + var chrom = new Chromosome( chrtmp, + paper, + syzeChr[chr], + centroPosd, + centroPosf, + chrTab, + mapWidth, + mapHeight); + chrDict[chr] = chrom; + + console.log("Sortie de la fonction processDataBand(allRows) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function dodraww(){ + /* + Callback running on change of selected chromosome to redraw chromosome + */ + console.log(" Entrée dans la fonction dodraww() " ); //__Modif__CN__ + + var chrid = sel.options[sel.selectedIndex].value; + dodrawChr(chrid,1); + + console.log("Sortie de la fonction dodraww() " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function dodrawChr(chrid,pos){ + /* + function that draws the chromosome selected by the user + chrid: identifier of the chromosome + pos: position of the region selected on the chromosome + */ + console.log("Entrée dans la fonction dodrawChr(chrid, pos) " ); //__Modif__CN__ + + $('#ChrList').show(); + $('#HeatList').show(); + console.log(chrid+" "+mapWidth); + paper.clear(); + var chrSize = syzeChr[chrid]; + + console.log(syzeChr[chrid]); + + var conv = mapWidth/chrSize; + + console.log(mapWidth+" "+chrSize+" "+ conv); + + chrom = chrDict[chrid]; + + console.log(chrDict); + console.log(chrom); + + chrom.drawMapH(conv); + var t = new Snap.Matrix(); + + console.log("t "+ t ); //__Modif__CN__ + + t.translate(50, 50); + + console.log("t "+ t ); //__Modif__CN__ + + // la ligne de code suivante qui permet de modifier la région + // est mise provisoirement en commentaire + //chrom.graphicChr.animate({ transform: t }, 1000, mina.bounce ); //__Modif__CN__ + if(pos !=-1){ + chrom.addZoom(pos); + } + + console.log("Sortie de la fonction dodrawChr(chrid, pos) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function doDrawAllChr(){ + + console.log("Entrée dans la fonction doDrawAllChr() " ); //__Modif__CN__ + + var totalSize = 3088286401; + var mapW = mapWidth - 200; + var conv = mapW / totalSize; + var xdecal = 0; + + for (var i=0; i <chrOrder.length; i++) { + var chrSize = syzeChr[chrOrder[i]]; + var mapChrWidth = mapW * chrSize / totalSize; + + console.log(chrOrder[i]); + + var chrom = 
chrDict[chrOrder[i]]; + chrom.arcP = 1; + var t = new Snap.Matrix(); + chrom.drawMapH(conv); + + console.log(mapW + " " + mapChrWidth + " " + xdecal); + + t.translate(xdecal,50 ); + chrom.graphicChr.animate({ transform: t }, 1000, mina.bounce ); + xdecal += mapChrWidth + 10; + } + + console.log("Sortie de la fonction doDrawAllChr() " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + $(function(){ + // Variable to get ids for the checkboxes + var idCounter = 1; + + // ===================================================================================== + + $("#exportCsv").click(function(){ + var desiredLink = "/api/projects/" + idProject + "/genome_full"; + + console.log("fullImg "+desiredLink); + + window.open(desiredLink,'_blank'); + }); + + // ===================================================================================== + + $("#fullImg").click(function(){ + var desiredLink = "/api/projects/"+idProject+"/globalmanhattan"; + + console.log("fullImg "+desiredLink); + + window.open(desiredLink,'_blank'); + }); + + // ===================================================================================== + + $("#quadrantImg").click(function(){ + var desiredLink = "/api/projects/"+idProject+"/quadrant"; + + console.log("quadrant "+desiredLink); + + window.open(desiredLink,'_blank'); + }); + + // ========================================================================================= + + $("#directLink").click(function(){ + + console.log(PhenotypesDict); + + var phe = ""; + for (var i = 0; i < PhenotypesDict.length-1; i++) { + phe += PhenotypesDict[i].id + ","; + } + phe += PhenotypesDict[PhenotypesDict.length-1].id; + + console.log(phe); + + var url_jass = window.location.protocol + "//" + window.location.host; + var desiredLink = url_jass + "/directLink.html?phenotypes=" + phe; + console.log("directLink " + desiredLink); + + $("#directLinkText").val(desiredLink); + $('#directLinkText').show(); + }); + + // ========================================================================================= + + $("#exportGlobal").click(function(){ + sessionStorage.setItem("typeExport","globalStat"); + window.open('export.html','_blank'); + + console.log("exportGlobal"); + + }); + + // ====================================================================================== + + $("#exportHeatmap").click(function(){ + sessionStorage.setItem("typeExport","heatmap"); + sessionStorage.setItem("selectedChr",selectedChr); + window.open('export.html','_blank'); + + console.log("exportHeatmap"); + + }); + + // ====================================================================================== + + $("#exportLocal").click(function(){ + sessionStorage.setItem("selectedRegion",selectedRegion); + sessionStorage.setItem("selectedChr",selectedChr); + sessionStorage.setItem("typeExport","localStat"); + var desiredText = "Export data"; + var desiredLink = "export.html"; + window.open('export.html','_blank'); + + console.log("exportLocal"); + + }); + + // ====================================================================================== + + }); + +// --------------------------------------------------------------------------------------------------------------------------- + + function processShowGJASSPVAL(rows) { + + console.log(" Entrée dans la fonction processShowGJASSPVAL(rows) " ); //__Modif__CN__ + + console.log("processShowGJASSPVAL rows.length "+rows.length); + + console.log(rows[0]); + + var columns 
= [ "CHR", + "JOSTmin", + "MiddlePosition", + "PVALmin", + "Region", + "signif_status", + "snp_ids"]; + tabulate(rows,columns); + + console.log("Sortie de la fonction processShowGJASSPVAL(rows) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + function tabulate(data, columns) { + + console.log("Entrée dans la fonction tabulate(data, columns ) " ); //__Modif__CN__ + + var tablediv = $('#pheTable'); + var thead = document.createElement("thead"); + var tr = document.createElement("tr"); + for (var i=0; i<columns.length; i++) { + if ((columns[i] != "ID")&&(columns[i] != "linkRef")){ + var th = document.createElement("th"); + th.innerHTML =columns[i]; + tr.appendChild(th); + } + } + thead.appendChild(tr); + tablediv.append(thead); + var tbody = document.createElement("tbody"); + for (var i=0; i<data.length; i++) { + var tr = document.createElement("tr"); + for (var j = 0; j < columns.length; j++) { + var td = document.createElement("td"); + td.innerHTML=data[i][columns[j]]; + if(columns[j] == "JASS_PVAL"){ + var val= -Math.log10(data[i][columns[j]]); + td.innerHTML= val; + tr.append(td); + } + else { + td.innerHTML=data[i][columns[j]]; + tr.append(td); + } + } + tbody.appendChild(tr); + } + tablediv.append(tbody); + var table = $('#pheTable').dataTable( { + aoColumns: [ { },{ },{ },{ },{ },{ },{ }], + dom: 'Bfrtip', + select: { + style: 'single' + }, + buttons: [ + { + extend: 'csv', + text : 'Export to CSV', + + filename: function(){ + var d = new Date(); + var n = d.getTime(); + return 'Data_Table_significant_regions'; + }, + } + ] + } ); + + console.log(table); + + table.api().column(5).search("Joint").draw(); + + // =========================================================================================== + $('#pheTable').on('click', 'tr', function () { + var datarow = table.api().row( this ).data(); + + console.log( 'You clicked on '+datarow[4]+'\'s row' ); + + selectedRegion = datarow[4]; + selectedChr = datarow[0]; + var pos = datarow[2]; + dodrawChr(selectedChr,pos); + sel.value = selectedChr; + var wholePlot = document.getElementById('divWholeStatPlot'); + var dataWhole = wholePlot.data; + var result + + // ============================================================================================ + dataWhole.filter(function(d){ + console.log(d); + if (d.name = selectedChr){ + d.x.forEach(function(valeur){ + if (valeur == selectedRegion){ + result = d; + } + }); + } + }); + // ============================================================================================ + + console.log(result); + + selectRegion(selectedChr,selectedRegion); + } ); + // ============================================================================================== + + console.log("Sortie de la fonction tabulate(data, columns ) " ); //__Modif__CN__ + + } + +// --------------------------------------------------------------------------------------------------------------------------- + + </script> +</body> + +</html> + diff --git a/jass/static/export.html b/jass/static/export.html index 25a6b90f3f58f5c12ece45243c3bcc0e9e1e1d7a..23f4666b72eb6bf6d26d8426d51d766dd5402908 100644 --- a/jass/static/export.html +++ b/jass/static/export.html @@ -124,22 +124,11 @@ var tr = document.createElement("tr"); //var val = data[i]['ID']; //var linkRef = data[i]['linkRef']; + for (var j=0; j<columns.length; j++) { - var td = document.createElement("td"); td.innerHTML=data[i][columns[j]]; - if(columns[j] == 
"JASS_PVAL"){ - var val= -Math.log10(data[i][columns[j]]); - td.innerHTML= val; - //idCounter ++; - tr.append(td); - } - - else { - td.innerHTML=data[i][columns[j]]; - tr.append(td); - } - + tr.append(td); } //var td = document.createElement("td"); //td.innerHTML='-'; diff --git a/jass/static/favicon.ico b/jass/static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..d8af31a0ed752246748f5f12ff4bb2f077404d60 Binary files /dev/null and b/jass/static/favicon.ico differ diff --git a/jass/static/images/sort_asc.png b/jass/static/images/sort_asc.png new file mode 100644 index 0000000000000000000000000000000000000000..e1ba61a8055fcb18273f2468d335572204667b1f Binary files /dev/null and b/jass/static/images/sort_asc.png differ diff --git a/jass/static/images/sort_asc_disabled.png b/jass/static/images/sort_asc_disabled.png new file mode 100644 index 0000000000000000000000000000000000000000..fb11dfe24a6c564cb7ddf8bc96703ebb121df1e7 Binary files /dev/null and b/jass/static/images/sort_asc_disabled.png differ diff --git a/jass/static/images/sort_both.png b/jass/static/images/sort_both.png new file mode 100644 index 0000000000000000000000000000000000000000..af5bc7c5a10b9d6d57cb641aeec752428a07f0ca Binary files /dev/null and b/jass/static/images/sort_both.png differ diff --git a/jass/static/images/sort_desc.png b/jass/static/images/sort_desc.png new file mode 100644 index 0000000000000000000000000000000000000000..0e156deb5f61d18f9e2ec5da4f6a8c94a5b4fb41 Binary files /dev/null and b/jass/static/images/sort_desc.png differ diff --git a/jass/static/images/sort_desc_disabled.png b/jass/static/images/sort_desc_disabled.png new file mode 100644 index 0000000000000000000000000000000000000000..c9fdd8a1502fda301682e907afde86bc450da10f Binary files /dev/null and b/jass/static/images/sort_desc_disabled.png differ diff --git a/jass/static/img/bkg_part1.jpg b/jass/static/img/bkg_part1.jpg index ffd434de93db4869d9ba7cb20627404eb98d9ca2..b22e8c670f0f1a3c53e1b24efe4ba9db8b4255d5 100644 Binary files a/jass/static/img/bkg_part1.jpg and b/jass/static/img/bkg_part1.jpg differ diff --git a/jass/static/img/jass19Avrilsmall.jpg b/jass/static/img/jass19Avrilsmall.jpg index e39ec7ab8f79b08b8d01f80990b6cc3d68c86891..9ab7240b257493e8980a295dcbdbcab22c4f2bef 100644 Binary files a/jass/static/img/jass19Avrilsmall.jpg and b/jass/static/img/jass19Avrilsmall.jpg differ diff --git a/jass/static/index.html b/jass/static/index.html index 376dd7668e2d284ba7027f377ac7b656c3e52f08..9f514a80e06a7bdf2aadcbef436a30d13ac534a9 100644 --- a/jass/static/index.html +++ b/jass/static/index.html @@ -1,69 +1,69 @@ -<!DOCTYPE html> -<html> - -<head> - - - <script src="https://code.jquery.com/jquery-1.12.4.js"></script> - <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script> - <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css"> - <link rel="stylesheet" href="https://research.pasteur.fr/wp-content/themes/biologyx2/style.css" type="text/css" > - <style> - #image-top {height:80px!important} - </style> - - <script> - $( function() { - $( "#tabs" ).tabs(); - } ); - function goToURL() { - location.href = '/selectPhenotypes.html'; - - } - </script> -</head> - -<body> - <div id="top-header"> - - <div id="image-top" class="img-filter" style="background-image:url(/img/bkg_part1.jpg)"> - <div id="image-credits"> - <div class="img-caption">© Jass</div> - <div class="img-description">Local Z score Heatmap.</div> - </div> - </div> - - - -</div> -<br> - <div id="tabs"> - <ul> - <li><a 
href="#tabs-1">Home</a></li> - <li><a href="#tabs-2">About</a></li> - <li><a href="#tabs-3">Citing Jass</a></li> - - </ul> - <div id="tabs-1" > - <h2>JASS allows for single SNP joint analysis of multiple GWAS summary statistics. Our database currently includes ~150 publicly available GWAS (see next page for the complete list). Three tests have been implemented: <br>For the moment being, the application is in its early state. E.g., only the omnibus version of the joint analysis is implemented and can be applied on only 25 phenotypes.<h2/> - <center><img src="img/jass19Avrilsmall.jpg" height="300" width="300"></center> - <form><input type="button" value="Let's Jass !" OnClick="goToURL();"></form> - </div> - <div id="tabs-2"> - <p>Jass is devellopped by the <a href="https://research.pasteur.fr/en/team/statistical-genetics/" target="_blank">Statistical Genetics</a> group in collaboration with the <a href="https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/">HUB</a> </p> - </div> - <div id="tabs-3"> - <p><b>JASS: command line and web interface for the joint analysis of GWAS results</b><br /> - Hanna Julienne, Pierre Lechat, Vincent Guillemot, Carla Lasry, Chunzi Yao, Robinson Araud, Vincent Laville, Bjarni Vilhjalmsson, Hervé Ménager, Hugues Aschard<br /> - in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, <a href="https://doi.org/10.1093/nargab/lqaa003"> <FONT color=#0000FF>https://doi.org/10.1093/nargab/lqaa003</FONT></a></p> - <p><b>Multitrait genetic-phenotype associations to connect disease variants and biological mechanisms</b><br /> - Hanna Julienne, Vincent Laville, Zachary R. McCaw, Zihuai He, Vincent Guillemot, Carla Lasry, Andrey Ziyatdinov, Amaury Vaysse, Pierre Lechat, Hervé Ménager, Wilfried Le Goff, Marie-Pierre Dube, Peter Kraft, Iuliana Ionita-Laza, Bjarni J. Vilhjálmsson, Hugues Aschard<br /> - preprint in: biorxiv, <a href=https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full> <FONT color=#0000FF>https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full</FONT></a></p> - </div> -</div> -</body> - -<footer id="colophon" class="site-footer" role="contentinfo"> -<a href="http://www.pasteur.fr/en" target="_blank"><div id="footlogo"></div></a> -</footer> -</html> +<!DOCTYPE html> +<html> + +<head> + + + <script src="https://code.jquery.com/jquery-1.12.4.js"></script> + <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script> + <link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css"> + <link rel="stylesheet" href="https://research.pasteur.fr/wp-content/themes/biologyx2/style.css" type="text/css" > + <style> + #image-top {height:80px!important} + </style> + + <script> + $( function() { + $( "#tabs" ).tabs(); + } ); + function goToURL() { + location.href = '/selectPhenotypes.html'; + + } + </script> +</head> + +<body> + <div id="top-header"> + + <div id="image-top" class="img-filter" style="background-image:url(/img/bkg_part1.jpg)"> + <div id="image-credits"> + <div class="img-caption">© Jass</div> + <div class="img-description">Local Z score Heatmap.</div> + </div> + </div> + + + +</div> +<br> + <div id="tabs"> + <ul> + <li><a href="#tabs-1">Home</a></li> + <li><a href="#tabs-2">About</a></li> + <li><a href="#tabs-3">Citing Jass</a></li> + + </ul> + <div id="tabs-1" > + <img src="img/jass19Avrilsmall.jpg" height="300" width="300" style="float: right;"> + <p><b><font size=+3>JASS allows for single SNP joint analysis of multiple GWAS summary statistics. 
Our database currently includes ~150 publicly available GWAS (see the next page for the complete list). Three tests have been implemented.<br>For the time being, the application is in an early state: only the omnibus version of the joint analysis is available, and it can be applied to at most 64 phenotypes.</font></b></p>
+        <form><input type="button" value="Let's Jass !" OnClick="goToURL();"></form>
+    </div>
+    <div id="tabs-2">
+      <p>Jass is developed by the <a href="https://research.pasteur.fr/en/team/statistical-genetics/" target="_blank">Statistical Genetics</a> group in collaboration with the <a href="https://research.pasteur.fr/en/team/bioinformatics-and-biostatistics-hub/">HUB</a> </p>
+    </div>
+    <div id="tabs-3">
+      <p><b>JASS: command line and web interface for the joint analysis of GWAS results</b><br />
+      Hanna Julienne, Pierre Lechat, Vincent Guillemot, Carla Lasry, Chunzi Yao, Robinson Araud, Vincent Laville, Bjarni Vilhjalmsson, Hervé Ménager, Hugues Aschard<br />
+      in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, <a href="https://doi.org/10.1093/nargab/lqaa003"> <FONT color=#0000FF>https://doi.org/10.1093/nargab/lqaa003</FONT></a></p>
+      <p><b>Multitrait genetic-phenotype associations to connect disease variants and biological mechanisms</b><br />
+      Hanna Julienne, Vincent Laville, Zachary R. McCaw, Zihuai He, Vincent Guillemot, Carla Lasry, Andrey Ziyatdinov, Amaury Vaysse, Pierre Lechat, Hervé Ménager, Wilfried Le Goff, Marie-Pierre Dube, Peter Kraft, Iuliana Ionita-Laza, Bjarni J. Vilhjálmsson, Hugues Aschard<br />
+      preprint in: bioRxiv, <a href="https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full"> <FONT color=#0000FF>https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full</FONT></a></p>
+    </div>
+</div>
+</body>
+
+<footer id="colophon" class="site-footer" role="contentinfo">
+<a href="http://www.pasteur.fr/en" target="_blank"><div id="footlogo"></div></a>
+</footer>
+</html>
diff --git a/jass/static/selectPhenotypes.html b/jass/static/selectPhenotypes.html
index feaabe636a4f2b06d5b2278539ab2de931d6e5b6..a49e381e85e5cca9b2f540591b1c3794aa740116 100644
--- a/jass/static/selectPhenotypes.html
+++ b/jass/static/selectPhenotypes.html
@@ -1,181 +1,234 @@
-<html>
-
-<head>
-    <script src="/js/jquery-3.2.1.min.js"></script>
-    <script src="/js/plotly-latest.min.js"></script>
-
-    <link rel="stylesheet" type="text/css" href="/css/jquery.dataTables.min.css">
-    <script type="text/javascript" charset="utf8" src="/js/jquery.dataTables.js"></script>
-    <script src="/js/jquery.blockUI.js"></script>
-    <link rel="stylesheet" href="http://ajax.googleapis.com/ajax/libs/jqueryui/1/themes/smoothness/jquery-ui.css">
-    <link rel="stylesheet" href="https://research.pasteur.fr/wp-content/themes/biologyx2/style.css" type="text/css" >
-    <style>
-        .header {
-            margin:0 0 30px 0;
-            padding: 5px 0 0 0 ;
-            height:50;
-            background-image: url('/img/bkg_part2_wthLabel.jpg');
-            background-color: transparent;
-            background-size:contain;
-            object-fit: contain;
-            background-repeat:no-repeat;
-        }
-        #image-top {height:80!important}
-    </style>
-
-    <script>
-
-    Plotly.d3.json('/api/phenotypes', function (error, data) {
-        function tabulate(data, columns) {
-            var table = $('#pheTable');
-            var thead = document.createElement("thead");
-            var tr = document.createElement("tr");
-            for (var i=0; i<columns.length; i++) {
-                if ((columns[i] != "id")&&(columns[i] != "ref_link")){
-                    var th = document.createElement("th");
-                    th.innerHTML =columns[i];
-                    tr.appendChild(th);
-                }
-            }
-            
thead.appendChild(tr); - table.append(thead); - var idCounter=1; - var tbody = document.createElement("tbody"); - for (var i=0; i<data.length; i++) { - var tr = document.createElement("tr"); - $(tr).attr('id', data[i]['id']); - var val = data[i]['id']; - var linkRef = data[i]['ref_link']; - for (var j=0; j<columns.length; j++) { - var td = document.createElement("td"); - td.innerHTML=data[i][columns[j]]; - if(columns[j] == "select"){ - td.innerHTML= "<input id='chk_" + idCounter + "' name ='type' type='checkbox' value='" + val + "' />"; - idCounter ++; - tr.appendChild(td); - } - else if (columns[j] == "ref"){ - td.innerHTML="<a href='"+linkRef+"'>"+data[i][columns[j]]+"</a>"; - tr.appendChild(td); - } - else if (columns[j] == "data_link"){ - td.innerHTML="<a href='"+data[i][columns[j]]+"' target='_blank'>"+data[i][columns[j]]+"</a>"; - tr.appendChild(td); - } - else if ((columns[j] != "id")&&(columns[j] != "ref_link")){ - td.innerHTML=data[i][columns[j]]; - tr.appendChild(td); - } - } - tbody.appendChild(tr); - } - table.append(tbody); - $('#pheTable').DataTable( { - aoColumns: [ { },{ },{ },{ },{ } ,{ sWidth: "10%" ,bSearchable: false},{} ] - } ); - - $('#pheTable').on( 'click', 'tr', function () { - $(this).toggleClass('selected'); - if($(this).find("input").attr("checked")=="checked"){ - $(this).find("input").attr("checked",null); - }else{ - $(this).find("input").attr("checked","checked"); - } - } ); - - } - // render the table(s) - tabulate(data, ['select','id','outcome', 'full_name','consortium','type','ref','ref_link','data_link']); - }); - function getSelected() { - var selected = new Array(); - $('#pheTable').DataTable().rows('.selected').ids().each(function(id){ - selected.push(id); - }); - console.log(selected); - return selected; - } - - $(function(){ - // Variable to get ids for the checkboxes - var idCounter=1; - - $("#btn1").click(function(){ - var selectedString = getSelected().join(','); - if(selectedString != ''){ - var phe = {}; - phe['phenotypeID'] = selectedString; - console.log("!!! "+selectedString); - console.log("!!! "+phe['phenotypeID']); - $.blockUI({ css: { - border: 'none', - padding: '15px', - backgroundColor: '#000', - '-webkit-border-radius': '10px', - '-moz-border-radius': '10px', - opacity: .5, - color: '#fff'} }); - - var status="-1"; - var getProjectStatus = function(){ - $.post( "/api/projects",phe).done(function( data ) { - status = data.status.worktable; - console.log("!! 
status "+status); - if(status =="READY"){ - $.unblockUI(); - console.log( data ); - sessionStorage.setItem("id",data.id); - console.log(data.phenotypes); - //var monobjet_json = JSON.stringify(data.phenotypes[0]); - var monobjet_json = JSON.stringify(data.phenotypes); - sessionStorage.setItem("phenotypes",monobjet_json); - console.log(data.phenotypes[0]["cohort"]); - //location.href = 'http://hub17.hosting.pasteur.fr/getVar.html'; - location.href = 'chromo_heatmap_manhattan.html'; - } - else if(status =="CREATING"){ - console.log("CREATING"); - setTimeout(getProjectStatus, 10000); - } - }); - - }; - getProjectStatus(); - - - } - else{ - console.log( "rien"); - $.blockUI({ message: '<h2><img src="img/busy.gif" /> Please choose an array of Phenotypes...</h2>' , - css: { - border: 'none', - padding: '15px', - backgroundColor: '#000', - '-webkit-border-radius': '10px', - '-moz-border-radius': '10px', - opacity: .5, - color: '#fff'}}); - setTimeout($.unblockUI, 2000); - } - }); - - }); - - - - </script> -</head> - -<body> - -<div class="header" > </div> - -<h2>Select the phenotypes you want to analyse jointly:</h2> - -<div id='divContainer'> - <table id ="pheTable" style="width: 100%" class="display dataTable"></table> -</div> -<button id="btn1">Select Phenotypes</button> -</body> - -</html> +<html> + +<head> + <script src="/js/jquery-3.2.1.min.js"></script> + <script src="/js/plotly-latest.min.js"></script> + + <link rel="stylesheet" type="text/css" href="/css/jquery.dataTables.min.css"> + <script type="text/javascript" charset="utf8" src="/js/jquery.dataTables.js"></script> + <script src="/js/jquery.blockUI.js"></script> + <link rel="stylesheet" href="http://ajax.googleapis.com/ajax/libs/jqueryui/1/themes/smoothness/jquery-ui.css"> + <link rel="stylesheet" href="https://research.pasteur.fr/wp-content/themes/biologyx2/style.css" type="text/css" > + <style> + .header { + margin:0 0 30px 0; + padding: 5px 0 0 0 ; + height:50; + background-image: url('/img/bkg_part2_wthLabel.jpg'); + background-color: transparent; + background-size:contain; + object-fit: contain; + background-repeat:no-repeat; + } + #image-top {height:80!important} + + div.blockMe { padding: 30px; margin: 30px; border: 10px solid #ccc; background-color: #ffd } + #question { background-color: #ffc; padding: 10px; } + #question input { width: 4em } + </style> + + <script> + + Plotly.d3.json('/api/phenotypes', function (error, data) { + function tabulate(data, columns) { + var table = $('#pheTable'); + var thead = document.createElement("thead"); + var tr = document.createElement("tr"); + for (var i=0; i<columns.length; i++) { + // Column "data_link" is not displayed: it is therefore not build + if ((columns[i] != "id")&&(columns[i] != "ref_link")&& (columns[i] != "data_link")){ + var th = document.createElement("th"); + th.innerHTML =columns[i]; + tr.appendChild(th); + } + } + thead.appendChild(tr); + table.append(thead); + var idCounter=1; + var tbody = document.createElement("tbody"); + for (var i=0; i<data.length; i++) { + var tr = document.createElement("tr"); + $(tr).attr('id', data[i]['id']); + var val = data[i]['id']; + var linkRef = data[i]['ref_link']; + for (var j=0; j<columns.length; j++) { + var td = document.createElement("td"); + td.innerHTML=data[i][columns[j]]; + if(columns[j] == "select"){ + td.innerHTML= "<input id='chk_" + idCounter + "' name ='type' type='checkbox' value='" + val + "' />"; + idCounter ++; + tr.appendChild(td); + } + else if (columns[j] == "ref"){ + td.innerHTML="<a 
href='"+linkRef+"'>"+data[i][columns[j]]+"</a>"; + tr.appendChild(td); + } + else if ((columns[j] != "id")&&(columns[j] != "ref_link")){ + td.innerHTML=data[i][columns[j]]; + tr.appendChild(td); + } + } + tbody.appendChild(tr); + } + table.append(tbody); + $('#pheTable').DataTable( { + aoColumns: [ { },{ },{ },{ },{ } ,{ sWidth: "10%" ,bSearchable: false} ] + } ); + + $('#pheTable').on( 'click', 'tr', function () { + $(this).toggleClass('selected'); + if($(this).find("input").attr("checked")=="checked"){ + $(this).find("input").attr("checked",null); + }else{ + $(this).find("input").attr("checked","checked"); + } + } ); + + } + // render the table(s) + tabulate(data, ['select','id','outcome', 'full_name','consortium','type','ref','ref_link']); + }); + function getSelected() { + var selected = new Array(); + $('#pheTable').DataTable().rows('.selected').ids().each(function(id){ + selected.push(id); + }); + console.log(selected); + return selected; + } + + $(function(){ + function avancement() { + var ava = document.getElementById("avancement"); + var prc = document.getElementById("pourcentage"); + prc.innerHTML = ava.value + "%"; + } + + function modif(val) { + var ava = document.getElementById("avancement"); + if((ava.value+val)<=ava.max && (ava.value+val)>0) { + ava.value += val; + } + avancement(); + } + + + // Variable to get ids for the checkboxes + var idCounter=1; + + $("#btn1").click(function(){ + var selectedNumber = getSelected().length; + console.log("selectedNumber = "+ selectedNumber); + var toApply = 1; + if (selectedNumber == 1){ + // A confirmation window is displayed + var r = confirm("ð–ð€ð‘ððˆðð†: ð²ð¨ð® ð¡ðšð¯ðž ð¬ðžð¥ðžðœððžð ð¨ð§ð¥ð² ð¨ð§ðž ð©ð¡ðžð§ð¨ðð²ð©ðž!\nIt isn't the way JASS normally works.\nDo you want to continue?"); + if (r == true) { // Button OK is selected + toApply = 1; + } else { // Button CANCEL is selected + toApply = 0; + } + } + else if ((selectedNumber > 20) && (selectedNumber <= 64)){ + // A confirmation window is displayed + var User_Message = "ð–ð€ð‘ððˆðð†: ð²ð¨ð® ð¡ðšð¯ðž ð¬ðžð¥ðžðœððžð " + + selectedNumber + + " ð©ð¡ðžð§ð¨ðð²ð©ðžð¬!\nThe computation will be very long. \nDo you want to continue?"; + var g = confirm(User_Message); + if (g == true) { // Button OK is selected + toApply = 1; + } else { // Button CANCEL is selected + toApply = 0; + } + } + else if (selectedNumber > 64) { + console.log( "ERROR: More than 64 Phenotypes have been selected !"); + var User_Message = "ð„ð‘ð‘ðŽð‘: ð²ð¨ð® ð¡ðšð¯ðž ð¬ðžð¥ðžðœððžð " + + selectedNumber + + " ð©ð¡ðžð§ð¨ðð²ð©ðžð¬!\nThe current implementation of JASS cannot analyze more than 64 phenotypes."; + alert(User_Message); + toApply = 0; + } + if (toApply == 1){ + var selectedString = getSelected().join(','); + if(selectedString != ''){ + var phe = {}; + phe['phenotypeID'] = selectedString; + console.log("!!! "+selectedString); + console.log("!!! "+phe['phenotypeID']); + + $.blockUI({ message: $('#question'), css: { width: '275px' }}); + avancement(); //Initialisation + + + var status="-1"; + var JASS_progress = 0; + var Old_progress = 0; + var getProjectStatus = function(){ + $.post( "/api/projects",phe).done(function( data ) { + status = data.status.worktable; + console.log("!! 
status "+status); + JASS_progress = data.progress; + console.log(">>>>>>> progress "+JASS_progress); + var deltaProgress = JASS_progress - Old_progress; + Old_progress = JASS_progress; + modif(deltaProgress); + + + if(status =="READY"){ + $.unblockUI(); + console.log( data ); + sessionStorage.setItem("id",data.id); + console.log(data.phenotypes); + //var monobjet_json = JSON.stringify(data.phenotypes[0]); + var monobjet_json = JSON.stringify(data.phenotypes); + sessionStorage.setItem("phenotypes",monobjet_json); + console.log(data.phenotypes[0]["cohort"]); + //location.href = 'http://hub17.hosting.pasteur.fr/getVar.html'; + location.href = 'chromo_heatmap_manhattan.html'; + } + else if(status =="CREATING"){ + console.log("CREATING"); + + + setTimeout(getProjectStatus, 10000); + } + }); + + }; + getProjectStatus(); + } + else{ + console.log( "WARNING: No phenotype is selected !"); + alert("ð–ð€ð‘ððˆðð†: ðð¥ðžðšð¬ðž ðœð¡ð¨ð¨ð¬ðž ðšð§ ðšð«ð«ðšð² ð¨ðŸ ðð¡ðžð§ð¨ðð²ð©ðžð¬..."); + } + } + }); + + }); + + + + </script> +</head> + +<body> + +<div class="header" > </div> + +<h2>Select the phenotypes you want to analyse jointly:</h2> + +<div id='divContainer'> + <table id ="pheTable" style="width: 100%" class="display dataTable"></table> +</div> + +<div id="question" style="display:none; cursor: default"> + <H3>Analysis in progress ...</H3> + <progress id="avancement" value="0" max="100"></progress> + <span id="pourcentage"></span> +</div> + +<button id="btn1">Select Phenotypes</button> +</body> + +</html> diff --git a/jass/swagger/globalmanhattan_example.png b/jass/swagger/globalmanhattan_example.png new file mode 100644 index 0000000000000000000000000000000000000000..8607ccd06684422ad42fc1d08417e7cd2c986e89 Binary files /dev/null and b/jass/swagger/globalmanhattan_example.png differ diff --git a/jass/swagger/quadrant_example.png b/jass/swagger/quadrant_example.png new file mode 100644 index 0000000000000000000000000000000000000000..892505a9225cdd903f8e7fc678456cdec3bed89d Binary files /dev/null and b/jass/swagger/quadrant_example.png differ diff --git a/jass/swagger/swagger.yaml b/jass/swagger/swagger.yaml index d0e1d5487d647cee6488ce0c4de82acc5a7a09f3..c47461d30df983b6ba9c213ded0e1ab49e17d23a 100644 --- a/jass/swagger/swagger.yaml +++ b/jass/swagger/swagger.yaml @@ -1,238 +1,458 @@ ---- -swagger: "2.0" -info: - version: "0.0.0" - title: "JASS API Specification" -paths: - /phenotypes: - get: - description: "Gets the list of available phenotypes\n" - operationId: "phenotypes_get" - parameters: [] - responses: - 200: - description: "List of the available phenotypes" - schema: - type: "array" - title: "ArrayOfPhenotypes" - items: - $ref: "#/definitions/Phenotype" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects: - post: - description: "Create a new project from a selection of phenotypes\n" - operationId: "projects_post" - parameters: - - name: "phenotypeID" - in: "formData" - description: "IDs of the phenotypes selected for the project" - required: true - type: "array" - items: - type: "string" - collectionFormat: "multi" - responses: - 200: - description: "Project created" - schema: - $ref: "#/definitions/Phenotype" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}: - get: - description: "Retrieve a project definition\n" - operationId: "projects_project_idget" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Retrieved project" - 
schema: - $ref: "#/definitions/Phenotype" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/summary: - get: - description: "Retrieve summary statistics for a given project" - operationId: "projects_project_id_summary_statistics" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Summary statistics in JSON" - schema: - type: "string" - title: "Project summary statistics" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/genome: - get: - description: "Retrieve whole genome summary data for a given project\n" - operationId: "projects_project_id_genome_get" - produces: - - "text/csv; charset=utf-8" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Whole genome summary data in CSV format\n" - schema: - type: "string" - title: "Genome data" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/genome_full: - get: - description: "Retrieve whole genome complete (not summarized) data for a given project\n" - operationId: "projects_project_id_genome_full_get" - produces: - - "text/csv; charset=utf-8" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Retrieve whole genome complete (not summarized) data for a given project in CSV format\n" - schema: - type: "string" - title: "Genome data" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/globalmanhattan: - get: - description: "Retrieve global manhattan plot for a given project\n" - operationId: "projects_project_id_global_manhattan_plot_get" - produces: - - "image/png" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Global manhattan plot in PNG format\n" - schema: - type: "string" - title: "Global manhattan plot" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/quadrant: - get: - description: "Retrieve quadrant plot for a given project\n" - operationId: "projects_project_id_quadrant_plot_get" - produces: - - "image/png" - - "text/plain" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - responses: - 200: - description: "Quadrant plot in PNG format\n" - schema: - type: "string" - title: "Quadrant plot" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/manhattan/{chromosome}/{region}: - get: - description: "Retrieve local manhattan data" - operationId: "projects_project_id_local_manhattan_data_get" - produces: - - "text/csv; charset=utf-8" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - - name: "chromosome" - in: "path" - description: "chromosome number" - required: true - type: "string" - - name: "region" - in: "path" - description: "region number" - required: true - type: "string" - responses: - 200: - description: "Local manhattan plot data in CSV format\n" - schema: - type: "string" - title: "Local manhattan plot data" - x-swagger-router-controller: "jass.controllers.default_controller" - /projects/{projectID}/heatmap/{chromosome}/{region}: - get: - description: "Retrieve local heatmap data" - 
operationId: "projects_project_id_local_heatmap_data_get" - produces: - - "text/csv; charset=utf-8" - parameters: - - name: "projectID" - in: "path" - description: "project ID" - required: true - type: "string" - - name: "chromosome" - in: "path" - description: "chromosome number" - required: true - type: "string" - - name: "region" - in: "path" - description: "region number" - required: true - type: "string" - responses: - 200: - description: "Local manhattan plot data in CSV format\n" - schema: - type: "string" - title: "Local heatmap plot data" - x-swagger-router-controller: "jass.controllers.default_controller" -definitions: - Phenotype: - properties: - id: - type: "string" - consortium: - type: "string" - outcome: - type: "string" - full_name: - type: "string" - type: - type: "string" - ref: - type: "string" - ref_link: - type: "string" - data_link: - type: "string" - data_path: - type: "string" - Project: - properties: - id: - type: "string" - status: - type: "string" - outcome: - type: "array" - items: - $ref: "#/definitions/Phenotype" +openapi: 3.0.0 +info: + version: 0.0.0 + title: JASS API Specification +paths: + /phenotypes: + get: + description: | + Gets the list of available phenotypes + operationId: phenotypes_get + responses: + "200": + description: List of the available phenotypes + content: + "application/json": + schema: + type: array + title: ArrayOfPhenotypes + items: + $ref: "#/components/schemas/Phenotype" + example: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + x-openapi-router-controller: jass.controllers.default_controller + /projects: + post: + description: | + Create a new project from a selection of phenotypes + operationId: projects_post + requestBody: + content: + application/x-www-form-urlencoded: + schema: + type: object + properties: + phenotypeID: + description: IDs of the phenotypes selected for the project + type: array + items: + type: string + required: + - phenotypeID + example: + - z_IHEC_MONOP + - z_RA_RA + responses: + "200": + description: Project created + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + examples: + Creating: + value: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + phenotypes: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 
2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + status": + "global_manhattan": "CREATING" + "quadrant_plot_status": "CREATING" + "worktable": "CREATING" + progress": + "progress": "0" + Ready: + value: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + phenotypes: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + status": + "global_manhattan": "READY" + "quadrant_plot_status": "READY" + "worktable": "READY" + progress": + "progress": "100" + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}": + get: + description: | + Retrieve a project definition + operationId: projects_project_idget + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: Retrieved project + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + example: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + status": + "global_manhattan": "READY" + "quadrant_plot_status": "READY" + "worktable": "READY" + progress": + "progress": "100" + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}/summary": + get: + description: Retrieve summary statistics for a given project + operationId: projects_project_id_summary_statistics + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: Summary statistics in JSON + content: + "*/*": + schema: + type: string + title: Project summary statistics + example: + "JOSTSignif": + "NoPhenoSignif": 10 + "PhenoSignif": 210 + "NoJOSTSignif": + "NoPhenoSignif": 1470 + "PhenoSignif": 14 + x-openapi-router-controller: jass.controllers.default_controller + + "/projects/{projectID}/csv_status": + get: + description: | + Retrieve the generation status of the genome full csv file + operationId: projects_project_id_csv_status_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Generation status of the genome full csv file + content: + text/csv; charset=utf-8: + schema: + type: string + title: csv_file_generation + example: | + READY + x-openapi-router-controller: jass.controllers.default_controller + + "/projects/{projectID}/genome": + get: + description: | + Retrieve whole genome summary data for a given project + operationId: projects_project_id_genome_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Whole genome summary data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Genome data + example: | + 
Region,MiddlePosition,snp_ids,CHR,position,Ref_allele,Alt_allele,JOSTmin,UNIVARIATE_MIN_PVAL,signif_status,z_IHEC_MONOP,z_RA_RA + Region0,0.0,rs61997853,chr15,20001087,C,A,0.3085010612493116,0.300300000000003,None,-1.0357902654077036, + Region1,951595.0,rs28635530,chr1,1649392,T,C,4.038788020606384e-06,2.7559999999999873e-06,None,4.688213804974398,2.999976992703393 + Region2,2737671.5,rs72644697,chr1,2533605,A,G,2.4600636176583336e-10,1.6188949607934016e-10,Both,-1.4356568827860683,-6.393727818324495 + Region3,3981773.5,rs12145992,chr1,3760309,A,G,0.0002538976722549933,0.00026034845141981964,None,-1.6164363711150218,3.651859369008055 + Region4,5147352.0,rs2649072,chr1,5754088,G,A,0.0007863952492527496,0.0006378999999999985,None,3.4149658871961184,-2.999976992703393 + Region5,6580614.0,rs2986741,chr1,6548774,G,A,0.0013472918321710914,0.0011119999999999993,None,-3.260540717377886,2.726551316504396 + Region6,8306267.0,rs79412885,chr1,9241839,A,G,2.0889091093474285e-13,8.106999999999937e-14,Both,7.46857160133221,-1.2003588580308502 + Region7,10086091.5,rs113829298,chr1,10061038,T,C,4.3158209846991565e-05,6.135999999999996e-06,None,-4.5216481219798474,0.5100734569685951 + x-openapi-router-controller: jass.controllers.default_controller + + "/projects/{projectID}/genome_full": + get: + description: | + Retrieve whole genome complete (not summarized) data for a given project + operationId: projects_project_id_genome_full_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: > + Retrieve whole genome complete (not summarized) data for a given + project in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Genome data + example: | + Region,CHR,snp_ids,position,Ref_allele,Alt_allele,MiddlePosition,JASS_PVAL,UNIVARIATE_MIN_PVAL,UNIVARIATE_MIN_QVAL,PLEIOTROPY_INDEX,z_IHEC_MONOP,z_RA_RA + 194218,0,6,rs530120680,63980,G,A,0.0,0.6556994470547299,0.6501999999999999,0.6501999999999999,,0.4534843639637209, + 194219,0,6,rs561313667,63979,T,C,0.0,0.538722344719537,0.5318,0.5318,,0.6252606526209208, + 194220,0,15,rs113772187,20000538,T,C,0.0,0.5218942699938458,0.5148000000000001,0.5148000000000001,,-0.651381887083556, + 194221,0,15,rs61997852,20001079,C,A,0.0,0.33819737748654505,0.33000000000000274,0.33000000000000274,,-0.9741138770593036, + 194222,0,15,rs61997853,20001087,C,A,0.0,0.3085010612493116,0.300300000000003,0.300300000000003,,-1.0357902654077036, + 867,1,1,rs10454464,1833495,A,G,951595.0,0.2523888759643953,0.19000000000000383,0.38000000000000766,,-0.9817967289175548,1.3105791121681174 + 4836,1,1,rs10907175,1130727,C,A,951595.0,0.3313846158840952,0.21180000000000354,0.4236000000000071,,1.2486311872236304,0.8380752842791193 + 4837,1,1,rs10907176,1130855,C,T,951595.0,0.3455642965805482,0.2251000000000033,0.4502000000000066,,1.213078000845954,0.8380752842791193 + 4838,1,1,rs10907185,1733219,A,G,951595.0,0.3919109214945312,0.25000000000000333,0.5000000000000067,,-0.7010048315295425,1.1503493803760003 + 4839,1,1,rs10907187,1759054,A,G,951595.0,0.16161974795348924,0.09099999999999998,0.18199999999999997,,-0.8114627085037827,1.6901461375274702 + 4840,1,1,rs10907188,1768894,T,C,951595.0,0.15366147518120524,0.08599999999999992,0.17199999999999985,,-0.8149516264832889,1.7168860184310413 + 4841,1,1,rs10907190,1773772,A,G,951595.0,0.12979175667585227,0.07999999999999982,0.15999999999999964,,-0.938281041511616,1.7506860712521708 + 
4842,1,1,rs10907193,1805391,A,G,951595.0,0.09562672355608258,0.06299999999999988,0.12599999999999975,,-1.0405165049626888,1.8591914944718688 + 4843,1,1,rs10907194,1712230,T,C,951595.0,0.2669995168398967,0.16000000000000425,0.3200000000000085,,-0.7600913211933399,1.4050715603096189 + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}/globalmanhattan": + get: + description: | + Retrieve global manhattan plot for a given project + operationId: projects_project_id_global_manhattan_plot_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Global manhattan plot in PNG format + content: + image/png: + schema: + type: string + title: Global manhattan plot + example: + externalValue: 'globalmanhattan_example.png' + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}/quadrant": + get: + description: | + Retrieve quadrant plot for a given project + operationId: projects_project_id_quadrant_plot_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Quadrant plot in PNG format + content: + image/png: + schema: + type: string + title: Quadrant plot + example: + externalValue: 'quadrant_example.png' + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}/manhattan/{chromosome}/{region}": + get: + description: Retrieve local manhattan data + operationId: projects_project_id_local_manhattan_data_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + - name: chromosome + in: path + description: chromosome number + required: true + schema: + type: string + example: "chr1" + - name: region + in: path + description: region number + required: true + schema: + type: string + example: "Region1" + responses: + "200": + description: | + Local manhattan plot data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Local manhattan plot data + example: | + Region,CHR,position,snp_ids,JASS_PVAL + Region1,chr1,636285,rs545945172,0.7216577092326355 + Region1,chr1,662414,rs371628865,0.6725666758876788 + Region1,chr1,662622,rs61769339,0.405683282952658 + Region1,chr1,665266,rs539032812,0.3348008598497511 + Region1,chr1,693731,rs12238997,0.4952464035829177 + Region1,chr1,701835,rs189800799,0.666563580963709 + Region1,chr1,706778,rs113462541,0.861641963454806 + Region1,chr1,711310,rs200531508,0.07463266395489108 + Region1,chr1,714019,rs114983708,0.6667198743938074 + Region1,chr1,715265,rs12184267,0.6666055494294745 + Region1,chr1,715367,rs12184277,0.7657858702655146 + Region1,chr1,720381,rs116801199,0.6816390671665746 + Region1,chr1,723742,rs28375378,0.7124933618852456 + Region1,chr1,724324,rs28692873,0.9212425499680825 + Region1,chr1,725196,rs377099097,0.594983644175122 + Region1,chr1,725389,rs375619475,0.7032290172253173 + Region1,chr1,727841,rs116587930,0.9078685880041112 + x-openapi-router-controller: jass.controllers.default_controller + "/projects/{projectID}/heatmap/{chromosome}/{region}": + get: + description: Retrieve local heatmap data + operationId: projects_project_id_local_heatmap_data_get + parameters: + - name: projectID + in: path + description: project ID + 
required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+        - name: chromosome
+          in: path
+          description: chromosome number
+          required: true
+          schema:
+            type: string
+          example: "chr1"
+        - name: region
+          in: path
+          description: region number
+          required: true
+          schema:
+            type: string
+          example: "Region1"
+      responses:
+        "200":
+          description: |
+            Local heatmap data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Local heatmap plot data
+              example: |
+                ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799
+                z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385
+                z_RA_RA,,,,,,
+      x-openapi-router-controller: jass.controllers.default_controller
+components:
+  schemas:
+    Phenotype:
+      properties:
+        id:
+          type: string
+        consortium:
+          type: string
+        outcome:
+          type: string
+        full_name:
+          type: string
+        type:
+          type: string
+        ref:
+          type: string
+        ref_link:
+          type: string
+        data_link:
+          type: string
+        data_path:
+          type: string
+    Project:
+      properties:
+        id:
+          type: string
+        status:
+          type: string
+        progress:
+          type: string
+        outcome:
+          type: array
+          items:
+            $ref: "#/components/schemas/Phenotype"
+
diff --git a/jass/test/test_plots.py b/jass/test/test_plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bdbdf0fb7dd42ff4e24493dd763c2ae0a1ad3f9
--- /dev/null
+++ b/jass/test/test_plots.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+
+from __future__ import absolute_import
+import os, shutil, tempfile
+
+from pandas import read_hdf
+from pandas.testing import assert_frame_equal
+
+from jass.models.plots import create_global_plot
+
+from . import JassTestCase
+
+
+class TestPlots(JassTestCase):
+
+    test_folder = "data_test1"
+
+    def setUp(self):
+        # Create a temporary directory
+        self.test_dir = tempfile.mkdtemp()
+        self.worktable_hdf_path = self.get_file_path_fn("worktable.hdf5")
+        self.global_plot_path = os.path.join(self.test_dir, "global_manhattan.png")
+
+    def tearDown(self):
+        # Remove the directory after the test
+        shutil.rmtree(self.test_dir)
+        pass
+
+    def test_create_global_plot(self):
+        """
+        Check that the global Manhattan plot can be generated from the test worktable
+        """
+        create_global_plot(self.worktable_hdf_path, self.global_plot_path)
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()
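
Editor's note (not part of the patch): the central behaviour this patch introduces is progress reporting during project creation. The selection page re-posts to /api/projects every 10 seconds and reads status.worktable and progress from the response until the worktable is READY, and swagger.yaml now documents the progress field. The sketch below mirrors that polling loop from the command line, for illustration only; the base URL, the use of the requests package, and the comma-separated phenotypeID form field (as sent by the page above) are assumptions, not something this patch provides.

    # poll_project.py -- illustrative sketch only, not part of the patch.
    # Assumes a JASS server reachable at BASE_URL and the `requests` package.
    import time
    import requests

    BASE_URL = "http://localhost:8080/api"  # assumed host and port

    def create_project_and_wait(phenotype_ids, poll_seconds=10):
        """Create a project and poll it until its worktable is READY."""
        # The selection page sends the IDs as one comma-separated form field.
        payload = {"phenotypeID": ",".join(phenotype_ids)}
        while True:
            data = requests.post(BASE_URL + "/projects", data=payload).json()
            print("worktable:", data["status"]["worktable"],
                  "progress:", data.get("progress"))
            if data["status"]["worktable"] == "READY":
                return data["id"]
            time.sleep(poll_seconds)

    if __name__ == "__main__":
        project_id = create_project_and_wait(["z_IHEC_MONOP", "z_RA_RA"])
        # Once ready, the plots documented in swagger.yaml can be fetched as PNG.
        png = requests.get(BASE_URL + "/projects/" + project_id + "/globalmanhattan")
        with open("global_manhattan.png", "wb") as fh:
            fh.write(png.content)

Note that, like the page above, this loop re-issues the same POST on every poll rather than querying GET /projects/{projectID}; that mirrors the front-end's behaviour as written in this patch.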