diff --git a/Dockerfile b/Dockerfile index a861df27ca4c8544c1ecd482543794baad34491e..4f837065c9c7ecb64aae692a9550ebb51c17c3a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,9 @@ FROM centos:7 RUN yum install -y epel-release wget gcc https://repo.ius.io/ius-release-el7.rpm RUN yum update -y -RUN yum install -y python35u python35u-libs python35u-devel python35u-pip openssl-devel libffi-devel -RUN pip3.5 install ansible +RUN yum install -y python36u python36u-libs python36u-devel python36u-pip openssl-devel libffi-devel +RUN pip3.6 install --upgrade pip +RUN pip3.6 install ansible RUN yum install -y python-pip RUN python -m pip install --upgrade pip COPY . /code diff --git a/jass/__init__.py b/jass/__init__.py index e27073c7f61922fed66faf175167e9ddc9d6ff8c..a88eb7abf651d44a7e45578285661a85262703a3 100644 --- a/jass/__init__.py +++ b/jass/__init__.py @@ -17,4 +17,4 @@ Submodules """ import os -from jass.tasks import celery \ No newline at end of file +from jass.tasks import celery diff --git a/jass/__main__.py b/jass/__main__.py index c3d79337ae16503bfde99f52b7bb713acebe546b..9bd45225639dd9733cc201ba192250d0b41e0685 100644 --- a/jass/__main__.py +++ b/jass/__main__.py @@ -5,59 +5,66 @@ import os import sys import argparse -from jass.server import get_jass_app +from jass.server import jass_app from jass.config import config from jass.models.phenotype import get_available_phenotypes from jass.models.inittable import create_inittable_file, add_gene_annotation from jass.models.worktable import create_worktable_file -from jass.models.plots import ( create_global_plot, - create_quadrant_plot, - create_local_plot, - create_qq_plot) +from jass.models.plots import ( + create_global_plot, + create_quadrant_plot, + create_local_plot, + create_qq_plot, +) -def absolute_path_of_the_file(fileName, output_file = False): + +def absolute_path_of_the_file(fileName, output_file=False): """ Builds the absolute path of the file : fileName This makes the execution of JASS functions more robust and flexible """ - + # Build an absolute path if possible absoluteFilePath = os.path.abspath(fileName) # Test if the file name is a pattern - is_a_pattern = (os.path.basename(fileName).find("*") > -1) + is_a_pattern = os.path.basename(fileName).find("*") > -1 - if (is_a_pattern or output_file) : + if is_a_pattern or output_file: # Test if the directory path exist Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) - if (Directory_path_exist == False): + if Directory_path_exist == False: # Test the path using the Jass data directory - absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + absoluteFilePath = os.path.normpath( + os.path.join(config["DATA_DIR"], fileName) + ) Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) - if (Directory_path_exist == False): + if Directory_path_exist == False: Message = "The directory of the file {} does not exist".format(fileName) raise NameError(Message) else: # Test if the file path exist File_path_exist = os.path.exists(absoluteFilePath) - if (File_path_exist == False): + if File_path_exist == False: # Test the path using the Jass data directory - absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + absoluteFilePath = os.path.normpath( + os.path.join(config["DATA_DIR"], fileName) + ) File_path_exist = os.path.exists(absoluteFilePath) - if (File_path_exist == False): + if File_path_exist == False: Message = "The file {} does not exist".format(fileName) raise NameError(Message) # Test if 
it is really a file Is_a_file = os.path.isfile(absoluteFilePath) -    if (not Is_a_file) : +    if not Is_a_file: Message = "{} is not a file".format(fileName) raise NameError(Message) @@ -65,8 +72,7 @@ def absolute_path_of_the_file(fileName, output_file = False): def serve(args): -    app = get_jass_app() -    app.run(host=config["HOST"], port=config["PORT"]) +    jass_app.flask_app.run(host=config["HOST"], port=config["PORT"]) def w_list_phenotypes(args): @@ -77,12 +83,12 @@ def compute_worktable(args): csv_file_path = args.csv_file_path -    if (csv_file_path is not None): +    if csv_file_path is not None: csv_file_path = absolute_path_of_the_file(csv_file_path, True) init_table_path = absolute_path_of_the_file(args.init_table_path) worktable_path = absolute_path_of_the_file(args.worktable_path, True) selected_phenotypes = args.phenotypes -    remove_nan = (args.remove_nans) +    remove_nan = args.remove_nans significance_treshold = float(args.significance_treshold) post_filtering = bool(args.post_filtering) custom_loadings = args.custom_loadings @@ -91,9 +97,9 @@ def compute_worktable(args): pos_End = args.end_position if args.omnibus: -        strategy = 'jass.models.stats:omnibus_stat' +        strategy = "jass.models.stats:omnibus_stat" elif args.sumz: -        strategy = 'jass.models.stats:sumz_stat' +        strategy = "jass.models.stats:sumz_stat" elif args.fisher_test: strategy = "jass.models.stats:fisher_test" elif args.meta_analysis: @@ -102,21 +108,21 @@ def compute_worktable(args): strategy = args.strategy create_worktable_file( -        phenotype_ids = selected_phenotypes, -        init_file_path = init_table_path, -        project_hdf_path = worktable_path, -        remove_nan = remove_nan, -        stat = strategy, -        optim_na = True, -        csv_file = csv_file_path, -        chunk_size = int(args.chunk_size), -        significance_treshold = significance_treshold, -        post_filtering = post_filtering, -        delayed_gen_csv_file = False, -        chromosome = chromosome, -        pos_Start = pos_Start, -        pos_End = pos_End, -        custom_loadings = custom_loadings +        phenotype_ids=selected_phenotypes, +        init_file_path=init_table_path, +        project_hdf_path=worktable_path, +        remove_nan=remove_nan, +        stat=strategy, +        optim_na=True, +        csv_file=csv_file_path, +        chunk_size=int(args.chunk_size), +        significance_treshold=significance_treshold, +        post_filtering=post_filtering, +        delayed_gen_csv_file=False, +        chromosome=chromosome, +        pos_Start=pos_Start, +        pos_End=pos_End, +        custom_loadings=custom_loadings, ) @@ -126,28 +132,25 @@ def w_create_worktable(args): def w_create_project_data(args): compute_worktable(args) -    worktable_path = absolute_path_of_the_file(args.worktable_path, True) -    manhattan_plot_path = args.manhattan_plot_path -    if (manhattan_plot_path is not None): +    if manhattan_plot_path is not None: manhattan_plot_path = absolute_path_of_the_file(manhattan_plot_path, True) create_global_plot(worktable_path, manhattan_plot_path) -    quadrant_plot_path = args.quadrant_plot_path -    if (quadrant_plot_path is not None): -        quadrant_plot_path = absolute_path_of_the_file(quadrant_plot_path, True) -        create_quadrant_plot(worktable_path, -                             quadrant_plot_path, -                             significance_treshold = float(args.significance_treshold)) -    +    if quadrant_plot_path is not None: +        quadrant_plot_path = absolute_path_of_the_file(quadrant_plot_path, True) +        create_quadrant_plot( +            worktable_path, +            quadrant_plot_path, +            significance_treshold=float(args.significance_treshold), +        ) zoom_plot_path = args.zoom_plot_path -    if (zoom_plot_path is not None): +    if zoom_plot_path is not None: zoom_plot_path = 
absolute_path_of_the_file(zoom_plot_path, True) create_local_plot(worktable_path, zoom_plot_path) -    qq_plot_path = args.qq_plot_path -    if (qq_plot_path is not None): +    if qq_plot_path is not None: qq_plot_path = absolute_path_of_the_file(qq_plot_path, True) create_qq_plot(worktable_path, qq_plot_path) @@ -155,7 +158,9 @@ def w_create_project_data(args): def w_create_inittable(args): input_data_path = absolute_path_of_the_file(args.input_data_path) init_covariance_path = absolute_path_of_the_file(args.init_covariance_path) -    init_genetic_covariance_path = absolute_path_of_the_file(args.init_genetic_covariance_path) +    init_genetic_covariance_path = absolute_path_of_the_file( +        args.init_genetic_covariance_path +    ) regions_map_path = absolute_path_of_the_file(args.regions_map_path) description_file_path = absolute_path_of_the_file(args.description_file_path) init_table_path = absolute_path_of_the_file(args.init_table_path, True) @@ -166,7 +171,7 @@ def w_create_inittable(args): description_file_path, init_table_path, init_covariance_path, -        init_genetic_covariance_path +        init_genetic_covariance_path, ) @@ -180,9 +185,9 @@ def w_plot_quadrant(args): worktable_path = absolute_path_of_the_file(args.worktable_path) plot_path = absolute_path_of_the_file(args.plot_path) significance_treshold = float(args.significance_treshold) -    create_quadrant_plot(worktable_path, -                         plot_path, -                         significance_treshold=significance_treshold) +    create_quadrant_plot( +        worktable_path, plot_path, significance_treshold=significance_treshold +    ) def w_gene_annotation(args): @@ -190,10 +195,9 @@ def w_gene_annotation(args): initTable_path = absolute_path_of_the_file(args.init_table_path, True) df_gene_csv_path = absolute_path_of_the_file(args.gene_csv_path, True) df_exon_csv_path = absolute_path_of_the_file(args.exon_csv_path, True) -    add_gene_annotation(gene_data_path, -                        initTable_path, -                        df_gene_csv_path, -                        df_exon_csv_path) +    add_gene_annotation( +        gene_data_path, initTable_path, df_gene_csv_path, df_exon_csv_path +    ) def get_parser(): @@ -256,7 +260,7 @@ def get_parser(): ) parser_create_pd.add_argument( "--significance-treshold", -        default=5*10**-8, +        default=5 * 10 ** -8, help="The threshold at which a p-value is considered significant", ) parser_create_pd.add_argument( @@ -279,27 +283,25 @@ def get_parser(): ) parser_create_pd.add_argument( -        "--csv-file-path", -        required=False, -        help="path to the results file in csv format" +        "--csv-file-path", required=False, help="path to the results file in csv format" ) parser_create_pd.add_argument( "--chromosome-number", required=False, -        help="option used only for local analysis: chromosome number studied" +        help="option used only for local analysis: chromosome number studied", ) parser_create_pd.add_argument( "--start-position", required=False, -        help="option used only for local analysis: start position of the region studied" +        help="option used only for local analysis: start position of the region studied", ) parser_create_pd.add_argument( "--end-position", required=False, -        help="option used only for local analysis: end position of the region studied" +        help="option used only for local analysis: end position of the region studied", ) strategies = parser_create_pd.add_mutually_exclusive_group() @@ -344,7 +346,7 @@ def get_parser(): parser_create_it.add_argument( "--init-genetic-covariance-path", default=None, -        help = "path to the genetic covariance file to import", +        help="path to the genetic covariance file to import", ) 
parser_create_it.set_defaults(func=w_create_inittable) # ------- create-worktable ------- @@ -366,13 +368,13 @@ def get_parser(): ) parser_create_wt.add_argument( "--significance-treshold", -        default=5*10**-8, -        help="threshold at which a p-value is considered significant" -    ) +        default=5 * 10 ** -8, +        help="threshold at which a p-value is considered significant", +    ) parser_create_wt.add_argument( "--post-filtering", default=True, -        help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)" +        help="whether to apply a filtering step that removes outliers (the results for SNPs considered aberrant will not appear in the worktable)", ) parser_create_wt.add_argument( @@ -382,9 +384,7 @@ def get_parser(): ) parser_create_wt.add_argument( -        "--csv-file-path", -        required=False, -        help="path to the results file in csv format" +        "--csv-file-path", required=False, help="path to the results file in csv format" ) parser_create_wt.add_argument( @@ -399,19 +399,19 @@ def get_parser(): parser_create_wt.add_argument( "--chromosome-number", required=False, -        help="option used only for local analysis: chromosome number studied" +        help="option used only for local analysis: chromosome number studied", ) parser_create_wt.add_argument( "--start-position", required=False, -        help="option used only for local analysis: start position of the region studied" +        help="option used only for local analysis: start position of the region studied", ) parser_create_wt.add_argument( "--end-position", required=False, -        help="option used only for local analysis: end position of the region studied" +        help="option used only for local analysis: end position of the region studied", ) strategies = parser_create_wt.add_mutually_exclusive_group() @@ -435,9 +435,7 @@ def get_parser(): help="path to the worktable file containing the data", ) parser_create_mp.add_argument( -        "--plot-path", -        required=True, -        help="path to the manhattan plot file to generate" +        "--plot-path", required=True, help="path to the manhattan plot file to generate" ) parser_create_mp.set_defaults(func=w_plot_manhattan) @@ -452,21 +450,20 @@ def get_parser(): help="path to the worktable file containing the data", ) parser_create_mp.add_argument( -        "--plot-path", -        required=True, -        help="path to the quadrant plot file to generate" +        "--plot-path", required=True, help="path to the quadrant plot file to generate" ) parser_create_mp.add_argument( "--significance-treshold", -        default=5*10**-8, -        help="threshold at which a p-value is considered significant" +        default=5 * 10 ** -8, +        help="threshold at which a p-value is considered significant", ) parser_create_mp.set_defaults(func=w_plot_quadrant) # ------- add-gene-annotation ------- parser_create_mp = subparsers.add_parser( -        "add-gene-annotation", help="add information about genes ansd exons to the inittable" +        "add-gene-annotation", +        help="add information about genes and exons to the inittable", ) parser_create_mp.add_argument( "--gene-data-path", @@ -476,17 +473,13 @@ def get_parser(): parser_create_mp.add_argument( "--init-table-path", required=True, -        help="path to the initial table file to update" +        help="path to the initial table file to update", ) parser_create_mp.add_argument( -        "--gene-csv-path", -        required=False, -        help="path to the file df_gene.csv" +        "--gene-csv-path", required=False, help="path to the file df_gene.csv" ) parser_create_mp.add_argument( -        "--exon-csv-path", -        required=False, -        help="path to the file 
df_exon.csv" + "--exon-csv-path", required=False, help="path to the file df_exon.csv" ) parser_create_mp.set_defaults(func=w_gene_annotation) @@ -494,16 +487,16 @@ def get_parser(): def main(): - print("", file = sys.stderr) - print(" ** ******* ******* *******", file = sys.stderr) - print(" ** ** ** ** **", file = sys.stderr) - print(" ** ** ** ** **", file = sys.stderr) - print(" ** ** ** ****** ******", file = sys.stderr) - print(" ** *********** ** **", file = sys.stderr) - print(" ** ** ** ** ** **", file = sys.stderr) - print(" ******* ** ** ******* *******", file = sys.stderr) - print("", file = sys.stderr) - print("", file = sys.stderr) + print("", file=sys.stderr) + print(" ** ******* ******* *******", file=sys.stderr) + print(" ** ** ** ** **", file=sys.stderr) + print(" ** ** ** ** **", file=sys.stderr) + print(" ** ** ** ****** ******", file=sys.stderr) + print(" ** *********** ** **", file=sys.stderr) + print(" ** ** ** ** ** **", file=sys.stderr) + print(" ******* ** ** ******* *******", file=sys.stderr) + print("", file=sys.stderr) + print("", file=sys.stderr) parser = get_parser() args = parser.parse_args() args.func(args) diff --git a/jass/celeryconfig.py b/jass/celeryconfig.py index 2350b9a27b31c525734b5c7fddeadc038fc7ff6e..1530851f931f986320b72add4f8fa19d316d3ea5 100644 --- a/jass/celeryconfig.py +++ b/jass/celeryconfig.py @@ -1,17 +1,15 @@ import os ## Broker settings. -broker_url = os.getenv('JASS_RABBITMQ_URL','amqp://guest:guest@localhost:5672//') +broker_url = os.getenv("JASS_RABBITMQ_URL", "amqp://guest:guest@localhost:5672//") ## Broker settings. -#result_backend = os.getenv('JASS_RABBITMQ_URL','amqp://guest2:guest@localhost:5672//') -result_backend='rpc://' +# result_backend = os.getenv('JASS_RABBITMQ_URL','amqp://guest2:guest@localhost:5672//') +result_backend = "rpc://" # List of modules to import when the Celery worker starts. -#imports = ('myapp.tasks',) +# imports = ('myapp.tasks',) ## Using the database to store task state and results. -#result_backend = 'db+sqlite:///results.db' - -#task_annotations = {'tasks.add': {'rate_limit': '10/s'}} - +# result_backend = 'db+sqlite:///results.db' +# task_annotations = {'tasks.add': {'rate_limit': '10/s'}} diff --git a/jass/controllers/__init__.py b/jass/controllers/__init__.py deleted file mode 100644 index 0e94abd262a9ad33a4968618f3ba933bf97c6740..0000000000000000000000000000000000000000 --- a/jass/controllers/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -the list of controller functions called by the JASS REST web services - -Submodules -========== - -.. 
autosummary:: - :toctree: _autosummary - - default_controller -""" diff --git a/jass/controllers/default_controller.py b/jass/controllers/default_controller.py deleted file mode 100644 index 8f1f7838e39e8f7abc47e63e132b08dd27f89528..0000000000000000000000000000000000000000 --- a/jass/controllers/default_controller.py +++ /dev/null @@ -1,234 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -default_controller ensures the connection between the web interface and the Python JASS-analysis module -""" -import os -from typing import List, Dict - -import connexion -from flask import send_file, abort -from six import iteritems - -from jass.config import config -from jass.models.project import Project -from jass.models.phenotype import Phenotype, get_available_phenotypes -from jass.tasks import create_project - -PHENOTYPES = get_available_phenotypes( - os.path.join(config["DATA_DIR"], "initTable.hdf5") -) # FIXME part of the config - - -def phenotypes_get(): - """ - phenotypes_get - Gets the list of available phenotypes - - :rtype: List[Phenotype] - """ - return PHENOTYPES - - -def projects_post(phenotypeID): - """ - projects_post - Create a new project from a selection of phenotypes - :param phenotypeID: IDs of the phenotypes selected for the project - :type phenotypeID: List[str] - - :rtype: str - """ - return create_project(phenotypeID, PHENOTYPES) - - -def local_project_post(phenotypeID, chromosome, start, end): - """ - local_project_post - Create a new local project from a chromosome number, start and end positions - and a selection of phenotypes - :param param: IDs of the phenotypes selected for the project - :type phenotypeID: List[str] - - :rtype: str - """ - return create_project(phenotypeID, PHENOTYPES, chromosome, start, end) - - -def projects_project_id_csv_status_get(projectID): - """ - projects_project_id_csv_status_get - Retrieve the generation status of the genome full csv file - :param projectID: project ID - :type projectID: str - - :rtype: str - """ - return Project(id=projectID).get_csv_file_generation() - - -def projects_project_id_summary_statistics(projectID): - """ - projects_project_id_summary_statistics - Retrieve project summary statistics - """ - return Project(id=projectID).get_project_summary_statistics() - - -def projects_project_id_genome_get(projectID, threshold=None): - """ - projects_project_id_genome_get - Retrieve genome data for a given project - :param projectID: project ID - :type projectID: str - - :rtype: str - """ - return Project(id=projectID).get_project_genomedata() - - -def projects_project_id_global_manhattan_plot_get(projectID): - """ - projects_project_id_global_manhattan_plot_get - Gets the global Manhattan plot stored in the Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_global_manhattan_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["global_manhattan"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_zoom_plot_get(projectID): - """ - projects_project_id_zoom_plot_get - Gets the zoom plot stored in the local Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_zoom_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == 
Project.DOES_NOT_EXIST: - abort(404) - elif status["zoom_plot"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_quadrant_plot_get(projectID): - """ - projects_project_id_quadrant_plot_get - Gets the quadrant plot stored in the Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_quadrant_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["quadrant_plot_status"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_genome_full_get(projectID): - """ - projects_project_id_genome_full_get - Downloads the file genome_full.csv stored in the Project folder - """ - - Type_of_Analysis = Project(id=projectID).get_type_of_analysis() - if(Type_of_Analysis == Project.LOCAL_ANALYSIS): - Fichier = "local_analysis_result.csv" - else: - Fichier = "genome_full.csv" - - try: - return send_file( - Project(id=projectID).get_csv_path(), - mimetype = "text/csv", - as_attachment = True, - attachment_filename = Fichier - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["worktable"] == Project.CREATING: - return ( - "CSV is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_local_manhattan_data_get(projectID, chromosome, region): - """ - projects_project_id_local_manhattan_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Manhattan plot - """ - return Project(id=projectID).get_project_local_manhattan_data(chromosome, region) - - -def projects_project_id_local_heatmap_data_get(projectID, chromosome, region): - """ - projects_project_id_local_heatmap_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Heatmap plot - """ - return Project(id=projectID).get_project_local_heatmap_data(chromosome, region) - - -def projects_project_id_zoom_manhattan_data_get(projectID): - """ - projects_project_id_local_manhattan_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Manhattan plot - """ - print("... 
projects_project_id_zoom_manhattan_data_get({})".format(projectID)) - return Project(id=projectID).get_project_local_manhattan_data() - - -def projects_project_id_zoom_heatmap_data_get(projectID): - """ - projects_project_id_local_heatmap_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Heatmap plot - """ - return Project(id=projectID).get_project_local_heatmap_data() - - -def projects_project_idget(projectID): - """ - projects_project_idget - Retrieve a project definition - :param projectID: project ID - :type projectID: str - - :rtype: Phenotype - """ - return Project(id=projectID) diff --git a/jass/encoder.py b/jass/encoder.py deleted file mode 100644 index 175502a4df6e905113b701e38214b296e82af577..0000000000000000000000000000000000000000 --- a/jass/encoder.py +++ /dev/null @@ -1,25 +0,0 @@ -from connexion.apps.flask_app import FlaskJSONEncoder - -# from connexion.decorators import produces -from six import iteritems -from jass.models.base_model_ import Model -from pandas import isnull - - -class JSONEncoder(FlaskJSONEncoder): - include_nulls = False - - def default(self, o): - if isinstance(o, Model): - dikt = {} - for attr, _ in iteritems(o.swagger_types): - value = getattr(o, attr) - if not (isinstance(value, list)) and (value is None or isnull(value)): - if not self.include_nulls: - continue - else: - return None - attr = o.attribute_map[attr] - dikt[attr] = value - return dikt - return produces.JSONEncoder.default(self, o) diff --git a/jass/models/inittable.py b/jass/models/inittable.py index 15fcd0fb051bf73ecb2578efc1122ff45cafc8f2..7a2849dd59bec963d53866aab2f3754a72b40cf3 100644 --- a/jass/models/inittable.py +++ b/jass/models/inittable.py @@ -18,6 +18,7 @@ from functools import reduce options.mode.chained_assignment = None warnings.filterwarnings("ignore", category=tables.NaturalNameWarning) + def get_gwasname(file_name): return "_".join(os.path.basename(file_name).split("_")[0:3]) @@ -33,9 +34,9 @@ def check_if_SNP_unique(z_gwas_chrom): def get_gwas_dict(input_data_path): gwas_dict = {} - #retrieve all files corresponding to glob patterns + # retrieve all files corresponding to glob patterns files_by_pattern = list(map(glob.glob, input_data_path.split(";"))) - all_path = reduce(lambda x,y:x+y, files_by_pattern) + all_path = reduce(lambda x, y: x + y, files_by_pattern) print(all_path) for x in all_path: gwas_name = get_gwasname(x) @@ -60,17 +61,27 @@ def create_pheno_summary(description): "internalDataLink", "Nsample", "Ncase", - "Ncontrol" + "Ncontrol", ] ] - pheno_summary_data["ID"] = "z_" + pheno_summary_data.Consortium.str.upper() + "_" + pheno_summary_data.Outcome.str.upper() + pheno_summary_data["ID"] = ( + "z_" + + pheno_summary_data.Consortium.str.upper() + + "_" + + pheno_summary_data.Outcome.str.upper() + ) - is_quantitatif = pheno_summary_data['Ncase'].isnull() + is_quantitatif = pheno_summary_data["Ncase"].isnull() pheno_summary_data["Effective_sample_size"] = np.nan - pheno_summary_data.loc[~is_quantitatif,"Effective_sample_size"] = (pheno_summary_data.loc[~is_quantitatif, "Ncase"]*pheno_summary_data.loc[~is_quantitatif, "Ncontrol"]) / pheno_summary_data.loc[~is_quantitatif, "Nsample"] + pheno_summary_data.loc[~is_quantitatif, "Effective_sample_size"] = ( + pheno_summary_data.loc[~is_quantitatif, "Ncase"] + * pheno_summary_data.loc[~is_quantitatif, "Ncontrol"] + ) / pheno_summary_data.loc[~is_quantitatif, "Nsample"] - pheno_summary_data.loc[is_quantitatif,"Effective_sample_size"] = 
pheno_summary_data.loc[is_quantitatif, "Nsample"] + pheno_summary_data.loc[ + is_quantitatif, "Effective_sample_size" + ] = pheno_summary_data.loc[is_quantitatif, "Nsample"] # reorder columns in the dataframe pheno_summary_data = pheno_summary_data[ @@ -87,7 +98,7 @@ def create_pheno_summary(description): "Nsample", "Ncase", "Ncontrol", - "Effective_sample_size" + "Effective_sample_size", ] ] pheno_summary_data.index = pheno_summary_data["ID"] @@ -121,21 +132,23 @@ def format_chr_gwas(gwas_file_chri, chrom, study_name, regions_bychr): z_gwas["Region"] = 0 z_gwas["MiddlePosition"] = 0.0 - for region_index, region_row in regions_bychr.get_group( - "chr%d" % chrom - ).iterrows(): + for region_index, region_row in regions_bychr.get_group("chr%d" % chrom).iterrows(): left = region_row["start"] right = region_row["stop"] ind = (z_gwas["position"] >= left) & (z_gwas["position"] <= right) (z_gwas.loc[ind, "Region"]) = np.int(region_index + 1) (z_gwas.loc[ind, "MiddlePosition"]) = (left + right) / 2 - return(z_gwas) + return z_gwas def compute_covariance_zscore(init_file_path): print("## Compute covariance ##") - sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(0, 3)) - trait = [i for i in sum_stat_jost_tab.columns if i[:2]=="z_"] + sum_stat_jost_tab = read_hdf( + init_file_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(0, 3), + ) + trait = [i for i in sum_stat_jost_tab.columns if i[:2] == "z_"] NSNP_matrix = DataFrame(index=trait, columns=trait) cov_matrix = DataFrame(index=trait, columns=trait) @@ -143,31 +156,35 @@ def compute_covariance_zscore(init_file_path): cov_matrix.fillna(0, inplace=True) NSNP_matrix.fillna(0, inplace=True) - bi = range(0,1751,50) - n_len = len(bi)-1 + bi = range(0, 1751, 50) + n_len = len(bi) - 1 for i in range(n_len): binf = bi[i] - bsup = bi[(i+1)] - sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(binf, bsup)) + bsup = bi[(i + 1)] + sum_stat_jost_tab = read_hdf( + init_file_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(binf, bsup), + ) print("Regions {0} to {1}\r".format(binf, bsup)) j = 0 for tr1 in trait: for tr2 in trait[j:]: - cc = sum_stat_jost_tab[[tr1,tr2]].dropna() + cc = sum_stat_jost_tab[[tr1, tr2]].dropna() cc = cc.loc[cc.max(1) < 4] - cov_matrix.loc[tr1,tr2] += cc.iloc[:,0].dot(cc.iloc[:,1]) - NSNP_matrix.loc[tr1,tr2] += cc.shape[0] + cov_matrix.loc[tr1, tr2] += cc.iloc[:, 0].dot(cc.iloc[:, 1]) + NSNP_matrix.loc[tr1, tr2] += cc.shape[0] - cov_matrix.loc[tr2,tr1] += cc.iloc[:,0].dot(cc.iloc[:,1]) - NSNP_matrix.loc[tr2,tr1] += cc.shape[0] - j=j+1 + cov_matrix.loc[tr2, tr1] += cc.iloc[:, 0].dot(cc.iloc[:, 1]) + NSNP_matrix.loc[tr2, tr1] += cc.shape[0] + j = j + 1 - #(cov_matrix/NSNP_matrix).to_csv("Covariance_on_Zscores.csv", sep="\t") + # (cov_matrix/NSNP_matrix).to_csv("Covariance_on_Zscores.csv", sep="\t") hdf_init = HDFStore(init_file_path) - hdf_init.put("COV", (cov_matrix/NSNP_matrix), format="table", data_columns=True) + hdf_init.put("COV", (cov_matrix / NSNP_matrix), format="table", data_columns=True) hdf_init.close() @@ -177,18 +194,20 @@ def create_inittable_file( description_file_path: str, init_table_path: str, init_covariance_path=None, - init_genetic_covariance_path=None + init_genetic_covariance_path=None, ): # Read region file - regions = read_csv(regions_map_path, sep='\s+', memory_map=True) + regions = read_csv(regions_map_path, sep="\s+", memory_map=True) # Create HDFStore if 
os.path.exists(init_table_path): os.remove(init_table_path) hdf_init = HDFStore(init_table_path) # Read covariance file - if init_covariance_path!=None: - covariance = read_csv(init_covariance_path, sep="\t", index_col=0, memory_map=True) + if init_covariance_path != None: + covariance = read_csv( + init_covariance_path, sep="\t", index_col=0, memory_map=True + ) compute_covariance = False else: compute_covariance = True @@ -206,18 +225,20 @@ def create_inittable_file( "z_" + meta_row["Consortium"].upper() + "_" + meta_row["Outcome"].upper() ) - pheno_select = list(phenotypes_list) & gwas_dict.keys() #&covariance.columns + pheno_select = list(phenotypes_list) & gwas_dict.keys() # &covariance.columns pheno_summary_data = create_pheno_summary(description) # select only phenotypes for which there is a covariance pheno_list = pheno_summary_data.loc[pheno_select, :] - if compute_covariance==False: + if compute_covariance == False: COV = covariance.loc[pheno_select, pheno_select] hdf_init.put("COV", COV, format="table", data_columns=True) # Read Genetic Covariance file and add genetic correlation if available - if init_genetic_covariance_path!=None: - genetic_covariance = read_csv(init_genetic_covariance_path, sep="\t", index_col=0, memory_map=True) + if init_genetic_covariance_path != None: + genetic_covariance = read_csv( + init_genetic_covariance_path, sep="\t", index_col=0, memory_map=True + ) GEN_COV = genetic_covariance.loc[pheno_select, pheno_select] hdf_init.put("GEN_COV", GEN_COV, format="table", data_columns=True) @@ -234,7 +255,7 @@ def create_inittable_file( which_cols.extend(list(pheno_select)) hdf_init.put("PhenoList", pheno_list, format="table", data_columns=True) hdf_init.put("Regions", regions, format="table", data_columns=True) - sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Ref_allele":70,"Alt_allele":70} + sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Ref_allele": 70, "Alt_allele": 70} regions_bychr = regions.groupby("chr") @@ -326,16 +347,15 @@ def create_inittable_file( compute_covariance_zscore(init_table_path) -def add_gene_annotation(gene_data_path, - initTable_path=None, - df_gene_csv_path=None, - df_exon_csv_path=None): +def add_gene_annotation( + gene_data_path, initTable_path=None, df_gene_csv_path=None, df_exon_csv_path=None +): """ add_gene_annotation - for the first 22 chromosomes, retrieves the label of the genes + for the first 22 chromosomes, retrieves the label of the genes and their position as well as those of the exons associated with the genes. 
Then store this information in a hdf5 file - + :param gene_data_path: path to the GFF file containing gene and exon data (for example, GRCh37_latest_genomic.gff) :type gene_data_path: str :param initTable_path: path to the file initTable.hdf5 @@ -344,7 +364,7 @@ def add_gene_annotation(gene_data_path, :type df_gene_csv_path: str :param df_exon_csv_path: path to the file df_exon.csv :type df_exon_csv_path: str - + :return: the dataframes df_gene and df_exon :rtype: 2 PANDAS dataframes """ @@ -355,7 +375,7 @@ def add_gene_annotation(gene_data_path, gene_start = [] gene_end = [] gene_direction = [] - + # lists containing exon data exon_id_label = [] exon_GeneID = [] @@ -363,7 +383,7 @@ def add_gene_annotation(gene_data_path, exon_start = [] exon_end = [] exon_direction = [] - + # temporary list containing the data of all exons TMP__exon_id_label = [] TMP__exon_GeneID = [] @@ -371,18 +391,18 @@ def add_gene_annotation(gene_data_path, TMP__exon_start = [] TMP__exon_end = [] TMP__exon_direction = [] - + fichier = open(gene_data_path, "r") lignes = fichier.readlines() fichier.close() - + for ligne in lignes: elements = ligne.split("\t") - if elements[0].startswith('NC_'): + if elements[0].startswith("NC_"): decode_chr = elements[0].strip("NC_").split(".") chr = int(decode_chr[0]) - if (chr <= 22): - if (elements[2] == "gene"): + if chr <= 22: + if elements[2] == "gene": gene_chr.append(chr) gene_start.append(int(elements[3])) gene_end.append(int(elements[4])) @@ -394,8 +414,8 @@ def add_gene_annotation(gene_data_path, decode_id_4 = decode_id_3[0].split("=") decode_id_5 = decode_id_4[1].split(":") gene_GeneID.append(decode_id_5[1]) - - elif (elements[2] == "exon"): + + elif elements[2] == "exon": TMP__exon_chr.append(chr) TMP__exon_start.append(int(elements[3])) TMP__exon_end.append(int(elements[4])) @@ -407,49 +427,56 @@ def add_gene_annotation(gene_data_path, decode_id_4 = decode_id_3[0].split("=") decode_id_5 = decode_id_4[1].split(":") TMP__exon_GeneID.append(decode_id_5[1]) - - - # We only keep the exons that correspond to a gene + + # We only keep the exons that correspond to a gene for i in range(len(TMP__exon_id_label)): - if (TMP__exon_GeneID[i] in gene_GeneID) : + if TMP__exon_GeneID[i] in gene_GeneID: exon_id_label.append(TMP__exon_id_label[i]) exon_GeneID.append(TMP__exon_GeneID[i]) exon_chr.append(TMP__exon_chr[i]) exon_start.append(TMP__exon_start[i]) exon_end.append(TMP__exon_end[i]) exon_direction.append(TMP__exon_direction[i]) - + # We insert genes and exons into dataframes - df_gene = DataFrame({"Chr": gene_chr, - "GeneID" : gene_GeneID, - "gene_label" : gene_id_label, - "start" : gene_start, - "end" : gene_end, - "direction" : gene_direction}) - df_exon = DataFrame({"Chr": exon_chr, - "exon_label" : exon_id_label, - "GeneID" : exon_GeneID, - "start" : exon_start, - "end" : exon_end, - "direction" : exon_direction}) - + df_gene = DataFrame( + { + "Chr": gene_chr, + "GeneID": gene_GeneID, + "gene_label": gene_id_label, + "start": gene_start, + "end": gene_end, + "direction": gene_direction, + } + ) + df_exon = DataFrame( + { + "Chr": exon_chr, + "exon_label": exon_id_label, + "GeneID": exon_GeneID, + "start": exon_start, + "end": exon_end, + "direction": exon_direction, + } + ) + # The rows of the dataframes are sorted by chromosome number and start position (and end position for exon) df_gene.sort_values(by=["Chr", "start"], inplace=True, ignore_index=True) df_exon.sort_values(by=["Chr", "start", "end"], inplace=True, ignore_index=True) - + # This information about genes and 
exons is stored in an hdf5 file if possible - if (initTable_path is not None): + if initTable_path is not None: hdf_file = HDFStore(initTable_path) hdf_file.put("Gene", df_gene, format="table", data_columns=True) hdf_file.put("Exon", df_exon, format="table", data_columns=True) hdf_file.close() - + # Dataframe df_gene is stored in a csv file if possible - if (df_gene_csv_path is not None): + if df_gene_csv_path is not None: df_gene.to_csv(df_gene_csv_path) - + # Dataframe df_exon is stored in a csv file if possible - if (df_exon_csv_path is not None): + if df_exon_csv_path is not None: df_exon.to_csv(df_exon_csv_path) - - return (df_gene, df_exon) \ No newline at end of file + + return (df_gene, df_exon) diff --git a/jass/models/project.py b/jass/models/project.py index d2ddd6f8b8dbe44246061fe17df2aa08ce517d79..15a1eb540a2bc5aa45951bbd7d89848b8927c650 100644 --- a/jass/models/project.py +++ b/jass/models/project.py @@ -1,279 +1,289 @@ -# -*- coding: utf-8 -*- -""" -compute joint statistics and generate plots for a given set of phenotypes -""" -from __future__ import absolute_import -from typing import List, Dict -import os, sys -import shutil -import hashlib -import traceback - -from jass.models.base_model_ import Model -from jass.util import deserialize_model -from jass.models.phenotype import Phenotype -from jass.models.worktable import (get_worktable_summary, - get_worktable_genomedata, - get_worktable_local_manhattan_data, - get_worktable_local_heatmap_data) - -from jass.config import config - -class Project(Model): - - DOES_NOT_EXIST = "DOES_NOT_EXIST" - - CREATING = "CREATING" - - READY = "READY" - - ERROR = "ERROR" - - # Type of analysis - LOCAL_ANALYSIS = "LOCAL_ANALYSIS" - GENOME_WIDE_ANALYSIS = "GENOME_WIDE_ANALYSIS" - - # Upper bound of the chromosome length (bp) - K_POS_MAX = 250000000 - - def __init__(self, id: str = None, - phenotypes: List[Phenotype] = None, - chromosome: str = None, - start: str = None, - end: str = None): - """ - Project - a project (list of phenotypes) - - :param id: project ID. - :type id: str - """ - self.swagger_types = {"id": str, - "status": str, - "phenotypes": List[Phenotype], - "progress": str} - - self.attribute_map = { - "id": "id", - "status": "status", - "phenotypes": "phenotypes", - "progress": "progress", - } - - self._phenotypes = phenotypes - - self._id = id - - if self._id is None: - self._id = self.get_id(chromosome, start, end) - - @classmethod - def from_dict(cls, dikt) -> "Project": - """ - Returns the dict as a model - - :param dikt: A dict. - :type: dict - :return: The Project. - :rtype: Project - """ - return deserialize_model(dikt, cls) - - @property - def id(self) -> str: - """ - Gets the id of this Project. - - :return: The id of this Project. - :rtype: str - """ - return self._id - - @id.setter - def id(self, id: str): - """ - Lists the id of this Project. - - :param id: The id of this Project. - :type id: str - """ - - self._id = id - - @property - def phenotypes(self) -> List[Phenotype]: - """ - Gets the phenotypes list for this project. - - :return: The phenotypes. - :rtype: str - """ - return self._phenotypes - - @phenotypes.setter - def cohort(self, phenotypes: List[Phenotype]): - """ - Lists the phenotypes list for this project. - - :param phenotypes: The phenotypes. 
- :type phenotypes: str - """ - - self._phenotypes = phenotypes - - def get_type_of_analysis(self): - """ - get_type_of_analysis - Gets the type of analysis : local or genome wide - """ - if((self._id).split("_")[0] == "local"): - return Project.LOCAL_ANALYSIS - else : - return Project.GENOME_WIDE_ANALYSIS - - def get_folder_path(self): - """ - get_folder_path - Gets the path of the folder where the project data are stored - """ - return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) - - def get_worktable_path(self): - """ - get_worktable_path - Gets the path of the file workTable.hdf5 - """ - return os.path.join(self.get_folder_path(), "workTable.hdf5") - - def get_csv_path(self): - """ - get_csv_path - Gets the path of the file genome_full.csv - """ - return os.path.join(self.get_folder_path(), "workTable.csv") - - def get_progress_path(self): - """ - get_progress_path - Gets the path of the file containing the current progress percentage of \ - the analysis performed within the project - """ - return os.path.join(self.get_folder_path(), "JASS_progress.txt") - - def get_csv_lock_path(self): - """ - get_csv_lock_path - Gets the path of the lock set-on when the csv file is not available yet - """ - return os.path.join(self.get_folder_path(), "the_lock.txt") - - def get_project_summary_statistics(self): - return get_worktable_summary(self.get_worktable_path()) - - def get_project_genomedata(self): - return get_worktable_genomedata(self.get_worktable_path()) - - def get_project_local_manhattan_data(self, chromosome: str = None, region: str = None): - print("project::get_project_local_manhattan_data") - return get_worktable_local_manhattan_data( - self.get_worktable_path(), chromosome, region - ) - - def get_project_local_heatmap_data(self, chromosome: str = None, region: str = None): - return get_worktable_local_heatmap_data( - self.get_worktable_path(), chromosome, region - ) - - def get_id(self, chromosome=None, start=None, end=None): - m = hashlib.md5() - for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: - m.update(str(phenotype_id).encode("utf-8")) - - if (chromosome is not None): - # Local analysis - if start is None: - start = 0 - if end is None: - end = Project.K_POS_MAX - Loc_An = "_{}_{}_{}".format(chromosome, start, end) - m.update(str(Loc_An).encode("utf-8")) - id_project = "local_{}".format(m.hexdigest()) - else: - id_project = m.hexdigest() - - return id_project - - def get_global_manhattan_plot_path(self): - return os.path.join(self.get_folder_path(), "Manhattan_Plot_Omnibus.png") - - def get_quadrant_plot_path(self): - return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") - - def get_zoom_plot_path(self): - return os.path.join(self.get_folder_path(), "Zoom_Plot_Omnibus.png") - - - @property - def status(self): - """ - status - Gets the status of the project - """ - if not os.path.exists(self.get_folder_path()): - return Project.DOES_NOT_EXIST - else: - if (self.get_type_of_analysis() == Project.LOCAL_ANALYSIS): - Return_status = get_file_status(self.get_zoom_plot_path()) - else: - Return_status = get_file_status(self.get_quadrant_plot_path()) - - return { - # WARNING: project status is hacked so that everything is ready - # only once the final step has completed. 
- # This avoids the apparent "corrupted hdf5" file situation - "worktable": Return_status, - "global_manhattan": Return_status, - "quadrant_plot_status": Return_status, - "zoom_plot": Return_status - } - - @property - def progress(self): - """ - progress - Gets the percentage of completion of the phenotype analysis - """ - JASS_progress = 0 - progress_path = self.get_progress_path() - if os.path.exists(progress_path): - file_progress = open(progress_path, "r") - JASS_progress = file_progress.read() - file_progress.close() - return JASS_progress - - def get_csv_file_generation(self): - """ - csv_file_generation - Gets the status of the genome_full csv file generation - """ - the_lock_path = self.get_csv_lock_path() - csv_file = self.get_csv_path() - csv_file_status = Project.CREATING - if (not os.path.isfile(the_lock_path)): - if(os.path.isfile(csv_file)): - csv_file_status = Project.READY - else : - csv_file_status = Project.ERROR - print("csv_file_generation:csv_file_status={}".format(csv_file_status)) - return csv_file_status - -def get_file_building_tb_path(file_path): - return file_path + ".log" - - -def get_file_status(file_path): - if os.path.exists(file_path): - return Project.READY - elif os.path.exists(get_file_building_tb_path(file_path)): - return Project.ERROR - else: - return Project.CREATING \ No newline at end of file +# -*- coding: utf-8 -*- +""" +compute joint statistics and generate plots for a given set of phenotypes +""" +from __future__ import absolute_import +from typing import List +import os +import hashlib + +from jass.models.base_model_ import Model +from jass.util import deserialize_model +from jass.models.phenotype import Phenotype +from jass.models.worktable import ( + get_worktable_summary, + get_worktable_genomedata, + get_worktable_local_manhattan_data, + get_worktable_local_heatmap_data, +) + +from jass.config import config + + +class Project(Model): + + DOES_NOT_EXIST = "DOES_NOT_EXIST" + + CREATING = "CREATING" + + READY = "READY" + + ERROR = "ERROR" + + # Type of analysis + LOCAL_ANALYSIS = "LOCAL_ANALYSIS" + GENOME_WIDE_ANALYSIS = "GENOME_WIDE_ANALYSIS" + + # Upper bound of the chromosome length (bp) + K_POS_MAX = 250000000 + + def __init__( + self, + id: str = None, + phenotypes: List[Phenotype] = None, + chromosome: str = None, + start: str = None, + end: str = None, + ): + """ + Project - a project (list of phenotypes) + + :param id: project ID. + :type id: str + """ + self.swagger_types = { + "id": str, + "status": str, + "phenotypes": List[Phenotype], + "progress": str, + } + + self.attribute_map = { + "id": "id", + "status": "status", + "phenotypes": "phenotypes", + "progress": "progress", + } + + self._phenotypes = phenotypes + + self._id = id + + if self._id is None: + self._id = self.get_id(chromosome, start, end) + + @classmethod + def from_dict(cls, dikt) -> "Project": + """ + Returns the dict as a model + + :param dikt: A dict. + :type: dict + :return: The Project. + :rtype: Project + """ + return deserialize_model(dikt, cls) + + @property + def id(self) -> str: + """ + Gets the id of this Project. + + :return: The id of this Project. + :rtype: str + """ + return self._id + + @id.setter + def id(self, id: str): + """ + Lists the id of this Project. + + :param id: The id of this Project. + :type id: str + """ + + self._id = id + + @property + def phenotypes(self) -> List[Phenotype]: + """ + Gets the phenotypes list for this project. + + :return: The phenotypes. 
+ :rtype: str + """ + return self._phenotypes + + @phenotypes.setter + def cohort(self, phenotypes: List[Phenotype]): + """ + Lists the phenotypes list for this project. + + :param phenotypes: The phenotypes. + :type phenotypes: str + """ + + self._phenotypes = phenotypes + + def get_type_of_analysis(self): + """ + get_type_of_analysis + Gets the type of analysis : local or genome wide + """ + if (self._id).split("_")[0] == "local": + return Project.LOCAL_ANALYSIS + else: + return Project.GENOME_WIDE_ANALYSIS + + def get_folder_path(self): + """ + get_folder_path + Gets the path of the folder where the project data are stored + """ + return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) + + def get_worktable_path(self): + """ + get_worktable_path + Gets the path of the file workTable.hdf5 + """ + return os.path.join(self.get_folder_path(), "workTable.hdf5") + + def get_csv_path(self): + """ + get_csv_path + Gets the path of the file genome_full.csv + """ + return os.path.join(self.get_folder_path(), "workTable.csv") + + def get_progress_path(self): + """ + get_progress_path + Gets the path of the file containing the current progress percentage + of the analysis performed within the project + """ + return os.path.join(self.get_folder_path(), "JASS_progress.txt") + + def get_csv_lock_path(self): + """ + get_csv_lock_path + Gets the path of the lock set-on when the csv file is not available yet + """ + return os.path.join(self.get_folder_path(), "the_lock.txt") + + def get_project_summary_statistics(self): + return get_worktable_summary(self.get_worktable_path()) + + def get_project_genomedata(self): + return get_worktable_genomedata(self.get_worktable_path()) + + def get_project_local_manhattan_data( + self, chromosome: str = None, region: str = None + ): + print("project::get_project_local_manhattan_data") + return get_worktable_local_manhattan_data( + self.get_worktable_path(), chromosome, region + ) + + def get_project_local_heatmap_data( + self, chromosome: str = None, region: str = None + ): + return get_worktable_local_heatmap_data( + self.get_worktable_path(), chromosome, region + ) + + def get_id(self, chromosome=None, start=None, end=None): + m = hashlib.md5() + for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: + m.update(str(phenotype_id).encode("utf-8")) + + if chromosome is not None: + # Local analysis + if start is None: + start = 0 + if end is None: + end = Project.K_POS_MAX + Loc_An = "_{}_{}_{}".format(chromosome, start, end) + m.update(str(Loc_An).encode("utf-8")) + id_project = "local_{}".format(m.hexdigest()) + else: + id_project = m.hexdigest() + + return id_project + + def get_global_manhattan_plot_path(self): + return os.path.join(self.get_folder_path(), "Manhattan_Plot_Omnibus.png") + + def get_quadrant_plot_path(self): + return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") + + def get_zoom_plot_path(self): + return os.path.join(self.get_folder_path(), "Zoom_Plot_Omnibus.png") + + @property + def status(self): + """ + status + Gets the status of the project + """ + if not os.path.exists(self.get_folder_path()): + return Project.DOES_NOT_EXIST + else: + if self.get_type_of_analysis() == Project.LOCAL_ANALYSIS: + Return_status = get_file_status(self.get_zoom_plot_path()) + else: + Return_status = get_file_status(self.get_quadrant_plot_path()) + + return { + # WARNING: project status is hacked so that everything is ready + # only once the final step has completed. 
+ # This avoids the apparent "corrupted hdf5" file situation + "worktable": Return_status, + "global_manhattan": Return_status, + "quadrant_plot_status": Return_status, + "zoom_plot": Return_status, + } + + @property + def progress(self): + """ + progress + Gets the percentage of completion of the phenotype analysis + """ + JASS_progress = 0 + progress_path = self.get_progress_path() + if os.path.exists(progress_path): + file_progress = open(progress_path, "r") + JASS_progress = file_progress.read() + file_progress.close() + return JASS_progress + + def get_csv_file_generation(self): + """ + csv_file_generation + Gets the status of the genome_full csv file generation + """ + the_lock_path = self.get_csv_lock_path() + csv_file = self.get_csv_path() + csv_file_status = Project.CREATING + if not os.path.isfile(the_lock_path): + if os.path.isfile(csv_file): + csv_file_status = Project.READY + else: + csv_file_status = Project.ERROR + print("csv_file_generation:csv_file_status={}".format(csv_file_status)) + return csv_file_status + + +def get_file_building_tb_path(file_path): + return file_path + ".log" + + +def get_file_status(file_path): + if os.path.exists(file_path): + return Project.READY + elif os.path.exists(get_file_building_tb_path(file_path)): + return Project.ERROR + else: + return Project.CREATING diff --git a/jass/models/stats.py b/jass/models/stats.py index ff2649b5b1d8d6e5c0021c9de22f58cba6b3e0e1..8d6b815106164ee809df48ea98f815c8ba9a1b01 100644 --- a/jass/models/stats.py +++ b/jass/models/stats.py @@ -16,21 +16,22 @@ def make_stat_computer_nopattern(cov, stat_func, **kwargs): :rtype: function """ # invcov is only computed once - invcov = np.linalg.pinv(cov, rcond=0.001)#np.linalg.inv(cov) + invcov = np.linalg.pinv(cov, rcond=0.001) # np.linalg.inv(cov) def compute(z): - return stat_func(z, cov, invcov,**kwargs) + return stat_func(z, cov, invcov, **kwargs) + return compute def make_stat_computer_pattern(cov, stat_func): """ Create the function that computes the joint statistics if NaN values are in z - and if the number of selected phenotypes is less than or equal to 16. + and if the number of selected phenotypes is less than or equal to 16. It uses a covariance matrix corresponding to the pattern of non-NaN values in z. - This function is implemented using the currying technique: - the first part which declares the data structure and the stat function + This function is implemented using the currying technique: + the first part which declares the data structure and the stat function is called only once while the second part (compute) is called for each pattern. :param cov: covariance matrix :type cov: pandas.core.frame.DataFrame @@ -41,13 +42,15 @@ def make_stat_computer_pattern(cov, stat_func): """ if not stat_func.can_use_pattern: raise ValueError("this computation strategy cannot be used with patterns") - + # invcov_bypattern is a dictionary of invcovs where the key is the # corresponding pattern of non-NaN values in z invcov_bypattern = {} def compute(z, pattern_code): - z_na_bool = z.iloc[0,].notnull() + z_na_bool = z.iloc[ + 0, + ].notnull() if pattern_code in invcov_bypattern: invcov = invcov_bypattern[pattern_code] else: @@ -67,8 +70,8 @@ def make_stat_computer_pattern_big(cov, stat_func): and if the number of selected phenotypes is greater than or equal to 17. It uses a covariance matrix corresponding to the pattern of non-NaN values in z. 
-    This function is implemented using the currying technique: -    the first part which declares the data structure and the stat function +    This function is implemented using the currying technique: +    the first part which declares the data structure and the stat function is called only once while the second part (compute) is called for each pattern. :param cov: covariance matrix :type cov: numpy.ndarray @@ -87,7 +90,7 @@ def make_stat_computer_pattern_big(cov, stat_func): if pattern_code in invcov_bypattern: invcov = invcov_bypattern[pattern_code] else: -        mini_cov = (cov.take(Num,axis=1)).take(Num,axis=0) +        mini_cov = (cov.take(Num, axis=1)).take(Num, axis=0) invcov = np.linalg.pinv(mini_cov, rcond=0.001) invcov_bypattern[pattern_code] = invcov @@ -111,6 +114,7 @@ def make_stat_computer_nan_dumb(cov, stat_func): return compute + def omnibus_stat(z, cov, invcov): """ joint statistics "omnibus" strategy @@ -163,7 +167,7 @@ def fisher_test(z, cov, invcov): print(z) print(cov) print(invcov) -    p_val = 2*spst.norm.sf(np.abs(z)) +    p_val = 2 * spst.norm.sf(np.abs(z)) stat = -2 * np.log(np.nansum(p_val, axis=1)) return spst.chi2.sf(stat, df=p) except ValueError: @@ -171,8 +175,10 @@ def fisher_test(z, cov, invcov): print(invcov.shape) print("Error in Fisher stat") + fisher_test.can_use_pattern = False + def meta_analysis(z, cov, invcov, **kwargs): """ Meta analysis using global sample size to weight z-score @@ -189,16 +195,16 @@ def meta_analysis(z, cov, invcov, **kwargs): :type samp_size: pandas.Series """ -    Effective_sample_size = kwargs.get('samp_size', None) +    Effective_sample_size = kwargs.get("samp_size", None) if Effective_sample_size is None: -        raise Error('no sample size available to perform meta_analysis') +        raise ValueError("no sample size available to perform meta_analysis") else: -        loading = Effective_sample_size.loc[z.columns]**0.5 +        loading = Effective_sample_size.loc[z.columns] ** 0.5 -    M_loadings = np.full(z.shape, loading**2) +    M_loadings = np.full(z.shape, loading ** 2) M_loadings[np.isnan(z)] = 0 -    z = np.nan_to_num(z) # fill na with zero +    z = np.nan_to_num(z)  # fill na with zero numi = loading.dot(z.transpose()) deno = np.sqrt(np.sum(M_loadings, axis=1)) @@ -208,8 +214,10 @@ def meta_analysis(z, cov, invcov, **kwargs): return spst.chi2.sf(stat, df=1) + meta_analysis.can_use_pattern = False + def sumz_stat(z, cov, invcov, **kwargs): """ joint statistics "sumZ" strategy @@ -223,7 +231,7 @@ def sumz_stat(z, cov, invcov, **kwargs): :return: the joint statistics :rtype: numpy.ndarray float64 """ -    loading = kwargs.get('loadings', None) +    loading = kwargs.get("loadings", None) if loading is None: p = z.shape[1] @@ -238,11 +246,11 @@ def sumz_stat(z, cov, invcov, **kwargs): z = np.nan_to_num(z) numi = np.square(loading.dot(z.transpose())) -    deno = np.einsum('ij,jk,ki->i', M_loadings, cov, M_loadings.T) +    deno = np.einsum("ij,jk,ki->i", M_loadings, cov, M_loadings.T) # fill na with 0 = don't take the missing GWAS into account in the test stat = numi / deno return spst.chi2.sf(stat, df=1) -sumz_stat.can_use_pattern = False \ No newline at end of file +sumz_stat.can_use_pattern = False diff --git a/jass/models/worktable.py b/jass/models/worktable.py index 64042d57563da83e2aab2bfd348560cec61534ae..b21ce807627a6c6f62fbea988b286a4eb38e2998 100644 --- a/jass/models/worktable.py +++ b/jass/models/worktable.py @@ -61,29 +61,37 @@ def signif(x, digit): return round(x, digit - int(math.floor(math.log10(abs(x)))) - 1) -def choose_stat_function(smart_na_computation, optim_na, big, function_name, 
stat_function, sub_cov, **kwargs): +def choose_stat_function( + smart_na_computation, optim_na, big, function_name, stat_function, sub_cov, **kwargs +): if smart_na_computation: # If stat is sumz use normal computer even with na if function_name == "omnibus_stat": if optim_na: if big: - stat_compute = make_stat_computer_pattern_big(sub_cov, stat_function) + stat_compute = make_stat_computer_pattern_big( + sub_cov, stat_function + ) else: stat_compute = make_stat_computer_pattern(sub_cov, stat_function) else: stat_compute = make_stat_computer_nan_dumb(sub_cov, stat_function) else: if function_name == "meta_analysis": - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, **kwargs) + stat_compute = make_stat_computer_nopattern( + sub_cov, stat_function, **kwargs + ) elif function_name == "sumz_stat": - loading_file = kwargs.get('loadings', None) + loading_file = kwargs.get("loadings", None) if loading_file is None: # Default loadings would be one for every phenotypes stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) else: loadings = read_csv(loading_file, index_col=0) loadings = loadings.iloc[:, 0] - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, loadings=loadings) + stat_compute = make_stat_computer_nopattern( + sub_cov, stat_function, loadings=loadings + ) else: stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) else: @@ -92,39 +100,47 @@ def choose_stat_function(smart_na_computation, optim_na, big, function_name, sta return stat_compute -def add_signif_status_column(region_sub_tab, significance_treshold=5*10**-8): +def add_signif_status_column(region_sub_tab, significance_treshold=5 * 10 ** -8): region_sub_tab["signif_status"] = "" # blue: significant pvalues for omnibus and univariate tests - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL < significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Both" # red: significant pvalues for omnibus test only - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL < significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Joint" # green: significant pvalues for univariate test only - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] - region_sub_tab.loc[region_sub_tab.index[cond], - "signif_status"] = "Univariate" + cond = np.where( + (region_sub_tab.JASS_PVAL > significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold) + )[0] + region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Univariate" # grey: non significant pvalues - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL > significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "None" return region_sub_tab -def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10**-8): 
+def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5 * 10 ** -8): # Select the most significant SNP for the joint test for each region - region_sub_tab = sum_stat_tab.sort_values( - "JASS_PVAL").groupby("Region").first() # .reset_index() + region_sub_tab = ( + sum_stat_tab.sort_values("JASS_PVAL").groupby("Region").first() + ) # .reset_index() # add minimum univariate p-value univar = sum_stat_tab.groupby("Region").min().UNIVARIATE_MIN_PVAL @@ -132,18 +148,29 @@ def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10** # Tag SNPs depending on which test is significant region_sub_tab.reset_index(inplace=True) - region_sub_tab = add_signif_status_column( - region_sub_tab, significance_treshold) + region_sub_tab = add_signif_status_column(region_sub_tab, significance_treshold) # reorder columns - region_sub_tab = region_sub_tab[['Region', "MiddlePosition", "snp_ids", "CHR", "position", - "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", - "signif_status"] + phenotype_ids] + region_sub_tab = region_sub_tab[ + [ + "Region", + "MiddlePosition", + "snp_ids", + "CHR", + "position", + "Ref_allele", + "Alt_allele", + "JASS_PVAL", + "UNIVARIATE_MIN_PVAL", + "signif_status", + ] + + phenotype_ids + ] return region_sub_tab -def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): +def post_computation_filtering(worktable_chunk, significant_treshold=5 * 10 ** -8): """ Remove SNPs that seems aberrant: SNPs with a very low p-value that are isolated in their region @@ -155,17 +182,20 @@ def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): """ def count_nearly_significant(rsnp): - return((rsnp.JASS_PVAL < (significant_treshold*20)).sum()) + return (rsnp.JASS_PVAL < (significant_treshold * 20)).sum() res = worktable_chunk.groupby("Region").apply(count_nearly_significant) # select region with only one SNP that is significant which is - # suspect + # suspect reg = res.loc[res == 1].index for reg_aberant in reg: - aberant_SNP = worktable_chunk.loc[worktable_chunk.Region == reg_aberant].sort_values( - "JASS_PVAL").index[0] + aberant_SNP = ( + worktable_chunk.loc[worktable_chunk.Region == reg_aberant] + .sort_values("JASS_PVAL") + .index[0] + ) worktable_chunk.drop(aberant_SNP, inplace=True) return worktable_chunk @@ -173,11 +203,12 @@ def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): def compute_pleiotropy_index(W, significance_treshold): - N_significatif = (2.0 * spst.norm.sf(W.fillna(0, - inplace=False).abs()) < significance_treshold).sum(1) + N_significatif = ( + 2.0 * spst.norm.sf(W.fillna(0, inplace=False).abs()) < significance_treshold + ).sum(1) N_pheno = (~W.isnull()).sum(1) # pleiotropy index is not meaningful for too few phenotype - S = N_significatif/N_pheno + S = N_significatif / N_pheno S.loc[N_pheno < 4] = np.nan return S @@ -191,14 +222,14 @@ def create_worktable_file( optim_na: bool = True, csv_file: str = None, chunk_size: int = 50, - significance_treshold=5*10**-8, + significance_treshold=5 * 10 ** -8, post_filtering=True, delayed_gen_csv_file=False, chromosome: str = None, pos_Start: str = None, pos_End: str = None, **kwargs - ): +): """ Create a worktable file from an initial data table by specifying the @@ -248,49 +279,107 @@ def create_worktable_file( K_POS_MAX = 250000000 # Minimum and maximum limit of regions for each chromosome (multiples of 50) - Min_pos_chr = [ 0, 100, 250, 400, 500, 600, 700, 800, 900, 1000, 1050, - 1150, 1250, 1300, 1350, 1400, 
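
# Worked example for compute_pleiotropy_index above (pandas/scipy assumed,
# z-scores illustrative): the index is the share of observed phenotypes whose
# two-sided p-value is significant, masked when fewer than 4 are observed.
import numpy as np
import pandas as pd
import scipy.stats as spst

W = pd.DataFrame({"p1": [8.0, 0.5], "p2": [7.2, 0.1],
                  "p3": [6.1, np.nan], "p4": [0.3, 0.2]})
pvals = 2.0 * spst.norm.sf(W.fillna(0).abs())   # missing z treated as 0
n_signif = (pvals < 5e-8).sum(axis=1)           # row 0: 3 of 4 significant
n_pheno = (~W.isnull()).sum(axis=1)             # row 0: 4, row 1: 3
S = n_signif / n_pheno                          # row 0: 0.75
S.loc[n_pheno < 4] = np.nan                     # row 1 masked: 3 phenotypes
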
1450, 1500, 1550, 1600, 1650, 1650] - Max_pos_chr = [150, 300, 400, 550, 650, 750, 850, 950, 1050, 1100, 1200, - 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1700, 1750] + Min_pos_chr = [ + 0, + 100, + 250, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + 1050, + 1150, + 1250, + 1300, + 1350, + 1400, + 1450, + 1500, + 1550, + 1600, + 1650, + 1650, + ] + Max_pos_chr = [ + 150, + 300, + 400, + 550, + 650, + 750, + 850, + 950, + 1050, + 1100, + 1200, + 1300, + 1350, + 1400, + 1450, + 1500, + 1550, + 1600, + 1650, + 1700, + 1700, + 1750, + ] N_pheno = len(phenotype_ids) # Controls the number of phenotypes - if (N_pheno > 64): - print("ERROR: {} phenotypes are selected. \nThe current version of JASS cannot analyze more than 64 phenotypes" \ - .format(N_pheno)) + if N_pheno > 64: + print( + "ERROR: {} phenotypes are selected. \nThe current version of JASS cannot analyze more than 64 phenotypes".format( + N_pheno + ) + ) raise ValueError("Maximum number of phenotypes exceeded") - elif (N_pheno >= 20): - print("WARNING: {} phenotypes are selected. The computation will be very long!".format(N_pheno)) + elif N_pheno >= 20: + print( + "WARNING: {} phenotypes are selected. The computation will be very long!".format( + N_pheno + ) + ) - if (chromosome is None): + if chromosome is None: local_analysis = False print("============== Whole genome analysis ===============") else: local_analysis = True print("============== Local analysis ===============") - if not(chromosome.isdigit()): - print("ERROR: when performing a local analysis, the chromosome number (between 1 and 22) is mandatory") - raise ValueError("create_worktable_file: the required argument chromosome is not a number") + if not (chromosome.isdigit()): + print( + "ERROR: when performing a local analysis, the chromosome number (between 1 and 22) is mandatory" + ) + raise ValueError( + "create_worktable_file: the required argument chromosome is not a number" + ) else: num_Chr = int(chromosome) - if ((pos_Start is None) and (pos_End is None)): + if (pos_Start is None) and (pos_End is None): chromosome_full = True print("------ Chromosome : {} ------".format(num_Chr)) else: chromosome_full = False - if ((pos_Start is None) or (not pos_Start.isdigit())): + if (pos_Start is None) or (not pos_Start.isdigit()): pos_Start = 0 - if ((pos_End is None) or (not pos_End.isdigit())): + if (pos_End is None) or (not pos_End.isdigit()): pos_End = K_POS_MAX - print("------ Chromosome : {} ({} - {}) ------".format(num_Chr, pos_Start, pos_End)) + print( + "------ Chromosome : {} ({} - {}) ------".format( + num_Chr, pos_Start, pos_End + ) + ) print("Phenotypes = {}".format(phenotype_ids)) # Initialization of Jass_progress - progress_path = os.path.join(os.path.dirname( - project_hdf_path), "JASS_progress.txt") + progress_path = os.path.join(os.path.dirname(project_hdf_path), "JASS_progress.txt") JASS_progress = 0 file_progress = open(progress_path, "w") file_progress.write(str(JASS_progress)) @@ -309,7 +398,9 @@ def create_worktable_file( if delayed_gen_csv_file: # setting a lock to generate the csv_file asynchronously - the_lock_path = os.path.join(os.path.dirname(project_hdf_path), "the_lock.txt") + the_lock_path = os.path.join( + os.path.dirname(project_hdf_path), "the_lock.txt" + ) the_lock = "The lock is set on : workTable.csv is not yet available" file_lock = open(the_lock_path, "w") file_lock.write(the_lock) @@ -318,59 +409,66 @@ def create_worktable_file( # subset of phenotypes that have been selected phenolist = read_hdf(init_file_path, 
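
# The two arrays above are multiples of 50 so that every chromosome starts and
# ends on a whole read chunk. A sketch for chromosome 1, whose regions lie in
# [0, 150) according to the arrays (variable names are illustrative):
chunk_size = 50
num_Chr = 1
first_chunk = 0 // chunk_size    # Min_pos_chr[num_Chr - 1] // 50 -> 0
stop_chunk = 150 // chunk_size   # Max_pos_chr[num_Chr - 1] // 50 -> 3
# chunks 0, 1 and 2 cover regions 0-49, 50-99 and 100-149 respectively
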
"PhenoList") phenolist = phenolist.loc[phenotype_ids] - hdf_work.put( - "PhenoList", phenolist - ) + hdf_work.put("PhenoList", phenolist) # subset of covariance matrix for the selected phenotypes cov = read_hdf(init_file_path, "COV") sub_cov = cov.loc[phenotype_ids, phenotype_ids] # Covariance matrix - hdf_work.put( - "COV", sub_cov, format="table", data_columns=True - ) + hdf_work.put("COV", sub_cov, format="table", data_columns=True) - #If available extract genetic covariance + # If available extract genetic covariance try: gcov = read_hdf(init_file_path, "GEN_COV") sub_gcov = gcov.loc[phenotype_ids, phenotype_ids] # Covariance matrix - hdf_work.put( - "GEN_COV", sub_gcov, format="table", data_columns=True - ) + hdf_work.put("GEN_COV", sub_gcov, format="table", data_columns=True) except KeyError: print("Genetic correlation not available in inittable. ") - - regions = read_hdf(init_file_path, "Regions").index.tolist() - sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Region": 10, "CHR": 5, "Ref_allele" : 70, "Alt_allele":70} + sum_stat_tab_min_itemsizes = { + "snp_ids": 80, + "Region": 10, + "CHR": 5, + "Ref_allele": 70, + "Alt_allele": 70, + } region_sub_table_min_itemsizes = { - "Region": 10, "index": 10, "CHR": 5, "snp_ids": 80, "signif_status": 20,"Ref_allele" : 70, "Alt_allele":70} + "Region": 10, + "index": 10, + "CHR": 5, + "snp_ids": 80, + "signif_status": 20, + "Ref_allele": 70, + "Alt_allele": 70, + } smart_na_computation = not (remove_nan) module_name, function_name = stat.split(":") stat_module = importlib.import_module(module_name) stat_fn = getattr(stat_module, function_name) - if (N_pheno < K_NB_PHENOTYPES_BIG): + if N_pheno < K_NB_PHENOTYPES_BIG: big = False sub_cov_matrix = sub_cov else: big = True sub_cov_matrix = sub_cov.to_numpy() - stat_compute = choose_stat_function(smart_na_computation, - optim_na, - big, - function_name, - stat_fn, - sub_cov_matrix, - samp_size=phenolist['Effective_sample_size'], - **kwargs) + stat_compute = choose_stat_function( + smart_na_computation, + optim_na, + big, + function_name, + stat_fn, + sub_cov_matrix, + samp_size=phenolist["Effective_sample_size"], + **kwargs + ) # read data by chunks to optimize memory usage - if (not local_analysis): + if not local_analysis: Nchunk = len(regions) // chunk_size + 1 start_value = 0 else: @@ -379,11 +477,14 @@ def create_worktable_file( start_value = Min_pos_chr[num_Chr - 1] // chunk_size # selection criterion in the case of a partial analysis by chromosome and position - if (chromosome_full): + if chromosome_full: Local_criteria = "(CHR == {})".format(chromosome) else: - Local_criteria = "(CHR == {}) and (position >= {}) and (position <= {})"\ - .format(chromosome, pos_Start, pos_End) + Local_criteria = ( + "(CHR == {}) and (position >= {}) and (position <= {})".format( + chromosome, pos_Start, pos_End + ) + ) Nsnp_total = 0 Nsnp_jassed = 0 @@ -394,52 +495,63 @@ def create_worktable_file( JASS_progress = round((chunk + 1) * 100 / (Nchunk + 2)) binf = chunk * chunk_size - bsup = (chunk+1) * chunk_size - - sum_stat_tab = read_hdf(init_file_path, 'SumStatTab', columns=[ - 'Region', 'CHR', 'position', 'snp_ids', 'Ref_allele', 'Alt_allele', 'MiddlePosition'] + phenotype_ids, - where='Region >= {0} and Region < {1}'.format(binf, bsup)) + bsup = (chunk + 1) * chunk_size + + sum_stat_tab = read_hdf( + init_file_path, + "SumStatTab", + columns=[ + "Region", + "CHR", + "position", + "snp_ids", + "Ref_allele", + "Alt_allele", + "MiddlePosition", + ] + + phenotype_ids, + where="Region >= {0} and Region < 
{1}".format(binf, bsup), + ) print("Regions {0} to {1}\r".format(binf, bsup)) - if(local_analysis): + if local_analysis: # Data extraction in the case of a partial analysis sum_stat_tab.query(Local_criteria, inplace=True) # Remake row index unique: IMPORTANT for assignation with .loc - sum_stat_tab.dropna( - axis=0, subset=phenotype_ids, how=how_dropna, inplace=True - ) + sum_stat_tab.dropna(axis=0, subset=phenotype_ids, how=how_dropna, inplace=True) sum_stat_tab.reset_index(drop=True, inplace=True) if sum_stat_tab.shape[0] == 0: - print( - "No data available for region {0} to region {1}".format(binf, bsup)) + print("No data available for region {0} to region {1}".format(binf, bsup)) continue # skip region if no data are available Nsnp_total = Nsnp_total + sum_stat_tab.shape[0] if remove_nan or stat.split(":")[-1] != "omnibus_stat": - sum_stat_tab['JASS_PVAL'] = stat_compute( - sum_stat_tab[phenotype_ids]) + sum_stat_tab["JASS_PVAL"] = stat_compute(sum_stat_tab[phenotype_ids]) else: if not big: # Algorithm optimized for a small number of phenotypes # Sort SumStatTab by missing patterns patterns_missing, frequent_pattern = compute_frequent_missing_pattern( - sum_stat_tab[phenotype_ids]) + sum_stat_tab[phenotype_ids] + ) sum_stat_tab["patterns_missing"] = patterns_missing z1 = sum_stat_tab[phenotype_ids] # Apply the statistic computation by missing patterns for pattern in frequent_pattern: - bool_serie = (patterns_missing == pattern) + bool_serie = patterns_missing == pattern Selection_criteria = sum_stat_tab["patterns_missing"] == pattern try: - sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute(z1[Selection_criteria], pattern) + sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute( + z1[Selection_criteria], pattern + ) except ValueError: print("worktable") @@ -447,8 +559,11 @@ def create_worktable_file( # Algorithm optimized for a high number of phenotypes # Sort SumStatTab by missing patterns - patterns_missing, frequent_pattern, dico_index_y = \ - compute_frequent_missing_pattern_Big(sum_stat_tab[phenotype_ids]) + ( + patterns_missing, + frequent_pattern, + dico_index_y, + ) = compute_frequent_missing_pattern_Big(sum_stat_tab[phenotype_ids]) sum_stat_tab["index"] = sum_stat_tab.index.tolist() sum_stat_tab["patterns_missing"] = patterns_missing @@ -463,7 +578,9 @@ def create_worktable_file( dico_z = {} dico_index_x = {} - sum_stat_tab[Liste_colonnes].apply(lambda x: store_pattern(dico_z, dico_index_x, *x), axis=1) + sum_stat_tab[Liste_colonnes].apply( + lambda x: store_pattern(dico_z, dico_index_x, *x), axis=1 + ) Retour_omnibus_bypattern = {} @@ -471,9 +588,7 @@ def create_worktable_file( for pattern in frequent_pattern: try: Retour_omnibus_bypattern[pattern] = stat_compute( - np.array(dico_z[pattern]), - pattern, - dico_index_y[pattern] + np.array(dico_z[pattern]), pattern, dico_index_y[pattern] ) except ValueError: print("worktable") @@ -482,7 +597,9 @@ def create_worktable_file( for pattern in frequent_pattern: for ligne, indice in enumerate(dico_index_x[pattern]): - Retour_omnibus[int(indice)] = (Retour_omnibus_bypattern[pattern])[int(ligne)] + Retour_omnibus[int(indice)] = ( + Retour_omnibus_bypattern[pattern] + )[int(ligne)] sum_stat_tab["JASS_PVAL"] = Retour_omnibus @@ -490,25 +607,39 @@ def create_worktable_file( sum_stat_tab.sort_values(by=["Region", "CHR"], inplace=True) sum_stat_tab["UNIVARIATE_MIN_PVAL"] = DataFrame( - 2.0 * - spst.norm.sf(sum_stat_tab[phenotype_ids].fillna( - 0, inplace=False).abs()), + 2.0 + * spst.norm.sf(sum_stat_tab[phenotype_ids].fillna(0, 
inplace=False).abs()), index=sum_stat_tab.index, ).min(axis=1) - sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"] * \ - (1-np.isnan(sum_stat_tab[phenotype_ids]).astype(int)).sum(1) - sum_stat_tab.loc[sum_stat_tab.UNIVARIATE_MIN_QVAL > - 1, "UNIVARIATE_MIN_QVAL"] = 1 + sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"] * ( + 1 - np.isnan(sum_stat_tab[phenotype_ids]).astype(int) + ).sum(1) + sum_stat_tab.loc[ + sum_stat_tab.UNIVARIATE_MIN_QVAL > 1, "UNIVARIATE_MIN_QVAL" + ] = 1 # Computing pleiotropy sum_stat_tab["PLEIOTROPY_INDEX"] = compute_pleiotropy_index( - sum_stat_tab[phenotype_ids], significance_treshold) + sum_stat_tab[phenotype_ids], significance_treshold + ) sum_stat_tab = sum_stat_tab[ - ["Region", "CHR", "snp_ids", "position", 'Ref_allele', 'Alt_allele', "MiddlePosition", - "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"] - + phenotype_ids] + [ + "Region", + "CHR", + "snp_ids", + "position", + "Ref_allele", + "Alt_allele", + "MiddlePosition", + "JASS_PVAL", + "UNIVARIATE_MIN_PVAL", + "UNIVARIATE_MIN_QVAL", + "PLEIOTROPY_INDEX", + ] + + phenotype_ids + ] if post_filtering: sum_stat_tab = post_computation_filtering(sum_stat_tab) @@ -517,16 +648,16 @@ def create_worktable_file( "SumStatTab", sum_stat_tab, min_itemsize=sum_stat_tab_min_itemsizes ) - if ((csv_file is not None) and (not delayed_gen_csv_file)): - with open(csv_file, 'a') as f: - sum_stat_tab.to_csv(f, header=f.tell()==0) + if (csv_file is not None) and (not delayed_gen_csv_file): + with open(csv_file, "a") as f: + sum_stat_tab.to_csv(f, header=f.tell() == 0) region_sub_table = get_region_summary( - sum_stat_tab, phenotype_ids, significance_treshold=significance_treshold) + sum_stat_tab, phenotype_ids, significance_treshold=significance_treshold + ) hdf_work.append( - "Regions", - region_sub_table, min_itemsize=region_sub_table_min_itemsizes + "Regions", region_sub_table, min_itemsize=region_sub_table_min_itemsizes ) file_progress = open(progress_path, "w") @@ -545,16 +676,24 @@ def create_worktable_file( np.array( [ [ - sum((jost_min < significance_treshold) & - (pval_min < significance_treshold)), - sum((jost_min < significance_treshold) & - (pval_min > significance_treshold)), + sum( + (jost_min < significance_treshold) + & (pval_min < significance_treshold) + ), + sum( + (jost_min < significance_treshold) + & (pval_min > significance_treshold) + ), ], [ - sum((jost_min > significance_treshold) & - (pval_min < significance_treshold)), - sum((jost_min > significance_treshold) & - (pval_min > significance_treshold)), + sum( + (jost_min > significance_treshold) + & (pval_min < significance_treshold) + ), + sum( + (jost_min > significance_treshold) + & (pval_min > significance_treshold) + ), ], ] ) @@ -593,10 +732,10 @@ def binary_code_Big(dico_index_y, *args): Codage = int(Chaine, 2) - if (not (Codage in dico_index_y)): + if not (Codage in dico_index_y): dico_index_y[Codage] = [] for indice, valeur in enumerate(args): - if (valeur == 1): + if valeur == 1: dico_index_y[Codage].append(indice) return Codage @@ -610,7 +749,7 @@ def store_pattern(dico_z, dico_index_x, *colonne): Index = int(colonne[0]) Codage = int(colonne[1]) - if (not (Codage in dico_z)): + if not (Codage in dico_z): dico_z[Codage] = [] dico_index_x[Codage] = [] @@ -628,10 +767,12 @@ def compute_frequent_missing_pattern(sum_stat_tab): """ Compute the frequency of missing pattern in the dataset """ - Pheno_is_present = 1- sum_stat_tab.isnull() + Pheno_is_present = 1 - 
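
# UNIVARIATE_MIN_QVAL above is a Bonferroni-style correction: the smallest
# univariate p-value times the number of phenotypes actually observed for the
# SNP, capped at 1. A worked instance with illustrative numbers:
min_pval = 2.0e-9   # smallest two-sided p-value across the selected phenotypes
n_observed = 12     # phenotypes with a non-missing z-score for this SNP
qval = min(min_pval * n_observed, 1.0)   # 2.4e-8
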
sum_stat_tab.isnull() # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes - patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply(lambda x: binary_code(*x), axis=1) + patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply( + lambda x: binary_code(*x), axis=1 + ) pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) n_pattern = pattern_frequency.shape[0] @@ -647,17 +788,17 @@ def compute_frequent_missing_pattern_Big(sum_stat_tab): """ dico_index_y = {} - Pheno_is_present = 1- sum_stat_tab.isnull() + Pheno_is_present = 1 - sum_stat_tab.isnull() # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes - patterns_missing = Pheno_is_present[Pheno_is_present.columns] \ - .apply(lambda x: binary_code_Big(dico_index_y, *x), axis=1) - + patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply( + lambda x: binary_code_Big(dico_index_y, *x), axis=1 + ) pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) n_pattern = pattern_frequency.shape[0] print("Number of pattern {}".format(n_pattern)) - frequent_pattern = pattern_frequency.index.tolist() + frequent_pattern = pattern_frequency.index.tolist() return patterns_missing, frequent_pattern, dico_index_y @@ -675,11 +816,9 @@ def stringize_dataframe_region_chr(dataframe: DataFrame): :return: The dataframe with converted Region and CHR columns :rtype: pandas.DataFrame """ - dataframe["Region"] = dataframe["Region"].apply( - lambda x: "Region" + str(x)) + dataframe["Region"] = dataframe["Region"].apply(lambda x: "Region" + str(x)) dataframe["CHR"] = dataframe["CHR"].apply(lambda x: "chr" + str(x)) - dataframe["JASS_PVAL"] = dataframe["JASS_PVAL"].apply( - lambda x: str(signif(x, 4))) + dataframe["JASS_PVAL"] = dataframe["JASS_PVAL"].apply(lambda x: str(signif(x, 4))) return dataframe @@ -713,19 +852,22 @@ def get_worktable_genomedata(project_hdf_path: str): :rtype: str """ region_subtable = stringize_dataframe_region_chr( - read_hdf(project_hdf_path, "Regions")) + read_hdf(project_hdf_path, "Regions") + ) - region_subtable.rename(index=str, columns={ - 'JASS_PVAL': 'JOSTmin'}, inplace=True) + region_subtable.rename(index=str, columns={"JASS_PVAL": "JOSTmin"}, inplace=True) - region_subtable['PVALmin'] = region_subtable['UNIVARIATE_MIN_PVAL'] - region_subtable['PVALmin'] = region_subtable['PVALmin']. apply( - lambda x: str(signif(x, 4))) + region_subtable["PVALmin"] = region_subtable["UNIVARIATE_MIN_PVAL"] + region_subtable["PVALmin"] = region_subtable["PVALmin"].apply( + lambda x: str(signif(x, 4)) + ) return region_subtable.to_csv(index=False) -def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = None, region: str = None): +def get_worktable_local_manhattan_data( + project_hdf_path: str, chromosome: str = None, region: str = None +): """ Read and return the SumStatTab dataframe from a worktable file for a given chromosome and region for the Manhattan plot @@ -739,20 +881,24 @@ def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = :return: The dataframe subset corresponding to the chromosome and region, as a CSV formatted text :rtype: str """ - if ((chromosome is None) and (region is None)): + if (chromosome is None) and (region is None): # Local analysis : the file project_hdf_path contains only useful information. 
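
# The pattern codes above come from reading the presence flags as a base-2
# number, as binary_code_Big does with int(Chaine, 2); sketch:
row = [1, 0, 1, 1]                            # observed (1) / missing (0)
code = int("".join(str(v) for v in row), 2)   # "1011" -> 11
# Each distinct missingness pattern maps to one integer key, the same key
# used to cache pseudo-inverses; the comments above note the coding is only
# guaranteed up to 64 phenotypes.
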
# No data filter is needed - dataframe = read_hdf(project_hdf_path, "SumStatTab", - columns=["Region", "CHR", "position", - "snp_ids", "JASS_PVAL"]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"], + ) else: # Genome full analysis region_int = region[6:] chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - columns=["Region", "CHR", "position", - "snp_ids", "JASS_PVAL"], - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"], + where=["Region=" + str(region_int), "CHR=" + str(chromosome_int)], + ) dataframe = stringize_dataframe_region_chr(dataframe) dataframe = dataframe.sort_values("position") @@ -760,7 +906,9 @@ def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = return dataframe.to_csv(index=False) -def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = None, region: str = None): +def get_worktable_local_heatmap_data( + project_hdf_path: str, chromosome: str = None, region: str = None +): """ Read and return the SumStatTab dataframe from a worktable file for a given chromosome and region for the Heatmap plot @@ -775,7 +923,7 @@ def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = No pivoted and as a CSV formatted text :rtype: str """ - if ((chromosome is None) and (region is None)): + if (chromosome is None) and (region is None): # Local analysis : the file project_hdf_path contains only useful information. # No data filter is needed dataframe = read_hdf(project_hdf_path, "SumStatTab") @@ -783,16 +931,28 @@ def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = No # Genome full analysis region_int = region[6:] chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + where=["Region=" + str(region_int), "CHR=" + str(chromosome_int)], + ) dataframe = stringize_dataframe_region_chr(dataframe) dataframe = dataframe.sort_values("position") - dataframe.drop(["Region", "CHR", "position", "JASS_PVAL", "MiddlePosition", "UNIVARIATE_MIN_PVAL", - "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"], - axis=1, - inplace=True, - ) + dataframe.drop( + [ + "Region", + "CHR", + "position", + "JASS_PVAL", + "MiddlePosition", + "UNIVARIATE_MIN_PVAL", + "UNIVARIATE_MIN_QVAL", + "PLEIOTROPY_INDEX", + ], + axis=1, + inplace=True, + ) dataframe.rename(columns={"snp_ids": "ID"}, inplace=True) column_order = list(dataframe.ID) pivoted_dataframe = dataframe.pivot_table(columns="ID") @@ -822,11 +982,10 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35) """ # path of the lock that indicates that the csv file is not available - the_lock_path = os.path.join(os.path.dirname(project_hdf_path), - "the_lock.txt") - if (os.path.isfile(the_lock_path)): + the_lock_path = os.path.join(os.path.dirname(project_hdf_path), "the_lock.txt") + if os.path.isfile(the_lock_path): # The lock is set on - if (os.path.isfile(csv_file)): + if os.path.isfile(csv_file): # An error occurred: the csv file must not exist if the lock is set # The existing csv file is deleted os.remove(csv_file) @@ -837,17 +996,20 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35) bsup = (chunk + 1) * chunk_size # 
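
# When a list is passed as where above, pandas/PyTables combines the entries
# with a logical AND, so both the Region and CHR conditions must hold.
# Sketch (the project file name follows the comments in this module):
from pandas import read_hdf

dataframe = read_hdf(
    "workTable.hdf5",
    "SumStatTab",
    columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"],
    where=["Region=12", "CHR=1"],
)
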
read workTable.hdf5 - df_for_csv = read_hdf(project_hdf_path, "SumStatTab", - where='Region >= {0} and Region < {1}'.format(binf, bsup)) + df_for_csv = read_hdf( + project_hdf_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(binf, bsup), + ) # append the data to the csv file - with open(csv_file, 'a') as f: + with open(csv_file, "a") as f: df_for_csv.to_csv(f, header=f.tell() == 0) # The lock is deleted os.remove(the_lock_path) - if (os.path.isfile(csv_file)): + if os.path.isfile(csv_file): The_file_is_available = True else: The_file_is_available = False diff --git a/jass/server.py b/jass/server.py index a6900afbf9c4b68ca8d5413578ebb403a5d5031b..a667b5ec6beb5a5178a2d9d502a993fbbcfad70a 100644 --- a/jass/server.py +++ b/jass/server.py @@ -1,30 +1,396 @@ #!/usr/bin/env python3 -""" -Module that creates the flask app used to run JASS as a web server -""" -import connexion -import flask +import os -from .encoder import JSONEncoder -from .config import config +from flask import Flask, redirect, send_file +from flask.views import MethodView +import marshmallow as ma +from flask_smorest import Api, Blueprint, abort +from webargs.flaskparser import FlaskParser +from jass.config import config +from jass.models.phenotype import get_available_phenotypes +from jass.models.project import Project +from jass.tasks import create_project -class JassFlaskApp(connexion.FlaskApp): + +class PhenotypeSchema(ma.Schema): + id = ma.fields.String() + consortium = ma.fields.String() + outcome = ma.fields.String() + full_name = ma.fields.String() + typ = ma.fields.String() + ref = ma.fields.String() + ref_link = ma.fields.String() + data_link = ma.fields.String() + data_path = ma.fields.String() + + +class ProjectParamsSchema(ma.Schema): + class Meta: + unknown = ma.EXCLUDE + + phenotypeID = ma.fields.List(ma.fields.String()) + + +class LocalProjectParamsSchema(ma.Schema): + class Meta: + unknown = ma.EXCLUDE + + phenotypeID = ma.fields.List(ma.fields.String()) + chromosome = ma.fields.Integer() + start = ma.fields.Integer() + end = ma.fields.Integer() + + +class ProjectStatusSchema(ma.Schema): + STATUS_VALUES = ["DOES_NOT_EXIST", "CREATING", "READY", "ERROR"] + global_manhattan = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + quadrant_plot_status = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + worktable = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + + +class ProjectSchema(ma.Schema): + id = ma.fields.String() + status = ma.fields.Nested(ProjectStatusSchema) + phenotypes = ma.fields.List(ma.fields.Nested(PhenotypeSchema())) + progress = ma.fields.String() + + +blp_phenotypes = Blueprint( + "phenotypes", + "phenotypes", + url_prefix="/phenotypes", + description="Operations on phenotypes", +) + +blp_projects = Blueprint( + "projects", "projects", url_prefix="/projects", description="Operations on projects" +) + +blp_local_projects = Blueprint( + "local_projects", + "local_projects", + url_prefix="/local_projects", + description="Operations on local projects", +) + + +def get_phenotypes(): + return get_available_phenotypes(os.path.join(config["DATA_DIR"], "initTable.hdf5")) + + +@blp_phenotypes.route("") +class PhenotypesMethodView(MethodView): + @blp_phenotypes.response(200, PhenotypeSchema(many=True)) + def get(self): + """List phenotypes""" + return get_phenotypes() + + +@blp_projects.route("") +class ProjectCreateMethodView(MethodView): + @blp_projects.arguments(ProjectParamsSchema(), location="form") + @blp_projects.response(200, ProjectSchema()) 
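
# The header=f.tell() == 0 idiom used above writes the CSV header only for
# the first appended chunk, since the file position is 0 only while the file
# is still empty. Minimal sketch:
import pandas as pd

for chunk_df in (pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [2]})):
    with open("out.csv", "a") as f:
        chunk_df.to_csv(f, header=f.tell() == 0)
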
+ def post(self, parameters): + """List projects""" + phenotype_ids = [ + phenotype_id + for ids_with_commas in parameters["phenotypeID"] + for phenotype_id in ids_with_commas.split(",") + ] + phenotypes = list(filter(lambda d: d.id in phenotype_ids, get_phenotypes())) + return create_project([p.id for p in phenotypes], get_phenotypes()) + + +@blp_local_projects.route("") +class LocalProjectCreateMethodView(MethodView): + @blp_projects.arguments(LocalProjectParamsSchema(), location="form") + @blp_projects.response(200, ProjectSchema()) + def post(self, parameters): + """List projects""" + phenotype_ids = [ + phenotype_id + for ids_with_commas in parameters["phenotypeID"] + for phenotype_id in ids_with_commas.split(",") + ] + phenotypes = list(filter(lambda d: d.id in phenotype_ids, get_phenotypes())) + return create_project( + [p.id for p in phenotypes], + get_phenotypes(), + str(parameters["chromosome"]), + str(parameters["start"]), + str(parameters["end"]), + ) + + +@blp_projects.route("/") +class ProjectDetailMethodView(MethodView): + @blp_projects.response(200, ProjectSchema()) + def get(self, project_id): + return Project(id=project_id) + + +@blp_projects.route("//csv_status") +class ProjectCSVStatusMethodView(MethodView): + def get(self, project_id): + return Project(id=project_id).get_csv_file_generation() + + +@blp_projects.route("//summary") +class ProjectSummaryMethodView(MethodView): + @blp_projects.response(200, ProjectSchema()) + def get(self, project_id): + return Project(id=project_id).get_project_summary_statistics() + + +@blp_projects.route("//genome") +class ProjectGenomeMethodView(MethodView): + + # @blp_projects.response(200, headers={"Content-Type": "text/csv"}) + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_genomedata(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + abort(404) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//genome_full") +class ProjectGenomeFullMethodView(MethodView): + def get(self, project_id): + Type_of_Analysis = Project(id=project_id).get_type_of_analysis() + if(Type_of_Analysis == Project.LOCAL_ANALYSIS): + Fichier = "local_analysis_result.csv" + else: + Fichier = "genome_full.csv" + + try: + return send_file( + Project(id=project_id).get_csv_path(), + mimetype="text/csv", + as_attachment=True, + attachment_filename=Fichier, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//globalmanhattan") +class ProjectGlobalManhattanMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_global_manhattan_plot_path(), + mimetype="image/png", + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": 
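
# Hedged client sketch for the creation view above (requests assumed; host,
# port and phenotype ids are illustrative). phenotypeID entries may be
# comma-separated; the view splits them before filtering:
import requests

resp = requests.post(
    "http://localhost:8080/api/projects",
    data={"phenotypeID": "z_IHEC_MONOP,z_RA_RA"},   # form-encoded, as the UI sends
)
project = resp.json()
print(project["id"], project["status"]["worktable"])
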
"text/plain; charset=utf-8"}, + ) + elif status["global_manhattan"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//quadrant") +class ProjectQuadrantMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_quadrant_plot_path(), + mimetype="image/png", + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["quadrant_plot_status"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//manhattan//") +class ProjectLocalManhattanMethodView(MethodView): + def get(self, project_id, chromosome, region): + try: + return ( + Project(id=project_id).get_project_local_manhattan_data( + chromosome, region + ), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//zoom_manhattan") +class ProjectZoomManhattanMethodView(MethodView): + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_local_manhattan_data(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//zoom_heatmap") +class ProjectZoomHeatmapMethodView(MethodView): + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_local_heatmap_data(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("//zoomplot") +class ProjectZoomPlotMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_zoom_plot_path(), mimetype="image/png" + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +class JassApp(Flask): """ - JassFlaskApp subclasses connexion's FlaskApp only to customize the static url path + JassApp builds the JASS Flask 
application """ + def __init__(self): + self.flask_app = Flask(__name__, static_url_path="", static_folder="static") + self.flask_app.config["API_TITLE"] = "JASS API" + self.flask_app.config["API_VERSION"] = "v2.0" + self.flask_app.config["OPENAPI_VERSION"] = "3.0.2" + self.flask_app.route("/")(self.redirect_to_index) + self.api = Api(self.flask_app) + def create_app(self): - app = flask.Flask(self.import_name, static_url_path="", static_folder="static") - app.json_encoder = JSONEncoder - app.route("/")(self.redirect_to_index) - return app + return self.flask_app def redirect_to_index(self): - return flask.redirect("index.html") + return redirect("index.html") + + def register_api_blueprint(self, blp): + self.api.register_blueprint(blp, url_prefix=f"/api/{blp.url_prefix}") -def get_jass_app(): - app = JassFlaskApp(__name__, specification_dir="./swagger/") - app.add_api("swagger.yaml", arguments={"title": "JASS"}, base_path="/api") - return app +jass_app = JassApp() +jass_app.register_api_blueprint(blp_phenotypes) +jass_app.register_api_blueprint(blp_projects) +jass_app.register_api_blueprint(blp_local_projects) diff --git a/jass/static/directLink.html b/jass/static/directLink.html index fd91f5533de584d1d9e8057f43caa726964e7afb..926cedfd3ce0e47c878887625bdcc0c388f4f0c8 100644 --- a/jass/static/directLink.html +++ b/jass/static/directLink.html @@ -3,7 +3,7 @@ @@ -111,10 +111,10 @@ } } if (Local_Analysis == true) { - var The_Project = "/api/local_project/" + - chromosome + "/" + - start + "/" + - end; + phe['chromosome'] = chromosome; + phe['start'] = start; + phe['end'] = end; + var The_Project = "/api/local_projects"; } else{ var The_Project = "/api/projects"; diff --git a/jass/static/index.html b/jass/static/index.html index ed0b6c46858cff5ed45e069a7976ae48b6e1558e..eaeb43e8ecaaa41364368298c4fa559e347dc537 100644 --- a/jass/static/index.html +++ b/jass/static/index.html @@ -60,10 +60,10 @@

JASS: command line and web interface for the joint analysis of GWAS results
Hanna Julienne, Pierre Lechat, Vincent Guillemot, Carla Lasry, Chunzi Yao, Robinson Araud, Vincent Laville, Bjarni Vilhjalmsson, Hervé Ménager, Hugues Aschard
- in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, https://doi.org/10.1093/nargab/lqaa003

+ in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, https://doi.org/10.1093/nargab/lqaa003

Multitrait genetic-phenotype associations to connect disease variants and biological mechanisms
Hanna Julienne, Vincent Laville, Zachary R. McCaw, Zihuai He, Vincent Guillemot, Carla Lasry, Andrey Ziyatdinov, Amaury Vaysse, Pierre Lechat, Hervé Ménager, Wilfried Le Goff, Marie-Pierre Dube, Peter Kraft, Iuliana Ionita-Laza, Bjarni J. Vilhjálmsson, Hugues Aschard
- preprint in: biorxiv, https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full

+ preprint in: bioRxiv, https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full

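
Both static pages in this diff now send the region bounds as form fields to the new /api/local_projects endpoint instead of encoding them in the URL (see the selectPhenotypes_for_region.html hunk just below). A hedged Python equivalent of that page's polling loop, with requests assumed and ids, coordinates and interval illustrative:

    import time
    import requests

    phe = {"phenotypeID": "z_IHEC_MONOP,z_RA_RA",
           "chromosome": 1, "start": 1130727, "end": 1833495}
    while True:
        data = requests.post("http://localhost:8080/api/local_projects",
                             data=phe).json()
        if data["status"]["worktable"] in ("READY", "ERROR"):
            break
        time.sleep(2)   # the page re-posts periodically in the same way
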
diff --git a/jass/static/selectPhenotypes_for_region.html b/jass/static/selectPhenotypes_for_region.html index eb8db36bcdf6f8c81005e1fc755c08e74ca0e3df..51b518a53dc7faa172dc2960b775b7561f55ab95 100644 --- a/jass/static/selectPhenotypes_for_region.html +++ b/jass/static/selectPhenotypes_for_region.html @@ -262,6 +262,10 @@ if(selectedString != ''){ var phe = {}; phe['phenotypeID'] = selectedString; + phe['chromosome'] = selectedChromosome; + phe['start'] = region_start; + phe['end'] = region_end; + console.log("!!! "+selectedString); console.log("!!! "+phe['phenotypeID']); @@ -277,8 +281,7 @@ var JASS_progress = 0; var Old_progress = 0; var getProjectStatus = function(){ - $.post( "/api/local_project/" + selectedChromosome + "/" + region_start + "/" + region_end, - phe).done(function( data ) { + $.post( "/api/local_projects", phe).done(function( data ) { status = data.status.worktable; console.log("!! status "+status); JASS_progress = data.progress; diff --git a/jass/swagger/swagger.yaml b/jass/swagger/swagger.yaml index ebde594444f28ce9dcf627fce503388ec0745f54..03378e691554407361bd2c7df356863cd0b2eeca 100644 --- a/jass/swagger/swagger.yaml +++ b/jass/swagger/swagger.yaml @@ -1,595 +1,566 @@ -openapi: 3.0.0 -info: - version: 0.0.0 - title: JASS API Specification -paths: - /phenotypes: - get: - description: | - Gets the list of available phenotypes - operationId: phenotypes_get - responses: - "200": - description: List of the available phenotypes - content: - "application/json": - schema: - type: array - title: ArrayOfPhenotypes - items: - $ref: "#/components/schemas/Phenotype" - example: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - x-openapi-router-controller: jass.controllers.default_controller - /projects: - post: - description: | - Create a new project from a selection of phenotypes - operationId: projects_post - requestBody: - content: - application/x-www-form-urlencoded: - schema: - type: object - properties: - phenotypeID: - description: IDs of the phenotypes selected for the project - type: array - items: - type: string - required: - - phenotypeID - example: - - z_IHEC_MONOP - - z_RA_RA - responses: - "200": - description: Project created - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - examples: - Creating: - value: - id: "bca9d414e0f9a67b9e0d2131a47c316c" - phenotypes: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 
2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - status": - "global_manhattan": "CREATING" - "quadrant_plot_status": "CREATING" - "worktable": "CREATING" - progress": - "progress": "0" - Ready: - value: - id: "bca9d414e0f9a67b9e0d2131a47c316c" - phenotypes: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - status": - "global_manhattan": "READY" - "quadrant_plot_status": "READY" - "worktable": "READY" - progress": - "progress": "100" - x-openapi-router-controller: jass.controllers.default_controller - - - "/local_project/{chromosome}/{start}/{end}": - post: - description: | - Create a new local project from a selection of phenotypes - operationId: local_project_post - parameters: - - name: chromosome - in: path - description: chromosome number - required: true - schema: - type: string - - name: start - in: path - description: start position of the region - required: true - schema: - type: string - - name: end - in: path - description: end position of the region - required: true - schema: - type: string - requestBody: - content: - application/x-www-form-urlencoded: - schema: - type: object - properties: - phenotypeID: - description: IDs of the phenotypes selected for the project - type: array - items: - type: string - required: - - phenotypeID - responses: - "200": - description: Project created - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}": - get: - description: | - Retrieve a project definition - operationId: projects_project_idget - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: Retrieved project - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - example: - id: "bca9d414e0f9a67b9e0d2131a47c316c" - status": - "global_manhattan": "READY" - "quadrant_plot_status": "READY" - "worktable": "READY" - progress": - "progress": "100" - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/summary": - get: - description: Retrieve summary statistics for a given project - operationId: projects_project_id_summary_statistics - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: Summary statistics in JSON - content: - "*/*": - schema: - type: string - title: Project summary statistics - example: - "JOSTSignif": - "NoPhenoSignif": 10 - "PhenoSignif": 210 - "NoJOSTSignif": - "NoPhenoSignif": 1470 - "PhenoSignif": 14 - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/csv_status": - get: - description: | - Retrieve the generation status of the genome full csv file - operationId: projects_project_id_csv_status_get - parameters: - - name: 
projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Generation status of the genome full csv file - content: - text/csv; charset=utf-8: - schema: - type: string - title: csv_file_generation - example: | - READY - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/genome": - get: - description: | - Retrieve whole genome summary data for a given project - operationId: projects_project_id_genome_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Whole genome summary data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Genome data - example: | - Region,MiddlePosition,snp_ids,CHR,position,Ref_allele,Alt_allele,JOSTmin,UNIVARIATE_MIN_PVAL,signif_status,z_IHEC_MONOP,z_RA_RA - Region0,0.0,rs61997853,chr15,20001087,C,A,0.3085010612493116,0.300300000000003,None,-1.0357902654077036, - Region1,951595.0,rs28635530,chr1,1649392,T,C,4.038788020606384e-06,2.7559999999999873e-06,None,4.688213804974398,2.999976992703393 - Region2,2737671.5,rs72644697,chr1,2533605,A,G,2.4600636176583336e-10,1.6188949607934016e-10,Both,-1.4356568827860683,-6.393727818324495 - Region3,3981773.5,rs12145992,chr1,3760309,A,G,0.0002538976722549933,0.00026034845141981964,None,-1.6164363711150218,3.651859369008055 - Region4,5147352.0,rs2649072,chr1,5754088,G,A,0.0007863952492527496,0.0006378999999999985,None,3.4149658871961184,-2.999976992703393 - Region5,6580614.0,rs2986741,chr1,6548774,G,A,0.0013472918321710914,0.0011119999999999993,None,-3.260540717377886,2.726551316504396 - Region6,8306267.0,rs79412885,chr1,9241839,A,G,2.0889091093474285e-13,8.106999999999937e-14,Both,7.46857160133221,-1.2003588580308502 - Region7,10086091.5,rs113829298,chr1,10061038,T,C,4.3158209846991565e-05,6.135999999999996e-06,None,-4.5216481219798474,0.5100734569685951 - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/genome_full": - get: - description: | - Retrieve whole genome complete (not summarized) data for a given project - operationId: projects_project_id_genome_full_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: > - Retrieve whole genome complete (not summarized) data for a given - project in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Genome data - example: | - Region,CHR,snp_ids,position,Ref_allele,Alt_allele,MiddlePosition,JASS_PVAL,UNIVARIATE_MIN_PVAL,UNIVARIATE_MIN_QVAL,PLEIOTROPY_INDEX,z_IHEC_MONOP,z_RA_RA - 194218,0,6,rs530120680,63980,G,A,0.0,0.6556994470547299,0.6501999999999999,0.6501999999999999,,0.4534843639637209, - 194219,0,6,rs561313667,63979,T,C,0.0,0.538722344719537,0.5318,0.5318,,0.6252606526209208, - 194220,0,15,rs113772187,20000538,T,C,0.0,0.5218942699938458,0.5148000000000001,0.5148000000000001,,-0.651381887083556, - 194221,0,15,rs61997852,20001079,C,A,0.0,0.33819737748654505,0.33000000000000274,0.33000000000000274,,-0.9741138770593036, - 194222,0,15,rs61997853,20001087,C,A,0.0,0.3085010612493116,0.300300000000003,0.300300000000003,,-1.0357902654077036, - 
867,1,1,rs10454464,1833495,A,G,951595.0,0.2523888759643953,0.19000000000000383,0.38000000000000766,,-0.9817967289175548,1.3105791121681174 - 4836,1,1,rs10907175,1130727,C,A,951595.0,0.3313846158840952,0.21180000000000354,0.4236000000000071,,1.2486311872236304,0.8380752842791193 - 4837,1,1,rs10907176,1130855,C,T,951595.0,0.3455642965805482,0.2251000000000033,0.4502000000000066,,1.213078000845954,0.8380752842791193 - 4838,1,1,rs10907185,1733219,A,G,951595.0,0.3919109214945312,0.25000000000000333,0.5000000000000067,,-0.7010048315295425,1.1503493803760003 - 4839,1,1,rs10907187,1759054,A,G,951595.0,0.16161974795348924,0.09099999999999998,0.18199999999999997,,-0.8114627085037827,1.6901461375274702 - 4840,1,1,rs10907188,1768894,T,C,951595.0,0.15366147518120524,0.08599999999999992,0.17199999999999985,,-0.8149516264832889,1.7168860184310413 - 4841,1,1,rs10907190,1773772,A,G,951595.0,0.12979175667585227,0.07999999999999982,0.15999999999999964,,-0.938281041511616,1.7506860712521708 - 4842,1,1,rs10907193,1805391,A,G,951595.0,0.09562672355608258,0.06299999999999988,0.12599999999999975,,-1.0405165049626888,1.8591914944718688 - 4843,1,1,rs10907194,1712230,T,C,951595.0,0.2669995168398967,0.16000000000000425,0.3200000000000085,,-0.7600913211933399,1.4050715603096189 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/globalmanhattan": - get: - description: | - Retrieve global manhattan plot for a given project - operationId: projects_project_id_global_manhattan_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Global manhattan plot in PNG format - content: - image/png: - schema: - type: string - title: Global manhattan plot - example: - externalValue: 'globalmanhattan_example.png' - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoomplot": - get: - description: | - Retrieve zoom plot for a given local project - operationId: projects_project_id_zoom_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - responses: - "200": - description: | - Zoom plot in PNG format - content: - image/png: - schema: - type: string - title: Zoom plot - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/quadrant": - get: - description: | - Retrieve quadrant plot for a given project - operationId: projects_project_id_quadrant_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Quadrant plot in PNG format - content: - image/png: - schema: - type: string - title: Quadrant plot - example: - externalValue: 'quadrant_example.png' - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/manhattan/{chromosome}/{region}": - get: - description: Retrieve local manhattan data - operationId: projects_project_id_local_manhattan_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - - name: chromosome - in: path - description: chromosome number - required: true - schema: - type: string - example: "chr1" - - name: region - in: path - description: region number - required: true - schema: - type: 
string - example: "Region1" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local manhattan plot data - example: | - Region,CHR,position,snp_ids,JASS_PVAL - Region1,chr1,636285,rs545945172,0.7216577092326355 - Region1,chr1,662414,rs371628865,0.6725666758876788 - Region1,chr1,662622,rs61769339,0.405683282952658 - Region1,chr1,665266,rs539032812,0.3348008598497511 - Region1,chr1,693731,rs12238997,0.4952464035829177 - Region1,chr1,701835,rs189800799,0.666563580963709 - Region1,chr1,706778,rs113462541,0.861641963454806 - Region1,chr1,711310,rs200531508,0.07463266395489108 - Region1,chr1,714019,rs114983708,0.6667198743938074 - Region1,chr1,715265,rs12184267,0.6666055494294745 - Region1,chr1,715367,rs12184277,0.7657858702655146 - Region1,chr1,720381,rs116801199,0.6816390671665746 - Region1,chr1,723742,rs28375378,0.7124933618852456 - Region1,chr1,724324,rs28692873,0.9212425499680825 - Region1,chr1,725196,rs377099097,0.594983644175122 - Region1,chr1,725389,rs375619475,0.7032290172253173 - Region1,chr1,727841,rs116587930,0.9078685880041112 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/heatmap/{chromosome}/{region}": - get: - description: Retrieve local heatmap data - operationId: projects_project_id_local_heatmap_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - - name: chromosome - in: path - description: chromosome number - required: true - schema: - type: string - example: "chr1" - - name: region - in: path - description: region number - required: true - schema: - type: string - example: "Region1" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local heatmap plot data - example: | - ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 - z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 - z_RA_RA,,,,,, - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoom_manhattan": - get: - description: Retrieve local manhattan data - operationId: projects_project_id_zoom_manhattan_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local manhattan plot data - example: | - Region,CHR,position,snp_ids,JASS_PVAL - Region1,chr1,636285,rs545945172,0.7216577092326355 - Region1,chr1,662414,rs371628865,0.6725666758876788 - Region1,chr1,662622,rs61769339,0.405683282952658 - Region1,chr1,665266,rs539032812,0.3348008598497511 - Region1,chr1,693731,rs12238997,0.4952464035829177 - Region1,chr1,701835,rs189800799,0.666563580963709 - Region1,chr1,706778,rs113462541,0.861641963454806 - Region1,chr1,711310,rs200531508,0.07463266395489108 - Region1,chr1,714019,rs114983708,0.6667198743938074 - Region1,chr1,715265,rs12184267,0.6666055494294745 - Region1,chr1,715367,rs12184277,0.7657858702655146 - Region1,chr1,720381,rs116801199,0.6816390671665746 - Region1,chr1,723742,rs28375378,0.7124933618852456 - Region1,chr1,724324,rs28692873,0.9212425499680825 - 
Region1,chr1,725196,rs377099097,0.594983644175122 - Region1,chr1,725389,rs375619475,0.7032290172253173 - Region1,chr1,727841,rs116587930,0.9078685880041112 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoom_heatmap": - get: - description: Retrieve local heatmap data - operationId: projects_project_id_zoom_heatmap_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local heatmap plot data - example: | - ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 - z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 - z_RA_RA,,,,,, - x-openapi-router-controller: jass.controllers.default_controller -components: - schemas: - Phenotype: - properties: - id: - type: string - consortium: - type: string - outcome: - type: string - full_name: - type: string - type: - type: string - ref: - type: string - ref_link: - type: string - data_link: - type: string - data_path: - type: string - Project: - properties: - id: - type: string - status: - type: string - progress: - type: string - outcome: - type: array - items: - $ref: "#/components/schemas/Phenotype" - +openapi: 3.0.0 +info: + version: 0.0.0 + title: JASS API Specification +paths: + /phenotypes: + get: + description: | + Gets the list of available phenotypes + operationId: phenotypes_get + responses: + "200": + description: List of the available phenotypes + content: + "application/json": + schema: + type: array + title: ArrayOfPhenotypes + items: + $ref: "#/components/schemas/Phenotype" + example: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + /projects: + post: + description: | + Create a new project from a selection of phenotypes + operationId: projects_post + requestBody: + content: + application/x-www-form-urlencoded: + schema: + type: object + properties: + phenotypeID: + description: IDs of the phenotypes selected for the project + type: array + items: + type: string + required: + - phenotypeID + example: + - z_IHEC_MONOP + - z_RA_RA + responses: + "200": + description: Project created + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + examples: + Creating: + value: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + phenotypes: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 
+                        "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342"
+                        "type": "Immunity"
+                    "status":
+                      "global_manhattan": "CREATING"
+                      "quadrant_plot_status": "CREATING"
+                      "worktable": "CREATING"
+                    "progress":
+                      "progress": "0"
+                Ready:
+                  value:
+                    id: "bca9d414e0f9a67b9e0d2131a47c316c"
+                    phenotypes:
+                      - "consortium": "IHEC"
+                        "data_link": "http://www.bloodcellgenetics.org"
+                        "full_name": "Monocyte percentage of white cells"
+                        "id": "z_IHEC_MONOP"
+                        "outcome": "MONOP"
+                        "ref": "Astle et al. 2016"
+                        "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262"
+                        "type": "Cellular"
+                      - "consortium": "RA"
+                        "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm"
+                        "full_name": "Rheumatoid Arthritis"
+                        "id": "z_RA_RA"
+                        "outcome": "RA"
+                        "ref": "Okada et al. 2014"
+                        "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342"
+                        "type": "Immunity"
+                    "status":
+                      "global_manhattan": "READY"
+                      "quadrant_plot_status": "READY"
+                      "worktable": "READY"
+                    "progress":
+                      "progress": "100"
+  /local_project:
+    post:
+      description: |
+        Create a new local project from a selection of phenotypes
+      operationId: local_project_post
+      requestBody:
+        content:
+          application/x-www-form-urlencoded:
+            schema:
+              type: object
+              properties:
+                phenotypeID:
+                  description: IDs of the phenotypes selected for the project
+                  type: array
+                  items:
+                    type: string
+                chromosome:
+                  description: chromosome number
+                  type: string
+                start:
+                  description: start position of the region
+                  type: string
+                end:
+                  description: end position of the region
+                  type: string
+              required:
+                - phenotypeID
+                - chromosome
+                - start
+                - end
+      responses:
+        "200":
+          description: Project created
+          content:
+            "application/json":
+              schema:
+                $ref: "#/components/schemas/Project"
+  "/projects/{projectID}":
+    get:
+      description: |
+        Retrieve a project definition
+      operationId: projects_project_id_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: Retrieved project
+          content:
+            "application/json":
+              schema:
+                $ref: "#/components/schemas/Project"
+              example:
+                id: "bca9d414e0f9a67b9e0d2131a47c316c"
+                "status":
+                  "global_manhattan": "READY"
+                  "quadrant_plot_status": "READY"
+                  "worktable": "READY"
+                "progress":
+                  "progress": "100"
+  "/projects/{projectID}/summary":
+    get:
+      description: Retrieve summary statistics for a given project
+      operationId: projects_project_id_summary_statistics
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: Summary statistics in JSON
+          content:
+            "*/*":
+              schema:
+                type: string
+                title: Project summary statistics
+                example:
+                  "JOSTSignif":
+                    "NoPhenoSignif": 10
+                    "PhenoSignif": 210
+                  "NoJOSTSignif":
+                    "NoPhenoSignif": 1470
+                    "PhenoSignif": 14
+  "/projects/{projectID}/csv_status":
+    get:
+      description: |
+        Retrieve the generation status of the genome full csv file
+      operationId: projects_project_id_csv_status_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Generation status of the genome full csv file
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: csv_file_generation
+                example: |
+                  READY
+  "/projects/{projectID}/genome":
+    get:
+      description: |
+        Retrieve whole genome summary data for a given project
+      operationId: projects_project_id_genome_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Whole genome summary data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Genome data
+                example: |
+                  Region,MiddlePosition,snp_ids,CHR,position,Ref_allele,Alt_allele,JOSTmin,UNIVARIATE_MIN_PVAL,signif_status,z_IHEC_MONOP,z_RA_RA
+                  Region0,0.0,rs61997853,chr15,20001087,C,A,0.3085010612493116,0.300300000000003,None,-1.0357902654077036,
+                  Region1,951595.0,rs28635530,chr1,1649392,T,C,4.038788020606384e-06,2.7559999999999873e-06,None,4.688213804974398,2.999976992703393
+                  Region2,2737671.5,rs72644697,chr1,2533605,A,G,2.4600636176583336e-10,1.6188949607934016e-10,Both,-1.4356568827860683,-6.393727818324495
+                  Region3,3981773.5,rs12145992,chr1,3760309,A,G,0.0002538976722549933,0.00026034845141981964,None,-1.6164363711150218,3.651859369008055
+                  Region4,5147352.0,rs2649072,chr1,5754088,G,A,0.0007863952492527496,0.0006378999999999985,None,3.4149658871961184,-2.999976992703393
+                  Region5,6580614.0,rs2986741,chr1,6548774,G,A,0.0013472918321710914,0.0011119999999999993,None,-3.260540717377886,2.726551316504396
+                  Region6,8306267.0,rs79412885,chr1,9241839,A,G,2.0889091093474285e-13,8.106999999999937e-14,Both,7.46857160133221,-1.2003588580308502
+                  Region7,10086091.5,rs113829298,chr1,10061038,T,C,4.3158209846991565e-05,6.135999999999996e-06,None,-4.5216481219798474,0.5100734569685951
+  "/projects/{projectID}/genome_full":
+    get:
+      description: |
+        Retrieve whole genome complete (not summarized) data for a given project
+      operationId: projects_project_id_genome_full_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: >
+            Retrieve whole genome complete (not summarized) data for a given
+            project in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Genome data
+                example: |
+                  Region,CHR,snp_ids,position,Ref_allele,Alt_allele,MiddlePosition,JASS_PVAL,UNIVARIATE_MIN_PVAL,UNIVARIATE_MIN_QVAL,PLEIOTROPY_INDEX,z_IHEC_MONOP,z_RA_RA
+                  194218,0,6,rs530120680,63980,G,A,0.0,0.6556994470547299,0.6501999999999999,0.6501999999999999,,0.4534843639637209,
+                  194219,0,6,rs561313667,63979,T,C,0.0,0.538722344719537,0.5318,0.5318,,0.6252606526209208,
+                  194220,0,15,rs113772187,20000538,T,C,0.0,0.5218942699938458,0.5148000000000001,0.5148000000000001,,-0.651381887083556,
+                  194221,0,15,rs61997852,20001079,C,A,0.0,0.33819737748654505,0.33000000000000274,0.33000000000000274,,-0.9741138770593036,
+                  194222,0,15,rs61997853,20001087,C,A,0.0,0.3085010612493116,0.300300000000003,0.300300000000003,,-1.0357902654077036,
+                  867,1,1,rs10454464,1833495,A,G,951595.0,0.2523888759643953,0.19000000000000383,0.38000000000000766,,-0.9817967289175548,1.3105791121681174
+                  4836,1,1,rs10907175,1130727,C,A,951595.0,0.3313846158840952,0.21180000000000354,0.4236000000000071,,1.2486311872236304,0.8380752842791193
+                  4837,1,1,rs10907176,1130855,C,T,951595.0,0.3455642965805482,0.2251000000000033,0.4502000000000066,,1.213078000845954,0.8380752842791193
+                  4838,1,1,rs10907185,1733219,A,G,951595.0,0.3919109214945312,0.25000000000000333,0.5000000000000067,,-0.7010048315295425,1.1503493803760003
+                  4839,1,1,rs10907187,1759054,A,G,951595.0,0.16161974795348924,0.09099999999999998,0.18199999999999997,,-0.8114627085037827,1.6901461375274702
+                  4840,1,1,rs10907188,1768894,T,C,951595.0,0.15366147518120524,0.08599999999999992,0.17199999999999985,,-0.8149516264832889,1.7168860184310413
+                  4841,1,1,rs10907190,1773772,A,G,951595.0,0.12979175667585227,0.07999999999999982,0.15999999999999964,,-0.938281041511616,1.7506860712521708
+                  4842,1,1,rs10907193,1805391,A,G,951595.0,0.09562672355608258,0.06299999999999988,0.12599999999999975,,-1.0405165049626888,1.8591914944718688
+                  4843,1,1,rs10907194,1712230,T,C,951595.0,0.2669995168398967,0.16000000000000425,0.3200000000000085,,-0.7600913211933399,1.4050715603096189
+  "/projects/{projectID}/globalmanhattan":
+    get:
+      description: |
+        Retrieve global manhattan plot for a given project
+      operationId: projects_project_id_global_manhattan_plot_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Global manhattan plot in PNG format
+          content:
+            image/png:
+              schema:
+                type: string
+                title: Global manhattan plot
+              example:
+                externalValue: "globalmanhattan_example.png"
+  "/projects/{projectID}/zoomplot":
+    get:
+      description: |
+        Retrieve zoom plot for a given local project
+      operationId: projects_project_id_zoom_plot_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: |
+            Zoom plot in PNG format
+          content:
+            image/png:
+              schema:
+                type: string
+                title: Zoom plot
+  "/projects/{projectID}/quadrant":
+    get:
+      description: |
+        Retrieve quadrant plot for a given project
+      operationId: projects_project_id_quadrant_plot_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Quadrant plot in PNG format
+          content:
+            image/png:
+              schema:
+                type: string
+                title: Quadrant plot
+              example:
+                externalValue: "quadrant_example.png"
+  "/projects/{projectID}/manhattan/{chromosome}/{region}":
+    get:
+      description: Retrieve local manhattan data
+      operationId: projects_project_id_local_manhattan_data_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+        - name: chromosome
+          in: path
+          description: chromosome number
+          required: true
+          schema:
+            type: string
+          example: "chr1"
+        - name: region
+          in: path
+          description: region number
+          required: true
+          schema:
+            type: string
+          example: "Region1"
+      responses:
+        "200":
+          description: |
+            Local manhattan plot data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Local manhattan plot data
+                example: |
+                  Region,CHR,position,snp_ids,JASS_PVAL
+                  Region1,chr1,636285,rs545945172,0.7216577092326355
+                  Region1,chr1,662414,rs371628865,0.6725666758876788
+                  Region1,chr1,662622,rs61769339,0.405683282952658
+                  Region1,chr1,665266,rs539032812,0.3348008598497511
+                  Region1,chr1,693731,rs12238997,0.4952464035829177
+                  Region1,chr1,701835,rs189800799,0.666563580963709
+                  Region1,chr1,706778,rs113462541,0.861641963454806
+                  Region1,chr1,711310,rs200531508,0.07463266395489108
+                  Region1,chr1,714019,rs114983708,0.6667198743938074
+                  Region1,chr1,715265,rs12184267,0.6666055494294745
+                  Region1,chr1,715367,rs12184277,0.7657858702655146
+                  Region1,chr1,720381,rs116801199,0.6816390671665746
+                  Region1,chr1,723742,rs28375378,0.7124933618852456
+                  Region1,chr1,724324,rs28692873,0.9212425499680825
+                  Region1,chr1,725196,rs377099097,0.594983644175122
+                  Region1,chr1,725389,rs375619475,0.7032290172253173
+                  Region1,chr1,727841,rs116587930,0.9078685880041112
+  "/projects/{projectID}/heatmap/{chromosome}/{region}":
+    get:
+      description: Retrieve local heatmap data
+      operationId: projects_project_id_local_heatmap_data_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+        - name: chromosome
+          in: path
+          description: chromosome number
+          required: true
+          schema:
+            type: string
+          example: "chr1"
+        - name: region
+          in: path
+          description: region number
+          required: true
+          schema:
+            type: string
+          example: "Region1"
+      responses:
+        "200":
+          description: |
+            Local heatmap plot data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Local heatmap plot data
+                example: |
+                  ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799
+                  z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385
+                  z_RA_RA,,,,,,
+  "/projects/{projectID}/zoom_manhattan":
+    get:
+      description: Retrieve local manhattan data
+      operationId: projects_project_id_zoom_manhattan_data_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Local manhattan plot data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Local manhattan plot data
+                example: |
+                  Region,CHR,position,snp_ids,JASS_PVAL
+                  Region1,chr1,636285,rs545945172,0.7216577092326355
+                  Region1,chr1,662414,rs371628865,0.6725666758876788
+                  Region1,chr1,662622,rs61769339,0.405683282952658
+                  Region1,chr1,665266,rs539032812,0.3348008598497511
+                  Region1,chr1,693731,rs12238997,0.4952464035829177
+                  Region1,chr1,701835,rs189800799,0.666563580963709
+                  Region1,chr1,706778,rs113462541,0.861641963454806
+                  Region1,chr1,711310,rs200531508,0.07463266395489108
+                  Region1,chr1,714019,rs114983708,0.6667198743938074
+                  Region1,chr1,715265,rs12184267,0.6666055494294745
+                  Region1,chr1,715367,rs12184277,0.7657858702655146
+                  Region1,chr1,720381,rs116801199,0.6816390671665746
+                  Region1,chr1,723742,rs28375378,0.7124933618852456
+                  Region1,chr1,724324,rs28692873,0.9212425499680825
+                  Region1,chr1,725196,rs377099097,0.594983644175122
+                  Region1,chr1,725389,rs375619475,0.7032290172253173
+                  Region1,chr1,727841,rs116587930,0.9078685880041112
+  "/projects/{projectID}/zoom_heatmap":
+    get:
+      description: Retrieve local heatmap data
+      operationId: projects_project_id_zoom_heatmap_data_get
+      parameters:
+        - name: projectID
+          in: path
+          description: project ID
+          required: true
+          schema:
+            type: string
+          example: "bca9d414e0f9a67b9e0d2131a47c316c"
+      responses:
+        "200":
+          description: |
+            Local heatmap plot data in CSV format
+          content:
+            text/csv; charset=utf-8:
+              schema:
+                type: string
+                title: Local heatmap plot data
+                example: |
+                  ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799
+                  z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385
+                  z_RA_RA,,,,,,
+components:
+  schemas:
+    Phenotype:
+      properties:
+        id:
+          type: string
+        consortium:
+          type: string
+        outcome:
+          type: string
+        full_name:
+          type: string
+        type:
+          type: string
+        ref:
+          type: string
+        ref_link:
+          type: string
+        data_link:
+          type: string
+        data_path:
+          type: string
+    Project:
+      properties:
+        id:
+          type: string
+        status:
+          type: string
+        progress:
+          type: string
+        outcome:
+          type: array
+          items:
+            $ref: "#/components/schemas/Phenotype"
diff --git a/jass/tasks.py b/jass/tasks.py
index dede914577cb4a755bbb35f2b9bc1e840c89d504..a17a8bcae4dc1be36545778c9e58d6ce16ffa671 100644
--- a/jass/tasks.py
+++ b/jass/tasks.py
@@ -9,24 +9,25 @@ from flask import Flask
 import jass.models.project
 from jass.models.project import Project, get_file_building_tb_path
-from jass.models.plots import (create_global_plot,
-                               create_local_plot,
-                               create_quadrant_plot,
-                               create_qq_plot)
+from jass.models.plots import (
+    create_global_plot,
+    create_local_plot,
+    create_quadrant_plot,
+    create_qq_plot,
+)
+
+from jass.models.worktable import create_worktable_file, create_genome_full_csv
-from jass.models.worktable import (create_worktable_file,
-                                   create_genome_full_csv)
-
 from jass.models.phenotype import Phenotype
 from jass.config import config
 
 
 def make_celery(app):
     celery = Celery()
-    if 'CELERY_CONFIG_MODULE' in os.environ:
-        celery.config_from_envvar('CELERY_CONFIG_MODULE')
+    if "CELERY_CONFIG_MODULE" in os.environ:
+        celery.config_from_envvar("CELERY_CONFIG_MODULE")
     else:
-        celery.config_from_object('jass.celeryconfig')
+        celery.config_from_object("jass.celeryconfig")
     celery.conf.update(app.config)
 
     TaskBase = celery.Task
@@ -41,45 +42,46 @@ def make_celery(app):
     return celery
 
-
 flask_app = Flask(__name__)
 celery = make_celery(flask_app)
 
+
 @celery.task
 def create_project_worktable_file(
-        phenotype_ids,
-        init_file_path,
-        project_hdf_path,
-        remove_nan,
-        stat,
-        optim_na,
-        csv_file,
-        chunk_size,
-        significance_treshold,
-        post_filtering,
-        delayed_gen_csv_file,
-        chromosome,
-        pos_Start,
-        pos_End,
-        custom_loadings):
+    phenotype_ids,
+    init_file_path,
+    project_hdf_path,
+    remove_nan,
+    stat,
+    optim_na,
+    csv_file,
+    chunk_size,
+    significance_treshold,
+    post_filtering,
+    delayed_gen_csv_file,
+    chromosome,
+    pos_Start,
+    pos_End,
+    custom_loadings,
+):
     try:
         return create_worktable_file(
-            phenotype_ids = phenotype_ids,
-            init_file_path = init_file_path,
-            project_hdf_path = project_hdf_path,
-            remove_nan = remove_nan,
-            stat = stat,
-            optim_na = True,
-            csv_file = csv_file,
-            chunk_size = chunk_size,
-            significance_treshold = significance_treshold,
-            post_filtering = post_filtering,
-            delayed_gen_csv_file = delayed_gen_csv_file,
-            chromosome = chromosome,
-            pos_Start = pos_Start,
-            pos_End = pos_End,
-            custom_loadings = custom_loadings
-            )
+            phenotype_ids=phenotype_ids,
+            init_file_path=init_file_path,
+            project_hdf_path=project_hdf_path,
+            remove_nan=remove_nan,
+            stat=stat,
+            optim_na=True,
+            csv_file=csv_file,
+            chunk_size=chunk_size,
+            significance_treshold=significance_treshold,
+            post_filtering=post_filtering,
+            delayed_gen_csv_file=delayed_gen_csv_file,
+            chromosome=chromosome,
+            pos_Start=pos_Start,
+            pos_End=pos_End,
+            custom_loadings=custom_loadings,
+        )
     except Exception as e:
         exc_type, exc_value, exc_traceback = sys.exc_info()
         log_path = get_file_building_tb_path(project_hdf_path)
@@ -147,77 +149,78 @@ def create_project_csv_file(Nchunk, worktable_path, csv_file_path):
         traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh)
         log_fh.close()
 
+
 def launch_create_project(
-    phenotype_ids,
-    init_table_path,
+    phenotype_ids,
+    init_table_path,
     worktable_path,
-    remove_nan = False,
-    stat = "jass.models.stats:omnibus_stat",
-    csv_file = None,
-    chunk_size = 50,
-    significance_treshold = 5*10**-8,
-    post_filtering = True,
-    delayed_gen_csv_file = False,
-    chromosome = None,
-    start = None,
-    end = None,
-    custom_loadings = None,
-    global_plot_path = None,
-    quadrant_plot_path = None,
-    zoom_plot_path = None,
-    qq_plot_path = None
-    ):
+    remove_nan=False,
+    stat="jass.models.stats:omnibus_stat",
+    csv_file=None,
+    chunk_size=50,
+    significance_treshold=5 * 10 ** -8,
+    post_filtering=True,
+    delayed_gen_csv_file=False,
+    chromosome=None,
+    start=None,
+    end=None,
+    custom_loadings=None,
+    global_plot_path=None,
+    quadrant_plot_path=None,
+    zoom_plot_path=None,
+    qq_plot_path=None,
+):
     post_worktable_jobs = []
-    if (global_plot_path is not None):
+    if global_plot_path is not None:
         post_worktable_jobs.append(
             create_project_global_plot.si(worktable_path, global_plot_path)
         )
-    if (quadrant_plot_path is not None):
+    if quadrant_plot_path is not None:
         post_worktable_jobs.append(
             create_project_quadrant_plot.si(worktable_path, quadrant_plot_path),
         )
-    if (zoom_plot_path is not None):
+    if zoom_plot_path is not None:
         post_worktable_jobs.append(
             create_project_zoom_plot.si(worktable_path, zoom_plot_path)
         )
-    if (qq_plot_path is not None):
+    if qq_plot_path is not None:
         post_worktable_jobs.append(
             create_project_qq_plot.si(worktable_path, qq_plot_path)
         )
-    if (delayed_gen_csv_file and (csv_file is not None)):
-        post_worktable_jobs.append(
-            create_project_csv_file.s(worktable_path, csv_file)
-        )
+    if delayed_gen_csv_file and (csv_file is not None):
+        post_worktable_jobs.append(create_project_csv_file.s(worktable_path, csv_file))
     post_worktable_tasks_group = group(post_worktable_jobs)
-    main_wf = chain(create_project_worktable_file.si(
-        phenotype_ids = phenotype_ids,
-        init_file_path = init_table_path,
-        project_hdf_path = worktable_path,
-        remove_nan = remove_nan,
-        stat = stat,
-        optim_na = True,
-        csv_file = csv_file,
-        chunk_size = chunk_size,
-        significance_treshold = significance_treshold,
-        post_filtering = post_filtering,
-        delayed_gen_csv_file = delayed_gen_csv_file,
-        chromosome = chromosome,
-        pos_Start = start,
-        pos_End = end,
-        custom_loadings = custom_loadings
-        ),
-        post_worktable_tasks_group)
+    main_wf = chain(
+        create_project_worktable_file.si(
+            phenotype_ids=phenotype_ids,
+            init_file_path=init_table_path,
+            project_hdf_path=worktable_path,
+            remove_nan=remove_nan,
+            stat=stat,
+            optim_na=True,
+            csv_file=csv_file,
+            chunk_size=chunk_size,
+            significance_treshold=significance_treshold,
+            post_filtering=post_filtering,
+            delayed_gen_csv_file=delayed_gen_csv_file,
+            chromosome=chromosome,
+            pos_Start=start,
+            pos_End=end,
+            custom_loadings=custom_loadings,
+        ),
+        post_worktable_tasks_group,
+    )
     main_wf.delay()
 
 
 def create_project(
-    phenotype_ids: List[str],
+    phenotype_ids: List[str],
     available_phenotypes: List[Phenotype],
     chromosome: str = None,
     start: str = None,
-    end: str = None
-    ):
-
+    end: str = None,
+):
+
     available_phenotype_ids = [phenotype.id for phenotype in available_phenotypes]
     unavailable_requested_ids = set(phenotype_ids).difference(
         set(available_phenotype_ids)
@@ -227,12 +230,11 @@ def create_project(
     phenotypes = [
         phenotype for phenotype in available_phenotypes if phenotype.id in phenotype_ids
     ]
-    project = Project(phenotypes = phenotypes,
-                      chromosome = chromosome,
-                      start = start,
-                      end = end)
-
-    if (project.get_type_of_analysis() == Project.LOCAL_ANALYSIS):
+    project = Project(
+        phenotypes=phenotypes, chromosome=chromosome, start=start, end=end
+    )
+
+    if project.get_type_of_analysis() == Project.LOCAL_ANALYSIS:
         # Local Analysis
         global_plot_path = None
         quadrant_plot_path = None
@@ -244,21 +246,21 @@ def create_project(
         quadrant_plot_path = project.get_quadrant_plot_path()
         zoom_plot_path = None
         delayed_gen_csv_file = True
-
+
     # if project does not exist
     if project.status == Project.DOES_NOT_EXIST:
         os.makedirs(project.get_folder_path())
         launch_create_project(
-            phenotype_ids = phenotype_ids,
-            init_table_path = os.path.join(config["DATA_DIR"], "initTable.hdf5"),
-            worktable_path = project.get_worktable_path(),
-            csv_file = project.get_csv_path(),
-            global_plot_path = global_plot_path,
-            quadrant_plot_path = quadrant_plot_path,
-            zoom_plot_path = zoom_plot_path,
-            delayed_gen_csv_file = delayed_gen_csv_file,
-            chromosome = chromosome,
-            start = start,
-            end = end
+            phenotype_ids=phenotype_ids,
+            init_table_path=os.path.join(config["DATA_DIR"], "initTable.hdf5"),
+            worktable_path=project.get_worktable_path(),
+            csv_file=project.get_csv_path(),
+            global_plot_path=global_plot_path,
+            quadrant_plot_path=quadrant_plot_path,
+            zoom_plot_path=zoom_plot_path,
+            delayed_gen_csv_file=delayed_gen_csv_file,
+            chromosome=chromosome,
+            start=start,
+            end=end,
         )
-    return project
\ No newline at end of file
+    return project
diff --git a/jass/test/__init__.py b/jass/test/__init__.py
index 86f967d82f02120c4f61393f690934c58972ad34..5dae7a40b36352ec4c95c25fbbf67ca6a7c985fd 100644
--- a/jass/test/__init__.py
+++ b/jass/test/__init__.py
@@ -1,12 +1,21 @@
 import unittest
-from ..encoder import JSONEncoder
 import logging
 import os, shutil, tempfile
 import unittest
-import connexion
+
 import flask_testing
+
+# replace the delay() and si() methods with mocks
+# to avoid freezing calls in unit tests
+from celery.app.task import Task
+from unittest.mock import MagicMock
+
+Task.delay = MagicMock()
+Task.si = MagicMock()
+
+from jass.server import jass_app
 
 from jass.models.inittable import create_inittable_file
 
@@ -20,16 +29,17 @@ class JassTestCase(unittest.TestCase):
 
 class JassFlaskTestCase(JassTestCase, flask_testing.TestCase):
     def create_app(self):
-        logging.getLogger("connexion.operation").setLevel("ERROR")
         from jass.config import config
 
         self.test_dir = tempfile.mkdtemp()
         config["DATA_DIR"] = self.test_dir
         shutil.copy(self.get_file_path_fn("initTable.hdf5"), self.test_dir)
-        app = connexion.App(__name__, specification_dir="../swagger/")
-        app.app.json_encoder = JSONEncoder
-        app.add_api("swagger.yaml")
-        return app.app
+
+        self.jass_app = jass_app
+        application = self.jass_app.create_app()
+        application.config["TESTING"] = True
+        self.testing_client = application.test_client()
+        return application
 
     def tearDown(self):
         shutil.rmtree(self.test_dir)
diff --git a/jass/test/test_default_controller.py b/jass/test/test_default_controller.py
deleted file mode 100644
index 839f48375abe67b5f4acc4fd5e9623ca0790f032..0000000000000000000000000000000000000000
--- a/jass/test/test_default_controller.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# coding: utf-8
-
-from __future__ import absolute_import
-import os, shutil, tempfile
-
-from six import BytesIO
-from flask import json
-
-from jass.config import config
-from . import JassFlaskTestCase
-
-
-class TestDefaultController(JassFlaskTestCase):
-    """ DefaultController integration test stubs """
-
-    test_folder = "data_test1"
-
-    def test_phenotypes_get(self):
-        """
-        Test case for phenotypes_get
-
-
-        """
-        response = self.client.open("/phenotypes", method="GET")
-        self.assert200(response, "Response body is : " + response.data.decode("utf-8"))
-
-
-if __name__ == "__main__":
-    import unittest
-
-    unittest.main()
diff --git a/jass/test/test_init_table.py b/jass/test/test_init_table.py
index 5df8b9abe7b6a08de3031642a0952bd99ec925c5..d0a428f2a435095861a9aad5f20712b7cbe96eb5 100644
--- a/jass/test/test_init_table.py
+++ b/jass/test/test_init_table.py
@@ -40,20 +40,24 @@ class TestInitTable(object):
 
     def tearDown(self):
         # Remove the directory after the test
-        #shutil.rmtree(self.test_dir)
+        # shutil.rmtree(self.test_dir)
         print(self.test_dir)
 
     def test_compare_phenolist(self):
         """
         Compare result and expected PhenoList
         """
-        assert_frame_equal(self.expected_phenolist, self.result_phenolist, check_like=True)
+        assert_frame_equal(
+            self.expected_phenolist, self.result_phenolist, check_like=True
+        )
 
     def test_compare_sumstattab(self):
         """
         Compare result and expected SumStatTab
         """
-        assert_frame_equal(self.expected_sum_stat_tab, self.result_sum_stat_tab, check_like=True)
+        assert_frame_equal(
+            self.expected_sum_stat_tab, self.result_sum_stat_tab, check_like=True
+        )
 
     def test_compare_cov(self):
         """
diff --git a/jass/test/test_plots.py b/jass/test/test_plots.py
index 9bdbdf0fb7dd42ff4e24493dd763c2ae0a1ad3f9..7748ccbabb60dfe47cb3a3e260d4af3fda9fd3c3 100644
--- a/jass/test/test_plots.py
+++ b/jass/test/test_plots.py
@@ -32,6 +32,7 @@ class TestPlots(JassTestCase):
         """
         create_global_plot(self.worktable_hdf_path, self.global_plot_path)
 
+
 if __name__ == "__main__":
     import unittest
diff --git a/jass/test/test_server.py b/jass/test/test_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..8547712218608cbc2abb5ba1ae6236c7197870c5
--- /dev/null
+++ b/jass/test/test_server.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+
+from __future__ import absolute_import
+import os, shutil, tempfile
+
+from six import BytesIO
+from flask import json, url_for
+
+from jass.config import config
+from . import JassFlaskTestCase
+
+
+class TestDefaultController(JassFlaskTestCase):
+    """DefaultController integration test stubs"""
+
+    test_folder = "data_test1"
+
+    def test_phenotypes_get(self):
+        """
+        Test case retrieving available phenotypes
+        """
+        response = self.testing_client.open("/api/phenotypes", method="GET")
+        self.assert200(response, "Response body is : " + response.data.decode("utf-8"))
+
+    def test_create_project(self):
+        """
+        Test case for creating a project
+        """
+        response = self.testing_client.open(
+            "/api/projects",
+            method="POST",
+            data={"phenotypeID": "z_IHEC_MONOP,z_RA_RA"},
+        )
+        self.assert200(response, "Response body is : " + response.data.decode("utf-8"))
+
+
+if __name__ == "__main__":
+    import unittest
+
+    unittest.main()
diff --git a/jass/test/test_worktable.py b/jass/test/test_worktable.py
index bed2941a036ee945616cf08f1e4b5074dcde4758..5e9b3ce23e2e7802c6fa73754a52ed13b72ba676 100644
--- a/jass/test/test_worktable.py
+++ b/jass/test/test_worktable.py
@@ -47,19 +47,25 @@ class TestWorkTable(object):
         """
         Compare result and expected SumStatJostTab
         """
-        assert_frame_equal(self.expected_sumstatjosttab, self.result_sumstatjosttab, check_like=True)
+        assert_frame_equal(
+            self.expected_sumstatjosttab, self.result_sumstatjosttab, check_like=True
+        )
 
     def test_compare_regionsubtable(self):
         """
         Compare result and expected RegionSubTable
         """
-        assert_frame_equal(self.expected_regionsubtable, self.result_regionsubtable, check_like=True)
+        assert_frame_equal(
+            self.expected_regionsubtable, self.result_regionsubtable, check_like=True
+        )
 
     def test_compare_summarytable(self):
         """
         Compare result and expected SummaryTable
         """
-        assert_frame_equal(self.expected_summarytable, self.result_summarytable, check_like=True)
+        assert_frame_equal(
+            self.expected_summarytable, self.result_summarytable, check_like=True
+        )
 
     def test_compare_subcov(self):
         """
@@ -71,8 +77,9 @@ class TestWorkTable(object):
         """
         Compare result and expected PhenoList
         """
-        assert_frame_equal(self.expected_phenolist, self.result_phenolist, check_like=True)
-
+        assert_frame_equal(
+            self.expected_phenolist, self.result_phenolist, check_like=True
+        )
 
 phenotypes_disney = ["z_DISNEY_POCAHONT", "z_DISNEY_RATATOUY"]
diff --git a/jass/util.py b/jass/util.py
index 4e0189d061b34135f812df61a1365fa1bb6d666c..6c0445b46bc57465be84d8a9a1c2ed98c85a51d3 100644
--- a/jass/util.py
+++ b/jass/util.py
@@ -68,6 +68,7 @@ def deserialize_date(string):
     """
     try:
         from dateutil.parser import parse
+
         return parse(string).date()
     except ImportError:
         return string
@@ -85,6 +86,7 @@ def deserialize_datetime(string):
     """
     try:
         from dateutil.parser import parse
+
        return parse(string)
    except ImportError:
        return string
@@ -104,9 +106,11 @@ def deserialize_model(data, klass):
         return data
 
     for attr, attr_type in six.iteritems(instance.swagger_types):
-        if data is not None \
-                and instance.attribute_map[attr] in data \
-                and isinstance(data, (list, dict)):
+        if (
+            data is not None
+            and instance.attribute_map[attr] in data
+            and isinstance(data, (list, dict))
+        ):
             value = data[instance.attribute_map[attr]]
             setattr(instance, attr, _deserialize(value, attr_type))
 
@@ -123,8 +127,7 @@ def _deserialize_list(data, boxed_type):
     :return: deserialized list.
     :rtype: list
     """
-    return [_deserialize(sub_data, boxed_type)
-            for sub_data in data]
+    return [_deserialize(sub_data, boxed_type) for sub_data in data]
 
 
 def _deserialize_dict(data, boxed_type):
@@ -137,5 +140,4 @@ def _deserialize_dict(data, boxed_type):
     :return: deserialized dict.
     :rtype: dict
     """
-    return {k: _deserialize(v, boxed_type)
-            for k, v in six.iteritems(data)}
\ No newline at end of file
+    return {k: _deserialize(v, boxed_type) for k, v in six.iteritems(data)}
diff --git a/requirements.txt b/requirements.txt
index 25400d93c0388519cea98149d04e2cacbade82a3..f6fc215883a802969f2cf7bcf01d9f501d150a46 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/hmenager/connexion.git@master#egg=connexion[swagger-ui]
+flask-smorest
 aiohttp
 python_dateutil
 setuptools
diff --git a/setup.py b/setup.py
index 19675e52aa5d51210c488a360066df4de0043581..67e274e329c3082dabf0228d1e82f7e5dd93eea1 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ README = os.path.join(SETUP_DIR, 'README.md')
 readme = open(README).read()
 
 REQUIRES = [
-    "connexion[swagger-ui] @ git+https://github.com/hmenager/connexion.git@master#egg=connexion[swagger-ui]",
+    "flask-smorest",
     "aiohttp",
     "python_dateutil",
     "setuptools",