diff --git a/Dockerfile b/Dockerfile index a861df27ca4c8544c1ecd482543794baad34491e..4f837065c9c7ecb64aae692a9550ebb51c17c3a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,9 @@ FROM centos:7 RUN yum install -y epel-release wget gcc https://repo.ius.io/ius-release-el7.rpm RUN yum update -y -RUN yum install -y python35u python35u-libs python35u-devel python35u-pip openssl-devel libffi-devel -RUN pip3.5 install ansible +RUN yum install -y python36u python36u-libs python36u-devel python36u-pip openssl-devel libffi-devel +RUN pip3.6 install --upgrade pip +RUN pip3.6 install ansible RUN yum install -y python-pip RUN python -m pip install --upgrade pip COPY . /code diff --git a/jass/__init__.py b/jass/__init__.py index e27073c7f61922fed66faf175167e9ddc9d6ff8c..a88eb7abf651d44a7e45578285661a85262703a3 100644 --- a/jass/__init__.py +++ b/jass/__init__.py @@ -17,4 +17,4 @@ Submodules """ import os -from jass.tasks import celery \ No newline at end of file +from jass.tasks import celery diff --git a/jass/__main__.py b/jass/__main__.py index c3d79337ae16503bfde99f52b7bb713acebe546b..9bd45225639dd9733cc201ba192250d0b41e0685 100644 --- a/jass/__main__.py +++ b/jass/__main__.py @@ -5,59 +5,66 @@ import os import sys import argparse -from jass.server import get_jass_app +from jass.server import jass_app from jass.config import config from jass.models.phenotype import get_available_phenotypes from jass.models.inittable import create_inittable_file, add_gene_annotation from jass.models.worktable import create_worktable_file -from jass.models.plots import ( create_global_plot, - create_quadrant_plot, - create_local_plot, - create_qq_plot) +from jass.models.plots import ( + create_global_plot, + create_quadrant_plot, + create_local_plot, + create_qq_plot, +) -def absolute_path_of_the_file(fileName, output_file = False): + +def absolute_path_of_the_file(fileName, output_file=False): """ Builds the absolute path of the file : fileName This makes the execution of JASS functions 
more robust and flexible """ - + # Build an absolute path if possible absoluteFilePath = os.path.abspath(fileName) # Test if the file name is a pattern - is_a_pattern = (os.path.basename(fileName).find("*") > -1) + is_a_pattern = os.path.basename(fileName).find("*") > -1 - if (is_a_pattern or output_file) : + if is_a_pattern or output_file: # Test if the directory path exist Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) - if (Directory_path_exist == False): + if Directory_path_exist == False: # Test the path using the Jass data directory - absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + absoluteFilePath = os.path.normpath( + os.path.join(config["DATA_DIR"], fileName) + ) Directory_path_exist = os.path.exists(os.path.dirname(absoluteFilePath)) - if (Directory_path_exist == False): + if Directory_path_exist == False: Message = "The directory of the file {} does not exist".format(fileName) raise NameError(Message) else: # Test if the file path exist File_path_exist = os.path.exists(absoluteFilePath) - if (File_path_exist == False): + if File_path_exist == False: # Test the path using the Jass data directory - absoluteFilePath = os.path.normpath(os.path.join(config["DATA_DIR"], fileName)) + absoluteFilePath = os.path.normpath( + os.path.join(config["DATA_DIR"], fileName) + ) File_path_exist = os.path.exists(absoluteFilePath) - if (File_path_exist == False): + if File_path_exist == False: Message = "The file {} does not exist".format(fileName) raise NameError(Message) # Test if it is realy a file Is_a_file = os.path.isfile(absoluteFilePath) - if (not Is_a_file) : + if not Is_a_file: Message = "{} is not a file".format(fileName) raise NameError(Message) @@ -65,8 +72,7 @@ def absolute_path_of_the_file(fileName, output_file = False): def serve(args): - app = get_jass_app() - app.run(host=config["HOST"], port=config["PORT"]) + jass_app.flask_app.run(host=config["HOST"], port=config["PORT"]) def 
w_list_phenotypes(args): @@ -77,12 +83,12 @@ def w_list_phenotypes(args): def compute_worktable(args): csv_file_path = args.csv_file_path - if (csv_file_path is not None): + if csv_file_path is not None: csv_file_path = absolute_path_of_the_file(csv_file_path, True) init_table_path = absolute_path_of_the_file(args.init_table_path) worktable_path = absolute_path_of_the_file(args.worktable_path, True) selected_phenotypes = args.phenotypes - remove_nan = (args.remove_nans) + remove_nan = args.remove_nans significance_treshold = float(args.significance_treshold) post_filtering = bool(args.post_filtering) custom_loadings = args.custom_loadings @@ -91,9 +97,9 @@ def compute_worktable(args): pos_End = args.end_position if args.omnibus: - strategy = 'jass.models.stats:omnibus_stat' + strategy = "jass.models.stats:omnibus_stat" elif args.sumz: - strategy = 'jass.models.stats:sumz_stat' + strategy = "jass.models.stats:sumz_stat" elif args.fisher_test: strategy = "jass.models.stats:fisher_test" elif args.meta_analysis: @@ -102,21 +108,21 @@ def compute_worktable(args): strategy = args.strategy create_worktable_file( - phenotype_ids = selected_phenotypes, - init_file_path = init_table_path, - project_hdf_path = worktable_path, - remove_nan = remove_nan, - stat = strategy, - optim_na = True, - csv_file = csv_file_path, - chunk_size = int(args.chunk_size), - significance_treshold = significance_treshold, - post_filtering = post_filtering, - delayed_gen_csv_file = False, - chromosome = chromosome, - pos_Start = pos_Start, - pos_End = pos_End, - custom_loadings = custom_loadings + phenotype_ids=selected_phenotypes, + init_file_path=init_table_path, + project_hdf_path=worktable_path, + remove_nan=remove_nan, + stat=strategy, + optim_na=True, + csv_file=csv_file_path, + chunk_size=int(args.chunk_size), + significance_treshold=significance_treshold, + post_filtering=post_filtering, + delayed_gen_csv_file=False, + chromosome=chromosome, + pos_Start=pos_Start, + pos_End=pos_End, + 
custom_loadings=custom_loadings, ) @@ -126,28 +132,25 @@ def w_create_worktable(args): def w_create_project_data(args): compute_worktable(args) - worktable_path = absolute_path_of_the_file(args.worktable_path, True) - manhattan_plot_path = args.manhattan_plot_path - if (manhattan_plot_path is not None): + if manhattan_plot_path is not None: manhattan_plot_path = absolute_path_of_the_file(manhattan_plot_path, True) create_global_plot(worktable_path, manhattan_plot_path) - quadrant_plot_path = args.quadrant_plot_path - if (quadrant_plot_path is not None): - quadrant_plot_path = absolute_path_of_the_file(quadrant_plot_path, True) - create_quadrant_plot(worktable_path, - quadrant_plot_path, - significance_treshold = float(args.significance_treshold)) - + if quadrant_plot_path is not None: + quadrant_plot_path = absolute_path_of_the_file(quadrant_plot_path, True) + create_quadrant_plot( + worktable_path, + quadrant_plot_path, + significance_treshold=float(args.significance_treshold), + ) zoom_plot_path = args.zoom_plot_path - if (zoom_plot_path is not None): + if zoom_plot_path is not None: zoom_plot_path = absolute_path_of_the_file(zoom_plot_path, True) create_local_plot(worktable_path, zoom_plot_path) - qq_plot_path = args.qq_plot_path - if (qq_plot_path is not None): + if qq_plot_path is not None: qq_plot_path = absolute_path_of_the_file(qq_plot_path, True) create_qq_plot(worktable_path, qq_plot_path) @@ -155,7 +158,9 @@ def w_create_project_data(args): def w_create_inittable(args): input_data_path = absolute_path_of_the_file(args.input_data_path) init_covariance_path = absolute_path_of_the_file(args.init_covariance_path) - init_genetic_covariance_path = absolute_path_of_the_file(args.init_genetic_covariance_path) + init_genetic_covariance_path = absolute_path_of_the_file( + args.init_genetic_covariance_path + ) regions_map_path = absolute_path_of_the_file(args.regions_map_path) description_file_path = absolute_path_of_the_file(args.description_file_path) 
init_table_path = absolute_path_of_the_file(args.init_table_path, True) @@ -166,7 +171,7 @@ def w_create_inittable(args): description_file_path, init_table_path, init_covariance_path, - init_genetic_covariance_path + init_genetic_covariance_path, ) @@ -180,9 +185,9 @@ def w_plot_quadrant(args): worktable_path = absolute_path_of_the_file(args.worktable_path) plot_path = absolute_path_of_the_file(args.plot_path) significance_treshold = float(args.significance_treshold) - create_quadrant_plot(worktable_path, - plot_path, - significance_treshold=significance_treshold) + create_quadrant_plot( + worktable_path, plot_path, significance_treshold=significance_treshold + ) def w_gene_annotation(args): @@ -190,10 +195,9 @@ def w_gene_annotation(args): initTable_path = absolute_path_of_the_file(args.init_table_path, True) df_gene_csv_path = absolute_path_of_the_file(args.gene_csv_path, True) df_exon_csv_path = absolute_path_of_the_file(args.exon_csv_path, True) - add_gene_annotation(gene_data_path, - initTable_path, - df_gene_csv_path, - df_exon_csv_path) + add_gene_annotation( + gene_data_path, initTable_path, df_gene_csv_path, df_exon_csv_path + ) def get_parser(): @@ -256,7 +260,7 @@ def get_parser(): ) parser_create_pd.add_argument( "--significance-treshold", - default=5*10**-8, + default=5 * 10 ** -8, help="The treshold at which a p-value is considered significant", ) parser_create_pd.add_argument( @@ -279,27 +283,25 @@ def get_parser(): ) parser_create_pd.add_argument( - "--csv-file-path", - required=False, - help="path to the results file in csv format" + "--csv-file-path", required=False, help="path to the results file in csv format" ) parser_create_pd.add_argument( "--chromosome-number", required=False, - help="option used only for local analysis: chromosome number studied" + help="option used only for local analysis: chromosome number studied", ) parser_create_pd.add_argument( "--start-position", required=False, - help="option used only for local analysis: start 
position of the region studied" + help="option used only for local analysis: start position of the region studied", ) parser_create_pd.add_argument( "--end-position", required=False, - help="option used only for local analysis: end position of the region studied" + help="option used only for local analysis: end position of the region studied", ) strategies = parser_create_pd.add_mutually_exclusive_group() @@ -344,7 +346,7 @@ def get_parser(): parser_create_it.add_argument( "--init-genetic-covariance-path", default=None, - help = "path to the genetic covariance file to import", + help="path to the genetic covariance file to import", ) parser_create_it.set_defaults(func=w_create_inittable) # ------- create-worktable ------- @@ -366,13 +368,13 @@ def get_parser(): ) parser_create_wt.add_argument( "--significance-treshold", - default=5*10**-8, - help="threshold at which a p-value is considered significant" - ) + default=5 * 10 ** -8, + help="threshold at which a p-value is considered significant", + ) parser_create_wt.add_argument( "--post-filtering", default=True, - help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)" + help="If a filtering to remove outlier will be applied (in this case the result of SNPs considered aberant will not appear in the worktable)", ) parser_create_wt.add_argument( @@ -382,9 +384,7 @@ def get_parser(): ) parser_create_wt.add_argument( - "--csv-file-path", - required=False, - help="path to the results file in csv format" + "--csv-file-path", required=False, help="path to the results file in csv format" ) parser_create_wt.add_argument( @@ -399,19 +399,19 @@ def get_parser(): parser_create_wt.add_argument( "--chromosome-number", required=False, - help="option used only for local analysis: chromosome number studied" + help="option used only for local analysis: chromosome number studied", ) parser_create_wt.add_argument( "--start-position", required=False, - 
help="option used only for local analysis: start position of the region studied" + help="option used only for local analysis: start position of the region studied", ) parser_create_wt.add_argument( "--end-position", required=False, - help="option used only for local analysis: end position of the region studied" + help="option used only for local analysis: end position of the region studied", ) strategies = parser_create_wt.add_mutually_exclusive_group() @@ -435,9 +435,7 @@ def get_parser(): help="path to the worktable file containing the data", ) parser_create_mp.add_argument( - "--plot-path", - required=True, - help="path to the manhattan plot file to generate" + "--plot-path", required=True, help="path to the manhattan plot file to generate" ) parser_create_mp.set_defaults(func=w_plot_manhattan) @@ -452,21 +450,20 @@ def get_parser(): help="path to the worktable file containing the data", ) parser_create_mp.add_argument( - "--plot-path", - required=True, - help="path to the quadrant plot file to generate" + "--plot-path", required=True, help="path to the quadrant plot file to generate" ) parser_create_mp.add_argument( "--significance-treshold", - default=5*10**-8, - help="threshold at which a p-value is considered significant" + default=5 * 10 ** -8, + help="threshold at which a p-value is considered significant", ) parser_create_mp.set_defaults(func=w_plot_quadrant) # ------- add-gene-annotation ------- parser_create_mp = subparsers.add_parser( - "add-gene-annotation", help="add information about genes ansd exons to the inittable" + "add-gene-annotation", + help="add information about genes ansd exons to the inittable", ) parser_create_mp.add_argument( "--gene-data-path", @@ -476,17 +473,13 @@ def get_parser(): parser_create_mp.add_argument( "--init-table-path", required=True, - help="path to the initial table file to update" + help="path to the initial table file to update", ) parser_create_mp.add_argument( - "--gene-csv-path", - required=False, - help="path to 
the file df_gene.csv" + "--gene-csv-path", required=False, help="path to the file df_gene.csv" ) parser_create_mp.add_argument( - "--exon-csv-path", - required=False, - help="path to the file df_exon.csv" + "--exon-csv-path", required=False, help="path to the file df_exon.csv" ) parser_create_mp.set_defaults(func=w_gene_annotation) @@ -494,16 +487,16 @@ def get_parser(): def main(): - print("", file = sys.stderr) - print(" ** ******* ******* *******", file = sys.stderr) - print(" ** ** ** ** **", file = sys.stderr) - print(" ** ** ** ** **", file = sys.stderr) - print(" ** ** ** ****** ******", file = sys.stderr) - print(" ** *********** ** **", file = sys.stderr) - print(" ** ** ** ** ** **", file = sys.stderr) - print(" ******* ** ** ******* *******", file = sys.stderr) - print("", file = sys.stderr) - print("", file = sys.stderr) + print("", file=sys.stderr) + print(" ** ******* ******* *******", file=sys.stderr) + print(" ** ** ** ** **", file=sys.stderr) + print(" ** ** ** ** **", file=sys.stderr) + print(" ** ** ** ****** ******", file=sys.stderr) + print(" ** *********** ** **", file=sys.stderr) + print(" ** ** ** ** ** **", file=sys.stderr) + print(" ******* ** ** ******* *******", file=sys.stderr) + print("", file=sys.stderr) + print("", file=sys.stderr) parser = get_parser() args = parser.parse_args() args.func(args) diff --git a/jass/celeryconfig.py b/jass/celeryconfig.py index 2350b9a27b31c525734b5c7fddeadc038fc7ff6e..1530851f931f986320b72add4f8fa19d316d3ea5 100644 --- a/jass/celeryconfig.py +++ b/jass/celeryconfig.py @@ -1,17 +1,15 @@ import os ## Broker settings. -broker_url = os.getenv('JASS_RABBITMQ_URL','amqp://guest:guest@localhost:5672//') +broker_url = os.getenv("JASS_RABBITMQ_URL", "amqp://guest:guest@localhost:5672//") ## Broker settings. 
-#result_backend = os.getenv('JASS_RABBITMQ_URL','amqp://guest2:guest@localhost:5672//') -result_backend='rpc://' +# result_backend = os.getenv('JASS_RABBITMQ_URL','amqp://guest2:guest@localhost:5672//') +result_backend = "rpc://" # List of modules to import when the Celery worker starts. -#imports = ('myapp.tasks',) +# imports = ('myapp.tasks',) ## Using the database to store task state and results. -#result_backend = 'db+sqlite:///results.db' - -#task_annotations = {'tasks.add': {'rate_limit': '10/s'}} - +# result_backend = 'db+sqlite:///results.db' +# task_annotations = {'tasks.add': {'rate_limit': '10/s'}} diff --git a/jass/controllers/__init__.py b/jass/controllers/__init__.py deleted file mode 100644 index 0e94abd262a9ad33a4968618f3ba933bf97c6740..0000000000000000000000000000000000000000 --- a/jass/controllers/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -the list of controller functions called by the JASS REST web services - -Submodules -========== - -.. autosummary:: - :toctree: _autosummary - - default_controller -""" diff --git a/jass/controllers/default_controller.py b/jass/controllers/default_controller.py deleted file mode 100644 index 8f1f7838e39e8f7abc47e63e132b08dd27f89528..0000000000000000000000000000000000000000 --- a/jass/controllers/default_controller.py +++ /dev/null @@ -1,234 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -default_controller ensures the connection between the web interface and the Python JASS-analysis module -""" -import os -from typing import List, Dict - -import connexion -from flask import send_file, abort -from six import iteritems - -from jass.config import config -from jass.models.project import Project -from jass.models.phenotype import Phenotype, get_available_phenotypes -from jass.tasks import create_project - -PHENOTYPES = get_available_phenotypes( - os.path.join(config["DATA_DIR"], "initTable.hdf5") -) # FIXME part of the config - - -def phenotypes_get(): - """ - phenotypes_get - Gets the list of available phenotypes - 
- :rtype: List[Phenotype] - """ - return PHENOTYPES - - -def projects_post(phenotypeID): - """ - projects_post - Create a new project from a selection of phenotypes - :param phenotypeID: IDs of the phenotypes selected for the project - :type phenotypeID: List[str] - - :rtype: str - """ - return create_project(phenotypeID, PHENOTYPES) - - -def local_project_post(phenotypeID, chromosome, start, end): - """ - local_project_post - Create a new local project from a chromosome number, start and end positions - and a selection of phenotypes - :param param: IDs of the phenotypes selected for the project - :type phenotypeID: List[str] - - :rtype: str - """ - return create_project(phenotypeID, PHENOTYPES, chromosome, start, end) - - -def projects_project_id_csv_status_get(projectID): - """ - projects_project_id_csv_status_get - Retrieve the generation status of the genome full csv file - :param projectID: project ID - :type projectID: str - - :rtype: str - """ - return Project(id=projectID).get_csv_file_generation() - - -def projects_project_id_summary_statistics(projectID): - """ - projects_project_id_summary_statistics - Retrieve project summary statistics - """ - return Project(id=projectID).get_project_summary_statistics() - - -def projects_project_id_genome_get(projectID, threshold=None): - """ - projects_project_id_genome_get - Retrieve genome data for a given project - :param projectID: project ID - :type projectID: str - - :rtype: str - """ - return Project(id=projectID).get_project_genomedata() - - -def projects_project_id_global_manhattan_plot_get(projectID): - """ - projects_project_id_global_manhattan_plot_get - Gets the global Manhattan plot stored in the Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_global_manhattan_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif 
status["global_manhattan"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_zoom_plot_get(projectID): - """ - projects_project_id_zoom_plot_get - Gets the zoom plot stored in the local Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_zoom_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["zoom_plot"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_quadrant_plot_get(projectID): - """ - projects_project_id_quadrant_plot_get - Gets the quadrant plot stored in the Project folder to display it on the Web interface - """ - try: - return send_file( - Project(id=projectID).get_quadrant_plot_path(), mimetype="image/png" - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif status["quadrant_plot_status"] == Project.CREATING: - return ( - "Plot is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_genome_full_get(projectID): - """ - projects_project_id_genome_full_get - Downloads the file genome_full.csv stored in the Project folder - """ - - Type_of_Analysis = Project(id=projectID).get_type_of_analysis() - if(Type_of_Analysis == Project.LOCAL_ANALYSIS): - Fichier = "local_analysis_result.csv" - else: - Fichier = "genome_full.csv" - - try: - return send_file( - Project(id=projectID).get_csv_path(), - mimetype = "text/csv", - as_attachment = True, - attachment_filename = Fichier - ) - except FileNotFoundError: - status = Project(id=projectID).status - if status == Project.DOES_NOT_EXIST: - abort(404) - elif 
status["worktable"] == Project.CREATING: - return ( - "CSV is not ready yet", - 202, - {"Content-Type": "text/plain; charset=utf-8"}, - ) - else: - abort(500) - - -def projects_project_id_local_manhattan_data_get(projectID, chromosome, region): - """ - projects_project_id_local_manhattan_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Manhattan plot - """ - return Project(id=projectID).get_project_local_manhattan_data(chromosome, region) - - -def projects_project_id_local_heatmap_data_get(projectID, chromosome, region): - """ - projects_project_id_local_heatmap_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Heatmap plot - """ - return Project(id=projectID).get_project_local_heatmap_data(chromosome, region) - - -def projects_project_id_zoom_manhattan_data_get(projectID): - """ - projects_project_id_local_manhattan_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Manhattan plot - """ - print("... 
projects_project_id_zoom_manhattan_data_get({})".format(projectID)) - return Project(id=projectID).get_project_local_manhattan_data() - - -def projects_project_id_zoom_heatmap_data_get(projectID): - """ - projects_project_id_local_heatmap_data_get - Return the SumStatTab dataframe of the Project for a given chromosome and region for the Heatmap plot - """ - return Project(id=projectID).get_project_local_heatmap_data() - - -def projects_project_idget(projectID): - """ - projects_project_idget - Retrieve a project definition - :param projectID: project ID - :type projectID: str - - :rtype: Phenotype - """ - return Project(id=projectID) diff --git a/jass/encoder.py b/jass/encoder.py deleted file mode 100644 index 175502a4df6e905113b701e38214b296e82af577..0000000000000000000000000000000000000000 --- a/jass/encoder.py +++ /dev/null @@ -1,25 +0,0 @@ -from connexion.apps.flask_app import FlaskJSONEncoder - -# from connexion.decorators import produces -from six import iteritems -from jass.models.base_model_ import Model -from pandas import isnull - - -class JSONEncoder(FlaskJSONEncoder): - include_nulls = False - - def default(self, o): - if isinstance(o, Model): - dikt = {} - for attr, _ in iteritems(o.swagger_types): - value = getattr(o, attr) - if not (isinstance(value, list)) and (value is None or isnull(value)): - if not self.include_nulls: - continue - else: - return None - attr = o.attribute_map[attr] - dikt[attr] = value - return dikt - return produces.JSONEncoder.default(self, o) diff --git a/jass/models/inittable.py b/jass/models/inittable.py index 15fcd0fb051bf73ecb2578efc1122ff45cafc8f2..7a2849dd59bec963d53866aab2f3754a72b40cf3 100644 --- a/jass/models/inittable.py +++ b/jass/models/inittable.py @@ -18,6 +18,7 @@ from functools import reduce options.mode.chained_assignment = None warnings.filterwarnings("ignore", category=tables.NaturalNameWarning) + def get_gwasname(file_name): return "_".join(os.path.basename(file_name).split("_")[0:3]) @@ -33,9 +34,9 @@ def 
check_if_SNP_unique(z_gwas_chrom): def get_gwas_dict(input_data_path): gwas_dict = {} - #retrieve all files corresponding to glob patterns + # retrieve all files corresponding to glob patterns files_by_pattern = list(map(glob.glob, input_data_path.split(";"))) - all_path = reduce(lambda x,y:x+y, files_by_pattern) + all_path = reduce(lambda x, y: x + y, files_by_pattern) print(all_path) for x in all_path: gwas_name = get_gwasname(x) @@ -60,17 +61,27 @@ def create_pheno_summary(description): "internalDataLink", "Nsample", "Ncase", - "Ncontrol" + "Ncontrol", ] ] - pheno_summary_data["ID"] = "z_" + pheno_summary_data.Consortium.str.upper() + "_" + pheno_summary_data.Outcome.str.upper() + pheno_summary_data["ID"] = ( + "z_" + + pheno_summary_data.Consortium.str.upper() + + "_" + + pheno_summary_data.Outcome.str.upper() + ) - is_quantitatif = pheno_summary_data['Ncase'].isnull() + is_quantitatif = pheno_summary_data["Ncase"].isnull() pheno_summary_data["Effective_sample_size"] = np.nan - pheno_summary_data.loc[~is_quantitatif,"Effective_sample_size"] = (pheno_summary_data.loc[~is_quantitatif, "Ncase"]*pheno_summary_data.loc[~is_quantitatif, "Ncontrol"]) / pheno_summary_data.loc[~is_quantitatif, "Nsample"] + pheno_summary_data.loc[~is_quantitatif, "Effective_sample_size"] = ( + pheno_summary_data.loc[~is_quantitatif, "Ncase"] + * pheno_summary_data.loc[~is_quantitatif, "Ncontrol"] + ) / pheno_summary_data.loc[~is_quantitatif, "Nsample"] - pheno_summary_data.loc[is_quantitatif,"Effective_sample_size"] = pheno_summary_data.loc[is_quantitatif, "Nsample"] + pheno_summary_data.loc[ + is_quantitatif, "Effective_sample_size" + ] = pheno_summary_data.loc[is_quantitatif, "Nsample"] # reorder columns in the dataframe pheno_summary_data = pheno_summary_data[ @@ -87,7 +98,7 @@ def create_pheno_summary(description): "Nsample", "Ncase", "Ncontrol", - "Effective_sample_size" + "Effective_sample_size", ] ] pheno_summary_data.index = pheno_summary_data["ID"] @@ -121,21 +132,23 @@ def 
format_chr_gwas(gwas_file_chri, chrom, study_name, regions_bychr): z_gwas["Region"] = 0 z_gwas["MiddlePosition"] = 0.0 - for region_index, region_row in regions_bychr.get_group( - "chr%d" % chrom - ).iterrows(): + for region_index, region_row in regions_bychr.get_group("chr%d" % chrom).iterrows(): left = region_row["start"] right = region_row["stop"] ind = (z_gwas["position"] >= left) & (z_gwas["position"] <= right) (z_gwas.loc[ind, "Region"]) = np.int(region_index + 1) (z_gwas.loc[ind, "MiddlePosition"]) = (left + right) / 2 - return(z_gwas) + return z_gwas def compute_covariance_zscore(init_file_path): print("## Compute covariance ##") - sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(0, 3)) - trait = [i for i in sum_stat_jost_tab.columns if i[:2]=="z_"] + sum_stat_jost_tab = read_hdf( + init_file_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(0, 3), + ) + trait = [i for i in sum_stat_jost_tab.columns if i[:2] == "z_"] NSNP_matrix = DataFrame(index=trait, columns=trait) cov_matrix = DataFrame(index=trait, columns=trait) @@ -143,31 +156,35 @@ def compute_covariance_zscore(init_file_path): cov_matrix.fillna(0, inplace=True) NSNP_matrix.fillna(0, inplace=True) - bi = range(0,1751,50) - n_len = len(bi)-1 + bi = range(0, 1751, 50) + n_len = len(bi) - 1 for i in range(n_len): binf = bi[i] - bsup = bi[(i+1)] - sum_stat_jost_tab = read_hdf(init_file_path, 'SumStatTab', where='Region >= {0} and Region < {1}'.format(binf, bsup)) + bsup = bi[(i + 1)] + sum_stat_jost_tab = read_hdf( + init_file_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(binf, bsup), + ) print("Regions {0} to {1}\r".format(binf, bsup)) j = 0 for tr1 in trait: for tr2 in trait[j:]: - cc = sum_stat_jost_tab[[tr1,tr2]].dropna() + cc = sum_stat_jost_tab[[tr1, tr2]].dropna() cc = cc.loc[cc.max(1) < 4] - cov_matrix.loc[tr1,tr2] += cc.iloc[:,0].dot(cc.iloc[:,1]) - NSNP_matrix.loc[tr1,tr2] += cc.shape[0] + 
cov_matrix.loc[tr1, tr2] += cc.iloc[:, 0].dot(cc.iloc[:, 1]) + NSNP_matrix.loc[tr1, tr2] += cc.shape[0] - cov_matrix.loc[tr2,tr1] += cc.iloc[:,0].dot(cc.iloc[:,1]) - NSNP_matrix.loc[tr2,tr1] += cc.shape[0] - j=j+1 + cov_matrix.loc[tr2, tr1] += cc.iloc[:, 0].dot(cc.iloc[:, 1]) + NSNP_matrix.loc[tr2, tr1] += cc.shape[0] + j = j + 1 - #(cov_matrix/NSNP_matrix).to_csv("Covariance_on_Zscores.csv", sep="\t") + # (cov_matrix/NSNP_matrix).to_csv("Covariance_on_Zscores.csv", sep="\t") hdf_init = HDFStore(init_file_path) - hdf_init.put("COV", (cov_matrix/NSNP_matrix), format="table", data_columns=True) + hdf_init.put("COV", (cov_matrix / NSNP_matrix), format="table", data_columns=True) hdf_init.close() @@ -177,18 +194,20 @@ def create_inittable_file( description_file_path: str, init_table_path: str, init_covariance_path=None, - init_genetic_covariance_path=None + init_genetic_covariance_path=None, ): # Read region file - regions = read_csv(regions_map_path, sep='\s+', memory_map=True) + regions = read_csv(regions_map_path, sep="\s+", memory_map=True) # Create HDFStore if os.path.exists(init_table_path): os.remove(init_table_path) hdf_init = HDFStore(init_table_path) # Read covariance file - if init_covariance_path!=None: - covariance = read_csv(init_covariance_path, sep="\t", index_col=0, memory_map=True) + if init_covariance_path != None: + covariance = read_csv( + init_covariance_path, sep="\t", index_col=0, memory_map=True + ) compute_covariance = False else: compute_covariance = True @@ -206,18 +225,20 @@ def create_inittable_file( "z_" + meta_row["Consortium"].upper() + "_" + meta_row["Outcome"].upper() ) - pheno_select = list(phenotypes_list) & gwas_dict.keys() #&covariance.columns + pheno_select = list(phenotypes_list) & gwas_dict.keys() # &covariance.columns pheno_summary_data = create_pheno_summary(description) # select only phenotypes for which there is a covariance pheno_list = pheno_summary_data.loc[pheno_select, :] - if compute_covariance==False: + if 
compute_covariance == False: COV = covariance.loc[pheno_select, pheno_select] hdf_init.put("COV", COV, format="table", data_columns=True) # Read Genetic Covariance file and add genetic correlation if available - if init_genetic_covariance_path!=None: - genetic_covariance = read_csv(init_genetic_covariance_path, sep="\t", index_col=0, memory_map=True) + if init_genetic_covariance_path != None: + genetic_covariance = read_csv( + init_genetic_covariance_path, sep="\t", index_col=0, memory_map=True + ) GEN_COV = genetic_covariance.loc[pheno_select, pheno_select] hdf_init.put("GEN_COV", GEN_COV, format="table", data_columns=True) @@ -234,7 +255,7 @@ def create_inittable_file( which_cols.extend(list(pheno_select)) hdf_init.put("PhenoList", pheno_list, format="table", data_columns=True) hdf_init.put("Regions", regions, format="table", data_columns=True) - sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Ref_allele":70,"Alt_allele":70} + sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Ref_allele": 70, "Alt_allele": 70} regions_bychr = regions.groupby("chr") @@ -326,16 +347,15 @@ def create_inittable_file( compute_covariance_zscore(init_table_path) -def add_gene_annotation(gene_data_path, - initTable_path=None, - df_gene_csv_path=None, - df_exon_csv_path=None): +def add_gene_annotation( + gene_data_path, initTable_path=None, df_gene_csv_path=None, df_exon_csv_path=None +): """ add_gene_annotation - for the first 22 chromosomes, retrieves the label of the genes + for the first 22 chromosomes, retrieves the label of the genes and their position as well as those of the exons associated with the genes. 
Then store this information in a hdf5 file - + :param gene_data_path: path to the GFF file containing gene and exon data (for example, GRCh37_latest_genomic.gff) :type gene_data_path: str :param initTable_path: path to the file initTable.hdf5 @@ -344,7 +364,7 @@ def add_gene_annotation(gene_data_path, :type df_gene_csv_path: str :param df_exon_csv_path: path to the file df_exon.csv :type df_exon_csv_path: str - + :return: the dataframes df_gene and df_exon :rtype: 2 PANDAS dataframes """ @@ -355,7 +375,7 @@ def add_gene_annotation(gene_data_path, gene_start = [] gene_end = [] gene_direction = [] - + # lists containing exon data exon_id_label = [] exon_GeneID = [] @@ -363,7 +383,7 @@ def add_gene_annotation(gene_data_path, exon_start = [] exon_end = [] exon_direction = [] - + # temporary list containing the data of all exons TMP__exon_id_label = [] TMP__exon_GeneID = [] @@ -371,18 +391,18 @@ def add_gene_annotation(gene_data_path, TMP__exon_start = [] TMP__exon_end = [] TMP__exon_direction = [] - + fichier = open(gene_data_path, "r") lignes = fichier.readlines() fichier.close() - + for ligne in lignes: elements = ligne.split("\t") - if elements[0].startswith('NC_'): + if elements[0].startswith("NC_"): decode_chr = elements[0].strip("NC_").split(".") chr = int(decode_chr[0]) - if (chr <= 22): - if (elements[2] == "gene"): + if chr <= 22: + if elements[2] == "gene": gene_chr.append(chr) gene_start.append(int(elements[3])) gene_end.append(int(elements[4])) @@ -394,8 +414,8 @@ def add_gene_annotation(gene_data_path, decode_id_4 = decode_id_3[0].split("=") decode_id_5 = decode_id_4[1].split(":") gene_GeneID.append(decode_id_5[1]) - - elif (elements[2] == "exon"): + + elif elements[2] == "exon": TMP__exon_chr.append(chr) TMP__exon_start.append(int(elements[3])) TMP__exon_end.append(int(elements[4])) @@ -407,49 +427,56 @@ def add_gene_annotation(gene_data_path, decode_id_4 = decode_id_3[0].split("=") decode_id_5 = decode_id_4[1].split(":") 
TMP__exon_GeneID.append(decode_id_5[1]) - - - # We only keep the exons that correspond to a gene + + # We only keep the exons that correspond to a gene for i in range(len(TMP__exon_id_label)): - if (TMP__exon_GeneID[i] in gene_GeneID) : + if TMP__exon_GeneID[i] in gene_GeneID: exon_id_label.append(TMP__exon_id_label[i]) exon_GeneID.append(TMP__exon_GeneID[i]) exon_chr.append(TMP__exon_chr[i]) exon_start.append(TMP__exon_start[i]) exon_end.append(TMP__exon_end[i]) exon_direction.append(TMP__exon_direction[i]) - + # We insert genes and exons into dataframes - df_gene = DataFrame({"Chr": gene_chr, - "GeneID" : gene_GeneID, - "gene_label" : gene_id_label, - "start" : gene_start, - "end" : gene_end, - "direction" : gene_direction}) - df_exon = DataFrame({"Chr": exon_chr, - "exon_label" : exon_id_label, - "GeneID" : exon_GeneID, - "start" : exon_start, - "end" : exon_end, - "direction" : exon_direction}) - + df_gene = DataFrame( + { + "Chr": gene_chr, + "GeneID": gene_GeneID, + "gene_label": gene_id_label, + "start": gene_start, + "end": gene_end, + "direction": gene_direction, + } + ) + df_exon = DataFrame( + { + "Chr": exon_chr, + "exon_label": exon_id_label, + "GeneID": exon_GeneID, + "start": exon_start, + "end": exon_end, + "direction": exon_direction, + } + ) + # The rows of the dataframes are sorted by chromosome number and start position (and end position for exon) df_gene.sort_values(by=["Chr", "start"], inplace=True, ignore_index=True) df_exon.sort_values(by=["Chr", "start", "end"], inplace=True, ignore_index=True) - + # This information about genes and exons is stored in an hdf5 file if possible - if (initTable_path is not None): + if initTable_path is not None: hdf_file = HDFStore(initTable_path) hdf_file.put("Gene", df_gene, format="table", data_columns=True) hdf_file.put("Exon", df_exon, format="table", data_columns=True) hdf_file.close() - + # Dataframe df_gene is stored in a csv file if possible - if (df_gene_csv_path is not None): + if df_gene_csv_path 
is not None: df_gene.to_csv(df_gene_csv_path) - + # Dataframe df_exon is stored in a csv file if possible - if (df_exon_csv_path is not None): + if df_exon_csv_path is not None: df_exon.to_csv(df_exon_csv_path) - - return (df_gene, df_exon) \ No newline at end of file + + return (df_gene, df_exon) diff --git a/jass/models/project.py b/jass/models/project.py index d2ddd6f8b8dbe44246061fe17df2aa08ce517d79..15a1eb540a2bc5aa45951bbd7d89848b8927c650 100644 --- a/jass/models/project.py +++ b/jass/models/project.py @@ -1,279 +1,289 @@ -# -*- coding: utf-8 -*- -""" -compute joint statistics and generate plots for a given set of phenotypes -""" -from __future__ import absolute_import -from typing import List, Dict -import os, sys -import shutil -import hashlib -import traceback - -from jass.models.base_model_ import Model -from jass.util import deserialize_model -from jass.models.phenotype import Phenotype -from jass.models.worktable import (get_worktable_summary, - get_worktable_genomedata, - get_worktable_local_manhattan_data, - get_worktable_local_heatmap_data) - -from jass.config import config - -class Project(Model): - - DOES_NOT_EXIST = "DOES_NOT_EXIST" - - CREATING = "CREATING" - - READY = "READY" - - ERROR = "ERROR" - - # Type of analysis - LOCAL_ANALYSIS = "LOCAL_ANALYSIS" - GENOME_WIDE_ANALYSIS = "GENOME_WIDE_ANALYSIS" - - # Upper bound of the chromosome length (bp) - K_POS_MAX = 250000000 - - def __init__(self, id: str = None, - phenotypes: List[Phenotype] = None, - chromosome: str = None, - start: str = None, - end: str = None): - """ - Project - a project (list of phenotypes) - - :param id: project ID. 
- :type id: str - """ - self.swagger_types = {"id": str, - "status": str, - "phenotypes": List[Phenotype], - "progress": str} - - self.attribute_map = { - "id": "id", - "status": "status", - "phenotypes": "phenotypes", - "progress": "progress", - } - - self._phenotypes = phenotypes - - self._id = id - - if self._id is None: - self._id = self.get_id(chromosome, start, end) - - @classmethod - def from_dict(cls, dikt) -> "Project": - """ - Returns the dict as a model - - :param dikt: A dict. - :type: dict - :return: The Project. - :rtype: Project - """ - return deserialize_model(dikt, cls) - - @property - def id(self) -> str: - """ - Gets the id of this Project. - - :return: The id of this Project. - :rtype: str - """ - return self._id - - @id.setter - def id(self, id: str): - """ - Lists the id of this Project. - - :param id: The id of this Project. - :type id: str - """ - - self._id = id - - @property - def phenotypes(self) -> List[Phenotype]: - """ - Gets the phenotypes list for this project. - - :return: The phenotypes. - :rtype: str - """ - return self._phenotypes - - @phenotypes.setter - def cohort(self, phenotypes: List[Phenotype]): - """ - Lists the phenotypes list for this project. - - :param phenotypes: The phenotypes. 
- :type phenotypes: str - """ - - self._phenotypes = phenotypes - - def get_type_of_analysis(self): - """ - get_type_of_analysis - Gets the type of analysis : local or genome wide - """ - if((self._id).split("_")[0] == "local"): - return Project.LOCAL_ANALYSIS - else : - return Project.GENOME_WIDE_ANALYSIS - - def get_folder_path(self): - """ - get_folder_path - Gets the path of the folder where the project data are stored - """ - return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) - - def get_worktable_path(self): - """ - get_worktable_path - Gets the path of the file workTable.hdf5 - """ - return os.path.join(self.get_folder_path(), "workTable.hdf5") - - def get_csv_path(self): - """ - get_csv_path - Gets the path of the file genome_full.csv - """ - return os.path.join(self.get_folder_path(), "workTable.csv") - - def get_progress_path(self): - """ - get_progress_path - Gets the path of the file containing the current progress percentage of \ - the analysis performed within the project - """ - return os.path.join(self.get_folder_path(), "JASS_progress.txt") - - def get_csv_lock_path(self): - """ - get_csv_lock_path - Gets the path of the lock set-on when the csv file is not available yet - """ - return os.path.join(self.get_folder_path(), "the_lock.txt") - - def get_project_summary_statistics(self): - return get_worktable_summary(self.get_worktable_path()) - - def get_project_genomedata(self): - return get_worktable_genomedata(self.get_worktable_path()) - - def get_project_local_manhattan_data(self, chromosome: str = None, region: str = None): - print("project::get_project_local_manhattan_data") - return get_worktable_local_manhattan_data( - self.get_worktable_path(), chromosome, region - ) - - def get_project_local_heatmap_data(self, chromosome: str = None, region: str = None): - return get_worktable_local_heatmap_data( - self.get_worktable_path(), chromosome, region - ) - - def get_id(self, chromosome=None, start=None, end=None): - m = 
hashlib.md5() - for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: - m.update(str(phenotype_id).encode("utf-8")) - - if (chromosome is not None): - # Local analysis - if start is None: - start = 0 - if end is None: - end = Project.K_POS_MAX - Loc_An = "_{}_{}_{}".format(chromosome, start, end) - m.update(str(Loc_An).encode("utf-8")) - id_project = "local_{}".format(m.hexdigest()) - else: - id_project = m.hexdigest() - - return id_project - - def get_global_manhattan_plot_path(self): - return os.path.join(self.get_folder_path(), "Manhattan_Plot_Omnibus.png") - - def get_quadrant_plot_path(self): - return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") - - def get_zoom_plot_path(self): - return os.path.join(self.get_folder_path(), "Zoom_Plot_Omnibus.png") - - - @property - def status(self): - """ - status - Gets the status of the project - """ - if not os.path.exists(self.get_folder_path()): - return Project.DOES_NOT_EXIST - else: - if (self.get_type_of_analysis() == Project.LOCAL_ANALYSIS): - Return_status = get_file_status(self.get_zoom_plot_path()) - else: - Return_status = get_file_status(self.get_quadrant_plot_path()) - - return { - # WARNING: project status is hacked so that everything is ready - # only once the final step has completed. 
- # This avoids the apparent "corrupted hdf5" file situation - "worktable": Return_status, - "global_manhattan": Return_status, - "quadrant_plot_status": Return_status, - "zoom_plot": Return_status - } - - @property - def progress(self): - """ - progress - Gets the percentage of completion of the phenotype analysis - """ - JASS_progress = 0 - progress_path = self.get_progress_path() - if os.path.exists(progress_path): - file_progress = open(progress_path, "r") - JASS_progress = file_progress.read() - file_progress.close() - return JASS_progress - - def get_csv_file_generation(self): - """ - csv_file_generation - Gets the status of the genome_full csv file generation - """ - the_lock_path = self.get_csv_lock_path() - csv_file = self.get_csv_path() - csv_file_status = Project.CREATING - if (not os.path.isfile(the_lock_path)): - if(os.path.isfile(csv_file)): - csv_file_status = Project.READY - else : - csv_file_status = Project.ERROR - print("csv_file_generation:csv_file_status={}".format(csv_file_status)) - return csv_file_status - -def get_file_building_tb_path(file_path): - return file_path + ".log" - - -def get_file_status(file_path): - if os.path.exists(file_path): - return Project.READY - elif os.path.exists(get_file_building_tb_path(file_path)): - return Project.ERROR - else: - return Project.CREATING \ No newline at end of file +# -*- coding: utf-8 -*- +""" +compute joint statistics and generate plots for a given set of phenotypes +""" +from __future__ import absolute_import +from typing import List +import os +import hashlib + +from jass.models.base_model_ import Model +from jass.util import deserialize_model +from jass.models.phenotype import Phenotype +from jass.models.worktable import ( + get_worktable_summary, + get_worktable_genomedata, + get_worktable_local_manhattan_data, + get_worktable_local_heatmap_data, +) + +from jass.config import config + + +class Project(Model): + + DOES_NOT_EXIST = "DOES_NOT_EXIST" + + CREATING = "CREATING" + + READY = "READY" 
+ + ERROR = "ERROR" + + # Type of analysis + LOCAL_ANALYSIS = "LOCAL_ANALYSIS" + GENOME_WIDE_ANALYSIS = "GENOME_WIDE_ANALYSIS" + + # Upper bound of the chromosome length (bp) + K_POS_MAX = 250000000 + + def __init__( + self, + id: str = None, + phenotypes: List[Phenotype] = None, + chromosome: str = None, + start: str = None, + end: str = None, + ): + """ + Project - a project (list of phenotypes) + + :param id: project ID. + :type id: str + """ + self.swagger_types = { + "id": str, + "status": str, + "phenotypes": List[Phenotype], + "progress": str, + } + + self.attribute_map = { + "id": "id", + "status": "status", + "phenotypes": "phenotypes", + "progress": "progress", + } + + self._phenotypes = phenotypes + + self._id = id + + if self._id is None: + self._id = self.get_id(chromosome, start, end) + + @classmethod + def from_dict(cls, dikt) -> "Project": + """ + Returns the dict as a model + + :param dikt: A dict. + :type: dict + :return: The Project. + :rtype: Project + """ + return deserialize_model(dikt, cls) + + @property + def id(self) -> str: + """ + Gets the id of this Project. + + :return: The id of this Project. + :rtype: str + """ + return self._id + + @id.setter + def id(self, id: str): + """ + Lists the id of this Project. + + :param id: The id of this Project. + :type id: str + """ + + self._id = id + + @property + def phenotypes(self) -> List[Phenotype]: + """ + Gets the phenotypes list for this project. + + :return: The phenotypes. + :rtype: str + """ + return self._phenotypes + + @phenotypes.setter + def cohort(self, phenotypes: List[Phenotype]): + """ + Lists the phenotypes list for this project. + + :param phenotypes: The phenotypes. 
+ :type phenotypes: str + """ + + self._phenotypes = phenotypes + + def get_type_of_analysis(self): + """ + get_type_of_analysis + Gets the type of analysis : local or genome wide + """ + if (self._id).split("_")[0] == "local": + return Project.LOCAL_ANALYSIS + else: + return Project.GENOME_WIDE_ANALYSIS + + def get_folder_path(self): + """ + get_folder_path + Gets the path of the folder where the project data are stored + """ + return os.path.join(config["DATA_DIR"], "project_{}".format(self.id)) + + def get_worktable_path(self): + """ + get_worktable_path + Gets the path of the file workTable.hdf5 + """ + return os.path.join(self.get_folder_path(), "workTable.hdf5") + + def get_csv_path(self): + """ + get_csv_path + Gets the path of the file genome_full.csv + """ + return os.path.join(self.get_folder_path(), "workTable.csv") + + def get_progress_path(self): + """ + get_progress_path + Gets the path of the file containing the current progress percentage + of the analysis performed within the project + """ + return os.path.join(self.get_folder_path(), "JASS_progress.txt") + + def get_csv_lock_path(self): + """ + get_csv_lock_path + Gets the path of the lock set-on when the csv file is not available yet + """ + return os.path.join(self.get_folder_path(), "the_lock.txt") + + def get_project_summary_statistics(self): + return get_worktable_summary(self.get_worktable_path()) + + def get_project_genomedata(self): + return get_worktable_genomedata(self.get_worktable_path()) + + def get_project_local_manhattan_data( + self, chromosome: str = None, region: str = None + ): + print("project::get_project_local_manhattan_data") + return get_worktable_local_manhattan_data( + self.get_worktable_path(), chromosome, region + ) + + def get_project_local_heatmap_data( + self, chromosome: str = None, region: str = None + ): + return get_worktable_local_heatmap_data( + self.get_worktable_path(), chromosome, region + ) + + def get_id(self, chromosome=None, start=None, end=None): + m = 
hashlib.md5() + for phenotype_id in [phenotype.id for phenotype in self._phenotypes]: + m.update(str(phenotype_id).encode("utf-8")) + + if chromosome is not None: + # Local analysis + if start is None: + start = 0 + if end is None: + end = Project.K_POS_MAX + Loc_An = "_{}_{}_{}".format(chromosome, start, end) + m.update(str(Loc_An).encode("utf-8")) + id_project = "local_{}".format(m.hexdigest()) + else: + id_project = m.hexdigest() + + return id_project + + def get_global_manhattan_plot_path(self): + return os.path.join(self.get_folder_path(), "Manhattan_Plot_Omnibus.png") + + def get_quadrant_plot_path(self): + return os.path.join(self.get_folder_path(), "Quadrant_Plot_Omnibus.png") + + def get_zoom_plot_path(self): + return os.path.join(self.get_folder_path(), "Zoom_Plot_Omnibus.png") + + @property + def status(self): + """ + status + Gets the status of the project + """ + if not os.path.exists(self.get_folder_path()): + return Project.DOES_NOT_EXIST + else: + if self.get_type_of_analysis() == Project.LOCAL_ANALYSIS: + Return_status = get_file_status(self.get_zoom_plot_path()) + else: + Return_status = get_file_status(self.get_quadrant_plot_path()) + + return { + # WARNING: project status is hacked so that everything is ready + # only once the final step has completed. 
+ # This avoids the apparent "corrupted hdf5" file situation + "worktable": Return_status, + "global_manhattan": Return_status, + "quadrant_plot_status": Return_status, + "zoom_plot": Return_status, + } + + @property + def progress(self): + """ + progress + Gets the percentage of completion of the phenotype analysis + """ + JASS_progress = 0 + progress_path = self.get_progress_path() + if os.path.exists(progress_path): + file_progress = open(progress_path, "r") + JASS_progress = file_progress.read() + file_progress.close() + return JASS_progress + + def get_csv_file_generation(self): + """ + csv_file_generation + Gets the status of the genome_full csv file generation + """ + the_lock_path = self.get_csv_lock_path() + csv_file = self.get_csv_path() + csv_file_status = Project.CREATING + if not os.path.isfile(the_lock_path): + if os.path.isfile(csv_file): + csv_file_status = Project.READY + else: + csv_file_status = Project.ERROR + print("csv_file_generation:csv_file_status={}".format(csv_file_status)) + return csv_file_status + + +def get_file_building_tb_path(file_path): + return file_path + ".log" + + +def get_file_status(file_path): + if os.path.exists(file_path): + return Project.READY + elif os.path.exists(get_file_building_tb_path(file_path)): + return Project.ERROR + else: + return Project.CREATING diff --git a/jass/models/stats.py b/jass/models/stats.py index ff2649b5b1d8d6e5c0021c9de22f58cba6b3e0e1..8d6b815106164ee809df48ea98f815c8ba9a1b01 100644 --- a/jass/models/stats.py +++ b/jass/models/stats.py @@ -16,21 +16,22 @@ def make_stat_computer_nopattern(cov, stat_func, **kwargs): :rtype: function """ # invcov is only computed once - invcov = np.linalg.pinv(cov, rcond=0.001)#np.linalg.inv(cov) + invcov = np.linalg.pinv(cov, rcond=0.001) # np.linalg.inv(cov) def compute(z): - return stat_func(z, cov, invcov,**kwargs) + return stat_func(z, cov, invcov, **kwargs) + return compute def make_stat_computer_pattern(cov, stat_func): """ Create the function that 
computes the joint statistics if NaN values are in z - and if the number of selected phenotypes is less than or equal to 16. + and if the number of selected phenotypes is less than or equal to 16. It uses a covariance matrix corresponding to the pattern of non-NaN values in z. - This function is implemented using the currying technique: - the first part which declares the data structure and the stat function + This function is implemented using the currying technique: + the first part which declares the data structure and the stat function is called only once while the second part (compute) is called for each pattern. :param cov: covariance matrix :type cov: pandas.core.frame.DataFrame @@ -41,13 +42,15 @@ def make_stat_computer_pattern(cov, stat_func): """ if not stat_func.can_use_pattern: raise ValueError("this computation strategy cannot be used with patterns") - + # invcov_bypattern is a dictionary of invcovs where the key is the # corresponding pattern of non-NaN values in z invcov_bypattern = {} def compute(z, pattern_code): - z_na_bool = z.iloc[0,].notnull() + z_na_bool = z.iloc[ + 0, + ].notnull() if pattern_code in invcov_bypattern: invcov = invcov_bypattern[pattern_code] else: @@ -67,8 +70,8 @@ def make_stat_computer_pattern_big(cov, stat_func): and if the number of selected phenotypes is greater than or equal to 17. It uses a covariance matrix corresponding to the pattern of non-NaN values in z. - This function is implemented using the currying technique: - the first part which declares the data structure and the stat function + This function is implemented using the currying technique: + the first part which declares the data structure and the stat function is called only once while the second part (compute) is called for each pattern. 
:param cov: covariance matrix :type cov: numpy.ndarray @@ -87,7 +90,7 @@ def make_stat_computer_pattern_big(cov, stat_func): if pattern_code in invcov_bypattern: invcov = invcov_bypattern[pattern_code] else: - mini_cov = (cov.take(Num,axis=1)).take(Num,axis=0) + mini_cov = (cov.take(Num, axis=1)).take(Num, axis=0) invcov = np.linalg.pinv(mini_cov, rcond=0.001) invcov_bypattern[pattern_code] = invcov @@ -111,6 +114,7 @@ def make_stat_computer_nan_dumb(cov, stat_func): return compute + def omnibus_stat(z, cov, invcov): """ joint statistics "omnibus" strategy @@ -163,7 +167,7 @@ def fisher_test(z, cov, invcov): print(z) print(cov) print(invcov) - p_val = 2*spst.norm.sf(np.abs(z)) + p_val = 2 * spst.norm.sf(np.abs(z)) stat = -2 * np.log(np.nansum(p_val, axis=1)) return spst.chi2.sf(stat, df=p) except ValueError: @@ -171,8 +175,10 @@ def fisher_test(z, cov, invcov): print(invcov.shape) print("Error in Fisher stat") + fisher_test.can_use_pattern = False + def meta_analysis(z, cov, invcov, **kwargs): """ Meta analysis using global sample size to weight z-score @@ -189,16 +195,16 @@ def meta_analysis(z, cov, invcov, **kwargs): :type samp_size : pandas.Series """ - Effective_sample_size = kwargs.get('samp_size', None) + Effective_sample_size = kwargs.get("samp_size", None) if Effective_sample_size is None: - raise Error('no sample size available to perform meta_analysis') + raise Error("no sample size available to perform meta_analysis") else: - loading = Effective_sample_size.loc[z.columns]**0.5 + loading = Effective_sample_size.loc[z.columns] ** 0.5 - M_loadings = np.full(z.shape, loading**2) + M_loadings = np.full(z.shape, loading ** 2) M_loadings[np.isnan(z)] = 0 - z = np.nan_to_num(z) # fill na with zero + z = np.nan_to_num(z) # fill na with zero numi = loading.dot(z.transpose()) deno = np.sqrt(np.sum(M_loadings, axis=1)) @@ -208,8 +214,10 @@ def meta_analysis(z, cov, invcov, **kwargs): return spst.chi2.sf(stat, df=1) + meta_analysis.can_use_pattern = False + def 
sumz_stat(z, cov, invcov, **kwargs): """ joint statistics "sumZ" strategy @@ -223,7 +231,7 @@ def sumz_stat(z, cov, invcov, **kwargs): :return: the joint statistics :rtype: numpy.ndarray float64 """ - loading = kwargs.get('loadings', None) + loading = kwargs.get("loadings", None) if loading is None: p = z.shape[1] @@ -238,11 +246,11 @@ def sumz_stat(z, cov, invcov, **kwargs): z = np.nan_to_num(z) numi = np.square(loading.dot(z.transpose())) - deno = np.einsum('ij,jk,ki->i', M_loadings, cov, M_loadings.T) + deno = np.einsum("ij,jk,ki->i", M_loadings, cov, M_loadings.T) # fill na with 0 = don't take the missing GWAS into account in the test stat = numi / deno return spst.chi2.sf(stat, df=1) -sumz_stat.can_use_pattern = False \ No newline at end of file +sumz_stat.can_use_pattern = False diff --git a/jass/models/worktable.py b/jass/models/worktable.py index 64042d57563da83e2aab2bfd348560cec61534ae..b21ce807627a6c6f62fbea988b286a4eb38e2998 100644 --- a/jass/models/worktable.py +++ b/jass/models/worktable.py @@ -61,29 +61,37 @@ def signif(x, digit): return round(x, digit - int(math.floor(math.log10(abs(x)))) - 1) -def choose_stat_function(smart_na_computation, optim_na, big, function_name, stat_function, sub_cov, **kwargs): +def choose_stat_function( + smart_na_computation, optim_na, big, function_name, stat_function, sub_cov, **kwargs +): if smart_na_computation: # If stat is sumz use normal computer even with na if function_name == "omnibus_stat": if optim_na: if big: - stat_compute = make_stat_computer_pattern_big(sub_cov, stat_function) + stat_compute = make_stat_computer_pattern_big( + sub_cov, stat_function + ) else: stat_compute = make_stat_computer_pattern(sub_cov, stat_function) else: stat_compute = make_stat_computer_nan_dumb(sub_cov, stat_function) else: if function_name == "meta_analysis": - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, **kwargs) + stat_compute = make_stat_computer_nopattern( + sub_cov, stat_function, **kwargs + ) elif 
function_name == "sumz_stat": - loading_file = kwargs.get('loadings', None) + loading_file = kwargs.get("loadings", None) if loading_file is None: # Default loadings would be one for every phenotypes stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) else: loadings = read_csv(loading_file, index_col=0) loadings = loadings.iloc[:, 0] - stat_compute = make_stat_computer_nopattern(sub_cov, stat_function, loadings=loadings) + stat_compute = make_stat_computer_nopattern( + sub_cov, stat_function, loadings=loadings + ) else: stat_compute = make_stat_computer_nopattern(sub_cov, stat_function) else: @@ -92,39 +100,47 @@ def choose_stat_function(smart_na_computation, optim_na, big, function_name, sta return stat_compute -def add_signif_status_column(region_sub_tab, significance_treshold=5*10**-8): +def add_signif_status_column(region_sub_tab, significance_treshold=5 * 10 ** -8): region_sub_tab["signif_status"] = "" # blue: significant pvalues for omnibus and univariate tests - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL < significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Both" # red: significant pvalues for omnibus test only - cond = np.where((region_sub_tab.JASS_PVAL < significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL < significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Joint" # green: significant pvalues for univariate test only - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold))[0] - 
region_sub_tab.loc[region_sub_tab.index[cond], - "signif_status"] = "Univariate" + cond = np.where( + (region_sub_tab.JASS_PVAL > significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL < significance_treshold) + )[0] + region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "Univariate" # grey: non significant pvalues - cond = np.where((region_sub_tab.JASS_PVAL > significance_treshold) & ( - region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold))[0] + cond = np.where( + (region_sub_tab.JASS_PVAL > significance_treshold) + & (region_sub_tab.UNIVARIATE_MIN_PVAL > significance_treshold) + )[0] region_sub_tab.loc[region_sub_tab.index[cond], "signif_status"] = "None" return region_sub_tab -def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10**-8): +def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5 * 10 ** -8): # Select the most significant SNP for the joint test for each region - region_sub_tab = sum_stat_tab.sort_values( - "JASS_PVAL").groupby("Region").first() # .reset_index() + region_sub_tab = ( + sum_stat_tab.sort_values("JASS_PVAL").groupby("Region").first() + ) # .reset_index() # add minimum univariate p-value univar = sum_stat_tab.groupby("Region").min().UNIVARIATE_MIN_PVAL @@ -132,18 +148,29 @@ def get_region_summary(sum_stat_tab, phenotype_ids, significance_treshold=5*10** # Tag SNPs depending on which test is significant region_sub_tab.reset_index(inplace=True) - region_sub_tab = add_signif_status_column( - region_sub_tab, significance_treshold) + region_sub_tab = add_signif_status_column(region_sub_tab, significance_treshold) # reorder columns - region_sub_tab = region_sub_tab[['Region', "MiddlePosition", "snp_ids", "CHR", "position", - "Ref_allele", "Alt_allele", "JASS_PVAL", "UNIVARIATE_MIN_PVAL", - "signif_status"] + phenotype_ids] + region_sub_tab = region_sub_tab[ + [ + "Region", + "MiddlePosition", + "snp_ids", + "CHR", + "position", + "Ref_allele", + "Alt_allele", + 
"JASS_PVAL", + "UNIVARIATE_MIN_PVAL", + "signif_status", + ] + + phenotype_ids + ] return region_sub_tab -def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): +def post_computation_filtering(worktable_chunk, significant_treshold=5 * 10 ** -8): """ Remove SNPs that seems aberrant: SNPs with a very low p-value that are isolated in their region @@ -155,17 +182,20 @@ def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): """ def count_nearly_significant(rsnp): - return((rsnp.JASS_PVAL < (significant_treshold*20)).sum()) + return (rsnp.JASS_PVAL < (significant_treshold * 20)).sum() res = worktable_chunk.groupby("Region").apply(count_nearly_significant) # select region with only one SNP that is significant which is - # suspect + # suspect reg = res.loc[res == 1].index for reg_aberant in reg: - aberant_SNP = worktable_chunk.loc[worktable_chunk.Region == reg_aberant].sort_values( - "JASS_PVAL").index[0] + aberant_SNP = ( + worktable_chunk.loc[worktable_chunk.Region == reg_aberant] + .sort_values("JASS_PVAL") + .index[0] + ) worktable_chunk.drop(aberant_SNP, inplace=True) return worktable_chunk @@ -173,11 +203,12 @@ def post_computation_filtering(worktable_chunk, significant_treshold=5*10**-8): def compute_pleiotropy_index(W, significance_treshold): - N_significatif = (2.0 * spst.norm.sf(W.fillna(0, - inplace=False).abs()) < significance_treshold).sum(1) + N_significatif = ( + 2.0 * spst.norm.sf(W.fillna(0, inplace=False).abs()) < significance_treshold + ).sum(1) N_pheno = (~W.isnull()).sum(1) # pleiotropy index is not meaningful for too few phenotype - S = N_significatif/N_pheno + S = N_significatif / N_pheno S.loc[N_pheno < 4] = np.nan return S @@ -191,14 +222,14 @@ def create_worktable_file( optim_na: bool = True, csv_file: str = None, chunk_size: int = 50, - significance_treshold=5*10**-8, + significance_treshold=5 * 10 ** -8, post_filtering=True, delayed_gen_csv_file=False, chromosome: str = None, pos_Start: str = 
None, pos_End: str = None, **kwargs - ): +): """ Create a worktable file from an initial data table by specifying the @@ -248,49 +279,107 @@ def create_worktable_file( K_POS_MAX = 250000000 # Minimum and maximum limit of regions for each chromosome (multiples of 50) - Min_pos_chr = [ 0, 100, 250, 400, 500, 600, 700, 800, 900, 1000, 1050, - 1150, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1650] - Max_pos_chr = [150, 300, 400, 550, 650, 750, 850, 950, 1050, 1100, 1200, - 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1700, 1750] + Min_pos_chr = [ + 0, + 100, + 250, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + 1050, + 1150, + 1250, + 1300, + 1350, + 1400, + 1450, + 1500, + 1550, + 1600, + 1650, + 1650, + ] + Max_pos_chr = [ + 150, + 300, + 400, + 550, + 650, + 750, + 850, + 950, + 1050, + 1100, + 1200, + 1300, + 1350, + 1400, + 1450, + 1500, + 1550, + 1600, + 1650, + 1700, + 1700, + 1750, + ] N_pheno = len(phenotype_ids) # Controls the number of phenotypes - if (N_pheno > 64): - print("ERROR: {} phenotypes are selected. \nThe current version of JASS cannot analyze more than 64 phenotypes" \ - .format(N_pheno)) + if N_pheno > 64: + print( + "ERROR: {} phenotypes are selected. \nThe current version of JASS cannot analyze more than 64 phenotypes".format( + N_pheno + ) + ) raise ValueError("Maximum number of phenotypes exceeded") - elif (N_pheno >= 20): - print("WARNING: {} phenotypes are selected. The computation will be very long!".format(N_pheno)) + elif N_pheno >= 20: + print( + "WARNING: {} phenotypes are selected. 
The computation will be very long!".format( + N_pheno + ) + ) - if (chromosome is None): + if chromosome is None: local_analysis = False print("============== Whole genome analysis ===============") else: local_analysis = True print("============== Local analysis ===============") - if not(chromosome.isdigit()): - print("ERROR: when performing a local analysis, the chromosome number (between 1 and 22) is mandatory") - raise ValueError("create_worktable_file: the required argument chromosome is not a number") + if not (chromosome.isdigit()): + print( + "ERROR: when performing a local analysis, the chromosome number (between 1 and 22) is mandatory" + ) + raise ValueError( + "create_worktable_file: the required argument chromosome is not a number" + ) else: num_Chr = int(chromosome) - if ((pos_Start is None) and (pos_End is None)): + if (pos_Start is None) and (pos_End is None): chromosome_full = True print("------ Chromosome : {} ------".format(num_Chr)) else: chromosome_full = False - if ((pos_Start is None) or (not pos_Start.isdigit())): + if (pos_Start is None) or (not pos_Start.isdigit()): pos_Start = 0 - if ((pos_End is None) or (not pos_End.isdigit())): + if (pos_End is None) or (not pos_End.isdigit()): pos_End = K_POS_MAX - print("------ Chromosome : {} ({} - {}) ------".format(num_Chr, pos_Start, pos_End)) + print( + "------ Chromosome : {} ({} - {}) ------".format( + num_Chr, pos_Start, pos_End + ) + ) print("Phenotypes = {}".format(phenotype_ids)) # Initialization of Jass_progress - progress_path = os.path.join(os.path.dirname( - project_hdf_path), "JASS_progress.txt") + progress_path = os.path.join(os.path.dirname(project_hdf_path), "JASS_progress.txt") JASS_progress = 0 file_progress = open(progress_path, "w") file_progress.write(str(JASS_progress)) @@ -309,7 +398,9 @@ def create_worktable_file( if delayed_gen_csv_file: # setting a lock to generate the csv_file asynchronously - the_lock_path = os.path.join(os.path.dirname(project_hdf_path), 
"the_lock.txt") + the_lock_path = os.path.join( + os.path.dirname(project_hdf_path), "the_lock.txt" + ) the_lock = "The lock is set on : workTable.csv is not yet available" file_lock = open(the_lock_path, "w") file_lock.write(the_lock) @@ -318,59 +409,66 @@ def create_worktable_file( # subset of phenotypes that have been selected phenolist = read_hdf(init_file_path, "PhenoList") phenolist = phenolist.loc[phenotype_ids] - hdf_work.put( - "PhenoList", phenolist - ) + hdf_work.put("PhenoList", phenolist) # subset of covariance matrix for the selected phenotypes cov = read_hdf(init_file_path, "COV") sub_cov = cov.loc[phenotype_ids, phenotype_ids] # Covariance matrix - hdf_work.put( - "COV", sub_cov, format="table", data_columns=True - ) + hdf_work.put("COV", sub_cov, format="table", data_columns=True) - #If available extract genetic covariance + # If available extract genetic covariance try: gcov = read_hdf(init_file_path, "GEN_COV") sub_gcov = gcov.loc[phenotype_ids, phenotype_ids] # Covariance matrix - hdf_work.put( - "GEN_COV", sub_gcov, format="table", data_columns=True - ) + hdf_work.put("GEN_COV", sub_gcov, format="table", data_columns=True) except KeyError: print("Genetic correlation not available in inittable. 
") - - regions = read_hdf(init_file_path, "Regions").index.tolist() - sum_stat_tab_min_itemsizes = {"snp_ids": 80, "Region": 10, "CHR": 5, "Ref_allele" : 70, "Alt_allele":70} + sum_stat_tab_min_itemsizes = { + "snp_ids": 80, + "Region": 10, + "CHR": 5, + "Ref_allele": 70, + "Alt_allele": 70, + } region_sub_table_min_itemsizes = { - "Region": 10, "index": 10, "CHR": 5, "snp_ids": 80, "signif_status": 20,"Ref_allele" : 70, "Alt_allele":70} + "Region": 10, + "index": 10, + "CHR": 5, + "snp_ids": 80, + "signif_status": 20, + "Ref_allele": 70, + "Alt_allele": 70, + } smart_na_computation = not (remove_nan) module_name, function_name = stat.split(":") stat_module = importlib.import_module(module_name) stat_fn = getattr(stat_module, function_name) - if (N_pheno < K_NB_PHENOTYPES_BIG): + if N_pheno < K_NB_PHENOTYPES_BIG: big = False sub_cov_matrix = sub_cov else: big = True sub_cov_matrix = sub_cov.to_numpy() - stat_compute = choose_stat_function(smart_na_computation, - optim_na, - big, - function_name, - stat_fn, - sub_cov_matrix, - samp_size=phenolist['Effective_sample_size'], - **kwargs) + stat_compute = choose_stat_function( + smart_na_computation, + optim_na, + big, + function_name, + stat_fn, + sub_cov_matrix, + samp_size=phenolist["Effective_sample_size"], + **kwargs + ) # read data by chunks to optimize memory usage - if (not local_analysis): + if not local_analysis: Nchunk = len(regions) // chunk_size + 1 start_value = 0 else: @@ -379,11 +477,14 @@ def create_worktable_file( start_value = Min_pos_chr[num_Chr - 1] // chunk_size # selection criterion in the case of a partial analysis by chromosome and position - if (chromosome_full): + if chromosome_full: Local_criteria = "(CHR == {})".format(chromosome) else: - Local_criteria = "(CHR == {}) and (position >= {}) and (position <= {})"\ - .format(chromosome, pos_Start, pos_End) + Local_criteria = ( + "(CHR == {}) and (position >= {}) and (position <= {})".format( + chromosome, pos_Start, pos_End + ) + ) Nsnp_total = 0 
Nsnp_jassed = 0 @@ -394,52 +495,63 @@ def create_worktable_file( JASS_progress = round((chunk + 1) * 100 / (Nchunk + 2)) binf = chunk * chunk_size - bsup = (chunk+1) * chunk_size - - sum_stat_tab = read_hdf(init_file_path, 'SumStatTab', columns=[ - 'Region', 'CHR', 'position', 'snp_ids', 'Ref_allele', 'Alt_allele', 'MiddlePosition'] + phenotype_ids, - where='Region >= {0} and Region < {1}'.format(binf, bsup)) + bsup = (chunk + 1) * chunk_size + + sum_stat_tab = read_hdf( + init_file_path, + "SumStatTab", + columns=[ + "Region", + "CHR", + "position", + "snp_ids", + "Ref_allele", + "Alt_allele", + "MiddlePosition", + ] + + phenotype_ids, + where="Region >= {0} and Region < {1}".format(binf, bsup), + ) print("Regions {0} to {1}\r".format(binf, bsup)) - if(local_analysis): + if local_analysis: # Data extraction in the case of a partial analysis sum_stat_tab.query(Local_criteria, inplace=True) # Remake row index unique: IMPORTANT for assignation with .loc - sum_stat_tab.dropna( - axis=0, subset=phenotype_ids, how=how_dropna, inplace=True - ) + sum_stat_tab.dropna(axis=0, subset=phenotype_ids, how=how_dropna, inplace=True) sum_stat_tab.reset_index(drop=True, inplace=True) if sum_stat_tab.shape[0] == 0: - print( - "No data available for region {0} to region {1}".format(binf, bsup)) + print("No data available for region {0} to region {1}".format(binf, bsup)) continue # skip region if no data are available Nsnp_total = Nsnp_total + sum_stat_tab.shape[0] if remove_nan or stat.split(":")[-1] != "omnibus_stat": - sum_stat_tab['JASS_PVAL'] = stat_compute( - sum_stat_tab[phenotype_ids]) + sum_stat_tab["JASS_PVAL"] = stat_compute(sum_stat_tab[phenotype_ids]) else: if not big: # Algorithm optimized for a small number of phenotypes # Sort SumStatTab by missing patterns patterns_missing, frequent_pattern = compute_frequent_missing_pattern( - sum_stat_tab[phenotype_ids]) + sum_stat_tab[phenotype_ids] + ) sum_stat_tab["patterns_missing"] = patterns_missing z1 = 
sum_stat_tab[phenotype_ids] # Apply the statistic computation by missing patterns for pattern in frequent_pattern: - bool_serie = (patterns_missing == pattern) + bool_serie = patterns_missing == pattern Selection_criteria = sum_stat_tab["patterns_missing"] == pattern try: - sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute(z1[Selection_criteria], pattern) + sum_stat_tab.loc[bool_serie, "JASS_PVAL"] = stat_compute( + z1[Selection_criteria], pattern + ) except ValueError: print("worktable") @@ -447,8 +559,11 @@ def create_worktable_file( # Algorithm optimized for a high number of phenotypes # Sort SumStatTab by missing patterns - patterns_missing, frequent_pattern, dico_index_y = \ - compute_frequent_missing_pattern_Big(sum_stat_tab[phenotype_ids]) + ( + patterns_missing, + frequent_pattern, + dico_index_y, + ) = compute_frequent_missing_pattern_Big(sum_stat_tab[phenotype_ids]) sum_stat_tab["index"] = sum_stat_tab.index.tolist() sum_stat_tab["patterns_missing"] = patterns_missing @@ -463,7 +578,9 @@ def create_worktable_file( dico_z = {} dico_index_x = {} - sum_stat_tab[Liste_colonnes].apply(lambda x: store_pattern(dico_z, dico_index_x, *x), axis=1) + sum_stat_tab[Liste_colonnes].apply( + lambda x: store_pattern(dico_z, dico_index_x, *x), axis=1 + ) Retour_omnibus_bypattern = {} @@ -471,9 +588,7 @@ def create_worktable_file( for pattern in frequent_pattern: try: Retour_omnibus_bypattern[pattern] = stat_compute( - np.array(dico_z[pattern]), - pattern, - dico_index_y[pattern] + np.array(dico_z[pattern]), pattern, dico_index_y[pattern] ) except ValueError: print("worktable") @@ -482,7 +597,9 @@ def create_worktable_file( for pattern in frequent_pattern: for ligne, indice in enumerate(dico_index_x[pattern]): - Retour_omnibus[int(indice)] = (Retour_omnibus_bypattern[pattern])[int(ligne)] + Retour_omnibus[int(indice)] = ( + Retour_omnibus_bypattern[pattern] + )[int(ligne)] sum_stat_tab["JASS_PVAL"] = Retour_omnibus @@ -490,25 +607,39 @@ def create_worktable_file( 
sum_stat_tab.sort_values(by=["Region", "CHR"], inplace=True) sum_stat_tab["UNIVARIATE_MIN_PVAL"] = DataFrame( - 2.0 * - spst.norm.sf(sum_stat_tab[phenotype_ids].fillna( - 0, inplace=False).abs()), + 2.0 + * spst.norm.sf(sum_stat_tab[phenotype_ids].fillna(0, inplace=False).abs()), index=sum_stat_tab.index, ).min(axis=1) - sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"] * \ - (1-np.isnan(sum_stat_tab[phenotype_ids]).astype(int)).sum(1) - sum_stat_tab.loc[sum_stat_tab.UNIVARIATE_MIN_QVAL > - 1, "UNIVARIATE_MIN_QVAL"] = 1 + sum_stat_tab["UNIVARIATE_MIN_QVAL"] = sum_stat_tab["UNIVARIATE_MIN_PVAL"] * ( + 1 - np.isnan(sum_stat_tab[phenotype_ids]).astype(int) + ).sum(1) + sum_stat_tab.loc[ + sum_stat_tab.UNIVARIATE_MIN_QVAL > 1, "UNIVARIATE_MIN_QVAL" + ] = 1 # Computing pleiotropy sum_stat_tab["PLEIOTROPY_INDEX"] = compute_pleiotropy_index( - sum_stat_tab[phenotype_ids], significance_treshold) + sum_stat_tab[phenotype_ids], significance_treshold + ) sum_stat_tab = sum_stat_tab[ - ["Region", "CHR", "snp_ids", "position", 'Ref_allele', 'Alt_allele', "MiddlePosition", - "JASS_PVAL", "UNIVARIATE_MIN_PVAL", "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"] - + phenotype_ids] + [ + "Region", + "CHR", + "snp_ids", + "position", + "Ref_allele", + "Alt_allele", + "MiddlePosition", + "JASS_PVAL", + "UNIVARIATE_MIN_PVAL", + "UNIVARIATE_MIN_QVAL", + "PLEIOTROPY_INDEX", + ] + + phenotype_ids + ] if post_filtering: sum_stat_tab = post_computation_filtering(sum_stat_tab) @@ -517,16 +648,16 @@ def create_worktable_file( "SumStatTab", sum_stat_tab, min_itemsize=sum_stat_tab_min_itemsizes ) - if ((csv_file is not None) and (not delayed_gen_csv_file)): - with open(csv_file, 'a') as f: - sum_stat_tab.to_csv(f, header=f.tell()==0) + if (csv_file is not None) and (not delayed_gen_csv_file): + with open(csv_file, "a") as f: + sum_stat_tab.to_csv(f, header=f.tell() == 0) region_sub_table = get_region_summary( - sum_stat_tab, phenotype_ids, 
significance_treshold=significance_treshold) + sum_stat_tab, phenotype_ids, significance_treshold=significance_treshold + ) hdf_work.append( - "Regions", - region_sub_table, min_itemsize=region_sub_table_min_itemsizes + "Regions", region_sub_table, min_itemsize=region_sub_table_min_itemsizes ) file_progress = open(progress_path, "w") @@ -545,16 +676,24 @@ def create_worktable_file( np.array( [ [ - sum((jost_min < significance_treshold) & - (pval_min < significance_treshold)), - sum((jost_min < significance_treshold) & - (pval_min > significance_treshold)), + sum( + (jost_min < significance_treshold) + & (pval_min < significance_treshold) + ), + sum( + (jost_min < significance_treshold) + & (pval_min > significance_treshold) + ), ], [ - sum((jost_min > significance_treshold) & - (pval_min < significance_treshold)), - sum((jost_min > significance_treshold) & - (pval_min > significance_treshold)), + sum( + (jost_min > significance_treshold) + & (pval_min < significance_treshold) + ), + sum( + (jost_min > significance_treshold) + & (pval_min > significance_treshold) + ), ], ] ) @@ -593,10 +732,10 @@ def binary_code_Big(dico_index_y, *args): Codage = int(Chaine, 2) - if (not (Codage in dico_index_y)): + if not (Codage in dico_index_y): dico_index_y[Codage] = [] for indice, valeur in enumerate(args): - if (valeur == 1): + if valeur == 1: dico_index_y[Codage].append(indice) return Codage @@ -610,7 +749,7 @@ def store_pattern(dico_z, dico_index_x, *colonne): Index = int(colonne[0]) Codage = int(colonne[1]) - if (not (Codage in dico_z)): + if not (Codage in dico_z): dico_z[Codage] = [] dico_index_x[Codage] = [] @@ -628,10 +767,12 @@ def compute_frequent_missing_pattern(sum_stat_tab): """ Compute the frequency of missing pattern in the dataset """ - Pheno_is_present = 1- sum_stat_tab.isnull() + Pheno_is_present = 1 - sum_stat_tab.isnull() # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes - patterns_missing = 
Pheno_is_present[Pheno_is_present.columns].apply(lambda x: binary_code(*x), axis=1) + patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply( + lambda x: binary_code(*x), axis=1 + ) pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) n_pattern = pattern_frequency.shape[0] @@ -647,17 +788,17 @@ def compute_frequent_missing_pattern_Big(sum_stat_tab): """ dico_index_y = {} - Pheno_is_present = 1- sum_stat_tab.isnull() + Pheno_is_present = 1 - sum_stat_tab.isnull() # The coding of patterns missing is not guaranteed if there are more than 64 phenotypes - patterns_missing = Pheno_is_present[Pheno_is_present.columns] \ - .apply(lambda x: binary_code_Big(dico_index_y, *x), axis=1) - + patterns_missing = Pheno_is_present[Pheno_is_present.columns].apply( + lambda x: binary_code_Big(dico_index_y, *x), axis=1 + ) pattern_frequency = patterns_missing.value_counts() / len(patterns_missing) n_pattern = pattern_frequency.shape[0] print("Number of pattern {}".format(n_pattern)) - frequent_pattern = pattern_frequency.index.tolist() + frequent_pattern = pattern_frequency.index.tolist() return patterns_missing, frequent_pattern, dico_index_y @@ -675,11 +816,9 @@ def stringize_dataframe_region_chr(dataframe: DataFrame): :return: The dataframe with converted Region and CHR columns :rtype: pandas.DataFrame """ - dataframe["Region"] = dataframe["Region"].apply( - lambda x: "Region" + str(x)) + dataframe["Region"] = dataframe["Region"].apply(lambda x: "Region" + str(x)) dataframe["CHR"] = dataframe["CHR"].apply(lambda x: "chr" + str(x)) - dataframe["JASS_PVAL"] = dataframe["JASS_PVAL"].apply( - lambda x: str(signif(x, 4))) + dataframe["JASS_PVAL"] = dataframe["JASS_PVAL"].apply(lambda x: str(signif(x, 4))) return dataframe @@ -713,19 +852,22 @@ def get_worktable_genomedata(project_hdf_path: str): :rtype: str """ region_subtable = stringize_dataframe_region_chr( - read_hdf(project_hdf_path, "Regions")) + read_hdf(project_hdf_path, "Regions") + ) - 
region_subtable.rename(index=str, columns={ - 'JASS_PVAL': 'JOSTmin'}, inplace=True) + region_subtable.rename(index=str, columns={"JASS_PVAL": "JOSTmin"}, inplace=True) - region_subtable['PVALmin'] = region_subtable['UNIVARIATE_MIN_PVAL'] - region_subtable['PVALmin'] = region_subtable['PVALmin']. apply( - lambda x: str(signif(x, 4))) + region_subtable["PVALmin"] = region_subtable["UNIVARIATE_MIN_PVAL"] + region_subtable["PVALmin"] = region_subtable["PVALmin"].apply( + lambda x: str(signif(x, 4)) + ) return region_subtable.to_csv(index=False) -def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = None, region: str = None): +def get_worktable_local_manhattan_data( + project_hdf_path: str, chromosome: str = None, region: str = None +): """ Read and return the SumStatTab dataframe from a worktable file for a given chromosome and region for the Manhattan plot @@ -739,20 +881,24 @@ def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = :return: The dataframe subset corresponding to the chromosome and region, as a CSV formatted text :rtype: str """ - if ((chromosome is None) and (region is None)): + if (chromosome is None) and (region is None): # Local analysis : the file project_hdf_path contains only useful information. 
# No data filter is needed - dataframe = read_hdf(project_hdf_path, "SumStatTab", - columns=["Region", "CHR", "position", - "snp_ids", "JASS_PVAL"]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"], + ) else: # Genome full analysis region_int = region[6:] chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - columns=["Region", "CHR", "position", - "snp_ids", "JASS_PVAL"], - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + columns=["Region", "CHR", "position", "snp_ids", "JASS_PVAL"], + where=["Region=" + str(region_int), "CHR=" + str(chromosome_int)], + ) dataframe = stringize_dataframe_region_chr(dataframe) dataframe = dataframe.sort_values("position") @@ -760,7 +906,9 @@ def get_worktable_local_manhattan_data(project_hdf_path: str, chromosome: str = return dataframe.to_csv(index=False) -def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = None, region: str = None): +def get_worktable_local_heatmap_data( + project_hdf_path: str, chromosome: str = None, region: str = None +): """ Read and return the SumStatTab dataframe from a worktable file for a given chromosome and region for the Heatmap plot @@ -775,7 +923,7 @@ def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = No pivoted and as a CSV formatted text :rtype: str """ - if ((chromosome is None) and (region is None)): + if (chromosome is None) and (region is None): # Local analysis : the file project_hdf_path contains only useful information. 
# No data filter is needed dataframe = read_hdf(project_hdf_path, "SumStatTab") @@ -783,16 +931,28 @@ def get_worktable_local_heatmap_data(project_hdf_path: str, chromosome: str = No # Genome full analysis region_int = region[6:] chromosome_int = chromosome[3:] - dataframe = read_hdf(project_hdf_path, "SumStatTab", - where=['Region='+str(region_int), 'CHR='+str(chromosome_int)]) + dataframe = read_hdf( + project_hdf_path, + "SumStatTab", + where=["Region=" + str(region_int), "CHR=" + str(chromosome_int)], + ) dataframe = stringize_dataframe_region_chr(dataframe) dataframe = dataframe.sort_values("position") - dataframe.drop(["Region", "CHR", "position", "JASS_PVAL", "MiddlePosition", "UNIVARIATE_MIN_PVAL", - "UNIVARIATE_MIN_QVAL", "PLEIOTROPY_INDEX"], - axis=1, - inplace=True, - ) + dataframe.drop( + [ + "Region", + "CHR", + "position", + "JASS_PVAL", + "MiddlePosition", + "UNIVARIATE_MIN_PVAL", + "UNIVARIATE_MIN_QVAL", + "PLEIOTROPY_INDEX", + ], + axis=1, + inplace=True, + ) dataframe.rename(columns={"snp_ids": "ID"}, inplace=True) column_order = list(dataframe.ID) pivoted_dataframe = dataframe.pivot_table(columns="ID") @@ -822,11 +982,10 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35) """ # path of the lock that indicates that the csv file is not available - the_lock_path = os.path.join(os.path.dirname(project_hdf_path), - "the_lock.txt") - if (os.path.isfile(the_lock_path)): + the_lock_path = os.path.join(os.path.dirname(project_hdf_path), "the_lock.txt") + if os.path.isfile(the_lock_path): # The lock is set on - if (os.path.isfile(csv_file)): + if os.path.isfile(csv_file): # An error occurred: the csv file must not exist if the lock is set # The existing csv file is deleted os.remove(csv_file) @@ -837,17 +996,20 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35) bsup = (chunk + 1) * chunk_size # read workTable.hdf5 - df_for_csv = read_hdf(project_hdf_path, "SumStatTab", - where='Region >= {0} 
and Region < {1}'.format(binf, bsup)) + df_for_csv = read_hdf( + project_hdf_path, + "SumStatTab", + where="Region >= {0} and Region < {1}".format(binf, bsup), + ) # append the data to the csv file - with open(csv_file, 'a') as f: + with open(csv_file, "a") as f: df_for_csv.to_csv(f, header=f.tell() == 0) # The lock is deleted os.remove(the_lock_path) - if (os.path.isfile(csv_file)): + if os.path.isfile(csv_file): The_file_is_available = True else: The_file_is_available = False diff --git a/jass/server.py b/jass/server.py index a6900afbf9c4b68ca8d5413578ebb403a5d5031b..a667b5ec6beb5a5178a2d9d502a993fbbcfad70a 100644 --- a/jass/server.py +++ b/jass/server.py @@ -1,30 +1,396 @@ #!/usr/bin/env python3 -""" -Module that creates the flask app used to run JASS as a web server -""" -import connexion -import flask +import os -from .encoder import JSONEncoder -from .config import config +from flask import Flask, redirect, send_file +from flask.views import MethodView +import marshmallow as ma +from flask_smorest import Api, Blueprint, abort +from webargs.flaskparser import FlaskParser +from jass.config import config +from jass.models.phenotype import get_available_phenotypes +from jass.models.project import Project +from jass.tasks import create_project -class JassFlaskApp(connexion.FlaskApp): + +class PhenotypeSchema(ma.Schema): + id = ma.fields.String() + consortium = ma.fields.String() + outcome = ma.fields.String() + full_name = ma.fields.String() + typ = ma.fields.String() + ref = ma.fields.String() + ref_link = ma.fields.String() + data_link = ma.fields.String() + data_path = ma.fields.String() + + +class ProjectParamsSchema(ma.Schema): + class Meta: + unknown = ma.EXCLUDE + + phenotypeID = ma.fields.List(ma.fields.String()) + + +class LocalProjectParamsSchema(ma.Schema): + class Meta: + unknown = ma.EXCLUDE + + phenotypeID = ma.fields.List(ma.fields.String()) + chromosome = ma.fields.Integer() + start = ma.fields.Integer() + end = ma.fields.Integer() + + +class 
ProjectStatusSchema(ma.Schema): + STATUS_VALUES = ["DOES_NOT_EXIST", "CREATING", "READY", "ERROR"] + global_manhattan = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + quadrant_plot_status = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + worktable = ma.fields.String(validate=ma.validate.OneOf(STATUS_VALUES)) + + +class ProjectSchema(ma.Schema): + id = ma.fields.String() + status = ma.fields.Nested(ProjectStatusSchema) + phenotypes = ma.fields.List(ma.fields.Nested(PhenotypeSchema())) + progress = ma.fields.String() + + +blp_phenotypes = Blueprint( + "phenotypes", + "phenotypes", + url_prefix="/phenotypes", + description="Operations on phenotypes", +) + +blp_projects = Blueprint( + "projects", "projects", url_prefix="/projects", description="Operations on projects" +) + +blp_local_projects = Blueprint( + "local_projects", + "local_projects", + url_prefix="/local_projects", + description="Operations on local projects", +) + + +def get_phenotypes(): + return get_available_phenotypes(os.path.join(config["DATA_DIR"], "initTable.hdf5")) + + +@blp_phenotypes.route("") +class PhenotypesMethodView(MethodView): + @blp_phenotypes.response(200, PhenotypeSchema(many=True)) + def get(self): + """List phenotypes""" + return get_phenotypes() + + +@blp_projects.route("") +class ProjectCreateMethodView(MethodView): + @blp_projects.arguments(ProjectParamsSchema(), location="form") + @blp_projects.response(200, ProjectSchema()) + def post(self, parameters): + """List projects""" + phenotype_ids = [ + phenotype_id + for ids_with_commas in parameters["phenotypeID"] + for phenotype_id in ids_with_commas.split(",") + ] + phenotypes = list(filter(lambda d: d.id in phenotype_ids, get_phenotypes())) + return create_project([p.id for p in phenotypes], get_phenotypes()) + + +@blp_local_projects.route("") +class LocalProjectCreateMethodView(MethodView): + @blp_projects.arguments(LocalProjectParamsSchema(), location="form") + @blp_projects.response(200, 
ProjectSchema()) + def post(self, parameters): + """List projects""" + phenotype_ids = [ + phenotype_id + for ids_with_commas in parameters["phenotypeID"] + for phenotype_id in ids_with_commas.split(",") + ] + phenotypes = list(filter(lambda d: d.id in phenotype_ids, get_phenotypes())) + return create_project( + [p.id for p in phenotypes], + get_phenotypes(), + str(parameters["chromosome"]), + str(parameters["start"]), + str(parameters["end"]), + ) + + +@blp_projects.route("/<project_id>") +class ProjectDetailMethodView(MethodView): + @blp_projects.response(200, ProjectSchema()) + def get(self, project_id): + return Project(id=project_id) + + +@blp_projects.route("/<project_id>/csv_status") +class ProjectCSVStatusMethodView(MethodView): + def get(self, project_id): + return Project(id=project_id).get_csv_file_generation() + + +@blp_projects.route("/<project_id>/summary") +class ProjectSummaryMethodView(MethodView): + @blp_projects.response(200, ProjectSchema()) + def get(self, project_id): + return Project(id=project_id).get_project_summary_statistics() + + +@blp_projects.route("/<project_id>/genome") +class ProjectGenomeMethodView(MethodView): + + # @blp_projects.response(200, headers={"Content-Type": "text/csv"}) + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_genomedata(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + abort(404) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/genome_full") +class ProjectGenomeFullMethodView(MethodView): + def get(self, project_id): + Type_of_Analysis = Project(id=project_id).get_type_of_analysis() + 
if(Type_of_Analysis == Project.LOCAL_ANALYSIS): + Fichier = "local_analysis_result.csv" + else: + Fichier = "genome_full.csv" + + try: + return send_file( + Project(id=project_id).get_csv_path(), + mimetype="text/csv", + as_attachment=True, + attachment_filename=Fichier, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/globalmanhattan") +class ProjectGlobalManhattanMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_global_manhattan_plot_path(), + mimetype="image/png", + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["global_manhattan"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/quadrant") +class ProjectQuadrantMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_quadrant_plot_path(), + mimetype="image/png", + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["quadrant_plot_status"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + 
+@blp_projects.route("/<project_id>/manhattan/<chromosome>/<region>") +class ProjectLocalManhattanMethodView(MethodView): + def get(self, project_id, chromosome, region): + try: + return ( + Project(id=project_id).get_project_local_manhattan_data( + chromosome, region + ), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/zoom_manhattan") +class ProjectZoomManhattanMethodView(MethodView): + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_local_manhattan_data(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/zoom_heatmap") +class ProjectZoomHeatmapMethodView(MethodView): + def get(self, project_id): + try: + return ( + Project(id=project_id).get_project_local_heatmap_data(), + 200, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; 
charset=utf-8"}, + ) + else: + abort(500) + + +@blp_projects.route("/<project_id>/zoomplot") +class ProjectZoomPlotMethodView(MethodView): + def get(self, project_id): + try: + return send_file( + Project(id=project_id).get_zoom_plot_path(), mimetype="image/png" + ) + except FileNotFoundError: + status = Project(id=project_id).status + if status == Project.DOES_NOT_EXIST: + return ( + f"project {project_id} does not exist", + 404, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + elif status["worktable"] == Project.CREATING: + return ( + "data not ready yet", + 202, + {"Content-Type": "text/plain; charset=utf-8"}, + ) + else: + abort(500) + + +class JassApp(Flask): """ - JassFlaskApp subclasses connexion's FlaskApp only to customize the static url path + JassApp builds the JASS Flask application """ + def __init__(self): + self.flask_app = Flask(__name__, static_url_path="", static_folder="static") + self.flask_app.config["API_TITLE"] = "JASS API" + self.flask_app.config["API_VERSION"] = "v2.0" + self.flask_app.config["OPENAPI_VERSION"] = "3.0.2" + self.flask_app.route("/")(self.redirect_to_index) + self.api = Api(self.flask_app) + def create_app(self): - app = flask.Flask(self.import_name, static_url_path="", static_folder="static") - app.json_encoder = JSONEncoder - app.route("/")(self.redirect_to_index) - return app + return self.flask_app def redirect_to_index(self): - return flask.redirect("index.html") + return redirect("index.html") + + def register_api_blueprint(self, blp): + self.api.register_blueprint(blp, url_prefix=f"/api/{blp.url_prefix}") -def get_jass_app(): - app = JassFlaskApp(__name__, specification_dir="./swagger/") - app.add_api("swagger.yaml", arguments={"title": "JASS"}, base_path="/api") - return app +jass_app = JassApp() +jass_app.register_api_blueprint(blp_phenotypes) +jass_app.register_api_blueprint(blp_projects) +jass_app.register_api_blueprint(blp_local_projects) diff --git a/jass/static/directLink.html b/jass/static/directLink.html 
index fd91f5533de584d1d9e8057f43caa726964e7afb..926cedfd3ce0e47c878887625bdcc0c388f4f0c8 100644 --- a/jass/static/directLink.html +++ b/jass/static/directLink.html @@ -3,7 +3,7 @@ <style> div.blockMe { padding: 30px; margin: 30px; border: 10px solid #ccc; background-color: #ffd } #question { background-color: #ffc; padding: 10px; } - #question input { width: 4em } + #question input { width: 4em ; } </style> <script src="https://code.jquery.com/jquery-3.2.1.min.js"></script> @@ -111,10 +111,10 @@ } } if (Local_Analysis == true) { - var The_Project = "/api/local_project/" + - chromosome + "/" + - start + "/" + - end; + phe['chromosome'] = chromosome; + phe['start'] = start; + phe['end'] = end; + var The_Project = "/api/local_projects"; } else{ var The_Project = "/api/projects"; diff --git a/jass/static/index.html b/jass/static/index.html index ed0b6c46858cff5ed45e069a7976ae48b6e1558e..eaeb43e8ecaaa41364368298c4fa559e347dc537 100644 --- a/jass/static/index.html +++ b/jass/static/index.html @@ -60,10 +60,10 @@ <div id="tabs-3"> <p><b>JASS: command line and web interface for the joint analysis of GWAS results</b><br /> Hanna Julienne, Pierre Lechat, Vincent Guillemot, Carla Lasry, Chunzi Yao, Robinson Araud, Vincent Laville, Bjarni Vilhjalmsson, Hervé Ménager, Hugues Aschard<br /> - in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, <a href="https://doi.org/10.1093/nargab/lqaa003"> <FONT color=#0000FF>https://doi.org/10.1093/nargab/lqaa003</FONT></a></p> + in: NAR Genomics and Bioinformatics, Volume 2, Issue 1, March 2020, lqaa003, <a href="https://urldefense.com/v3/__https://doi.org/10.1093/nargab/lqaa003__;!!JFdNOqOXpB6UZW0!75infPACk5lQWcBTD-rE5FQb6Yk4WckKROxH7a5qa4ERTDyiliWfWLGe8-HIgjvh3Gk$"> <FONT color=#0000FF>https://doi.org/10.1093/nargab/lqaa003</FONT></a></p> <p><b>Multitrait genetic-phenotype associations to connect disease variants and biological mechanisms</b><br /> Hanna Julienne, Vincent Laville, Zachary R. 
McCaw, Zihuai He, Vincent Guillemot, Carla Lasry, Andrey Ziyatdinov, Amaury Vaysse, Pierre Lechat, Hervé Ménager, Wilfried Le Goff, Marie-Pierre Dube, Peter Kraft, Iuliana Ionita-Laza, Bjarni J. Vilhjálmsson, Hugues Aschard<br /> - preprint in: biorxiv, <a href=https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full> <FONT color=#0000FF>https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full</FONT></a></p> + preprint in: biorxiv, <a href="https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full"> <FONT color=#0000FF>https://www.biorxiv.org/content/10.1101/2020.06.26.172999v1.full</FONT></a></p> </div> </div> </body> diff --git a/jass/static/selectPhenotypes_for_region.html b/jass/static/selectPhenotypes_for_region.html index eb8db36bcdf6f8c81005e1fc755c08e74ca0e3df..51b518a53dc7faa172dc2960b775b7561f55ab95 100644 --- a/jass/static/selectPhenotypes_for_region.html +++ b/jass/static/selectPhenotypes_for_region.html @@ -262,6 +262,10 @@ if(selectedString != ''){ var phe = {}; phe['phenotypeID'] = selectedString; + phe['chromosome'] = selectedChromosome; + phe['start'] = region_start; + phe['end'] = region_end; + console.log("!!! "+selectedString); console.log("!!! "+phe['phenotypeID']); @@ -277,8 +281,7 @@ var JASS_progress = 0; var Old_progress = 0; var getProjectStatus = function(){ - $.post( "/api/local_project/" + selectedChromosome + "/" + region_start + "/" + region_end, - phe).done(function( data ) { + $.post( "/api/local_projects", phe).done(function( data ) { status = data.status.worktable; console.log("!! 
status "+status); JASS_progress = data.progress; diff --git a/jass/swagger/swagger.yaml b/jass/swagger/swagger.yaml index ebde594444f28ce9dcf627fce503388ec0745f54..03378e691554407361bd2c7df356863cd0b2eeca 100644 --- a/jass/swagger/swagger.yaml +++ b/jass/swagger/swagger.yaml @@ -1,595 +1,566 @@ -openapi: 3.0.0 -info: - version: 0.0.0 - title: JASS API Specification -paths: - /phenotypes: - get: - description: | - Gets the list of available phenotypes - operationId: phenotypes_get - responses: - "200": - description: List of the available phenotypes - content: - "application/json": - schema: - type: array - title: ArrayOfPhenotypes - items: - $ref: "#/components/schemas/Phenotype" - example: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 
2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - x-openapi-router-controller: jass.controllers.default_controller - /projects: - post: - description: | - Create a new project from a selection of phenotypes - operationId: projects_post - requestBody: - content: - application/x-www-form-urlencoded: - schema: - type: object - properties: - phenotypeID: - description: IDs of the phenotypes selected for the project - type: array - items: - type: string - required: - - phenotypeID - example: - - z_IHEC_MONOP - - z_RA_RA - responses: - "200": - description: Project created - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - examples: - Creating: - value: - id: "bca9d414e0f9a67b9e0d2131a47c316c" - phenotypes: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - status": - "global_manhattan": "CREATING" - "quadrant_plot_status": "CREATING" - "worktable": "CREATING" - progress": - "progress": "0" - Ready: - value: - id: "bca9d414e0f9a67b9e0d2131a47c316c" - phenotypes: - - "consortium": "IHEC" - "data_link": "http://www.bloodcellgenetics.org" - "full_name": "Monocyte percentage of white cells" - "id": "z_IHEC_MONOP" - "outcome": "MONOP" - "ref": " Astle et al. 
2016" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" - "type": "Cellular" - - "consortium": "RA" - "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" - "full_name": "Rheumatoid Arthritis" - "id": "z_RA_RA" - "outcome": "RA" - "ref": "Okada et al. 2014" - "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" - "type": "Immunity" - status": - "global_manhattan": "READY" - "quadrant_plot_status": "READY" - "worktable": "READY" - progress": - "progress": "100" - x-openapi-router-controller: jass.controllers.default_controller - - - "/local_project/{chromosome}/{start}/{end}": - post: - description: | - Create a new local project from a selection of phenotypes - operationId: local_project_post - parameters: - - name: chromosome - in: path - description: chromosome number - required: true - schema: - type: string - - name: start - in: path - description: start position of the region - required: true - schema: - type: string - - name: end - in: path - description: end position of the region - required: true - schema: - type: string - requestBody: - content: - application/x-www-form-urlencoded: - schema: - type: object - properties: - phenotypeID: - description: IDs of the phenotypes selected for the project - type: array - items: - type: string - required: - - phenotypeID - responses: - "200": - description: Project created - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}": - get: - description: | - Retrieve a project definition - operationId: projects_project_idget - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: Retrieved project - content: - "application/json": - schema: - $ref: "#/components/schemas/Phenotype" - example: - id: 
"bca9d414e0f9a67b9e0d2131a47c316c" - status": - "global_manhattan": "READY" - "quadrant_plot_status": "READY" - "worktable": "READY" - progress": - "progress": "100" - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/summary": - get: - description: Retrieve summary statistics for a given project - operationId: projects_project_id_summary_statistics - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: Summary statistics in JSON - content: - "*/*": - schema: - type: string - title: Project summary statistics - example: - "JOSTSignif": - "NoPhenoSignif": 10 - "PhenoSignif": 210 - "NoJOSTSignif": - "NoPhenoSignif": 1470 - "PhenoSignif": 14 - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/csv_status": - get: - description: | - Retrieve the generation status of the genome full csv file - operationId: projects_project_id_csv_status_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Generation status of the genome full csv file - content: - text/csv; charset=utf-8: - schema: - type: string - title: csv_file_generation - example: | - READY - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/genome": - get: - description: | - Retrieve whole genome summary data for a given project - operationId: projects_project_id_genome_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Whole genome summary data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Genome data - example: | - 
Region,MiddlePosition,snp_ids,CHR,position,Ref_allele,Alt_allele,JOSTmin,UNIVARIATE_MIN_PVAL,signif_status,z_IHEC_MONOP,z_RA_RA - Region0,0.0,rs61997853,chr15,20001087,C,A,0.3085010612493116,0.300300000000003,None,-1.0357902654077036, - Region1,951595.0,rs28635530,chr1,1649392,T,C,4.038788020606384e-06,2.7559999999999873e-06,None,4.688213804974398,2.999976992703393 - Region2,2737671.5,rs72644697,chr1,2533605,A,G,2.4600636176583336e-10,1.6188949607934016e-10,Both,-1.4356568827860683,-6.393727818324495 - Region3,3981773.5,rs12145992,chr1,3760309,A,G,0.0002538976722549933,0.00026034845141981964,None,-1.6164363711150218,3.651859369008055 - Region4,5147352.0,rs2649072,chr1,5754088,G,A,0.0007863952492527496,0.0006378999999999985,None,3.4149658871961184,-2.999976992703393 - Region5,6580614.0,rs2986741,chr1,6548774,G,A,0.0013472918321710914,0.0011119999999999993,None,-3.260540717377886,2.726551316504396 - Region6,8306267.0,rs79412885,chr1,9241839,A,G,2.0889091093474285e-13,8.106999999999937e-14,Both,7.46857160133221,-1.2003588580308502 - Region7,10086091.5,rs113829298,chr1,10061038,T,C,4.3158209846991565e-05,6.135999999999996e-06,None,-4.5216481219798474,0.5100734569685951 - x-openapi-router-controller: jass.controllers.default_controller - - "/projects/{projectID}/genome_full": - get: - description: | - Retrieve whole genome complete (not summarized) data for a given project - operationId: projects_project_id_genome_full_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: > - Retrieve whole genome complete (not summarized) data for a given - project in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Genome data - example: | - Region,CHR,snp_ids,position,Ref_allele,Alt_allele,MiddlePosition,JASS_PVAL,UNIVARIATE_MIN_PVAL,UNIVARIATE_MIN_QVAL,PLEIOTROPY_INDEX,z_IHEC_MONOP,z_RA_RA - 
194218,0,6,rs530120680,63980,G,A,0.0,0.6556994470547299,0.6501999999999999,0.6501999999999999,,0.4534843639637209, - 194219,0,6,rs561313667,63979,T,C,0.0,0.538722344719537,0.5318,0.5318,,0.6252606526209208, - 194220,0,15,rs113772187,20000538,T,C,0.0,0.5218942699938458,0.5148000000000001,0.5148000000000001,,-0.651381887083556, - 194221,0,15,rs61997852,20001079,C,A,0.0,0.33819737748654505,0.33000000000000274,0.33000000000000274,,-0.9741138770593036, - 194222,0,15,rs61997853,20001087,C,A,0.0,0.3085010612493116,0.300300000000003,0.300300000000003,,-1.0357902654077036, - 867,1,1,rs10454464,1833495,A,G,951595.0,0.2523888759643953,0.19000000000000383,0.38000000000000766,,-0.9817967289175548,1.3105791121681174 - 4836,1,1,rs10907175,1130727,C,A,951595.0,0.3313846158840952,0.21180000000000354,0.4236000000000071,,1.2486311872236304,0.8380752842791193 - 4837,1,1,rs10907176,1130855,C,T,951595.0,0.3455642965805482,0.2251000000000033,0.4502000000000066,,1.213078000845954,0.8380752842791193 - 4838,1,1,rs10907185,1733219,A,G,951595.0,0.3919109214945312,0.25000000000000333,0.5000000000000067,,-0.7010048315295425,1.1503493803760003 - 4839,1,1,rs10907187,1759054,A,G,951595.0,0.16161974795348924,0.09099999999999998,0.18199999999999997,,-0.8114627085037827,1.6901461375274702 - 4840,1,1,rs10907188,1768894,T,C,951595.0,0.15366147518120524,0.08599999999999992,0.17199999999999985,,-0.8149516264832889,1.7168860184310413 - 4841,1,1,rs10907190,1773772,A,G,951595.0,0.12979175667585227,0.07999999999999982,0.15999999999999964,,-0.938281041511616,1.7506860712521708 - 4842,1,1,rs10907193,1805391,A,G,951595.0,0.09562672355608258,0.06299999999999988,0.12599999999999975,,-1.0405165049626888,1.8591914944718688 - 4843,1,1,rs10907194,1712230,T,C,951595.0,0.2669995168398967,0.16000000000000425,0.3200000000000085,,-0.7600913211933399,1.4050715603096189 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/globalmanhattan": - get: - description: | - Retrieve global 
manhattan plot for a given project - operationId: projects_project_id_global_manhattan_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Global manhattan plot in PNG format - content: - image/png: - schema: - type: string - title: Global manhattan plot - example: - externalValue: 'globalmanhattan_example.png' - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoomplot": - get: - description: | - Retrieve zoom plot for a given local project - operationId: projects_project_id_zoom_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - responses: - "200": - description: | - Zoom plot in PNG format - content: - image/png: - schema: - type: string - title: Zoom plot - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/quadrant": - get: - description: | - Retrieve quadrant plot for a given project - operationId: projects_project_id_quadrant_plot_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Quadrant plot in PNG format - content: - image/png: - schema: - type: string - title: Quadrant plot - example: - externalValue: 'quadrant_example.png' - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/manhattan/{chromosome}/{region}": - get: - description: Retrieve local manhattan data - operationId: projects_project_id_local_manhattan_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - - name: chromosome - in: path - description: chromosome number - required: true - 
schema: - type: string - example: "chr1" - - name: region - in: path - description: region number - required: true - schema: - type: string - example: "Region1" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local manhattan plot data - example: | - Region,CHR,position,snp_ids,JASS_PVAL - Region1,chr1,636285,rs545945172,0.7216577092326355 - Region1,chr1,662414,rs371628865,0.6725666758876788 - Region1,chr1,662622,rs61769339,0.405683282952658 - Region1,chr1,665266,rs539032812,0.3348008598497511 - Region1,chr1,693731,rs12238997,0.4952464035829177 - Region1,chr1,701835,rs189800799,0.666563580963709 - Region1,chr1,706778,rs113462541,0.861641963454806 - Region1,chr1,711310,rs200531508,0.07463266395489108 - Region1,chr1,714019,rs114983708,0.6667198743938074 - Region1,chr1,715265,rs12184267,0.6666055494294745 - Region1,chr1,715367,rs12184277,0.7657858702655146 - Region1,chr1,720381,rs116801199,0.6816390671665746 - Region1,chr1,723742,rs28375378,0.7124933618852456 - Region1,chr1,724324,rs28692873,0.9212425499680825 - Region1,chr1,725196,rs377099097,0.594983644175122 - Region1,chr1,725389,rs375619475,0.7032290172253173 - Region1,chr1,727841,rs116587930,0.9078685880041112 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/heatmap/{chromosome}/{region}": - get: - description: Retrieve local heatmap data - operationId: projects_project_id_local_heatmap_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - - name: chromosome - in: path - description: chromosome number - required: true - schema: - type: string - example: "chr1" - - name: region - in: path - description: region number - required: true - schema: - type: string - example: "Region1" - responses: - "200": - description: | - Local manhattan plot data in CSV 
format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local heatmap plot data - example: | - ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 - z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 - z_RA_RA,,,,,, - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoom_manhattan": - get: - description: Retrieve local manhattan data - operationId: projects_project_id_zoom_manhattan_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local manhattan plot data - example: | - Region,CHR,position,snp_ids,JASS_PVAL - Region1,chr1,636285,rs545945172,0.7216577092326355 - Region1,chr1,662414,rs371628865,0.6725666758876788 - Region1,chr1,662622,rs61769339,0.405683282952658 - Region1,chr1,665266,rs539032812,0.3348008598497511 - Region1,chr1,693731,rs12238997,0.4952464035829177 - Region1,chr1,701835,rs189800799,0.666563580963709 - Region1,chr1,706778,rs113462541,0.861641963454806 - Region1,chr1,711310,rs200531508,0.07463266395489108 - Region1,chr1,714019,rs114983708,0.6667198743938074 - Region1,chr1,715265,rs12184267,0.6666055494294745 - Region1,chr1,715367,rs12184277,0.7657858702655146 - Region1,chr1,720381,rs116801199,0.6816390671665746 - Region1,chr1,723742,rs28375378,0.7124933618852456 - Region1,chr1,724324,rs28692873,0.9212425499680825 - Region1,chr1,725196,rs377099097,0.594983644175122 - Region1,chr1,725389,rs375619475,0.7032290172253173 - Region1,chr1,727841,rs116587930,0.9078685880041112 - x-openapi-router-controller: jass.controllers.default_controller - "/projects/{projectID}/zoom_heatmap": - get: - description: Retrieve local heatmap data 
- operationId: projects_project_id_zoom_heatmap_data_get - parameters: - - name: projectID - in: path - description: project ID - required: true - schema: - type: string - example: "bca9d414e0f9a67b9e0d2131a47c316c" - responses: - "200": - description: | - Local manhattan plot data in CSV format - content: - text/csv; charset=utf-8: - schema: - type: string - title: Local heatmap plot data - example: | - ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 - z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 - z_RA_RA,,,,,, - x-openapi-router-controller: jass.controllers.default_controller -components: - schemas: - Phenotype: - properties: - id: - type: string - consortium: - type: string - outcome: - type: string - full_name: - type: string - type: - type: string - ref: - type: string - ref_link: - type: string - data_link: - type: string - data_path: - type: string - Project: - properties: - id: - type: string - status: - type: string - progress: - type: string - outcome: - type: array - items: - $ref: "#/components/schemas/Phenotype" - +openapi: 3.0.0 +info: + version: 0.0.0 + title: JASS API Specification +paths: + /phenotypes: + get: + description: | + Gets the list of available phenotypes + operationId: phenotypes_get + responses: + "200": + description: List of the available phenotypes + content: + "application/json": + schema: + type: array + title: ArrayOfPhenotypes + items: + $ref: "#/components/schemas/Phenotype" + example: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 
2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + /projects: + post: + description: | + Create a new project from a selection of phenotypes + operationId: projects_post + requestBody: + content: + application/x-www-form-urlencoded: + schema: + type: object + properties: + phenotypeID: + description: IDs of the phenotypes selected for the project + type: array + items: + type: string + required: + - phenotypeID + example: + - z_IHEC_MONOP + - z_RA_RA + responses: + "200": + description: Project created + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + examples: + Creating: + value: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + phenotypes: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al. 2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + status: + "global_manhattan": "CREATING" + "quadrant_plot_status": "CREATING" + "worktable": "CREATING" + progress: + "progress": "0" + Ready: + value: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + phenotypes: + - "consortium": "IHEC" + "data_link": "http://www.bloodcellgenetics.org" + "full_name": "Monocyte percentage of white cells" + "id": "z_IHEC_MONOP" + "outcome": "MONOP" + "ref": " Astle et al.
2016" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/27863262" + "type": "Cellular" + - "consortium": "RA" + "data_link": "http://plaza.umin.ac.jp/~yokada/datasource/software.htm" + "full_name": "Rheumatoid Arthritis" + "id": "z_RA_RA" + "outcome": "RA" + "ref": "Okada et al. 2014" + "ref_link": "https://www.ncbi.nlm.nih.gov/pubmed/24390342" + "type": "Immunity" + status: + "global_manhattan": "READY" + "quadrant_plot_status": "READY" + "worktable": "READY" + progress: + "progress": "100" + /local_project: + post: + description: | + Create a new local project from a selection of phenotypes + operationId: local_project_post + requestBody: + content: + application/x-www-form-urlencoded: + schema: + type: object + properties: + phenotypeID: + description: IDs of the phenotypes selected for the project + type: array + items: + type: string + chromosome: + description: chromosome number + type: string + start: + description: start position of the region + type: string + end: + description: end position of the region + type: string + required: + - phenotypeID + - chromosome + - start + - end + responses: + "200": + description: Project created + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + "/projects/{projectID}": + get: + description: | + Retrieve a project definition + operationId: projects_project_idget + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: Retrieved project + content: + "application/json": + schema: + $ref: "#/components/schemas/Phenotype" + example: + id: "bca9d414e0f9a67b9e0d2131a47c316c" + status: + "global_manhattan": "READY" + "quadrant_plot_status": "READY" + "worktable": "READY" + progress: + "progress": "100" + "/projects/{projectID}/summary": + get: + description: Retrieve summary statistics for a given project + operationId:
projects_project_id_summary_statistics + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: Summary statistics in JSON + content: + "*/*": + schema: + type: string + title: Project summary statistics + example: + "JOSTSignif": + "NoPhenoSignif": 10 + "PhenoSignif": 210 + "NoJOSTSignif": + "NoPhenoSignif": 1470 + "PhenoSignif": 14 + "/projects/{projectID}/csv_status": + get: + description: | + Retrieve the generation status of the genome full csv file + operationId: projects_project_id_csv_status_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Generation status of the genome full csv file + content: + text/csv; charset=utf-8: + schema: + type: string + title: csv_file_generation + example: | + READY + "/projects/{projectID}/genome": + get: + description: | + Retrieve whole genome summary data for a given project + operationId: projects_project_id_genome_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Whole genome summary data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Genome data + example: | + Region,MiddlePosition,snp_ids,CHR,position,Ref_allele,Alt_allele,JOSTmin,UNIVARIATE_MIN_PVAL,signif_status,z_IHEC_MONOP,z_RA_RA + Region0,0.0,rs61997853,chr15,20001087,C,A,0.3085010612493116,0.300300000000003,None,-1.0357902654077036, + Region1,951595.0,rs28635530,chr1,1649392,T,C,4.038788020606384e-06,2.7559999999999873e-06,None,4.688213804974398,2.999976992703393 + 
Region2,2737671.5,rs72644697,chr1,2533605,A,G,2.4600636176583336e-10,1.6188949607934016e-10,Both,-1.4356568827860683,-6.393727818324495 + Region3,3981773.5,rs12145992,chr1,3760309,A,G,0.0002538976722549933,0.00026034845141981964,None,-1.6164363711150218,3.651859369008055 + Region4,5147352.0,rs2649072,chr1,5754088,G,A,0.0007863952492527496,0.0006378999999999985,None,3.4149658871961184,-2.999976992703393 + Region5,6580614.0,rs2986741,chr1,6548774,G,A,0.0013472918321710914,0.0011119999999999993,None,-3.260540717377886,2.726551316504396 + Region6,8306267.0,rs79412885,chr1,9241839,A,G,2.0889091093474285e-13,8.106999999999937e-14,Both,7.46857160133221,-1.2003588580308502 + Region7,10086091.5,rs113829298,chr1,10061038,T,C,4.3158209846991565e-05,6.135999999999996e-06,None,-4.5216481219798474,0.5100734569685951 + "/projects/{projectID}/genome_full": + get: + description: | + Retrieve whole genome complete (not summarized) data for a given project + operationId: projects_project_id_genome_full_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: > + Retrieve whole genome complete (not summarized) data for a given + project in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Genome data + example: | + Region,CHR,snp_ids,position,Ref_allele,Alt_allele,MiddlePosition,JASS_PVAL,UNIVARIATE_MIN_PVAL,UNIVARIATE_MIN_QVAL,PLEIOTROPY_INDEX,z_IHEC_MONOP,z_RA_RA + 194218,0,6,rs530120680,63980,G,A,0.0,0.6556994470547299,0.6501999999999999,0.6501999999999999,,0.4534843639637209, + 194219,0,6,rs561313667,63979,T,C,0.0,0.538722344719537,0.5318,0.5318,,0.6252606526209208, + 194220,0,15,rs113772187,20000538,T,C,0.0,0.5218942699938458,0.5148000000000001,0.5148000000000001,,-0.651381887083556, + 194221,0,15,rs61997852,20001079,C,A,0.0,0.33819737748654505,0.33000000000000274,0.33000000000000274,,-0.9741138770593036, + 
194222,0,15,rs61997853,20001087,C,A,0.0,0.3085010612493116,0.300300000000003,0.300300000000003,,-1.0357902654077036, + 867,1,1,rs10454464,1833495,A,G,951595.0,0.2523888759643953,0.19000000000000383,0.38000000000000766,,-0.9817967289175548,1.3105791121681174 + 4836,1,1,rs10907175,1130727,C,A,951595.0,0.3313846158840952,0.21180000000000354,0.4236000000000071,,1.2486311872236304,0.8380752842791193 + 4837,1,1,rs10907176,1130855,C,T,951595.0,0.3455642965805482,0.2251000000000033,0.4502000000000066,,1.213078000845954,0.8380752842791193 + 4838,1,1,rs10907185,1733219,A,G,951595.0,0.3919109214945312,0.25000000000000333,0.5000000000000067,,-0.7010048315295425,1.1503493803760003 + 4839,1,1,rs10907187,1759054,A,G,951595.0,0.16161974795348924,0.09099999999999998,0.18199999999999997,,-0.8114627085037827,1.6901461375274702 + 4840,1,1,rs10907188,1768894,T,C,951595.0,0.15366147518120524,0.08599999999999992,0.17199999999999985,,-0.8149516264832889,1.7168860184310413 + 4841,1,1,rs10907190,1773772,A,G,951595.0,0.12979175667585227,0.07999999999999982,0.15999999999999964,,-0.938281041511616,1.7506860712521708 + 4842,1,1,rs10907193,1805391,A,G,951595.0,0.09562672355608258,0.06299999999999988,0.12599999999999975,,-1.0405165049626888,1.8591914944718688 + 4843,1,1,rs10907194,1712230,T,C,951595.0,0.2669995168398967,0.16000000000000425,0.3200000000000085,,-0.7600913211933399,1.4050715603096189 + "/projects/{projectID}/globalmanhattan": + get: + description: | + Retrieve global manhattan plot for a given project + operationId: projects_project_id_global_manhattan_plot_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Global manhattan plot in PNG format + content: + image/png: + schema: + type: string + title: Global manhattan plot + example: + externalValue: "globalmanhattan_example.png" + "/projects/{projectID}/zoomplot": + get: + 
description: | + Retrieve zoom plot for a given local project + operationId: projects_project_id_zoom_plot_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + responses: + "200": + description: | + Zoom plot in PNG format + content: + image/png: + schema: + type: string + title: Zoom plot + "/projects/{projectID}/quadrant": + get: + description: | + Retrieve quadrant plot for a given project + operationId: projects_project_id_quadrant_plot_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Quadrant plot in PNG format + content: + image/png: + schema: + type: string + title: Quadrant plot + example: + externalValue: "quadrant_example.png" + "/projects/{projectID}/manhattan/{chromosome}/{region}": + get: + description: Retrieve local manhattan data + operationId: projects_project_id_local_manhattan_data_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + - name: chromosome + in: path + description: chromosome number + required: true + schema: + type: string + example: "chr1" + - name: region + in: path + description: region number + required: true + schema: + type: string + example: "Region1" + responses: + "200": + description: | + Local manhattan plot data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Local manhattan plot data + example: | + Region,CHR,position,snp_ids,JASS_PVAL + Region1,chr1,636285,rs545945172,0.7216577092326355 + Region1,chr1,662414,rs371628865,0.6725666758876788 + Region1,chr1,662622,rs61769339,0.405683282952658 + Region1,chr1,665266,rs539032812,0.3348008598497511 + Region1,chr1,693731,rs12238997,0.4952464035829177 + Region1,chr1,701835,rs189800799,0.666563580963709 + 
Region1,chr1,706778,rs113462541,0.861641963454806 + Region1,chr1,711310,rs200531508,0.07463266395489108 + Region1,chr1,714019,rs114983708,0.6667198743938074 + Region1,chr1,715265,rs12184267,0.6666055494294745 + Region1,chr1,715367,rs12184277,0.7657858702655146 + Region1,chr1,720381,rs116801199,0.6816390671665746 + Region1,chr1,723742,rs28375378,0.7124933618852456 + Region1,chr1,724324,rs28692873,0.9212425499680825 + Region1,chr1,725196,rs377099097,0.594983644175122 + Region1,chr1,725389,rs375619475,0.7032290172253173 + Region1,chr1,727841,rs116587930,0.9078685880041112 + "/projects/{projectID}/heatmap/{chromosome}/{region}": + get: + description: Retrieve local heatmap data + operationId: projects_project_id_local_heatmap_data_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + - name: chromosome + in: path + description: chromosome number + required: true + schema: + type: string + example: "chr1" + - name: region + in: path + description: region number + required: true + schema: + type: string + example: "Region1" + responses: + "200": + description: | + Local manhattan plot data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Local heatmap plot data + example: | + ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 + z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 + z_RA_RA,,,,,, + "/projects/{projectID}/zoom_manhattan": + get: + description: Retrieve local manhattan data + operationId: projects_project_id_zoom_manhattan_data_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Local manhattan plot data in CSV format + content: + text/csv; charset=utf-8: + schema: 
+ type: string + title: Local manhattan plot data + example: | + Region,CHR,position,snp_ids,JASS_PVAL + Region1,chr1,636285,rs545945172,0.7216577092326355 + Region1,chr1,662414,rs371628865,0.6725666758876788 + Region1,chr1,662622,rs61769339,0.405683282952658 + Region1,chr1,665266,rs539032812,0.3348008598497511 + Region1,chr1,693731,rs12238997,0.4952464035829177 + Region1,chr1,701835,rs189800799,0.666563580963709 + Region1,chr1,706778,rs113462541,0.861641963454806 + Region1,chr1,711310,rs200531508,0.07463266395489108 + Region1,chr1,714019,rs114983708,0.6667198743938074 + Region1,chr1,715265,rs12184267,0.6666055494294745 + Region1,chr1,715367,rs12184277,0.7657858702655146 + Region1,chr1,720381,rs116801199,0.6816390671665746 + Region1,chr1,723742,rs28375378,0.7124933618852456 + Region1,chr1,724324,rs28692873,0.9212425499680825 + Region1,chr1,725196,rs377099097,0.594983644175122 + Region1,chr1,725389,rs375619475,0.7032290172253173 + Region1,chr1,727841,rs116587930,0.9078685880041112 + "/projects/{projectID}/zoom_heatmap": + get: + description: Retrieve local heatmap data + operationId: projects_project_id_zoom_heatmap_data_get + parameters: + - name: projectID + in: path + description: project ID + required: true + schema: + type: string + example: "bca9d414e0f9a67b9e0d2131a47c316c" + responses: + "200": + description: | + Local manhattan plot data in CSV format + content: + text/csv; charset=utf-8: + schema: + type: string + title: Local heatmap plot data + example: | + ID,rs545945172,rs371628865,rs61769339,rs539032812,rs12238997,rs189800799 + z_IHEC_MONOP,-0.3623372836601329,-0.429856541533544,-0.8457360635272954,-0.9809852811227732,-0.6936527568935886,0.4382385293216385 + z_RA_RA,,,,,, +components: + schemas: + Phenotype: + properties: + id: + type: string + consortium: + type: string + outcome: + type: string + full_name: + type: string + type: + type: string + ref: + type: string + ref_link: + type: string + data_link: + type: string + data_path: + type: string + 
Project: + properties: + id: + type: string + status: + type: string + progress: + type: string + outcome: + type: array + items: + $ref: "#/components/schemas/Phenotype" diff --git a/jass/tasks.py b/jass/tasks.py index dede914577cb4a755bbb35f2b9bc1e840c89d504..a17a8bcae4dc1be36545778c9e58d6ce16ffa671 100644 --- a/jass/tasks.py +++ b/jass/tasks.py @@ -9,24 +9,25 @@ from flask import Flask import jass.models.project from jass.models.project import Project, get_file_building_tb_path -from jass.models.plots import (create_global_plot, - create_local_plot, - create_quadrant_plot, - create_qq_plot) +from jass.models.plots import ( + create_global_plot, + create_local_plot, + create_quadrant_plot, + create_qq_plot, +) + +from jass.models.worktable import create_worktable_file, create_genome_full_csv -from jass.models.worktable import (create_worktable_file, - create_genome_full_csv) - from jass.models.phenotype import Phenotype from jass.config import config def make_celery(app): celery = Celery() - if 'CELERY_CONFIG_MODULE' in os.environ: - celery.config_from_envvar('CELERY_CONFIG_MODULE') + if "CELERY_CONFIG_MODULE" in os.environ: + celery.config_from_envvar("CELERY_CONFIG_MODULE") else: - celery.config_from_object('jass.celeryconfig') + celery.config_from_object("jass.celeryconfig") celery.conf.update(app.config) TaskBase = celery.Task @@ -41,45 +42,46 @@ def make_celery(app): return celery - flask_app = Flask(__name__) celery = make_celery(flask_app) + @celery.task def create_project_worktable_file( - phenotype_ids, - init_file_path, - project_hdf_path, - remove_nan, - stat, - optim_na, - csv_file, - chunk_size, - significance_treshold, - post_filtering, - delayed_gen_csv_file, - chromosome, - pos_Start, - pos_End, - custom_loadings): + phenotype_ids, + init_file_path, + project_hdf_path, + remove_nan, + stat, + optim_na, + csv_file, + chunk_size, + significance_treshold, + post_filtering, + delayed_gen_csv_file, + chromosome, + pos_Start, + pos_End, + 
custom_loadings, +): try: return create_worktable_file( - phenotype_ids = phenotype_ids, - init_file_path = init_file_path, - project_hdf_path = project_hdf_path, - remove_nan = remove_nan, - stat = stat, - optim_na = True, - csv_file = csv_file, - chunk_size = chunk_size, - significance_treshold = significance_treshold, - post_filtering = post_filtering, - delayed_gen_csv_file = delayed_gen_csv_file, - chromosome = chromosome, - pos_Start = pos_Start, - pos_End = pos_End, - custom_loadings = custom_loadings - ) + phenotype_ids=phenotype_ids, + init_file_path=init_file_path, + project_hdf_path=project_hdf_path, + remove_nan=remove_nan, + stat=stat, + optim_na=True, + csv_file=csv_file, + chunk_size=chunk_size, + significance_treshold=significance_treshold, + post_filtering=post_filtering, + delayed_gen_csv_file=delayed_gen_csv_file, + chromosome=chromosome, + pos_Start=pos_Start, + pos_End=pos_End, + custom_loadings=custom_loadings, + ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() log_path = get_file_building_tb_path(project_hdf_path) @@ -147,77 +149,78 @@ def create_project_csv_file(Nchunk, worktable_path, csv_file_path): traceback.print_exception(exc_type, exc_value, exc_traceback, file=log_fh) log_fh.close() + def launch_create_project( - phenotype_ids, - init_table_path, + phenotype_ids, + init_table_path, worktable_path, - remove_nan = False, - stat = "jass.models.stats:omnibus_stat", - csv_file = None, - chunk_size = 50, - significance_treshold = 5*10**-8, - post_filtering = True, - delayed_gen_csv_file = False, - chromosome = None, - start = None, - end = None, - custom_loadings = None, - global_plot_path = None, - quadrant_plot_path = None, - zoom_plot_path = None, - qq_plot_path = None - ): + remove_nan=False, + stat="jass.models.stats:omnibus_stat", + csv_file=None, + chunk_size=50, + significance_treshold=5 * 10 ** -8, + post_filtering=True, + delayed_gen_csv_file=False, + chromosome=None, + start=None, + end=None, + 
custom_loadings=None, + global_plot_path=None, + quadrant_plot_path=None, + zoom_plot_path=None, + qq_plot_path=None, +): post_worktable_jobs = [] - if (global_plot_path is not None): + if global_plot_path is not None: post_worktable_jobs.append( create_project_global_plot.si(worktable_path, global_plot_path) ) - if (quadrant_plot_path is not None): + if quadrant_plot_path is not None: post_worktable_jobs.append( create_project_quadrant_plot.si(worktable_path, quadrant_plot_path), ) - if (zoom_plot_path is not None): + if zoom_plot_path is not None: post_worktable_jobs.append( create_project_zoom_plot.si(worktable_path, zoom_plot_path) ) - if (qq_plot_path is not None): + if qq_plot_path is not None: post_worktable_jobs.append( create_project_qq_plot.si(worktable_path, qq_plot_path) ) - if (delayed_gen_csv_file and (csv_file is not None)): - post_worktable_jobs.append( - create_project_csv_file.s(worktable_path, csv_file) - ) + if delayed_gen_csv_file and (csv_file is not None): + post_worktable_jobs.append(create_project_csv_file.s(worktable_path, csv_file)) post_worktable_tasks_group = group(post_worktable_jobs) - main_wf = chain(create_project_worktable_file.si( - phenotype_ids = phenotype_ids, - init_file_path = init_table_path, - project_hdf_path = worktable_path, - remove_nan = remove_nan, - stat = stat, - optim_na = True, - csv_file = csv_file, - chunk_size = chunk_size, - significance_treshold = significance_treshold, - post_filtering = post_filtering, - delayed_gen_csv_file = delayed_gen_csv_file, - chromosome = chromosome, - pos_Start = start, - pos_End = end, - custom_loadings = custom_loadings - ), - post_worktable_tasks_group) + main_wf = chain( + create_project_worktable_file.si( + phenotype_ids=phenotype_ids, + init_file_path=init_table_path, + project_hdf_path=worktable_path, + remove_nan=remove_nan, + stat=stat, + optim_na=True, + csv_file=csv_file, + chunk_size=chunk_size, + significance_treshold=significance_treshold, + 
post_filtering=post_filtering, + delayed_gen_csv_file=delayed_gen_csv_file, + chromosome=chromosome, + pos_Start=start, + pos_End=end, + custom_loadings=custom_loadings, + ), + post_worktable_tasks_group, + ) main_wf.delay() def create_project( - phenotype_ids: List[str], + phenotype_ids: List[str], available_phenotypes: List[Phenotype], chromosome: str = None, start: str = None, - end: str = None - ): - + end: str = None, +): + available_phenotype_ids = [phenotype.id for phenotype in available_phenotypes] unavailable_requested_ids = set(phenotype_ids).difference( set(available_phenotype_ids) @@ -227,12 +230,11 @@ def create_project( phenotypes = [ phenotype for phenotype in available_phenotypes if phenotype.id in phenotype_ids ] - project = Project(phenotypes = phenotypes, - chromosome = chromosome, - start = start, - end = end) - - if (project.get_type_of_analysis() == Project.LOCAL_ANALYSIS): + project = Project( + phenotypes=phenotypes, chromosome=chromosome, start=start, end=end + ) + + if project.get_type_of_analysis() == Project.LOCAL_ANALYSIS: # Local Analysis global_plot_path = None quadrant_plot_path = None @@ -244,21 +246,21 @@ def create_project( quadrant_plot_path = project.get_quadrant_plot_path() zoom_plot_path = None delayed_gen_csv_file = True - + # if project does not exist if project.status == Project.DOES_NOT_EXIST: os.makedirs(project.get_folder_path()) launch_create_project( - phenotype_ids = phenotype_ids, - init_table_path = os.path.join(config["DATA_DIR"], "initTable.hdf5"), - worktable_path = project.get_worktable_path(), - csv_file = project.get_csv_path(), - global_plot_path = global_plot_path, - quadrant_plot_path = quadrant_plot_path, - zoom_plot_path = zoom_plot_path, - delayed_gen_csv_file = delayed_gen_csv_file, - chromosome = chromosome, - start = start, - end = end + phenotype_ids=phenotype_ids, + init_table_path=os.path.join(config["DATA_DIR"], "initTable.hdf5"), + worktable_path=project.get_worktable_path(), + 
csv_file=project.get_csv_path(), + global_plot_path=global_plot_path, + quadrant_plot_path=quadrant_plot_path, + zoom_plot_path=zoom_plot_path, + delayed_gen_csv_file=delayed_gen_csv_file, + chromosome=chromosome, + start=start, + end=end, ) - return project \ No newline at end of file + return project diff --git a/jass/test/__init__.py b/jass/test/__init__.py index 86f967d82f02120c4f61393f690934c58972ad34..5dae7a40b36352ec4c95c25fbbf67ca6a7c985fd 100644 --- a/jass/test/__init__.py +++ b/jass/test/__init__.py @@ -1,12 +1,21 @@ import unittest -from ..encoder import JSONEncoder import logging import os, shutil, tempfile import unittest -import connexion + import flask_testing + +# replace delay() and si() method with mocks +# to avoid freezing calls in unit tests +from celery.app.task import Task +from unittest.mock import MagicMock + +Task.delay = MagicMock() +Task.si = MagicMock() + +from jass.server import jass_app from jass.models.inittable import create_inittable_file @@ -20,16 +29,17 @@ class JassTestCase(unittest.TestCase): class JassFlaskTestCase(JassTestCase, flask_testing.TestCase): def create_app(self): - logging.getLogger("connexion.operation").setLevel("ERROR") from jass.config import config self.test_dir = tempfile.mkdtemp() config["DATA_DIR"] = self.test_dir shutil.copy(self.get_file_path_fn("initTable.hdf5"), self.test_dir) - app = connexion.App(__name__, specification_dir="../swagger/") - app.app.json_encoder = JSONEncoder - app.add_api("swagger.yaml") - return app.app + + self.jass_app = jass_app + application = self.jass_app.create_app() + application.config["TESTING"] = True + self.testing_client = application.test_client() + return application def tearDown(self): shutil.rmtree(self.test_dir) diff --git a/jass/test/test_default_controller.py b/jass/test/test_default_controller.py deleted file mode 100644 index 839f48375abe67b5f4acc4fd5e9623ca0790f032..0000000000000000000000000000000000000000 --- a/jass/test/test_default_controller.py +++ 
/dev/null @@ -1,31 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import -import os, shutil, tempfile - -from six import BytesIO -from flask import json - -from jass.config import config -from . import JassFlaskTestCase - - -class TestDefaultController(JassFlaskTestCase): - """ DefaultController integration test stubs """ - - test_folder = "data_test1" - - def test_phenotypes_get(self): - """ - Test case for phenotypes_get - - - """ - response = self.client.open("/phenotypes", method="GET") - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - -if __name__ == "__main__": - import unittest - - unittest.main() diff --git a/jass/test/test_init_table.py b/jass/test/test_init_table.py index 5df8b9abe7b6a08de3031642a0952bd99ec925c5..d0a428f2a435095861a9aad5f20712b7cbe96eb5 100644 --- a/jass/test/test_init_table.py +++ b/jass/test/test_init_table.py @@ -40,20 +40,24 @@ class TestInitTable(object): def tearDown(self): # Remove the directory after the test - #shutil.rmtree(self.test_dir) + # shutil.rmtree(self.test_dir) print(self.test_dir) def test_compare_phenolist(self): """ Compare result and expected PhenoList """ - assert_frame_equal(self.expected_phenolist, self.result_phenolist, check_like=True) + assert_frame_equal( + self.expected_phenolist, self.result_phenolist, check_like=True + ) def test_compare_sumstattab(self): """ Compare result and expected SumStatTab """ - assert_frame_equal(self.expected_sum_stat_tab, self.result_sum_stat_tab, check_like=True) + assert_frame_equal( + self.expected_sum_stat_tab, self.result_sum_stat_tab, check_like=True + ) def test_compare_cov(self): """ diff --git a/jass/test/test_plots.py b/jass/test/test_plots.py index 9bdbdf0fb7dd42ff4e24493dd763c2ae0a1ad3f9..7748ccbabb60dfe47cb3a3e260d4af3fda9fd3c3 100644 --- a/jass/test/test_plots.py +++ b/jass/test/test_plots.py @@ -32,6 +32,7 @@ class TestPlots(JassTestCase): """ create_global_plot(self.worktable_hdf_path, self.global_plot_path) + if 
__name__ == "__main__": import unittest diff --git a/jass/test/test_server.py b/jass/test/test_server.py new file mode 100644 index 0000000000000000000000000000000000000000..8547712218608cbc2abb5ba1ae6236c7197870c5 --- /dev/null +++ b/jass/test/test_server.py @@ -0,0 +1,40 @@ +# coding: utf-8 + +from __future__ import absolute_import +import os, shutil, tempfile + +from six import BytesIO +from flask import json, url_for + +from jass.config import config +from . import JassFlaskTestCase + + +class TestDefaultController(JassFlaskTestCase): + """DefaultController integration test stubs""" + + test_folder = "data_test1" + + def test_phenotypes_get(self): + """ + Test case retrieving available phenotypes + """ + response = self.testing_client.open("/api/phenotypes", method="GET") + self.assert200(response, "Response body is : " + response.data.decode("utf-8")) + + def test_create_project(self): + """ + Test case for creating a project + """ + response = self.testing_client.open( + "/api/projects", + method="POST", + data={"phenotypeID": "z_IHEC_MONOP,z_RA_RA"}, + ) + self.assert200(response, "Response body is : " + response.data.decode("utf-8")) + + +if __name__ == "__main__": + import unittest + + unittest.main() diff --git a/jass/test/test_worktable.py b/jass/test/test_worktable.py index bed2941a036ee945616cf08f1e4b5074dcde4758..5e9b3ce23e2e7802c6fa73754a52ed13b72ba676 100644 --- a/jass/test/test_worktable.py +++ b/jass/test/test_worktable.py @@ -47,19 +47,25 @@ class TestWorkTable(object): """ Compare result and expected SumStatJostTab """ - assert_frame_equal(self.expected_sumstatjosttab, self.result_sumstatjosttab, check_like=True) + assert_frame_equal( + self.expected_sumstatjosttab, self.result_sumstatjosttab, check_like=True + ) def test_compare_regionsubtable(self): """ Compare result and expected RegionSubTable """ - assert_frame_equal(self.expected_regionsubtable, self.result_regionsubtable, check_like=True) + assert_frame_equal( + 
self.expected_regionsubtable, self.result_regionsubtable, check_like=True + ) def test_compare_summarytable(self): """ Compare result and expected SummaryTable """ - assert_frame_equal(self.expected_summarytable, self.result_summarytable, check_like=True) + assert_frame_equal( + self.expected_summarytable, self.result_summarytable, check_like=True + ) def test_compare_subcov(self): """ @@ -71,8 +77,9 @@ class TestWorkTable(object): """ Compare result and expected PhenoList """ - assert_frame_equal(self.expected_phenolist, self.result_phenolist, check_like=True) - + assert_frame_equal( + self.expected_phenolist, self.result_phenolist, check_like=True + ) phenotypes_disney = ["z_DISNEY_POCAHONT", "z_DISNEY_RATATOUY"] diff --git a/jass/util.py b/jass/util.py index 4e0189d061b34135f812df61a1365fa1bb6d666c..6c0445b46bc57465be84d8a9a1c2ed98c85a51d3 100644 --- a/jass/util.py +++ b/jass/util.py @@ -68,6 +68,7 @@ def deserialize_date(string): """ try: from dateutil.parser import parse + return parse(string).date() except ImportError: return string @@ -85,6 +86,7 @@ def deserialize_datetime(string): """ try: from dateutil.parser import parse + return parse(string) except ImportError: return string @@ -104,9 +106,11 @@ def deserialize_model(data, klass): return data for attr, attr_type in six.iteritems(instance.swagger_types): - if data is not None \ - and instance.attribute_map[attr] in data \ - and isinstance(data, (list, dict)): + if ( + data is not None + and instance.attribute_map[attr] in data + and isinstance(data, (list, dict)) + ): value = data[instance.attribute_map[attr]] setattr(instance, attr, _deserialize(value, attr_type)) @@ -123,8 +127,7 @@ def _deserialize_list(data, boxed_type): :return: deserialized list. 
:rtype: list """ - return [_deserialize(sub_data, boxed_type) - for sub_data in data] + return [_deserialize(sub_data, boxed_type) for sub_data in data] def _deserialize_dict(data, boxed_type): @@ -137,5 +140,4 @@ def _deserialize_dict(data, boxed_type): :return: deserialized dict. :rtype: dict """ - return {k: _deserialize(v, boxed_type) - for k, v in six.iteritems(data)} \ No newline at end of file + return {k: _deserialize(v, boxed_type) for k, v in six.iteritems(data)} diff --git a/requirements.txt b/requirements.txt index 25400d93c0388519cea98149d04e2cacbade82a3..f6fc215883a802969f2cf7bcf01d9f501d150a46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/hmenager/connexion.git@master#egg=connexion[swagger-ui] +flask-smorest aiohttp python_dateutil setuptools diff --git a/setup.py b/setup.py index 19675e52aa5d51210c488a360066df4de0043581..67e274e329c3082dabf0228d1e82f7e5dd93eea1 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ README = os.path.join(SETUP_DIR, 'README.md') readme = open(README).read() REQUIRES = [ - "connexion[swagger-ui] @ git+https://github.com/hmenager/connexion.git@master#egg=connexion[swagger-ui]", + "flask-smorest", "aiohttp", "python_dateutil", "setuptools",