diff --git a/jass/models/worktable.py b/jass/models/worktable.py index b21ce807627a6c6f62fbea988b286a4eb38e2998..32f147e7ae37d239a31066d33cfea236475758c0 100644 --- a/jass/models/worktable.py +++ b/jass/models/worktable.py @@ -525,6 +525,9 @@ def create_worktable_file( if sum_stat_tab.shape[0] == 0: print("No data available for region {0} to region {1}".format(binf, bsup)) + file_progress = open(progress_path, "w") + file_progress.write(str(JASS_progress)) + file_progress.close() continue # skip region if no data are available Nsnp_total = Nsnp_total + sum_stat_tab.shape[0] @@ -668,6 +671,11 @@ def create_worktable_file( print("{1} SNPs treated on {0} SNPs".format(Nsnp_jassed, Nsnp_total)) + if (Nsnp_jassed == 0): + file_error = "ERROR_LOCAL_ANALYSIS_MSG.log" + msg_error = "There is no SNP in the selected region" + write_error_msg(project_hdf_path, file_error, msg_error) + RegionSubTable = read_hdf(project_hdf_path, "Regions") pval_min = RegionSubTable["UNIVARIATE_MIN_PVAL"] @@ -706,7 +714,10 @@ def create_worktable_file( "summaryTable", summaryTable, format="table", data_columns=True ) # Summary Table (contigency table) hdf_work.close() - + + if local_analysis: + add_annotation_region(init_file_path, project_hdf_path, chromosome, pos_Start, pos_End) + return Nchunk @@ -1015,3 +1026,211 @@ def create_genome_full_csv(project_hdf_path, csv_file, chunk_size=50, Nchunk=35) The_file_is_available = False return The_file_is_available + + +def extract_annotation_region( + init_file_path, + chromosome, + pos_Start, + pos_End +): + + """ + Extract genes and exons from an initial data table + by specifying the selected region (chromosome and position start, end) + + :param init_file_path: path to the initial data table + :type init_file_path: str + :param chromosome: Chromosome number selected for local analysis + :type chromosome: str + :param pos_Start: start of the position of the studied region (base point) for local analysis + :type pos_Start: str + :param pos_End: end of the position of the studied region (base point) for local analysis + :type pos_End: str + + :return: execution status + :rtype: bool + :return: df_gene_exon + :rtype: dataframe of genes and exons + """ + + Genes_and_exons_are_available = False + df_gene_exon = None + + condition = "Chr = {0} and start < {2} and end > {1}".format(chromosome, pos_Start, pos_End) + + try: + df_gene = read_hdf(init_file_path, + "Gene", + columns=[ + "Chr", + "GeneID", + "gene_label", + "start", + "end", + "direction", + "biotype" + ], + where=condition) + + except KeyError: + print("WARNING: Gene annotation not available in inittable. \nUse inittable.add_gene_annotation to update inittable.hdf5") + return (Genes_and_exons_are_available, df_gene_exon) + + Array_Gene = df_gene.to_numpy() + Dict_gene_label = {} + Dict_gene_start = {} + Dict_gene_end = {} + Dict_gene_direction = {} + Dict_gene_biotype = {} + for i in range(df_gene.shape[0]): + Dict_gene_label[Array_Gene[i][1]] = Array_Gene[i][2] + Dict_gene_start[Array_Gene[i][1]] = Array_Gene[i][3] + Dict_gene_end[Array_Gene[i][1]] = Array_Gene[i][4] + Dict_gene_direction[Array_Gene[i][1]] = Array_Gene[i][5] + Dict_gene_biotype[Array_Gene[i][1]] = Array_Gene[i][6] + + try: + df_exon = read_hdf(init_file_path, + "Exon", + columns=[ + "Chr", + "exon_label", + "GeneID", + "start", + "end", + "direction" + ], + where=condition) + + except KeyError: + print("WARNING: Exon annotation not available in inittable. \nUse inittable.add_gene_annotation to update inittable.hdf5") + return (Genes_and_exons_are_available, df_gene_exon) + + colonnes = ["GeneID", + "exon_label", + "exon_start", + "exon_end", + "gene_label", + "gene_start", + "gene_end", + "gene_direction", + "gene_biotype"] + + Liste_gene = df_exon[colonnes[0]].values + gene_label = [] + gene_start = [] + gene_end = [] + gene_direction = [] + gene_biotype = [] + + for gene_id in Liste_gene: + gene_label.append(Dict_gene_label[gene_id]) + gene_start.append(Dict_gene_start[gene_id]) + gene_end.append(Dict_gene_end[gene_id]) + gene_direction.append(Dict_gene_direction[gene_id]) + gene_biotype.append(Dict_gene_biotype[gene_id]) + + df_gene_exon = DataFrame() + df_gene_exon[colonnes[0]] = df_exon[colonnes[0]] + df_gene_exon[colonnes[1]] = df_exon[colonnes[1]] + df_gene_exon[colonnes[2]] = df_exon["start"] + df_gene_exon[colonnes[3]] = df_exon["end"] + df_gene_exon[colonnes[4]] = gene_label + df_gene_exon[colonnes[5]] = gene_start + df_gene_exon[colonnes[6]] = gene_end + df_gene_exon[colonnes[7]] = gene_direction + df_gene_exon[colonnes[8]] = gene_biotype + + Genes_and_exons_are_available = True + return (Genes_and_exons_are_available, df_gene_exon) + + +def add_annotation_region( + init_file_path, + project_hdf_path, + chromosome, + pos_Start, + pos_End +): + + """ + Add genes and exons to the worktable file from an initial data table + by specifying the selected region (chromosome and position start, end) + + :param init_file_path: path to the initial data table + :type init_file_path: str + :param project_hdf_path: path to the worktable file that will be produced + :type project_hdf_path: str + :param chromosome: Chromosome number selected for local analysis + :type chromosome: str + :param pos_Start: start of the position of the studied region (base point) for local analysis + :type pos_Start: str + :param pos_End: end of the position of the studied region (base point) for local analysis + :type pos_End: str + + :return: execution status + :rtype: bool + """ + status, df_gene_exon = extract_annotation_region(init_file_path, chromosome, pos_Start, pos_End) + + if status: + hdf_work = HDFStore(project_hdf_path) + hdf_work.put("gene_exon", df_gene_exon, format="table", data_columns=True) + hdf_work.close() + else: + file_error = "ERROR_GENE_EXON_MSG.log" + msg_error = "Gene and Exon annotations are not available. Run inittable.add_gene_annotation" + write_error_msg(project_hdf_path, file_error, msg_error) + + return status + + +def get_annotation_gene_exon_data(project_hdf_path: str, + init_file_path: str = None, + chromosome: str = None, + pos_Start: str = None, + pos_End: str = None): + """ + Read and return the gene_exon dataframe from a worktable file + for a given chromosome and region for the Gene and Exon diagram + + :param project_hdf_path: path to the worktable file + :type project_hdf_path: str + + :return: The dataframe gene_exon corresponding to the genes and exons for a given chromosome and region, \ + as a CSV formatted text + :rtype: str + """ + if chromosome is None: + # Local analysis : the file project_hdf_path contains only useful information. + # No data filter is needed + df_gene_exon = read_hdf(project_hdf_path, "gene_exon") + else: + # Genome full analysis + df_gene_exon = extract_annotation_region(init_file_path, + chromosome, + pos_Start, + pos_End) + + return df_gene_exon.to_csv() + + +def write_error_msg(project_hdf_path, file_error, msg_error): + """ + write_error_msg + writes an error message which is displayed in the web interface + + :param project_hdf_path: path to the worktable file + :type project_hdf_path: str + :param file_error: name of the file where the message is stored + :type file_error: str + :param msg_error: error message text + :type msg_error: str + + """ + file_error_path = os.path.join(os.path.dirname(project_hdf_path), + file_error) + the_file = open(file_error_path, "w") + the_file.write(msg_error) + the_file.close()