Skip to content
Snippets Groups Projects
Commit 46ad7c44 authored by Veronique Legrand
Browse files

changed the loading of data required to draw the global plot; now read only 3...

Changed how the data required to draw the global plot is loaded: only 3 columns of the original HDF file are now read.
parent 52356f68
No related branches found
No related tags found
2 merge requests: !108 "Fixing py 3.12", !107 "Draft: changed the loading of data required to draw the global plot; now read only 3..."
...@@ -17,10 +17,10 @@ from matplotlib import colors ...@@ -17,10 +17,10 @@ from matplotlib import colors
import matplotlib.patches as mpatches import matplotlib.patches as mpatches
from scipy.stats import norm, chi2 from scipy.stats import norm, chi2
import seaborn as sns import seaborn as sns
import os
from pandas import DataFrame, read_hdf from pandas import DataFrame, read_hdf
import pandas as pd import pandas as pd
default_chunk_size=50
def replaceZeroes(df): def replaceZeroes(df):
""" """
...@@ -32,30 +32,34 @@ def replaceZeroes(df): ...@@ -32,30 +32,34 @@ def replaceZeroes(df):
df.values[df.values == 0] = min_nonzero df.values[df.values == 0] = min_nonzero
return df return df
def create_global_plot(work_file_path: str, global_plot_path: str):
def get_info_4_global_plot(work_file_path: str):
    """
    get_info_4_global_plot
    load the region-level information needed to lay out the global plot

    Only the 'Region', 'CHR' and 'MiddlePosition' columns of the "Regions"
    table are read from the HDF file, instead of the whole table.

    :param work_file_path: path of the HDF file holding the "Regions" table
    :return: a tuple (N_reg, binf, chr_considered, length_chr) where
        N_reg is the largest value found in the Region column,
        binf is the Region value of the first row,
        chr_considered holds the distinct CHR values,
        length_chr gives, for each CHR, the largest MiddlePosition divided
        by 10**6, plus one extra entry equal to 0 under label 0.
    """
    # Local import so the diagnostic below does not depend on a file-level
    # import being present.
    import logging

    regions = read_hdf(
        work_file_path,
        "Regions",
        columns=["Region", "CHR", "MiddlePosition"],
    )
    # Diagnostic information only: emitted at DEBUG level rather than being
    # printed unconditionally to stdout on every call.
    logging.getLogger(__name__).debug("Regions dtypes:\n%s", regions.dtypes)

    N_reg = regions.Region.max()  # Keep biggest element in Region column
    binf = regions.Region.iloc[0]
    chr_considered = regions.CHR.unique()
    # Largest MiddlePosition per chromosome, scaled by 1e6
    # (presumably base pairs -> megabases; confirm units with the data producer).
    length_chr = regions.groupby("CHR").MiddlePosition.max() / 10 ** 6
    # Extra zero entry under label 0; the plotting code appears to use it as
    # the starting offset when shifting chromosome positions -- TODO confirm.
    length_chr.loc[0] = 0
    return N_reg, binf, chr_considered, length_chr
def create_global_plot(work_file_path: str, global_plot_path: str, chunk_size:int =default_chunk_size):
""" """
create_global_plot create_global_plot
generate genome-wide manhattan plot for a given set of phenotypes generate genome-wide manhattan plot for a given set of phenotypes
""" """
regions = read_hdf(work_file_path, "Regions") N_reg,binf,chr_considered,length_chr=get_info_4_global_plot(work_file_path)
#chr_length = regions.groupby('CHR').max().position
N_reg= regions.Region.max()
maxy = 0 maxy = 0
fig = plt.figure(figsize=(30, 12)) fig = plt.figure(figsize=(30, 12))
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
chunk_size = 50
colors = [ colors = [
'#4287f5', '#4287f5',
'orangered' 'orangered'
] ]
binf=regions.Region.iloc[0]
bsup= binf+chunk_size
chr_considered= regions.CHR.unique()
length_chr = regions.groupby("CHR").MiddlePosition.max() / 10**6
length_chr.loc[0] = 0
label = "Chr"+length_chr.loc[chr_considered].index.astype("str") label = "Chr"+length_chr.loc[chr_considered].index.astype("str")
lab_pos = length_chr.loc[chr_considered]/2 lab_pos = length_chr.loc[chr_considered]/2
...@@ -63,7 +67,7 @@ def create_global_plot(work_file_path: str, global_plot_path: str): ...@@ -63,7 +67,7 @@ def create_global_plot(work_file_path: str, global_plot_path: str):
pos_shift.index = pos_shift.index +1 pos_shift.index = pos_shift.index +1
pos_shift.loc[chr_considered[0]] = 0 pos_shift.loc[chr_considered[0]] = 0
lab_pos = lab_pos + [pos_shift.loc[i] for i in chr_considered] lab_pos = lab_pos + [pos_shift.loc[i] for i in chr_considered]
bsup = binf + chunk_size
while binf < N_reg: while binf < N_reg:
df = read_hdf(work_file_path, "SumStatTab", columns=["CHR","position", 'JASS_PVAL', "Region"], where = "Region >= {0} and Region < {1}".format(binf, bsup)) df = read_hdf(work_file_path, "SumStatTab", columns=["CHR","position", 'JASS_PVAL', "Region"], where = "Region >= {0} and Region < {1}".format(binf, bsup))
binf+= chunk_size binf+= chunk_size
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment