Skip to content
Snippets Groups Projects

Draft: changed the loading of data required to draw the global plot; now read only 3...

Merged Bryan BRANCOTTE requested to merge vero_global_plot into master
3 files
+ 30
13
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 16
12
@@ -17,10 +17,10 @@ from matplotlib import colors
import matplotlib.patches as mpatches
from scipy.stats import norm, chi2
import seaborn as sns
import os
from pandas import DataFrame, read_hdf
import pandas as pd
# Default value of the ``chunk_size`` parameter of ``create_global_plot``:
# the width, in ``Region`` ids, of each window requested from "SumStatTab"
# per ``read_hdf`` call.
default_chunk_size=50
def replaceZeroes(df):
"""
@@ -32,30 +32,34 @@ def replaceZeroes(df):
df.values[df.values == 0] = min_nonzero
return df
def create_global_plot(work_file_path: str, global_plot_path: str):
def get_info_4_global_plot(work_file_path: str) -> tuple:
    """
    get_info_4_global_plot
    Load the region-level information needed to lay out the global plot.

    Only the 'Region', 'CHR' and 'MiddlePosition' columns of the "Regions"
    table are read from the HDF file, instead of the whole table.

    :param work_file_path: path to the HDF file containing a "Regions" table
    :return: a tuple ``(N_reg, binf, chr_considered, length_chr)`` where

        * ``N_reg`` is the largest value found in the ``Region`` column,
        * ``binf`` is the ``Region`` value of the first row of the table
          (NOTE(review): presumably the smallest region id -- this relies on
          the table being stored sorted by ``Region``; confirm),
        * ``chr_considered`` is the array of distinct ``CHR`` values, in
          order of first appearance,
        * ``length_chr`` is a Series indexed by ``CHR`` holding the largest
          ``MiddlePosition`` of each chromosome divided by 10**6, plus an
          extra entry at index 0 set to 0.
    """
    regions = read_hdf(
        work_file_path,
        "Regions",
        columns=['Region', 'CHR', 'MiddlePosition'],
    )

    N_reg = regions.Region.max()
    binf = regions.Region.iloc[0]
    chr_considered = regions.CHR.unique()

    # Per-chromosome extent, scaled by 1e6 (presumably base pairs -> Mb).
    length_chr = regions.groupby("CHR").MiddlePosition.max() / 10 ** 6
    # Zero-length entry at index 0, as expected by the caller's layout code.
    length_chr.loc[0] = 0

    return N_reg, binf, chr_considered, length_chr
def create_global_plot(work_file_path: str, global_plot_path: str, chunk_size:int =default_chunk_size):
"""
create_global_plot
generate genome-wide manhattan plot for a given set of phenotypes
"""
regions = read_hdf(work_file_path, "Regions")
chr_length = regions.groupby('CHR').max().position
N_reg= regions.Region.max()
N_reg,binf,chr_considered,length_chr=get_info_4_global_plot(work_file_path)
maxy = 0
fig = plt.figure(figsize=(30, 12))
ax = fig.add_subplot(111)
chunk_size = 50
colors = [
'#4287f5',
'orangered'
]
binf=regions.Region.iloc[0]
bsup= binf+chunk_size
chr_considered= regions.CHR.unique()
length_chr = regions.groupby("CHR").MiddlePosition.max() / 10**6
length_chr.loc[0] = 0
label = "Chr"+length_chr.loc[chr_considered].index.astype("str")
lab_pos = length_chr.loc[chr_considered]/2
@@ -63,7 +67,7 @@ def create_global_plot(work_file_path: str, global_plot_path: str):
pos_shift.index = pos_shift.index +1
pos_shift.loc[chr_considered[0]] = 0
lab_pos = lab_pos + [pos_shift.loc[i] for i in chr_considered]
bsup = binf + chunk_size
while binf < N_reg:
df = read_hdf(work_file_path, "SumStatTab", columns=["CHR","position", 'JASS_PVAL', "Region"], where = "Region >= {0} and Region < {1}".format(binf, bsup))
binf+= chunk_size