Skip to content
Snippets Groups Projects
Commit cc9337de authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

add filter on reference panel

parent 849b8ee4
No related branches found
No related tags found
No related merge requests found
...@@ -39,9 +39,7 @@ def launch_preprocessing(args): ...@@ -39,9 +39,7 @@ def launch_preprocessing(args):
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(args.ref_path, header=None, sep= "\t", ref = read_reference(args.ref_path, bool(args.mask_MHC), float(args.minimum_MAF))
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id")
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.map_reference.compute_snp_alignement(mgwas)
...@@ -69,6 +67,10 @@ def add_preprocessing_argument(): ...@@ -69,6 +67,10 @@ def add_preprocessing_argument():
parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns (useful to compute LDSC correlation for instance)") parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns (useful to compute LDSC correlation for instance)")
parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7) parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
parser.add_argument('--minimum-MAF', required=False, help= "Filter the reference panel by minimum allele frequency", default=0.01)
parser.add_argument('--mask-MHC', required=False, help= "Whether the MHC region should be masked or not. default is False", default=False)
parser.set_defaults(func=launch_preprocessing) parser.set_defaults(func=launch_preprocessing)
return parser return parser
......
...@@ -88,7 +88,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels): ...@@ -88,7 +88,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na') column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
column_dict.set_index("filename", inplace=True) column_dict.set_index("filename", inplace=True)
print(gwas_internal_link)
gwas_file = gwas_internal_link.split('/')[-1] gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file] my_labels = column_dict.loc[gwas_file]
...@@ -133,8 +133,7 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -133,8 +133,7 @@ def read_gwas( gwas_internal_link, column_map):
usecols = column_map.values, #column_dict['label_position'].keys(), usecols = column_map.values, #column_dict['label_position'].keys(),
names= column_map.index, names= column_map.index,
index_col=0, index_col=0,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A', header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN',
'#NA', '-1.#IND', '-1.#QNAN',
'-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.']) 'nan', 'na', '.'])
......
...@@ -8,17 +8,23 @@ import numpy as np ...@@ -8,17 +8,23 @@ import numpy as np
import jass_preprocessing.dna_utils as dna_u import jass_preprocessing.dna_utils as dna_u
import warnings import warnings
def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None):
def read_reference(gwas_reference_panel):
""" """
helper function to name correctly the column helper function to name correctly the column
""" """
ref = pd.read_csv(REF_filename, header=None, sep= "\t", ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
if mask_MHC:
ref = ref.loc[(ref.chr !=6)|(ref.pos < 28477797)|(ref.pos > 33448354)]
if minimum_MAF is not None:
ref = ref.loc[ref.MAF > minimum_MAF]
return(ref) return(ref)
def map_on_ref_panel(gw_df , ref_panel): def map_on_ref_panel(gw_df , ref_panel):
""" """
Merge Gwas dataframe with the reference panel Merge Gwas dataframe with the reference panel
......
...@@ -15,11 +15,11 @@ def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study): ...@@ -15,11 +15,11 @@ def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study):
mgwas_chr = pd.DataFrame({ mgwas_chr = pd.DataFrame({
'rsID': mgwas_copy.loc[chrom].snp_id, 'rsID': mgwas_copy.loc[chrom].snp_id,
'pos': mgwas_copy.loc[chrom].pos, 'pos': mgwas_copy.loc[chrom].pos,
'A0': mgwas_copy.loc[chrom].ref, 'A1': mgwas_copy.loc[chrom].ref,
'A1':mgwas_copy.loc[chrom].alt, 'A2':mgwas_copy.loc[chrom].alt,
'Z': mgwas_copy.loc[chrom].computed_z, 'Z': mgwas_copy.loc[chrom].computed_z,
'P': mgwas_copy.loc[chrom].pval 'P': mgwas_copy.loc[chrom].pval
}, columns= ['rsID', 'pos', 'A0', "A1", "Z", "P" ]) }, columns= ['rsID', 'pos', 'A1', "A2", "Z", "P" ])
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt" impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt"
print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder)) print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment