Skip to content
Snippets Groups Projects
Commit 5fc49314 authored by Hanna JULIENNE
Browse files

modify main

parent d644c800
No related branches found
No related tags found
No related merge requests found
Pipeline #14548 passed
......@@ -29,45 +29,46 @@ def launch_preprocessing(args):
"""
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
gwas_map.loc[args.gwas_filename, 'outcome'])
for gwas_filename in gwas_map[['filename']]:
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag))
start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
print('processing GWAS: {}'.format(tag))
start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id")
ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id")
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
end = time.time()
end = time.time()
print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
if(args.output_folder_1_file):
jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
if(args.output_folder_1_file):
jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
def add_preprocessing_argument():
parser = argparse.ArgumentParser()
parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files with correct header")
#parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
parser.add_argument('--input-folder', required=True, help= "Path to the folder containing the Raw GWASs summary statistic files, must end by '/'")
parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns (useful to compute LDSC correlation for instance)")
parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
parser.set_defaults(func=launch_preprocessing)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment