Commit c10239e2 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

fixed command line arguments

parent c2563922
Pipeline #7897 passed with stages
in 1 minute and 27 seconds
......@@ -13,7 +13,7 @@ import argparse
#| variable name | description | current default value|
#|---------------|-------------|----------------------|
#| netPath | Main project folder, must end with "/" | /mnt/atlas/ |
#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
#| args.gwas_info* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
#| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end with "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
#| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
#| ldscore_format | data formatted to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
def launch_preprocessing(args):
    """
    Preprocess one raw GWAS summary statistics file.

    Every input and output location is read from ``args`` (the namespace
    produced by the parser of ``add_preprocessing_argument``); the function
    does not rely on module-level path variables.

    Attributes read from ``args``:
        gwas_info: tab-separated description file, read with its first
            column as index; must provide the 'consortia' and 'outcome'
            columns for the row ``gwas_filename``.
        gwas_filename: name of the raw GWAS file to standardize.
        gwas_folder: folder searched for ``gwas_filename``.
        ref_path: tab-separated reference panel without header; its columns
            are named chr, pos, snp_id, ref, alt, MAF and snp_id is the index.
        diagnostic_folder: forwarded to ``compute_sample_size``.
        percent_sample_size: forwarded to ``compute_sample_size``.
        output_folder: destination of the per-chromosome output.
        output_folder_1_file: optional destination of the single-file
            output; nothing is written there when it is empty or None.

    Returns:
        None. Results are written by the ``jp.save_output`` helpers and
        progress is printed on stdout.
    """
    gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
    tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
                           gwas_map.loc[args.gwas_filename, 'outcome'])
    print('processing GWAS: {}'.format(tag))

    start = time.time()
    # NOTE(review): GWAS_table is not defined in the visible part of this
    # module and the returned value is not used below -- confirm whether this
    # call is still needed.
    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder)
    GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
    mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
    print(mapgw)

    gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
    ref = pd.read_csv(args.ref_path, header=None, sep="\t",
                      names=['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                      index_col="snp_id")

    mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
    mgwas = jp.map_reference.compute_snp_alignement(mgwas)
    mgwas = jp.compute_score.compute_z_score(mgwas)
    # A value typed on the command line may arrive as a string; the threshold
    # must be numeric, so it is converted here (a float is left unchanged).
    mgwas = jp.compute_score.compute_sample_size(
        mgwas, args.diagnostic_folder, tag, float(args.percent_sample_size))
    end = time.time()
    print("Preprocessing of {0} in {1}s".format(tag, end-start))

    jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
    # The single-file export is optional.
    if args.output_folder_1_file:
        jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
def add_preprocessing_argument():
    """
    Build the command line parser of the preprocessing script.

    Each option string is registered exactly once: argparse raises an error
    when the same option string is added twice to a parser.

    Returns:
        argparse.ArgumentParser: parser whose parsed namespace carries
        gwas_info, gwas_filename, ref_path, gwas_folder, diagnostic_folder,
        output_folder, output_folder_1_file (None when omitted),
        percent_sample_size (float, 0.7 when omitted) and ``func`` set to
        ``launch_preprocessing``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
    parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
    parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
    parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
    parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
    parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
    parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
    # type=float so that a value given on the command line is a number, like
    # the default, instead of a string.
    parser.add_argument('--percent-sample-size', required=False, type=float, default=0.7, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs")
    parser.set_defaults(func=launch_preprocessing)
    return parser
def main():
    """
    Command line entry point.

    Parses the process arguments with the parser returned by
    ``add_preprocessing_argument`` and calls the function stored in the
    ``func`` attribute of the parsed namespace, passing it that namespace.
    """
    cli_args = add_preprocessing_argument().parse_args()
    cli_args.func(cli_args)
......
......@@ -6,7 +6,7 @@ import scipy.stats as ss
import seaborn as sns
import matplotlib.pyplot as plt
perSS = 0.7
def compute_z_score(mgwas):
"""
......@@ -27,7 +27,7 @@ def compute_z_score(mgwas):
return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait):
def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
if 'n' in mgwas.columns:
myN = mgwas.n
......
......@@ -5,7 +5,7 @@ setup(name='jass_preprocessing',
description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS',
url='http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
author='Hugues Aschard, Hanna Julienne, Vincent Laville',
author_email='hugues.aschard@pasteur.fr',
author_email='hanna.julienne@pasteur.fr',
license='MIT',
#package_dir = {'': 'jass_preprocessing'},
packages= ['jass_preprocessing'],
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment