Skip to content
Snippets Groups Projects
Commit c10239e2 authored by Hanna JULIENNE
Browse files

fixed command line arguments

parent c2563922
No related branches found
No related tags found
No related merge requests found
Pipeline #7897 passed
......@@ -13,7 +13,7 @@ import argparse
#| variable name | description | current default value|
#|---------------|-------------|----------------------|
#| netPath | Main project folder, must end by "/" | /mnt/atlas/ |
#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
#| gwas_info* | Path to the file describing the format of the individual GWASs files (passed as --gwas-info) | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
#| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
#| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
#| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
......@@ -27,55 +27,56 @@ def launch_preprocessing(args):
"""
Preprocessing GWAS dataset
"""
gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
gwas_map.loc[GWAS_filename, 'outcome'])
tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
gwas_map.loc[args.gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag))
start = time.time()
gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder)
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
print(mapgw)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(REF_filename, header=None, sep= "\t",
ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id")
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
end = time.time()
print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
jp.save_output.save_output(mgwas, ldscore_format, tag)
if(args.output_folder_1_file):
jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
def add_preprocessing_argument():
    """
    Build the command-line parser for the GWAS preprocessing step.

    Each option string is registered exactly once: argparse raises a
    "conflicting option string" ArgumentError when the same flag is added
    twice to one parser.

    Returns
    -------
    argparse.ArgumentParser
        Parser whose ``func`` default is ``launch_preprocessing``, so the
        caller can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
    parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
    # '--ref-folder' is accepted as an alias of '--ref-path': both carried the
    # same description and launch_preprocessing only reads args.ref_path.
    parser.add_argument('--ref-path', '--ref-folder', dest='ref_path', required=True, help= "reference panel location (used to determine which snp to impute)")
    parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
    parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
    parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
    parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
    # type=float: without it a value given on the command line stays a str,
    # while the default is the float 0.7.
    parser.add_argument('--percent-sample-size', required=False, type=float, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
    parser.set_defaults(func=launch_preprocessing)
    return parser
def main():
    """
    Command-line entry point.

    Parses the process arguments with the preprocessing parser and hands
    the resulting namespace to the handler stored in its ``func`` attribute.
    """
    cli_args = add_preprocessing_argument().parse_args()
    handler = cli_args.func
    handler(cli_args)
......
......@@ -6,7 +6,7 @@ import scipy.stats as ss
import seaborn as sns
import matplotlib.pyplot as plt
perSS = 0.7
def compute_z_score(mgwas):
"""
......@@ -27,7 +27,7 @@ def compute_z_score(mgwas):
return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait):
def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
if 'n' in mgwas.columns:
myN = mgwas.n
......
......@@ -5,7 +5,7 @@ setup(name='jass_preprocessing',
description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS',
url='http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
author='Hugues Aschard, Hanna Julienne, Vincent Laville',
author_email='hugues.aschard@pasteur.fr',
author_email='hanna.julienne@pasteur.fr',
license='MIT',
#package_dir = {'': 'jass_preprocessing'},
packages= ['jass_preprocessing'],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment