Skip to content
Snippets Groups Projects
Commit c10239e2 authored by Hanna JULIENNE
Browse files

fixed command line arguments

parent c2563922
No related branches found
No related tags found
No related merge requests found
Pipeline #7897 passed
...@@ -13,7 +13,7 @@ import argparse ...@@ -13,7 +13,7 @@ import argparse
#| variable name | description | current default value| #| variable name | description | current default value|
#|---------------|-------------|----------------------| #|---------------|-------------|----------------------|
#| netPath | Main project folder, must end by "/" | /mnt/atlas/ | #| netPath | Main project folder, must end by "/" | /mnt/atlas/ |
#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' | #| args.gwas_info* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/args.gwas_info.csv' |
#| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'| #| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
#| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ | #| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
#| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ | #| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
...@@ -27,55 +27,56 @@ def launch_preprocessing(args): ...@@ -27,55 +27,56 @@ def launch_preprocessing(args):
""" """
Preprocessing GWAS dataset Preprocessing GWAS dataset
""" """
gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0) gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'], tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
gwas_map.loc[GWAS_filename, 'outcome']) gwas_map.loc[args.gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time() start = time.time()
gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path) gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder)
GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2] GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
print(mapgw) print(mapgw)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(REF_filename, header=None, sep= "\t", ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag) mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
end = time.time() end = time.time()
print("Preprocessing of {0} in {1}s".format(tag, end-start)) print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag) if(args.output_folder_1_file):
jp.save_output.save_output(mgwas, ldscore_format, tag) jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
def add_preprocessing_argument(): def add_preprocessing_argument():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs")
parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files") parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)") parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'") parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns") parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
parser.set_defaults(func=launch_preprocessing) parser.set_defaults(func=launch_preprocessing)
return parser return parser
def main(): def main():
parser = add_preprocessing_argument() parser = add_preprocessing_argument()
args = parser.parse_args() args = parser.parse_args()
args.func(args) args.func(args)
......
...@@ -6,7 +6,7 @@ import scipy.stats as ss ...@@ -6,7 +6,7 @@ import scipy.stats as ss
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
perSS = 0.7
def compute_z_score(mgwas): def compute_z_score(mgwas):
""" """
...@@ -27,7 +27,7 @@ def compute_z_score(mgwas): ...@@ -27,7 +27,7 @@ def compute_z_score(mgwas):
return mgwas return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait): def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
if 'n' in mgwas.columns: if 'n' in mgwas.columns:
myN = mgwas.n myN = mgwas.n
......
...@@ -5,7 +5,7 @@ setup(name='jass_preprocessing', ...@@ -5,7 +5,7 @@ setup(name='jass_preprocessing',
description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS', description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS',
url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing', url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
author='Hugues Aschard, Hanna Julienne, Vincent Laville', author='Hugues Aschard, Hanna Julienne, Vincent Laville',
author_email='hugues.aschard@pasteur.fr', author_email='hanna.julienne@pasteur.fr',
license='MIT', license='MIT',
#package_dir = {'': 'jass_preprocessing'}, #package_dir = {'': 'jass_preprocessing'},
packages= ['jass_preprocessing'], packages= ['jass_preprocessing'],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment