From 6f72006d723806d28e426e4a989bf7d451d6b831 Mon Sep 17 00:00:00 2001 From: hanna julienne <hanna.julienne@pasteur.fr> Date: Tue, 19 Nov 2019 11:47:07 +0100 Subject: [PATCH] corrected index system to treat correctly GWAS sharing the same summary statistics file --- jass_preprocessing/__main__.py | 4 ++-- jass_preprocessing/map_gwas.py | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py index 2127191..6c989c0 100644 --- a/jass_preprocessing/__main__.py +++ b/jass_preprocessing/__main__.py @@ -32,7 +32,7 @@ def launch_preprocessing(args): gwas_map = pd.read_csv(args.gwas_info, sep="\t") #define an unique - gwas_map['tag'] = gwas_map.Consortium+ "_" + D.Outcome + gwas_map['tag'] = gwas_map.Consortium+ "_" + gwas_map.Outcome if gwas_map.tag.duplicated().any(): raise_duplicated_index(gwas_map.tag) @@ -41,7 +41,7 @@ def launch_preprocessing(args): for tag in gwas_map.index: - gwas_filename = D.loc[tag, "filename"] + gwas_filename = gwas_map.loc[tag, "filename"] print('processing GWAS: {}'.format(tag)) start = time.time() diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py index 901f609..0f5c35b 100644 --- a/jass_preprocessing/map_gwas.py +++ b/jass_preprocessing/map_gwas.py @@ -76,7 +76,7 @@ def convert_missing_values(df): return df.replace(def_missing, nan_vec) -def map_columns_position(gwas_internal_link, my_labels): +def map_columns_position(gwas_internal_link, column_dict): """ Find column position for each specific Gwas @@ -87,16 +87,14 @@ def map_columns_position(gwas_internal_link, my_labels): Return: pandas Series with column position and column names as index """ - - print(gwas_internal_link) gwas_file = gwas_internal_link.split('/')[-1] - my_labels = column_dict.loc[gwas_file] - #Our standart labels: - reference_label = column_dict.columns.tolist() + + reference_label = column_dict.index.tolist() + print(reference_label) # labels in the GWAS files - target_lab = pd.Index(my_labels.values.tolist()) + target_lab = pd.Index(column_dict.values.tolist()) is_gzipped = re.search(r".gz$", gwas_internal_link) if is_gzipped: f = gzip.open(gwas_internal_link) -- GitLab