From 6f72006d723806d28e426e4a989bf7d451d6b831 Mon Sep 17 00:00:00 2001
From: hanna julienne <hanna.julienne@pasteur.fr>
Date: Tue, 19 Nov 2019 11:47:07 +0100
Subject: [PATCH] Fix index system to correctly handle GWAS sharing the same
 summary statistics file

---
 jass_preprocessing/__main__.py |  4 ++--
 jass_preprocessing/map_gwas.py | 12 +++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py
index 2127191..6c989c0 100644
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -32,7 +32,7 @@ def launch_preprocessing(args):
     gwas_map = pd.read_csv(args.gwas_info, sep="\t")
 
     #define an unique
-    gwas_map['tag'] = gwas_map.Consortium+ "_" + D.Outcome
+    gwas_map['tag'] = gwas_map.Consortium+ "_" + gwas_map.Outcome
 
     if gwas_map.tag.duplicated().any():
         raise_duplicated_index(gwas_map.tag)
@@ -41,7 +41,7 @@ def launch_preprocessing(args):
 
     for tag in gwas_map.index:
 
-        gwas_filename = D.loc[tag, "filename"]
+        gwas_filename = gwas_map.loc[tag, "filename"]
 
         print('processing GWAS: {}'.format(tag))
         start = time.time()
diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py
index 901f609..0f5c35b 100644
--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -76,7 +76,7 @@ def convert_missing_values(df):
     return df.replace(def_missing, nan_vec)
 
 
-def map_columns_position(gwas_internal_link,  my_labels):
+def map_columns_position(gwas_internal_link,  column_dict):
     """
     Find column position for each specific Gwas
 
@@ -87,16 +87,14 @@ def map_columns_position(gwas_internal_link,  my_labels):
     Return:
         pandas Series with column position and column names as index
     """
-
-
     print(gwas_internal_link)
     gwas_file = gwas_internal_link.split('/')[-1]
-    my_labels = column_dict.loc[gwas_file]
-
     #Our standart labels:
-    reference_label = column_dict.columns.tolist()
+
+    reference_label = column_dict.index.tolist()
+    print(reference_label)
     # labels in the GWAS files
-    target_lab = pd.Index(my_labels.values.tolist())
+    target_lab = pd.Index(column_dict.values.tolist())
     is_gzipped = re.search(r".gz$", gwas_internal_link)
     if is_gzipped:
         f = gzip.open(gwas_internal_link)
-- 
GitLab