diff --git a/doc/source/index.rst b/doc/source/index.rst
index 733fd0b36cef188589dd8826d06ac14a421f42ea..83387d811424fce8607e4741070f08c1c1c48550 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -70,7 +70,7 @@ Input
 |  1  |14930| rs75454623 |  A  |  G  | 0.482228|
 +-----+-----+------------+-----+-----+---------+
 
-* Folder containing all raw gwas data (all chromosomes in one file) (minimal conditions?? tab separated?)
+* Folder containing all raw gwas data : (all chromosomes in one file) (minimal conditions?? tab separated?)
 * a list containing the name of GWAS file to the string format.
 * A descriptor csv files that will described each GWAS summary statistic files:
 
@@ -84,7 +84,7 @@ Input
 +===========================================+============================================================+
 |             path to the data              |                            filename                        |
 +-------------------------------------------+------------------------------------------------------------+
-|            study info fields              | consortia,outcome,fullName,type,Nsample,Ncase,Ncontrol,Nsnp|
+|            study info fields              |Consortium,Outcome,fullName,type,Nsample,Ncase,Ncontrol,Nsnp|
 +-------------------------------------------+------------------------------------------------------------+
 |    names of the header in the GWAS file   |      snpid,a1,a2,freq,pval,n,z,OR,se,code,imp,ncas,ncont   |
 +-------------------------------------------+------------------------------------------------------------+
@@ -92,6 +92,7 @@ Input
 .. Give an example
 .. |               I don't know                 |                          altNcas,altNcont|
 
+Note that the combination of Consortium and Outcome must be unique because it is used as the index during the cleaning process.
 
 Here is an example of descriptor field, the field irrelevant (for example odd ratio for continuous trait) for the study must be filled with na. 
 
diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py
index 112b75f532f5a0e0d2d4e545a2237b2dc4bd9e7a..212719155f6eab2fe4f712369ea62760a4de69a2 100644
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -21,22 +21,46 @@ import argparse
 #| pathOUT | **unused in main_preprocessing.py**  | netPath+'PCMA/1._DATA/RAW.summary/'|
 #| ImpG_output_Folder | main ouput folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
 
+def raise_duplicated_index(tag):
+    """
+    Raise an error naming the duplicated 'Consortium_Outcome' tags
+
+    Args:
+        tag (pd.Series): tags of all the studies of the information file
+
+    Raises:
+        ValueError: always; the message lists each tag present more than once
+    """
+    # keep=False flags every occurrence, so the message shows the offending
+    # tags themselves instead of a boolean mask
+    duplicated_index = tag[tag.duplicated(keep=False)].unique().tolist()
+    raise ValueError("'Consortium_Outcome' are duplicated for: {0}".format(duplicated_index))
 
 def launch_preprocessing(args):
     """
     Preprocessing GWAS dataset
     """
     gwas_map = pd.read_csv(args.gwas_info, sep="\t")
-    gwas_map.set_index("filename", inplace=True)
 
-    for gwas_filename in gwas_map.index:
-        tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'Consortium'],
-                               gwas_map.loc[gwas_filename, 'Outcome'])
+    # NOTE(review): map_columns_position used to parse this file with
+    # na_values='na'; confirm 'na' fields are still handled downstream
+
+    # define a unique tag per study, used as the index of gwas_map
+    gwas_map['tag'] = gwas_map.Consortium + "_" + gwas_map.Outcome
+
+    if gwas_map.tag.duplicated().any():
+        raise_duplicated_index(gwas_map.tag)
+
+    gwas_map.set_index("tag", inplace=True)
+
+    for tag in gwas_map.index:
+
+        gwas_filename = gwas_map.loc[tag, "filename"]
 
         print('processing GWAS: {}'.format(tag))
         start = time.time()
         GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
-        mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
+        mapgw = jp.map_gwas.map_columns_position(GWAS_link, gwas_map.loc[tag])
 
         gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
 
diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py
index 3cf1a5cfe45c244e9076515d6149545c1ca636c1..901f609b6a50ccc0525e0ef0a849a00ec20b3571 100644
--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -76,21 +76,18 @@ def convert_missing_values(df):
     return df.replace(def_missing, nan_vec)
 
 
-def map_columns_position(gwas_internal_link,  GWAS_labels):
+def map_columns_position(gwas_internal_link,  my_labels):
     """
     Find column position for each specific Gwas
 
     Args:
         gwas_internal_link (str): filename of the GWAS data (with path)
-        GWAS_labels (str): filename of the csv information file
+        my_labels (pd.Series): corresponding row of the information file
 
     Return:
         pandas Series with column position and column names as index
     """
 
-    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
 
-    column_dict.set_index("filename", inplace=True)
     print(gwas_internal_link)
     gwas_file = gwas_internal_link.split('/')[-1]
-    my_labels = column_dict.loc[gwas_file]