diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a062d1a52502392f00d44b645e6535bf3fdcefbb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,33 @@
+
+
+# Preprocessing package for cleaning and formating data for JASS
+
+This helper package convert heterogeneous GWAS summary statistic format to
+the IMPG input format.
+
+## Installation
+
+*Requirement* :
+
+## Usage
+
+Once installed the preprocessing can be launch in the following manner:
+
+
+
+## reference panel format:
+
+The user is expected to provide a reference panel with the following
+column naming and column types:
+
+| chr | pos | snp_id | ref | alt | MAF |
+|-----|-----|--------|-----|-----|-----|
+|1	|13116	|rs62635286 |	T|	G|	0.0970447|
+|1	|13118	|rs200579949 |A	|G	|0.0970447|
+|1	|14604	|rs541940975 |A	|G	|0.147564|
+|1	|14930	|rs75454623	|A	|G	|0.482228|
+
+***
+
+## Output format
+['rsID', 'pos', 'A0', "A1", "Z" ]
diff --git a/jass_preprocessing/jass_preprocessing/compute_score/compute.py b/jass_preprocessing/jass_preprocessing/compute_score/compute.py
index 841eba01dea572d3a506b4651ecf8a47cb1fbc1f..fd29ff24ac0d1c198a0e7812d8f75b1dddf11a4c 100644
--- a/jass_preprocessing/jass_preprocessing/compute_score/compute.py
+++ b/jass_preprocessing/jass_preprocessing/compute_score/compute.py
@@ -54,12 +54,14 @@ def compute_sample_size(mgwas, diagnostic_folder, trait):
     ss_thres = perSS * myW_thres
     mgwas["computed_N"] = myN
     plt.clf()
-    p1 = sns.distplot(mgwas.computed_N)
+    p1 = sns.distplot(mgwas.computed_N[~mgwas.computed_N.isna()])
     p1.axvline(x=ss_thres)
     fo = "{0}/Sample_size_distribution_{1}.png".format(diagnostic_folder, trait)
     p1.figure.savefig(fo)
     # Filter SNP with a too small sample _SampleSize
     print("NSNP before sample size filtering: {}".format(mgwas.shape[0]))
     mgwas = mgwas.loc[(myN >= ss_thres)]
+    mgwas = mgwas.loc[~mgwas.computed_N.isna()]
+
     print("NSNP after sample size filtering: {}".format(mgwas.shape[0]))
     return(mgwas)
diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
index aca29dd1cbd22786f8ebd1da67203bcd4db5bf91..b906d03c807a6d92a05ceda24a81c25d131fcfa2 100644
--- a/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
+++ b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
@@ -36,7 +36,7 @@ def convert_missing_values(df):
     def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN',
                    '-NaN',
                    '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN',
-                    'nan', 'na', '.']
+                   'nan', 'na', '.']
 
     nmissing = len(def_missing)
     nan_vec = ["NA"] * nmissing
@@ -47,35 +47,51 @@ def map_columns_position(gwas_internal_link,  GWAS_labels):
     """
     Find column position for each specific Gwas
     """
-    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
+    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)
+
     gwas_file = gwas_internal_link.split('/')[-1]
-    my_labels = column_dict[column_dict['filename'] == gwas_file]
-    target_lab = my_labels.values.tolist()[0]
-    f = open(gwas_internal_link)
 
+    my_labels = column_dict.loc[gwas_file]
+    column_dict.head()
+    #Our standart labels:
+    reference_label = column_dict.columns.tolist()
+    # labels in the GWAS files
+    target_lab = pd.Index(my_labels.values.tolist())
+
+    f = open(gwas_internal_link)
     count_line = 0
     line = f.readline()
-    header = line.split()
-    list_col = {}
-    list_lab = {}
-    for l in range(0, len(target_lab)):
-        for h in range(0, len(header)):
-            if header[h] == target_lab[l]:
-                list_lab[my_labels.columns.tolist()[l]] = h
-                list_col[h] = my_labels.columns.tolist()[l]
-    return {'label_position' : list_col, 'position_label': list_lab}
-
-
-def read_gwas( gwas_internal_link, column_dict):
+    print(line)
+    header = pd.Index(line.split())
+
+    def get_position(I,x):
+        try:
+            return I.get_loc(x)
+        except KeyError:
+            return np.nan
+    label_position = [get_position(header,i) for i in target_lab]
+
+
+    mapgw = pd.Series(label_position, index=reference_label)
+    mapgw = mapgw.loc[~mapgw.isna()].astype(int)
+    mapgw.sort_values(inplace=True)
+    print(mapgw)
+    f.close()
+    return mapgw
+
+def read_gwas( gwas_internal_link, column_map):
     """
     Read gwas Chromosome and rename columns in our standart
     """
 
     fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
-                               usecols=column_dict['label_position'].keys(),
+                               usecols = column_map.values, #column_dict['label_position'].keys(),
+                               names= column_map.index,
                                 index_col=0,
-                                names=column_dict['label_position'].values(),
-                                 header=0)
+                                 header=0, na_values= ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN',
+                                                 '-NaN',
+                                                 '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN',
+                                                 'nan', 'na', '.'])
 
     fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')]
     #fullGWAS = convert_missing_values(fullGWAS)
diff --git a/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py b/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
index 26f1ae2829f72cfe6e9ee460e64efd4675ee84f7..bb049bcab99a7eb3eb2c6c1feed0a55379ce90e3 100644
--- a/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
+++ b/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
@@ -36,7 +36,7 @@ def map_on_ref_panel(gw_df , ref_panel):
         merge_GWAS = pd.concat([other_snp, merge_GWAS])
 
     merge_GWAS = merge_GWAS[~merge_GWAS.index.duplicated(keep='first')]
-    
+
     merge_GWAS.index.rename("snp_id", inplace=True)
     return(merge_GWAS)
 
@@ -80,6 +80,10 @@ def compute_snp_alignement(mgwas):
         mgwas: a pandas dataframe of the GWAS data merged
          with the reference panel
     """
+    #ensure that allele are upper cases:
+
+    mgwas['a1'] = mgwas.a1.str.upper()
+    mgwas['a2'] = mgwas.a2.str.upper()
 
     mgwas['a1c'] = dna_u.dna_complement(mgwas.a1)
     mgwas['a2c'] = dna_u.dna_complement(mgwas.a2)
diff --git a/main_preprocessing.py b/main_preprocessing.py
index 8b719f8e4437a66a0484d466d079153e3d33a570..e4248c2ae2c652334a9fd78f27888731aa9dce28 100644
--- a/main_preprocessing.py
+++ b/main_preprocessing.py
@@ -26,31 +26,29 @@ ldscore_format="/mnt/atlas/PCMA/1._DATA/ldscore_data/"
 REF_filename = netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out'
 pathOUT = netPath+'PCMA/1._DATA/RAW.summary/'
 
-outFileName = netPath+'PCMA/1._DATA/ZSCORE_merged_ALL_NO_strand_ambiguous.hdf5'
-def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
-               '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'nan',
-               'na', '.']
-
-out_summary = "summary_GWAS.csv"
 ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'
 gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
-gwas_map.index
-GWAS_table = gwas_map.index["GIANT_2015_HIP_COMBINED_EUR.txt"]#"EUR.CD.gwas_filtered.assoc"]
+gwas_map
+#GWAS_table = gwas_map.index[22:]#["GIANT_2015_HIP_COMBINED_EUR.txt"]#"EUR.CD.gwas_filtered.assoc"]
+#GWAS_table[5:]
 # "GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", #"GWAS_SBP_recoded_dummy.txt"
 #              "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt",
 #              "GIANT_2015_HIP_COMBINED_EUR.txt",
 #              ]
-for GWAS_filename in GWAS_table.index:
+
+gwas_map
+GWAS_table = ["Menopause_HapMap_for_website_18112015.txt"] # gwas_map.index#["gabriel_asthma_dummy.txt"]
+for GWAS_filename in GWAS_table:
 
     tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
                            gwas_map.loc[GWAS_filename, 'outcome'])
     print('processing GWAS: {}'.format(tag))
     start = time.time()
     gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
-    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
 
     GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
     mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
+    print(mapgw)
 
     gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
 
@@ -58,21 +56,28 @@ for GWAS_filename in GWAS_table.index:
                       names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                        index_col="snp_id")
 
-    if gw_df.index.map(str).str.contains("^chr*", case=False).any():
-        ref['key2'] = "chr"+ref.chr.map(str) + ":" +ref.pos.map(str)
-        other_snp = pd.merge(ref, gw_df, how='inner', indicator=True,
-                             left_on ='key2', left_index=False, right_index=True)
 
     mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
+
     mgwas = jp.map_reference.compute_snp_alignement(mgwas)
     mgwas = jp.compute_score.compute_z_score(mgwas)
 
     mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
     end = time.time()
+
     print("Preprocessing of {0} in {1}s".format(tag, end-start))
 
     jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
     jp.save_output.save_output(mgwas, ldscore_format, tag)
 
-mgwas.reset_index(inplace=True)
-mgwas.sort_values(['chr', "pos"]).head(100)
+mapgw.sort_values(inplace=True)
+mgwas.head()
+
+GWAS_labels
+pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)
+
+mapgw.head()
+
+GWAS_path
+GWAS_labels
+mapgw