From 8f3a98d95e708efd901ed223535758b7dc5af987 Mon Sep 17 00:00:00 2001
From: hjulienn <hanna.julienne@pasteur.fr>
Date: Fri, 1 Oct 2021 14:26:57 +0200
Subject: [PATCH 1/4] FIX issue #8: upper-case alleles, check positions only when both columns exist

---
 jass_preprocessing/map_gwas.py      |  6 ++++++
 jass_preprocessing/map_reference.py | 12 +++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py
index 0fb8534..44da008 100644
--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
                                                  'NA', 'NULL', 'NaN',
                                                  'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float})
     print(fullGWAS.head())
+    # Ensure that alleles are written in upper case:
+
+    fullGWAS.a1 = fullGWAS.a1.str.upper()
+    fullGWAS.a2 = fullGWAS.a2.str.upper()
+
+
     def sorted_alleles(x):
         return "".join(sorted(x))
     # either rs ID or full position must be available:
diff --git a/jass_preprocessing/map_reference.py b/jass_preprocessing/map_reference.py
index 2edef8d..f6ad235 100644
--- a/jass_preprocessing/map_reference.py
+++ b/jass_preprocessing/map_reference.py
@@ -21,7 +21,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
     """
     ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t",
                       names =[ 'chr', "snp_id", "MAF","pos",  "ref", "alt"],
-                      dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str}, 
+                      dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str},
                        index_col="snp_id")
 
     def sorted_alleles(x):
@@ -81,10 +81,12 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
             merge_GWAS.set_index("snp_id", inplace=True)
         else:
             raise ValueError("index_type can take only two values: 'rsid' or 'positional'")
-    if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
-        merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
-    else:
-        raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?")
+
+    if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns))
+        if (merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
+            merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
+        else:
+            raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?")
 
     print("before filter")
     print(merge_GWAS.shape)
-- 
GitLab


From 14a70c94b441dd2c7fed9b3526b97c39d81ffc3f Mon Sep 17 00:00:00 2001
From: hjulienn <hanna.julienne@pasteur.fr>
Date: Fri, 1 Oct 2021 15:57:28 +0200
Subject: [PATCH 2/4] Rename index type value 'rsid' to 'rs-number'

---
 jass_preprocessing/__main__.py      | 2 +-
 jass_preprocessing/map_reference.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py
index 6570db6..75e3880 100644
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -95,7 +95,7 @@ def add_preprocessing_argument():
     parser.add_argument('--additional-masked-region', required=False, help= "List of dictionary containing coordinate of region to mask. For example :[{'chr':6, 'start':50000000, 'end': 70000000}, {'chr':6, 'start':100000000, 'end': 120000000}]", default='None')
 
     parser.add_argument('--imputation-quality-treshold', required=False, help= "minimum imputation quality in summary statistics", default='None')
-    parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rsid')
+    parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rs-number')
 
     parser.set_defaults(func=launch_preprocessing)
 
diff --git a/jass_preprocessing/map_reference.py b/jass_preprocessing/map_reference.py
index f6ad235..8210e0a 100644
--- a/jass_preprocessing/map_reference.py
+++ b/jass_preprocessing/map_reference.py
@@ -15,7 +15,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
         Filter the reference panel by  minimum allele frequency (hg19 coordinate)
         minimum_MAF (float): minimum allele frequency for a SNPs to be retain in the panel
         region_to_mask (dict): a list of additional regions to mask
-        type_of_index(str): 'rsid' or 'positional'
+        type_of_index(str): 'rs-number' or 'positional'
     Return:
         ref (pandas dataframe): the reference_panel with the specified filter applied
     """
@@ -44,7 +44,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
 
 
 
-def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
+def map_on_ref_panel(gw_df , ref_panel, index_type="rs-number"):
     """
     Merge Gwas dataframe with the reference panel
     Make sure that the same SNPs are in the reference panel and the gwas
@@ -58,7 +58,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
     """
 
 
-    if index_type=="rsid":
+    if index_type=="rs-number":
         merge_GWAS = pd.merge(ref_panel, gw_df,
                             how='inner', indicator=True, left_index=True, right_index=True)
         print("SNPs {}".format(merge_GWAS.shape[0]))
@@ -80,7 +80,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
             print(merge_GWAS)
             merge_GWAS.set_index("snp_id", inplace=True)
         else:
-            raise ValueError("index_type can take only two values: 'rsid' or 'positional'")
+            raise ValueError("index_type can take only two values: 'rs-number' or 'positional'")
 
     if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns))
         if (merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
-- 
GitLab


From 11ee29ad1e20eb5b5e9a0774c457eb74160736f5 Mon Sep 17 00:00:00 2001
From: hjulienn <hanna.julienne@pasteur.fr>
Date: Fri, 1 Oct 2021 16:09:24 +0200
Subject: [PATCH 3/4] Fix typo: add missing colon after position-column check

---
 jass_preprocessing/map_reference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jass_preprocessing/map_reference.py b/jass_preprocessing/map_reference.py
index 8210e0a..1949b1b 100644
--- a/jass_preprocessing/map_reference.py
+++ b/jass_preprocessing/map_reference.py
@@ -82,7 +82,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rs-number"):
         else:
             raise ValueError("index_type can take only two values: 'rs-number' or 'positional'")
 
-    if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns))
+    if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns)):
         if (merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
             merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
         else:
-- 
GitLab


From c5ca18bf5112ee9acf6e1fe8766f6d41964a1deb Mon Sep 17 00:00:00 2001
From: hjulienn <hanna.julienne@pasteur.fr>
Date: Fri, 1 Oct 2021 16:15:49 +0200
Subject: [PATCH 4/4] Fix typo: add missing opening parenthesis in position comparison

---
 jass_preprocessing/map_reference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jass_preprocessing/map_reference.py b/jass_preprocessing/map_reference.py
index 1949b1b..2b676aa 100644
--- a/jass_preprocessing/map_reference.py
+++ b/jass_preprocessing/map_reference.py
@@ -83,7 +83,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rs-number"):
             raise ValueError("index_type can take only two values: 'rs-number' or 'positional'")
 
     if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns)):
-        if (merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
+        if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
             merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
         else:
             raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?")
-- 
GitLab