diff --git a/jass_preprocessing/jass_preprocessing/__init__.py b/jass_preprocessing/jass_preprocessing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8669fc771e2bee31c822c9d69beb0f3e14bba93a
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/__init__.py
@@ -0,0 +1,2 @@
+import jass_preprocessing.map_gwas.map_gwas
+import jass_preprocessing.dna_utils.dna_utils
diff --git a/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..90cd37a5ba622284ffaaf5c15051c02e856fe0dd
Binary files /dev/null and b/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc differ
diff --git a/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0524300aefc5713f7a72a75ede66a50c19c1ff5
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py
@@ -0,0 +1,15 @@
+
+def dna_complement_base(inputbase):
+    """Return the complementary base for one upper-case base (A<->T, G<->C).
+
+    Any value other than 'A', 'T', 'G' or 'C' yields the string 'Not ATGC'.
+    """
+    pairs = (('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G'))
+    for base, partner in pairs:
+        if inputbase == base:
+            return partner
+    return 'Not ATGC'
+
+
+def dna_complement(input):  # complement every element of `input`, returned as a list
+    return list(map(dna_complement_base, input))
diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a080559f1ba2ed9e2ae37219fc43f4397bf6141d
Binary files /dev/null and b/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc differ
diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
new file mode 100644
index 0000000000000000000000000000000000000000..aef00bb1ea9bec4f6e56f0bf280751e31f49c6d1
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
@@ -0,0 +1,31 @@
+import os
+import sys
+
+import pandas as pd
+
+
+def walkfs(startdir, findfile):
+    """Look for a file named findfile under startdir, walking with os.walk.
+
+    Return (directories scanned, files passed over, full path or None)."""
+    n_dirs = n_files = 0
+    for folder, _subdirs, names in os.walk(startdir):
+        if findfile in names:
+            return n_dirs, n_files + names.index(findfile), os.path.join(folder, findfile)
+        n_dirs, n_files = n_dirs + 1, n_files + len(names)
+    return n_dirs, n_files, None
+
+
+
+def gwas_internal_link(GWAS_table, GWAS_path):
+    """
+    Locate each GWAS file below GWAS_path and tabulate the results.
+
+    GWAS_table is an iterable of file names. Returns a pandas DataFrame with
+    columns 'filename' and 'internalDataLink' (full path, or None if absent).
+    """
+    # walkfs returns (dircount, filecount, path); only the path is kept here.
+    Glink = [{'filename': GWAS_filename,
+              'internalDataLink': walkfs(GWAS_path, GWAS_filename)[2]}
+             for GWAS_filename in GWAS_table]
+    return pd.DataFrame(Glink, columns=('filename', 'internalDataLink'))
diff --git a/jass_preprocessing/setup.py b/jass_preprocessing/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aaf7ec39b0e2e8a6157d63479cd51b79d78186d
--- /dev/null
+++ b/jass_preprocessing/setup.py
@@ -0,0 +1,11 @@
+"""Packaging script for jass_preprocessing (GWAS summary-statistic preprocessing)."""
+from setuptools import setup
+
+setup(name='jass_preprocessing', version='0.1', license='MIT', zip_safe=False,
+      description='Preprocess GWAS summary statistic for JASS',
+      url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
+      author='Hugues Aschard, Vincent Laville, Hanna Julienne',
+      author_email='hugues.aschard@pasteur.fr',
+      # List the sub-packages too: the package __init__ imports modules from them.
+      packages=['jass_preprocessing', 'jass_preprocessing.map_gwas',
+                'jass_preprocessing.dna_utils'])
diff --git a/pyPCMA_1_format_v1.4.py b/pyPCMA_1_format_v1.4.py
index 47307a55869f9b1a451f378b2882965ef6a4455f..4f9d07c2c5674037b399598d20ecaf0f1e303bfe 100755
--- a/pyPCMA_1_format_v1.4.py
+++ b/pyPCMA_1_format_v1.4.py
@@ -2,7 +2,7 @@
 Read raw GWAS summary statistics, filter and format
 Write clean GWAS
 """
-__updated__ = '2017-08-29'
+__updated__ = '2018-02-19'  # ISO date, YYYY-MM-DD
 
 import h5py
 import numpy as np
@@ -14,9 +14,6 @@ import pandas as pd
 import matplotlib.pyplot as plt
 
 
-print('Number of arguments:', len(sys.argv), 'arguments.')
-print('Argument List:', str(sys.argv))
-
 perSS = 0.7
 netPath = "/mnt/atlas/"  # '/home/genstat/ATLAS/'
 #netPath       = '/pasteur/projets/policy01/'