From 8d414641cfefe6b26d66a3db2dd9db051249ffe0 Mon Sep 17 00:00:00 2001
From: hanna julienne <hanna.julienne@pasteur.fr>
Date: Mon, 19 Feb 2018 17:54:40 +0100
Subject: [PATCH] Structuring preprocessing in a package

---
 .../jass_preprocessing/__init__.py            |   2 ++
 .../__pycache__/__init__.cpython-35.pyc       | Bin 0 -> 253 bytes
 .../jass_preprocessing/dna_utils/dna_utils.py |  15 +++++++++
 .../__pycache__/map_gwas.cpython-35.pyc       | Bin 0 -> 576 bytes
 .../jass_preprocessing/map_gwas/map_gwas.py   |  31 ++++++++++++++++++
 jass_preprocessing/setup.py                   |  11 +++++++
 pyPCMA_1_format_v1.4.py                       |   5 +--
 7 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 jass_preprocessing/jass_preprocessing/__init__.py
 create mode 100644 jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc
 create mode 100644 jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py
 create mode 100644 jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc
 create mode 100644 jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
 create mode 100644 jass_preprocessing/setup.py

diff --git a/jass_preprocessing/jass_preprocessing/__init__.py b/jass_preprocessing/jass_preprocessing/__init__.py
new file mode 100644
index 0000000..8669fc7
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/__init__.py
@@ -0,0 +1,2 @@
+import jass_preprocessing.map_gwas.map_gwas
+import jass_preprocessing.dna_utils.dna_utils
diff --git a/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..90cd37a5ba622284ffaaf5c15051c02e856fe0dd
GIT binary patch
literal 253
zcmWgR<>mVLt1C)}fq~&M0}^0jU|?`yU|=W~U|?WKVPHsMWXNG)h++gWf;E|5f|UAc
zGDWFmB^DRQ7Zjxy6y+zU78hscrR(J;7R0BQCl<pQw}dcNtz;--W?+C2zmoJb@^e%5
z(^K<`OA<@;JreWs67>U$^0QKtON#Zq9D{@71Bz005q9Wf8i-9OK0Y%qvm`!Vub}c4
ThfQvNN@-529V5tFAol|R|F}(C

literal 0
HcmV?d00001

diff --git a/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py
new file mode 100644
index 0000000..a052430
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py
@@ -0,0 +1,15 @@
+
+def dna_complement_base(inputbase):
+    """Return the complementary base of a single upper-case nucleotide.
+
+    Anything other than 'A', 'T', 'G' or 'C' yields the string 'Not ATGC'.
+    """
+    pairs = (('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G'))
+    for base, partner in pairs:
+        if inputbase == base:
+            return partner
+    return 'Not ATGC'
+
+
+def dna_complement(input):  # element-wise complement of an iterable of bases; returns a list
+    return list(map(dna_complement_base, input))
diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a080559f1ba2ed9e2ae37219fc43f4397bf6141d
GIT binary patch
literal 576
zcmWgR<>jjW*%h^%k%8ec0}^0jU|?`yU|=ZLU|?WKVPHsMWXNG)h=Q{j8KM{&QkWQ0
zm>F6a7^0X!3W7CRUV@D9O9tr%vDg_H7}yvX7@R>Sb%RV|WT*wJtz~AY$Y&^FV8~)*
zs9|7eW@K=Qjbn&mW~gOhsAXlSWn-veWT;_d$Yx_GN?@pAVyIyTakH2};?0Z<;vlWe
z3|TA;H7sBuCWbU-hTs||kfY3N!Rkv`L0Xy_8O&>$846_>!Xd__Fo2b6GW!)VGB7Z_
z1QD8Sx0v#aZ?Tjo=49Vu&CE+lt+>TfkXVv&izO>RGw&92PHLVedlW};Nn%k+N@h_M
zM_OiHN?K-4Y7|FGW>Io}X<kVbCx{Pb-eM`r&o8;fl9E|ee2Wz%T)dK@h=+lJ;a9eP
zMt*LpetK$NaY<r{zDHtSUZQ?LQGQlxa!Ikimt$~nd_YmEZb4Cga%ypLW?s5}R$_5+
zd_hqvexcmNg820E#A1CIL$9Fn78}SvX~m#uC>8)k2qPOK8>2iU52Gd{7o!MM5g!8s
ugC=tkGXn#|E#~6NVz2<%p-2QTNS4DUH$SB`C)ExVT*WL53=BMsJgfk_4~Q%P

literal 0
HcmV?d00001

diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
new file mode 100644
index 0000000..aef00bb
--- /dev/null
+++ b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py
@@ -0,0 +1,31 @@
+import os
+import sys
+
+import pandas as pd
+
+
+def walkfs(startdir, findfile):
+    """Walk startdir with os.walk; return (dirs passed, files passed, full path or None)."""
+    dirs_seen = files_seen = 0
+    for folder, _subdirs, names in os.walk(startdir):
+        if findfile in names:
+            # entries listed before the hit in its own directory are counted too
+            return dirs_seen, files_seen + names.index(findfile), os.path.join(folder, findfile)
+        dirs_seen, files_seen = dirs_seen + 1, files_seen + len(names)
+    # tree exhausted without a match: the path slot is None
+    return dirs_seen, files_seen, None
+
+
+
+def gwas_internal_link(GWAS_table, GWAS_path):
+    """
+    Look up every file name of GWAS_table under GWAS_path with walkfs.
+
+    Returns a pandas DataFrame with the columns 'filename' and
+    'internalDataLink' (the path found by walkfs, or None when absent).
+    """
+    Glink = []
+    for position in range(len(GWAS_table)):  # positional access kept: table type is not fixed here
+        GWAS_filename = GWAS_table[position]
+        Glink.append({'filename': GWAS_filename, 'internalDataLink': walkfs(GWAS_path, GWAS_filename)[2]})
+    return pd.DataFrame(Glink, columns=('filename', 'internalDataLink'))
diff --git a/jass_preprocessing/setup.py b/jass_preprocessing/setup.py
new file mode 100644
index 0000000..1aaf7ec
--- /dev/null
+++ b/jass_preprocessing/setup.py
@@ -0,0 +1,11 @@
+from setuptools import setup
+
+setup(name='jass_preprocessing',
+      version='0.1',
+      description='Preprocess GWAS summary statistic for JASS',
+      url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
+      author='Hugues Aschard, Vincent Laville, Hanna Julienne',
+      author_email='hugues.aschard@pasteur.fr',
+      license='MIT', zip_safe=False,
+      packages=['jass_preprocessing',  # sub-packages imported by __init__.py must be named explicitly
+                'jass_preprocessing.dna_utils', 'jass_preprocessing.map_gwas'])
diff --git a/pyPCMA_1_format_v1.4.py b/pyPCMA_1_format_v1.4.py
index 47307a5..4f9d07c 100755
--- a/pyPCMA_1_format_v1.4.py
+++ b/pyPCMA_1_format_v1.4.py
@@ -2,7 +2,7 @@
 Read raw GWAS summary statistics, filter and format
 Write clean GWAS
 """
-__updated__ = '2017-08-29'
+__updated__ = '2018-02-19'
 
 import h5py
 import numpy as np
@@ -14,9 +14,6 @@ import pandas as pd
 import matplotlib.pyplot as plt
 
 
-print('Number of arguments:', len(sys.argv), 'arguments.')
-print('Argument List:', str(sys.argv))
-
 perSS = 0.7
 netPath = "/mnt/atlas/"  # '/home/genstat/ATLAS/'
 #netPath       = '/pasteur/projets/policy01/'
-- 
GitLab