From 8d414641cfefe6b26d66a3db2dd9db051249ffe0 Mon Sep 17 00:00:00 2001 From: hanna julienne <hanna.julienne@pasteur.fr> Date: Mon, 19 Feb 2018 17:54:40 +0100 Subject: [PATCH] Structuring preprocessing in a package --- .../jass_preprocessing/__init__.py | 2 ++ .../__pycache__/__init__.cpython-35.pyc | Bin 0 -> 253 bytes .../jass_preprocessing/dna_utils/dna_utils.py | 15 +++++++++ .../__pycache__/map_gwas.cpython-35.pyc | Bin 0 -> 576 bytes .../jass_preprocessing/map_gwas/map_gwas.py | 31 ++++++++++++++++++ jass_preprocessing/setup.py | 11 +++++++ pyPCMA_1_format_v1.4.py | 5 +-- 7 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 jass_preprocessing/jass_preprocessing/__init__.py create mode 100644 jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc create mode 100644 jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py create mode 100644 jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc create mode 100644 jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py create mode 100644 jass_preprocessing/setup.py diff --git a/jass_preprocessing/jass_preprocessing/__init__.py b/jass_preprocessing/jass_preprocessing/__init__.py new file mode 100644 index 0000000..8669fc7 --- /dev/null +++ b/jass_preprocessing/jass_preprocessing/__init__.py @@ -0,0 +1,2 @@ +import jass_preprocessing.map_gwas.map_gwas +import jass_preprocessing.dna_utils.dna_utils diff --git a/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90cd37a5ba622284ffaaf5c15051c02e856fe0dd GIT binary patch literal 253 zcmWgR<>mVLt1C)}fq~&M0}^0jU|?`yU|=W~U|?WKVPHsMWXNG)h++gWf;E|5f|UAc zGDWFmB^DRQ7Zjxy6y+zU78hscrR(J;7R0BQCl<pQw}dcNtz;--W?+C2zmoJb@^e%5 z(^K<`OA<@;JreWs67>U$^0QKtON#Zq9D{@71Bz005q9Wf8i-9OK0Y%qvm`!Vub}c4 ThfQvNN@-529V5tFAol|R|F}(C literal 0 HcmV?d00001 diff --git a/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py new file mode 100644 index 0000000..a052430 --- /dev/null +++ b/jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py @@ -0,0 +1,15 @@ + +def dna_complement_base(inputbase): + if (inputbase == 'A'): + return('T') + if (inputbase == 'T'): + return('A') + if (inputbase == 'G'): + return('C') + if (inputbase == 'C'): + return('G') + return('Not ATGC') + + +def dna_complement(input): + return([dna_complement_base(x) for x in input]) diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc b/jass_preprocessing/jass_preprocessing/map_gwas/__pycache__/map_gwas.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a080559f1ba2ed9e2ae37219fc43f4397bf6141d GIT binary patch literal 576 zcmWgR<>jjW*%h^%k%8ec0}^0jU|?`yU|=ZLU|?WKVPHsMWXNG)h=Q{j8KM{&QkWQ0 zm>F6a7^0X!3W7CRUV@D9O9tr%vDg_H7}yvX7@R>Sb%RV|WT*wJtz~AY$Y&^FV8~)* zs9|7eW@K=Qjbn&mW~gOhsAXlSWn-veWT;_d$Yx_GN?@pAVyIyTakH2};?0Z<;vlWe z3|TA;H7sBuCWbU-hTs||kfY3N!Rkv`L0Xy_8O&>$846_>!Xd__Fo2b6GW!)VGB7Z_ z1QD8Sx0v#aZ?Tjo=49Vu&CE+lt+>TfkXVv&izO>RGw&92PHLVedlW};Nn%k+N@h_M zM_OiHN?K-4Y7|FGW>Io}X<kVbCx{Pb-eM`r&o8;fl9E|ee2Wz%T)dK@h=+lJ;a9eP zMt*LpetK$NaY<r{zDHtSUZQ?LQGQlxa!Ikimt$~nd_YmEZb4Cga%ypLW?s5}R$_5+ zd_hqvexcmNg820E#A1CIL$9Fn78}SvX~m#uC>8)k2qPOK8>2iU52Gd{7o!MM5g!8s ugC=tkGXn#|E#~6NVz2<%p-2QTNS4DUH$SB`C)ExVT*WL53=BMsJgfk_4~Q%P literal 0 HcmV?d00001 diff --git a/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py new file mode 100644 index 0000000..aef00bb --- /dev/null +++ b/jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py @@ -0,0 +1,31 @@ +import os +import sys + + + + +def walkfs(startdir, findfile): + dircount = 0 + filecount = 0 + for root, dirs, files in os.walk(startdir): + if findfile in files: + return dircount, filecount + files.index(findfile), os.path.join(root, findfile) + dircount += 1 + filecount += len(files) + # nothing found, return None instead of a full path for the file + return dircount, filecount, None + + + +def gwas_internal_link(GWAS_table, GWAS_path): + """ + Walk the GWAS path to find the GWAS tables + """ + Glink = [] + + for GWAS in range(0, len(GWAS_table)): + GWAS_filename = GWAS_table[GWAS] + Glink.append({'filename': GWAS_filename, + 'internalDataLink': walkfs(GWAS_path, GWAS_filename)[2]}) + Glink = pd.DataFrame(Glink, columns=('filename', 'internalDataLink')) + return Glink diff --git a/jass_preprocessing/setup.py b/jass_preprocessing/setup.py new file mode 100644 index 0000000..1aaf7ec --- /dev/null +++ b/jass_preprocessing/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup + +setup(name='jass_preprocessing', + version='0.1', + description='Preprocess GWAS summary statistic for JASS', + url='http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing', + author='Hugues Aschard, Vincent Laville, Hanna Julienne', + author_email='hugues.aschard@pasteur.fr', + license='MIT', + packages=['jass_preprocessing'], + zip_safe=False) diff --git a/pyPCMA_1_format_v1.4.py b/pyPCMA_1_format_v1.4.py index 47307a5..4f9d07c 100755 --- a/pyPCMA_1_format_v1.4.py +++ b/pyPCMA_1_format_v1.4.py @@ -2,7 +2,7 @@ Read raw GWAS summary statistics, filter and format Write clean GWAS """ -__updated__ = '2017-08-29' +__updated__ = '2018-19-02' import h5py import numpy as np @@ -14,9 +14,6 @@ import pandas as pd import matplotlib.pyplot as plt -print('Number of arguments:', len(sys.argv), 'arguments.') -print('Argument List:', str(sys.argv)) - perSS = 0.7 netPath = "/mnt/atlas/" # '/home/genstat/ATLAS/' #netPath = '/pasteur/projets/policy01/' -- GitLab