"""DNA helpers (jass_preprocessing/jass_preprocessing/dna_utils/dna_utils.py)."""

# Complementary base for each of the four upper-case base letters.
_COMPLEMENT = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

# Sentinel returned for anything that is not exactly 'A', 'T', 'G' or 'C'.
_NOT_A_BASE = 'Not ATGC'


def dna_complement_base(inputbase):
    """Return the complementary base of a single upper-case DNA base.

    Args:
        inputbase: One of 'A', 'T', 'G' or 'C'.

    Returns:
        'T', 'A', 'C' or 'G' respectively. Any other value (including
        lower-case letters) yields the string 'Not ATGC'.
    """
    try:
        return _COMPLEMENT.get(inputbase, _NOT_A_BASE)
    except TypeError:
        # Unhashable input (e.g. a list) can never equal a base letter, so
        # it gets the same sentinel instead of an exception.
        return _NOT_A_BASE


def dna_complement(input):
    """Complement every element of an iterable of bases.

    The parameter keeps its original name ``input`` (which shadows the
    builtin inside this function) so keyword callers keep working.

    Args:
        input: Iterable of single bases, e.g. a string or a list.

    Returns:
        A list with ``dna_complement_base`` applied to each element, in order.
        The sequence is complemented, not reversed.
    """
    return [dna_complement_base(base) for base in input]
"""Locate GWAS files on disk (jass_preprocessing/jass_preprocessing/map_gwas/map_gwas.py)."""
import os
import sys

# pandas was used below but never imported, so gwas_internal_link raised
# NameError on every call.
import pandas as pd


def walkfs(startdir, findfile):
    """Search the tree under ``startdir`` for a file named ``findfile``.

    Args:
        startdir: Directory at which ``os.walk`` starts.
        findfile: Bare file name to look for (compared against the file
            names of each visited directory).

    Returns:
        A tuple ``(dircount, filecount, path)``:

        * ``dircount`` - number of directories visited without a match.
        * ``filecount`` - number of files in those directories, plus, on a
          match, the position of the file in its directory listing.
        * ``path`` - ``os.path.join(root, findfile)`` for the first match in
          ``os.walk`` order, or ``None`` when nothing was found.
    """
    dircount = 0
    filecount = 0
    for root, _dirs, files in os.walk(startdir):
        if findfile in files:
            return (dircount,
                    filecount + files.index(findfile),
                    os.path.join(root, findfile))
        dircount += 1
        filecount += len(files)
    # Nothing found: report None instead of a full path for the file.
    return dircount, filecount, None


def gwas_internal_link(GWAS_table, GWAS_path):
    """Walk the GWAS path to find the GWAS tables.

    Args:
        GWAS_table: Iterable of GWAS file names to look up.
        GWAS_path: Root directory searched (recursively) for each file name.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of ``GWAS_table`` and
        the columns ``filename`` and ``internalDataLink``; the latter is the
        path found by ``walkfs`` or a missing value when the file does not
        exist under ``GWAS_path``.
    """
    links = [
        {'filename': gwas_filename,
         'internalDataLink': walkfs(GWAS_path, gwas_filename)[2]}
        for gwas_filename in GWAS_table
    ]
    return pd.DataFrame(links, columns=['filename', 'internalDataLink'])
np @@ -14,9 +14,6 @@ import pandas as pd import matplotlib.pyplot as plt -print('Number of arguments:', len(sys.argv), 'arguments.') -print('Argument List:', str(sys.argv)) - perSS = 0.7 netPath = "/mnt/atlas/" # '/home/genstat/ATLAS/' #netPath = '/pasteur/projets/policy01/'