Skip to content
Snippets Groups Projects
Commit b8b082b3 authored by Hanna JULIENNE
Browse files

add zipped GWAS support

parent 845dfd26
No related branches found
No related tags found
No related merge requests found
Pipeline #17788 passed
......@@ -10,6 +10,9 @@ import os
import sys
import pandas as pd
import numpy as np
import gzip
import re
def walkfs(startdir, findfile):
"""
......@@ -96,15 +99,23 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
reference_label = column_dict.columns.tolist()
# labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist())
f = open(gwas_internal_link)
is_gzipped = re.search(r".gz$", gwas_internal_link)
if is_gzipped:
f = gzip.open(gwas_internal_link)
line = f.readline()
line = line.decode('utf-8')
else:
f = open(gwas_internal_link)
line = f.readline()
count_line = 0
line = f.readline()
header = pd.Index(line.split())
def get_position(I,x):
    """
    Return the position of label ``x`` inside the index ``I``.

    Parameters
    ----------
    I : index-like object exposing ``get_loc`` (the caller passes the
        ``pd.Index`` built from the GWAS header line)
    x : label to look up

    Returns
    -------
    Whatever ``I.get_loc(x)`` returns when the label is present, or
    ``np.nan`` when the lookup raises ``KeyError``.
    """
    try:
        position = I.get_loc(x)
    except KeyError:
        # An absent label is reported as NaN rather than raising, so the
        # caller can still build one entry per requested label.
        position = np.nan
    return position
label_position = [get_position(header,i) for i in target_lab]
mapgw = pd.Series(label_position, index=reference_label)
......@@ -128,9 +139,18 @@ def read_gwas( gwas_internal_link, column_map):
"""
print("Reading file:")
print(gwas_internal_link)
is_gzipped = re.search(r".gz$", gwas_internal_link)
if is_gzipped:
compression = 'gzip'
else:
compression = None
print(column_map.values)
print(column_map.index)
fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
usecols = column_map.values, #column_dict['label_position'].keys(),
usecols = column_map.values,
compression=compression,
#column_dict['label_position'].keys(),
names= column_map.index,
index_col=0,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN',
......
......@@ -15,11 +15,11 @@ def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study):
mgwas_chr = pd.DataFrame({
'rsID': mgwas_copy.loc[chrom].snp_id,
'pos': mgwas_copy.loc[chrom].pos,
'A1': mgwas_copy.loc[chrom].ref,
'A2':mgwas_copy.loc[chrom].alt,
'A0': mgwas_copy.loc[chrom].ref,
'A1':mgwas_copy.loc[chrom].alt,
'Z': mgwas_copy.loc[chrom].computed_z,
'P': mgwas_copy.loc[chrom].pval
}, columns= ['rsID', 'pos', 'A1', "A2", "Z", "P" ])
}, columns= ['rsID', 'pos', 'A0', "A1", "Z", "P" ])
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt"
print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment