Commit 4018e364 authored by Hanna JULIENNE
Browse files

updated requirements

parent 28fe2b23
Pipeline #15333 failed with stage
in 38 seconds
......@@ -14,7 +14,7 @@ pages:
- yum install -y make
- pip3 install sphinx
- pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
- pip3 install -r jass_preprocessing/requirements.txt
- pip3 install -r requirements.txt
- cd doc
- sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/
- make html
......
......@@ -22,24 +22,20 @@ import argparse
#| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
def launch_preprocessing(args):
"""
Preprocessing GWAS dataset
"""
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
print(gwas_map.head())
gwas_map = pd.read_csv(args.gwas_info, sep="\t")
gwas_map.set_index("filename", inplace=True)
for gwas_filename in gwas_map.index:
print(gwas_filename)
print(gwas_map.columns)
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome'])
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'Consortium'],
gwas_map.loc[gwas_filename, 'Outcome'])
print('processing GWAS: {}'.format(tag))
start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
......
......@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
Return:
pandas Series with column position and column names as index
"""
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)
gwas_file = gwas_internal_link.split('/')[-1]
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
column_dict.set_index("filename", inplace=True)
gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file]
#Our standard labels:
reference_label = column_dict.columns.tolist()
# labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist())
f = open(gwas_internal_link)
count_line = 0
line = f.readline()
print(line)
header = pd.Index(line.split())
def get_position(I,x):
try:
return I.get_loc(x)
......@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
return np.nan
label_position = [get_position(header,i) for i in target_lab]
mapgw = pd.Series(label_position, index=reference_label)
mapgw = mapgw.loc[~mapgw.isna()].astype(int)
mapgw.sort_values(inplace=True)
......@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map):
index_col=0,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A',
'#NA', '-1.#IND', '-1.#QNAN',
'-NaN',
'-nan', '1.#IND', '1.#QNAN', 'N/A',
'-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
'NA', 'NULL', 'NaN',
'nan', 'na', '.'])
......
scipy
pandas
numpy
seaborn
matplotlib
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment