Skip to content
Snippets Groups Projects
Commit 4018e364 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

updated requirements

parent 28fe2b23
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@ pages:
- yum install -y make
- pip3 install sphinx
- pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
- pip3 install -r jass_preprocessing/requirements.txt
- pip3 install -r requirements.txt
- cd doc
- sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/
- make html
......
......@@ -22,24 +22,20 @@ import argparse
#| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
def launch_preprocessing(args):
"""
Preprocessing GWAS dataset
"""
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
print(gwas_map.head())
gwas_map = pd.read_csv(args.gwas_info, sep="\t")
gwas_map.set_index("filename", inplace=True)
for gwas_filename in gwas_map.index:
print(gwas_filename)
print(gwas_map.columns)
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome'])
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'Consortium'],
gwas_map.loc[gwas_filename, 'Outcome'])
print('processing GWAS: {}'.format(tag))
start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
......
......@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
Return:
pandas Series with column position and column names as index
"""
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)
gwas_file = gwas_internal_link.split('/')[-1]
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
column_dict.set_index("filename", inplace=True)
gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file]
#Our standard labels:
reference_label = column_dict.columns.tolist()
# labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist())
f = open(gwas_internal_link)
count_line = 0
line = f.readline()
print(line)
header = pd.Index(line.split())
def get_position(I,x):
try:
return I.get_loc(x)
......@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
return np.nan
label_position = [get_position(header,i) for i in target_lab]
mapgw = pd.Series(label_position, index=reference_label)
mapgw = mapgw.loc[~mapgw.isna()].astype(int)
mapgw.sort_values(inplace=True)
......@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map):
index_col=0,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A',
'#NA', '-1.#IND', '-1.#QNAN',
'-NaN',
'-nan', '1.#IND', '1.#QNAN', 'N/A',
'-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
'NA', 'NULL', 'NaN',
'nan', 'na', '.'])
......
scipy
pandas
numpy
seaborn
matplotlib
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment