Skip to content
Snippets Groups Projects
Commit 4018e364 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

updated requirements

parent 28fe2b23
No related branches found
No related tags found
No related merge requests found
Pipeline #15333 failed
...@@ -14,7 +14,7 @@ pages: ...@@ -14,7 +14,7 @@ pages:
- yum install -y make - yum install -y make
- pip3 install sphinx - pip3 install sphinx
- pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse - pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
- pip3 install -r jass_preprocessing/requirements.txt - pip3 install -r requirements.txt
- cd doc - cd doc
- sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/ - sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/
- make html - make html
......
...@@ -22,24 +22,20 @@ import argparse ...@@ -22,24 +22,20 @@ import argparse
#| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' | #| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
def launch_preprocessing(args): def launch_preprocessing(args):
""" """
Preprocessing GWAS dataset Preprocessing GWAS dataset
""" """
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) gwas_map = pd.read_csv(args.gwas_info, sep="\t")
print(gwas_map.head()) gwas_map.set_index("filename", inplace=True)
for gwas_filename in gwas_map.index: for gwas_filename in gwas_map.index:
print(gwas_filename) tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'Consortium'],
print(gwas_map.columns) gwas_map.loc[gwas_filename, 'Outcome'])
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time() start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2] GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
......
...@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link, GWAS_labels): ...@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
Return: Return:
pandas Series with column position and column names as index pandas Series with column position and column names as index
""" """
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na', index_col=0)
gwas_file = gwas_internal_link.split('/')[-1] column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
column_dict.set_index("filename", inplace=True)
gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file] my_labels = column_dict.loc[gwas_file]
#Our standard labels: #Our standard labels:
reference_label = column_dict.columns.tolist() reference_label = column_dict.columns.tolist()
# labels in the GWAS files # labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist()) target_lab = pd.Index(my_labels.values.tolist())
f = open(gwas_internal_link) f = open(gwas_internal_link)
count_line = 0 count_line = 0
line = f.readline() line = f.readline()
print(line)
header = pd.Index(line.split()) header = pd.Index(line.split())
def get_position(I,x): def get_position(I,x):
try: try:
return I.get_loc(x) return I.get_loc(x)
...@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link, GWAS_labels): ...@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
return np.nan return np.nan
label_position = [get_position(header,i) for i in target_lab] label_position = [get_position(header,i) for i in target_lab]
mapgw = pd.Series(label_position, index=reference_label) mapgw = pd.Series(label_position, index=reference_label)
mapgw = mapgw.loc[~mapgw.isna()].astype(int) mapgw = mapgw.loc[~mapgw.isna()].astype(int)
mapgw.sort_values(inplace=True) mapgw.sort_values(inplace=True)
...@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map):
index_col=0, index_col=0,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A', header=0, na_values= ['', '#N/A', '#N/A', 'N/A',
'#NA', '-1.#IND', '-1.#QNAN', '#NA', '-1.#IND', '-1.#QNAN',
'-NaN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
'-nan', '1.#IND', '1.#QNAN', 'N/A',
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.']) 'nan', 'na', '.'])
......
scipy
pandas
numpy
seaborn
matplotlib
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment