Skip to content
Snippets Groups Projects
Commit 0ca598c3 authored by hjulienne's avatar hjulienne
Browse files

change main

parent 5fc49314
No related branches found
No related tags found
No related merge requests found
Pipeline #15317 passed
...@@ -28,18 +28,21 @@ def launch_preprocessing(args): ...@@ -28,18 +28,21 @@ def launch_preprocessing(args):
Preprocessing GWAS dataset Preprocessing GWAS dataset
""" """
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
print(gwas_map.head())
for gwas_filename in gwas_map[['filename']]: for gwas_filename in gwas_map.index:
print(gwas_filename)
print(gwas_map.columns)
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'], tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome']) gwas_map.loc[gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time() start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2] GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(args.ref_path, header=None, sep= "\t", ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
......
...@@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels): ...@@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
mapgw = pd.Series(label_position, index=reference_label) mapgw = pd.Series(label_position, index=reference_label)
mapgw = mapgw.loc[~mapgw.isna()].astype(int) mapgw = mapgw.loc[~mapgw.isna()].astype(int)
mapgw.sort_values(inplace=True) mapgw.sort_values(inplace=True)
print(mapgw)
f.close() f.close()
return mapgw return mapgw
...@@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map):
Return: Return:
a pandas dataframe with missing value all equal to np.nan a pandas dataframe with missing value all equal to np.nan
""" """
print("Reading file:")
print(gwas_internal_link)
fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True, fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
usecols = column_map.values, #column_dict['label_position'].keys(), usecols = column_map.values, #column_dict['label_position'].keys(),
...@@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map):
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.']) 'nan', 'na', '.'])
fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')] fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')]
#fullGWAS = convert_missing_values(fullGWAS) #fullGWAS = convert_missing_values(fullGWAS)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment