Commit 0ca598c3 authored by hjulienne
Browse files

change main

parent 5fc49314
Pipeline #15317 passed with stages
in 55 seconds
...@@ -28,18 +28,21 @@ def launch_preprocessing(args): ...@@ -28,18 +28,21 @@ def launch_preprocessing(args):
Preprocessing GWAS dataset Preprocessing GWAS dataset
""" """
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
print(gwas_map.head())
for gwas_filename in gwas_map[['filename']]: for gwas_filename in gwas_map.index:
print(gwas_filename)
print(gwas_map.columns)
tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'], tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome']) gwas_map.loc[gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time() start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2] GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(args.ref_path, header=None, sep= "\t", ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
......
...@@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels): ...@@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
mapgw = pd.Series(label_position, index=reference_label) mapgw = pd.Series(label_position, index=reference_label)
mapgw = mapgw.loc[~mapgw.isna()].astype(int) mapgw = mapgw.loc[~mapgw.isna()].astype(int)
mapgw.sort_values(inplace=True) mapgw.sort_values(inplace=True)
print(mapgw)
f.close() f.close()
return mapgw return mapgw
...@@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map):
Return: Return:
a pandas dataframe with missing value all equal to np.nan a pandas dataframe with missing value all equal to np.nan
""" """
print("Reading file:")
print(gwas_internal_link)
fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True, fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
usecols = column_map.values, #column_dict['label_position'].keys(), usecols = column_map.values, #column_dict['label_position'].keys(),
...@@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map): ...@@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map):
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.']) 'nan', 'na', '.'])
fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')] fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')]
#fullGWAS = convert_missing_values(fullGWAS) #fullGWAS = convert_missing_values(fullGWAS)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment