From 0ca598c386b06bc5cecd5df26b25d16025711d6b Mon Sep 17 00:00:00 2001 From: hjulienne <hanna.julienne@gmail.com> Date: Mon, 16 Sep 2019 16:11:56 +0100 Subject: [PATCH] change main --- jass_preprocessing/__main__.py | 9 ++++++--- jass_preprocessing/map_gwas.py | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py index bee0a4f..ed42e4b 100644 --- a/jass_preprocessing/__main__.py +++ b/jass_preprocessing/__main__.py @@ -28,18 +28,21 @@ def launch_preprocessing(args): Preprocessing GWAS dataset """ gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) + print(gwas_map.head()) - for gwas_filename in gwas_map[['filename']]: + for gwas_filename in gwas_map.index: + print(gwas_filename) + print(gwas_map.columns) tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'], gwas_map.loc[gwas_filename, 'outcome']) print('processing GWAS: {}'.format(tag)) start = time.time() - GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2] + GWAS_link = jp.map_gwas.walkfs(args.input_folder, gwas_filename)[2] + mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) - ref = pd.read_csv(args.ref_path, header=None, sep= "\t", names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], index_col="snp_id") diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py index 2b0816f..f3e8313 100644 --- a/jass_preprocessing/map_gwas.py +++ b/jass_preprocessing/map_gwas.py @@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels): mapgw = pd.Series(label_position, index=reference_label) mapgw = mapgw.loc[~mapgw.isna()].astype(int) mapgw.sort_values(inplace=True) - print(mapgw) + f.close() return mapgw @@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map): Return: a pandas dataframe with missing value all equal to np.nan """ + print("Reading file:") + print(gwas_internal_link) fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True, usecols = column_map.values, #column_dict['label_position'].keys(), @@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map): 'NA', 'NULL', 'NaN', 'nan', 'na', '.']) + fullGWAS = fullGWAS[~fullGWAS.index.duplicated(keep='first')] #fullGWAS = convert_missing_values(fullGWAS) -- GitLab