Commit 6f72006d authored by Hanna JULIENNE
Browse files

Corrected the index system so that GWAS sharing the same summary statistics file are handled correctly

parent f28feeee
Pipeline #18645 passed with stages
in 1 minute
......@@ -32,7 +32,7 @@ def launch_preprocessing(args):
gwas_map = pd.read_csv(args.gwas_info, sep="\t")
#define a unique tag
gwas_map['tag'] = gwas_map.Consortium+ "_" + D.Outcome
gwas_map['tag'] = gwas_map.Consortium+ "_" + gwas_map.Outcome
if gwas_map.tag.duplicated().any():
raise_duplicated_index(gwas_map.tag)
......@@ -41,7 +41,7 @@ def launch_preprocessing(args):
for tag in gwas_map.index:
gwas_filename = D.loc[tag, "filename"]
gwas_filename = gwas_map.loc[tag, "filename"]
print('processing GWAS: {}'.format(tag))
start = time.time()
......
......@@ -76,7 +76,7 @@ def convert_missing_values(df):
return df.replace(def_missing, nan_vec)
def map_columns_position(gwas_internal_link, my_labels):
def map_columns_position(gwas_internal_link, column_dict):
"""
Find column position for each specific Gwas
......@@ -87,16 +87,14 @@ def map_columns_position(gwas_internal_link, my_labels):
Return:
pandas Series with column position and column names as index
"""
print(gwas_internal_link)
gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file]
#Our standard labels:
reference_label = column_dict.columns.tolist()
reference_label = column_dict.index.tolist()
print(reference_label)
# labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist())
target_lab = pd.Index(column_dict.values.tolist())
is_gzipped = re.search(r".gz$", gwas_internal_link)
if is_gzipped:
f = gzip.open(gwas_internal_link)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment