Skip to content
Snippets Groups Projects
Commit 6f72006d authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

Corrected the index system so that GWAS sharing the same summary statistics file are handled correctly

parent f28feeee
No related branches found
No related tags found
No related merge requests found
Pipeline #18645 passed
......@@ -32,7 +32,7 @@ def launch_preprocessing(args):
gwas_map = pd.read_csv(args.gwas_info, sep="\t")
#define a unique tag for each GWAS
gwas_map['tag'] = gwas_map.Consortium+ "_" + D.Outcome
gwas_map['tag'] = gwas_map.Consortium+ "_" + gwas_map.Outcome
if gwas_map.tag.duplicated().any():
raise_duplicated_index(gwas_map.tag)
......@@ -41,7 +41,7 @@ def launch_preprocessing(args):
for tag in gwas_map.index:
gwas_filename = D.loc[tag, "filename"]
gwas_filename = gwas_map.loc[tag, "filename"]
print('processing GWAS: {}'.format(tag))
start = time.time()
......
......@@ -76,7 +76,7 @@ def convert_missing_values(df):
return df.replace(def_missing, nan_vec)
def map_columns_position(gwas_internal_link, my_labels):
def map_columns_position(gwas_internal_link, column_dict):
"""
Find column position for each specific Gwas
......@@ -87,16 +87,14 @@ def map_columns_position(gwas_internal_link, my_labels):
Return:
pandas Series with column position and column names as index
"""
print(gwas_internal_link)
gwas_file = gwas_internal_link.split('/')[-1]
my_labels = column_dict.loc[gwas_file]
#Our standard labels:
reference_label = column_dict.columns.tolist()
reference_label = column_dict.index.tolist()
print(reference_label)
# labels in the GWAS files
target_lab = pd.Index(my_labels.values.tolist())
target_lab = pd.Index(column_dict.values.tolist())
is_gzipped = re.search(r".gz$", gwas_internal_link)
if is_gzipped:
f = gzip.open(gwas_internal_link)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment