Skip to content
Snippets Groups Projects
Commit 327e6e63 authored by Hervé MENAGER
Browse files

first corrections on the init table

parent 92b50252
No related branches found
No related tags found
No related merge requests found
...@@ -117,13 +117,13 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio ...@@ -117,13 +117,13 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio
# Read summary statistics # Read summary statistics
covariance = read_csv(init_covariance_path, sep='\t', index_col=0) covariance = read_csv(init_covariance_path, sep='\t', index_col=0)
# Read description file # Read description file
description = read_csv(description_file_path, sep='\t', index_col=0) description = read_csv(description_file_path, sep='\t')
# gwas_list = list(set([os.path.basename(x).split("_")[0] for x in glob.glob(input_data_path)])) # gwas_list = list(set([os.path.basename(x).split("_")[0] for x in glob.glob(input_data_path)]))
gwas_dict = {} gwas_dict = {}
for x in glob.glob(input_data_path)]): for x in glob.glob(input_data_path):
gwas_name = os.path.basename(x).split("_")[0] gwas_name = os.path.basename(x).split("_")[0]
if gwas_name in gwas_dict: if gwas_name in gwas_dict:
gwas_dict[gwas_name].append(x) gwas_dict[gwas_name].append(x)
...@@ -134,18 +134,20 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio ...@@ -134,18 +134,20 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio
for meta_index, meta_row in description.iterrows(): # READ GWAS for meta_index, meta_row in description.iterrows(): # READ GWAS
print(meta_row.keys())
# create an ID from the GWAS consortia + outcome columns # create an ID from the GWAS consortia + outcome columns
study_name = 'z_' + meta_row['consortia'].upper() + '_' + meta_row['outcome'].upper() study_name = 'z_' + meta_row['Consortium'].upper() + '_' + meta_row['Outcome'].upper()
print('**** PARSING %s ******' % my_study) print('**** PARSING %s ******' % study_name)
for gwas_file_chri in gwas_dict[meta_row['outcome']]: for gwas_file_chri in gwas_dict[meta_row['Outcome']]:
print(gwas_file_chri)
chrom = int(os.path.splitext(os.path.basename(gwas_file_chri[0]))[0].split("_chr")[1]) chrom = int(os.path.splitext(os.path.basename(gwas_file_chri))[0].split("_chr")[1])
print('Working on Chromosome %d' % chrom) print('Working on Chromosome %d' % chrom)
# Load summary statistics file for CHR i # Load summary statistics file for CHR i
z_gwas = read_csv(gwas_file_chri, sep = "\t", index_col= [0, 1, 4]) z_gwas = read_csv(gwas_file_chri, sep = "\t", usecols= [0, 1, 4])
print(z_gwas.columns)
z_gwas.columns = ['snp_ids', 'position', 'z_score'] z_gwas.columns = ['snp_ids', 'position', 'z_score']
# CAUTION: handle duplicates (whenever there are multiple values for a single SNP) # CAUTION: handle duplicates (whenever there are multiple values for a single SNP)
z_gwas = z_gwas[~z_gwas.index.duplicated(keep='first')] z_gwas = z_gwas[~z_gwas.index.duplicated(keep='first')]
...@@ -155,7 +157,7 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio ...@@ -155,7 +157,7 @@ def create_inittable_file(input_data_path: str, init_covariance_path: str, regio
# Add chromosome and study name columns # Add chromosome and study name columns
z_gwas['CHR'] = [chrom] * N z_gwas['CHR'] = [chrom] * N
merge_GWAS['study_name'] = [study_name] * N z_gwas['study_name'] = [study_name] * N
# Add region and middle position columns # Add region and middle position columns
z_gwas['Region'] = np.zeros(len(z_gwas), dtype=np.int) z_gwas['Region'] = np.zeros(len(z_gwas), dtype=np.int)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment