Skip to content
Snippets Groups Projects
Commit 9b26bed9 authored by Hanna JULIENNE
Browse files

Explicit argument naming in pd.DataFrame.all, .any

parent 7367427b
No related branches found
No related tags found
No related merge requests found
...@@ -46,6 +46,10 @@ def launch_preprocessing(args): ...@@ -46,6 +46,10 @@ def launch_preprocessing(args):
print("#SNPs in GWAS summary statistic file: {}".format(gw_df.shape[0])) print("#SNPs in GWAS summary statistic file: {}".format(gw_df.shape[0]))
ref = jp.map_reference.read_reference(args.ref_path, np.bool_(args.mask_MHC), np.double(args.minimum_MAF), region_to_mask=eval(args.additional_masked_region)) ref = jp.map_reference.read_reference(args.ref_path, np.bool_(args.mask_MHC), np.double(args.minimum_MAF), region_to_mask=eval(args.additional_masked_region))
print("Unique chromosome in reference")
print(ref.chr.unique())
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref, gwas_map.loc[tag, "index_type"]) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref, gwas_map.loc[tag, "index_type"])
print("#SNPs mapped to reference panel: {}".format(mgwas.shape[0])) print("#SNPs mapped to reference panel: {}".format(mgwas.shape[0]))
......
...@@ -32,7 +32,7 @@ def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7): ...@@ -32,7 +32,7 @@ def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
if 'n' in mgwas.columns: if 'n' in mgwas.columns:
myN = mgwas.n myN = mgwas.n
#--- freq, case-cont N exist #--- freq, case-cont N exist
elif(('ncas' in mgwas.columns) & ('ncont' in mgwas.columns)): elif(('Ncase' in mgwas.columns) & ('Ncontrol' in mgwas.columns)):
sumN = mgwas.ncas + mgwas.ncont sumN = mgwas.ncas + mgwas.ncont
perCase = mgwas.ncas / sumN perCase = mgwas.ncas / sumN
myN = sumN * perCase * (1-perCase) myN = sumN * perCase * (1-perCase)
......
...@@ -146,12 +146,13 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None): ...@@ -146,12 +146,13 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True, fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
usecols = column_map.values, usecols = column_map.values,
compression=compression, compression=compression,
#column_dict['label_position'].keys(), #column_dict['label_position'].keys(),
names= column_map.index, names= column_map.index,
header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN', header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN',
'-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float}) 'nan', 'na', '.', '-'],
dtype={"snpid":str, "a1":str,"a2":str,"freq":np.double, "z":np.double,"se":np.double, "pval":np.double})
print(fullGWAS.head()) print(fullGWAS.head())
#Ensure that allele are written in upper cases: #Ensure that allele are written in upper cases:
......
...@@ -30,7 +30,8 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio ...@@ -30,7 +30,8 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
return "".join(sorted(x)) return "".join(sorted(x))
#Filter Strand ambiguous if biallelic #Filter Strand ambiguous if biallelic
ref = ref.loc[~(ref.ref+ref.alt).isin(["AT", "TA", 'CG','GC'])] ref = ref.loc[~(ref.ref+ref.alt).isin(["AT", "TA", 'CG','GC'])]
print("REFERENCE")
print(ref.head())
ref["positional_index"] = ref.chr.apply(str)+ref.pos.apply(str)+(ref.ref+ref.alt).apply(sorted_alleles) ref["positional_index"] = ref.chr.apply(str)+ref.pos.apply(str)+(ref.ref+ref.alt).apply(sorted_alleles)
if mask_MHC: if mask_MHC:
...@@ -116,9 +117,9 @@ def compute_is_flipped(mgwas): ...@@ -116,9 +117,9 @@ def compute_is_flipped(mgwas):
flipped = pd.DataFrame({"ref_flipped" : (mgwas.ref == mgwas.a2), "alt_flipped" : (mgwas.alt == mgwas.a1)}) flipped = pd.DataFrame({"ref_flipped" : (mgwas.ref == mgwas.a2), "alt_flipped" : (mgwas.alt == mgwas.a1)})
flipped_complement = pd.DataFrame({"ref_flippedc" : (mgwas.ref == mgwas.a2c), "alt_flippedc" : (mgwas.alt == mgwas.a1c)}) flipped_complement = pd.DataFrame({"ref_flippedc" : (mgwas.ref == mgwas.a2c), "alt_flippedc" : (mgwas.alt == mgwas.a1c)})
is_flipped = pd.DataFrame({"flipped":flipped.all(1), # The allele of the is_flipped = pd.DataFrame({"flipped":flipped.all(axis=1), # The allele of the
"flipped_complement":flipped_complement.all(1)} "flipped_complement":flipped_complement.all(axis=1)}
).any(1) ).any(axis=1)
return is_flipped return is_flipped
def compute_is_aligned(mgwas): def compute_is_aligned(mgwas):
...@@ -132,9 +133,8 @@ def compute_is_aligned(mgwas): ...@@ -132,9 +133,8 @@ def compute_is_aligned(mgwas):
aligned = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1), "alt_ok" : (mgwas.alt == mgwas.a2)}) aligned = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1), "alt_ok" : (mgwas.alt == mgwas.a2)})
aligned_complement = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1c), "alt_ok" : (mgwas.alt == mgwas.a2c)}) aligned_complement = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1c), "alt_ok" : (mgwas.alt == mgwas.a2c)})
is_aligned = pd.DataFrame({"aligned":aligned.all(1), # The allele of the is_aligned = pd.DataFrame({"aligned":aligned.all(axis=1), # The allele of the
"aligned_complement":aligned_complement.all(1)} "aligned_complement":aligned_complement.all(axis=1)}).any(axis=1)
).any(1)
return is_aligned return is_aligned
def compute_snp_alignement(mgwas): def compute_snp_alignement(mgwas):
...@@ -153,7 +153,7 @@ def compute_snp_alignement(mgwas): ...@@ -153,7 +153,7 @@ def compute_snp_alignement(mgwas):
mgwas['a1c'] = dna_u.dna_complement(mgwas.a1) mgwas['a1c'] = dna_u.dna_complement(mgwas.a1)
mgwas['a2c'] = dna_u.dna_complement(mgwas.a2) mgwas['a2c'] = dna_u.dna_complement(mgwas.a2)
print(mgwas)
is_aligned = compute_is_aligned(mgwas) is_aligned = compute_is_aligned(mgwas)
is_flipped = compute_is_flipped(mgwas) is_flipped = compute_is_flipped(mgwas)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment