Reviewer notes (free text ahead of the first "diff --git" header is skipped by
patch tools):
  * compute_score.py: the case/control branch of compute_sample_size now reads
    the Ncase/Ncontrol columns that its condition tests. The earlier revision
    of this patch kept reading mgwas.ncas / mgwas.ncont, which raises
    AttributeError when only the renamed columns are present. The post-image
    blob id on that file's "index" line predates this amendment.
  * Leading whitespace and blank context lines were reconstructed from a
    whitespace-collapsed copy of the patch; apply with whitespace-tolerant
    options (e.g. "git apply --ignore-whitespace") if a hunk is rejected.
  * __main__.py (context only, not modified here): eval() is applied to the
    command-line value args.additional_masked_region. NOTE(review): consider
    ast.literal_eval in a follow-up if that argument can be untrusted.

diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py
index 89d6256d8bc6508117a380318835def1183c1412..870babb0d007a64c39e007cf946eb0d5ca1f92ec 100644
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -46,6 +46,10 @@ def launch_preprocessing(args):
     print("#SNPs in GWAS summary statistic file: {}".format(gw_df.shape[0]))
 
     ref = jp.map_reference.read_reference(args.ref_path, np.bool_(args.mask_MHC), np.double(args.minimum_MAF), region_to_mask=eval(args.additional_masked_region))
+
+    print("Unique chromosome in reference")
+    print(ref.chr.unique())
+
     mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref, gwas_map.loc[tag, "index_type"])
     print("#SNPs mapped to reference panel: {}".format(mgwas.shape[0]))
 
diff --git a/jass_preprocessing/compute_score.py b/jass_preprocessing/compute_score.py
index 349be38438fa767377d21a0be42a100a9dc66227..9e4ccddc610886ba054214487ac64a0e4b834705 100644
--- a/jass_preprocessing/compute_score.py
+++ b/jass_preprocessing/compute_score.py
@@ -32,7 +32,9 @@ def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
     if 'n' in mgwas.columns:
         myN = mgwas.n
     #--- freq, case-cont N exist
-    elif(('ncas' in mgwas.columns) & ('ncont' in mgwas.columns)):
-        sumN = mgwas.ncas + mgwas.ncont
-        perCase = mgwas.ncas / sumN
+    elif(('Ncase' in mgwas.columns) & ('Ncontrol' in mgwas.columns)):
+        # Read the very columns tested above: accessing ncas/ncont here would
+        # raise AttributeError when only Ncase/Ncontrol are present.
+        sumN = mgwas.Ncase + mgwas.Ncontrol
+        perCase = mgwas.Ncase / sumN
         myN = sumN * perCase * (1-perCase)
diff --git a/jass_preprocessing/map_gwas.py b/jass_preprocessing/map_gwas.py
index 44da0084c4d4477f23b3bba9d8fa62d7548981c2..4e25925ae7241d8f50f0c3749ba64fd26b2640c4 100644
--- a/jass_preprocessing/map_gwas.py
+++ b/jass_preprocessing/map_gwas.py
@@ -146,12 +146,13 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
     fullGWAS = pd.read_csv(gwas_internal_link, delim_whitespace=True,
                            usecols = column_map.values,
                            compression=compression,
-                            #column_dict['label_position'].keys(),
-                            names= column_map.index,
-                            header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN',
-                                                  '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
-                                                  'NA', 'NULL', 'NaN',
-                                                  'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float})
+                           #column_dict['label_position'].keys(),
+                           names= column_map.index,
+                           header=0, na_values= ['', '#N/A', '#N/A', 'N/A','#NA', '-1.#IND', '-1.#QNAN',
+                                                 '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A',
+                                                 'NA', 'NULL', 'NaN',
+                                                 'nan', 'na', '.', '-'],
+                           dtype={"snpid":str, "a1":str,"a2":str,"freq":np.double, "z":np.double,"se":np.double, "pval":np.double})
     print(fullGWAS.head())
 
     #Ensure that allele are written in upper cases:
diff --git a/jass_preprocessing/map_reference.py b/jass_preprocessing/map_reference.py
index 260e16e9d4939bc6d678f37e8e7903aa8fbeb324..1ecd27d6501bf196d9a2f37840becaf10da9a979 100644
--- a/jass_preprocessing/map_reference.py
+++ b/jass_preprocessing/map_reference.py
@@ -30,7 +30,8 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
         return "".join(sorted(x))
     #Filter Strand ambiguous if biallelic
     ref = ref.loc[~(ref.ref+ref.alt).isin(["AT", "TA", 'CG','GC'])]
-
+    print("REFERENCE")
+    print(ref.head())
     ref["positional_index"] = ref.chr.apply(str)+ref.pos.apply(str)+(ref.ref+ref.alt).apply(sorted_alleles)
 
     if mask_MHC:
@@ -116,9 +117,9 @@ def compute_is_flipped(mgwas):
     flipped = pd.DataFrame({"ref_flipped" : (mgwas.ref == mgwas.a2), "alt_flipped" : (mgwas.alt == mgwas.a1)})
     flipped_complement = pd.DataFrame({"ref_flippedc" : (mgwas.ref == mgwas.a2c), "alt_flippedc" : (mgwas.alt == mgwas.a1c)})
 
-    is_flipped = pd.DataFrame({"flipped":flipped.all(1), # The allele of the
-                                "flipped_complement":flipped_complement.all(1)}
-                                ).any(1)
+    is_flipped = pd.DataFrame({"flipped":flipped.all(axis=1), # The allele of the
+                                "flipped_complement":flipped_complement.all(axis=1)}
+                                ).any(axis=1)
     return is_flipped
 
 def compute_is_aligned(mgwas):
@@ -132,9 +133,8 @@ def compute_is_aligned(mgwas):
     aligned = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1), "alt_ok" : (mgwas.alt == mgwas.a2)})
     aligned_complement = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1c), "alt_ok" : (mgwas.alt == mgwas.a2c)})
 
-    is_aligned = pd.DataFrame({"aligned":aligned.all(1), # The allele of the
-                                "aligned_complement":aligned_complement.all(1)}
-                                ).any(1)
+    is_aligned = pd.DataFrame({"aligned":aligned.all(axis=1), # The allele of the
+                                "aligned_complement":aligned_complement.all(axis=1)}).any(axis=1)
     return is_aligned
 
 def compute_snp_alignement(mgwas):
@@ -153,7 +153,7 @@ def compute_snp_alignement(mgwas):
 
     mgwas['a1c'] = dna_u.dna_complement(mgwas.a1)
     mgwas['a2c'] = dna_u.dna_complement(mgwas.a2)
-
+    print(mgwas)
     is_aligned = compute_is_aligned(mgwas)
     is_flipped = compute_is_flipped(mgwas)
 