diff --git a/modules/GW_reference_panels.nf b/modules/GW_reference_panels.nf
new file mode 100644
index 0000000000000000000000000000000000000000..6e02d7c7628d0f3eb4ef21bbed68c691d3d5aeb2
--- /dev/null
+++ b/modules/GW_reference_panels.nf
@@ -0,0 +1,116 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ JASS suite pipeline
+authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
+
+ Builds a whole-genome reference panel table from per-chromosome PLINK files:
+ allele frequencies are computed per chromosome with plink (Compute_MAF), then
+ merged with the .bim positions into one tab-separated file
+ (create_WG_reference_panel).
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+/* Parameter to set if optional pipeline steps are performed */
+params.compute_project=false
+
+
+/* path of input data */
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
+
+params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
+params.output_folder = "${launchDir}"
+
+params.ancestry="EAS"
+/* PLINK filesets are looked up as prefix + chromosome + suffix + extension (.bed/.bim/.fam) */
+params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
+params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
+params.suffix=""
+
+
+chr_channel = Channel.from(1..22)
+
+/*
+ The globs are derived from params.prefix and params.suffix so that the staged
+ files always carry the names the processes below look for; with the default
+ parameters they expand to the same patterns as the previously hard-coded ones.
+*/
+ref_panel_stem = params.ref_panel+"/"+params.prefix+"*"+params.suffix
+ref_chr_channel=Channel.fromPath(ref_panel_stem+".bim")
+ref_chr_channel2=Channel.fromPath(ref_panel_stem+".bim")
+ref_chr_channel3=Channel.fromPath(ref_panel_stem+".*")
+
+
+/*
+ Compute allele frequencies for one chromosome with `plink --freq`.
+ input  : all the reference panel files and a chromosome number (1..22)
+ output : the frequency report written by plink as chr + chromosome + .frq
+*/
+process Compute_MAF{
+
+    input:
+        file ref_panel from ref_chr_channel3.collect()
+        val chr from chr_channel
+    output:
+        file "*.frq" into MAF_channel
+    """
+    echo "Compute_MAF"
+
+    bfile="${params.prefix}${chr}${params.suffix}"
+    echo "\$bfile"
+
+    plink --bfile "\${bfile}" --freq --out ./chr${chr}
+
+    """
+}
+
+/*
+ Build the whole-genome reference panel table.
+ For each chromosome 1..22 the plink frequency report is joined with the
+ positions of the matching .bim file; strand-ambiguous variants (A/T and C/G
+ allele pairs) are dropped and the result is written without header as
+ tab-separated columns: CHR, SNP, MAF, pos, A1, A2.
+ The file is published to params.output_folder/Ref_Panel, i.e.
+ launchDir/Ref_Panel unless params.output_folder is overridden.
+ NOTE(review): the "_0_01" in the output name presumably refers to a MAF
+ threshold applied when the panel was prepared; no MAF filter is applied
+ here -- confirm.
+*/
+process create_WG_reference_panel{
+    publishDir "${params.output_folder}/Ref_Panel", pattern: "*.csv", mode: 'copy'
+
+    input:
+        file maf_files from MAF_channel.collect()
+        file chr_files from ref_chr_channel.collect()
+    output:
+        file "1000G_${params.ancestry}_0_01.csv" into ref_panel_wg_channel
+    """
+    #!/usr/bin/env python3
+    import pandas as pd
+
+    pref = "${params.prefix}"
+    suf = "${params.suffix}"
+    bim_columns = ["chr", "rsid", "?", "pos", "ref_al", "alt_al"]
+    out_columns = ["CHR", "SNP", "MAF", "pos", "A1", "A2"]
+    # Allele pairs that read the same on both strands
+    ambiguous_alleles = ["AT", "TA", "CG", "GC"]
+
+    refchr_list = []
+    for chrom in range(1, 23):
+        bim_file = "{0}{1}{2}.bim".format(pref, chrom, suf)
+        print(bim_file)
+        position = pd.read_csv(bim_file, sep='\t', names=bim_columns)
+        position.set_index("rsid", inplace=True)
+        # Raw string: the whitespace regex must not be read as a string escape
+        ref_chr = pd.read_csv("./chr{0}.frq".format(chrom), sep=r"\\s+")
+
+        # NOTE(review): assumes each SNP id of the .frq occurs exactly once in
+        # the .bim; otherwise this lookup fails -- confirm ids are unique
+        ref_chr["pos"] = position.loc[ref_chr.SNP, "pos"].values
+        refchr_list.append(ref_chr[["CHR", "pos", "SNP", "A1", "A2", "MAF"]])
+
+    ref = pd.concat(refchr_list)
+    unambiguous = ~(ref.A1 + ref.A2).isin(ambiguous_alleles)
+    ref.loc[unambiguous, out_columns].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
+    """
+}