add script to compile chromosome panel into a GW panel

d1a8d71b · Hanna JULIENNE · 24a5de55 · d1a8d71b
Commit d1a8d71b authored Apr 25, 2023 by Hanna JULIENNE
--- a/modules/GW_reference_panels.nf
+++ b/modules/GW_reference_panels.nf
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+                           JASS suite pipeline
+authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+/* Parameter to set if optional pipeline steps are performed */
+params.compute_project=false
+
+
+/* path of input data */
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
+
+params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
+params.output_folder = "${launchDir}"
+
+/* Naming of the per-chromosome panel files: <prefix><chromosome><suffix>.<ext> */
+params.ancestry="EAS"
+params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
+params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
+params.suffix=""
+
+
+/* Chromosome numbers 1 to 22, one value per Compute_MAF task */
+chr_channel = Channel.from(1..22)
+
+/*
+Per-chromosome panel files found in params.ref_panel.
+The globs are built from params.prefix / params.suffix (instead of a
+hard-coded file name) so that overriding those parameters also changes which
+files are staged; with the default values the patterns are unchanged.
+*/
+ref_chr_channel=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
+ref_chr_channel2=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
+ref_chr_channel3=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.*")
+
+
+/*
+Compute_MAF: for one chromosome number, run `plink --freq` on the matching
+panel files staged in the task directory and emit the resulting .frq file.
+*/
+process Compute_MAF{
+
+    input:
+        file ref_panel from ref_chr_channel3.collect()
+        val chr from chr_channel
+    output:
+        file "*.frq" into MAF_channel
+    script:
+    // plink file-set name (without extension) for this chromosome
+    def bfile = "${params.prefix}${chr}${params.suffix}"
+    """
+    echo "Compute_MAF"
+
+    echo ${bfile}
+
+    plink --bfile ${bfile} --freq --out ./chr${chr}
+
+    """
+}
+
+/*
+create_WG_reference_panel: concatenate the per-chromosome allele-frequency
+tables (chr<N>.frq) for chromosomes 1 to 22 into one tab-separated file without
+header (columns: CHR, SNP, MAF, pos, A1, A2). The base-pair position of each
+variant is looked up by variant id in the matching .bim file, and variants
+whose allele pair is A/T or C/G are dropped.
+*/
+process create_WG_reference_panel{
+    publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy'
+
+    input:
+        file maf_files from MAF_channel.collect()
+        file chr_files from ref_chr_channel.collect()
+    output:
+        file "1000G_${params.ancestry}_0_01.csv" into ref_panel_wg_channel
+    """
+    #!/usr/bin/env python3
+    import pandas as pd
+
+    pref = "${params.prefix}"
+    suf = "${params.suffix}"
+    bim_columns = ["chr", "rsid", "?", "pos", "ref_al", "alt_al"]
+    # Allele pairs that read the same on both strands; excluded from the panel.
+    excluded_pairs = ["AT", "TA", "CG", "GC"]
+
+    refchr_list = []
+    for chrom in range(1, 23):
+        bim_file = "{0}{1}{2}.bim".format(pref, chrom, suf)
+        print(bim_file)
+        # Only the variant id and its position are needed from the .bim file.
+        position = pd.read_csv(bim_file, sep='\t', names=bim_columns, usecols=["rsid", "pos"], index_col="rsid")
+        if not position.index.is_unique:
+            # A duplicated id would make the lookup below return extra rows
+            # and fail with an unhelpful length-mismatch error.
+            raise ValueError("Duplicated variant ids in {0}".format(bim_file))
+
+        ref_chr = pd.read_csv("./chr{0}.frq".format(chrom), sep="\\s+")
+        ref_chr["pos"] = position.loc[ref_chr.SNP, "pos"].values
+        refchr_list.append(ref_chr[["CHR", "pos", "SNP", "A1", "A2", "MAF"]])
+
+    ref = pd.concat(refchr_list)
+    keep = ~(ref.A1 + ref.A2).isin(excluded_pairs)
+    ref.loc[keep, ["CHR", "SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
+    """
+}