# Patch summary (text before the first "diff --git" header is commentary).
#
# compute_GW_reference_panel.nf (new file)
#   - enables Nextflow DSL2 and sets default params: output_folder, ancestry ("EUR"),
#     prefix, suffix and ref_panel
#   - builds chr_channel (1..22 plus "X") and two file channels from the EUR panel
#     globs (*.bim and *.*)
#   - includes Compute_MAF and Create_WG_reference_panel from
#     ./modules/GW_reference_panels and calls them from an unnamed workflow
#
# modules/GW_reference_panels.nf
#   - removes the header comment, the params, the channel definitions and the
#     workflow block, leaving only the two process definitions
#   - replaces the `file` input/output qualifiers with `path` and names the outputs
#     (emit: freq, emit: ref_panel)
#   - publishDir is now rooted at params.output_folder instead of launchDir
#   - the embedded Python loop now iterates over chromosomes 1..22 and "X" and
#     prints each chromosome
#
# Review changes applied to the added lines of compute_GW_reference_panel.nf:
#   - chr_channel is built with Channel.of(1..22, "X") instead of the deprecated
#     Channel.from(1..22) mixed with Channel.of("X")
#   - header comment typo fixed ("subworklow allows to compute")
#
# NOTE(review): params.ref_panel is not referenced anywhere in the new file, and the
#   two Channel.fromPath globs hard-code a different directory and the EUR prefix
#   instead of using params -- confirm this is intended.
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. Indentation
#   and the position of whitespace-only context lines are inferred (placed so that
#   each hunk matches its declared line counts), and the `index` blob ids are those
#   of the original commit. Regenerate with `git diff` before applying.
diff --git a/compute_GW_reference_panel.nf b/compute_GW_reference_panel.nf
new file mode 100644
index 0000000000000000000000000000000000000000..8eb7bb119a97e15a86507457b2576cf136c794cf
--- /dev/null
+++ b/compute_GW_reference_panel.nf
@@ -0,0 +1,32 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    JASS suite pipeline:
+This subworkflow computes a genome-wide reference panel
+authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+nextflow.enable.dsl=2
+
+params.output_folder = "${launchDir}"
+
+params.ancestry="EUR"
+params.prefix="ALL_ensemble_1000G_hg38_EUR_chr"
+params.suffix=""
+
+chr_channel = Channel.of(1..22, "X")
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/Ref_panel_by_chr/'
+
+ref_chr_channel=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.bim")
+ref_chr_channel3=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.*")
+
+/*****************************/
+/* process inclusion */
+/*****************************/
+include {Compute_MAF; Create_WG_reference_panel} from "./modules/GW_reference_panels"
+
+workflow{
+    /****** PREPROCESSING ******/
+    Compute_MAF(ref_chr_channel3.collect(), chr_channel)
+    Create_WG_reference_panel(Compute_MAF.out.collect(), ref_chr_channel.collect())
+}
\ No newline at end of file
diff --git a/modules/GW_reference_panels.nf b/modules/GW_reference_panels.nf
index 7bae47066476128a6f1595a9d4c9f0b1e68d3796..5c18965aa2f49cb3117e5ae70503b7b244f226f1 100644
--- a/modules/GW_reference_panels.nf
+++ b/modules/GW_reference_panels.nf
@@ -1,41 +1,11 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    JASS suite pipeline
-authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-/* Parameter to set if optional pipeline steps are performed */
-params.compute_project=false
-
-
-/* path of input data */
-params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
-
-params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
-params.output_folder = "${launchDir}"
-
-params.ancestry="EAS"
-params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
-params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
-params.suffix=""
-
-
-chr_channel = Channel.from(1..22)
-
-ref_chr_channel=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.bim")
-ref_chr_channel2=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.bim")
-ref_chr_channel3=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.*")
-
-
 process Compute_MAF{
 
     input:
-        file ref_panel
+        path ref_panel
         val chr
 
     output:
-        file "*.frq"
+        path "*.frq", emit: freq
     """
 
     echo "Compute_MAF"
@@ -48,13 +18,13 @@ process Compute_MAF{
 }
 
 process Create_WG_reference_panel{
-    publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy'
+    publishDir "${params.output_folder}/Ref_Panel", pattern: "*.csv", mode: 'copy'
 
     input:
-        file maf_files
-        file chr_files
+        path maf_files
+        path chr_files
     output:
-        file "1000G_${params.ancestry}_0_01.csv"
+        path "1000G_${params.ancestry}_0_01.csv", emit: ref_panel
     """
     #!/usr/bin/env python3
     import subprocess as sub
@@ -65,7 +35,8 @@ process Create_WG_reference_panel{
     pref="${params.prefix}"
     suf="${params.suffix}"
     refchr_list = []
-    for chrom in range(1,23):
+    for chrom in [*range(1,23), "X"]:
+        print(chrom)
         fi = "{0}{1}{2}.bim".format(pref,chrom,suf)
         print(fi)
         print(type(fi))
@@ -80,11 +51,3 @@ process Create_WG_reference_panel{
     ref.loc[~(ref.A1+ref.A2).isin(["AT", 'TA','CG','GC'])][["CHR","SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
     """
 }
-
-
-workflow {
-
-    Compute_MAF(ref_chr_channel3.collect(), chr_channel)
-    create_WG_reference_panel(Compute_MAF.out.collect(), ref_chr_channel.collect())
-
-}
\ No newline at end of file