From 9dcad7969a327544a91be7b80b5750c9dd2b6a06 Mon Sep 17 00:00:00 2001
From: hjulienn <hanna.julienne@pasteur.fr>
Date: Wed, 11 Oct 2023 18:19:11 +0200
Subject: [PATCH] compute MAF and ref panel with W chr
---
compute_GW_reference_panel.nf | 32 ++++++++++++++++++++
modules/GW_reference_panels.nf | 53 +++++-----------------------------
2 files changed, 40 insertions(+), 45 deletions(-)
create mode 100644 compute_GW_reference_panel.nf
diff --git a/compute_GW_reference_panel.nf b/compute_GW_reference_panel.nf
new file mode 100644
index 0000000..8eb7bb1
--- /dev/null
+++ b/compute_GW_reference_panel.nf
@@ -0,0 +1,32 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ JASS suite pipeline:
+This subworkflow computes a genome-wide reference panel
+authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+nextflow.enable.dsl=2
+// Default parameters; each one can be overridden on the command line (--<name> <value>).
+params.output_folder = "${launchDir}"  // Create_WG_reference_panel publishes its csv under <output_folder>/Ref_Panel
+// Panel naming: Create_WG_reference_panel opens files named <prefix><chr><suffix>.bim
+params.ancestry="EUR"
+params.prefix="ALL_ensemble_1000G_hg38_EUR_chr"
+params.suffix=""
+// Chromosomes to process: 1..22 followed by "X". Channel.of expands the range and emits in order.
+chr_channel = Channel.of(1..22, "X")
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/Ref_panel_by_chr/'  // NOTE(review): not referenced in this script; the globs below are hard-coded to another directory - confirm intent
+// Input files: the .bim files only (ref_chr_channel) and every file sharing the chr prefix (ref_chr_channel3).
+ref_chr_channel=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.bim")
+ref_chr_channel3=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.*")
+
+/*****************************/
+/* process inclusion */
+/*****************************/
+include {Compute_MAF; Create_WG_reference_panel} from "./modules/GW_reference_panels"
+
+workflow{
+ /****** Entry workflow: Compute_MAF first, then Create_WG_reference_panel on its outputs ******/
+ Compute_MAF(ref_chr_channel3.collect(), chr_channel)  // inputs: the collected list of panel files, and the chromosome values
+ Create_WG_reference_panel(Compute_MAF.out.collect(), ref_chr_channel.collect())  // inputs: all collected Compute_MAF outputs (*.frq) and all collected .bim files
+}
\ No newline at end of file
diff --git a/modules/GW_reference_panels.nf b/modules/GW_reference_panels.nf
index 7bae470..5c18965 100644
--- a/modules/GW_reference_panels.nf
+++ b/modules/GW_reference_panels.nf
@@ -1,41 +1,11 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- JASS suite pipeline
-authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-/* Parameter to set if optional pipeline steps are performed */
-params.compute_project=false
-
-
-/* path of input data */
-params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
-
-params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
-params.output_folder = "${launchDir}"
-
-params.ancestry="EAS"
-params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
-params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
-params.suffix=""
-
-
-chr_channel = Channel.from(1..22)
-
-ref_chr_channel=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.bim")
-ref_chr_channel2=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.bim")
-ref_chr_channel3=Channel.fromPath(params.ref_panel+"/ALL_ensemble_1000G_hg38_EAS_chr*.*")
-
-
process Compute_MAF{
input:
- file ref_panel
+ path ref_panel
val chr
output:
- file "*.frq"
+ path "*.frq", emit: freq
"""
echo "Compute_MAF"
@@ -48,13 +18,13 @@ process Compute_MAF{
}
process Create_WG_reference_panel{
- publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy'
+ publishDir "${params.output_folder}/Ref_Panel", pattern: "*.csv", mode: 'copy'
input:
- file maf_files
- file chr_files
+ path maf_files
+ path chr_files
output:
- file "1000G_${params.ancestry}_0_01.csv"
+ path "1000G_${params.ancestry}_0_01.csv", emit: ref_panel
"""
#!/usr/bin/env python3
import subprocess as sub
@@ -65,7 +35,8 @@ process Create_WG_reference_panel{
pref="${params.prefix}"
suf="${params.suffix}"
refchr_list = []
- for chrom in range(1,23):
+ for chrom in [*range(1,23), "X"]:
+ print(chrom)
fi = "{0}{1}{2}.bim".format(pref,chrom,suf)
print(fi)
print(type(fi))
@@ -80,11 +51,3 @@ process Create_WG_reference_panel{
ref.loc[~(ref.A1+ref.A2).isin(["AT", 'TA','CG','GC'])][["CHR","SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
"""
}
-
-
-workflow {
-
- Compute_MAF(ref_chr_channel3.collect(), chr_channel)
- create_WG_reference_panel(Compute_MAF.out.collect(), ref_chr_channel.collect())
-
-}
\ No newline at end of file
--
GitLab