Skip to content
Snippets Groups Projects
Commit 9dcad796 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

compute MAF and ref panel with W chr

parent a0a948d4
No related branches found
No related tags found
No related merge requests found
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
JASS suite pipeline:
This subworkflow computes a genome-wide reference panel
authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
nextflow.enable.dsl=2

/* Folder under which results are published; defaults to the launch directory. */
params.output_folder = "${launchDir}"
/* Ancestry label of the reference panel. */
params.ancestry="EUR"
/* Text placed before / after the chromosome label in the panel file names. */
params.prefix="ALL_ensemble_1000G_hg38_EUR_chr"
params.suffix=""
/* NOTE(review): params.ref_panel is not read by this script; the panel files
   below are taken from panel_dir. Kept so the parameter stays defined. */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/Ref_panel_by_chr/'

/* Chromosome labels: 1..22 followed by "X".
   Channel.of expands the range itself, so the deprecated Channel.from and the
   extra mix() are not needed. */
chr_channel = Channel.of(1..22, "X")

/* Directory holding the per-chromosome panel files. */
panel_dir = "/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR"

/* The globs are built from params.prefix / params.suffix so that the files
   picked up here follow the same naming as the file names derived from those
   parameters. With the default values they resolve to the same patterns as
   before. */
/* .bim files only */
ref_chr_channel=Channel.fromPath("${panel_dir}/${params.prefix}*${params.suffix}.bim")
/* every file of the panel, whatever its extension */
ref_chr_channel3=Channel.fromPath("${panel_dir}/${params.prefix}*${params.suffix}.*")
/*****************************/
/* process inclusion */
/*****************************/
include {Compute_MAF; Create_WG_reference_panel} from "./modules/GW_reference_panels"
workflow{
    /****** PREPROCESSING ******/
    // Gather all panel files into a single list and run Compute_MAF with it
    // alongside the chromosome labels.
    def panel_files = ref_chr_channel3.collect()
    Compute_MAF(panel_files, chr_channel)

    // Gather the Compute_MAF outputs and the .bim files, then build the
    // whole-genome reference panel from both lists.
    def maf_tables = Compute_MAF.out.collect()
    def bim_files = ref_chr_channel.collect()
    Create_WG_reference_panel(maf_tables, bim_files)
}
\ No newline at end of file
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
JASS suite pipeline
authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/* Parameter to set if optional pipeline steps are performed */
params.compute_project=false
/* path of input data */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
params.output_folder = "${launchDir}"
params.ancestry="EAS"
/* Text placed before / after the chromosome label in the panel file names. */
params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
params.suffix=""
/* Chromosome labels 1..22; Channel.of replaces the deprecated Channel.from
   and expands the range into individual values. */
chr_channel = Channel.of(1..22)
/* The globs are built from params.ref_panel, params.prefix and params.suffix
   instead of repeating the file-name prefix literally; with the default values
   they resolve to the same patterns as before. */
ref_chr_channel=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
/* Second channel over the same .bim files as ref_chr_channel. */
ref_chr_channel2=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
/* Every file of the panel, whatever its extension. */
ref_chr_channel3=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.*")
process Compute_MAF{ process Compute_MAF{
input: input:
file ref_panel path ref_panel
val chr val chr
output: output:
file "*.frq" path "*.frq", emit: freq
""" """
echo "Compute_MAF" echo "Compute_MAF"
...@@ -48,13 +18,13 @@ process Compute_MAF{ ...@@ -48,13 +18,13 @@ process Compute_MAF{
} }
process Create_WG_reference_panel{ process Create_WG_reference_panel{
publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy' publishDir "${params.output_folder}/Ref_Panel", pattern: "*.csv", mode: 'copy'
input: input:
file maf_files path maf_files
file chr_files path chr_files
output: output:
file "1000G_${params.ancestry}_0_01.csv" path "1000G_${params.ancestry}_0_01.csv", emit: ref_panel
""" """
#!/usr/bin/env python3 #!/usr/bin/env python3
import subprocess as sub import subprocess as sub
...@@ -65,7 +35,8 @@ process Create_WG_reference_panel{ ...@@ -65,7 +35,8 @@ process Create_WG_reference_panel{
pref="${params.prefix}" pref="${params.prefix}"
suf="${params.suffix}" suf="${params.suffix}"
refchr_list = [] refchr_list = []
for chrom in range(1,23): for chrom in [*range(1,23), "X"]:
print(chrom)
fi = "{0}{1}{2}.bim".format(pref,chrom,suf) fi = "{0}{1}{2}.bim".format(pref,chrom,suf)
print(fi) print(fi)
print(type(fi)) print(type(fi))
...@@ -80,11 +51,3 @@ process Create_WG_reference_panel{ ...@@ -80,11 +51,3 @@ process Create_WG_reference_panel{
ref.loc[~(ref.A1+ref.A2).isin(["AT", 'TA','CG','GC'])][["CHR","SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t") ref.loc[~(ref.A1+ref.A2).isin(["AT", 'TA','CG','GC'])][["CHR","SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
""" """
} }
workflow {
    // Run Compute_MAF with the collected panel files and the chromosome labels.
    Compute_MAF(ref_chr_channel3.collect(), chr_channel)
    // Build the whole-genome panel from the collected Compute_MAF outputs and
    // .bim files. The process is declared as Create_WG_reference_panel
    // (capital C), so the call must use that exact name.
    Create_WG_reference_panel(Compute_MAF.out.collect(), ref_chr_channel.collect())
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment