Commit 84bf3b6e authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

pipeline maintenance and update

parent dfede0a5
......@@ -46,4 +46,8 @@ The following Item are necessary to run JASS pipeline on real data
* --output_folder : A path to a folder in which to write pipeline results (inittable, worktable...). By default, results will be published in the workflow directory.
Parameters can be specified in command line or by editing the
## Usage Example on HPC Cluster
If you are working with an HPC server (Slurm job scheduler), you can adapt the nextflow_sbatch.config file and launch the pipeline with a command like:
sbatch --mem-per-cpu 32G -p common,dedicated,ggs --qos=long --wrap "module load java/13.0.2;module load singularity/3.8.3;module load graphviz/2.42.3;./nextflow run imputation_only.nf -with-report imput_report.html -with-timeline imput_timeline.html -c nextflow_sbatch.config -qs 300"
// Default destination for published results: the workflow directory itself.
params.output_folder = "${baseDir}"

/* Helper scripts located next to the workflow, one channel per script */
extract_sample_size_script_channel = Channel.fromPath("${baseDir}/extract_sample_size.py")
generate_trait_pairs_channel       = Channel.fromPath("${baseDir}/generate_trait_pairs.py")
parse_correlation_channel          = Channel.fromPath("${baseDir}/parse_correlation_results.py")

/*
 * The same set of sumstats files is consumed by two different processes.
 * A DSL1 channel can only be read once, so the source channel is split
 * into two identically-populated channels.
 */
Channel
    .fromPath("${baseDir}/ldsc_data/data_*.sumstats.gz")
    .into { ldsc_data_channel; ldsc_data_channel_bis }
/*
process related to LD-score calculation
*/
/*
 * Runs the trait-pair generation script once, with every sumstats file
 * staged in the task directory, and emits each produced chunk file
 * separately on combi_channel.
 */
process Generate_trait_pair {
    // Scheduler requests: one hour on any of the listed queues.
    time '1h'
    queue 'dedicated,common,ggs'

    input:
        file generate_trait_pairs_script from generate_trait_pairs_channel
        // Duplicates are dropped, then all files are gathered into a single list item.
        file ldsc_data from ldsc_data_channel.unique().collect()

    output:
        // 'mode flatten' sends every matching file as its own channel item.
        file "pairs_chunk_*.txt" into combi_channel mode flatten

    script:
    """
    python3 ${generate_trait_pairs_script}
    """
}
// Runs `ldsc.py --rg` for every trait pair listed in one chunk file and
// emits the resulting *.log files on cor_log_channel.
process Correlation_LDSC_data {
// Memory and wall-time requests are multiplied by the attempt number,
// so a retried task asks for more resources than the previous attempt.
memory {8.GB * task.attempt}
time {24.h * task.attempt}
queue 'dedicated,common,ggs'
// Copy every produced log file into <output_folder>/cor_data/.
publishDir "${params.output_folder}/cor_data/", pattern: "*.log", mode: 'copy'
input:
// One chunk file per task (combi_channel is fed by Generate_trait_pair).
file trait_pair from combi_channel
// All sumstats files, de-duplicated and gathered into a single list item.
file ldsc_data from ldsc_data_channel_bis.unique().collect()
output:
file "*.log" into cor_log_channel
// Script outline:
//  - `read` takes one line of the chunk file and splits it on ';' into the
//    bash array `my_trait`; each element is passed as-is to `ldsc.py --rg`.
//  - trait1 is built from the text before the first '.' of the element,
//    keeping '_'-separated fields 2 and 3; trait2 is built the same way from
//    the part after the first ','.
//  - ldsc.py writes to the prefix <trait1>-_-<trait2>, using
//    ${baseDir}/eur_w_ld_chr/ for both --ref-ld-chr and --w-ld-chr.
//  - `\$` keeps the dollar sign for bash; a bare `${...}` is filled in by Nextflow.
//  - NOTE(review): `i=1` is assigned but never read in this script.
"""
export OMP_NUM_THREADS=1
echo ${trait_pair}
IFS=';' read -ra my_trait <<< "\$(cat ${trait_pair})"
i=1
for trait_p in \${my_trait[@]}
do
echo \$trait_p
trait1=\$(echo \$trait_p | cut -d '.' -f1 | cut -d '_' -f2,3)
trait2=\$(echo \$trait_p | cut -d ',' -f2 | cut -d '.' -f1 | cut -d '_' -f2,3)
ldsc.py --rg \$trait_p --out \${trait1}-_-\${trait2} --ref-ld-chr ${baseDir}/eur_w_ld_chr/ --w-ld-chr ${baseDir}/eur_w_ld_chr/
done
"""
}
/*
 * Runs the correlation-parsing script once, with every ldsc log file staged
 * in the task directory, and publishes the *.csv files it writes into
 * <output_folder>/Correlation_matrices/.
 */
process Correlation_matrices {
    publishDir "${params.output_folder}/Correlation_matrices/", pattern: "*.csv", mode: 'copy'
    time '1h'
    queue 'dedicated,common,ggs'

    input:
        file parsing_script from parse_correlation_channel
        // All logs gathered into one list item so a single task sees all of them.
        file ldsc_data from cor_log_channel.collect()

    output:
        // publishDir only copies files that are declared as process outputs;
        // without this declaration no CSV would ever be published.
        // 'optional true' keeps the task from failing if the script writes no CSV,
        // which matches the behaviour before this declaration existed.
        file "*.csv" optional true into correlation_matrices_channel

    script:
    """
    python3 ${parsing_script}
    """
}
/*
 * Pipeline parameters. Every value below is a default and can be overridden
 * on the command line (e.g. --ld_folder /some/path) or in a config file.
 */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/1._DATA/ImpG_refpanel/'
params.region = "${baseDir}/input_files/fourier_ls-all.bed"
params.output_folder = "${baseDir}"
// Folder holding the *.ld files (previously hard-coded in the channel definition).
params.ld_folder = '/pasteur/zeus/projets/p02/GGS_JASS/WKD_Hanna/impute_for_jass/ld_block_new_plink'
// Folder holding the harmonized GWAS *.txt files (previously hard-coded as well).
params.gwas_folder = '/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/harmonized_GWAS_files'

/* Input channels */
Region_channel = Channel.fromPath(params.region)
chr_channel = Channel.from(1..22)
ref_chr_channel = Channel.fromPath(params.ref_panel+"/chr*.eur.1pct.bim")
// With the default parameter values these globs resolve to exactly the
// same paths as the former hard-coded strings.
ld_channel = Channel.fromPath("${params.ld_folder}/*.ld")
harmonized_GWAS_files = Channel.fromPath("${params.gwas_folder}/*.txt")
/*
 * Runs `raiss` on one harmonized GWAS file per task. All reference-panel
 * and LD files are staged in the task directory; results are written to
 * ./imputed_GWAS and copied to params.output_folder.
 */
process Impute_GWAS {
    publishDir "${params.output_folder}", pattern: "imputed_GWAS/*.txt", mode: 'copy'
    // Memory and wall-time requests are multiplied by the attempt number.
    memory {8.GB * task.attempt}
    time {24.h * task.attempt}
    // Process directives are written `name value`; the `name = value` form
    // is configuration-file syntax and is not the documented way to set a
    // directive inside a process body.
    maxRetries 4
    queue 'dedicated,ggs,common'

    input:
        file gwas_files from harmonized_GWAS_files
        file ref_file from ref_chr_channel.collect()
        file ld_file from ld_channel.collect()

    output:
        // The same files are sent to two channels because a DSL1 channel
        // can only be consumed once.
        file "imputed_GWAS/*.txt" into imputed_gwas_channel
        file "imputed_GWAS/*.txt" into imputed_gwas_channel2

    script:
    // chrom: 4th '_'-separated field of the file name, cut at the first '.'.
    // study: 2nd and 3rd '_'-separated fields of the file name.
    """
    mkdir -p imputed_GWAS
    chrom=\$(echo ${gwas_files} | cut -d '_' -f4 | cut -d "." -f1)
    study=\$(echo ${gwas_files} | cut -d '_' -f2,3)
    echo \$chrom
    echo \$study
    raiss --chrom \${chrom} --gwas \${study} --ref-folder ./ --R2-threshold 0.6 --eigen-threshold 1.0 --ld-folder ./ --zscore-folder ./ --output-folder ./imputed_GWAS --ref-panel-suffix .eur.1pct.bim
    """
}
// Workflow graph description.
dag.enabled = true
dag.file = 'dag.dot'

// HTML execution report.
report.enabled = true
report.file = 'nextflow_logs/report.html'

// Per-task trace file.
trace.enabled = true
trace.file = 'nextflow_logs/trace.txt'

// Run containers with Singularity. The run options are single-quoted so
// $HOME and $USER are passed through literally rather than interpolated
// by the configuration parser.
singularity.enabled = true
singularity.autoMounts = true
singularity.runOptions = '--home $HOME:/home/$USER'
// `queueSize` is an option of the `executor` scope, not a process directive,
// so it is declared here instead of inside `process { }`.
executor {
    queueSize = 500
}

// Defaults applied to every process, followed by per-process overrides
// (container image and CPU count) selected by process name.
process {
    executor = 'local'
    maxErrors = 10
    maxRetries = 3
    maxForks = 400
    errorStrategy = 'finish'
    cache = 'deep'

    withName: 'Compute_MAF' {
        container = 'docker://quay.io/biocontainers/plink:1.90b5--heea4ae3_1'
        cpus = 1
    }
    withName: 'create_WG_reference_panel' {
        container = 'docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0'
        cpus = 1
    }
    // No container is configured for this process.
    withName: 'meta_data_GWAS' {
        cpus = 1
    }
    // No container is configured for this process.
    withName: 'Clean_GWAS' {
        cpus = 1
    }
    withName: 'Impute_GWAS' {
        container = 'docker://quay.io/biocontainers/raiss:3.1--pyhdfd78af_0'
        cpus = 1
    }
    withName: 'Munge_LDSC_data' {
        container = 'docker://quay.io/biocontainers/ldsc:1.0.1--py_0'
        cpus = 1
    }
    withName: 'Generate_trait_pair' {
        container = 'docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0'
        cpus = 1
    }
    withName: 'Correlation_LDSC_data' {
        container = "docker://quay.io/biocontainers/ldsc:1.0.1--py_0"
        cpus = 1
    }
    withName: 'Correlation_matrices' {
        container = 'docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0'
        cpus = 1
    }
    withName: 'Create_inittable_LDSC' {
        container = 'docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0'
        cpus = 1
    }
    withName: 'Create_inittable' {
        container = 'docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0'
        cpus = 1
    }
    // No container is configured for this process.
    withName: 'get_pheno_group' {
        cpus = 1
    }
    withName: 'Create_project_data' {
        container = 'docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0'
        cpus = 1
    }
}
......@@ -16,27 +16,28 @@ trace {
singularity {
enabled = true
autoMounts = true
runOptions = '--home $HOME:/home/$USER'
runOptions = '--home $HOME:/home/$USER -B /pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/'
}
executor {
submitRateLimit = '10 sec'
}
process{
executor='local'
maxErrors=10
maxRetries=3
executor='slurm'
maxErrors=20
maxRetries=4
maxForks=400
queueSize=500
errorStrategy='finish'
queueSize = 500
errorStrategy='retry'
cache='deep'
withName: 'Compute_MAF' {
container='plink_1.90b5--heea4ae3_0.sif'
cpus=1
}
withName: 'Compute_MAF' {
container='plink_1.90b5--heea4ae3_0.sif'
cpus=1
}
withName: 'create_WG_reference_panel' {
container='jass_preprocessing_2.0--py_0.sif'
container='jass_preprocessing_2.1--pyhdfd78af_0.sif'
cpus=1
}
......@@ -45,12 +46,13 @@ process{
}
withName: 'Clean_GWAS' {
// container='jass_preprocessing_2.0--py_0.sif'
cpus=1
memory = '16G'
cpus=1
}
withName: 'Impute_GWAS' {
container="raiss_2.0--py_0.sif"
memory = '8G'
container='raiss_3.1--pyhdfd78af_0.sif'
cpus=1
}
......@@ -60,7 +62,7 @@ process{
}
withName: 'Generate_trait_pair' {
container='jass_preprocessing_2.0--py_0.sif'
container='jass_preprocessing_2.1--pyhdfd78af_0.sif'
cpus=1
}
......@@ -70,17 +72,17 @@ process{
}
withName: 'Correlation_matrices' {
container='jass_preprocessing_2.0--py_0.sif'
container='jass_preprocessing_2.1--pyhdfd78af_0.sif'
cpus=1
}
withName: 'Create_inittable_LDSC' {
container='jass_2.0--pyh5ca1d4c_0.sif'
container='jass_2.2--pyhb7b1952_0.sif'
cpus=1
}
withName: 'Create_inittable' {
container='jass_2.0--pyh5ca1d4c_0.sif'
container='jass_2.2--pyhb7b1952_0.sif'
cpus=1
}
......@@ -89,7 +91,7 @@ process{
}
withName: 'Create_project_data' {
container='jass_2.0--pyh5ca1d4c_0.sif'
container='jass_2.2--pyhb7b1952_0.sif'
cpus=1
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment