diff --git a/README.md b/README.md index 07fc7047fdb1ce13caf516b298bfc65b2a2acbc0..4f10cc337a0266cb2b2ff860165a271adb36a301 100644 --- a/README.md +++ b/README.md @@ -46,4 +46,8 @@ The following Item are necessary to run JASS pipeline on real data * --output_folder : A path toward a folder to write pipeline results (inittable, worktable...). by default results will be publish in the workflow directory. -Parameters can be specified in command line or by editing the +## Usage Example on HPC Cluster + +If you are working on an HPC cluster that uses the Slurm job scheduler, adapt the nextflow_sbatch.config file to your environment and launch the pipeline with a command such as: + +sbatch --mem-per-cpu 32G -p common,dedicated,ggs --qos=long --wrap "module load java/13.0.2;module load singularity/3.8.3;module load graphviz/2.42.3;./nextflow run imputation_only.nf -with-report imput_report.html -with-timeline imput_timeline.html -c nextflow_sbatch.config -qs 300" diff --git a/correlation.nf b/correlation.nf new file mode 100644 index 0000000000000000000000000000000000000000..f0e57eae76d3a3ab8d9a95356d0a92d1c2b3492f --- /dev/null +++ b/correlation.nf @@ -0,0 +1,73 @@ +params.output_folder = "${baseDir}" + +/* Script channels*/ +extract_sample_size_script_channel = Channel.fromPath("${baseDir}/extract_sample_size.py") +generate_trait_pairs_channel = Channel.fromPath("${baseDir}/generate_trait_pairs.py") +parse_correlation_channel = Channel.fromPath("${baseDir}/parse_correlation_results.py") + +ldsc_data_channel = Channel.fromPath("${baseDir}/ldsc_data/data_*.sumstats.gz") +ldsc_data_channel_bis = Channel.fromPath("${baseDir}/ldsc_data/data_*.sumstats.gz") +/* + process related to LD-score calculation +*/ + + +process Generate_trait_pair { + time '1h' + queue 'dedicated,common,ggs' + input: + file generate_trait_pairs_script from generate_trait_pairs_channel + file ldsc_data from ldsc_data_channel.unique().collect() + output: + file "pairs_chunk_*.txt" into combi_channel mode flatten + + """ + python3 
${generate_trait_pairs_script} + """ +} + +process Correlation_LDSC_data { + memory {8.GB * task.attempt} + time {24.h * task.attempt} + queue 'dedicated,common,ggs' + publishDir "${params.output_folder}/cor_data/", pattern: "*.log", mode: 'copy' + input: + file trait_pair from combi_channel + file ldsc_data from ldsc_data_channel_bis.unique().collect() + output: + file "*.log" into cor_log_channel + + """ + export OMP_NUM_THREADS=1 + echo ${trait_pair} + IFS=';' read -ra my_trait <<< "\$(cat ${trait_pair})" + i=1 + + for trait_p in \${my_trait[@]} + do + + echo \$trait_p + trait1=\$(echo \$trait_p | cut -d '.' -f1 | cut -d '_' -f2,3) + trait2=\$(echo \$trait_p | cut -d ',' -f2 | cut -d '.' -f1 | cut -d '_' -f2,3) + + ldsc.py --rg \$trait_p --out \${trait1}-_-\${trait2} --ref-ld-chr ${baseDir}/eur_w_ld_chr/ --w-ld-chr ${baseDir}/eur_w_ld_chr/ + + done + """ +} + + +process Correlation_matrices { + publishDir "${params.output_folder}/Correlation_matrices/", pattern: "*.csv", mode: 'copy' + time '1h' + queue 'dedicated,common,ggs' + input: + file parsing_script from parse_correlation_channel + file ldsc_data from cor_log_channel.collect() + output: + file "*.csv" into correlation_matrices_channel /* publishDir only copies declared outputs, so the CSV files must be listed here */ + """ + python3 ${parsing_script} + + """ +} diff --git a/imputation_only.nf b/imputation_only.nf new file mode 100644 index 0000000000000000000000000000000000000000..acc296f3579290ec5b58d8e12ca163207c3f7232 --- /dev/null +++ b/imputation_only.nf @@ -0,0 +1,39 @@ +params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/1._DATA/ImpG_refpanel/' + +params.region = "${baseDir}/input_files/fourier_ls-all.bed" +params.output_folder = "${baseDir}" + +Region_channel = Channel.fromPath(params.region) +chr_channel = Channel.from(1..22) + +ref_chr_channel=Channel.fromPath(params.ref_panel+"/chr*.eur.1pct.bim") +ld_channel=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_JASS/WKD_Hanna/impute_for_jass/ld_block_new_plink/*.ld") 
+harmonized_GWAS_files=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/harmonized_GWAS_files/*.txt") + +process Impute_GWAS { + publishDir "${params.output_folder}", pattern: "imputed_GWAS/*.txt", mode: 'copy' + memory {8.GB * task.attempt} + time {24.h * task.attempt} + maxRetries 4 /* process directives are written without '='; the assignment form does not set the directive */ + queue 'dedicated,ggs,common' + input: + file gwas_files from harmonized_GWAS_files + file ref_file from ref_chr_channel.collect() + file ld_file from ld_channel.collect() + output: + file "imputed_GWAS/*.txt" into imputed_gwas_channel + file "imputed_GWAS/*.txt" into imputed_gwas_channel2 + script: + """ + mkdir -p imputed_GWAS + + chrom=\$(echo ${gwas_files} | cut -d '_' -f4 | cut -d "." -f1) + study=\$(echo ${gwas_files} | cut -d '_' -f2,3) + + echo \$chrom + echo \$study + + raiss --chrom \${chrom} --gwas \${study} --ref-folder ./ --R2-threshold 0.6 --eigen-threshold 1.0 --ld-folder ./ --zscore-folder ./ --output-folder ./imputed_GWAS --ref-panel-suffix .eur.1pct.bim + """ +} diff --git a/nextflow.config b/nextflow.config deleted file mode 100644 index 69fcf7f9be93899d9f65ce670b234221ac9dacc6..0000000000000000000000000000000000000000 --- a/nextflow.config +++ /dev/null @@ -1,96 +0,0 @@ -dag { - enabled = true - file = 'dag.dot' -} - -report { - enabled = true - file = 'nextflow_logs/report.html' -} - -trace { - enabled = true - file = 'nextflow_logs/trace.txt' -} - -singularity { - enabled = true - autoMounts = true - runOptions = '--home $HOME:/home/$USER' -} - -process{ - executor='local' - maxErrors=10 - maxRetries=3 - maxForks=400 - queueSize=500 - errorStrategy='finish' - cache='deep' - - - withName: 'Compute_MAF' { - container='docker://quay.io/biocontainers/plink:1.90b5--heea4ae3_1' - cpus=1 - } - - withName: 'create_WG_reference_panel' { - container='docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0' - cpus=1 - } - - withName: 'meta_data_GWAS' { - cpus=1 - } - - withName: 'Clean_GWAS' { - - cpus=1 - } - - withName: 'Impute_GWAS' { 
- container='docker://quay.io/biocontainers/raiss:3.1--pyhdfd78af_0' - cpus=1 - } - - withName: 'Munge_LDSC_data' { - - container='docker://quay.io/biocontainers/ldsc:1.0.1--py_0' - cpus=1 - } - - withName: 'Generate_trait_pair' { - container='docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0' - cpus=1 - } - - withName: 'Correlation_LDSC_data' { - container="docker://quay.io/biocontainers/ldsc:1.0.1--py_0" - cpus=1 - } - - withName: 'Correlation_matrices' { - container='docker://quay.io/biocontainers/jass_preprocessing:2.0.1--py_0' - cpus=1 - } - - withName: 'Create_inittable_LDSC' { - container='docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0' - cpus=1 - } - - withName: 'Create_inittable' { - container='docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0' - cpus=1 - } - - withName: 'get_pheno_group' { - cpus=1 - } - - withName: 'Create_project_data' { - container='docker://quay.io/biocontainers/jass:2.0--pyh5ca1d4c_0' - cpus=1 - } - -} diff --git a/nextflow_slurm.config b/nextflow_sbatch.config similarity index 57% rename from nextflow_slurm.config rename to nextflow_sbatch.config index 57e1f90e672e25cbda4d0da0ee46dbee0c7ecc57..58c62cfb69848e033a831b8dba8c38a1a51294f7 100644 --- a/nextflow_slurm.config +++ b/nextflow_sbatch.config @@ -16,27 +16,28 @@ trace { singularity { enabled = true autoMounts = true - runOptions = '--home $HOME:/home/$USER' + runOptions = '--home $HOME:/home/$USER -B /pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/' +} +executor { + queueSize = 500 /* queueSize is an executor-scope option, so it is set here rather than under process{} */ + submitRateLimit = '10 sec' } - process{ - executor='local' - maxErrors=10 - maxRetries=3 + executor='slurm' + maxErrors=20 + maxRetries=4 maxForks=400 - queueSize=500 - errorStrategy='finish' + errorStrategy='retry' cache='deep' - withName: 'Compute_MAF' { - container='plink_1.90b5--heea4ae3_0.sif' - cpus=1 - } - + withName: 'Compute_MAF' { + container='plink_1.90b5--heea4ae3_0.sif' + cpus=1 + } withName: 'create_WG_reference_panel' { - 
container='jass_preprocessing_2.0--py_0.sif' + container='jass_preprocessing_2.1--pyhdfd78af_0.sif' cpus=1 } @@ -45,12 +46,13 @@ process{ } withName: 'Clean_GWAS' { -// container='jass_preprocessing_2.0--py_0.sif' - cpus=1 + memory = '16G' + cpus=1 } withName: 'Impute_GWAS' { - container="raiss_2.0--py_0.sif" + memory = { 8.GB * task.attempt } /* a fixed value here would override the process's per-attempt scaling, so retries would rerun with the same memory */ + container='raiss_3.1--pyhdfd78af_0.sif' cpus=1 } @@ -60,7 +62,7 @@ process{ } withName: 'Generate_trait_pair' { - container='jass_preprocessing_2.0--py_0.sif' + container='jass_preprocessing_2.1--pyhdfd78af_0.sif' cpus=1 } @@ -70,17 +72,17 @@ process{ } withName: 'Correlation_matrices' { - container='jass_preprocessing_2.0--py_0.sif' + container='jass_preprocessing_2.1--pyhdfd78af_0.sif' cpus=1 } withName: 'Create_inittable_LDSC' { - container='jass_2.0--pyh5ca1d4c_0.sif' + container='jass_2.2--pyhb7b1952_0.sif' cpus=1 } withName: 'Create_inittable' { - container='jass_2.0--pyh5ca1d4c_0.sif' + container='jass_2.2--pyhb7b1952_0.sif' cpus=1 } @@ -89,7 +91,7 @@ process{ } withName: 'Create_project_data' { - container='jass_2.0--pyh5ca1d4c_0.sif' + container='jass_2.2--pyhb7b1952_0.sif' cpus=1 }