From 4c555cc4870308f8977a65f147a6303cd3d1c7e8 Mon Sep 17 00:00:00 2001 From: hjulienn <hanna.julienne@pasteur.fr> Date: Wed, 11 Oct 2023 10:00:49 +0200 Subject: [PATCH] tutorial works with DSL2 --- README.md | 21 +++------------------ input_files/group.txt | 3 ++- jass_pipeline.nf | 28 +++++++++++++--------------- 3 files changed, 18 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index cc30a98..eda3624 100644 --- a/README.md +++ b/README.md @@ -25,26 +25,11 @@ Clone the current repository locally: ``` git clone https://gitlab.pasteur.fr/statistical-genetics/jass_suite_pipeline.git ``` -<!-- -download the test data through the interface, using wget or git lfs -and place it in the ./test_data/hg38_EAS folder. +> [!NOTE] +> The pipeline has recently been upgraded to Nextflow DSL2 syntax. If you wish to use the previous DSL1 version, you can find it in ./old_versions and run it with a previous version of Nextflow ("NXF_VER=22.10.5 nextflow run jass_pipeline.nf ....") -Option with wget -``` -cd ${PATH_TO_PIPELINE_FOLDER}/test_data/hg38_EAS/ -wget https://gitlab.pasteur.fr/statistical-genetics/jass_suite_pipeline/-/raw/pipeline_ancestry/test_data/hg38_EAS/RBC_EAS_chr22.tsv?inline=false && mv RBC_EAS_chr22.tsv\?inline\=false RBC_EAS_chr22.tsv -wget https://gitlab.pasteur.fr/statistical-genetics/jass_suite_pipeline/-/raw/pipeline_ancestry/test_data/hg38_EAS/PLT_EAS_chr22.tsv?inline=false && mv PLT_EAS_chr22.tsv\?inline\=false PLT_EAS_chr22.tsv -wget https://gitlab.pasteur.fr/statistical-genetics/jass_suite_pipeline/-/raw/pipeline_ancestry/test_data/hg38_EAS/WBC_EAS_chr22.tsv?inline=false && mv WBC_EAS_chr22.tsv\?inline\=false WBC_EAS_chr22.tsv -``` -Option with git-lfs (require installing git lfs) -``` - git lfs pull --include PLT_EAS_chr22.tsv - git lfs pull --include RBC_EAS_chr22.tsv - git lfs pull --include WBC_EAS_chr22.tsv -``` ---> Test data are located in the ${PATH_TO_PIPELINE_FOLDER}/test_data/hg38_EAS/ folder These are extracts of summary statistics 
from a trans ancestry GWAS on blood traits ([Chen et al](https://www.sciencedirect.com/science/article/pii/S0092867420308229?via%3Dihub)): WBC, White blood cell count; RBC, Red blood cell count; PLT, platelet count. @@ -53,7 +38,7 @@ They correspond to the chromosome 21 and 22 for the East asian ancestry. Once done you can launch the pipeline as: ``` - NXF_VER=22.10.5 nextflow run jass_pipeline.nf --ref_panel_WG {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}Ref_Panel/1000G_EAS_0_01_chr22_21.csv --gwas_folder {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/test_data/hg38_EAS/ --meta-data {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/Data_test_EAS.csv --region {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed --group {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/group.txt -with-report jass_report.html -c nextflow_local.config + nextflow run jass_pipeline.nf --ref_panel_WG {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/Ref_Panel/1000G_EAS_0_01_chr22_21.csv --gwas_folder {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/test_data/hg38_EAS/ --meta_data {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/Data_test_EAS.csv --region {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed --group {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/group.txt -with-report jass_report.html -c nextflow_local.config ``` See the description of required parameters in the next section. You can specify parameter in the jass_pipeline.nf header if prefered. 
diff --git a/input_files/group.txt b/input_files/group.txt index f20cd7a..558817a 100644 --- a/input_files/group.txt +++ b/input_files/group.txt @@ -1 +1,2 @@ -GRP1;z_SHRINE_FVC z_SHRINE_PEF z_SHRINE_FEV1 z_SHRINE_RATIO +GRP1;z_BCT_WBC z_BCT_PLT z_BCT_RBC +GRP2;z_BCT_PLT z_BCT_RBC \ No newline at end of file diff --git a/jass_pipeline.nf b/jass_pipeline.nf index 9841c2f..6c56ddd 100644 --- a/jass_pipeline.nf +++ b/jass_pipeline.nf @@ -8,24 +8,23 @@ nextflow.enable.dsl=2 /* Parameter to set if optional pipeline steps are performed */ params.compute_project=true // Compute JASS runs -params.compute_LDSC_matrix=true // Infer the genetic covariance and residual covariance using the LDscore regression (Bulik-Sullivan, et al, 2015). The residual covariance is necessary to perform multi-trait GWAS (see julienne, et al 2021) If set to false, the residual covariance will be infered from Zscores -params.compute_imputation=true +params.compute_LDSC_matrix=false // Infer the genetic covariance and residual covariance using the LDscore regression (Bulik-Sullivan, et al, 2015). 
The residual covariance is necessary to perform multi-trait GWAS (see Julienne et al., 2021). If set to false, the residual covariance will be inferred from Z-scores +params.compute_imputation=false /* Path of input data */ -params.meta_data = "${projectDir}"+"/input_files/Meta_data_preliminary_analysis.csv" // file describing gwas summary statistic format -params.gwas_folder = "${projectDir}" + "/RAW_SUMSTAT/" - -// //"/pasteur/zeus/projets/p02/GGS_JASS/1._DATA/Summary_stat_hg38/EAS/" +params.meta_data = "${projectDir}"+"/input_files/Data_test_EAS.csv" + // "${projectDir}"+"/input_files/Meta_data_preliminary_analysis.csv" file describing gwas summary statistic format +params.gwas_folder = "${projectDir}"+'/test_data/hg38_EAS/' params.region = Channel.fromPath("${projectDir}"+"/input_files/All_Regions_ALL_ensemble_1000G_hg38_EUR.bed") -params.ref_panel_WG = "${projectDir}"+"/Ref_Panel/1000G_EUR_0_01.csv" -//"${projectDir}/Ref_Panel/1000G_SAS_0_01_chr22.csv" +params.ref_panel_WG = "${projectDir}"+"/Ref_Panel/1000G_EAS_0_01_chr22_21.csv" + -params.ancestry="EUR" +params.ancestry="EAS" params.prefix="ALL_ensemble_1000G_hg38_EUR_chr" -params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EUR_" +params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_" params.suffix="" -params.LD_SCORE_folder='/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LDSCORE/EUR/' +params.LD_SCORE_folder='/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LDSCORE/EAS/' /* Folder to store result output */ params.output_folder = "${projectDir}" @@ -43,14 +42,13 @@ params.r2threshold = 0.6 params.eigenthreshold = 0.05 params.minimumld = 5 params.ld_type = "plink" -params.ld_folder = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LD_RAISS/EUR/*.ld").collect() +params.ld_folder = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LD_RAISS/EAS/*.ld").collect() params.ref_panel = 
'/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/Ref_panel_by_chr/' chr_channel = Channel.from(1..22) params.ref_chr_path= Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.bim").collect() -params.perform_sanity_checks=true -params.perform_accuracy_checks=true - +params.perform_sanity_checks=false +params.perform_accuracy_checks=false /* Project group */ params.group = "${projectDir}/input_files/group.txt" -- GitLab