Commit 3e65e282 authored by Hanna  JULIENNE's avatar Hanna JULIENNE

describes parameters

parent f80d131c
......@@ -11,13 +11,16 @@ The current pipeline integrate the following workflow:
The following items are necessary to run the JASS pipeline:
1. --input_folder : A path toward a meta-data file describing GWAS (see example file in ./input_files/test1.csv)
1. --meta_data : A path to a meta-data file describing the GWAS (see the example file ./input_files/test1.csv)
2. --gwas_folder : A path toward a folder containing the summary statistics to analyze
3. A folder containing a Reference Panel in the .bim, .bed, .fam format
4. If you wish to compute joint analyses with the pipeline, a group file with the each phenotype group written on a separated line
3. --ref_panel : A folder containing a reference panel in the .bim, .bed, .fam format
4. --ld-folder : A path to a folder containing LD matrices (these can be generated from the reference panel with the raiss package, as described here: http://statistical-genetics.pages.pasteur.fr/raiss/#precomputation-of-ld-correlation)
5. --group : If you wish to compute joint analyses with the pipeline, a group file with each phenotype group written on a separate line
## Optional parameters
. A path toward a folder to write pipeline results (inittable, worktable...). by default results will be publish in the workflow directory.
* --output_folder : A path to a folder in which to write pipeline results (inittable, worktable...). By default, results will be published in the workflow directory.
## Usage of nextflow pipeline
......
params.compute_project=false
params.compute_project=true
params.compute_LDSC_matrix=true
params.compute_imputation=false
params.compute_imputation=true
params.meta_data = "test_final.csv"
params.input_folder = ${baseDir}+"/input_files/"
params.meta_data = "${baseDir}"+"/input_files/test_final.csv"
params.gwas_folder = "/pasteur/projets/policy01/PCMA/1._DATA/RAW.GWAS/"
params.ref_panel = '/pasteur/projets/policy01/PCMA/1._DATA/ImpG_refpanel/'
params.group = params.input_folder+"/group.txt"
params.group = "${baseDir}/input_files/group.txt"
group = file(params.group)
params.region = "${baseDir}/input_files/fourier_ls-all.bed"
params.output_folder = "$baseDir"
params.region = params.input_folder+"fourier_ls-all.bed"
params.output_folder = $baseDir
GWAS_labels = params.input_folder + params.input_folder
diagnostic_folder= params.output_folder + "/sample_size/"
harmonized_GWAS_folder = params.output_folder + "harmonized_GWAS_by_chr/"
harmonized_GWAS_1_file_folder = params.output_folder +"harmonized_GWAS_WG/"
Region_channel = Channel.fromPath(params.region)
Region_channel2 = Channel.fromPath(params.region)
diagnostic_folder= params.output_folder + "/sample_size/"
harmonized_GWAS_folder = params.output_folder + "harmonized_GWAS_by_chr/"
harmonized_GWAS_1_file_folder = params.output_folder + "harmonized_GWAS_WG/"
chr_channel = Channel.from(1..22)
ref_chr_channel=Channel.fromPath("/pasteur/projets/policy01/PCMA/1._DATA/ImpG_refpanel/chr*.eur.1pct.bim")
ref_chr_channel2=Channel.fromPath("/pasteur/projets/policy01/PCMA/1._DATA/ImpG_refpanel/chr*.eur.1pct.bim")
ref_chr_channel3=Channel.fromPath("/pasteur/projets/policy01/PCMA/1._DATA/ImpG_refpanel/chr*.eur.1pct.*")
ref_chr_channel=Channel.fromPath(params.ref_panel+"/chr*.eur.1pct.bim")
ref_chr_channel2=Channel.fromPath(params.ref_panel+"/chr*.eur.1pct.bim")
ref_chr_channel3=Channel.fromPath(params.ref_panel+"/chr*.eur.1pct.*")
ld_channel=Channel.fromPath("/pasteur/projets/policy01/PCMA/WKD_Hanna/impute_for_jass/ld_block_new_plink/*.ld")
/* Script channels*/
extract_sample_size_script_channel = Channel.fromPath('${baseDir}/extract_sample_size.py')
generate_trait_pairs_channel = Channel.fromPath('${baseDir}/generate_trait_pairs.py')
parse_correlation_channel = Channel.fromPath('${baseDir}/parse_correlation_results.py')
extract_sample_size_script_channel = Channel.fromPath("${baseDir}/extract_sample_size.py")
generate_trait_pairs_channel = Channel.fromPath("${baseDir}/generate_trait_pairs.py")
parse_correlation_channel = Channel.fromPath("${baseDir}/parse_correlation_results.py")
......@@ -97,21 +91,21 @@ process meta_data_GWAS{
output:
file "meta_data_chk*" into meta_data mode flatten
"""
d=`wc -l ${GWAS_labels}`
d=`wc -l ${params.meta_data}`
e=`echo \$d | cut -d ' ' -f 1`
for ((i = 2; i <= \$e; i++));
do
head -n1 ${GWAS_labels} > "meta_data_chk\$i.csv"
head -n \$i ${GWAS_labels} | tail -n 1 >> "meta_data_chk\$i.csv"
head -n1 ${params.meta_data} > "meta_data_chk\$i.csv"
head -n \$i ${params.meta_data} | tail -n 1 >> "meta_data_chk\$i.csv"
done
"""
}
process Clean_GWAS {
//publishDir "${output_folder}/harmonized_GWAS", pattern: "*.txt", mode: 'copy'
//publishDir "${output_folder}", pattern: "harmonized_GWAS_1_file/*.txt", mode: 'copy'
publishDir "${output_folder}/harmonized_GWAS_files/", pattern: "*.txt", mode: 'copy'
publishDir "${output_folder}", pattern: "*.txt", mode: 'copy'
input:
file ref_panel from ref_panel_wg_channel
......@@ -139,7 +133,7 @@ process Clean_GWAS {
process Impute_GWAS {
//publishDir "${output_folder}", pattern: "imputed_GWAS/*.txt", mode: 'copy'
publishDir "${output_folder}", pattern: "imputed_GWAS/*.txt", mode: 'copy'
memory '8 GB'
time '8h'
input:
......@@ -188,11 +182,10 @@ process Do_not_Impute_GWAS {
}
/*
process related to LD-score calculation
process related to LD-score calculation
*/
process Munge_LDSC_data {
//publishDir "${baseDir}", pattern: "ldsc_data/data_*.sumstats.gz", mode: 'copy'
publishDir "${output_folder}", pattern: "ldsc_data/data_*.sumstats.gz", mode: 'copy'
input:
file clean_gwas from cleaned_gwas
......@@ -203,7 +196,7 @@ process Munge_LDSC_data {
when:
params.compute_LDSC_matrix
"""
Nsamp=\$(python2.7 ${extract_sample_size_script} ${clean_gwas} ${GWAS_labels})
Nsamp=\$(python2.7 ${extract_sample_size_script} ${clean_gwas} ${params.meta_data})
if [ ! -d "ldsc_data" ]
then
......@@ -236,7 +229,7 @@ process Generate_trait_pair {
process Correlation_LDSC_data {
//publishDir "${baseDir}/cor_data/", pattern: "*.log", mode: 'copy'
publishDir "${output_folder}/cor_data/", pattern: "*.log", mode: 'copy'
input:
file trait_pair from combi_channel
file ldsc_data from ldsc_data_channel_bis.collect()
......@@ -266,7 +259,7 @@ process Correlation_LDSC_data {
process Correlation_matrices {
//publishDir "${baseDir}/Correlation_matrices/", pattern: "*.csv", mode: 'copy'
publishDir "${output_folder}/Correlation_matrices/", pattern: "*.csv", mode: 'copy'
input:
file parsing_script from parse_correlation_channel
......@@ -300,7 +293,7 @@ process Create_inittable_LDSC {
date_init=\$(date +"%m_%d_%Y-%H:%M")
init_name="inittable_LDSC_\$date_init.hdf5"
jass create-inittable --input-data-path "\$e" --init-covariance-path ${Covariance_H0} --regions-map-path ${Regions} --description-file-path ${GWAS_labels} --init-table-path \$init_name
jass create-inittable --input-data-path "\$e" --init-covariance-path ${Covariance_H0} --regions-map-path ${Regions} --description-file-path ${params.meta_data} --init-table-path \$init_name
"""
}
......@@ -324,7 +317,7 @@ process Create_inittable {
date_init=\$(date +"%m_%d_%Y-%H:%M")
init_name="inittable_NO_LDSC_\$date_init.hdf5"
jass create-inittable --input-data-path "\$e" --regions-map-path ${Regions} --description-file-path ${GWAS_labels} --init-table-path inittable_no_LDSC.hdf5 --init-covariance-path ${baseDir}/Correlation_matrices/Covariance_matrix_H0.csv
jass create-inittable --input-data-path "\$e" --regions-map-path ${Regions} --description-file-path ${params.meta_data} --init-table-path inittable_no_LDSC.hdf5 --init-covariance-path ${baseDir}/Correlation_matrices/Covariance_matrix_H0.csv
"""
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment