diff --git a/Ref_Panel/1000G_AFR_0_01.csv b/Ref_Panel/1000G_AFR_0_01.csv
new file mode 100755
index 0000000000000000000000000000000000000000..34732ab61353114c4ab406dce02a6e50b3520077
--- /dev/null
+++ b/Ref_Panel/1000G_AFR_0_01.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:934c63984d760943b0228802f1fcbf3a778d8a3c525e3b78138948b4c07f4ab4
+size 515852597
diff --git a/Ref_Panel/1000G_EAS_0_01.csv b/Ref_Panel/1000G_EAS_0_01.csv
new file mode 100755
index 0000000000000000000000000000000000000000..83ab685c2be8a4bd213ad117ab991b01600d6f0a
--- /dev/null
+++ b/Ref_Panel/1000G_EAS_0_01.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:254d08862169ae17658e38094ee8bddbe01571a4e56b9058713abc8b706f8081
+size 262350635
diff --git a/modules/GW_reference_panels.nf b/modules/GW_reference_panels.nf
new file mode 100644
index 0000000000000000000000000000000000000000..6e02d7c7628d0f3eb4ef21bbed68c691d3d5aeb2
--- /dev/null
+++ b/modules/GW_reference_panels.nf
@@ -0,0 +1,103 @@
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ JASS suite pipeline
+authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+/* Parameter to set if optional pipeline steps are performed */
+params.compute_project=false
+
+
+/* path of input data */
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
+
+params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
+params.output_folder = "${launchDir}"
+
+/* Per-chromosome panel files are named <prefix><chr><suffix> plus the PLINK extension */
+params.ancestry="EAS"
+params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
+params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
+params.suffix=""
+
+
+chr_channel = Channel.from(1..22)
+
+/*
+ * The globs are derived from params.prefix / params.suffix so that
+ * overriding those parameters stays consistent with the file names that
+ * the processes below rebuild from the same two parameters.
+ */
+ref_chr_channel=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
+ref_chr_channel2=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.bim")
+ref_chr_channel3=Channel.fromPath("${params.ref_panel}/${params.prefix}*${params.suffix}.*")
+
+
+/*
+ * Compute_MAF: run `plink --freq` on one chromosome of the reference panel.
+ * Every panel file is staged in the task directory; the chromosome number
+ * comes from chr_channel and the result is written to chr<N>.frq.
+ */
+process Compute_MAF{
+
+    input:
+        file ref_panel from ref_chr_channel3.collect()
+        val chr from chr_channel
+    output:
+        file "*.frq" into MAF_channel
+    script:
+    """
+    echo "Compute_MAF"
+
+    bfile="${params.prefix}${chr}${params.suffix}"
+    echo \$bfile
+
+    plink --bfile \${bfile} --freq --out ./chr${chr}
+
+    """
+}
+
+/*
+ * create_WG_reference_panel: join the per-chromosome .bim positions with the
+ * .frq allele frequencies and write one tab-separated, header-less table
+ * (CHR, SNP, MAF, pos, A1, A2) for chromosomes 1 to 22, without A/T and C/G SNPs.
+ * NOTE(review): the output name ends in "_0_01" but no MAF threshold is applied
+ * in this module - confirm whether the panel is already filtered upstream.
+ */
+process create_WG_reference_panel{
+    publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy'
+
+    input:
+        file maf_files from MAF_channel.collect()
+        file chr_files from ref_chr_channel.collect()
+    output:
+        file "1000G_${params.ancestry}_0_01.csv" into ref_panel_wg_channel
+    script:
+    """
+    #!/usr/bin/env python3
+    import pandas as pd
+    import os
+    cwd = os.getcwd()
+    print(cwd)
+    pref="${params.prefix}"
+    suf="${params.suffix}"
+    refchr_list = []
+    for chrom in range(1,23):
+        fi = "{0}{1}{2}.bim".format(pref,chrom,suf)
+        print(fi)
+        # the .bim file gives the position of every SNP, indexed by rsid
+        position = pd.read_csv(fi, sep='\t', names=['chr', "rsid", "?", "pos", "ref_al", "alt_al"])
+        position.set_index("rsid", inplace=True)
+        ref_chr = pd.read_csv("./chr{0}.frq".format(chrom), sep="\\s+")
+
+        # attach the position to each frequency row through its SNP id
+        ref_chr['pos'] = position.loc[ref_chr.SNP, "pos"].values
+        refchr_list.append(ref_chr[["CHR", "pos", "SNP", "A1", "A2", "MAF"]])
+
+    ref= pd.concat(refchr_list)
+    # drop SNPs whose allele pair is A/T or C/G before writing the panel
+    ref.loc[~(ref.A1+ref.A2).isin(["AT", 'TA','CG','GC'])][["CHR","SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
+    """
+}
diff --git a/modules/run_RAISS_perf.nf b/modules/run_RAISS_perf.nf
new file mode 100755
index 0000000000000000000000000000000000000000..61b6a3f592a2ee5758e16ce68f8c8c459419c7ee
--- /dev/null
+++ b/modules/run_RAISS_perf.nf
@@ -0,0 +1,38 @@
+
+params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_GnomAD/hg37/FINNS/ref_panel'
+params.output_folder = "/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/"
+ref_chr_channel2=Channel.fromPath(params.ref_panel+"/chr*.bim")
+
+ld_channel=Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_GnomAD/hg37/FINNS/ld_mat/*")
+cleaned_gwas_chr_channel = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/harmonized_new_index/z_*chr22.txt")
+
+
+/*
+ * perf_raiss: run the RAISS `performance-grid-search` command once per
+ * harmonized GWAS file. The file name is split on '_': fields 2-3 give the
+ * study and field 4 (up to the first '.') gives the chromosome passed to raiss.
+ */
+process perf_raiss {
+    publishDir "${params.output_folder}", pattern: "imputed_gnomad/*.txt", mode: 'copy'
+
+    input:
+        file gwas_files from cleaned_gwas_chr_channel
+        file ref_file from ref_chr_channel2.collect()
+        file ld_file from ld_channel.collect()
+    output:
+        file "imputed_gnomad/*.txt" into imputed_gwas_channel
+    script:
+    """
+    mkdir -p imputed_gnomad
+    mkdir -p masked_zscore
+    mkdir -p raiss_report
+    chrom=\$(echo ${gwas_files} | cut -d '_' -f4 | cut -d "." -f1)
+    study=\$(echo ${gwas_files} | cut -d '_' -f2,3)
+
+    echo \$chrom
+    echo \$study
+
+    raiss --ld-folder ./ --ref-folder ./ --gwas \$study --chrom \$chrom --ld-type scipy performance-grid-search --harmonized-folder ./ --masked-folder ./masked_zscore/ --imputed-folder ./imputed_gnomad/ --output-path ./raiss_report --eigen-ratio-grid '[0.000001, 0.1, 0.001]' --ld-threshold-grid '[0,10]' --n-cpu 8
+
+    """
+}
diff --git a/modules/sanity_checks.nf b/modules/sanity_checks.nf
new file mode 100755
index 0000000000000000000000000000000000000000..925416c13527fe38379239883a87fe00c6b6986c
--- /dev/null
+++ b/modules/sanity_checks.nf
@@ -0,0 +1,30 @@
+
+
+
+params.output_folder = "/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/"
+params.ancestry = ""
+params.harmonized_files = ""
+params.imputed_files = ""
+params.trait_file = ""
+
+/*
+ * Sanity_checks: run `raiss sanity-check` for the trait named by
+ * params.trait_file (the part of the file name before the first '.').
+ * params.trait_file, params.harmonized_files and params.imputed_files have
+ * empty defaults and must be supplied by the caller.
+ * The report directory is declared as an output because publishDir only
+ * publishes files that belong to the declared outputs of a process.
+ */
+process Sanity_checks {
+    publishDir "${params.output_folder}", pattern: "sanity_checks/*.txt", mode: 'copy'
+    input:
+        path harmonized_files from params.harmonized_files
+        path imputed_files from params.imputed_files
+    output:
+        path "sanity_checks/*" optional true into sanity_report_channel
+
+    script:
+    """
+    trait=`cut --delimiter '.' -f 1 <<< ${params.trait_file}`
+    echo \$trait
+    ls
+    mkdir -p sanity_checks
+    raiss sanity-check --trait \${trait} --harmonized-folder ${params.harmonized_files} --imputed-folder ${params.imputed_files} --output-path ./sanity_checks/sanity_report_${params.ancestry}
+    """
+}