Skip to content
Snippets Groups Projects
Commit 2ac00394 authored by Hanna JULIENNE
Browse files

Merge branch 'raiss_report_integration' into 'master'

Raiss report integration

See merge request !3
parents 48df1edc 40ecb43d
No related branches found
No related tags found
1 merge request!3Raiss report integration
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
JASS suite pipeline
authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/* Parameter to set if optional pipeline steps are performed */
params.compute_project = false

/* Paths of input data */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
params.output_folder = "${launchDir}"

/*
 * Naming scheme of the reference panel files. A per-chromosome PLINK file set
 * is expected to be named <prefix><chromosome><suffix>.{bed,bim,fam}.
 */
params.ancestry = "EAS"
params.prefix = "ALL_ensemble_1000G_hg38_EAS_chr"
params.prefix_Impute_GWAS = "ALL_ensemble_1000G_hg38_EAS_"
params.suffix = ""

/* One value per autosome. */
chr_channel = Channel.from(1..22)

/*
 * Reference panel files. The globs are built from params.prefix (rather than a
 * hard-coded copy of its default value) so that the staged files always match
 * the names the processes reconstruct from params.prefix.
 * DSL1 channels can only be consumed once, hence the separate channels.
 */
ref_chr_channel = Channel.fromPath("${params.ref_panel}/${params.prefix}*.bim")
ref_chr_channel2 = Channel.fromPath("${params.ref_panel}/${params.prefix}*.bim")
ref_chr_channel3 = Channel.fromPath("${params.ref_panel}/${params.prefix}*.*")
/*
 * Compute_MAF
 * Runs `plink --freq` once per chromosome number received from chr_channel.
 * Every reference panel file is staged in the task directory (collected into
 * a single list), and the PLINK file set to read is rebuilt from
 * params.prefix, the chromosome number and params.suffix.
 * Emits the resulting chr<N>.frq file on MAF_channel.
 */
process Compute_MAF {

    input:
    file ref_panel from ref_chr_channel3.collect()
    val chr from chr_channel

    output:
    file "*.frq" into MAF_channel

    script:
    """
    echo "Compute_MAF"
    bfile="${params.prefix}${chr}${params.suffix}"
    echo \$bfile
    plink --bfile \${bfile} --freq --out ./chr${chr}
    """
}
/*
 * create_WG_reference_panel
 * Merges the per-chromosome PLINK frequency files (chr<N>.frq) with the SNP
 * positions read from the matching .bim files, for chromosomes 1 to 22, and
 * writes one tab-separated, header-less table with the columns
 * CHR, SNP, MAF, pos, A1, A2. SNPs whose allele pair is A/T, T/A, C/G or G/C
 * are left out.
 *
 * Inputs : all .frq files from MAF_channel and all .bim files from
 *          ref_chr_channel, staged together in the task directory.
 * Output : 1000G_<ancestry>_0_01.csv, emitted on ref_panel_wg_channel and
 *          copied to <launchDir>/Ref_Panel.
 */
process create_WG_reference_panel {
    publishDir "${launchDir}/Ref_Panel", pattern: "*.csv", mode: 'copy'

    input:
    file maf_files from MAF_channel.collect()
    file chr_files from ref_chr_channel.collect()

    output:
    file "1000G_${params.ancestry}_0_01.csv" into ref_panel_wg_channel

    script:
    """
    #!/usr/bin/env python3
    import pandas as pd

    pref = "${params.prefix}"
    suf = "${params.suffix}"

    refchr_list = []
    for chrom in range(1, 23):
        bim_path = "{0}{1}{2}.bim".format(pref, chrom, suf)
        print(bim_path)

        # .bim files have no header line, so column names are supplied here.
        position = pd.read_csv(bim_path, sep='\t', names=['chr', "rsid", "?", "pos", "ref_al", "alt_al"])
        position.set_index("rsid", inplace=True)

        # .frq files are whitespace-aligned, hence the regex separator.
        ref_chr = pd.read_csv("./chr{0}.frq".format(chrom), sep="\\s+")

        # Look up the position of each SNP by its identifier.
        # NOTE(review): assumes identifiers are unique within a .bim file;
        # duplicates would make this assignment fail on a length mismatch.
        ref_chr['pos'] = position.loc[ref_chr.SNP, "pos"].values
        refchr_list.append(ref_chr[["CHR", "pos", "SNP", "A1", "A2", "MAF"]])

    ref = pd.concat(refchr_list)

    # Drop SNPs whose two alleles are complementary (A/T or C/G pairs).
    is_complementary = (ref.A1 + ref.A2).isin(["AT", 'TA', 'CG', 'GC'])
    ref.loc[~is_complementary, ["CHR", "SNP", "MAF", "pos", "A1", "A2"]].to_csv("1000G_${params.ancestry}_0_01.csv", index=False, header=False, sep="\t")
    """
}
/* Folder holding the per-chromosome reference panel (chr*.bim). */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_GnomAD/hg37/FINNS/ref_panel'

/* Folder under which the imputed files are published. */
params.output_folder = "/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/"

/* Reference panel .bim files, one per chromosome. */
ref_chr_channel2 = Channel.fromPath("${params.ref_panel}/chr*.bim")

/* Every file of the LD matrix folder. */
ld_channel = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_GnomAD/hg37/FINNS/ld_mat/*")

/* Harmonized z-score files; the glob only selects chromosome 22 files. */
cleaned_gwas_chr_channel = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/harmonized_new_index/z_*chr22.txt")
/*
 * perf_raiss
 * Runs `raiss performance-grid-search` once per harmonized z-score file.
 *
 * The study name and the chromosome are parsed from the input file name,
 * which is split on '_': fields 2-3 give the study, and field 4 (with its
 * extension removed) gives the chromosome, e.g. z_<a>_<b>_chr22.txt yields
 * study "<a>_<b>" and chrom "chr22".
 *
 * Inputs : one z-score file, plus all reference panel .bim files and all LD
 *          files staged in the task directory (hence the ./ folders).
 * Output : imputed_gnomad/*.txt, emitted on imputed_gwas_channel and copied
 *          under params.output_folder.
 */
process perf_raiss {
    publishDir "${params.output_folder}", pattern: "imputed_gnomad/*.txt", mode: 'copy'

    input:
    file gwas_files from cleaned_gwas_chr_channel
    file ref_file from ref_chr_channel2.collect()
    file ld_file from ld_channel.collect()

    output:
    file "imputed_gnomad/*.txt" into imputed_gwas_channel

    script:
    """
    mkdir -p imputed_gnomad
    mkdir -p masked_zscore
    mkdir -p raiss_report

    chrom=\$(echo ${gwas_files} | cut -d '_' -f4 | cut -d "." -f1)
    study=\$(echo ${gwas_files} | cut -d '_' -f2,3)
    echo \$chrom
    echo \$study

    # The chromosome parsed from the file name is passed on, instead of a
    # hard-coded chr22, so the step follows whatever file it receives.
    raiss --ld-folder ./ --ref-folder ./ --gwas \$study --chrom \$chrom --ld-type scipy performance-grid-search --harmonized-folder ./ --masked-folder ./masked_zscore/ --imputed-folder ./imputed_gnomad/ --output-path ./raiss_report --eigen-ratio-grid '[0.000001, 0.1, 0.001]' --ld-threshold-grid '[0,10]' --n-cpu 8
    """
}
/* Folder under which the sanity-check reports are published. */
params.output_folder = "/pasteur/zeus/projets/p02/GGS_JASS/DATA_BATCH_04_11_2022/"
/* Ancestry label appended to the report name. */
params.ancestry = ""
/* Folder of harmonized files and folder of imputed files (absolute paths). */
params.harmonized_files = ""
params.imputed_files = ""
/*
 * Name of the trait file; the trait passed to raiss is the part of this name
 * that precedes the first '.'.
 * NOTE(review): the original script read an undefined `trait_file` variable;
 * exposing it as a parameter is an assumption - confirm the intended source.
 */
params.trait_file = ""

/*
 * Sanity_checks
 * Runs `raiss sanity-check` for one trait, using the harmonized and imputed
 * folders given as parameters, and writes the report under ./sanity_checks/.
 *
 * Output : sanity_checks/*.txt (optional), emitted on sanity_checks_channel
 *          and copied under params.output_folder. Declaring the output is
 *          required for publishDir to publish anything.
 */
process Sanity_checks {
    publishDir "${params.output_folder}", pattern: "sanity_checks/*.txt", mode: 'copy'

    input:
    path harmonized_files from params.harmonized_files
    path imputed_files from params.imputed_files
    val trait_file from params.trait_file

    output:
    file "sanity_checks/*.txt" optional true into sanity_checks_channel

    script:
    """
    trait=\$(cut --delimiter '.' -f 1 <<< ${trait_file})
    echo \$trait
    ls

    # Create the very directory that --output-path writes into.
    mkdir -p sanity_checks
    raiss sanity-check --trait \${trait} --harmonized-folder ${params.harmonized_files} --imputed-folder ${params.imputed_files} --output-path ./sanity_checks/sanity_report_${params.ancestry}
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment