Skip to content
Snippets Groups Projects
Commit d1a8d71b authored by Hanna JULIENNE
Browse files

add script to compile chromosome panel into a GW panel

parent 24a5de55
No related branches found
No related tags found
1 merge request: !3 "Raiss report integration"
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
JASS suite pipeline
authors : Hanna Julienne, Hervé Ménager & Lucie Troubat
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/* Switch for optional pipeline steps.
   NOTE(review): not read by any process visible in this script - confirm it is still needed. */
params.compute_project = false

/* Input data locations */
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/'
params.region = '/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Regions_LD/EAS/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed'
params.output_folder = "${launchDir}"

/* Naming scheme of the per-chromosome PLINK files:
   <prefix><chromosome number><suffix>.<bed|bim|fam> */
params.ancestry = "EAS"
params.prefix = "ALL_ensemble_1000G_hg38_EAS_chr"
params.prefix_Impute_GWAS = "ALL_ensemble_1000G_hg38_EAS_"
params.suffix = ""

/* Glob shared by every reference panel channel. It is built from params.prefix and
   params.suffix (instead of a hard-coded file name) so that overriding these parameters
   stages the same files that the process scripts later open by name.
   With the default parameters the resulting globs are identical to the former literals. */
panel_glob = params.ref_panel + "/${params.prefix}*${params.suffix}"

/* One task per autosome */
chr_channel = Channel.from(1..22)

/* Per-chromosome .bim files; a DSL1 channel can be consumed only once, hence the duplicates.
   NOTE(review): ref_chr_channel2 is not consumed by any process visible in this script. */
ref_chr_channel = Channel.fromPath(panel_glob + ".bim")
ref_chr_channel2 = Channel.fromPath(panel_glob + ".bim")

/* Every file of the PLINK file sets (any extension), staged for plink --bfile */
ref_chr_channel3 = Channel.fromPath(panel_glob + ".*")
/*
Compute_MAF: run `plink --freq` once per value emitted by chr_channel.

Inputs : every reference panel file (collected, so all of them are staged in each task
         work directory) and one chromosome number.
Output : the chr<N>.frq file written by plink, emitted into MAF_channel.
*/
process Compute_MAF{
    input:
    file ref_panel from ref_chr_channel3.collect()
    val chr from chr_channel

    output:
    file "*.frq" into MAF_channel

    script:
    """
    echo "Compute_MAF"
    # PLINK file-set name (no extension) for the current chromosome
    plink_fileset="${params.prefix}${chr}${params.suffix}"
    echo \$plink_fileset
    plink --bfile \${plink_fileset} --freq --out ./chr${chr}
    """
}
/*
create_WG_reference_panel: merge the per-chromosome PLINK .bim and .frq files of
chromosomes 1 to 22 into a single whole-genome table.

Inputs : all chr<N>.frq files from MAF_channel and all .bim files from ref_chr_channel,
         both collected so that the task runs once with every file staged.
Output : 1000G_<ancestry>_0_01.csv, tab-separated, no header, with columns
         CHR, SNP, MAF, pos, A1, A2. Variants whose allele pair is AT, TA, CG or GC are
         dropped. The file is copied to <params.output_folder>/Ref_Panel.

NOTE(review): the "_0_01" in the file name suggests a MAF threshold, but no MAF filter
is applied in this script - confirm where (or whether) that filtering happens.
*/
process create_WG_reference_panel{
    // params.output_folder defaults to launchDir, so the default destination is unchanged
    publishDir "${params.output_folder}/Ref_Panel", pattern: "*.csv", mode: 'copy'

    input:
    file maf_files from MAF_channel.collect()
    file chr_files from ref_chr_channel.collect()

    output:
    file "1000G_${params.ancestry}_0_01.csv" into ref_panel_wg_channel

    script:
    """
    #!/usr/bin/env python3
    import os

    import pandas as pd

    print(os.getcwd())

    pref = "${params.prefix}"
    suf = "${params.suffix}"
    out_csv = "1000G_${params.ancestry}_0_01.csv"

    refchr_list = []
    for chrom in range(1, 23):
        bim_path = "{0}{1}{2}.bim".format(pref, chrom, suf)
        print(bim_path)

        bim = pd.read_csv(
            bim_path,
            sep="\\t",
            names=["chr", "rsid", "?", "pos", "ref_al", "alt_al"],
        )
        # Raw string: a plain "\\s+" is an invalid escape sequence for Python.
        frq = pd.read_csv("./chr{0}.frq".format(chrom), sep=r"\\s+")

        if len(bim) == len(frq) and bim["rsid"].tolist() == frq["SNP"].tolist():
            # Both tables list the same variants in the same order: take the
            # positions row by row, which also works when ids are duplicated.
            frq["pos"] = bim["pos"].values
        else:
            # Otherwise look each SNP id up in the .bim table.
            frq["pos"] = bim.set_index("rsid").loc[frq["SNP"], "pos"].values

        refchr_list.append(frq[["CHR", "pos", "SNP", "A1", "A2", "MAF"]])

    ref = pd.concat(refchr_list)

    # Drop variants whose concatenated alleles are AT, TA, CG or GC.
    ambiguous = (ref.A1 + ref.A2).isin(["AT", "TA", "CG", "GC"])
    ref.loc[~ambiguous][["CHR", "SNP", "MAF", "pos", "A1", "A2"]].to_csv(
        out_csv, index=False, header=False, sep="\\t"
    )
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment