Commit c3298448 authored by Hanna JULIENNE

Merge branch 'master' into 'heatmap'

# Conflicts:
#   README.md
parents 2e1c3cdd 84ce2ec4
1 merge request: !10 Updated doc + provided easy link to reference panel
JASS Pipeline © 2019 by Institut Pasteur is licensed under Attribution-NonCommercial-ShareAlike 4.0 International. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/
@@ -81,7 +81,9 @@ Once done you can launch the pipeline by replacing {ABSOLUTE_PATH_TO_PIPEL
nextflow run jass_pipeline.nf --ref_panel_WG {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/Ref_Panel/1000G_EAS_0_01_chr22_21.csv --gwas_folder {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/test_data/hg38_EAS/ --meta-data {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/Data_test_EAS.csv --region {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed --group {ABSOLUTE_PATH_TO_PIPELINE_FOLDER}/input_files/group.txt -with-report jass_report.html -c nextflow_local.config
```
See the description of required parameters in the next section.
-You can specify parameter in the jass_pipeline.nf header if prefered.
+In the command above, parameters are passed directly to Nextflow on the command line; these override any values specified in the jass_pipeline.nf header.
+> **However, you can still specify or modify parameters in the jass_pipeline.nf header if preferred.**
If all went well, you have cleaned the three summary statistics files, aligned them on the reference panel, and integrated them into one database. This database was then used to perform a multi-trait GWAS on the three traits.
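To make the precedence concrete, here is a minimal, hypothetical Nextflow script (`demo.nf` is not part of the pipeline): a default assigned in the script header is used unless the same parameter is supplied with a double-dash flag, while single-dash options such as `-with-report` and `-c` are consumed by Nextflow itself.

```groovy
// demo.nf -- hypothetical example, not a JASS file
params.gwas_folder = "${projectDir}/test_data/hg38_EAS/"  // header default

workflow {
    // prints the header default, or the CLI value if --gwas_folder was given
    println "gwas_folder = ${params.gwas_folder}"
}

// nextflow run demo.nf                            -> header default
// nextflow run demo.nf --gwas_folder /other/dir/  -> CLI value wins
```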
@@ -153,7 +155,7 @@ and require an HPC cluster.
tar -jxvf eur_w_ld_chr.tar.bz2
```
-For hg38 and Admixed American, African, East Asian, European and South-east Asian ancestries you can download our precomputed panels:
+For **hg38 and Admixed American, African, East Asian, European and South-east Asian ancestries** you can download our precomputed panels:
[https://zenodo.org/record/8096589](https://zenodo.org/record/8096589)
For details on the computation method for this panel consult [https://www.biorxiv.org/content/10.1101/2023.06.23.546248v1](https://www.biorxiv.org/content/10.1101/2023.06.23.546248v1).
......
@@ -23,6 +23,12 @@ setkey(trait_name, "ID")
Rho = fread(correlation_file)
setkey(Rho, "V1")
+D = as.matrix(1 - Rho[,-1])               # distance = 1 - correlation
+D[is.na(D)] = 0
+h = hclust(as.dist(D))                    # hierarchical clustering of traits
+order_trait <- trait_name$ID[h$order]     # display traits in clustering order
Pval = fread(p_file)
setkey(Pval, "V1")
@@ -30,7 +36,6 @@ Rho = Rho[trait_name$ID , c("V1",trait_name$ID), with=FALSE]
Pval = Pval[trait_name$ID , c("V1",trait_name$ID), with=FALSE]
cor_long = melt(as.data.table(Rho), id.vars=c('V1'))
-order_trait <- trait_name$ID
pval_long = melt(as.data.table(Pval), id.vars=c('V1'))
@@ -45,6 +50,8 @@ pval_long[,cor_val:= signif(round(cor_long$value, 2), 2)]
pval_long[,full_annot := str_trim(paste0(cor_val, "\n",pval_annote))]
pval_long$full_annot[pval_long$full_annot == "1"] <- ""
pval_long$full_annot[pval_long$full_annot == "NA"] <- ""
cor_long[, variable:=ordered(cor_long$variable, levels=order_trait)]
cor_long[, V1:=ordered(cor_long$V1, levels=order_trait)]
@@ -65,7 +72,10 @@ pG = pG + theme(legend.key.size = unit(1, 'cm'), #change legend key size
pG = pG + geom_text(data=pval_long, aes(x=V1,y=variable, label=full_annot), size=3.5)
png(paste("HeatMap_", strsplit(Args[6], split=".", fixed=T)[[1]][1],".png", sep=""),width = 20, height = 20,
png_width <- max(length(trait_name$ID)*0.5, 10)
png_height <- max(length(trait_name$ID)*0.5, 10)
png(paste("HeatMap_", strsplit(Args[6], split=".", fixed=T)[[1]][1],".png", sep=""),width =png_width , height = png_height,
units = "in",
res = 300,
pointsize = 4)
......
@@ -2,6 +2,10 @@ import re
import pandas as pd
import glob
import numpy as np
+import sys
+ancestry = sys.argv[1]       # ancestry tag, e.g. "EAS"
+current_date = sys.argv[2]   # run identifier (the pipeline passes params.hash_input here)
print("Parsing_correlation")
file_pairs = set(glob.glob("*-_-*.log"))
@@ -134,13 +138,13 @@ for i1, t1 in enumerate(traits):
Sd_cov_matrix_genetic.loc[t2_col, t2_col] = float(L_h2_t2[0].split(":")[1].split(" ")[2].strip("()\n"))
-Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic.csv", sep="\t")
-Covariance_matrix_H0.to_csv("Covariance_matrix_H0.csv", sep="\t")
-Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic.csv", sep="\t")
+# suffix outputs with ancestry and run id so runs do not overwrite each other
+Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t")
+Covariance_matrix_H0.to_csv("Covariance_matrix_H0_"+ancestry+"_"+current_date+".csv", sep="\t")
+Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t")
-Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic.csv", sep="\t")
-Sd_matrix_H0.to_csv("Sd_matrix_H0.csv", sep="\t")
-Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic.csv", sep="\t")
-Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic.csv", sep="\t")
+Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t")
+Sd_matrix_H0.to_csv("Sd_matrix_H0_"+ancestry+"_"+current_date+".csv", sep="\t")
+Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t")
+Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t")
print("Parsing_correlation")
filename internalDataLink Consortium Outcome FullName Type Reference ReferenceLink dataLink Nsample Ncase Ncontrol Nsnp snpid a1 a2 freq pval n z OR se code imp ncas ncont altNcas altNcont Unnamed: 28
GIANT_HEIGHT_Wood_et_al_2014_publicrelease_HapMapCeuFreq.txt GIANT HEIGHT Height Anthropometry Wood et al. 2014 http://www.ncbi.nlm.nih.gov/pubmed/25282103 https://www.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files 253288 2550858.0 MarkerName Allele1 Allele2 Freq.Allele1.HapMapCEU p N b SE
SNP_gwas_mc_merge_nogc.tbl.uniq GIANT BMI Body Mass Index Anthropometry Locke et al. 2015 http://www.ncbi.nlm.nih.gov/pubmed/25673413 https://www.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files 339224 2554637.0 SNP A1 A2 Freq1.Hapmap p N b se
@@ -16,12 +16,12 @@ params.meta_data = "${projectDir}"+"/input_files/Data_test_EAS.csv"
// "${projectDir}"+"/input_files/Meta_data_preliminary_analysis.csv" file describing gwas summary statistic format
params.gwas_folder = "${projectDir}"+'/test_data/hg38_EAS/'
-params.region = Channel.fromPath("${projectDir}"+"/input_files/All_Regions_ALL_ensemble_1000G_hg38_EUR.bed")
+params.region = Channel.fromPath("${projectDir}"+"/input_files/All_Regions_ALL_ensemble_1000G_hg38_EAS.bed")
params.ref_panel_WG = "${projectDir}"+"/Ref_Panel/1000G_EAS_0_01_chr22_21.csv"
params.ancestry="EAS"
params.prefix="ALL_ensemble_1000G_hg38_EUR_chr"
params.prefix="ALL_ensemble_1000G_hg38_EAS_chr"
params.prefix_Impute_GWAS="ALL_ensemble_1000G_hg38_EAS_"
params.suffix=""
params.LD_SCORE_folder='/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LDSCORE/EAS/'
@@ -45,7 +45,7 @@ params.ld_type = "plink"
params.ld_folder = Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/Matrix_LD_RAISS/EAS/*.ld").collect()
params.ref_panel = '/pasteur/zeus/projets/p02/GGS_JASS/jass_analysis_pipeline/Ref_panel_by_chr/'
chr_channel = Channel.from(1..22)
-params.ref_chr_path= Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EUR/ALL_ensemble_1000G_hg38_EUR_chr*.bim").collect()
+params.ref_chr_path= Channel.fromPath("/pasteur/zeus/projets/p02/GGS_WKD/DATA_1000G/Panels/EAS/ALL_ensemble_1000G_hg38_EAS_chr*.bim").collect()
params.perform_sanity_checks=false
params.perform_accuracy_checks=false
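As an aside on the channel definitions above, here is a minimal sketch of how `Channel.fromPath(...).collect()` behaves (the path below is illustrative, not a real location):

```groovy
// fromPath expands the glob into one item per chromosome file;
// collect() bundles them into a single list so a downstream process
// can stage all .bim files in one task.
Channel
    .fromPath("/data/Panels/EAS/ALL_ensemble_1000G_hg38_EAS_chr*.bim")
    .collect()
    .view { files -> "staged ${files.size()} .bim files" }
```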
@@ -58,6 +58,34 @@ generate_trait_pairs_channel = "${projectDir}/bin/generate_trait_pairs.py"
parse_correlation_channel = "${projectDir}/bin/parse_correlation_results.py"
make_heatmap_channel = "${projectDir}/bin/make_heatmap.R"
+// the output folder id will be a hash of all parameter values
+string_input = String.valueOf(params.compute_project) +
+    String.valueOf(params.compute_LDSC_matrix) +
+    String.valueOf(params.compute_imputation) +
+    String.valueOf(params.meta_data) +
+    String.valueOf(params.gwas_folder) +
+    String.valueOf(params.region) +
+    String.valueOf(params.ref_panel_WG) +
+    String.valueOf(params.ancestry) +
+    String.valueOf(params.prefix) +
+    String.valueOf(params.prefix_Impute_GWAS) +
+    String.valueOf(params.suffix) +
+    String.valueOf(params.LD_SCORE_folder) +
+    String.valueOf(params.output_folder) +
+    String.valueOf(params.diagnostic_folder) +
+    String.valueOf(params.r2threshold) +
+    String.valueOf(params.eigenthreshold) +
+    String.valueOf(params.minimumld) +
+    String.valueOf(params.ld_type) +
+    String.valueOf(params.ld_folder) +
+    String.valueOf(params.ref_panel) +
+    String.valueOf(params.ref_chr_path) +
+    String.valueOf(params.perform_sanity_checks) +
+    String.valueOf(params.perform_accuracy_checks) +
+    String.valueOf(params.group)
+params.hash_params = string_input.md5()
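A quick sketch of the idea behind `params.hash_params` (the values below are illustrative, not the real parameter set): Nextflow extends strings with an `md5()` helper, so hashing the concatenated parameter values yields a run identifier that is stable for identical settings and changes whenever any parameter changes.

```groovy
// illustrative values only
def run_a = "EAS" + "plink" + "false"
def run_b = "EUR" + "plink" + "false"   // one parameter differs
assert run_a.md5() != run_b.md5()       // any changed parameter changes the hash
println run_a.md5()                     // 32-character hex digest
```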
/*****************************/
/* process inclusion */
/*****************************/
......
@@ -5,8 +5,7 @@ process Meta_data_GWAS{
output:
path "meta_data_chk*.csv"
"""
-d=`wc -l ${pheno_list}`
-e=`echo \$d | cut -d ' ' -f 1`
+e=\$(grep -c '' ${pheno_list})   # robust line count: also counts a final line without a trailing newline
for ((i = 2; i <= \$e; i++));
do
@@ -17,16 +16,16 @@
}
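As an aside, and not what the pipeline does: Nextflow's built-in `splitCsv` operator can produce a similar one-row-per-chunk split of the metadata file without a shell loop (the tab separator and `filename` column are assumptions based on the metadata format shown earlier in this diff):

```groovy
// hypothetical alternative to the shell loop above
Channel
    .fromPath("${projectDir}/input_files/Data_test_EAS.csv")
    .splitCsv(header: true, sep: '\t')
    .view { row -> "one chunk per GWAS: ${row.filename}" }
```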
process Clean_GWAS{
publishDir "${params.output_folder}/harmonized_GWAS_files/", pattern: "*.txt", mode: 'copy'
publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file/*.txt", mode: 'copy'
publishDir "${params.output_folder}/harmonized_GWAS_files_${params.ancestry}_${params.hash_input}/", pattern: "*.txt", mode: 'copy'
publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file_${params.ancestry}_${params.hash_input}/*.txt", mode: 'copy'
input:
path ref_panel
path meta_chunk
output:
path "harmonized_GWAS_1_file/*.txt", emit: cleaned_gwas_channel
path "harmonized_GWAS_1_file_${params.ancestry}_${params.hash_input}/*.txt", emit: cleaned_gwas_channel
path "*.txt", emit: cleaned_gwas_chr_channel
"""
-mkdir -p harmonized_GWAS_1_file
+mkdir -p harmonized_GWAS_1_file_${params.ancestry}_${params.hash_input}
pwd
ls ${params.gwas_folder}
echo ${params.gwas_folder}
@@ -38,6 +37,6 @@ process Clean_GWAS{
jass_preprocessing --gwas-info \$full_path --ref-path ${ref_panel} \
--input-folder ${params.gwas_folder} --diagnostic-folder ${params.diagnostic_folder} \
---output-folder ./ --output-folder-1-file harmonized_GWAS_1_file/
+--output-folder ./ --output-folder-1-file harmonized_GWAS_1_file_${params.ancestry}_${params.hash_input}/
"""
}
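The renamed `publishDir` targets in this commit all follow one pattern; here is a minimal, hypothetical process showing how interpolating `params.ancestry` and `params.hash_input` into the target gives every ancestry/run combination its own output directory:

```groovy
// hypothetical stand-alone example of the publishDir naming scheme
params.output_folder = "results"
params.ancestry      = "EAS"
params.hash_input    = "abc123"   // would normally come from the parameter hash

process Demo {
    publishDir "${params.output_folder}/harmonized_GWAS_files_${params.ancestry}_${params.hash_input}/",
               pattern: "*.txt", mode: 'copy'
    output:
    path "*.txt"
    """
    echo ok > demo.txt
    """
}

workflow { Demo() }
```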
process Create_inittable_LDSC {
publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy'
publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.hash_input}/", pattern: "*.hdf5", mode: 'copy'
input:
path cleaned_gwas_chr
path cleaned_gwas
@@ -22,7 +22,7 @@ process Create_inittable_LDSC {
}
process Create_inittable {
publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy'
publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.hash_input}/", pattern: "*.hdf5", mode: 'copy'
input:
path cleaned_gwas_chr
path cleaned_gwas
@@ -54,7 +54,7 @@ process Get_pheno_group {
}
process Create_project_data {
publishDir "${params.output_folder}/worktable/", pattern:"worktable_*.hdf5", mode: 'copy'
publishDir "${params.output_folder}/worktable_${params.ancestry}_${params.hash_input}/", pattern:"worktable_*.hdf5", mode: 'copy'
publishDir "${params.output_folder}/quadrant/", pattern:"quadrant_*.png", mode: 'copy'
publishDir "${params.output_folder}/manhattan/", pattern:"manhattan_*.png", mode: 'copy'
input:
@@ -77,4 +77,4 @@
jass create-project-data --phenotypes \$pheno_list --init-table-path ./${init_table} --worktable-path ./worktable_bis_\$group_tag.hdf5 --manhattan-plot-path ./manhattan_\$group_tag.png --quadrant-plot-path ./quadrant_\$group_tag.png
done
"""
-}
\ No newline at end of file
+}
process Munge_LDSC_data {
publishDir "${params.output_folder}", pattern: "ldsc_data/data_*.sumstats.gz", mode: 'copy'
publishDir "${params.output_folder}", pattern: "ldsc_data/*.log", mode: 'copy'
publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.hash_input}/data_*.sumstats.gz", mode: 'copy'
publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.hash_input}/*.log", mode: 'copy'
input:
path clean_gwas
@@ -27,7 +27,7 @@
process Heritability_LDSC_data {
publishDir "${params.output_folder}/h2_data/", pattern: "*.log", mode: 'copy'
publishDir "${params.output_folder}/h2_data_${params.ancestry}_${params.hash_input}/", pattern: "*.log", mode: 'copy'
input:
path ldsc_data
output:
@@ -56,7 +56,7 @@ process Correlation_LDSC_data {
memory {8.GB * task.attempt}
time {24.h * task.attempt}
publishDir "${params.output_folder}/cor_data/", pattern: "*.log", mode: 'copy'
publishDir "${params.output_folder}/cor_data_${params.ancestry}_${params.hash_input}/", pattern: "*.log", mode: 'copy'
input:
path trait_pair
path ldsc_data
@@ -90,13 +90,13 @@ process Parsing_correlation_matrices {
path ldsc_data
path h2_ld
output:
path "Covariance_matrix_H0.csv", emit: cov_H0_matrice_channel
path "Covariance_matrix_genetic.csv", emit: cov_gen_matrice_channel
path "Covariance_matrix_H0_${params.ancestry}_${params.hash_input}.csv", emit: cov_H0_matrice_channel
path "Covariance_matrix_genetic_${params.ancestry}_${params.hash_input}.csv", emit: cov_gen_matrice_channel
path "*.csv", emit: parsing_results
when:
params.compute_LDSC_matrix
"""
-python3 ${parsing_script}
+python3 ${parsing_script} ${params.ancestry} ${params.hash_input}
"""
}
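For clarity on how `${params.ancestry}` and `${params.hash_input}` reach the Python script: values interpolated into a process script block become ordinary command-line arguments, which the script reads back via `sys.argv`. A minimal, hypothetical process:

```groovy
params.ancestry   = "EAS"
params.hash_input = "abc123"

process ParseDemo {
    output:
    stdout
    """
    python3 -c 'import sys; print(sys.argv[1], sys.argv[2])' ${params.ancestry} ${params.hash_input}
    """
}

workflow { ParseDemo() | view }
```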
@@ -116,7 +116,7 @@ process Make_HeatMap {
params.compute_LDSC_matrix
"""
-Rscript ${make_heatmap_script} Correlation_matrix_genetic.csv Pval_cor_matrix_genetic.csv
+Rscript ${make_heatmap_script} Correlation_matrix_genetic_${params.ancestry}_${params.hash_input}.csv Pval_cor_matrix_genetic_${params.ancestry}_${params.hash_input}.csv
"""
......