From d00aaca1b1b963c9301ea9ce0c65d44d33642610 Mon Sep 17 00:00:00 2001 From: Hanna JULIENNE <hanna.julienne@pasteur.fr> Date: Wed, 21 Feb 2024 14:35:52 +0100 Subject: [PATCH] Output name improved --- bin/parse_correlation_results.py | 18 +++++++++++------- jass_pipeline.nf | 4 ++++ modules/Clean_GWAS.nf | 13 ++++++------- modules/JASS.nf | 8 ++++---- modules/LDSC.nf | 16 ++++++++-------- 5 files changed, 33 insertions(+), 26 deletions(-) diff --git a/bin/parse_correlation_results.py b/bin/parse_correlation_results.py index b7f5241..5509f9f 100644 --- a/bin/parse_correlation_results.py +++ b/bin/parse_correlation_results.py @@ -2,6 +2,10 @@ import re import pandas as pd import glob import numpy as np +import sys + +ancestry = sys.argv[1] +current_date = sys.argv[2] print("Parsing_correlation") file_pairs = set(glob.glob("*-_-*.log")) @@ -134,13 +138,13 @@ for i1, t1 in enumerate(traits): Sd_cov_matrix_genetic.loc[t2_col, t2_col] = float(L_h2_t2[0].split(":")[1].split(" ")[2].strip("()\n")) -Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic.csv", sep="\t") -Covariance_matrix_H0.to_csv("Covariance_matrix_H0.csv", sep="\t") -Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic.csv", sep="\t") +Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t") +Covariance_matrix_H0.to_csv("Covariance_matrix_H0_"+ancestry+"_"+current_date+".csv", sep="\t") +Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t") -Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic.csv", sep="\t") -Sd_matrix_H0.to_csv("Sd_matrix_H0.csv", sep="\t") -Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic.csv", sep="\t") -Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic.csv", sep="\t") +Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t") +Sd_matrix_H0.to_csv("Sd_matrix_H0_"+ancestry+"_"+current_date+".csv", sep="\t") +Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t") +Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic_"+ancestry+"_"+current_date+".csv", sep="\t") print("Parsing_correlation") diff --git a/jass_pipeline.nf b/jass_pipeline.nf index 5fef9b0..06dd01d 100644 --- a/jass_pipeline.nf +++ b/jass_pipeline.nf @@ -58,6 +58,10 @@ generate_trait_pairs_channel = "${projectDir}/bin/generate_trait_pairs.py" parse_correlation_channel = "${projectDir}/bin/parse_correlation_results.py" make_heatmap_channel = "${projectDir}/bin/make_heatmap.R" +/* current date */ +def today = new Date().format('yyyy-MM-dd') +params.current_date = today + /*****************************/ /* process inclusion */ /*****************************/ diff --git a/modules/Clean_GWAS.nf b/modules/Clean_GWAS.nf index 962ef2c..f01008b 100644 --- a/modules/Clean_GWAS.nf +++ b/modules/Clean_GWAS.nf @@ -5,8 +5,7 @@ process Meta_data_GWAS{ output: path "meta_data_chk*.csv" """ - d=`wc -l ${pheno_list}` - e=`echo \$d | cut -d ' ' -f 1` + e=\$(grep -c '' ${pheno_list}) for ((i = 2; i <= \$e; i++)); do @@ -17,16 +16,16 @@ process Meta_data_GWAS{ } process Clean_GWAS{ - publishDir "${params.output_folder}/harmonized_GWAS_files/", pattern: "*.txt", mode: 'copy' - publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file/*.txt", mode: 'copy' + publishDir "${params.output_folder}/harmonized_GWAS_files_${params.ancestry}_${params.current_date}/", pattern: "*.txt", mode: 'copy' + publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/*.txt", mode: 'copy' input: path ref_panel path meta_chunk output: - path "harmonized_GWAS_1_file/*.txt", emit: cleaned_gwas_channel + path "harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/*.txt", emit: cleaned_gwas_channel path "*.txt", emit: cleaned_gwas_chr_channel """ - mkdir -p harmonized_GWAS_1_file + mkdir -p harmonized_GWAS_1_file_${params.ancestry}_${params.current_date} pwd ls ${params.gwas_folder} echo ${params.gwas_folder} @@ -38,6 +37,6 @@ process Clean_GWAS{ jass_preprocessing --gwas-info \$full_path --ref-path ${ref_panel} \ --input-folder ${params.gwas_folder} --diagnostic-folder ${params.diagnostic_folder} \ - --output-folder ./ --output-folder-1-file harmonized_GWAS_1_file/ + --output-folder ./ --output-folder-1-file harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/ """ } diff --git a/modules/JASS.nf b/modules/JASS.nf index 1ac35de..4185e1f 100644 --- a/modules/JASS.nf +++ b/modules/JASS.nf @@ -1,6 +1,6 @@ process Create_inittable_LDSC { - publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy' + publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.current_date}/", pattern: "*.hdf5", mode: 'copy' input: path cleaned_gwas_chr path cleaned_gwas @@ -22,7 +22,7 @@ process Create_inittable_LDSC { } process Create_inittable { - publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy' + publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.current_date}/", pattern: "*.hdf5", mode: 'copy' input: path cleaned_gwas_chr path cleaned_gwas @@ -54,7 +54,7 @@ process Get_pheno_group { } process Create_project_data { - publishDir "${params.output_folder}/worktable/", pattern:"worktable_*.hdf5", mode: 'copy' + publishDir "${params.output_folder}/worktable_${params.ancestry}_${params.current_date}/", pattern:"worktable_*.hdf5", mode: 'copy' publishDir "${params.output_folder}/quadrant/", pattern:"quadrant_*.png", mode: 'copy' publishDir "${params.output_folder}/manhattan/", pattern:"manhattan_*.png", mode: 'copy' input: @@ -77,4 +77,4 @@ process Create_project_data { jass create-project-data --phenotypes \$pheno_list --init-table-path ./${init_table} --worktable-path ./worktable_bis_\$group_tag.hdf5 --manhattan-plot-path ./manhattan_\$group_tag.png --quadrant-plot-path ./quadrant_\$group_tag.png done """ -} \ No newline at end of file +} diff --git a/modules/LDSC.nf b/modules/LDSC.nf index f6e8ce9..f78342c 100644 --- a/modules/LDSC.nf +++ b/modules/LDSC.nf @@ -1,7 +1,7 @@ process Munge_LDSC_data { - publishDir "${params.output_folder}", pattern: "ldsc_data/data_*.sumstats.gz", mode: 'copy' - publishDir "${params.output_folder}", pattern: "ldsc_data/*.log", mode: 'copy' + publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.current_date}/data_*.sumstats.gz", mode: 'copy' + publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.current_date}/*.log", mode: 'copy' input: path clean_gwas @@ -27,7 +27,7 @@ process Munge_LDSC_data { process Heritability_LDSC_data { - publishDir "${params.output_folder}/h2_data/", pattern: "*.log", mode: 'copy' + publishDir "${params.output_folder}/h2_data_${params.ancestry}_${params.current_date}/", pattern: "*.log", mode: 'copy' input: path ldsc_data output: @@ -56,7 +56,7 @@ process Correlation_LDSC_data { memory {8.GB * task.attempt} time {24.h * task.attempt} - publishDir "${params.output_folder}/cor_data/", pattern: "*.log", mode: 'copy' + publishDir "${params.output_folder}/cor_data_${params.ancestry}_${params.current_date}/", pattern: "*.log", mode: 'copy' input: path trait_pair path ldsc_data @@ -90,13 +90,13 @@ process Parsing_correlation_matrices { path ldsc_data path h2_ld output: - path "Covariance_matrix_H0.csv", emit: cov_H0_matrice_channel - path "Covariance_matrix_genetic.csv", emit: cov_gen_matrice_channel + path "Covariance_matrix_H0_${params.ancestry}_${params.current_date}.csv", emit: cov_H0_matrice_channel + path "Covariance_matrix_genetic_${params.ancestry}_${params.current_date}.csv", emit: cov_gen_matrice_channel path "*.csv", emit: parsing_results when: params.compute_LDSC_matrix """ - python3 ${parsing_script} + python3 ${parsing_script} ${params.ancestry} ${params.current_date} """ } @@ -116,7 +116,7 @@ process Make_HeatMap { params.compute_LDSC_matrix """ - Rscript ${make_heatmap_script} Correlation_matrix_genetic.csv Pval_cor_matrix_genetic.csv + Rscript ${make_heatmap_script} Correlation_matrix_genetic_${params.ancestry}_${params.current_date}.csv Pval_cor_matrix_genetic_${params.ancestry}_${params.current_date}.csv """ -- GitLab