From d00aaca1b1b963c9301ea9ce0c65d44d33642610 Mon Sep 17 00:00:00 2001
From: Hanna  JULIENNE <hanna.julienne@pasteur.fr>
Date: Wed, 21 Feb 2024 14:35:52 +0100
Subject: [PATCH] Output name improved: append ancestry and run date to output names

---
 bin/parse_correlation_results.py | 18 +++++++++++-------
 jass_pipeline.nf                 |  4 ++++
 modules/Clean_GWAS.nf            | 13 ++++++-------
 modules/JASS.nf                  |  8 ++++----
 modules/LDSC.nf                  | 16 ++++++++--------
 5 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/bin/parse_correlation_results.py b/bin/parse_correlation_results.py
index b7f5241..5509f9f 100644
--- a/bin/parse_correlation_results.py
+++ b/bin/parse_correlation_results.py
@@ -2,6 +2,10 @@ import re
 import pandas as pd
 import glob
 import numpy as np
+import sys
+
+ancestry = sys.argv[1]  # first CLI argument: ancestry label embedded in the output CSV names
+current_date = sys.argv[2]  # second CLI argument: date string embedded in the output CSV names
 
 print("Parsing_correlation")
 file_pairs = set(glob.glob("*-_-*.log"))
@@ -134,13 +138,13 @@ for i1, t1 in enumerate(traits):
                 Sd_cov_matrix_genetic.loc[t2_col, t2_col] = float(L_h2_t2[0].split(":")[1].split(" ")[2].strip("()\n"))
 
 
-Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic.csv", sep="\t")
-Covariance_matrix_H0.to_csv("Covariance_matrix_H0.csv", sep="\t")
-Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic.csv", sep="\t")
+Covariance_matrix_genetic.to_csv("Covariance_matrix_genetic_{}_{}.csv".format(ancestry, current_date), sep="\t")
+Covariance_matrix_H0.to_csv("Covariance_matrix_H0_{}_{}.csv".format(ancestry, current_date), sep="\t")
+Correlation_matrix_genetic.to_csv("Correlation_matrix_genetic_{}_{}.csv".format(ancestry, current_date), sep="\t")
 
-Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic.csv", sep="\t")
-Sd_matrix_H0.to_csv("Sd_matrix_H0.csv", sep="\t")
-Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic.csv", sep="\t")
-Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic.csv", sep="\t")
+Sd_cov_matrix_genetic.to_csv("Sd_cov_matrix_genetic_{}_{}.csv".format(ancestry, current_date), sep="\t")
+Sd_matrix_H0.to_csv("Sd_matrix_H0_{}_{}.csv".format(ancestry, current_date), sep="\t")
+Sd_cor_matrix_genetic.to_csv("Sd_cor_matrix_genetic_{}_{}.csv".format(ancestry, current_date), sep="\t")
+Pval_matrix_genetic.to_csv("Pval_cor_matrix_genetic_{}_{}.csv".format(ancestry, current_date), sep="\t")
 
 print("Parsing_correlation")
diff --git a/jass_pipeline.nf b/jass_pipeline.nf
index 5fef9b0..06dd01d 100644
--- a/jass_pipeline.nf
+++ b/jass_pipeline.nf
@@ -58,6 +58,10 @@ generate_trait_pairs_channel = "${projectDir}/bin/generate_trait_pairs.py"
 parse_correlation_channel = "${projectDir}/bin/parse_correlation_results.py"
 make_heatmap_channel = "${projectDir}/bin/make_heatmap.R"
 
+/* Run date (yyyy-MM-dd), exposed as params.current_date so modules can tag output names. NOTE(review): the value changes daily, which may affect -resume caching; confirm. */
+def today = new Date().format('yyyy-MM-dd')
+params.current_date = today
+
 /*****************************/
 /*   process inclusion   */
 /*****************************/
diff --git a/modules/Clean_GWAS.nf b/modules/Clean_GWAS.nf
index 962ef2c..f01008b 100644
--- a/modules/Clean_GWAS.nf
+++ b/modules/Clean_GWAS.nf
@@ -5,8 +5,7 @@ process Meta_data_GWAS{
     output:
         path "meta_data_chk*.csv"
     """
-    d=`wc -l ${pheno_list}`
-    e=`echo \$d | cut -d ' ' -f 1`
+    e=\$(grep -c '' ${pheno_list})
 
     for ((i = 2; i <= \$e; i++));
     do
@@ -17,16 +16,16 @@ process Meta_data_GWAS{
 }
 
 process Clean_GWAS{
-    publishDir "${params.output_folder}/harmonized_GWAS_files/", pattern: "*.txt", mode: 'copy'
-    publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file/*.txt", mode: 'copy'
+    publishDir "${params.output_folder}/harmonized_GWAS_files_${params.ancestry}_${params.current_date}/", pattern: "*.txt", mode: 'copy'
+    publishDir "${params.output_folder}", pattern: "harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/*.txt", mode: 'copy'
     input:
         path ref_panel
         path meta_chunk
     output:
-        path "harmonized_GWAS_1_file/*.txt", emit: cleaned_gwas_channel
+        path "harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/*.txt", emit: cleaned_gwas_channel
         path "*.txt", emit: cleaned_gwas_chr_channel
     """
-    mkdir -p harmonized_GWAS_1_file
+    mkdir -p harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}
     pwd
     ls ${params.gwas_folder}
     echo ${params.gwas_folder}
@@ -38,6 +37,6 @@ process Clean_GWAS{
 
     jass_preprocessing --gwas-info \$full_path --ref-path ${ref_panel} \
         --input-folder ${params.gwas_folder} --diagnostic-folder ${params.diagnostic_folder} \
-        --output-folder ./ --output-folder-1-file harmonized_GWAS_1_file/
+        --output-folder ./ --output-folder-1-file harmonized_GWAS_1_file_${params.ancestry}_${params.current_date}/
     """
 }
diff --git a/modules/JASS.nf b/modules/JASS.nf
index 1ac35de..4185e1f 100644
--- a/modules/JASS.nf
+++ b/modules/JASS.nf
@@ -1,6 +1,6 @@
 
 process Create_inittable_LDSC {
-    publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy'
+    publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.current_date}/", pattern: "*.hdf5", mode: 'copy'
     input:
         path cleaned_gwas_chr
         path cleaned_gwas
@@ -22,7 +22,7 @@ process Create_inittable_LDSC {
 }
 
 process Create_inittable {
-    publishDir "${params.output_folder}/init_table/", pattern: "*.hdf5", mode: 'copy'
+    publishDir "${params.output_folder}/init_table_${params.ancestry}_${params.current_date}/", pattern: "*.hdf5", mode: 'copy'
     input:
         path cleaned_gwas_chr
         path cleaned_gwas
@@ -54,7 +54,7 @@ process Get_pheno_group {
 }
 
 process Create_project_data {
-    publishDir "${params.output_folder}/worktable/", pattern:"worktable_*.hdf5", mode: 'copy'
+    publishDir "${params.output_folder}/worktable_${params.ancestry}_${params.current_date}/", pattern:"worktable_*.hdf5", mode: 'copy'
     publishDir "${params.output_folder}/quadrant/", pattern:"quadrant_*.png", mode: 'copy'
     publishDir "${params.output_folder}/manhattan/", pattern:"manhattan_*.png", mode: 'copy'
     input:
@@ -77,4 +77,4 @@ process Create_project_data {
             jass create-project-data --phenotypes \$pheno_list --init-table-path ./${init_table} --worktable-path ./worktable_bis_\$group_tag.hdf5 --manhattan-plot-path ./manhattan_\$group_tag.png --quadrant-plot-path ./quadrant_\$group_tag.png
         done
     """
-}
\ No newline at end of file
+}
diff --git a/modules/LDSC.nf b/modules/LDSC.nf
index f6e8ce9..f78342c 100644
--- a/modules/LDSC.nf
+++ b/modules/LDSC.nf
@@ -1,7 +1,7 @@
 
 process Munge_LDSC_data {
-    publishDir "${params.output_folder}", pattern: "ldsc_data/data_*.sumstats.gz", mode: 'copy'
-    publishDir "${params.output_folder}", pattern: "ldsc_data/*.log", mode: 'copy'
+    publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.current_date}/data_*.sumstats.gz", mode: 'copy'
+    publishDir "${params.output_folder}", pattern: "ldsc_data_${params.ancestry}_${params.current_date}/*.log", mode: 'copy'
 
     input:
         path clean_gwas
@@ -27,7 +27,7 @@ process Munge_LDSC_data {
 
 process Heritability_LDSC_data {
 
-    publishDir "${params.output_folder}/h2_data/", pattern: "*.log", mode: 'copy'
+    publishDir "${params.output_folder}/h2_data_${params.ancestry}_${params.current_date}/", pattern: "*.log", mode: 'copy'
     input:
         path ldsc_data
     output:
@@ -56,7 +56,7 @@ process Correlation_LDSC_data {
     memory {8.GB * task.attempt}
     time {24.h * task.attempt}
 
-    publishDir "${params.output_folder}/cor_data/", pattern: "*.log", mode: 'copy'
+    publishDir "${params.output_folder}/cor_data_${params.ancestry}_${params.current_date}/", pattern: "*.log", mode: 'copy'
     input:
         path trait_pair
         path ldsc_data 
@@ -90,13 +90,13 @@ process Parsing_correlation_matrices {
         path ldsc_data
         path h2_ld
     output:
-        path "Covariance_matrix_H0.csv", emit: cov_H0_matrice_channel
-        path "Covariance_matrix_genetic.csv", emit: cov_gen_matrice_channel
+        path "Covariance_matrix_H0_${params.ancestry}_${params.current_date}.csv", emit: cov_H0_matrice_channel
+        path "Covariance_matrix_genetic_${params.ancestry}_${params.current_date}.csv", emit: cov_gen_matrice_channel
         path "*.csv", emit: parsing_results
     when:
         params.compute_LDSC_matrix
     """
-    python3 ${parsing_script}
+    python3 ${parsing_script} ${params.ancestry} ${params.current_date}
 
     """
 }
@@ -116,7 +116,7 @@ process Make_HeatMap {
         params.compute_LDSC_matrix
 
     """
-        Rscript ${make_heatmap_script} Correlation_matrix_genetic.csv Pval_cor_matrix_genetic.csv
+        Rscript ${make_heatmap_script} Correlation_matrix_genetic_${params.ancestry}_${params.current_date}.csv Pval_cor_matrix_genetic_${params.ancestry}_${params.current_date}.csv
 
     """
 
-- 
GitLab