From feccab2cc027ce2837fbd9f7a9dcb17b4303f093 Mon Sep 17 00:00:00 2001
From: gmillot <gael.millot@pasteur.fr>
Date: Fri, 24 Jun 2022 19:41:30 +0200
Subject: [PATCH] release v3.0: vcf_fisher.nf now parallelized for the whole
 genome

---
 README.md         |  5 +++++
 bin/fisher_lod.py | 17 +++++++++--------
 vcf_fisher.config |  2 +-
 vcf_fisher.nf     | 14 +++++++++-----
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 8710596..debb259 100644
--- a/README.md
+++ b/README.md
@@ -203,6 +203,11 @@ Gitlab developers
 <br /><br />
 ## WHAT'S NEW IN
 
+### v3.0
+
+1) vcf_fisher.nf now parallelized for the whole genome
+
+
 ### v2.1
 
 1) vcf_ficher.nf improved for the whole genome
diff --git a/bin/fisher_lod.py b/bin/fisher_lod.py
index a827afe..0a2ac08 100644
--- a/bin/fisher_lod.py
+++ b/bin/fisher_lod.py
@@ -182,11 +182,11 @@ random.seed(1)
 
 ################ Ignition
 
-if region == "None":
-    region = None
-else:
-    # region = region.strip('[]').replace('"', '').replace(' ', '').split(',') # old version when no parall in nf
-    region = region.replace('"', '').replace(' ', '')
+# if region == "None":
+#    region = None
+#else:
+#    # region = region.strip('[]').replace('"', '').replace(' ', '').split(',') # old version when no parall in nf
+#    region = region.replace('"', '').replace(' ', '')
 
 ################ End ignition
 
@@ -240,9 +240,10 @@ with warnings.catch_warnings():
 #            for v in vcf(i1):
 #                tempo = fisher(v = v, columns = columns)
 #                df = df.append(tempo)
-    for v in vcf:
-        tempo = fisher(v = v, columns = columns)
-        df = df.append(tempo)
+    for v in vcf: # go through every record of the vcf and only process those whose chromosome (v.CHROM) is equal to region
+        if v.CHROM == region:
+            tempo = fisher(v = v, columns = columns)
+            df = df.append(tempo)
 
 # on ecrit la dataframe dans un fichier
 df.to_csv('./fisher.tsv', sep='\t', index=False)
diff --git a/vcf_fisher.config b/vcf_fisher.config
index 45fa722..00ab124 100644
--- a/vcf_fisher.config
+++ b/vcf_fisher.config
@@ -20,7 +20,7 @@ env {
     //Warning: do not write the out_path now. See below. If written here, the one below is not considered"
     ped_path = "/pasteur/zeus/projets/p01/BioIT/gmillot/08002_bourgeron/dataset/Dyslexia.pedigree.txt" // pedigree
     chr_path = "/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/human hg19_grch37/hg19_grch37p5_chr_size_cumul.txt"
-    region = "chr7, chr1, chr2" // region to parse. Write "chr7:0-147000000, chr10:1000000-2000000" for a single region, "chr7:0-147000000, chr10:1000000-2000000" if two regions, ""chr7" for a whole chromosome, "chr7, chr1" for two chromosomes and "None" for the complete genome // Warning : replace eval() by ast.literal_eval() from ast package in the main py code ?
+    region = "None" // region to parse. Write "chr7:0-147000000" for a single region, "chr7:0-147000000, chr10:1000000-2000000" for two regions, "chr7" for a whole chromosome, "chr7, chr1" for two chromosomes and "None" for the complete genome // Warning : replace eval() by ast.literal_eval() from ast package in the main py code ?
     y_lim1 = 5 // max y-axis limit of the top panel in the miami plot, in log10, i.e., 5 means up to score 10^5
     y_lim2 = 5 // max y-axis limit of the bottom panel in the miami plot, in log10, i.e., 5 means up to score 10^5
     cute_path = "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v11.4.0/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
diff --git a/vcf_fisher.nf b/vcf_fisher.nf
index 92b7e6b..6946670 100644
--- a/vcf_fisher.nf
+++ b/vcf_fisher.nf
@@ -131,7 +131,7 @@ process WorkflowVersion { // create a file with the workflow version in out_path
 
 process fisher {
     label 'python' // see the withLabel: bash in the nextflow config file 
-    publishDir path: "${out_path}", mode: 'copy', overwrite: false
+    //publishDir path: "${out_path}", mode: 'copy', overwrite: false
     cache 'true'
 
     //no channel input here for the vcf, because I do not transform it
@@ -143,7 +143,7 @@ process fisher {
     //val region2 from region_ch
 
     output:
-    file "*.tsv" into fisher_ch
+    file "*.tsv" into fisher_ch1 // multi channel
 
     script:
     """
@@ -151,17 +151,20 @@ process fisher {
     fisher_lod.py ${vcf} ${ped} "${region2}"
     """
 }
-fisher_ch.collectFile(name: "fisher.tsv").subscribe{it -> it.copyTo("${out_path}")} // concatenate all the cov_report.txt files in channel cov_report_ch into a single file published into ${out_path}/reports
+
+fisher_ch1.collectFile(name: "fisher.tsv", skip:1, keepHeader:true).into{fisher_ch2 ; fisher_ch3}
+fisher_ch2.subscribe{it -> it.copyTo("${out_path}")}
 
 
 process miamiplot {
     label 'r_ext' // see the withLabel: bash in the nextflow config file 
-    publishDir path: "${out_path}", mode: 'copy', overwrite: false
+    publishDir "${out_path}", mode: 'copy', pattern: "{*.png}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
+    publishDir "${out_path}/reports", mode: 'copy', pattern: "{miami_report.txt}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
     cache 'true'
 
     //no channel input here for the vcf, because I do not transform it
     input:
-    file fisher from fisher_ch
+    file fisher from fisher_ch3
     file chr from chr_ch
     val y_lim1
     val y_lim2
@@ -180,6 +183,7 @@ process miamiplot {
 }
 
 
+
 process Backup {
     label 'bash' // see the withLabel: bash in the nextflow config file 
     publishDir "${out_path}/reports", mode: 'copy', overwrite: false // since I am in mode copy, all the output files will be copied into the publishDir. See \\wsl$\Ubuntu-20.04\home\gael\work\aa\a0e9a739acae026fb205bc3fc21f9b
-- 
GitLab