diff --git a/COPYRIGHT b/COPYRIGHT index c6a6a21b6a693891b7458be93fe0ae94f22fedf4..cce5466eda1755cac2b6cf9672eb4f2fa80cf37c 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,11 +1,11 @@ -ChIPflow - a snakemake-based workflow for the analysis of epigenomic data (ChIP-seq) +ePeak - a snakemake-based workflow for the analysis of epigenomic data (ChIP-seq, CUT&RUN, CUT&Tag) from the raw fastq files to the differential analysis of transcription factor binding or histone modification marking. Copyright © 2020 Institut Pasteur (Paris) and CNRS. ------------------------------------------------------------------ Citation: -Daunesse M, Legendre R, Varet H, Pain A, Chica C. ChIPflow: from raw data to epigenomic dynamics +Daunesse M, Legendre R, Varet H, Pain A, Chica C. ePeak: from replicated chromatin profiling data to epigenomic dynamics ------------------------------------------------------------------ @@ -20,7 +20,7 @@ The code includes contributions and input from: ----------------------------------------------------------------- -ChIPflow is free software: you can redistribute it and/or modify +ePeak is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation version 3 of the License. diff --git a/Snakefile b/Snakefile index a8921cfe96adf2b43a67b9bc44201cbf4f93206f..4823242687f48bdc59dd0cf59eac67c8dd485494 100755 --- a/Snakefile +++ b/Snakefile @@ -1,28 +1,29 @@ ######################################################################### -# ChIPflow: Standardize and reproducible ChIP-seq analysis from raw # +# ePeak: Standardize and reproducible ChIP-seq analysis from raw # # data to differential analysis # # Authors: Rachel Legendre, Maelle Daunesse # # Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # # # -# This file is part of ChIPflow workflow. # +# This file is part of ePeak workflow. 
# # # -# ChIPflow is free software: you can redistribute it and/or modify # +# ePeak is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # # (at your option) any later version. # # # -# ChIPflow is distributed in the hope that it will be useful, # +# ePeak is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details . # # # # You should have received a copy of the GNU General Public License # -# along with ChIPflow (LICENSE). # +# along with ePeak (LICENSE). # # If not, see <https://www.gnu.org/licenses/>. # ######################################################################### + import pandas as pd from fnmatch import fnmatch from re import sub, match diff --git a/config/config.yaml b/config/config.yaml index 5af82df8bece13dc268a1d327ff0eeb0af54a6e7..c7801288e1bb07f29e33b5ffaf7949885bafe4b3 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,30 +1,31 @@ ######################################################################### -# ChIPflow: Standardize and reproducible ChIP-seq analysis from raw # +# ePeak: Standardize and reproducible ChIP-seq analysis from raw # # data to differential analysis # # Authors: Rachel Legendre, Maelle Daunesse # # Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # # # -# This file is part of ChIPflow workflow. # +# This file is part of ePeak workflow. # # # -# ChIPflow is free software: you can redistribute it and/or modify # +# ePeak is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # # (at your option) any later version. 
# # # -# ChIPflow is distributed in the hope that it will be useful, # +# ePeak is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details . # # # # You should have received a copy of the GNU General Public License # -# along with ChIPflow (LICENSE). # +# along with ePeak (LICENSE). # # If not, see <https://www.gnu.org/licenses/>. # ######################################################################### + # ======================================================== -# ChIPflow pipeline config file +# ePeak pipeline config file #========================================================= # path to the fastq directory @@ -222,7 +223,7 @@ seacr: #=============================================================================== compute_idr: - do: no + do: yes rank: 'signal.value' thresh: 0.05 @@ -239,7 +240,7 @@ compute_idr: #=============================================================================== intersectionApproach: - do: yes + do: no ia_overlap: 0.8 diff --git a/config/multiqc_config.yaml b/config/multiqc_config.yaml index ac72410fd1d1310cb8b61d8f2d4ac7b08fdc438d..6b0c6cb830fb2d91273b620c1fe6a5f83a1b08cb 100644 --- a/config/multiqc_config.yaml +++ b/config/multiqc_config.yaml @@ -1,29 +1,29 @@ ######################################################################### -# ChIPflow: Standardize and reproducible ChIP-seq analysis from raw # +# ePeak: Standardize and reproducible ChIP-seq analysis from raw # # data to differential analysis # # Authors: Rachel Legendre, Maelle Daunesse # # Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # # # -# This file is part of ChIPflow workflow. # +# This file is part of ePeak workflow. 
# # # -# ChIPflow is free software: you can redistribute it and/or modify # +# ePeak is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # # (at your option) any later version. # # # -# ChIPflow is distributed in the hope that it will be useful, # +# ePeak is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details . # # # # You should have received a copy of the GNU General Public License # -# along with ChIPflow (LICENSE). # +# along with ePeak (LICENSE). # # If not, see <https://www.gnu.org/licenses/>. # ######################################################################### #=============================================================================== -# Optimized MultiQc config file dedicated to ChIPflow workflow +# Optimized MultiQc config file dedicated to ePeak workflow #=============================================================================== #------------------------------------------------------------------------------ diff --git a/test/config.yaml b/test/config.yaml index e441a6cbdf1e3848880cc6c02bf2ef3fd3d285c3..6a50d6852eb380977d7dc02fff29f82e3a5ce2a3 100644 --- a/test/config.yaml +++ b/test/config.yaml @@ -1,29 +1,30 @@ ######################################################################### -# ChIPflow: Standardize and reproducible ChIP-seq analysis from raw # +# ePeak: Standardize and reproducible ChIP-seq analysis from raw # # data to differential analysis # # Authors: Rachel Legendre, Maelle Daunesse # # Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # # # -# This file is part of ChIPflow workflow. # +# This file is part of ePeak workflow. 
# # # -# ChIPflow is free software: you can redistribute it and/or modify # +# ePeak is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # # (at your option) any later version. # # # -# ChIPflow is distributed in the hope that it will be useful, # +# ePeak is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details . # # # # You should have received a copy of the GNU General Public License # -# along with ChIPflow (LICENSE). # +# along with ePeak (LICENSE). # # If not, see <https://www.gnu.org/licenses/>. # ######################################################################### + # ======================================================== -# Config file for ChIPflow pipeline +# Config file for ePeak pipeline #========================================================= # directory where fastq are stored @@ -217,24 +218,36 @@ seacr: threshold: 'stringent' norm: 'norm' - - - #=============================================================================== # Compute IDR on replicates, pseudo-replicates and pooled replicates # # :Parameters: # -# - rank: Which column to use to rank peaks. Options: signal.value p.value q.value columnIndex -# - thresh: Report statistics for peaks with a global idr below this value but return all peaks with an idr below --idr. Default: 0.05 -# - intersectionApproach: set to yes if you want to use intersection approach with narrow peaks when replicates are not closed enough +# - do: if unchecked, this rule is ignored. +# - rank: which column to use to rank peaks. Options: signal.value, p.value, q.value, columnIndex +# - thresh: report statistics for peaks with a global idr below this value. 
Default: 0.05 # #=============================================================================== compute_idr: + do: yes rank: 'signal.value' thresh: 0.05 - intersectionApproach: no + + +#=============================================================================== +# Compute intersection approach on replicates +# +# :Parameters: +# +# - do: if set to 'yes', will compute the intersection approach and use it +# to select reproducible peaks (narrow peaks only; corresponds to the default broad approach). +# - ia_overlap: minimum fraction of overlap between the peaks to be selected (-f parameter of bedtools intersect). Default: 0.8 +# +#=============================================================================== + +intersectionApproach: + do: no ia_overlap: 0.8 #=============================================================================== diff --git a/workflow/rules/compute_FRiP_scores.rules b/workflow/rules/compute_FRiP_scores.rules deleted file mode 100644 index 40666a7ec0a8c824ac83cac536f2312cf2f35c1c..0000000000000000000000000000000000000000 --- a/workflow/rules/compute_FRiP_scores.rules +++ /dev/null @@ -1,56 +0,0 @@ -######################################################################### -# ePeak: Standardize and reproducible ChIP-seq analysis from raw # -# data to differential analysis # -# Authors: Rachel Legendre, Maelle Daunesse # -# Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # -# # -# This file is part of ePeak workflow. # -# # -# ePeak is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# ePeak is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details . 
# -# # -# You should have received a copy of the GNU General Public License # -# along with ePeak (LICENSE). # -# If not, see <https://www.gnu.org/licenses/>. # -######################################################################### - - -rule compute_FRiP_scores: - input: - bam = compute_FRiP_scores_input - output: - tab = compute_FRiP_scores_output - threads: 8 - run: - import pysam - - samfile = pysam.AlignmentFile(input.bam, "rb") - - - bed_files = ["peaks.bed"] - cr = countReadsPerBin.CountReadsPerBin([bam_file1, bam_file2], - bedFile=bed_files, - numberOfProcessors=threads) - reads_at_peaks = cr.run() - print reads_at_peaks - total = reads_at_peaks.sum(axis=0) - - bam1 = pysam.AlignmentFile(bam_file1) - bam2 = pysam.AlignmentFile(bam_file2) - - frip1 = float(total[0]) / bam1.mapped - frip2 = float(total[1]) / bam2.mapped - print frip1, frip2 - - - with open(output.tab, 'w') as file_fp: - file_fp.write(S) - - \ No newline at end of file