diff --git a/TO_DO.txt b/TO_DO.txt new file mode 100644 index 0000000000000000000000000000000000000000..914263e53a6ec10624d2cc78199521c560d54885 --- /dev/null +++ b/TO_DO.txt @@ -0,0 +1,14 @@ +To do ? + + - Differential analysis annotation is commented out -- need to fix the issue : + annotation script is not working, see first lines of the /pasteur/zeus/projets/p01/BioIT/Amina/Part4/epeak/workflow/scripts/annote_analysis_diff.sh + + - Maybe see if this is useful for genrich to do chipr / find a way to have similar metrics on peaks files to be able to compare tss_enrichment + -> some metrics are not ranked the same, so it might be interesting to look into this + -> also check the metrics taken by lanceOtron; if needed it can take different values than those chosen (but works accurately for now) + + - Real benchmarking on peak callers + + - FRiP score with subread featureCounts (check brouillon.txt) + + - TSS enrichment score (check brouillon.txt) diff --git a/brouillon.txt b/brouillon.txt new file mode 100644 index 0000000000000000000000000000000000000000..60205ecfef42a2f2dc774c2addf1630e57a33c1f --- /dev/null +++ b/brouillon.txt @@ -0,0 +1,219 @@ +######################################################################### +# ePeak: Standardize and reproducible ATAC-seq analysis from raw # +# data to differential analysis # +# Authors: Amina Alioua # +# Copyright (c) 2024 Institut Pasteur (Paris) and CNRS. # +# # +# This file is part of ePeak workflow. # +# # +# ePeak is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# ePeak is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . 
# +# # +# You should have received a copy of the GNU General Public License # +# along with ePeak (LICENSE). # +# If not, see <https://www.gnu.org/licenses/>. # +######################################################################### + + + +rule frip_score:  # Draft FRiP step: rewrite a narrowPeak file as SAF, then run featureCounts with that SAF as annotation. + input: + frip_input_peaks + log: + out = frip_log_out, + err = frip_log_err + output: + frip_output + params: + gtf = config['tss_enrichment']['gtf_file'], + saf = frip_saf + singularity: + "epeak.sif" + shell: + """ + # NOTE(review): featureCounts takes alignment (BAM/SAM) files as positional inputs, but the narrowPeak file is passed below -- confirm and supply the sample BAM. + + awk 'BEGIN {{OFS="\t"}} {{print $4, $1, $2+1, $3, $6}}' {input} > {params.saf}  # columns written: name, chrom, start+1, end, column 6 + + featureCounts -p -a {params.saf} -F SAF -o {output} {input} > {log.out} 2> {log.err} + + """ + +frip_input_peaks = os.path.join(analysis_dir,"06-PeakCalling/{CALLER}/{SAMPLE}_peaks.narrowPeak") +frip_log_out = os.path.join(analysis_dir,"15-FRiP/{CALLER}/logs/{SAMPLE}_peaks.out") +frip_log_err = os.path.join(analysis_dir,"15-FRiP/{CALLER}/logs/{SAMPLE}_peaks.err") +frip_output = os.path.join(analysis_dir,"15-FRiP/{CALLER}/{SAMPLE}_peaks.scores") +frip_saf = os.path.join(analysis_dir,"15-FRiP/{CALLER}/{SAMPLE}_peaks.saf") +#final_output.extend(expand(frip_output, SAMPLE=ALL_IP_PC)) + +for peak_caller in all_peak_caller: + final_output.extend(expand(frip_output, CALLER=peak_caller, SAMPLE=ALL_IP_PC)) + +include: os.path.join(RULES, "frip_scores.rules") + +######################################################################### +# ePeak: Standardize and reproducible ATAC-seq analysis from raw # +# data to differential analysis # +# Authors: Amina Alioua # +# Copyright (c) 2024 Institut Pasteur (Paris) and CNRS. # +# # +# This file is part of ePeak workflow. # +# # +# ePeak is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. 
# +# # +# ePeak is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with ePeak (LICENSE). # +# If not, see <https://www.gnu.org/licenses/>. # +######################################################################### + + + +rule tss_cov:  # Runs bamCoverage on the input BAM and writes the coverage file named by the output. + input: + coverage_input_bam + output: + bam_coverage_output + log: + out = coverage_log_out, + err = coverage_log_err + singularity: + "epeak.sif" + shell: + """ + bamCoverage -b {input} -o {output} > {log.out} 2> {log.err} + + """ + + +######################################################################### +# ePeak: Standardize and reproducible ATAC-seq analysis from raw # +# data to differential analysis # +# Authors: Amina Alioua # +# Copyright (c) 2024 Institut Pasteur (Paris) and CNRS. # +# # +# This file is part of ePeak workflow. # +# # +# ePeak is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# ePeak is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with ePeak (LICENSE). # +# If not, see <https://www.gnu.org/licenses/>. 
# +######################################################################### + + + +rule tss_matrix:  # Runs computeMatrix reference-point (TSS, 1000 bp each side, 10 bp bins) on the coverage and region inputs. + input: + cov = tss_input_bw, + bed = tss_input_bed + output: + tss_enrichment_output + params: + gtf = config['tss_enrichment']['gtf_file'] + log: + out = tss_enrichment_log_out, + err = tss_enrichment_log_err + singularity: + "epeak.sif" + shell: + """ + computeMatrix reference-point \ + -S {input.cov} -R {input.bed} \ + --referencePoint TSS \ + -a 1000 -b 1000 \ + --binSize 10 \ + -o {output} > {log.out} 2> {log.err} + + """ + + +######################################################################### +# ePeak: Standardize and reproducible ATAC-seq analysis from raw # +# data to differential analysis # +# Authors: Amina Alioua # +# Copyright (c) 2024 Institut Pasteur (Paris) and CNRS. # +# # +# This file is part of ePeak workflow. # +# # +# ePeak is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# ePeak is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details . # +# # +# You should have received a copy of the GNU General Public License # +# along with ePeak (LICENSE). # +# If not, see <https://www.gnu.org/licenses/>. 
# +######################################################################### + + + +rule tss_plot:  # Runs plotProfile on the computeMatrix output and writes the profile as a PDF. + input: + tss_enrichment_output + log: + out = tss_plot_log_out, + err = tss_plot_log_err + output: + pdf = tss_plot_output + singularity: + "epeak.sif" + shell: + """ + plotProfile -m {input} -out {output.pdf} > {log.out} 2> {log.err} + + """ + + +In the Snakefile : + if config['tss_enrichment']['do']: + + coverage_input_bam = "{}/{{SAMPLE}}_{{REF}}_sort{}.bam".format(biasedRegions_dir, biasedRegions) + coverage_log_out = os.path.join(analysis_dir,"14-TSS/coverage/logs/{SAMPLE}_{REF}_buffer.out") + coverage_log_err = os.path.join(analysis_dir,"14-TSS/coverage/logs/{SAMPLE}_{REF}_buffer.err") + bam_coverage_output = os.path.join(analysis_dir,"14-TSS/coverage/{SAMPLE}_{REF}_coverage.bw") + include: os.path.join(RULES, "tss_cov.rules") + final_output.extend(expand(bam_coverage_output,SAMPLE=samples,REF=ref)) + + tss_input_bw = bam_coverage_output  # wildcard template, not expand(): each {SAMPLE}_{REF} matrix is built from its own bigWig instead of from every sample's + tss_input_bed = config['tss_enrichment']['gtf_file'] + #tss_input_bed = "tss_gtf.gtf" + tss_enrichment_log_out = os.path.join(analysis_dir,"14-TSS/matrix/logs/{SAMPLE}_{REF}_buffer.out") + tss_enrichment_log_err = os.path.join(analysis_dir,"14-TSS/matrix/logs/{SAMPLE}_{REF}_buffer.err") + tss_enrichment_output = os.path.join(analysis_dir,"14-TSS/matrix/{SAMPLE}_{REF}_matrix.gz") + include: os.path.join(RULES, "tss_matrix.rules") + final_output.extend(expand(tss_enrichment_output,SAMPLE=samples,REF=ref)) + + #tss_enrichment_output = expand("14-TSS/{{SAMPLE}}_{{REF}}_matrix.gz") + tss_plot_log_out = os.path.join(analysis_dir,"14-TSS/profile/logs/{SAMPLE}_{REF}.out") + tss_plot_log_err = os.path.join(analysis_dir,"14-TSS/profile/logs/{SAMPLE}_{REF}.err") + tss_plot_output = os.path.join(analysis_dir,"14-TSS/profile/{SAMPLE}_{REF}.pdf") + include: os.path.join(RULES, "tss_plot.rules") + final_output.extend(expand(tss_plot_output,SAMPLE=samples,REF=ref))