diff --git a/Snakefile b/Snakefile old mode 100755 new mode 100644 index 953e159c3ccf5744db49b20d35650954a45c0d4c..5e9730a88f0c7314cd52f76dca72f11c3823f74f --- a/Snakefile +++ b/Snakefile @@ -152,7 +152,7 @@ for row in design.itertuples(index=True, name='Pandas'): MARK_OK.append(mark) CORR_INPUT_OK.append(getattr(row, "INPUT_NAME").split("_")[0]) break - else: + else: nb += 1 nb = 0 @@ -379,7 +379,7 @@ wildcard_constraints: sample = "[A-Za-z-_0-9]+_{0}[0-9]+".format(rep_flag), IP_REP = "[A-Za-z-_0-9]+_{0}[0-9]+".format(rep_flag), REP = "{0}[0-9]+".format(rep_flag), - SPR = "[A-Za-z-_0-9]+_SPR[0-9]\.[1-4]*", + SPR = r"[A-Za-z-_0-9]+_SPR[0-9]\.[1-4]*", PPR = "[A-Za-z-_0-9]+_PPR[0-9]*", POOL = "[A-Za-z-_0-9]+_PPRPool", INPUT_POOL = "[A-Za-z-_0-9]+_(Pool|{0}1)".format(rep_flag), @@ -408,13 +408,17 @@ final_output = [] # quality control #---------------------------------- -fastqc_input_fastq = input_data + + +""" fastqc_input_fastq = input_data +print(fastqc_input_fastq) fastqc_output_done = os.path.join(analysis_dir, "00-Fastqc/{{SAMPLE}}{}_fastqc.done".format(rt1)) +print(fastqc_output_done) fastqc_wkdir = os.path.join(analysis_dir, "00-Fastqc") fastqc_log = os.path.join(analysis_dir, "00-Fastqc/logs/{{SAMPLE}}{}_fastqc_raw.log".format(rt1)) +print(fastqc_output_done) final_output.extend(expand(fastqc_output_done, SAMPLE=samples)) -include: os.path.join(RULES, "fastqc.rules") - +include: os.path.join(RULES, "fastqc.rules") """ #---------------------------------- @@ -736,6 +740,7 @@ if config["macs2"]["do"]: macs2_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) macs2_control = INPUTtoIP + macs2_log_out = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.out".format(model_dir)) macs2_log_err = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.err".format(model_dir)) macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, model)) @@ -755,10 +760,14 @@ if config["seacr"]["do"]: if not config["macs2"]["do"]: peak_caller = ["seacr"] mod = [config["seacr"]["threshold"]] + model_dir = mod else: peak_caller += ["seacr"] mod += [config["seacr"]["threshold"]] + #if model_dir : + # model_dir = + # produce bedgrah files bedgraph_input = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) if config["design"]["spike"]: @@ -792,17 +801,55 @@ if config["seacr"]["do"]: include: os.path.join(RULES, "seacr.rules") +#---------------------------------- +# Peak Calling with SICER +#---------------------------------- + +if config["sicer"]["do"]: + window_size = config["sicer"]["window_size"] + gap_size = config["sicer"]["gap_size"] + sicer_mode = "W{}-G{}".format(window_size, gap_size) + + #if model_dir : + # model_dir = + + if not config["macs2"]["do"] and not config["seacr"]["do"]: + peak_caller = ["sicer"] + mod = [sicer_mode] + else: + peak_caller += ["sicer"] + mod += [sicer_mode] + + def INPUTtoIP(wildcards): + return str(biasedRegions_dir + "/" + IP_INPUT[IP_INPUT.IP == wildcards.SAMPLE].iloc[0]['INPUT'] + "_{}_sort{}.bam".format(ref, biasedRegions)) + + sicer_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) + sicer_input_control = INPUTtoIP + + sicer_options = config["sicer"]["options"] + sicer_genome = config["sicer"]["genome"] + + sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(sicer_mode)) + sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(sicer_mode)) + sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(sicer_mode, ref, biasedRegions, sicer_mode)) + sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(sicer_mode)) + + include: os.path.join(RULES, "sicer.rules") + final_output.extend(expand(sicer_output, SAMPLE = ALL_IP_PC)) + #---------------------------------- # Peak Calling metrics #---------------------------------- -if config["macs2"]["do"] or config["seacr"]["do"] : +if config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]: def stats_pc_input(wildcards): if wildcards.CALLER == "macs2": return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{{MOD}}/{IP_REP}_peaks.{{MOD}}Peak"), IP_REP=ALL_IP_PC) elif wildcards.CALLER == "seacr": return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{{MOD}}/{IP_REP}.{{MOD}}.bed"), IP_REP=ALL_IP_PC) - + elif wildcards.CALLER == "sicer": + return expand(os.path.join(analysis_dir, "06-PeakCalling/sicer_{{MOD}}/{IP_REP}_{REF}_sort{BIASED}-{{MOD}}.scoreisland"), IP_REP=ALL_IP_PC, REF = ref, BIASED = biasedRegions) + stats_peakCalling_input = stats_pc_input stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out") stats_peakCalling_marks = marks @@ -825,8 +872,7 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]: compute_idr_input1 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}1_peaks.{}Peak".format(model_dir, model)) compute_idr_input2 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}2_peaks.{}Peak".format(model_dir, model)) compute_idr_output = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr.txt".format(model_dir, ref, model)) - compute_idr_output_peak = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir, - ref, model, config["compute_idr"]["thresh"], model)) + compute_idr_output_peak = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir,ref, model, config["compute_idr"]["thresh"], model)) compute_idr_log = os.path.join(analysis_dir, "07-IDR/macs2_{}/logs/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_idr.out".format(model_dir,model)) include: os.path.join(RULES, "compute_idr.rules") final_output.extend(expand(compute_idr_output, zip, IP_IDR=REP_IDR, CASE=CASE)) @@ -847,7 +893,7 @@ if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApp def IDR_input_ppr(wildcards): #if wildcards.CALLER == "macs2_narrow": - #return [os.path.join(analysis_dir, "07-IDR/macs2/%s/{IP_IDR}_%s1vs%s2_%s_%s_idr%s.%sPeak" % (model_dir, + #return [os.path.join(analysis_dir, "07-IDR/macs2/%s/{IP_IDR}_%s1vs%s2_%s_%s_idr%s.%sPeak" % (model_dir, # "PPR", "PPR", ref, model, config["compute_idr"]["thresh"], model))] return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPR1vsPPR2_"+ref+"_"+model+"_idr"+str(config["compute_idr"]["thresh"])+"."+model+"Peak")) @@ -1223,9 +1269,9 @@ if config["igv_session"]["do"]: #---------------------------------- try : model_dir except NameError : model_dir = "seacr_" + config["seacr"]["threshold"] -if not config['seacr']['do'] and not config['macs2']['do']: +if not config['seacr']['do'] and not config['macs2']['do'] and not config['sicer']['do']: model_dir = "multiqc" - +#print(final_output) multiqc_input = final_output multiqc_input_dir = analysis_dir multiqc_logs = os.path.join(analysis_dir, "11-Multiqc/multiqc.log") diff --git a/Snakefile_noCTL.smk b/Snakefile_noCTL.smk index 756a6b23463f21f223bd4c2521e543798e054117..7a99150dabe9af5ae562a2435e7688fb0cf4cf07 100755 --- a/Snakefile_noCTL.smk +++ b/Snakefile_noCTL.smk @@ -271,7 +271,7 @@ wildcard_constraints: sample = "[A-Za-z-_0-9]+_{0}[0-9]+".format(rep_flag), IP_REP = "[A-Za-z-_0-9]+_{0}[0-9]+".format(rep_flag), REP = "{0}[0-9]+".format(rep_flag), - SPR = "[A-Za-z-_0-9]+_SPR[0-9]\.[1-4]*", + SPR = r"[A-Za-z-_0-9]+_SPR[0-9]\.[1-4]*", PPR = "[A-Za-z-_0-9]+_PPR[0-9]*", POOL = "[A-Za-z-_0-9]+_PPRPool", MARK = "[A-Za-z-_0-9]+", @@ -298,14 +298,14 @@ final_output = [] #---------------------------------- # quality control #---------------------------------- - +""" fastqc_input_fastq = input_data fastqc_output_done = os.path.join(analysis_dir, "00-Fastqc/{{SAMPLE}}{}_fastqc.done".format(rt1)) fastqc_wkdir = os.path.join(analysis_dir, "00-Fastqc") fastqc_log = os.path.join(analysis_dir, "00-Fastqc/logs/{{SAMPLE}}{}_fastqc_raw.log".format(rt1)) final_output.extend(expand(fastqc_output_done, SAMPLE=samples)) include: os.path.join(RULES, "fastqc.rules") - +""" #---------------------------------- @@ -655,17 +655,47 @@ if config["seacr"]["do"]: final_output.extend(expand(seacr_output, SAMPLE=IP_ALL)) include: os.path.join(RULES, "seacr_noCTL.rules") +#---------------------------------- +# Peak Calling with SICER +#---------------------------------- + +if config["sicer"]["do"]: + window_size = config["sicer"]["window_size"] + gap_size = config["sicer"]["gap_size"] + sicer_mode = "W{}-G{}".format(window_size, gap_size) + + if not config["macs2"]["do"] and not config["seacr"]["do"]: + peak_caller = ["sicer"] + mod = [sicer_mode] + else: + peak_caller += ["sicer"] + mod += [sicer_mode] + + sicer_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) + + sicer_options = config["sicer"]["options"] + sicer_genome = config["sicer"]["genome"] + + sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(sicer_mode)) + sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(sicer_mode)) + sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(sicer_mode, ref, biasedRegions, sicer_mode)) + sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(sicer_mode)) + + include: os.path.join(RULES, "sicer_noCTL.rules") + final_output.extend(expand(sicer_output, SAMPLE = ALL_IP_PC)) #---------------------------------- # Peak Calling metrics #---------------------------------- -if config["macs2"]["do"] or config["seacr"]["do"] : +if config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"] : def stats_pc_input(wildcards): if wildcards.CALLER == "macs2": return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{{MOD}}/{IP_REP}_peaks.{{MOD}}Peak"), IP_REP=ALL_IP_PC) elif wildcards.CALLER == "seacr": return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{{MOD}}/{IP_REP}.{{MOD}}.bed"), IP_REP=ALL_IP_PC) + elif wildcards.CALLER == "sicer": + return expand(os.path.join(analysis_dir, "06-PeakCalling/sicer_{{MOD}}/{IP_REP}_{REF}_sort{BIASED}-{{MOD}}.scoreisland"), IP_REP=ALL_IP_PC, REF = ref, BIASED = biasedRegions) stats_peakCalling_input = stats_pc_input stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out") @@ -1084,7 +1114,7 @@ if config["igv_session"]["do"]: #---------------------------------- try : model_dir except NameError : model_dir = "seacr_" + config["seacr"]["threshold"] -if not config['seacr']['do'] and not config['macs2']['do']: +if not config['seacr']['do'] and not config['macs2']['do'] and not config['sicer']['do']: model_dir = "multiqc" multiqc_input = final_output diff --git a/config/config.yaml b/config/config.yaml index a51089d889932fda870029a06f1d0b5e10a4c6df..e6c1ac240d1a76bf523f9232c4f77836b2e81603 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,7 +1,7 @@ ######################################################################### # ePeak: Standardize and reproducible ChIP-seq analysis from raw # # data to differential analysis # -# Authors: Rachel Legendre, Maelle Daunesse, Luc Jouneau, Amina Alioua # +# Authors: Rachel Legendre, Maelle Daunesse # # Copyright (c) 2019-2020 Institut Pasteur (Paris) and CNRS. # # # # This file is part of ePeak workflow. # @@ -23,21 +23,20 @@ - -#========================================================= -# ePeak pipeline config file +# ======================================================== +# Config file for ePeak pipeline #========================================================= -# path to the fastq directory -input_dir: /path/to/data -# mate pair tag in the fastq filenames (regular expression) +# directory where fastq are stored +input_dir: data +# How mate pair are written in fastq input_mate: '_R[12]' # filename extension input_extension: '.fastq.gz' -# path to the analysis directory -analysis_dir: /path/to/result -# tmpdir: path to temporary directory (default /tmp/, but could be "/local/scratch/") -tmpdir: $TMPDIR +# directory where you want +analysis_dir: analyse +# tmpdir: write temporary file on this directory (default /tmp/, but could be "/local/scratch/") +tmpdir: /pasteur/appa/scratch/shamima #=============================================================================== # Design information. These informations will be used during the @@ -74,18 +73,17 @@ design: #=============================================================================== genome: - index: no - genome_directory: /path/to/genome/directory/mm10 + index: yes + genome_directory: genome/ name: mm10 - fasta_file: /path/to/genome/directory/mm10.fa + fasta_file: genome/mm10.fa #=============================================================================== # FastQC section # # :Parameters: # -# - options: Any valid FastQC parameter -# - threads: number of threads +# - options: Any valid FastQC options #=============================================================================== fastqc: @@ -109,13 +107,13 @@ fastqc: # #=============================================================================== - adapters: remove: yes - adapter_list: file:config/adapt.fa + adapter_list: "ATAGATCTCGTCTAGCT" + tool_choice: cutadapt m: 25 - mode: a - options: -O 6 --trim-n --max-n 1 + mode: g + options: -O 6 --trim-n --max-n 1 -j 4 quality: 30 threads: 4 @@ -126,12 +124,13 @@ adapters: # # :Parameters: # -# - options: any parameter recognized by bowtie2 (see bowtie2 manual) +# - options: any options recognised by bowtie2 tool # - threads: number of threads to be used #=============================================================================== bowtie2_mapping: +# options: "--dovetail --no-mixed --no-discordant " for paired-end data options: "--very-sensitive " threads: 4 @@ -154,21 +153,22 @@ mark_duplicates: threads: 4 #=============================================================================== -# remove biased genomic regions (previously named blacklisted regions) +# remove biased genomic regions # # :Parameters: # -# - do: if 'no', this rule is ignored. +# - do: if unchecked, this rule is ignored. # - bed_file: path to BED file containing all biased regions -# - threads: number of threads #=============================================================================== remove_biasedRegions: do: yes - bed_file: /path/to/genome/directory/mm10-blacklist.v2.bed + bed_file: genome/mm10.blacklist.bed threads: 1 + + #=============================================================================== # peak calling with macs2. # @@ -191,14 +191,14 @@ remove_biasedRegions: #=============================================================================== - macs2: - do: yes - mode_choice: 'narrow' - no_model: no - options: "--keep-dup all --nomodel --extsize=100" + do: no + mode_choice: 'narrow' ## may be broad + no_model: no ## may be yes + options: "--keep-dup all " cutoff: 0.1 - genomeSize: hs + genomeSize: mm + readLength: 50 #=============================================================================== @@ -214,26 +214,27 @@ macs2: seacr: - do: no + do: yes threshold: 'stringent' norm: 'norm' + #=============================================================================== -# Assess reproducibility with ChIP-R https://github.com/rhysnewell/ChIP-R +# Peaks calling with Sicer2 # -# # :Parameters: -# -# - do: if unchecked, this rule is ignored. -# - options: any parameter recognized by ChIP-R. Usually minentries and size. -# +# - do: if unchecked, this rule is ignored +# - window_size: Size of the ... +# - gap_size: +# - options: any options recognized by SICER2, see SICER2's documentation +# - spaces: #=============================================================================== - - -chipr: - do: no - options: "-m 2" - +sicer: + do: yes + window_size: 150 + gap_size: 300 + options: "" + genome: "mm10" #=============================================================================== # Compute IDR on replicates, pseudo-replicates and pooled replicates @@ -255,63 +256,19 @@ compute_idr: #=============================================================================== # Compute intersection approach on replicates # -# In this section, we seek for regions covered by all replicates. -# The region is extended if it is covered by at least nb_min_replicates. -# The region ends with the last extension covered by all samples -# -# example (with nb_min_replicates=2) : -# R1 ---------------------------------------- ----------------------------- -# R2 --------------- ------------------------------------------- -# R3 ---------------------------- ------------------------------------- -# -# out -------------------------------------------------------------- -# # :Parameters: # # - do: if set to 'yes', will compute the intersection approach and use it # to select reproducible peaks. (for narrow only, correspond to the default broad approach) -# - nb_min_replicates: minimal number of replicates covering the same region to extend -# region covered by all the replicates -# - min_peak_length: minimal length accepted for the peaks in output -# - ia_overlap: percentage of overlap between the peaks to be selected. Default: 0.8 +# - ia_overlap: percentage of overlap between the peaks to be selected (-f parameter of bedtools intersect). Default: 0.8 # #=============================================================================== intersectionApproach: do: yes - nb_min_replicates: 2 - min_peak_length: 0 - ia_overlap : 0.8 - -#=============================================================================== -# Peak annotation -# -# :Parameters: -# -# If peakAnnotation is standard, please specify : -# - gtf_file : the path to the file containing the gene definition in gtf format. -# Gene identifier should be indicated by 'gene_id "<gene identifier>"' -# in the last column (attributes column - see http://mblab.wustl.edu/GTF22.html) -# - gene_annotation_file : the path to the file containing gene information -# (gene symbol, gene description, genome coordinates, ...) -# the gene identifier (same identifier as in the gtf file) should be contained -# in the first column. This first column should be labeled like this 'Gene ID' -# -# If peakAnnotation is not standard, please specify the path -# to your specific annotation configuration file : -# - config_file: path to the file containing annotation specifications -# In this case gtf_file and gene_annotation_file are ignored. -# An example of a complex annotation file is available in test/annotation/specific_annotation_config.txt -#=============================================================================== - -peakAnnotation: - do: no - standard: yes - gtf_file: test/annotation/Mus_musculus.GRCm39.111.gtf - gene_annotation_file: test/annotation/Mus_musculus.Ensembl111.txt - config_file: config/annotation_config.txt - - + ia_overlap: 0.8 + nb_min_replicates: 20 + min_peak_length: 10 #=============================================================================== # Compute differential analysis @@ -325,41 +282,38 @@ peakAnnotation: # For DESeq2, "holm" "hochberg" "hommel" "bonferroni" "BH" "BY" "fdr" and "none" are accepted. # - alpha: 0.05 by default # - batch: NULL or a vector with batch effects as c("","") -# - input_counting: add all INPUT in count matrix +# - input_counting: add all input in count matrix #=============================================================================== differential_analysis: do: yes method: "Limma" + normalisation: "quantile" spikes: no - normalisation: "scale" pAdjustMethod: "BH" alpha: 0.05 batch: NULL - input_counting: no - + input_counting: yes -#=============================================================================== +############################################################################# # bamCoverage from Deeptools # see https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html # # :Parameters: # -# - do: if unchecked, this rule is ignored -# - options: options related to deeptools -# - spike-in: set to yes to use spike-in data as sacaling factor -# see https://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html -# for more information about effective Genome Size +# - do: if 'no', this rule is ignored. +# - options: any parameter recognized by Deeptools (see Deeptools manual) +# #=============================================================================== bamCoverage: do: yes - options: "--binSize 10 --effectiveGenomeSize 2913022398 --normalizeUsing RPGC" - spike-in: no + options: "--binSize 10 --effectiveGenomeSize 2913022398 --normalizeUsing RPGC" + spike-in: no threads: 4 -#=============================================================================== +############################################################################# # GeneBody heatmap plot from Deeptools # see https://deeptools.readthedocs.io/en/develop/content/tools/plotHeatmap.html#usage-examples # @@ -371,8 +325,8 @@ bamCoverage: #=============================================================================== geneBody: - do: yes - regionsFileName: test/annotation/Mus_musculus.GRCm39.111.gtf + do: no + regionsFileName: genome/mm10.refGene.gtf threads: 4 #============================================================================== @@ -391,13 +345,14 @@ igv_session: autoScale: True normalize: False + #=============================================================================== # MultiQC aggregates results from bioinformatics analyses across many # samples into a single report. # # :Parameters: # -# - options: any options recognised by MultiQC +# - options: any options recognised by multiqc # - output-directory: Create report in the specified output directory #=============================================================================== @@ -406,3 +361,46 @@ multiqc: options: " -f -e macs2 -x 03-Deduplication/*spikes* -x 02-Mapping/*_spike*" output-directory: "11-Multiqc" +#=============================================================================== +# Peak annotation +# +# :Parameters: +# +# If peakAnnotation is standard, please specify : +# - gtf_file : the path to the file containing the gene definition in gtf format. +# Gene identifier should be indicated by 'gene_id "<gene identifier>"' +# in the last column (attributes column - see http://mblab.wustl.edu/GTF22.html) +# - gene_annotation_file : the path to the file containing gene information +# (gene symbol, gene description, genome coordinates, ...) +# the gene identifier (same identifier as in the gtf file) should be contained +# in the first column. This first column should be labeled like this 'Gene ID' +# +# If peakAnnotation is not standard, please specify the path +# to your specific annotation configuration file : +# - config_file: path to the file containing annotation specifications +# In this case gtf_file and gene_annotation_file are ignored. +# An example of a complex annotation file is available in test/annotation/specific_annotation_config.txt +#=============================================================================== + +peakAnnotation: + do: yes + standard: yes + gtf_file: test/annotation/Mus_musculus.GRCm39.111.gtf + gene_annotation_file: test/annotation/Mus_musculus.Ensembl111.txt + config_file: config/annotation_config.txt + +#=============================================================================== +# Assess reproducibility with ChIP-R +# see https://github.com/rhysnewell/ChIP-R +# +# +# :Parameters: +# +# - do: if unchecked, this rule is ignored. +# - options: any parameter recognized by ChIP-R. Usually minentries and size. +# +#=============================================================================== + +chipr: + do: yes + options: "-m 2 --rankmethod signalvalue" diff --git a/config/multiqc_config.yaml b/config/multiqc_config.yaml index 92b2ee368de7fcda47964145b26def9e22483191..7a67d26c4c8633f2156faf34e99a8fa859e94cb3 100644 --- a/config/multiqc_config.yaml +++ b/config/multiqc_config.yaml @@ -134,6 +134,8 @@ sp: fn: 'macs2*_Peaks_metrics.out' seacr_peaks_metrics: fn: 'seacr*_Peaks_metrics.out' + sicer_peaks_metrics: + fn: 'sicer*_Peaks_metrics.out' spikes_metrics: fn: 'Spikes_metrics.out' frip_scores: @@ -255,6 +257,23 @@ custom_data: Peaks: title: 'Number of peaks' description: 'Number of peaks' + sicer_peaks_metrics: + id: 'sicer_peaks_metrics' + section_name: 'Number of peaks with SICER' + parent_id: "peak_section" + parent_name: "Peaks metrics" + parent_description: "This section contains metrics and statistics about peak calling, IDR, CHIPR and spike-in" + plot_type: 'table' + pconfig: + id: 'sicer_peaks_metrics' + namespace: 'sicer_peaks_metrics' + headers: + Sample: + title: 'Sample name' + description: 'Sample Name' + Peaks: + title: 'Number of peaks' + description: 'Number of peaks' chipr_metrics: id: "chipr_metrics" section_name: 'CHIPR metrics' diff --git a/profile/__pycache__/CookieCutter.cpython-312.pyc b/profile/__pycache__/CookieCutter.cpython-312.pyc index 4f2f6d397f48e1b65b53474172ee89dab0ec7407..0d729ec8991b13bc308d7c4ff59680c9e40747d8 100644 Binary files a/profile/__pycache__/CookieCutter.cpython-312.pyc and b/profile/__pycache__/CookieCutter.cpython-312.pyc differ diff --git a/profile/__pycache__/slurm_utils.cpython-312.pyc b/profile/__pycache__/slurm_utils.cpython-312.pyc index 999c8ce8f59f66351f92e99dfa7b9a1033d2c82a..005aa93565af6724ea58b5b6289808c0fa8842db 100644 Binary files a/profile/__pycache__/slurm_utils.cpython-312.pyc and b/profile/__pycache__/slurm_utils.cpython-312.pyc differ diff --git a/profile/config.yaml b/profile/config.yaml index 13efe1fe0a3fbd2bfb83ae9c863dd1f22bf7b7aa..d65a1cd8390f10bf280b43443f760316d4dacc9d 100644 --- a/profile/config.yaml +++ b/profile/config.yaml @@ -26,6 +26,7 @@ local-cores: 1 latency-wait: "120" use-conda: "False" use-apptainer: "True" +use-envmodules: "True" jobs: "200" printshellcmds: "True" rerun-incomplete: "True" @@ -35,7 +36,7 @@ cores: 1 #configfile: config/config_atac.yaml #jobname: "{rule}__{jobid}" #benchmark-extended: "True" -apptainer-prefix: "/pasteur/zeus/projets/p01/BioIT/Rachelbis/" +apptainer-prefix: "/pasteur/helix/projects/BioIT/Shamima/epeak/" apptainer-args: "-B /pasteur/" diff --git a/profile/slurm-jobscript.sh b/profile/slurm-jobscript.sh old mode 100755 new mode 100644