Commit 58eb4beb authored by Rachel  LEGENDRE's avatar Rachel LEGENDRE
Browse files

add some modifications

parent 24dd19c4
......@@ -268,10 +268,15 @@ peak_calling:
compute_idr:
do: yes
rank: 'signal.value'
thresh: 0.05
intersectionApproach: no
intersectionApproach:
do: no
ia_overlap: 0.8
```
> If none or very few peaks pass the IDR, this means that there is to much variability between your replicates aka they are probably not good replicates. If you want to proceed anyway with the analysis, you can use the intersection approach (less stringent) instead of the IDR by setting `intersectionApproach` to 'yes'.
......@@ -288,11 +293,17 @@ peak_calling:
cutoff: 0.01
genomeSize: mm
compute_idr:
do: no
rank: 'signal.value'
thresh: 0.01
intersectionApproach: yes
intersectionApproach:
do: yes
ia_overlap: 0.8
```
### Default mode for cut&run
......@@ -311,9 +322,13 @@ seacr:
norm: 'norm'
compute_idr:
do: no
rank: 'signal.value'
thresh: 0.01
intersectionApproach: yes
intersectionApproach:
do: yes
ia_overlap: 0.8
```
......
......@@ -501,7 +501,7 @@ else:
# Coverage step
#----------------------------------
if config["bamCoverage"]["do"]:
bamCoverage_input = "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions)
bamCoverage_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions))
bamCoverage_logs = os.path.join(analysis_dir, "12-IGV/logs/{SAMPLE}_{REF}.out")
bamCoverage_output = os.path.join(analysis_dir, "12-IGV/{SAMPLE}_{REF}_coverage.bw")
bamCoverage_options = config['bamCoverage']['options']
......@@ -512,7 +512,7 @@ if config["bamCoverage"]["do"]:
#----------------------------------
# Estimate Library Complexity
#----------------------------------
lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions)
lib_complexity_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions))
lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt")
lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out")
lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err")
......@@ -554,6 +554,18 @@ if config['geneBody']['do']:
include: os.path.join(RULES, "plotHeatmap.rules")
#------------------------------------------------
# fragment size distribution - Picard CollectInsertSizeMetrics
#------------------------------------------------
if paired:
insert_size_metrics_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions))
insert_size_metrics_output_txt = "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.txt"
insert_size_metrics_histo = "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.pdf"
insert_size_metrics_log_out = "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.out"
insert_size_metrics_log_err = "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.err"
final_output.extend(expand(insert_size_metrics_output_txt, SAMPLE=samples, REF=ref))
include: os.path.join(RULES, "fragment_size_distribution.rules")
#-------------------------------------
# then we performed peak calling only in files against reference genome and not spike genome !
ref = config["genome"]["name"]
......@@ -674,8 +686,20 @@ if config["seacr"]["do"]:
else:
peak_caller += ["seacr"]
mod += [config["seacr"]["threshold"]]
# compute scale factor if spike-in
if config["design"]["spike"]:
compute_scaling_factor_input = os.path.join(analysis_dir, "03-Deduplication/{SAMPLE}_spikes_sort_dedup.bam")
compute_scaling_factor_output = os.path.join(analysis_dir, "06-PeakCalling/seacr/{SAMPLE}_scaleFactor.txt")
final_output.extend(expand(compute_scaling_factor_output, SAMPLE=samples))
include: os.path.join(RULES, "compute_scaling_factor.rules")
# produce bedgrah files
bedgraph_input = "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions)
bedgraph_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions))
if config["design"]["spike"]:
bedgraph_scaleFactor = compute_scaling_factor_output
else:
bedgraph_scaleFactor = bedgraph_input
bedgraph_genome = config["genome"]["fasta_file"]+".fai"
if paired:
bedgraph_options = " -bedpe "
......@@ -683,7 +707,7 @@ if config["seacr"]["do"]:
bedgraph_options = ""
bedgraph_logs = os.path.join(analysis_dir, "06-PeakCalling/seacr/logs/{SAMPLE}_bedgraph.out")
bedgraph_output = os.path.join(analysis_dir, "06-PeakCalling/seacr/{SAMPLE}_bedgraph.bg")
final_output.extend(expand(bedgraph_output, SAMPLE=IP_ALL))
final_output.extend(expand(bedgraph_output, SAMPLE=samples))
include: os.path.join(RULES, "bedgraph.rules")
def IGGtoIP(wildcards):
......@@ -724,7 +748,7 @@ final_output.extend(expand(stats_peakCalling_csv, zip, CALLER=peak_caller, MOD=m
#----------------------------------
# IDR computing
#----------------------------------
if config["macs2"]["do"]:
if config["macs2"]["do"] and config["compute_idr"]["do"]:
if model in ["narrow"]:
compute_idr_mode = "narrowPeak"
......@@ -745,7 +769,7 @@ if config["macs2"]["do"]:
# Select reproducible peaks
#----------------------------------
CALL_MOD = []
if config["macs2"]["do"] and model in ["narrow"] and not config["compute_idr"]["intersectionApproach"]:
if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"]:
CALL_MOD += ["macs2_" + model]
# Select IDR peaks
......@@ -778,7 +802,7 @@ if config["macs2"]["do"] and model in ["narrow"] and not config["compute_idr"]["
if config["seacr"]["do"] :
CALL_MOD += ["seacr_" + config["seacr"]["threshold"]]
if (config["macs2"]["do"] and config["compute_idr"]["intersectionApproach"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]):
if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]):
CALL_MOD += ["macs2_" + model]
......@@ -788,14 +812,14 @@ def IA_input(wildcards):
elif wildcards.CALLER == "seacr_"+ config["seacr"]["threshold"]:
return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CALLER="seacr", CASE=rep, MOD=config["seacr"]["threshold"])
if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["compute_idr"]["intersectionApproach"] or config["seacr"]["do"] :
if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"] or config["seacr"]["do"] :
intersectionApproach_input_rep = IA_input
intersectionApproach_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/logs/{IP_IDR}_IA_peaks.o")
intersectionApproach_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{IP_IDR}_IA.Peak")
if config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"] :
intersectionApproach_overlap = 0.8
else:
intersectionApproach_overlap = config["compute_idr"]["ia_overlap"]
intersectionApproach_overlap = config["intersectionApproach"]["ia_overlap"]
include: os.path.join(RULES, "intersectionApproach.rules")
final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP))
......@@ -804,7 +828,7 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co
# Compute IDR metrics
#----------------------------------
if config["macs2"]["do"] :
if config["macs2"]["do"] and config["compute_idr"]["do"]:
idr_peaks = []
idr_peaks.extend(expand(os.path.join(analysis_dir, "07-IDR/macs2/{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir,
ref,model, config["compute_idr"]["thresh"], model)), zip, IP_IDR=REP_IDR, CASE=CASE))
......@@ -826,7 +850,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]:
def getPeakFilesByMark(wildcards):
if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad" ] or (wildcards.CALLER in ["macs2_narrow"] and config["compute_idr"]["intersectionApproach"]):
if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad" ] or (wildcards.CALLER in ["macs2_narrow"] and config["intersectionApproach"]["do"]):
return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{MARK}_{COND}_IA.Peak"),
CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds)
else :
......@@ -939,7 +963,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]:
if config["igv_session"]["do"]:
igv_session_input = expand(bamCoverage_output, SAMPLE=IP_ALL, REF=ref, allow_missing=True)
if len(IP_REP) > 1 and config["macs2"]["do"]:
if config["compute_idr"]["intersectionApproach"]:
if config["intersectionApproach"]["do"]:
igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model, IP_IDR=IP_REP)
else:
igv_session_input += expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP)
......
......@@ -24,7 +24,8 @@
rule bedgraph:
input:
bedgraph_input
bam = bedgraph_input,
scaleFactor = bedgraph_scaleFactor
output:
temp(bedgraph_output)
params:
......@@ -37,8 +38,21 @@ rule bedgraph:
shell:
"""
temp_file=$(mktemp)
#bedtools bamtobed {params.options} -i {input} | awk '$1==$4 && $6-$2 < 1000 {{print $0}}' - | cut -f 1,2,6 | sort -k1,1 -k2,2n -k3,3n > $temp_file
bedtools bamtobed {params.options} -i {input} | sort -k1,1 -k2,2n -k3,3n > $temp_file
bedtools genomecov -bg -i $temp_file -g {params.genome} > {output} 2> {log.out}
#bedtools bamtobed {params.options} -i {input.bam} | awk '$1==$4 && $6-$2 < 1000 {{print $0}}' - | cut -f 1,2,6 | sort -k1,1 -k2,2n -k3,3n > $temp_file
bedtools bamtobed {params.options} -i {input.bam} | sort -k1,1 -k2,2n -k3,3n > $temp_file
if [[ -s {input.scaleFactor} && {input.scaleFactor} == *"_scaleFactor.txt"* ]]
then
S=$(cat {input.scaleFactor})
bedtools genomecov -bg -i $temp_file -g {params.genome} -scale $S > {output} 2> {log.out}
else
bedtools genomecov -bg -i $temp_file -g {params.genome} > {output} 2> {log.out}
fi
"""
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment