diff --git a/Snakefile b/Snakefile index 4823242687f48bdc59dd0cf59eac67c8dd485494..c4a38f99c0a7ea49e02682d34f4728549919fe4e 100755 --- a/Snakefile +++ b/Snakefile @@ -502,7 +502,7 @@ else: # Coverage step #---------------------------------- if config["bamCoverage"]["do"]: - bamCoverage_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions)) + bamCoverage_input = "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions) bamCoverage_logs = os.path.join(analysis_dir, "12-IGV/logs/{SAMPLE}_{REF}.out") bamCoverage_output = os.path.join(analysis_dir, "12-IGV/{SAMPLE}_{REF}_coverage.bw") bamCoverage_options = config['bamCoverage']['options'] @@ -513,7 +513,7 @@ if config["bamCoverage"]["do"]: #---------------------------------- # Estimate Library Complexity #---------------------------------- -lib_complexity_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions)) +lib_complexity_input = "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions) lib_complexity_metrics = os.path.join(analysis_dir, "05-QC/Complexity/{SAMPLE}_{REF}_metrics.txt") lib_complexity_log_std = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.out") lib_complexity_log_err = os.path.join(analysis_dir, "05-QC/Complexity/logs/{SAMPLE}_{REF}_complexity.err") @@ -524,7 +524,7 @@ include: os.path.join(RULES, "EstimateLibraryComplexity.rules") # plot Fingerprint #---------------------------------- def IPandINPUT(wildcards): - return [str(os.path.join(analysis_dir, biasedRegions_dir +"/") + IP_INPUT[IP_INPUT.IP == wildcards.IP].iloc[0]['INPUT'] + "_{}_sort_dedup{}.bam".format(ref, biasedRegions)), os.path.join(analysis_dir,"{}/{{IP}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions))] + return [str(biasedRegions_dir + "/" + IP_INPUT[IP_INPUT.IP == wildcards.IP].iloc[0]['INPUT'] + "_{}_sort_dedup{}.bam".format(ref, biasedRegions)), "{}/{{IP}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions)] plotFingerprint_input = IPandINPUT @@ -559,11 +559,11 @@ if config['geneBody']['do']: # fragment size distribution - Picard CollectInsertSizeMetrics #------------------------------------------------ if paired: - insert_size_metrics_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions)) - insert_size_metrics_output_txt = "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.txt" - insert_size_metrics_histo = "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.pdf" - insert_size_metrics_log_out = "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.out" - insert_size_metrics_log_err = "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.err" + insert_size_metrics_input = "{}/{{SAMPLE}}_{{REF}}_sort_dedup{}.bam".format(biasedRegions_dir, biasedRegions) + insert_size_metrics_output_txt = os.path.join(analysis_dir, "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.txt") + insert_size_metrics_histo = os.path.join(analysis_dir, "05-QC/FragmentSizeDistribution/{SAMPLE}_{REF}_sort_dedup_fragmentSizeDistribution.pdf") + insert_size_metrics_log_out = os.path.join(analysis_dir, "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.out") + insert_size_metrics_log_err = os.path.join(analysis_dir, "05-QC/FragmentSizeDistribution/logs/{SAMPLE}_{REF}_fragment_size.err") final_output.extend(expand(insert_size_metrics_output_txt, SAMPLE=samples, REF=ref)) include: os.path.join(RULES, "fragment_size_distribution.rules") @@ -638,7 +638,7 @@ else : #---------------------------------- if config["macs2"]["do"]: def INPUTtoIP(wildcards): - return str(os.path.join(analysis_dir,biasedRegions_dir) + "/" + IP_INPUT[IP_INPUT.IP == wildcards.SAMPLE].iloc[0]['INPUT'] + "_{}_sort_dedup{}.bam".format(ref, biasedRegions)) + return str(biasedRegions_dir + "/" + IP_INPUT[IP_INPUT.IP == wildcards.SAMPLE].iloc[0]['INPUT'] + "_{}_sort_dedup{}.bam".format(ref, biasedRegions)) model = config["macs2"]["mode_choice"] model_dir = model @@ -657,7 +657,6 @@ if config["macs2"]["do"]: config["macs2"]['cutoff']) + config["macs2"]['options'] if config["macs2"]["no_model"]: macs2_options += " --nomodel " - #macs2_input_done += [os.path.join(analysis_dir, "05-PhantomPeakQualTools/{{IP}}_{}_sort_dedup{}_spp.out".format(ref, biasedRegions))] macs2_shift_file = os.path.join(analysis_dir, "05-QC/PhantomPeakQualTools/{{SAMPLE}}_{}_sort_dedup{}_spp.out".format(ref, biasedRegions)) else: macs2_shift_file = "Empty" @@ -666,8 +665,7 @@ if config["macs2"]["do"]: else: macs2_pe_mode = "no" - macs2_input_bam = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions)) - #macs2_control = "{}/{{INPUT}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions) + macs2_input_bam = "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions) macs2_control = INPUTtoIP macs2_log = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/logs/{{SAMPLE}}.out".format(model_dir)) macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2/{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, model)) @@ -696,7 +694,7 @@ if config["seacr"]["do"]: include: os.path.join(RULES, "compute_scaling_factor.rules") # produce bedgrah files - bedgraph_input = os.path.join(analysis_dir, "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions)) + bedgraph_input = "{}/{{SAMPLE}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions) if config["design"]["spike"]: bedgraph_scaleFactor = compute_scaling_factor_output else: @@ -793,7 +791,7 @@ if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApp select_peaks_input_ppr = IDR_input_ppr select_peaks_input_pool = IDR_input_pool select_peaks_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/logs/{{IP_IDR}}_selected_peaks.o".format(model_dir)) - select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.Peak".format(model_dir)) + select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.{}Peak".format(model_dir, model)) include: os.path.join(RULES, "select_peaks.rules") final_output.extend(expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP)) @@ -816,7 +814,7 @@ def IA_input(wildcards): if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"] or config["seacr"]["do"] : intersectionApproach_input_rep = IA_input intersectionApproach_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/logs/{IP_IDR}_IA_peaks.o") - intersectionApproach_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{IP_IDR}_IA.Peak") + intersectionApproach_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{IP_IDR}_IA.bed") if config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"] : intersectionApproach_overlap = 0.8 else: @@ -852,10 +850,10 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: def getPeakFilesByMark(wildcards): if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad" ] or (wildcards.CALLER in ["macs2_narrow"] and config["intersectionApproach"]["do"]): - return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{MARK}_{COND}_IA.Peak"), + return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{MARK}_{COND}_IA.bed"), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) else : - return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/{MARK}_{COND}_select.Peak"), + return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{{COND}}_select.{}Peak".format(model)), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) @@ -907,7 +905,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: feature_counts_input = getBAMFilesByMark feature_counts_optional_input = [] if config["differential_analysis"]["input_counting"]: - feature_counts_optional_input += expand(os.path.join(analysis_dir, "{}/{{INPUT}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions)), INPUT=CORR_INPUT) + feature_counts_optional_input += expand("{}/{{INPUT}}_{}_sort_dedup{}.bam".format(biasedRegions_dir, ref, biasedRegions), INPUT=CORR_INPUT) feature_counts_output_count = os.path.join(analysis_dir, "09-CountMatrix/{CALLER}/{MARK}_Matrix_Optimal_Peak.mtx") if len(conds) == 2 : feature_counts_gff = os.path.join(analysis_dir, "09-CountMatrix/{{CALLER}}/{{MARK}}_{}u{}_{}.optimal.Peak_overlap.gff".format(conds[0], conds[1], ref))