diff --git a/Snakefile_noCTL.smk b/Snakefile_noCTL.smk index 7a99150dabe9af5ae562a2435e7688fb0cf4cf07..160f4fbe5e59a8c5065631ebb04a64494f6653ad 100755 --- a/Snakefile_noCTL.smk +++ b/Snakefile_noCTL.smk @@ -101,6 +101,7 @@ for mark in design['IP_NAME']: MARK_OK = [] +CORR_INPUT_OK = [] # corresponding INPUT condition nb = 0 for row in design.itertuples(index=True, name='Pandas'): for mark in marks: @@ -109,12 +110,21 @@ for row in design.itertuples(index=True, name='Pandas'): if (getattr(row, "IP_NAME")).endswith(tuple(conds)) and getattr(row, "NB_IP") > 1: if nb >= 1 and mark not in MARK_OK: MARK_OK.append(mark) + CORR_INPUT_OK.append(getattr(row, "INPUT_NAME").split("_")[0]) break - else: + else: nb += 1 nb = 0 +if config["differential_analysis"]["input_counting"]: + CORR_INPUT = [] # corresponding INPUT files for input counting + for mark in CORR_INPUT_OK: + for file in samples: + if fnmatch(file, mark+"_*") and file not in CORR_INPUT: + name = sub(mate_pair, '', file) + CORR_INPUT.append(name) + # ----------------------------------------------- # built list of MARK_COND_REP from config.yaml and check correspondance between design, config and fastq files @@ -298,14 +308,13 @@ final_output = [] #---------------------------------- # quality control #---------------------------------- -""" + fastqc_input_fastq = input_data fastqc_output_done = os.path.join(analysis_dir, "00-Fastqc/{{SAMPLE}}{}_fastqc.done".format(rt1)) fastqc_wkdir = os.path.join(analysis_dir, "00-Fastqc") fastqc_log = os.path.join(analysis_dir, "00-Fastqc/logs/{{SAMPLE}}{}_fastqc_raw.log".format(rt1)) final_output.extend(expand(fastqc_output_done, SAMPLE=samples)) include: os.path.join(RULES, "fastqc.rules") -""" #---------------------------------- @@ -579,17 +588,19 @@ else : #---------------------------------- # Peak Calling with MACS2 #---------------------------------- +mode={} #dictionnaire contenant les modes de toutes les methodes utilisees if config["macs2"]["do"]: - model = config["macs2"]["mode_choice"] - model_dir = model - mod = [model] + mode["macs2"] = config["macs2"]["mode_choice"] + model_dir = mode["macs2"] + mod = [mode["macs2"]] peak_caller = ["macs2"] + if config["macs2"]["no_model"]: model_dir += "-nomodel" - if model in ["narrow", "broad"]: + if mode["macs2"] in ["narrow", "broad"]: # Peak Calling on replicates - if model in ["narrow"]: + if mode["macs2"] in ["narrow"]: # add corresponding options macs2_options = "-p {} ".format(config["macs2"]['cutoff']) + config["macs2"]['options'] else: @@ -606,12 +617,13 @@ if config["macs2"]["do"]: macs2_pe_mode = "no" macs2_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) + macs2_log_out = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.out".format(model_dir)) macs2_log_err = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.err".format(model_dir)) - macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, model)) + macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, mode["macs2"])) macs2_output_prefix = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}".format(model_dir)) - final_output.extend(expand(macs2_output, zip, SAMPLE=ALL_IP_PC)) + final_output.extend(expand(macs2_output, zip, SAMPLE=IP_ALL)) include: os.path.join(RULES, "macs2_noCTL.rules") else : raise ValueError("Please provides valid model for MACS2") @@ -629,6 +641,7 @@ if config["seacr"]["do"]: peak_caller += ["seacr"] mod += [config["seacr"]["threshold"]] + mode["seacr"] = config["seacr"]["threshold"] # produce bedgrah files bedgraph_input = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) if config["design"]["spike"]: @@ -640,18 +653,18 @@ if config["seacr"]["do"]: bedgraph_options = " -bedpe " else: bedgraph_options = "" - bedgraph_logs = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"logs/{SAMPLE}_bedgraph.out") - bedgraph_output = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"{SAMPLE}_bedgraph.bg") + bedgraph_logs = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"logs/{SAMPLE}_bedgraph.out") + bedgraph_output = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"{SAMPLE}_bedgraph.bg") #final_output.extend(expand(bedgraph_output, SAMPLE=samples)) include: os.path.join(RULES, "bedgraph.rules") seacr_bed_ip = bedgraph_output seacr_bed_threshold = config["seacr"]["threshold"] seacr_bed_norm = config["seacr"]["norm"] - seacr_output_prefix = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"{SAMPLE}") - seacr_output = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"{{SAMPLE}}.{}.bed".format(config["seacr"]["threshold"])) - seacr_logs_std = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"logs/{SAMPLE}_seacr_calling.out") - seacr_logs_err = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"logs/{SAMPLE}_seacr_calling.err") + seacr_output_prefix = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"{SAMPLE}") + seacr_output = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"{{SAMPLE}}.{}.bed".format(mode["seacr"])) + seacr_logs_std = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"logs/{SAMPLE}_seacr_calling.out") + seacr_logs_err = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+mode["seacr"]),"logs/{SAMPLE}_seacr_calling.err") final_output.extend(expand(seacr_output, SAMPLE=IP_ALL)) include: os.path.join(RULES, "seacr_noCTL.rules") @@ -660,29 +673,30 @@ if config["seacr"]["do"]: #---------------------------------- if config["sicer"]["do"]: + window_size = config["sicer"]["window_size"] gap_size = config["sicer"]["gap_size"] - sicer_mode = "W{}-G{}".format(window_size, gap_size) + mode["sicer"] = "W{}-G{}".format(window_size, gap_size) if not config["macs2"]["do"] and not config["seacr"]["do"]: peak_caller = ["sicer"] - mod = [sicer_mode] + mod = [mode["sicer"]] else: peak_caller += ["sicer"] - mod += [sicer_mode] + mod += [mode["sicer"]] sicer_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) sicer_options = config["sicer"]["options"] sicer_genome = config["sicer"]["genome"] - sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(sicer_mode)) - sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(sicer_mode)) - sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(sicer_mode, ref, biasedRegions, sicer_mode)) - sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(sicer_mode)) + sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(mode["sicer"])) + sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(mode["sicer"])) + sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(mode["sicer"], ref, biasedRegions, mode["sicer"])) + sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(mode["sicer"])) include: os.path.join(RULES, "sicer_noCTL.rules") - final_output.extend(expand(sicer_output, SAMPLE = ALL_IP_PC)) + final_output.extend(expand(sicer_output, SAMPLE = IP_ALL)) #---------------------------------- # Peak Calling metrics @@ -710,18 +724,15 @@ if config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"] : #---------------------------------- # IDR computing #---------------------------------- -if config["macs2"]["do"] and config["compute_idr"]["do"]: +if config["macs2"]["do"] and mode["macs2"] in ["narrow"] and config["compute_idr"]["do"]: - if model in ["narrow"]: - compute_idr_mode = "narrowPeak" - else : - compute_idr_mode = "broadPeak" - compute_idr_input1 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}1_peaks.{}Peak".format(model_dir, model)) - compute_idr_input2 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}2_peaks.{}Peak".format(model_dir, model)) - compute_idr_output = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr.txt".format(model_dir, ref, model)) + compute_idr_mode = "narrowPeak" + compute_idr_input1 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}1_peaks.{}Peak".format(model_dir, mode["macs2"])) + compute_idr_input2 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}2_peaks.{}Peak".format(model_dir, mode["macs2"])) + compute_idr_output = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr.txt".format(model_dir, ref, mode["macs2"])) compute_idr_output_peak = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir, - ref, model, config["compute_idr"]["thresh"], model)) - compute_idr_log = os.path.join(analysis_dir, "07-IDR/macs2_{}/logs/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_idr.out".format(model_dir,model)) + ref, mode["macs2"], config["compute_idr"]["thresh"], mode["macs2"])) + compute_idr_log = os.path.join(analysis_dir, "07-IDR/macs2_{}/logs/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_idr.out".format(model_dir,mode["macs2"])) include: os.path.join(RULES, "compute_idr.rules") final_output.extend(expand(compute_idr_output, zip, IP_IDR=REP_IDR, CASE=CASE)) @@ -732,29 +743,29 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]: #---------------------------------- CALL_MOD = [] -if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: +if config["macs2"]["do"] and mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: CALL_MOD += ["macs2_" + model_dir] # Select IDR peaks def IDR_input_rep(wildcards): #if wildcards.CALLER == "macs2_narrow": - return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_"+rep_flag+"1vs"+rep_flag+"2_"+ref+"_"+model+"_idr"+str(config["compute_idr"]["thresh"])+"."+model+"Peak")) + return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_"+rep_flag+"1vs"+rep_flag+"2_"+ref+"_"+mode["macs2"]+"_idr"+str(config["compute_idr"]["thresh"])+"."+mode["macs2"]+"Peak")) def IDR_input_ppr(wildcards): #if wildcards.CALLER == "macs2_narrow": #return [os.path.join(analysis_dir, "07-IDR/macs2/%s/{IP_IDR}_%s1vs%s2_%s_%s_idr%s.%sPeak" % (model_dir, # "PPR", "PPR", ref, model, config["compute_idr"]["thresh"], model))] - return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPR1vsPPR2_"+ref+"_"+model+"_idr"+str(config["compute_idr"]["thresh"])+"."+model+"Peak")) + return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPR1vsPPR2_"+ref+"_"+mode["macs2"]+"_idr"+str(config["compute_idr"]["thresh"])+"."+mode["macs2"]+"Peak")) def IDR_input_pool(wildcards): #if wildcards.CALLER == "macs2_narrow": #return [os.path.join(analysis_dir, "06-PeakCalling/macs2/%s/{IP_IDR}_PPRPool_peaks.%sPeak" % (model_dir, model))] - return str(os.path.join(analysis_dir, "06-PeakCalling/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPRPool_peaks."+model+"Peak")) + return str(os.path.join(analysis_dir, "06-PeakCalling/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPRPool_peaks."+mode["macs2"]+"Peak")) select_peaks_input_rep = IDR_input_rep select_peaks_input_ppr = IDR_input_ppr select_peaks_input_pool = IDR_input_pool select_peaks_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/logs/{{IP_IDR}}_selected_peaks.o".format(model_dir)) - select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.{}Peak".format(model_dir, model)) + select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.{}Peak".format(model_dir, mode["macs2"])) include: os.path.join(RULES, "select_peaks.rules") final_output.extend(expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP)) @@ -763,16 +774,20 @@ if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApp #---------------------------------- CALL_MOD = [] -if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] +if config["seacr"]["do"] and config["intersectionApproach"]["do"]: + CALL_MOD += ["seacr_" + mode["seacr"]] if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): CALL_MOD += ["macs2_" + model_dir] +if config["sicer"]["do"] and config["intersectionApproach"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] def IA_input(wildcards): if wildcards.CALLER == "macs2_"+ model_dir: - return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{MOD}/{{IP_IDR}}_{CASE}_peaks.{MOD}Peak"), CASE=rep, MOD=config["macs2"]["mode_choice"]) + return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{MOD}/{{IP_IDR}}_{CASE}_peaks.{MOD}Peak"), CASE=rep, MOD=model_dir) elif wildcards.CALLER == "seacr_"+ config["seacr"]["threshold"]: - return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{MOD}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CASE=rep, MOD=config["seacr"]["threshold"]) + return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{MOD}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CASE=rep, MOD=mode["seacr"]) + elif wildcards.CALLER == "sicer_"+ mode["sicer"]: + return expand(os.path.join(analysis_dir, "06-PeakCalling/sicer_{MOD}/{{IP_IDR}}_{CASE}_{REF}_sort{BIASED}-{MOD}.scoreisland"), MOD = mode["sicer"], CASE = rep, REF= ref, BIASED=biasedRegions) if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"] or config["seacr"]["do"] : intersectionApproach_input_rep = IA_input @@ -783,39 +798,55 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co # else: # intersectionApproach_overlap = config["intersectionApproach"]["ia_overlap"] include: os.path.join(RULES, "intersectionApproach.rules") - final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP, MOD=config["macs2"]["mode_choice"])) + final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP)) # MOD=config["macs2"]["mode_choice"] #---------------------------------- # ChipR reproducibility #---------------------------------- -if config["chipr"]["do"] and config["macs2"]["do"]: +CALL_MOD = [] +if config["macs2"]["do"]: + CALL_MOD += ["macs2_" + model_dir] +if config["seacr"]["do"]: + CALL_MOD += ["seacr_" + mode["seacr"]] +if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] +if config["chipr"]["do"]: def chipr_input(wildcards): - return expand(os.path.join(analysis_dir,"06-PeakCalling/macs2_{MOD}/{{MARK}}_{{COND}}_{REP}_peaks.{MOD}Peak"),MOD=model_dir,REP=rep_noidr) - + if config["macs2"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/macs2_{MOD}/{{MARK}}_{{COND}}_{REP}_peaks.{MOD}Peak"), MOD=mode["macs2"],REP=rep_noidr) + + if config["seacr"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/seacr_{MOD}/{{MARK}}_{{COND}}_{REP}.{MOD}.bed"), MOD=mode["seacr"],REP=rep_noidr) + + if config["sicer"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/sicer_{MOD}/{{MARK}}_{{COND}}_{REP}_{REF}_sort{BIASED}-{MOD}.scoreisland"), + MOD=mode["sicer"],REP=rep_noidr,REF=ref, BIASED=biasedRegions) + + chipr_input_peak = chipr_input chipr_output_opti = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_optimal.bed") chipr_output_all = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_all.bed") chipr_output_log = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_log.txt") - chipr_options = config["chipr"]["options"] + chipr_tmp = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_tmp") chipr_log = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/logs/{MARK}_{COND}_log") chipr_log_err = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/logs/{MARK}_{COND}_log_err") include: os.path.join(RULES, "chipr.rules") - final_output.extend(expand(chipr_output_opti,MARK=mark,COND=conds,CALLER=CALL_MOD)) + final_output.extend(expand(chipr_output_opti,MARK=marks,COND=conds,CALLER=CALL_MOD)) #---------------------------------- # Compute ChipR metrics #---------------------------------- -if config["chipr"]["do"] and config["macs2"]["do"]: +if config["chipr"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]): metrics_peaks_input = expand(os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_optimal.bed"),MARK=marks,COND=conds,CALLER=CALL_MOD) metrics_peaks_marks = marks metrics_peaks_conds = conds - metrics_peaks_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/macs2_{}/Chipr/CHIPR_metrics.out".format(model_dir)) + metrics_peaks_logs = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/Chipr/CHIPR_metrics.out"),CALLER = CALL_MOD) metrics_chipr_output = os.path.join(analysis_dir, "CHIPR_metrics.out") include: os.path.join(RULES, "metrics_chipr.rules") final_output.extend([metrics_chipr_output]) @@ -823,8 +854,15 @@ if config["chipr"]["do"] and config["macs2"]["do"]: #---------------------------------- # Compute IA metrics #---------------------------------- +CALL_MOD = [] +if config["intersectionApproach"]["do"] : + if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): + CALL_MOD += ["macs2_" + model_dir] + if config["seacr"]["do"] and config["seacr"]["threshold"] in ["relaxed"]: + CALL_MOD += ["seacr_" + mode["seacr"]] + if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] -if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"]: stats_IA_input = expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP) stats_IA_csv = os.path.join(analysis_dir, "IA_metrics.out") stats_IA_log = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/logs/IA_metrics.out"),CALLER=CALL_MOD) @@ -838,7 +876,7 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co if config["macs2"]["do"] and config["compute_idr"]["do"]: idr_peaks = [] idr_peaks.extend(expand(os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir, - ref,model, config["compute_idr"]["thresh"], model)), zip, IP_IDR=REP_IDR, CASE=CASE)) + ref,mode["macs2"], config["compute_idr"]["thresh"], mode["macs2"])), zip, IP_IDR=REP_IDR, CASE=CASE)) metrics_peaks_input = idr_peaks metrics_peaks_marks = marks metrics_peaks_conds = conds @@ -854,29 +892,33 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]: CALL_MOD = [] if (config["seacr"]["do"] and config["peakAnnotation"]["do"]) : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] + CALL_MOD += ["seacr_" + mode["seacr"]] if (config["macs2"]["do"] and config["peakAnnotation"]["do"]) : - CALL_MOD += ["macs2_" +config["macs2"]["mode_choice"]] + CALL_MOD += ["macs2_" + model_dir] +if (config["sicer"]["do"] and config["peakAnnotation"]["do"]): + CALL_MOD += ["sicer_" + mode["sicer"]] #??? Définir peakAnnotation_log et peakAnnotation_output en tant que variable globale et leur affecter une valeur dans la fonction ??? def indpeak_annot(wildcards): if wildcards.CALLER.find("macs2")!=-1 : - return expand(os.path.join(analysis_dir, - "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_peaks.{MOD}Peak"), - CALLER="macs2",MOD=config['macs2']['mode_choice']) #,CASE=rep) + return expand(os.path.join(analysis_dir,"06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_peaks.{MODE}Peak"), + CALLER="macs2", MOD=model_dir, MODE=mode["macs2"]) #,CASE=rep elif wildcards.CALLER.find("seacr")!=-1 : return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}.{MOD}.bed"), - CALLER="seacr", MOD=config["seacr"]["threshold"]) #,CASE=rep) + CALLER="seacr", MOD=mode["seacr"]) #,CASE=rep + elif wildcards.CALLER.find("sicer")!=-1 : + return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_{REF}_sort{BIASED}-{MOD}.scoreisland"), + CALLER="sicer", MOD=mode["sicer"], REF = ref, BIASED = biasedRegions) return -if config["peakAnnotation"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"]) : +if config["peakAnnotation"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]) : peakAnnotation_input = indpeak_annot peakAnnotation_log = expand(os.path.join(analysis_dir, - "06-PeakCalling/{{CALLER}}/annotated_peaks/logs/{{IP_IDR}}_{{CASE}}_peaks.out"), - MOD=mod) #MOD=model_dir) + "06-PeakCalling/{{CALLER}}/annotated_peaks/logs/{{IP_IDR}}_{{CASE}}_peaks.out")) + #MOD=mod #MOD=model_dir peakAnnotation_output = expand(os.path.join(analysis_dir, - "06-PeakCalling/{{CALLER}}/annotated_peaks/{{IP_IDR}}_{{CASE}}_peaks.txt"), - MOD=mod) #MOD=model_dir) + "06-PeakCalling/{{CALLER}}/annotated_peaks/{{IP_IDR}}_{{CASE}}_peaks.txt")) + #MOD=mod #MOD=model_dir #Debug #print("-----") @@ -895,29 +937,34 @@ CALL_MOD = [] if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] + CALL_MOD += ["seacr_" + mode["seacr"]] #if (config["macs2"]["do"] and config["peakAnnotation"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): #Repetition de la regele edictee dans la partie "Select reproducible peaks" #if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: + if config["macs2"]["do"] and config["peakAnnotation"]["do"] : CALL_MOD += ["macs2_" + model_dir] +if config["sicer"]["do"] and config["peakAnnotation"]["do"]: + CALL_MOD += ["sicer_"+ mode["sicer"]] CONSENSUS_MOD = [] -if (config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) : #"_select" files +if (config["macs2"]["do"] and mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) : #"_select" files CONSENSUS_MOD += ["select."+config["macs2"]["mode_choice"]+"Peak"] else : CONSENSUS_MOD += ["IA.bed"] if config["peakAnnotation"]["do"] and ( - (config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) #"_select" files - or - config["seacr"]["do"] - or - (config["macs2"]["do"] and config["intersectionApproach"]["do"]) - or - config["macs2"]["do"] and model in ["broad"] - ) : + (config["macs2"]["do"] and mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) #"_select" files + or + config["seacr"]["do"] + or + (config["macs2"]["do"] and config["intersectionApproach"]["do"]) + or + (config["macs2"]["do"] and mode["macs2"] in ["broad"]) + or + config["sicer"]["do"] + ): peakAnnotation_input = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{IP_IDR}}_{{CONSENSUS_MOD}}")) peakAnnotation_log = expand(os.path.join(analysis_dir, @@ -946,28 +993,33 @@ CALL_MOD = [] if len(conds) > 1 and config["differential_analysis"]["do"]: - if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"] or config["chipr"]["do"]: - CALL_MOD += ["macs2_" + model_dir] + #if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"] or config["chipr"]["do"]: + #CALL_MOD += ["macs2_" + model_dir] + if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] - if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): + CALL_MOD += ["seacr_" + mode["seacr"]] + if config["macs2"]["do"]: #and config["intersectionApproach"]["do"] or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): CALL_MOD += ["macs2_" + model_dir] + if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] ## fill CALL_MOD according to results.... def getPeakFilesByMark(wildcards): - if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad", "macs2_broad-nomodel" ] or (wildcards.CALLER in ["macs2_narrow","macs2_narrow-nomodel"] and config["intersectionApproach"]["do"]): + #if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad", "macs2_broad-nomodel" ] or (wildcards.CALLER in ["macs2_narrow","macs2_narrow-nomodel"] and config["intersectionApproach"]["do"]): + if config["intersectionApproach"]["do"]: return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{COND}_IA.bed"), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) - elif config["chipr"]["do"] and config["macs2"]["do"] and not config["compute_idr"]["do"]: + elif config["chipr"]["do"] and not config["compute_idr"]["do"]: return expand(os.path.join(analysis_dir,"08-ReproduciblePeaks/{{CALLER}}/Chipr/{{MARK}}_{COND}_DONE_optimal.bed"), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) - else : - return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{{COND}}_select.{}Peak".format(model)), + elif config["macs2"]["do"] : + return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{{COND}}_select.{}Peak".format(mode["macs2"])), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) + #---------------------------------- # get union peaks #---------------------------------- - + #print("conds :",conds) union_peaks_input = getPeakFilesByMark union_peaks_logs = os.path.join(analysis_dir, "09-CountMatrix/{CALLER}/logs/{MARK}_Optimal_selected_peaks.out") if len(conds) == 2 : @@ -999,7 +1051,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: def getBAMFilesByMark(wildcards): - return expand(os.path.join(analysis_dir, "%s/{MARK}_{COND}_{REP}_%s_sort%s.bam" % (biasedRegions_dir, ref, biasedRegions)), + return expand(os.path.join("%s/{MARK}_{COND}_{REP}_%s_sort%s.bam" % (biasedRegions_dir, ref, biasedRegions)), MARK=wildcards.MARK, COND=conds, REP=rep) @@ -1053,6 +1105,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: chipflowr_init_output_dir = os.path.join(analysis_dir, "10-DifferentialAnalysis/{{CALLER}}/{{MARK}}_{}_{}".format(method, norm)) chipflowr_init_config_r = os.path.join(analysis_dir, "10-DifferentialAnalysis/{{CALLER}}/{{MARK}}_{}_{}/config.R".format(method, norm)) chipflowr_init_genome = ref + final_output.extend(expand(chipflowr_init_config_r, CALLER=CALL_MOD, MARK=MARK_OK)) include: os.path.join(RULES, "chipflowr_init.rules") @@ -1074,6 +1127,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: diffannot_logs = os.path.join(analysis_dir, "10-DifferentialAnalysis/{{CALLER}}/{{MARK}}_{}_{}/annotated_results/annotation.log".format(method, norm) ) + diffannot_output_done = os.path.join(analysis_dir,"10-DifferentialAnalysis/{{CALLER}}/{{MARK}}_{}_{}/annotated_results/annotation.done".format(method, norm)) include: os.path.join(RULES, "chipflowr_annotation.rules") final_output.extend(expand(diffannot_output_dir, CALLER=CALL_MOD, MARK=MARK_OK)) @@ -1085,17 +1139,27 @@ if config["igv_session"]["do"]: igv_session_input = expand(bamCoverage_output, SAMPLE=IP_ALL, REF=ref, allow_missing=True) if len(IP_REP) > 1 and config["macs2"]["do"]: if config["intersectionApproach"]["do"]: - igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model, IP_IDR=IP_REP) - elif model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model_dir, IP_IDR=IP_REP) + elif mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: igv_session_input += expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP) else : igv_session_input += expand(macs2_output, SAMPLE=IP_ALL) elif len(IP_REP) > 1 and config["seacr"]["do"]: - igv_session_input += expand(intersectionApproach_output, CALLER="seacr_" + config["seacr"]["threshold"], IP_IDR=IP_REP) + if config["intersectionApproach"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="seacr_" + mode["seacr"], IP_IDR=IP_REP) + else : + igv_session_input += expand(seacr_output, SAMPLE=IP_ALL) + elif len(IP_REP) > 1 and config["sicer"]["do"]: + if config["intersectionApproach"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="sicer_" + mode["sicer"], IP_IDR=IP_REP) + else : + igv_session_input += expand(sicer_output, SAMPLE=IP_ALL) if len(IP_ALL) > 1 and config["macs2"]["do"]: igv_session_input += expand(macs2_output, SAMPLE=IP_ALL) elif len(IP_ALL) > 1 and config["seacr"]["do"]: igv_session_input += expand(seacr_output, SAMPLE=IP_ALL) + elif len(IP_ALL) > 1 and config["sicer"]["do"]: + igv_session_input += expand(sicer_output, SAMPLE=IP_ALL) igv_session_output = os.path.join(analysis_dir, "12-IGV/igv_session.xml") igv_session_genome = ref