diff --git a/Snakefile b/Snakefile index 5e9730a88f0c7314cd52f76dca72f11c3823f74f..862c8deaa5d2353887d55c44ccb95a59dd698d7a 100644 --- a/Snakefile +++ b/Snakefile @@ -408,17 +408,12 @@ final_output = [] # quality control #---------------------------------- - - -""" fastqc_input_fastq = input_data -print(fastqc_input_fastq) +fastqc_input_fastq = input_data fastqc_output_done = os.path.join(analysis_dir, "00-Fastqc/{{SAMPLE}}{}_fastqc.done".format(rt1)) -print(fastqc_output_done) fastqc_wkdir = os.path.join(analysis_dir, "00-Fastqc") fastqc_log = os.path.join(analysis_dir, "00-Fastqc/logs/{{SAMPLE}}{}_fastqc_raw.log".format(rt1)) -print(fastqc_output_done) final_output.extend(expand(fastqc_output_done, SAMPLE=samples)) -include: os.path.join(RULES, "fastqc.rules") """ +include: os.path.join(RULES, "fastqc.rules") #---------------------------------- @@ -710,19 +705,23 @@ else : #---------------------------------- # Peak Calling with MACS2 #---------------------------------- +mode={} #dictionnaire contenant les modes de toutes les methodes utilisees if config["macs2"]["do"]: def INPUTtoIP(wildcards): return str(biasedRegions_dir + "/" + IP_INPUT[IP_INPUT.IP == wildcards.SAMPLE].iloc[0]['INPUT'] + "_{}_sort{}.bam".format(ref, biasedRegions)) - - model = config["macs2"]["mode_choice"] - model_dir = model - mod = [model] + + mode["macs2"] = config["macs2"]["mode_choice"] + model_dir = mode["macs2"] peak_caller = ["macs2"] + + if config["macs2"]["no_model"]: - model_dir += "-nomodel" - if model in ["narrow", "broad"]: + model_dir += "-nomodel" + + mod = [model_dir] + if mode["macs2"] in ["narrow", "broad"]: # Peak Calling on replicates - if model in ["narrow"]: + if mode["macs2"] in ["narrow"]: # add corresponding options macs2_options = "-p {} ".format(config["macs2"]['cutoff']) + config["macs2"]['options'] else: @@ -740,13 +739,13 @@ if config["macs2"]["do"]: macs2_input_bam = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) macs2_control = INPUTtoIP - + macs2_log_out = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.out".format(model_dir)) macs2_log_err = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/logs/{{SAMPLE}}.err".format(model_dir)) - macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, model)) + macs2_output = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}_peaks.{}Peak".format(model_dir, mode["macs2"])) macs2_output_prefix = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{SAMPLE}}".format(model_dir)) - final_output.extend(expand(macs2_output, zip, SAMPLE=ALL_IP_PC)) + final_output.extend(expand(macs2_output, zip, SAMPLE=IP_ALL)) include: os.path.join(RULES, "macs2.rules") else : raise ValueError("Please provides valid model for MACS2") @@ -760,21 +759,21 @@ if config["seacr"]["do"]: if not config["macs2"]["do"]: peak_caller = ["seacr"] mod = [config["seacr"]["threshold"]] - model_dir = mod + else: peak_caller += ["seacr"] mod += [config["seacr"]["threshold"]] - #if model_dir : - # model_dir = - + mode["seacr"] = config["seacr"]["threshold"] # produce bedgrah files bedgraph_input = "{}/{{SAMPLE}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions) if config["design"]["spike"]: bedgraph_scaleFactor = compute_scaling_factor_output else: bedgraph_scaleFactor = bedgraph_input + bedgraph_genome = config["genome"]["fasta_file"]+".fai" + if paired: bedgraph_options = " -bedpe " else: @@ -798,27 +797,24 @@ if config["seacr"]["do"]: seacr_logs_std = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"logs/{SAMPLE}_seacr_calling.out") seacr_logs_err = os.path.join(analysis_dir, ("06-PeakCalling/seacr_"+config["seacr"]["threshold"]),"logs/{SAMPLE}_seacr_calling.err") final_output.extend(expand(seacr_output, SAMPLE=IP_ALL)) - include: os.path.join(RULES, "seacr.rules") - + include: os.path.join(RULES, "seacr.rules") #---------------------------------- # Peak Calling with SICER #---------------------------------- if config["sicer"]["do"]: + window_size = config["sicer"]["window_size"] gap_size = config["sicer"]["gap_size"] - sicer_mode = "W{}-G{}".format(window_size, gap_size) - - #if model_dir : - # model_dir = - + mode["sicer"] = "W{}-G{}".format(window_size, gap_size) + if not config["macs2"]["do"] and not config["seacr"]["do"]: peak_caller = ["sicer"] - mod = [sicer_mode] + mod = [mode["sicer"]] else: peak_caller += ["sicer"] - mod += [sicer_mode] + mod += [mode["sicer"]] def INPUTtoIP(wildcards): return str(biasedRegions_dir + "/" + IP_INPUT[IP_INPUT.IP == wildcards.SAMPLE].iloc[0]['INPUT'] + "_{}_sort{}.bam".format(ref, biasedRegions)) @@ -829,14 +825,15 @@ if config["sicer"]["do"]: sicer_options = config["sicer"]["options"] sicer_genome = config["sicer"]["genome"] - sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(sicer_mode)) - sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(sicer_mode)) - sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(sicer_mode, ref, biasedRegions, sicer_mode)) - sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(sicer_mode)) + sicer_logs_out = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.out".format(mode["sicer"])) + sicer_logs_err = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/logs/{{SAMPLE}}_sicer_calling.err".format(mode["sicer"])) + sicer_output = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/{{SAMPLE}}_{}_sort{}-{}.scoreisland".format(mode["sicer"], ref, biasedRegions, mode["sicer"])) + sicer_output_dir = os.path.join(analysis_dir, "06-PeakCalling/sicer_{}/".format(mode["sicer"])) include: os.path.join(RULES, "sicer.rules") - final_output.extend(expand(sicer_output, SAMPLE = ALL_IP_PC)) + final_output.extend(expand(sicer_output, SAMPLE = IP_ALL)) +#print("ip_all:\n ",IP_ALL,"\nrIP_REP:\n",IP_REP,"\n REP_IDR\n:",REP_IDR,"\n MARks\n:", marks) #---------------------------------- # Peak Calling metrics #---------------------------------- @@ -849,7 +846,7 @@ if config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]: return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{{MOD}}/{IP_REP}.{{MOD}}.bed"), IP_REP=ALL_IP_PC) elif wildcards.CALLER == "sicer": return expand(os.path.join(analysis_dir, "06-PeakCalling/sicer_{{MOD}}/{IP_REP}_{REF}_sort{BIASED}-{{MOD}}.scoreisland"), IP_REP=ALL_IP_PC, REF = ref, BIASED = biasedRegions) - + #print("peak_caller:",peak_caller) stats_peakCalling_input = stats_pc_input stats_peakCalling_csv = os.path.join(analysis_dir, "{CALLER}_{MOD}_Peaks_metrics_mqc.out") stats_peakCalling_marks = marks @@ -859,21 +856,21 @@ if config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]: include: os.path.join(RULES, "stats_peakCalling.rules") final_output.extend(expand(stats_peakCalling_csv, zip, CALLER=peak_caller, MOD=mod)) - #---------------------------------- # IDR computing #---------------------------------- -if config["macs2"]["do"] and config["compute_idr"]["do"]: - - if model in ["narrow"]: - compute_idr_mode = "narrowPeak" - else : - compute_idr_mode = "broadPeak" - compute_idr_input1 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}1_peaks.{}Peak".format(model_dir, model)) - compute_idr_input2 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}2_peaks.{}Peak".format(model_dir, model)) - compute_idr_output = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr.txt".format(model_dir, ref, model)) - compute_idr_output_peak = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir,ref, model, config["compute_idr"]["thresh"], model)) - compute_idr_log = os.path.join(analysis_dir, "07-IDR/macs2_{}/logs/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_idr.out".format(model_dir,model)) +#ne fonctionne qu'avec macs en narrow +#call_mod = [ method+"_"+mode[method] for method in peak_caller] + +if config["macs2"]["do"] and mode["macs2"] in ["narrow"] and config["compute_idr"]["do"]: + + compute_idr_mode = "narrowPeak" + compute_idr_input1 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}1_peaks.{}Peak".format(model_dir, mode["macs2"])) + compute_idr_input2 = os.path.join(analysis_dir, "06-PeakCalling/macs2_{}/{{IP_IDR}}_{{CASE}}2_peaks.{}Peak".format(model_dir, mode["macs2"])) + compute_idr_output = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr.txt".format(model_dir, ref, mode["macs2"])) + compute_idr_output_peak = os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir, + ref, mode["macs2"], config["compute_idr"]["thresh"], mode["macs2"])) + compute_idr_log = os.path.join(analysis_dir, "07-IDR/macs2_{}/logs/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_idr.out".format(model_dir,mode["macs2"])) include: os.path.join(RULES, "compute_idr.rules") final_output.extend(expand(compute_idr_output, zip, IP_IDR=REP_IDR, CASE=CASE)) @@ -882,31 +879,30 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]: #---------------------------------- # Select reproducible peaks #---------------------------------- -CALL_MOD = [] - -if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: - CALL_MOD += ["macs2_" + model_dir] +CALL_MOD=[] +if config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: + CALL_MOD += ["macs2_" + mode["macs2"]] # Select IDR peaks def IDR_input_rep(wildcards): #if wildcards.CALLER == "macs2_narrow": - return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_"+rep_flag+"1vs"+rep_flag+"2_"+ref+"_"+model+"_idr"+str(config["compute_idr"]["thresh"])+"."+model+"Peak")) + return str(os.path.join(analysis_dir, "07-IDR/macs2_"+mode["macs2"]+"/"+wildcards.IP_IDR+"_"+rep_flag+"1vs"+rep_flag+"2_"+ref+"_"+mode["macs2"]+"_idr"+str(config["compute_idr"]["thresh"])+"."+mode["macs2"]+"Peak")) def IDR_input_ppr(wildcards): #if wildcards.CALLER == "macs2_narrow": - #return [os.path.join(analysis_dir, "07-IDR/macs2/%s/{IP_IDR}_%s1vs%s2_%s_%s_idr%s.%sPeak" % (model_dir, + #return [os.path.join(analysis_dir, "07-IDR/macs2/%s/{IP_IDR}_%s1vs%s2_%s_%s_idr%s.%sPeak" % (model, # "PPR", "PPR", ref, model, config["compute_idr"]["thresh"], model))] - return str(os.path.join(analysis_dir, "07-IDR/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPR1vsPPR2_"+ref+"_"+model+"_idr"+str(config["compute_idr"]["thresh"])+"."+model+"Peak")) + return str(os.path.join(analysis_dir, "07-IDR/macs2_"+mode["macs2"]+"/"+wildcards.IP_IDR+"_PPR1vsPPR2_"+ref+"_"+mode["macs2"]+"_idr"+str(config["compute_idr"]["thresh"])+"."+mode["macs2"]+"Peak")) def IDR_input_pool(wildcards): #if wildcards.CALLER == "macs2_narrow": - #return [os.path.join(analysis_dir, "06-PeakCalling/macs2/%s/{IP_IDR}_PPRPool_peaks.%sPeak" % (model_dir, model))] - return str(os.path.join(analysis_dir, "06-PeakCalling/macs2_"+model_dir+"/"+wildcards.IP_IDR+"_PPRPool_peaks."+model+"Peak")) + #return [os.path.join(analysis_dir, "06-PeakCalling/macs2/%s/{IP_IDR}_PPRPool_peaks.%sPeak" % (model, model))] + return str(os.path.join(analysis_dir, "06-PeakCalling/macs2_"+mode["macs2"]+"/"+wildcards.IP_IDR+"_PPRPool_peaks."+mode["macs2"]+"Peak")) select_peaks_input_rep = IDR_input_rep select_peaks_input_ppr = IDR_input_ppr select_peaks_input_pool = IDR_input_pool select_peaks_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/logs/{{IP_IDR}}_selected_peaks.o".format(model_dir)) - select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.{}Peak".format(model_dir, model)) + select_peaks_output = os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}_{}/{{IP_IDR}}_select.{}Peak".format(model_dir,mode["macs2"])) include: os.path.join(RULES, "select_peaks.rules") final_output.extend(expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP)) @@ -915,16 +911,20 @@ if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApp #---------------------------------- CALL_MOD = [] -if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] +if config["seacr"]["do"] and config["intersectionApproach"]["do"]: + CALL_MOD += ["seacr_" + mode["seacr"]] if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): CALL_MOD += ["macs2_" + model_dir] +if config["sicer"]["do"] and config["intersectionApproach"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] def IA_input(wildcards): - if wildcards.CALLER == "macs2_"+ model_dir: - return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{MOD}/{{IP_IDR}}_{CASE}_peaks.{MOD}Peak"), CASE=rep, MOD=config["macs2"]["mode_choice"]) - elif wildcards.CALLER == "seacr_"+ config["seacr"]["threshold"]: - return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{MOD}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CASE=rep, MOD=config["seacr"]["threshold"]) + if wildcards.CALLER == "macs2_" + config["macs2"]["mode_choice"] or wildcards.CALLER == "macs2_" + config["macs2"]["mode_choice"] + "-nomodel": + return expand(os.path.join(analysis_dir, "06-PeakCalling/macs2_{MOD}/{{IP_IDR}}_{CASE}_peaks.{MOD}Peak"), CASE=rep, MOD=model_dir) + elif wildcards.CALLER == "seacr_" + config["seacr"]["threshold"]: + return expand(os.path.join(analysis_dir, "06-PeakCalling/seacr_{MOD}/{{IP_IDR}}_{CASE}.{MOD}.bed"), CASE=rep, MOD=mode["seacr"]) + elif wildcards.CALLER == "sicer_" + mode["sicer"]: + return expand(os.path.join(analysis_dir, "06-PeakCalling/sicer_{MOD}/{{IP_IDR}}_{CASE}_{REF}_sort{BIASED}-{MOD}.scoreisland"), MOD = mode["sicer"], CASE = rep, REF= ref, BIASED=biasedRegions) if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"] or config["seacr"]["do"] : intersectionApproach_input_rep = IA_input @@ -934,18 +934,35 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co # intersectionApproach_overlap = 0.8 # else: # intersectionApproach_overlap = config["intersectionApproach"]["ia_overlap"] + print("rep :",rep) include: os.path.join(RULES, "intersectionApproach.rules") - final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP, MOD=config["macs2"]["mode_choice"])) + final_output.extend(expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP)) #MOD=config["macs2"]["mode_choice"] #---------------------------------- # ChipR reproducibility #---------------------------------- -if config["chipr"]["do"] and config["macs2"]["do"]: +CALL_MOD = [] +if config["macs2"]["do"]: + CALL_MOD += ["macs2_" + model_dir] +if config["seacr"]["do"]: + CALL_MOD += ["seacr_" + mode["seacr"]] +if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] +if config["chipr"]["do"]: def chipr_input(wildcards): - return expand(os.path.join(analysis_dir,"06-PeakCalling/macs2_{MOD}/{{MARK}}_{{COND}}_{REP}_peaks.{MOD}Peak"),MOD=model_dir,REP=rep_noidr) - + if config["macs2"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/macs2_{MOD}/{{MARK}}_{{COND}}_{REP}_peaks.{MOD}Peak"), MOD=model_dir,REP=rep_noidr) + + if config["seacr"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/seacr_{MOD}/{{MARK}}_{{COND}}_{REP}.{MOD}.bed"), MOD=mode["seacr"],REP=rep_noidr) + + if config["sicer"]["do"]: + return expand(os.path.join(analysis_dir,"06-PeakCalling/sicer_{MOD}/{{MARK}}_{{COND}}_{REP}_{REF}_sort{BIASED}-{MOD}.scoreisland"), + MOD=mode["sicer"],REP=rep_noidr,REF=ref, BIASED=biasedRegions) + + chipr_input_peak = chipr_input chipr_output_opti = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_optimal.bed") chipr_output_all = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_all.bed") @@ -956,18 +973,18 @@ if config["chipr"]["do"] and config["macs2"]["do"]: chipr_log_err = os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/logs/{MARK}_{COND}_log_err") include: os.path.join(RULES, "chipr.rules") - final_output.extend(expand(chipr_output_opti,MARK=mark,COND=conds,CALLER=CALL_MOD)) + final_output.extend(expand(chipr_output_opti,MARK=marks,COND=conds,CALLER=CALL_MOD)) #---------------------------------- # Compute ChipR metrics #---------------------------------- -if config["chipr"]["do"] and config["macs2"]["do"]: - +if config["chipr"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]): + #print(f"rep_noidr : {rep_noidr}\n call_mod : {CALL_MOD}") metrics_peaks_input = expand(os.path.join(analysis_dir,"08-ReproduciblePeaks/{CALLER}/Chipr/{MARK}_{COND}_DONE_optimal.bed"),MARK=marks,COND=conds,CALLER=CALL_MOD) metrics_peaks_marks = marks metrics_peaks_conds = conds - metrics_peaks_logs = os.path.join(analysis_dir, "08-ReproduciblePeaks/macs2_{}/Chipr/CHIPR_metrics.out".format(model_dir)) + metrics_peaks_logs = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/Chipr/CHIPR_metrics.out"),CALLER = CALL_MOD) metrics_chipr_output = os.path.join(analysis_dir, "CHIPR_metrics.out") include: os.path.join(RULES, "metrics_chipr.rules") final_output.extend([metrics_chipr_output]) @@ -975,8 +992,15 @@ if config["chipr"]["do"] and config["macs2"]["do"]: #---------------------------------- # Compute IA metrics #---------------------------------- +CALL_MOD = [] +if config["intersectionApproach"]["do"] : + if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): + CALL_MOD += ["macs2_" + model_dir] + if config["seacr"]["do"] and config["seacr"]["threshold"] in ["relaxed"]: + CALL_MOD += ["seacr_" + mode["seacr"]] + if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] -if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or config["intersectionApproach"]["do"]: stats_IA_input = expand(intersectionApproach_output, CALLER=CALL_MOD, IP_IDR=IP_REP) stats_IA_csv = os.path.join(analysis_dir, "IA_metrics.out") stats_IA_log = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{CALLER}/logs/IA_metrics.out"),CALLER=CALL_MOD) @@ -990,7 +1014,8 @@ if (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]) or co if config["macs2"]["do"] and config["compute_idr"]["do"]: idr_peaks = [] idr_peaks.extend(expand(os.path.join(analysis_dir, "07-IDR/macs2_{}/{{IP_IDR}}_{{CASE}}1vs{{CASE}}2_{}_{}_idr{}.{}Peak".format(model_dir, - ref,model, config["compute_idr"]["thresh"], model)), zip, IP_IDR=REP_IDR, CASE=CASE)) + ref,mode["macs2"], config["compute_idr"]["thresh"], mode["macs2"])), zip, IP_IDR=REP_IDR, CASE=CASE)) + metrics_peaks_input = idr_peaks metrics_peaks_marks = marks metrics_peaks_conds = conds @@ -1005,38 +1030,32 @@ if config["macs2"]["do"] and config["compute_idr"]["do"]: #---------------------------------- CALL_MOD = [] -if (config["seacr"]["do"] and config["peakAnnotation"]["do"]) : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] -if (config["macs2"]["do"] and config["peakAnnotation"]["do"]) : - CALL_MOD += ["macs2_" +config["macs2"]["mode_choice"]] - +if (config["seacr"]["do"] and config["peakAnnotation"]["do"]): + CALL_MOD += ["seacr_" + mode["seacr"]] +if (config["macs2"]["do"] and config["peakAnnotation"]["do"]): + CALL_MOD += ["macs2_" + model_dir] +if (config["sicer"]["do"] and config["peakAnnotation"]["do"]): + CALL_MOD += ["sicer_" + mode["sicer"]] +#print("IP_rep :",IP_REP,"case:",rep_noidr) #??? Définir peakAnnotation_log et peakAnnotation_output en tant que variable globale et leur affecter une valeur dans la fonction ??? def indpeak_annot(wildcards): if wildcards.CALLER.find("macs2")!=-1 : - return expand(os.path.join(analysis_dir, - "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_peaks.{MOD}Peak"), - CALLER="macs2",MOD=config['macs2']['mode_choice']) #,CASE=rep) + return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_peaks.{MOD}Peak"), + CALLER="macs2",MOD= model_dir) #,CASE=rep) elif wildcards.CALLER.find("seacr")!=-1 : return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}.{MOD}.bed"), - CALLER="seacr", MOD=config["seacr"]["threshold"]) #,CASE=rep) + CALLER="seacr", MOD=mode["seacr"]) #,CASE=rep) + elif wildcards.CALLER.find("sicer")!=-1 : + return expand(os.path.join(analysis_dir, "06-PeakCalling/{CALLER}_{MOD}/{{IP_IDR}}_{{CASE}}_{REF}_sort{BIASED}-{MOD}.scoreisland"), + CALLER="sicer", MOD=mode["sicer"], REF = ref, BIASED = biasedRegions) return - -if config["peakAnnotation"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"]) : +if config["peakAnnotation"]["do"] and (config["macs2"]["do"] or config["seacr"]["do"] or config["sicer"]["do"]) : peakAnnotation_input = indpeak_annot - peakAnnotation_log = expand(os.path.join(analysis_dir, - "06-PeakCalling/{{CALLER}}/annotated_peaks/logs/{{IP_IDR}}_{{CASE}}_peaks.out"), - MOD=mod) #MOD=model_dir) - peakAnnotation_output = expand(os.path.join(analysis_dir, - "06-PeakCalling/{{CALLER}}/annotated_peaks/{{IP_IDR}}_{{CASE}}_peaks.txt"), - MOD=mod) #MOD=model_dir) - - #Debug - #print("-----") - #print("peakAnnotation_input={0}".format(expand(peakAnnotation_input, CALLER=CALL_MOD, IP_IDR=IP_REP, CASE=rep))) - #print("-----") - #print("peakAnnotation_output={0}".format(expand(peakAnnotation_output, CALLER=CALL_MOD, IP_IDR=IP_REP, CASE=rep))) - #print("-----") - + peakAnnotation_log = expand(os.path.join(analysis_dir,"06-PeakCalling/{{CALLER}}/annotated_peaks/logs/{{IP_IDR}}_{{CASE}}_peaks.out")) + #MOD=mod) #MOD=model) + peakAnnotation_output = expand(os.path.join(analysis_dir,"06-PeakCalling/{{CALLER}}/annotated_peaks/{{IP_IDR}}_{{CASE}}_peaks.txt")) + #MOD=mod) #MOD=model) + include: os.path.join(RULES, "individualPeakAnnotation.rules") final_output.extend(expand(peakAnnotation_output, IP_IDR=IP_REP, CASE=rep_noidr, CALLER=CALL_MOD)) @@ -1047,29 +1066,33 @@ CALL_MOD = [] if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] + CALL_MOD += ["seacr_" + mode["seacr"]] #if (config["macs2"]["do"] and config["peakAnnotation"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): #Repetition de la regele edictee dans la partie "Select reproducible peaks" #if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: if config["macs2"]["do"] and config["peakAnnotation"]["do"] : CALL_MOD += ["macs2_" + model_dir] +if config["sicer"]["do"] and config["peakAnnotation"]["do"]: + CALL_MOD += ["sicer_"+ mode["sicer"]] CONSENSUS_MOD = [] -if (config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) : #"_select" files - CONSENSUS_MOD += ["select."+config["macs2"]["mode_choice"]+"Peak"] +if (config["macs2"]["do"] and mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) : #"_select" files + CONSENSUS_MOD += ["select."+mode["macs2"]+"Peak"] else : CONSENSUS_MOD += ["IA.bed"] if config["peakAnnotation"]["do"] and ( - (config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) #"_select" files - or - config["seacr"]["do"] + (config["macs2"]["do"] and mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]) #"_select" files + or + config["seacr"]["do"] or (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or - config["macs2"]["do"] and model in ["broad"] - ) : + (config["macs2"]["do"] and mode["macs2"] in ["broad"]) + or + config["sicer"]["do"] + ): peakAnnotation_input = expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{IP_IDR}}_{{CONSENSUS_MOD}}")) peakAnnotation_log = expand(os.path.join(analysis_dir, @@ -1092,30 +1115,33 @@ if config["peakAnnotation"]["do"] and ( #---------------------------------- # Run differential analysis -#---------------------------------- +#----------------------------------### CALL_MOD = [] if len(conds) > 1 and config["differential_analysis"]["do"]: - if config["macs2"]["do"] and model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"] or config["chipr"]["do"]: - CALL_MOD += ["macs2_" + model_dir] - if config["seacr"]["do"] : - CALL_MOD += ["seacr_" + config["seacr"]["threshold"]] - if (config["macs2"]["do"] and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): + #if config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"] or config["chipr"]["do"]: + #CALL_MOD += ["macs2_" + model] + if config["seacr"]["do"]: + CALL_MOD += ["seacr_" + mode["seacr"]] + if config["macs2"]["do"]: # and config["intersectionApproach"]["do"]) or (config["macs2"]["do"] and config["macs2"]["mode_choice"] in ["broad"]): CALL_MOD += ["macs2_" + model_dir] - - ## fill CALL_MOD according to results.... + if config["sicer"]["do"]: + CALL_MOD += ["sicer_" + mode["sicer"]] + #print(f"model_dir: {model_dir}") + ## fill CALL_MOD according to results... def getPeakFilesByMark(wildcards): - if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad", "macs2_broad-nomodel" ] or (wildcards.CALLER in ["macs2_narrow","macs2_narrow-nomodel"] and config["intersectionApproach"]["do"]): + #if wildcards.CALLER in ["seacr_"+config["seacr"]["threshold"], "macs2_broad", "macs2_broad-nomodel" ] or (wildcards.CALLER in ["macs2_narrow","macs2_narrow-nomodel"] and config["intersectionApproach"]["do"]): + if config["intersectionApproach"]["do"]: return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{COND}_IA.bed"), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) - elif config["chipr"]["do"] and config["macs2"]["do"] and not config["compute_idr"]["do"]: + elif config["chipr"]["do"] and not config["compute_idr"]["do"]: return expand(os.path.join(analysis_dir,"08-ReproduciblePeaks/{{CALLER}}/Chipr/{{MARK}}_{COND}_DONE_optimal.bed"), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) - else : - return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{{COND}}_select.{}Peak".format(model)), + elif config["macs2"]["do"] : + return expand(os.path.join(analysis_dir, "08-ReproduciblePeaks/{{CALLER}}/{{MARK}}_{{COND}}_select.{}Peak".format(mode["macs2"])), CALLER=wildcards.CALLER, MARK=wildcards.MARK, COND=conds) - + #---------------------------------- # get union peaks @@ -1128,7 +1154,7 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: elif len(conds) == 3 : union_peaks_output = os.path.join(analysis_dir, "09-CountMatrix/{{CALLER}}/{{MARK}}_{}u{}u{}_{}.optimal.Peak_overlap.bed".format(conds[0], conds[1], conds[2], ref)) else : - union_peaks_output = os.path.join(analysis_dir, "09-CountMatrix/{{CALLER}}/{{MARK}}_all_conds_{}.optimal.Peak_overlap.bed".format(ref)) + union_peaks_output = os.path.join(analysis_dir, "09-CountMatrix/{{CALLER}}/{{MARK}}_all_conds_{}.optimal.Peak_overlap.bed".format(ref)) include: os.path.join(RULES, "union_peaks.rules") final_output.extend(expand(union_peaks_output, zip, CALLER=CALL_MOD, MARK=MARK_OK)) @@ -1152,20 +1178,20 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: def getBAMFilesByMark(wildcards): - return expand(os.path.join(analysis_dir, "%s/{MARK}_{COND}_{REP}_%s_sort%s.bam" % (biasedRegions_dir, ref, biasedRegions)), + return expand(os.path.join("{}/{{MARK}}_{{COND}}_{{REP}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions)), MARK=wildcards.MARK, COND=conds, REP=rep) #---------------------------------- # feature Count on peaks #---------------------------------- - - + feature_counts_input = getBAMFilesByMark feature_counts_optional_input = [] if config["differential_analysis"]["input_counting"]: feature_counts_optional_input += expand("{}/{{INPUT}}_{}_sort{}.bam".format(biasedRegions_dir, ref, biasedRegions), INPUT=CORR_INPUT) feature_counts_output_count = os.path.join(analysis_dir, "09-CountMatrix/{CALLER}/{MARK}_Matrix_Optimal_Peak.mtx") + if len(conds) == 2 : feature_counts_gff = os.path.join(analysis_dir, "09-CountMatrix/{{CALLER}}/{{MARK}}_{}u{}_{}.optimal.Peak_overlap.gff".format(conds[0], conds[1], ref)) elif len(conds) == 3 : @@ -1177,8 +1203,8 @@ if len(conds) > 1 and config["differential_analysis"]["do"]: feature_counts_options = "-t peak -g gene_id" feature_counts_threads = 4 final_output.extend(expand(feature_counts_output_count, zip, CALLER=CALL_MOD, MARK=MARK_OK)) - include: os.path.join(RULES, "feature_counts.rules") - + include: os.path.join(RULES, "feature_counts.rules") + #---------------------------------- # differential analysis on peaks #---------------------------------- @@ -1240,19 +1266,29 @@ if config["igv_session"]["do"]: igv_session_input = expand(bamCoverage_output, SAMPLE=IP_ALL, REF=ref, allow_missing=True) if len(IP_REP) > 1 and config["macs2"]["do"]: if config["intersectionApproach"]["do"]: - igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model, IP_IDR=IP_REP) - elif model in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="macs2_" + model_dir, IP_IDR=IP_REP) + elif mode["macs2"] in ["narrow"] and not config["intersectionApproach"]["do"] and config["compute_idr"]["do"]: igv_session_input += expand(select_peaks_output, CALLER="macs2", IP_IDR=IP_REP) else : igv_session_input += expand(macs2_output, SAMPLE=IP_ALL) elif len(IP_REP) > 1 and config["seacr"]["do"]: - igv_session_input += expand(intersectionApproach_output, CALLER="seacr_" + config["seacr"]["threshold"], IP_IDR=IP_REP) + if config["intersectionApproach"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="seacr_" + mode["seacr"], IP_IDR=IP_REP) + else : + igv_session_input += expand(seacr_output, SAMPLE=IP_ALL) + elif len(IP_REP) > 1 and config["sicer"]["do"]: + if config["intersectionApproach"]["do"]: + igv_session_input += expand(intersectionApproach_output, CALLER="sicer_" + mode["sicer"], IP_IDR=IP_REP) + else : + igv_session_input += expand(sicer_output, SAMPLE=IP_ALL) if len(IP_ALL) > 1 and config["macs2"]["do"]: igv_session_input += expand(macs2_output, SAMPLE=IP_ALL) elif len(IP_ALL) > 1 and config["seacr"]["do"]: igv_session_input += expand(seacr_output, SAMPLE=IP_ALL) + elif len(IP_ALL) > 1 and config["sicer"]["do"]: + igv_session_input += expand(sicer_output, SAMPLE=IP_ALL) - igv_session_output = os.path.join(analysis_dir, "12-IGV/igv_session.xml") + igv_session_output = os.path.join(analysis_dir, "12-IGV/test/igv_session.xml") igv_session_genome = ref igv_session_mark = marks igv_session_conds = conds @@ -1304,4 +1340,4 @@ onsuccess: if pattern.match(filepath): if not os.path.exists(dest): os.makedirs(dest) - shutil.move(filepath, dest) + shutil.move(filepath, dest) \ No newline at end of file