Commit 9a8db1fd authored by Blaise Li's avatar Blaise Li
Browse files

Check contrasts against library names.

Also did a little code cleaning and added a gene list to the small RNA
pipeline.
parent df0bbc29
......@@ -117,6 +117,11 @@ lib2raw = config["lib2raw"]
LIBS = list(lib2raw.keys())
REPS = config["replicates"]
COND_PAIRS = config["cond_pairs"]
msg = "\n".join([
"Some contrats do not use known library names.",
"Contrasts:"
", ".join([f"({cond}, {ref})" for (cond, ref) in COND_PAIRS])])
assert all([cond in LIBS and ref in LIBS for (cond, ref) in COND_PAIRS]), msg
CONTRASTS = [f"{cond1}_vs_{cond2}" for [cond1, cond2] in COND_PAIRS]
CONTRAST2PAIR = dict(zip(CONTRASTS, COND_PAIRS))
CONDITIONS = [{
......@@ -240,13 +245,6 @@ def get_distinct(nr):
filtered_product = filter_combinator(product, forbidden)
from itertools import repeat
#print(forbidden)
#LIBTREATREPS = list(map(dict, filtered_product(list(zip(repeat("lib"), LIBS)), list(zip(repeat("treat"), TREATS)), list(zip(repeat("rep"), REPS)))))
#
#LIBTREATS = list({"{lib}_{treat}".format(**libtreatrep) for libtreatrep in LIBTREATREPS})
wildcard_constraints:
lib="|".join(LIBS),
rep="\d+",
......@@ -262,23 +260,42 @@ rule all:
"""This top rule is used to drive the whole workflow by taking as input its final products."""
input:
expand(OPJ(
output_dir, "{trimmer}", "figures", aligner, "{counter}", "{contrast}", "{orientation}_{biotype}", "MA_with_{id_list}.pdf"), trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS, orientation=ORIENTATIONS, biotype=DE_BIOTYPES, id_list=ID_LISTS),
output_dir, "{trimmer}", "figures", aligner, "{counter}",
"{contrast}", "{orientation}_{biotype}", "MA_with_{id_list}.pdf"),
trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS,
orientation=ORIENTATIONS, biotype=DE_BIOTYPES, id_list=ID_LISTS),
expand(OPJ(
output_dir, "{trimmer}", "figures", aligner, "{counter}", "{contrast}", "{orientation}_{biotype}", "{fold_type}_distribution.pdf"), trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS, orientation=ORIENTATIONS, biotype=DE_BIOTYPES, fold_type=["log2FoldChange"]),
output_dir, "{trimmer}", "figures", aligner, "{counter}",
"{contrast}", "{orientation}_{biotype}", "{fold_type}_distribution.pdf"),
trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS,
orientation=ORIENTATIONS, biotype=DE_BIOTYPES, fold_type=["log2FoldChange"]),
expand(OPJ(
output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{counter}", "deseq2", "{contrast}", "{orientation}_{biotype}", "counts_and_res.txt"), trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS, orientation=ORIENTATIONS, biotype=DE_BIOTYPES),
output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{counter}",
"deseq2", "{contrast}", "{orientation}_{biotype}", "counts_and_res.txt"),
trimmer=TRIMMERS, counter=COUNTERS, contrast=CONTRASTS,
orientation=ORIENTATIONS, biotype=DE_BIOTYPES),
expand(OPJ(
output_dir, "{trimmer}", "figures", aligner, "{counter}", "{biotype}_{orientation}_PCA.{fig_format}"), trimmer=TRIMMERS, counter=COUNTERS, biotype=COUNT_BIOTYPES, orientation=ORIENTATIONS, fig_format=FIG_FORMATS),
output_dir, "{trimmer}", "figures", aligner, "{counter}",
"{biotype}_{orientation}_PCA.{fig_format}"),
trimmer=TRIMMERS, counter=COUNTERS, biotype=COUNT_BIOTYPES,
orientation=ORIENTATIONS, fig_format=FIG_FORMATS),
#expand(OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{counter}", "all_on_C_elegans", "alltypes_{orientation}_counts.txt"), trimmer=TRIMMERS, counter=COUNTERS, orientation=["all"]),
#expand(expand(OPJ(output_dir, "{{trimmer}}", aligner, "mapped_C_elegans", "{{counter}}", "{lib}_{rep}_on_C_elegans", "{{biotype}}_{{orientation}}_counts.txt"), filtered_product, lib=LIBS, rep=REPS), trimmer=TRIMMERS, counter=COUNTERS, biotype=COUNT_BIOTYPES, orientation=["all"]),
expand(expand(OPJ(
output_dir, "{{trimmer}}", "figures", aligner, "{lib}_{rep}", "adapt_on_C_elegans_last_bases.pdf"), filtered_product, lib=LIBS, rep=REPS), trimmer=TRIMMERS),
output_dir, "{{trimmer}}", "figures", aligner, "{lib}_{rep}",
"adapt_on_C_elegans_last_bases.pdf"), filtered_product, lib=LIBS, rep=REPS),
trimmer=TRIMMERS),
expand(expand(OPJ(
output_dir, "{{trimmer}}", aligner, "mapped_C_elegans", "{lib}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), filtered_product, lib=LIBS, rep=REPS), trimmer=TRIMMERS, norm_type=NORM_TYPES, orientation=["all"]),
output_dir, "{{trimmer}}", aligner, "mapped_C_elegans",
"{lib}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), filtered_product, lib=LIBS, rep=REPS),
trimmer=TRIMMERS, norm_type=NORM_TYPES, orientation=["all"]),
#expand(OPJ(output_dir, "{trimmer}", "figures", aligner, "{lib}_mean", "{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)), trimmer=TRIMMERS, lib=LIBS, orientation=["all"], biotype=["protein_coding"], fig_format=FIG_FORMATS),
#expand(OPJ(output_dir, "{trimmer}", "figures", aligner, "{lib}_mean", "{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)), trimmer=TRIMMERS, lib=LIBS, orientation=["all"], biotype=METAGENE_BIOTYPES, fig_format=FIG_FORMATS),
expand(OPJ(
output_dir, "{trimmer}", "figures", aligner, "{lib}_by_{norm_type}_mean", "{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)), trimmer=TRIMMERS, lib=LIBS, norm_type=NORM_TYPES, orientation=["all"], biotype=METAGENE_BIOTYPES, fig_format=FIG_FORMATS),
output_dir, "{trimmer}", "figures", aligner, "{lib}_by_{norm_type}_mean",
"{orientation}_on_merged_isolated_%d_{biotype}_min_%d_meta_profile.{fig_format}" % (MIN_DIST, META_MIN_LEN)),
trimmer=TRIMMERS, lib=LIBS, norm_type=NORM_TYPES, orientation=["all"],
biotype=METAGENE_BIOTYPES, fig_format=FIG_FORMATS),
include: ensure_relative(irules["link_raw_data"], workflow.basedir)
......@@ -368,10 +385,6 @@ rule sam2indexedbam:
err = OPJ(log_dir, "{trimmer}", "sam2indexedbam", "{lib}_{rep}_{type}.err"),
threads:
4
# shell:
# """
# nice -n 19 ionice -c2 -n7 sam2indexedbam.sh {input.sam} 1> {log.log} 2> {log.err}
# """
wrapper:
"file:///pasteur/homes/bli/src/bioinfo_utils/snakemake_wrappers/sam2indexedbam"
......@@ -530,14 +543,22 @@ def parse_htseq_counts(counts_filename):
rule feature_count_reads:
input:
sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_{rep}_on_C_elegans_sorted.bam"),
bai = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_{rep}_on_C_elegans_sorted.bam.bai"),
sorted_bam = OPJ(
output_dir, "{trimmer}", aligner, "mapped_C_elegans",
"{lib}_{rep}_on_C_elegans_sorted.bam"),
bai = OPJ(
output_dir, "{trimmer}", aligner, "mapped_C_elegans",
"{lib}_{rep}_on_C_elegans_sorted.bam.bai"),
# TODO: Why does the following fail?
#sorted_bam = rules.fuse_bams.output.sorted_bam,
#index = rules.fuse_bams.output.index,
output:
counts = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts.txt"),
counts_converted = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts_gene_names.txt"),
counts = OPJ(
output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count",
"{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts.txt"),
counts_converted = OPJ(
output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count",
"{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts_gene_names.txt"),
params:
stranded = feature_orientation2stranded(LIB_TYPE),
annot = biotype2annot,
......
......@@ -231,7 +231,17 @@ REPS = config["replicates"]
#CONTRASTS = ["%s_vs_%s" % cond_pair for cond_pair in COND_PAIRS]
#CONTRAST2PAIR = dict(zip(CONTRASTS, COND_PAIRS))
DE_COND_PAIRS = config["de_cond_pairs"]
msg = "\n".join([
"Some contrats do not use known library names.",
"Contrasts:"
", ".join([f"({cond}, {ref})" for (cond, ref) in DE_COND_PAIRS])])
assert all([cond in LIBS and ref in LIBS for (cond, ref) in DE_COND_PAIRS]), msg
IP_COND_PAIRS = config["ip_cond_pairs"]
msg = "\n".join([
"Some contrats do not use known library names.",
"Contrasts:"
", ".join([f"({cond}, {ref})" for (cond, ref) in IP_COND_PAIRS])])
assert all([cond in LIBS and ref in LIBS for (cond, ref) in IP_COND_PAIRS]), ""
COND_PAIRS = DE_COND_PAIRS + IP_COND_PAIRS
DE_CONTRASTS = [f"{cond1}_vs_{cond2}" for [cond1, cond2] in DE_COND_PAIRS]
IP_CONTRASTS = [f"{cond1}_vs_{cond2}" for [cond1, cond2] in IP_COND_PAIRS]
......@@ -290,6 +300,7 @@ STRUCTURAL_BIOTYPES = ["tRNA", "snRNA", "snoRNA", "rRNA", "ncRNA"]
GENE_LISTS = config["gene_lists"]
BOXPLOT_GENE_LISTS = [
"all_genes",
"replication_dependent_octamer_histone",
"piRNA_dependent_prot_si_down4",
"csr1_prot_si_supertargets_48hph",
"spermatogenic_Ortiz_2014", "oogenic_Ortiz_2014"]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment