No more handling of missing combinations.

834736f7 · Blaise Li · 7803b7a5 · 834736f7
Commit 834736f7 authored 7 years ago by Blaise Li
--- a/Ribo-seq/Ribo-seq.snakefile
+++ b/Ribo-seq/Ribo-seq.snakefile
@@ -136,7 +136,7 @@ from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_dis
 from libworkflows import texscape, ensure_relative, cleanup_and_backup
 from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter
 from libworkflows import read_int_from_file, strip_split, file_len, last_lines, save_plot, SHELL_FUNCTIONS
-from libworkflows import filter_combinator, sum_feature_counts, sum_htseq_counts, warn_context
+from libworkflows import sum_feature_counts, sum_htseq_counts, warn_context
 from smincludes import rules as irules
 strip = str.strip

@@ -348,8 +348,6 @@ def sum_te_counts(fname):
    return int(result.strip().split()[0])


-filtered_product = filter_combinator(product, forbidden)
-
 # Limit risks of ambiguity by imposing replicates to be numbers
 # and restricting possible forms of some other wildcards
 wildcard_constraints:
@@ -384,23 +382,14 @@ shell.prefix(SHELL_FUNCTIONS)
 bigwig_files = [
    # individual libraries
    expand(
-        expand(
-            OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
-                "{lib}_{rep}_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
-            filtered_product, lib=LIBS, rep=REPS),
-        read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
-    #expand(
-    #    expand(
-    #        OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{lib}_{rep}_{{read_type}}_on_%s_{{orientation}}.bw" % genome),
-    #            filtered_product, lib=LIBS, rep=REPS),
-    #    read_type=READ_TYPES_FOR_MAPPING, orientation=ORIENTATIONS),
+        OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
+            "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
+        lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
    # means of replicates
    expand(
-        expand(
-            OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
-                "{lib}_mean_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
-            filtered_product, lib=LIBS),
-        read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
+        OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
+            "{lib}_mean_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
+        lib=LIBS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
    ]

 meta_profiles = [
@@ -436,38 +425,26 @@ meta_profiles = [

 read_graphs = [
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_composition_from_{position}.{fig_format}"),
-            read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
-        filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_composition_from_{position}.{fig_format}"),
+        lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_logo_from_{position}.{fig_format}"),
-            read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
-        filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_logo_from_{position}.{fig_format}"),
+        lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_composition.{fig_format}"),
-            read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
-        filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_composition.{fig_format}"),
+        lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_logo.{fig_format}"),
-            read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
-        filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_logo.{fig_format}"),
+        lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_size_distribution.{fig_format}"),
-            read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
-        filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_size_distribution.{fig_format}"),
+        lib=LIBS, rep=REPS, read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
-            fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
+        lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
    expand(
-        expand(
-            OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "nb_reads.{fig_format}"),
-            fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
+        OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
+        lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
    ]

 if contrasts_dict["ip"]:
@@ -527,25 +504,31 @@ rule all:
    input:
        expand(
            OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_nb_mapped.txt" % genome),
-            filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
+            lib=LIBS, rep=REPS, read_type=[size_selected]),
        expand(
            OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_coverage.txt" % genome),
-            filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
+            lib=LIBS, rep=REPS, read_type=[size_selected]),
        expand(
            OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
-            filtered_product, lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
+            lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
        expand(
-            expand(
-                OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
-                    "{{lib}}_{{rep}}_%s_on_%s_{orientation}_counts.txt" % (size_selected, genome)),
-                orientation=ORIENTATIONS),
-            filtered_product, lib=LIBS, rep=REPS),
+            OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
+                "{lib}_{rep}_{read_type}_on_%s_{orientation}_counts.txt" % genome),
+            lib=LIBS, rep=REPS, read_type=["RPF"], orientation=ORIENTATIONS),
+        # TODO: adapt from RNA-seq
+        # expand(
+        #     OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", f"all_on_{genome}", "{biotype}_{orientation}_counts.txt"),
+        #     lib=LIBS, rep=REPS),
+        # expand(
+        #     expand(
+        #         OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count",
+        #             f"all_on_{genome}", "{biotype}_{orientation}_TPM.txt"),
+        #         biotype=["alltypes"], orientation=ORIENTATIONS),
        bigwig_files,

 # rule future_all:
 #         meta_profiles,
 #         read_graphs,
-#         #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries", "{lib}_{rep}_nb_non_structural.txt"), filtered_product, lib=LIBS, rep=REPS),
 #         OPJ(output_dir, aligner, f"mapped_{genome}", f"RPM_folds_{size_selected}", "all", "pisimi_mean_log2_RPM_fold.txt"),
 #         expand(OPJ(output_dir, aligner, f"mapped_{genome}", f"deseq2_{size_selected}", "all", "pisimi_{fold_type}.txt"), fold_type=["log2FoldChange"]),
 #         #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "RPM_folds_%s" % size_selected, "{contrast}", "{contrast}_{small_type}_RPM_folds.txt"), contrast=IP_CONTRASTS, small_type=DE_TYPES),
@@ -1056,26 +1039,6 @@ rule gather_read_counts_summaries:
        summaries.to_csv(output.summary_table, sep="\t")


-# Moved to script compute_genes_exon_lengths.py
-#rule compute_genes_exon_lengths:
-#    """To each gene ID, associate a length obtained by taking the union of all exons belonging to transcripts of this gene."""
-#    input:
-#        genes_gtf = OPJ(annot_dir, "genes.gtf"),
-#        dte_bed = OPJ(annot_dir, "DNA_transposons_rmsk.bed"),
-#        rte_bed = OPJ(annot_dir, "RNA_transposons_rmsk.bed"),
-#        satel_bed = OPJ(annot_dir, "satellites_rmsk.bed"),
-#        simrep_bed = OPJ(annot_dir, "simple_repeats_rmsk.bed"),
-#
-#    output:
-#        exon_lengths = OPJ(annot_dir, "union_exon_lengths.txt"),
-#    run:
-#        pd.concat((
-#            gtf_2_genes_exon_lengths(input.genes_gtf),
-#            repeat_bed_2_lengths(input.dte_bed),
-#            repeat_bed_2_lengths(input.rte_bed),
-#            repeat_bed_2_lengths(input.satel_bed),
-#            repeat_bed_2_lengths(input.simrep_bed))).to_csv(output.exon_lengths, sep="\t")       
-
 # TODO: use total RPF instead of non_structural to normalize
 rule compute_RPM:
    input: