From 834736f7c46b1df3c1f77059cdc06a2581a85f54 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Mon, 30 Apr 2018 16:50:06 +0200
Subject: [PATCH] No more handling of missing combinations.
---
Ribo-seq/Ribo-seq.snakefile | 109 ++++++++++++------------------------
1 file changed, 36 insertions(+), 73 deletions(-)
diff --git a/Ribo-seq/Ribo-seq.snakefile b/Ribo-seq/Ribo-seq.snakefile
index 8591dc5..3ac81ec 100644
--- a/Ribo-seq/Ribo-seq.snakefile
+++ b/Ribo-seq/Ribo-seq.snakefile
@@ -136,7 +136,7 @@ from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_dis
from libworkflows import texscape, ensure_relative, cleanup_and_backup
from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter
from libworkflows import read_int_from_file, strip_split, file_len, last_lines, save_plot, SHELL_FUNCTIONS
-from libworkflows import filter_combinator, sum_feature_counts, sum_htseq_counts, warn_context
+from libworkflows import sum_feature_counts, sum_htseq_counts, warn_context
from smincludes import rules as irules
strip = str.strip
@@ -348,8 +348,6 @@ def sum_te_counts(fname):
return int(result.strip().split()[0])
-filtered_product = filter_combinator(product, forbidden)
-
# Limit risks of ambiguity by imposing replicates to be numbers
# and restricting possible forms of some other wildcards
wildcard_constraints:
@@ -384,23 +382,14 @@ shell.prefix(SHELL_FUNCTIONS)
bigwig_files = [
# individual libraries
expand(
- expand(
- OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
- "{lib}_{rep}_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
- filtered_product, lib=LIBS, rep=REPS),
- read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
- #expand(
- # expand(
- # OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{lib}_{rep}_{{read_type}}_on_%s_{{orientation}}.bw" % genome),
- # filtered_product, lib=LIBS, rep=REPS),
- # read_type=READ_TYPES_FOR_MAPPING, orientation=ORIENTATIONS),
+ OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
+ "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
+ lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
# means of replicates
expand(
- expand(
- OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
- "{lib}_mean_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
- filtered_product, lib=LIBS),
- read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
+ OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
+ "{lib}_mean_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
+ lib=LIBS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
]
meta_profiles = [
@@ -436,38 +425,26 @@ meta_profiles = [
read_graphs = [
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_composition_from_{position}.{fig_format}"),
- read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_composition_from_{position}.{fig_format}"),
+ lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_logo_from_{position}.{fig_format}"),
- read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_logo_from_{position}.{fig_format}"),
+ lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_composition.{fig_format}"),
- read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_composition.{fig_format}"),
+ lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_logo.{fig_format}"),
- read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_logo.{fig_format}"),
+ lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_size_distribution.{fig_format}"),
- read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_size_distribution.{fig_format}"),
+ lib=LIBS, rep=REPS, read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
- fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
+ lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "nb_reads.{fig_format}"),
- fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
+ lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
]
if contrasts_dict["ip"]:
@@ -527,25 +504,31 @@ rule all:
input:
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_nb_mapped.txt" % genome),
- filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
+ lib=LIBS, rep=REPS, read_type=[size_selected]),
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_coverage.txt" % genome),
- filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
+ lib=LIBS, rep=REPS, read_type=[size_selected]),
expand(
OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
- filtered_product, lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
+ lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
expand(
- expand(
- OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
- "{{lib}}_{{rep}}_%s_on_%s_{orientation}_counts.txt" % (size_selected, genome)),
- orientation=ORIENTATIONS),
- filtered_product, lib=LIBS, rep=REPS),
+ OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
+ "{lib}_{rep}_{read_type}_on_%s_{orientation}_counts.txt" % genome),
+ lib=LIBS, rep=REPS, read_type=["RPF"], orientation=ORIENTATIONS),
+ # TODO: adapt from RNA-seq
+ # expand(
+ # OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", f"all_on_{genome}", "{biotype}_{orientation}_counts.txt"),
+ # lib=LIBS, rep=REPS),
+ # expand(
+ # expand(
+ # OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count",
+ # f"all_on_{genome}", "{biotype}_{orientation}_TPM.txt"),
+ # biotype=["alltypes"], orientation=ORIENTATIONS),
bigwig_files,
# rule future_all:
# meta_profiles,
# read_graphs,
-# #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries", "{lib}_{rep}_nb_non_structural.txt"), filtered_product, lib=LIBS, rep=REPS),
# OPJ(output_dir, aligner, f"mapped_{genome}", f"RPM_folds_{size_selected}", "all", "pisimi_mean_log2_RPM_fold.txt"),
# expand(OPJ(output_dir, aligner, f"mapped_{genome}", f"deseq2_{size_selected}", "all", "pisimi_{fold_type}.txt"), fold_type=["log2FoldChange"]),
# #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "RPM_folds_%s" % size_selected, "{contrast}", "{contrast}_{small_type}_RPM_folds.txt"), contrast=IP_CONTRASTS, small_type=DE_TYPES),
@@ -1056,26 +1039,6 @@ rule gather_read_counts_summaries:
summaries.to_csv(output.summary_table, sep="\t")
-# Moved to script compute_genes_exon_lengths.py
-#rule compute_genes_exon_lengths:
-# """To each gene ID, associate a length obtained by taking the union of all exons belonging to transcripts of this gene."""
-# input:
-# genes_gtf = OPJ(annot_dir, "genes.gtf"),
-# dte_bed = OPJ(annot_dir, "DNA_transposons_rmsk.bed"),
-# rte_bed = OPJ(annot_dir, "RNA_transposons_rmsk.bed"),
-# satel_bed = OPJ(annot_dir, "satellites_rmsk.bed"),
-# simrep_bed = OPJ(annot_dir, "simple_repeats_rmsk.bed"),
-#
-# output:
-# exon_lengths = OPJ(annot_dir, "union_exon_lengths.txt"),
-# run:
-# pd.concat((
-# gtf_2_genes_exon_lengths(input.genes_gtf),
-# repeat_bed_2_lengths(input.dte_bed),
-# repeat_bed_2_lengths(input.rte_bed),
-# repeat_bed_2_lengths(input.satel_bed),
-# repeat_bed_2_lengths(input.simrep_bed))).to_csv(output.exon_lengths, sep="\t")
-
# TODO: use total RPF instead of non_structural to normalize
rule compute_RPM:
input:
--
GitLab