Skip to content
Snippets Groups Projects
Commit 834736f7 authored by Blaise Li's avatar Blaise Li
Browse files

No more handling of missing combinations.

parent 7803b7a5
No related branches found
No related tags found
No related merge requests found
......@@ -136,7 +136,7 @@ from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_dis
from libworkflows import texscape, ensure_relative, cleanup_and_backup
from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter
from libworkflows import read_int_from_file, strip_split, file_len, last_lines, save_plot, SHELL_FUNCTIONS
from libworkflows import filter_combinator, sum_feature_counts, sum_htseq_counts, warn_context
from libworkflows import sum_feature_counts, sum_htseq_counts, warn_context
from smincludes import rules as irules
strip = str.strip
......@@ -348,8 +348,6 @@ def sum_te_counts(fname):
return int(result.strip().split()[0])
filtered_product = filter_combinator(product, forbidden)
# Limit risks of ambiguity by imposing replicates to be numbers
# and restricting possible forms of some other wildcards
wildcard_constraints:
......@@ -384,23 +382,14 @@ shell.prefix(SHELL_FUNCTIONS)
bigwig_files = [
# individual libraries
expand(
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
"{lib}_{rep}_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
filtered_product, lib=LIBS, rep=REPS),
read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
#expand(
# expand(
# OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{lib}_{rep}_{{read_type}}_on_%s_{{orientation}}.bw" % genome),
# filtered_product, lib=LIBS, rep=REPS),
# read_type=READ_TYPES_FOR_MAPPING, orientation=ORIENTATIONS),
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}",
"{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
# means of replicates
expand(
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
"{lib}_mean_{{read_type}}_on_%s_by_{{norm}}_{{orientation}}.bw" % genome),
filtered_product, lib=LIBS),
read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_mean",
"{lib}_mean_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
lib=LIBS, read_type=READ_TYPES_FOR_MAPPING, norm=SIZE_FACTORS, orientation=["all"]),
]
meta_profiles = [
......@@ -436,38 +425,26 @@ meta_profiles = [
read_graphs = [
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_composition_from_{position}.{fig_format}"),
read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_composition_from_{position}.{fig_format}"),
lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_base_logo_from_{position}.{fig_format}"),
read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_base_logo_from_{position}.{fig_format}"),
lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=["start", "end"], fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_composition.{fig_format}"),
read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_composition.{fig_format}"),
lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_{position}_base_logo.{fig_format}"),
read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_{position}_base_logo.{fig_format}"),
lib=LIBS, rep=REPS, read_type=READ_TYPES_FOR_COMPOSITION, position=POSITIONS, fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "{read_type}_size_distribution.{fig_format}"),
read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "{read_type}_size_distribution.{fig_format}"),
lib=LIBS, rep=REPS, read_type=["trimmed", "nomap"], fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", f"{size_selected}_smallRNA_barchart.{{fig_format}}"),
lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, "figures", "{{lib}}_{{rep}}", "nb_reads.{fig_format}"),
fig_format=FIG_FORMATS), filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
]
if contrasts_dict["ip"]:
......@@ -527,25 +504,31 @@ rule all:
input:
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_nb_mapped.txt" % genome),
filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
lib=LIBS, rep=REPS, read_type=[size_selected]),
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}", "{read_type}_on_%s_coverage.txt" % genome),
filtered_product, lib=LIBS, rep=REPS, read_type=[size_selected]),
lib=LIBS, rep=REPS, read_type=[size_selected]),
expand(
OPJ(output_dir, "figures", "{lib}_{rep}", "nb_reads.{fig_format}"),
filtered_product, lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
lib=LIBS, rep=REPS, fig_format=FIG_FORMATS),
expand(
expand(
OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
"{{lib}}_{{rep}}_%s_on_%s_{orientation}_counts.txt" % (size_selected, genome)),
orientation=ORIENTATIONS),
filtered_product, lib=LIBS, rep=REPS),
OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries",
"{lib}_{rep}_{read_type}_on_%s_{orientation}_counts.txt" % genome),
lib=LIBS, rep=REPS, read_type=["RPF"], orientation=ORIENTATIONS),
# TODO: adapt from RNA-seq
# expand(
# OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", f"all_on_{genome}", "{biotype}_{orientation}_counts.txt"),
# lib=LIBS, rep=REPS),
# expand(
# expand(
# OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count",
# f"all_on_{genome}", "{biotype}_{orientation}_TPM.txt"),
# biotype=["alltypes"], orientation=ORIENTATIONS),
bigwig_files,
# rule future_all:
# meta_profiles,
# read_graphs,
# #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count", "summaries", "{lib}_{rep}_nb_non_structural.txt"), filtered_product, lib=LIBS, rep=REPS),
# OPJ(output_dir, aligner, f"mapped_{genome}", f"RPM_folds_{size_selected}", "all", "pisimi_mean_log2_RPM_fold.txt"),
# expand(OPJ(output_dir, aligner, f"mapped_{genome}", f"deseq2_{size_selected}", "all", "pisimi_{fold_type}.txt"), fold_type=["log2FoldChange"]),
# #expand(OPJ(output_dir, aligner, f"mapped_{genome}", "RPM_folds_%s" % size_selected, "{contrast}", "{contrast}_{small_type}_RPM_folds.txt"), contrast=IP_CONTRASTS, small_type=DE_TYPES),
......@@ -1056,26 +1039,6 @@ rule gather_read_counts_summaries:
summaries.to_csv(output.summary_table, sep="\t")
# Moved to script compute_genes_exon_lengths.py
#rule compute_genes_exon_lengths:
# """To each gene ID, associate a length obtained by taking the union of all exons belonging to transcripts of this gene."""
# input:
# genes_gtf = OPJ(annot_dir, "genes.gtf"),
# dte_bed = OPJ(annot_dir, "DNA_transposons_rmsk.bed"),
# rte_bed = OPJ(annot_dir, "RNA_transposons_rmsk.bed"),
# satel_bed = OPJ(annot_dir, "satellites_rmsk.bed"),
# simrep_bed = OPJ(annot_dir, "simple_repeats_rmsk.bed"),
#
# output:
# exon_lengths = OPJ(annot_dir, "union_exon_lengths.txt"),
# run:
# pd.concat((
# gtf_2_genes_exon_lengths(input.genes_gtf),
# repeat_bed_2_lengths(input.dte_bed),
# repeat_bed_2_lengths(input.rte_bed),
# repeat_bed_2_lengths(input.satel_bed),
# repeat_bed_2_lengths(input.simrep_bed))).to_csv(output.exon_lengths, sep="\t")
# TODO: use total RPF instead of non_structural to normalize
rule compute_RPM:
input:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment