Commit 4b292bbd authored by Blaise Li's avatar Blaise Li
Browse files

Fixing extra comma issue (causing tuple of dict).

parent 34d7191e
This diff is collapsed.
......@@ -51,11 +51,12 @@ from libworkflows import ensure_relative
from libworkflows import get_chrom_sizes, column_converter
from libworkflows import strip_split, last_lines, save_plot, test_na_file
from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS, warn_context
from libworkflows import feature_orientation2stranded
from libworkflows import read_htseq_counts, sum_htseq_counts
from libworkflows import read_feature_counts, sum_feature_counts
from smincludes import rules as irules
alignment_settings = {"bowtie2": ""},
alignment_settings = {"bowtie2": ""}
#TRIMMERS = ["cutadapt", "fastx_clipper"]
TRIMMERS = ["cutadapt"]
......@@ -357,7 +358,7 @@ rule sam2indexedbam:
rule fuse_bams:
"""This rules fuses the two sorted bam files corresponding to the mapping
"""This rule fuses the two sorted bam files corresponding to the mapping
of the reads containing the adaptor or not."""
input:
noadapt_sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_{rep}_noadapt_on_C_elegans_sorted.bam"),
......@@ -508,31 +509,6 @@ def parse_htseq_counts(counts_filename):
yield (gene, int(count))
# NOTE: It is not clear that this really does what I think it does:
# https://www.biostars.org/p/161534/
# https://www.biostars.org/p/170406/
def feature_orientation2stranded(wildcards):
orientation = wildcards.orientation
if orientation == "fwd":
if LIB_TYPE[-2:] == "SF":
return 1
elif LIB_TYPE[-2:] == "SR":
return 2
else:
raise ValueError(f"{LIB_TYPE} library type not compatible with strand-aware read counting.")
elif orientation == "rev":
if LIB_TYPE[-2:] == "SF":
return 2
elif LIB_TYPE[-2:] == "SR":
return 1
else:
raise ValueError(f"{LIB_TYPE} library type not compatible with strand-aware read counting.")
elif orientation == "all":
return 0
else:
exit("Orientation is to be among \"fwd\", \"rev\" and \"all\".")
rule feature_count_reads:
input:
sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_{rep}_on_C_elegans_sorted.bam"),
......@@ -544,7 +520,7 @@ rule feature_count_reads:
counts = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts.txt"),
counts_converted = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_on_C_elegans", "{biotype}_{orientation}_counts_gene_names.txt"),
params:
stranded = feature_orientation2stranded,
stranded = feature_orientation2stranded(LIB_TYPE),
annot = biotype2annot,
message:
"Counting {wildcards.orientation} {wildcards.biotype} reads for {wildcards.lib}_{wildcards.rep} with featureCounts."
......@@ -805,12 +781,12 @@ rule make_normalized_bigwig:
# orient_filter = bamcoverage_filter,
#threads: 12 # to limit memory usage, actually
benchmark:
OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_{orientation}_benchmark.txt")
OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_by_{norm_type}_{orientation}_benchmark.txt")
params:
genome_binned = genome_binned,
log:
log = OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_{orientation}.log"),
err = OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_{orientation}.err"),
log = OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_by_{norm_type}_{orientation}.log"),
err = OPJ(log_dir, "{trimmer}", "make_normalized_bigwig", "{lib}_{rep}_by_{norm_type}_{orientation}.err"),
shell:
"""
bam2bigwig.sh {input.bam} {params.genome_binned} \\
......
......@@ -8,6 +8,18 @@ major, minor = sys.version_info[:2]
if major < 3 or (major == 3 and minor < 6):
sys.exit("Need at least python 3.6\n")
# TODO: scatterplots log(IP_RPM) vs log(input_RPM) (or mutant vs WT), colouring with gene list
# TODO: heatmaps with rows (genes) ranked by expression in WT (mean reference level (wild types or inputs)), columns representing fold in mut vs WT
# Possibly remove the low expressed ones
# TODO: MA plot: fold vs "mean" count (to be defined)?
# TODO: extract most abundant piRNA reads (WT_48hph)
#bioawk -c fastx '{print $seq}' results_prg1_HS/bowtie2/mapped_C_elegans/reads/WT_RT_1_18-26_on_C_elegans/piRNA.fastq.gz | sort | uniq -c | sort -n | mawk '$1 >= 25 {print}' | wc -l
# -> 6857
# map them on the genome (without the piRNA loci), or on pseudo-genomes with specific features (histone genes...), tolerating a certain amount of mismatches (using bowtie1)
# Or: scan the genome (or "pseudo-genome") to find where there are matches with 0 to 3 mismatches at specific positions (13-21) (and perfect matches in the 5' zone)
# TODO: meta-profiles with IP and input on the same meta profile for Ortiz_oogenic and Ortiz_spermatogenic
# Make external script to generate meta-profiles on a custom list of libraries, and a given list of genes.
......@@ -129,7 +141,7 @@ from libworkflows import filter_combinator, sum_feature_counts, sum_htseq_counts
from smincludes import rules as irules
strip = str.strip
alignment_settings = {"bowtie2": "-L 6 -i S,1,0.8 -N 0"},
alignment_settings = {"bowtie2": "-L 6 -i S,1,0.8 -N 0"}
# Positions in small RNA sequences for which to analyse nucleotide distribution
#POSITIONS = ["first", "last"]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment