Skip to content
Snippets Groups Projects
Commit 7a2e276e authored by Blaise Li's avatar Blaise Li
Browse files

Experimenting with alignment settings.

From the results, it seems that these settings are OK for the smallest
size range, but not for longer reads.
parent a101b0ce
No related branches found
No related tags found
No related merge requests found
......@@ -32,18 +32,23 @@ from libworkflows import last_lines, ensure_relative, SHELL_FUNCTIONS, warn_cont
from libworkflows import feature_orientation2stranded, read_feature_counts, sum_feature_counts
from smincludes import rules as irules
# TODO: Have different settings for different size ranges
# recommended k-mer length for D. melanogaster is 20
# However, reads shorter thant the k-mer length will be ignored.
# http://crac.gforge.inria.fr/documentation/crac/#sec-2
alignment_settings = {
#"bowtie2": "-L 6 -i S,1,0.8 -N 0",
"bowtie2": "--local -L 6 -i S,1,0.8 -N 0",
"bowtie2": "-L 6 -i S,1,0.8 -N 1",
# Small RNA-seq parameters may not be compatible with --local
#"bowtie2": "--local -L 6 -i S,1,0.8 -N 0",
"crac": "-k 20 --stranded --use-x-in-cigar"}
# Lower stringency settings, to remap the unmapped
realignment_settings = {
# Try with almost-default settings
"bowtie2": "-N 1",
# Allow more mismatches in the seed
# Reduce minimal mismatch and gap open penalties
"bowtie2": "--local -L 6 -i S,1,0.8 -N 1 --mp 6,1 --rdg 4,3",
#"bowtie2": "--local -L 6 -i S,1,0.8 -N 1 --mp 6,1 --rdg 4,3",
# TODO: Find how to be less stringent with crac
"crac": "-k 20 --stranded --use-x-in-cigar"}
......@@ -372,11 +377,14 @@ rule plot_size_distribution:
title = f"read size distribution for {wildcards.lib}_{wildcards.rep}_{wildcards.read_type}"
plot_histo(output[0], data, title)
def set_alignment_settings(wildcards):
if wildcards.read_type.endswith("unmapped"):
return realignment_settings[aligner]
else:
return alignment_settings[aligner]
return alignment_settings[aligner]
def set_realignment_settings(wildcards):
return realignment_settings[aligner]
###########
# Mapping #
......@@ -394,8 +402,8 @@ rule map_on_genome:
params:
aligner = aligner,
index = genome_db,
settings = alignment_settings[aligner],
# settings = set_alignment_settings,
#settings = alignment_settings[aligner],
settings = set_alignment_settings,
message:
"Mapping {wildcards.lib}_{wildcards.rep}_{wildcards.read_type} on C. elegans genome."
log:
......@@ -422,8 +430,8 @@ rule remap_on_genome:
params:
aligner = aligner,
index = genome_db,
settings = realignment_settings[aligner],
# settings = set_alignment_settings,
#settings = realignment_settings[aligner],
settings = set_realignment_settings,
message:
"Re-mapping unmapped {wildcards.lib}_{wildcards.rep}_{wildcards.read_type} on C. elegans genome."
log:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment