Commit 5a2c310b authored by Blaise Li

Error in trimming length.

When low quality zones are kept for deduplication, they later need to be
trimmed.
parent 4340f14c
@@ -67,11 +67,12 @@ count_fastq_reads()
 # 15-17: AT(or GC?)-rich (low diversity)
 # [fragment]
 # -4 -> -1: 3' UMI
-#strip_low_qual_zones()
-#{
-#    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
-#}
 strip_low_qual_zones()
+{
+    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
+}
+# Don't forget to remove ${total_fiveprime} and not ${fiveprime_random} when no stripping is done:
+no_strip()
 {
     bioawk -c fastx '{print "@"$name"\n"$seq"\n+\n"$qual}'
 }
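
For reference, awk's substr is 1-based: the bioawk call in strip_low_qual_zones keeps positions 1-5, 12-14 and 18 to the end of each read (and the matching quality stretches), discarding the low-quality zones at 6-11 and 15-17 described in the comments above. A minimal Python sketch of the same record transformation (a hypothetical helper, not part of the pipeline):

    def strip_low_qual_zones_py(name, seq, qual):
        # Keep 1-based positions 1-5, 12-14 and 18-end, as the bioawk
        # substr calls do; Python slices are 0-based and end-exclusive.
        kept_seq = seq[:5] + seq[11:14] + seq[17:]
        kept_qual = qual[:5] + qual[11:14] + qual[17:]
        # Reassemble a 4-line fastq record, as the mawk stage does.
        return f"@{name}\n{kept_seq}\n+\n{kept_qual}\n"
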
@@ -161,7 +162,8 @@ dedup_trimmed()
 {
     # $1: file in which to write the number of fastq records after adapter trimming
     # $2: file in which to write the number of fastq records after deduplication
-    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    #cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | no_strip | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${total_fiveprime} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
     echo ${cmd}
     eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
 }
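
This hunk is the fix named in the commit message: reads now go through no_strip before deduplication, so the low-quality zones are still present afterwards and the 5' trim must cover them too, hence ${total_fiveprime} instead of ${fiveprime_random}. A sketch of the length bookkeeping in Python, with assumed values derived from the layout comments in the first hunk (the real values are defined elsewhere in the pipeline):

    # Assumed: 5' stretch kept by strip_low_qual_zones (positions 1-5 and 12-14).
    FIVEPRIME_RANDOM = 8
    # Assumed: low-quality zones kept by no_strip (positions 6-11 and 15-17).
    LOW_QUAL_LEN = 9
    # When nothing was stripped, the whole 5' prefix must be trimmed.
    TOTAL_FIVEPRIME = FIVEPRIME_RANDOM + LOW_QUAL_LEN  # 17

    def trim_random_nt_py(seq, fiveprime, threeprime):
        # Drop fixed-length random/low-quality stretches from both ends.
        return seq[fiveprime:len(seq) - threeprime]
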
@@ -135,7 +135,7 @@ sns.set_context("talk")
 from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
 # Do this outside the workflow
 #from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
-from libhts import do_deseq2, status_setter
+from libhts import do_deseq2, status_setter, make_empty_bigwig
 from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
 from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
 from libworkflows import texscape, ensure_relative, cleanup_and_backup
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
         #scale = 1 / pd.read_table(input.summary, index_col=0).loc[
         #    wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
         assert size > 0
-        no_reads = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
+        no_reads = """Error: The generated bedGraphFile was empty. Please adjust
+your deepTools settings and check your input files.
+"""
+        zero_bytes = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
 bam2bigwig.sh: bedGraphToBigWig failed
 """
         try:
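
Both sentinel messages span two lines, which is why the except branch below compares them against last_lines(log.err, 2). Assuming that helper returns the final lines of the log with newlines intact, it could look like this (a sketch; the real helper lives in the workflow's support libraries):

    def last_lines(path, n):
        # Return the last n lines of a (small) log file as one string,
        # newlines included, so it can be compared to the sentinels above.
        with open(path) as logfile:
            return "".join(logfile.readlines()[-n:])
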
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
 || error_exit "bam2bigwig.sh failed"
 """ % size)
         except CalledProcessError as e:
-            if last_lines(log.err, 2) == no_reads:
+            if last_lines(log.err, 2) in {no_reads, zero_bytes}:
                 warn(f"{output.bigwig_norm} will be empty.\n")
                 #with open(output.bigwig_norm, "w") as bwfile:
                 #    bwfile.write("")
                 with open(log.err, "a") as errfile:
                     errfile.write("Generating zero-signal bigwig.")
-                bw_out = pyBigWig.open(output.bigwig_norm, "w")
-                bw_out.addHeader(list(chrom_sizes.items()))
-                for (chrom, chrom_len) in bw_out.chroms().items():
-                    bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
-                bw_out.close()
+                make_empty_bigwig(output.bigwig_norm, chrom_sizes)
             else:
                 raise
-        #scale = 1 / size
-        #assert scale > 0
-        ## TODO: make this a function of deeptools version
-        # no_reads = """Error: The generated bedGraphFile was empty. Please adjust
-        #your deepTools settings and check your input files.
-        #"""
-        # no_reads = """[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.
-        #"""
-        #try:
-        #    shell("""
-        #    cmd="bamCoverage -b {input.bam} {params.orient_filter} \\
-        #    -of=bigwig -bs 10 -p={threads} \\
-        #    --scaleFactor %f -o {output.bigwig_norm} \\
-        #    1>> {log.log} 2>> {log.err}"
-        #    > {log.err}
-        #    echo ${{cmd}} > {log.log}
-        #    eval ${{cmd}} || error_exit "bamCoverage failed"
-        #    """ % scale)
-        #except CalledProcessError as e:
-        #    if last_lines(log.err, 2) == no_reads:
-        #        with open(output.bigwig_norm, "w") as bwfile:
-        #            bwfile.write("")
-        #    else:
-        #        raise
 rule merge_bigwig_reps:
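
The removed inline pyBigWig code is what make_empty_bigwig, newly imported from libhts in the first Snakefile hunk, factors out. Based on those removed lines, the function plausibly looks like this (a sketch reconstructed from the diff, not the actual libhts source):

    import numpy as np
    import pyBigWig

    def make_empty_bigwig(filename, chrom_sizes):
        # Write a bigwig with zero signal in 10 bp bins on every chromosome.
        bw_out = pyBigWig.open(filename, "w")
        bw_out.addHeader(list(chrom_sizes.items()))
        for (chrom, chrom_len) in bw_out.chroms().items():
            # One zero value every 10 bp over the whole chromosome length.
            bw_out.addEntries(
                chrom, 0,
                values=np.nan_to_num(np.zeros(chrom_len)[0::10]),
                span=10, step=10)
        bw_out.close()
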