Commit 5a2c310b authored by Blaise Li

Error in trimming length.

When low quality zones are kept for deduplication, they later need to be
trimmed.
parent 4340f14c
@@ -67,11 +67,12 @@ count_fastq_reads()
 # 15-17: AT(or GC?)-rich (low diversity)
 # [fragment]
 # -4 -> -1: 3' UMI
-#strip_low_qual_zones()
-#{
-#    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
-#}
 strip_low_qual_zones()
+{
+    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
+}
+# Don't forget to remove ${total_fiveprime} and not ${fiveprime_random} when no stripping is done:
+no_strip()
 {
     bioawk -c fastx '{print "@"$name"\n"$seq"\n+\n"$qual}'
 }
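
For reference, awk's substr is 1-based: the bioawk call in strip_low_qual_zones keeps positions 1-5, 12-14 and 18 to the end of each read (and the matching quality stretches), discarding the low-quality zones at 6-11 and 15-17 described in the comments above. A minimal Python sketch of the same record transformation (a hypothetical helper, not part of the pipeline):

    def strip_low_qual_zones_py(name, seq, qual):
        # Keep 1-based positions 1-5, 12-14 and 18-end, as the bioawk
        # substr calls do; Python slices are 0-based and end-exclusive.
        kept_seq = seq[:5] + seq[11:14] + seq[17:]
        kept_qual = qual[:5] + qual[11:14] + qual[17:]
        # Reassemble a 4-line fastq record, as the mawk stage does.
        return f"@{name}\n{kept_seq}\n+\n{kept_qual}\n"
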
@@ -161,7 +162,8 @@ dedup_trimmed()
 {
     # $1: file in which to write the number of fastq records after adapter trimming
     # $2: file in which to write the number of fastq records after deduplication
-    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    #cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | no_strip | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${total_fiveprime} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
     echo ${cmd}
     eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
 }
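
This hunk is the fix named in the commit message: reads now go through no_strip before deduplication, so the low-quality zones are still present afterwards and the 5' trim must cover them too, hence ${total_fiveprime} instead of ${fiveprime_random}. A sketch of the length bookkeeping in Python, with assumed values derived from the layout comments in the first hunk (the real values are defined elsewhere in the pipeline):

    # Assumed: 5' stretch kept by strip_low_qual_zones (positions 1-5 and 12-14).
    FIVEPRIME_RANDOM = 8
    # Assumed: low-quality zones kept by no_strip (positions 6-11 and 15-17).
    LOW_QUAL_LEN = 9
    # When nothing was stripped, the whole 5' prefix must be trimmed.
    TOTAL_FIVEPRIME = FIVEPRIME_RANDOM + LOW_QUAL_LEN  # 17

    def trim_random_nt_py(seq, fiveprime, threeprime):
        # Drop fixed-length random/low-quality stretches from both ends.
        return seq[fiveprime:len(seq) - threeprime]
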
@@ -135,7 +135,7 @@ sns.set_context("talk")
 from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
 # Do this outside the workflow
 #from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
-from libhts import do_deseq2, status_setter
+from libhts import do_deseq2, status_setter, make_empty_bigwig
 from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
 from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
 from libworkflows import texscape, ensure_relative, cleanup_and_backup
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
         #scale = 1 / pd.read_table(input.summary, index_col=0).loc[
         #    wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
         assert size > 0
-        no_reads = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
+        no_reads = """Error: The generated bedGraphFile was empty. Please adjust
+your deepTools settings and check your input files.
+"""
+        zero_bytes = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
 bam2bigwig.sh: bedGraphToBigWig failed
 """
         try:
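
Both sentinel messages span two lines, which is why the except branch below compares them against last_lines(log.err, 2). Assuming that helper returns the final lines of the log with newlines intact, it could look like this (a sketch; the real helper lives in the workflow's support libraries):

    def last_lines(path, n):
        # Return the last n lines of a (small) log file as one string,
        # newlines included, so it can be compared to the sentinels above.
        with open(path) as logfile:
            return "".join(logfile.readlines()[-n:])
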
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
 || error_exit "bam2bigwig.sh failed"
 """ % size)
         except CalledProcessError as e:
-            if last_lines(log.err, 2) == no_reads:
+            if last_lines(log.err, 2) in {no_reads, zero_bytes}:
                 warn(f"{output.bigwig_norm} will be empty.\n")
                 #with open(output.bigwig_norm, "w") as bwfile:
                 #    bwfile.write("")
                 with open(log.err, "a") as errfile:
                     errfile.write("Generating zero-signal bigwig.")
-                bw_out = pyBigWig.open(output.bigwig_norm, "w")
-                bw_out.addHeader(list(chrom_sizes.items()))
-                for (chrom, chrom_len) in bw_out.chroms().items():
-                    bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
-                bw_out.close()
+                make_empty_bigwig(output.bigwig_norm, chrom_sizes)
             else:
                 raise
-        #scale = 1 / size
-        #assert scale > 0
-        ## TODO: make this a function of deeptools version
-        # no_reads = """Error: The generated bedGraphFile was empty. Please adjust
-        #your deepTools settings and check your input files.
-        #"""
-        # no_reads = """[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.
-        #"""
-        #try:
-        #    shell("""
-        #    cmd="bamCoverage -b {input.bam} {params.orient_filter} \\
-        #    -of=bigwig -bs 10 -p={threads} \\
-        #    --scaleFactor %f -o {output.bigwig_norm} \\
-        #    1>> {log.log} 2>> {log.err}"
-        #    > {log.err}
-        #    echo ${{cmd}} > {log.log}
-        #    eval ${{cmd}} || error_exit "bamCoverage failed"
-        #    """ % scale)
-        #except CalledProcessError as e:
-        #    if last_lines(log.err, 2) == no_reads:
-        #        with open(output.bigwig_norm, "w") as bwfile:
-        #            bwfile.write("")
-        #    else:
-        #        raise
 rule merge_bigwig_reps:
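
The removed inline pyBigWig code is what make_empty_bigwig, newly imported from libhts in the first Snakefile hunk, factors out. Based on those removed lines, the function plausibly looks like this (a sketch reconstructed from the diff, not the actual libhts source):

    import numpy as np
    import pyBigWig

    def make_empty_bigwig(filename, chrom_sizes):
        # Write a bigwig with zero signal in 10 bp bins on every chromosome.
        bw_out = pyBigWig.open(filename, "w")
        bw_out.addHeader(list(chrom_sizes.items()))
        for (chrom, chrom_len) in bw_out.chroms().items():
            # One zero value every 10 bp over the whole chromosome length.
            bw_out.addEntries(
                chrom, 0,
                values=np.nan_to_num(np.zeros(chrom_len)[0::10]),
                span=10, step=10)
        bw_out.close()
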