diff --git a/CLIP/iCLIP_trim_and_dedup.sh b/CLIP/iCLIP_trim_and_dedup.sh
index 35af080547673c1894034b36e29224c6f2100c81..dca315ad5b9d4c42749c89d6966eddf3ad9b89ce 100755
--- a/CLIP/iCLIP_trim_and_dedup.sh
+++ b/CLIP/iCLIP_trim_and_dedup.sh
@@ -67,11 +67,12 @@ count_fastq_reads()
# 15-17: AT(or GC?)-rich (low diversity)
# [fragment]
# -4 -> -1: 3' UMI
-#strip_low_qual_zones()
-#{
-# bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
-#}
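+# Keeps bases 1-5, 12-14 and 18-end of each read (sequence and matching qualities), dropping positions 6-11 and the low-diversity 15-17 zone described above, then re-emits fastq records.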
strip_low_qual_zones()
+{
+ bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
+}
+# When no stripping is done, remember to trim ${total_fiveprime} (not just ${fiveprime_random}) from the 5' end:
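+# no_strip simply re-emits each fastq record unchanged.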
+no_strip()
{
bioawk -c fastx '{print "@"$name"\n"$seq"\n+\n"$qual}'
}
@@ -161,7 +162,8 @@ dedup_trimmed()
{
# $1: file in which to write the number of fastq records after adapter trimming
# $2: file in which to write the number of fastq records after deduplication
- cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+ #cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
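+    # Unstripped variant of the pipeline above: no_strip replaces strip_low_qual_zones, so the full ${total_fiveprime} stretch is trimmed instead of only ${fiveprime_random}.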
+ cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | no_strip | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${total_fiveprime} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
echo ${cmd}
eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
}
diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile
index 768c48709e7bd25774993c347a7b4293716a005a..c45b95fa79d77a5899a460e7bec9518133f24d36 100644
--- a/small_RNA-seq/small_RNA-seq.snakefile
+++ b/small_RNA-seq/small_RNA-seq.snakefile
@@ -135,7 +135,7 @@ sns.set_context("talk")
from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
# Do this outside the workflow
#from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
-from libhts import do_deseq2, status_setter
+from libhts import do_deseq2, status_setter, make_empty_bigwig
from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
from libworkflows import texscape, ensure_relative, cleanup_and_backup
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
#scale = 1 / pd.read_table(input.summary, index_col=0).loc[
# wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
assert size > 0
- no_reads = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
+ no_reads = """Error: The generated bedGraphFile was empty. Please adjust
+your deepTools settings and check your input files.
+"""
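+            # Two known "no signal" failure modes: deepTools reporting an empty bedGraph, and bedGraphToBigWig (via bam2bigwig.sh) failing on a zero-byte allocation.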
+ zero_bytes = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
bam2bigwig.sh: bedGraphToBigWig failed
"""
try:
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
|| error_exit "bam2bigwig.sh failed"
""" % size)
except CalledProcessError as e:
- if last_lines(log.err, 2) == no_reads:
+ if last_lines(log.err, 2) in {no_reads, zero_bytes}:
warn(f"{output.bigwig_norm} will be empty.\n")
#with open(output.bigwig_norm, "w") as bwfile:
# bwfile.write("")
with open(log.err, "a") as errfile:
errfile.write("Generating zero-signal bigwig.")
- bw_out = pyBigWig.open(output.bigwig_norm, "w")
- bw_out.addHeader(list(chrom_sizes.items()))
- for (chrom, chrom_len) in bw_out.chroms().items():
- bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
- bw_out.close()
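+                # make_empty_bigwig (imported from libhts) presumably writes a bigwig with a header built from chrom_sizes and zero-valued entries for each chromosome.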
+ make_empty_bigwig(output.bigwig_norm, chrom_sizes)
else:
raise
- #scale = 1 / size
- #assert scale > 0
- ## TODO: make this a function of deeptools version
-# no_reads = """Error: The generated bedGraphFile was empty. Please adjust
-#your deepTools settings and check your input files.
-#"""
-# no_reads = """[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.
-#"""
- #try:
- # shell("""
- # cmd="bamCoverage -b {input.bam} {params.orient_filter} \\
- # -of=bigwig -bs 10 -p={threads} \\
- # --scaleFactor %f -o {output.bigwig_norm} \\
- # 1>> {log.log} 2>> {log.err}"
- # > {log.err}
- # echo ${{cmd}} > {log.log}
- # eval ${{cmd}} || error_exit "bamCoverage failed"
- # """ % scale)
- #except CalledProcessError as e:
- # if last_lines(log.err, 2) == no_reads:
- # with open(output.bigwig_norm, "w") as bwfile:
- # bwfile.write("")
- # else:
- # raise
rule merge_bigwig_reps: