diff --git a/CLIP/iCLIP_trim_and_dedup.sh b/CLIP/iCLIP_trim_and_dedup.sh
index 35af080547673c1894034b36e29224c6f2100c81..dca315ad5b9d4c42749c89d6966eddf3ad9b89ce 100755
--- a/CLIP/iCLIP_trim_and_dedup.sh
+++ b/CLIP/iCLIP_trim_and_dedup.sh
@@ -67,11 +67,12 @@ count_fastq_reads()
 # 15-17: AT(or GC?)-rich (low diversity)
 # [fragment]
 # -4 -> -1: 3' UMI
-#strip_low_qual_zones()
-#{
-#    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
-#}
 strip_low_qual_zones()
+{
+    bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
+}
+# Don't forget to remove ${total_fiveprime} and not ${fiveprime_random} when no stripping is done:
+no_strip()
 {
     bioawk -c fastx '{print "@"$name"\n"$seq"\n+\n"$qual}'
 }
@@ -161,7 +162,8 @@ dedup_trimmed()
 {
     # $1: file in which to write the number of fastq records after adapter trimming
     # $2: file in which to write the number of fastq records after deduplication
-    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    #cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
+    cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | no_strip | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${total_fiveprime} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
     echo ${cmd}
     eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
 }
diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile
index 768c48709e7bd25774993c347a7b4293716a005a..c45b95fa79d77a5899a460e7bec9518133f24d36 100644
--- a/small_RNA-seq/small_RNA-seq.snakefile
+++ b/small_RNA-seq/small_RNA-seq.snakefile
@@ -135,7 +135,7 @@ sns.set_context("talk")
 from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
 # Do this outside the workflow
 #from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
-from libhts import do_deseq2, status_setter
+from libhts import do_deseq2, status_setter, make_empty_bigwig
 from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
 from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
 from libworkflows import texscape, ensure_relative, cleanup_and_backup
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
         #scale = 1 / pd.read_table(input.summary, index_col=0).loc[
         #    wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
         assert size > 0
-        no_reads = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
+        no_reads = """Error: The generated bedGraphFile was empty. Please adjust
+your deepTools settings and check your input files.
+"""
+        zero_bytes = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
 bam2bigwig.sh: bedGraphToBigWig failed
 """
         try:
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
                     || error_exit "bam2bigwig.sh failed"
                 """ % size)
         except CalledProcessError as e:
-            if last_lines(log.err, 2) == no_reads:
+            if last_lines(log.err, 2) in {no_reads, zero_bytes}:
                 warn(f"{output.bigwig_norm} will be empty.\n")
                 #with open(output.bigwig_norm, "w") as bwfile:
                 #    bwfile.write("")
                 with open(log.err, "a") as errfile:
                     errfile.write("Generating zero-signal bigwig.")
-                bw_out = pyBigWig.open(output.bigwig_norm, "w")
-                bw_out.addHeader(list(chrom_sizes.items()))
-                for (chrom, chrom_len) in bw_out.chroms().items():
-                    bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
-                bw_out.close()
+                make_empty_bigwig(output.bigwig_norm, chrom_sizes)
             else:
                 raise
-        #scale = 1 / size
-        #assert scale > 0
-        ## TODO: make this a function of deeptools version
-#        no_reads = """Error: The generated bedGraphFile was empty. Please adjust
-#your deepTools settings and check your input files.
-#"""
-#        no_reads = """[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.
-#"""
-        #try:
-        #    shell("""
-        #        cmd="bamCoverage -b {input.bam} {params.orient_filter} \\
-        #            -of=bigwig -bs 10 -p={threads} \\
-        #            --scaleFactor %f -o {output.bigwig_norm} \\
-        #            1>> {log.log} 2>> {log.err}"
-        #        > {log.err}
-        #        echo ${{cmd}} > {log.log}
-        #        eval ${{cmd}} || error_exit "bamCoverage failed"
-        #        """ % scale)
-        #except CalledProcessError as e:
-        #    if last_lines(log.err, 2) == no_reads:
-        #        with open(output.bigwig_norm, "w") as bwfile:
-        #            bwfile.write("")
-        #    else:
-        #        raise
 
 
 rule merge_bigwig_reps: