Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bioinfo_utils
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
bioinfo_utils
Commits
5a2c310b
Commit
5a2c310b
authored
7 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Error in trimming length.
When low quality zones are kept for deduplication, they later need to be trimmed.
parent
4340f14c
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
CLIP/iCLIP_trim_and_dedup.sh
+7
-5
7 additions, 5 deletions
CLIP/iCLIP_trim_and_dedup.sh
small_RNA-seq/small_RNA-seq.snakefile
+7
-32
7 additions, 32 deletions
small_RNA-seq/small_RNA-seq.snakefile
with
14 additions
and
37 deletions
CLIP/iCLIP_trim_and_dedup.sh
+
7
−
5
View file @
5a2c310b
...
@@ -67,11 +67,12 @@ count_fastq_reads()
...
@@ -67,11 +67,12 @@ count_fastq_reads()
# 15-17: AT(or GC?)-rich (low diversity)
# 15-17: AT(or GC?)-rich (low diversity)
# [fragment]
# [fragment]
# -4 -> -1: 3' UMI
# -4 -> -1: 3' UMI
#strip_low_qual_zones()
#{
# bioawk -c fastx '{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}' | mawk '{print "@"$1"\n"$2"\n+\n"$3}'
#}
strip_low_qual_zones
()
strip_low_qual_zones
()
{
bioawk
-c
fastx
'{print $name"\t"substr($seq, 1, 5)""substr($seq, 12, 3)""substr($seq, 18)"\t"substr($qual, 1, 5)""substr($qual, 12, 3)""substr($qual, 18)}'
| mawk
'{print "@"$1"\n"$2"\n+\n"$3}'
}
# Don't forget to remove ${total_fiveprime} and not ${fiveprime_random} when no stripping is done:
no_strip
()
{
{
bioawk
-c
fastx
'{print "@"$name"\n"$seq"\n+\n"$qual}'
bioawk
-c
fastx
'{print "@"$name"\n"$seq"\n+\n"$qual}'
}
}
...
@@ -161,7 +162,8 @@ dedup_trimmed()
...
@@ -161,7 +162,8 @@ dedup_trimmed()
{
{
# $1: file in which to write the number of fastq records after adapter trimming
# $1: file in which to write the number of fastq records after adapter trimming
# $2: file in which to write the number of fastq records after deduplication
# $2: file in which to write the number of fastq records after deduplication
cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
#cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | strip_low_qual_zones | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
cmd="${trim_cmd} | tee >(process_without_deduplication ${trimmed_nodedup_out}) | no_strip | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${total_fiveprime} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
echo ${cmd}
echo ${cmd}
eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
}
}
...
...
This diff is collapsed.
Click to expand it.
small_RNA-seq/small_RNA-seq.snakefile
+
7
−
32
View file @
5a2c310b
...
@@ -135,7 +135,7 @@ sns.set_context("talk")
...
@@ -135,7 +135,7 @@ sns.set_context("talk")
from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
from libsmallrna import PI_MIN, PI_MAX, SI_MIN, SI_MAX
# Do this outside the workflow
# Do this outside the workflow
#from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
#from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths
from libhts import do_deseq2, status_setter
from libhts import do_deseq2, status_setter, make_empty_bigwig
from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations
from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
from libhts import plot_paired_scatters, plot_norm_correlations, plot_counts_distribution, plot_boxplots, plot_histo
from libworkflows import texscape, ensure_relative, cleanup_and_backup
from libworkflows import texscape, ensure_relative, cleanup_and_backup
...
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
...
@@ -2132,7 +2132,10 @@ rule make_normalized_bigwig:
#scale = 1 / pd.read_table(input.summary, index_col=0).loc[
#scale = 1 / pd.read_table(input.summary, index_col=0).loc[
# wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
# wildcards.norm_file].loc[f"{wildcards.lib}_{wildcards.rep}"]
assert size > 0
assert size > 0
no_reads = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
no_reads = """Error: The generated bedGraphFile was empty. Please adjust
your deepTools settings and check your input files.
"""
zero_bytes = """needLargeMem: trying to allocate 0 bytes (limit: 100000000000)
bam2bigwig.sh: bedGraphToBigWig failed
bam2bigwig.sh: bedGraphToBigWig failed
"""
"""
try:
try:
...
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
...
@@ -2144,43 +2147,15 @@ bam2bigwig.sh: bedGraphToBigWig failed
|| error_exit "bam2bigwig.sh failed"
|| error_exit "bam2bigwig.sh failed"
""" % size)
""" % size)
except CalledProcessError as e:
except CalledProcessError as e:
if last_lines(log.err, 2) == no_reads:
if last_lines(log.err, 2) in {no_reads, zero_bytes}:
warn(f"{output.bigwig_norm} will be empty.\n")
warn(f"{output.bigwig_norm} will be empty.\n")
#with open(output.bigwig_norm, "w") as bwfile:
#with open(output.bigwig_norm, "w") as bwfile:
# bwfile.write("")
# bwfile.write("")
with open(log.err, "a") as errfile:
with open(log.err, "a") as errfile:
errfile.write("Generating zero-signal bigwig.")
errfile.write("Generating zero-signal bigwig.")
bw_out = pyBigWig.open(output.bigwig_norm, "w")
make_empty_bigwig(output.bigwig_norm, chrom_sizes)
bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
bw_out.close()
else:
else:
raise
raise
#scale = 1 / size
#assert scale > 0
## TODO: make this a function of deeptools version
# no_reads = """Error: The generated bedGraphFile was empty. Please adjust
#your deepTools settings and check your input files.
#"""
# no_reads = """[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.
#"""
#try:
# shell("""
# cmd="bamCoverage -b {input.bam} {params.orient_filter} \\
# -of=bigwig -bs 10 -p={threads} \\
# --scaleFactor %f -o {output.bigwig_norm} \\
# 1>> {log.log} 2>> {log.err}"
# > {log.err}
# echo ${{cmd}} > {log.log}
# eval ${{cmd}} || error_exit "bamCoverage failed"
# """ % scale)
#except CalledProcessError as e:
# if last_lines(log.err, 2) == no_reads:
# with open(output.bigwig_norm, "w") as bwfile:
# bwfile.write("")
# else:
# raise
rule merge_bigwig_reps:
rule merge_bigwig_reps:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment