Skip to content
Snippets Groups Projects
Commit 480bbaaa authored by Blaise Li's avatar Blaise Li
Browse files

Can use cutadapt or fastx_clipper to trim adapter.

parent 5d7a7064
Branches
No related tags found
No related merge requests found
#!/usr/bin/env bash
# Usage: PRO-seq_trim_and_dedup.sh <raw fastq> <ADAPTER> <nb 5'> <nb 3'> <trimmed fastq> <untrimmed fastq> <cutadapt log> <nb_raw> <nb_adapt> <nb_adapt_deduped> <nb_noadapt> <nb_noadapt_deduped>
# Usage: PRO-seq_trim_and_dedup.sh <trimmer> <raw fastq> <ADAPTER> <nb 5'> <nb 3'> <trimmed fastq> <untrimmed fastq> <trimmer log> <nb_raw> <nb_adapt> <nb_adapt_deduped> <nb_noadapt> <nb_noadapt_deduped>
# http://linuxcommand.org/wss0150.php
PROGNAME=$(basename $0)
......@@ -18,27 +18,26 @@ function error_exit
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in
#PACKAGEDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
raw_in=${1}
trimmer=${1}
adapt=${2}
raw_in=${2}
fiveprime_random=${3}
threeprime_random=${4}
adapt=${3}
trimmed_and_dedup_out=${5}
fiveprime_random=${4}
threeprime_random=${5}
untrimmed_out=${6}
trimmed_and_dedup_out=${6}
log=${7}
untrimmed_out=${7}
nb_raw=${8}
nb_adapt=${9}
nb_adapt_deduped=${10}
nb_noadapt=${11}
nb_noadapt_deduped=${12}
log=${8}
# This script performs 2 sorting and deduplicating operations, depending on the
# presence or absence of the adapter in the read.
nb_raw=${9}
nb_adapt=${10}
nb_adapt_deduped=${11}
nb_noadapt=${12}
nb_noadapt_deduped=${13}
count_fastq_reads()
{
......@@ -46,6 +45,9 @@ count_fastq_reads()
wc -l | { read nblines; echo ${nblines} / 4 | bc > ${1}; }
}
# This script performs 2 sorting and deduplicating operations, depending on the
# presence or absence of the adapter in the read.
# The -s option of fastq-sort sorts the reads by their sequence.
sort_by_seq()
......@@ -67,7 +69,7 @@ trim_random_nt()
{
# $1: nb of bases to trim at 5' end
# $2: nb of bases to trim at 3' end
cutadapt -u ${1} -u -${2} - || error_exit "trim_random_nt failed"
cutadapt -u ${1} -u -${2} - 2> /dev/null || error_exit "trim_random_nt failed"
}
......@@ -76,14 +78,31 @@ trim_random_nt()
# second sorting and deduplicating.
mkfifo ${untrimmed_out}.fifo
minsize_trimmed=$(echo "${fiveprime_random} + 20 + ${threeprime_random}" | bc)
minsize_trimmed=$(echo "${fiveprime_random} + 16 + ${threeprime_random}" | bc)
case "${trimmer}" in
"cutadapt")
# -m ${minsize_random} is to discard reads that are shorter than this after trimming
trim_cmd="${trimmer} -a ${adapt} -m ${minsize_trimmed} --untrimmed-output=${untrimmed_out}.fifo - 2> ${log}"
;;
"fastx_clipper")
# -l is equivalent to -m in cutadapt (not sure it has effect with -C)
# -c is to keep only the trimmed reads
# -C is to keep only the non-trimmed reads
# -v is to have verbose things to put in the log
trim_cmd="tee >(${trimmer} -a ${adapt} -l ${minsize_trimmed} -C -M 3 > ${untrimmed_out}.fifo) | ${trimmer} -a ${adapt} -l ${minsize_trimmed} -v -c -M 3 2>> ${log}"
;;
*)
error_exit "Trimming adapter with ${trimmer} not implemented."
;;
esac
# a second cutadapt step removes the random nucleotides that helped identify PCR duplicates.
dedup_trimmed()
{
# $1: file in which to write the number of fastq records after adapter trimming
# $2: file in which to write the number of fastq records after deduplication
cmd="cutadapt -a ${adapt} -m ${minsize_trimmed} --untrimmed-output=${untrimmed_out}.fifo - 2> ${log} | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
cmd="${trim_cmd} | tee >(count_fastq_reads ${1}) | dedup | trim_random_nt ${fiveprime_random} ${threeprime_random} | tee >(count_fastq_reads ${2}) | gzip"
echo ${cmd}
eval ${cmd} > ${trimmed_and_dedup_out} || error_exit "${cmd} failed"
}
......@@ -99,9 +118,11 @@ dedup_untrimmed()
zcat ${raw_in} \
| tee >(count_fastq_reads ${nb_raw}) \
| dedup_trimmed ${nb_adapt} ${nb_adapt_deduped} &
pid_to_wait=$!
cat ${untrimmed_out}.fifo \
| tee >(count_fastq_reads ${nb_noadapt}) \
| dedup_untrimmed ${nb_noadapt_deduped} || rm -f ${untrimmed_out}.fifo
wait ${pid_to_wait}
rm -f ${untrimmed_out}.fifo
exit 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment