Commit 36c48731 authored by Rachel LEGENDRE
Browse files

Set pipefail - add RAM to PPR - remove old files left by previous samtools runs

parent abd1b85a
......@@ -23,7 +23,7 @@
},
"preIDR_PPR" :
{
"ram" : "40G"
"ram" : "50G"
},
"preIDR_SPR" :
{
......
......@@ -33,7 +33,7 @@ rule preIDR_PPR:
"chipflow.img"
shell:
"""
set +o pipefail
set -euxo pipefail
tmp="{output}"
outfiles=($tmp)
tmp="{input}"
......@@ -42,28 +42,34 @@ rule preIDR_PPR:
FILE1=${{outfiles[0]}}
FILE2=${{outfiles[1]}}
FILE_Pool=${{outfiles[2]}}
echo "Merging BAM files for pseudoreplicates..." > {log.out}
echo "Delete old files in case of rerun..." > {log.out}
rm ${{FILE_Pool%%.*}}_header.sam
rm ${{FILE_Pool%%.*}}00
rm ${{FILE_Pool%%.*}}01
echo "Merging BAM files for pseudoreplicates..." >> {log.out}
samtools merge $FILE_Pool ${{infiles[0]}} ${{infiles[1]}}
echo "Spliting bam file $FILE_Pool ..." > {log.out}
samtools view -H $FILE_Pool > ${{FILE_Pool%%.*}}_header.sam
nlinesPPR=$(samtools view $FILE_Pool | wc -l ) # Number of reads in the BAM file
echo "Spliting bam file $FILE_Pool ..." >> {log.out}
samtools view -H $FILE_Pool >> ${{FILE_Pool%%.*}}_header.sam
nlinesPPR=$(samtools view ${{FILE_Pool}} | wc -l ) # Number of reads in the BAM file
nlinesPPR=$(( ($nlinesPPR+1)/2 )) # half that number
# This will shuffle the lines in the file and split it into two SAM files
samtools view $FILE_Pool | shuf | split -d -l ${{nlinesPPR}} - ${{FILE_Pool%%.*}} 2> {log.out}
samtools view ${{FILE_Pool}} | shuf | split -d -l ${{nlinesPPR}} - ${{FILE_Pool%%.*}} 2>> {log.out}
cat ${{FILE_Pool%%.*}}_header.sam ${{FILE_Pool%%.*}}00 | samtools view -bS | samtools sort -o ${{FILE1}}
cat ${{FILE_Pool%%.*}}_header.sam ${{FILE_Pool%%.*}}01 | samtools view -bS | samtools sort -o ${{FILE2}}
echo "Indexing self pseudos-replicates Bamfiles ${{FILE1}} ${{FILE2}}" > {log.out}
samtools index ${{FILE1}} 2> {log.out}
samtools index ${{FILE2}} 2> {log.out}
echo "Indexing self pseudos-replicates Bamfiles ${{FILE1}} ${{FILE2}}" >> {log.out}
samtools index ${{FILE1}} 2>> {log.out}
samtools index ${{FILE2}} 2>> {log.out}
# Cleaning of useless files
rm ${{FILE_Pool%%.*}}_header.sam
rm ${{FILE_Pool%%.*}}00
rm ${{FILE_Pool%%.*}}01
rm ${{FILE_Pool%%.*}}_header.sam
rm ${{FILE_Pool%%.*}}00
rm ${{FILE_Pool%%.*}}01
if [[ ${{#infiles[@]}} > 3 ]] ; then
......@@ -71,22 +77,22 @@ rule preIDR_PPR:
n=1
for $FILEN in ${{infiles[0]}} ${{infiles[1]}} ; do
FILE_Pool=${{FILE1/Rep1/Rep${{n}}3-Pool}}
samtools merge $FILE_Pool $FILEN $FILE2 2> {log.out}
samtools merge $FILE_Pool $FILEN $FILE2 2>> {log.out}
echo "Spliting bam file $FILE_Pool ..." > {log.out}
echo "Spliting bam file $FILE_Pool ..." >> {log.out}
nlinesPPR=$(samtools view $FILE_Pool | wc -l ) # Number of reads in the BAM file
nlinesPPR=$(( ($nlinesPPR+1)/2 )) # half that number
# This will shuffle the lines in the file and split it into two SAM files
samtools view $FILE_Pool | shuf | split -d -l ${{nlinesPPR}} - ${{FILE_Pool%%.*}} 2> {log.out}
samtools view $FILE_Pool | shuf | split -d -l ${{nlinesPPR}} - ${{FILE_Pool%%.*}} 2>> {log.out}
cat ${{FILE_Pool%%.*}}_header.sam ${{FILE_Pool%%.*}}00 | samtools view -bS - | samtools sort -o ${{FILE_Pool/Pool/PPR1}}
cat ${{FILE_Pool%%.*}}_header.sam ${{FILE_Pool%%.*}}01 | samtools view -bS - | samtools sort -o ${{FILE_Pool/Pool/PPR2}}
echo "Indexing self pseudos-replicates Bamfiles ${{file}}" > {log.out}
samtools index ${{FILE_Pool/Pool/PPR1}} 2> {log.out}
samtools index ${{FILE_Pool/Pool/PPR2}} 2> {log.out}
echo "Indexing self pseudos-replicates Bamfiles ${{file}}" >> {log.out}
samtools index ${{FILE_Pool/Pool/PPR1}} 2>> {log.out}
samtools index ${{FILE_Pool/Pool/PPR2}} 2>> {log.out}
# Cleaning of useless files
rm ${{FILE_Pool%%.*}}_header.sam
......@@ -96,11 +102,11 @@ rule preIDR_PPR:
n++
done
echo "Merging BAM files on maxi pool for pseudoreplicates..." > {log.out}
echo "Merging BAM files on maxi pool for pseudoreplicates..." >> {log.out}
FILE_MaxPool=${{outfiles[4]}}
samtools merge ${{FILE_MaxPool}}_tmp ${{infiles[0]}} ${{infiles[1]}} ${{infiles[2]}} 2> {log.out}
samtools sort -o ${{FILE_MaxPool}} ${{FILE_MaxPool}}_tmp 2> {log.out}
samtools index ${{FILE_MaxPool}} 2> {log.out}
samtools merge ${{FILE_MaxPool}}_tmp ${{infiles[0]}} ${{infiles[1]}} ${{infiles[2]}} 2>> {log.out}
samtools sort -o ${{FILE_MaxPool}} ${{FILE_MaxPool}}_tmp 2>> {log.out}
samtools index ${{FILE_MaxPool}} 2>> {log.out}
# Cleaning of useless files
......
......@@ -33,7 +33,7 @@ rule preIDR_pool:
"chipflow.img"
shell:
"""
set +o pipefail
set -euxo pipefail
#Merge treatment BAMS
echo "Merging BAM files for Input..." > {log.out}
......@@ -45,25 +45,25 @@ rule preIDR_pool:
FILE1=${{infiles[0]}}
FILE2=${{infiles[1]}}
samtools merge -f ${{outfiles[0]}}_tmp ${{FILE1}} ${{FILE2}} 2> {log.out}
samtools merge -f ${{outfiles[0]}}_tmp ${{FILE1}} ${{FILE2}} 2>> {log.out}
echo "Sorting pool Bamfile" > {log.out}
samtools sort -o ${{outfiles[0]}} ${{outfiles[0]}}_tmp 2> {log.out}
echo "Sorting pool Bamfile" >> {log.out}
samtools sort -o ${{outfiles[0]}} ${{outfiles[0]}}_tmp 2>> {log.out}
echo "Indexing pool Bamfiles" > {log.out}
samtools index ${{outfiles[0]}} 2> {log.out}
echo "Indexing pool Bamfiles" >> {log.out}
samtools index ${{outfiles[0]}} 2>> {log.out}
# Cleaning of useless files
echo "Removing unsort-files" > {log.out}
echo "Removing unsort-files" >> {log.out}
rm ${{outfiles[0]}}_tmp
# if more than 2 input, create also a maxi pool
if [[ ${{#infiles[@]}} > 3 ]] ; then
echo "Merging BAM files on maxi pool for Input..." > {log.out}
echo "Merging BAM files on maxi pool for Input..." >> {log.out}
FILE3=${{infiles[2]}}
samtools merge -f ${{outfiles[1]}}_tmp ${{FILE1}} ${{FILE2}} ${{FILE3}} 2> {log.out}
samtools sort -o ${{outfiles[1]}} ${{outfiles[1]}}_tmp 2> {log.out}
samtools index ${{outfiles[1]}} 2> {log.out}
samtools merge -f ${{outfiles[1]}}_tmp ${{FILE1}} ${{FILE2}} ${{FILE3}} 2>> {log.out}
samtools sort -o ${{outfiles[1]}} ${{outfiles[1]}}_tmp 2>> {log.out}
samtools index ${{outfiles[1]}} 2>> {log.out}
rm ${{outfiles[1]}}_tmp
fi
"""
......
......@@ -33,10 +33,15 @@ rule preIDR_SPR:
"chipflow.img"
shell:
"""
set +o pipefail
set -euxo pipefail
i=1
tmp="{output}"
outfiles=($tmp)
echo "Delete old files in case of rerun..." > {log.out}
rm ${{file%.*}}_header.sam
rm ${{file%.*}}00
rm ${{file%.*}}01
for file in {input} ; do
if [ $i -eq 1 ]
......@@ -50,11 +55,11 @@ rule preIDR_SPR:
SPR="SPR3"; SPR1=${{outfiles[3]}} ; SPR2=${{outfiles[4]}}
fi
echo "starting $SPR ..." > {log.out}
echo "starting $SPR ..." >> {log.out}
samtools view -H ${{file}} > ${{file%.*}}_header.sam
echo "Spliting bam file ${{file}} ..." > {log.out}
echo "Spliting bam file ${{file}} ..." >> {log.out}
nlinesSPR=$(samtools view ${{file}} | wc -l ) # Number of reads in the BAM file
......@@ -66,7 +71,7 @@ rule preIDR_SPR:
cat ${{file%.*}}_header.sam ${{file%.*}}01 | samtools view -bS | samtools sort -o ${{SPR2}}
echo "Indexing self pseudos-replicates Bamfiles ${{file}}" > {log.out}
echo "Indexing self pseudos-replicates Bamfiles ${{file}}" >> {log.out}
samtools index ${{SPR1}}
samtools index ${{SPR2}}
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment