From da94c9ce1c200ea42893ec804206a291d8165401 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li@normalesup.org>
Date: Tue, 13 Dec 2016 14:12:40 +0100
Subject: [PATCH] Added scripts to run and parse fastqc.

---
 do_qc.sh        | 46 ++++++++++++++++++++++++++++++++++++++
 parse_fastqc.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100755 do_qc.sh
 create mode 100755 parse_fastqc.py

diff --git a/do_qc.sh b/do_qc.sh
new file mode 100755
index 0000000..54956f9
--- /dev/null
+++ b/do_qc.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Usage: do_qc.sh <lib>
+#
+# Runs fastqc on <lib>.fastq.gz unless a report (<lib>_fastqc.zip or
+# <lib>_fastqc) already exists, unzips the report if needed, and writes the
+# over-represented sequences to <lib>_fastqc/<name>_overrepresented.fasta,
+# where <name> is <lib> without its directory part.
+
+# http://linuxcommand.org/wss0150.php
+PROGNAME=$(basename "$0")
+
+function error_exit
+{
+#   ----------------------------------------------------------------
+#   Function for exit due to fatal program error
+#       Accepts 1 argument:
+#           string containing descriptive error message
+#   ----------------------------------------------------------------
+    echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
+    exit 1
+}
+
+[ $# -ge 1 ] || error_exit "usage: ${PROGNAME} <lib>"
+
+lib="${1}"
+report_dir="${lib}_fastqc"
+
+if [ ! -e "${report_dir}.zip" ] && [ ! -e "${report_dir}" ]
+then
+    cmd=(fastqc "${lib}.fastq.gz")
+    nice -n 19 ionice -c2 -n7 "${cmd[@]}" || error_exit "${cmd[*]} failed"
+fi
+
+if [ ! -e "${report_dir}" ]
+then
+    # The archive holds a top-level <name>_fastqc directory: extract it next
+    # to the archive so that it ends up at ${report_dir}.
+    unzip "${report_dir}.zip" -d "$(dirname "${report_dir}")" \
+        || error_exit "could not unzip ${report_dir}.zip"
+fi
+
+parse_fastqc.py "${report_dir}/fastqc_data.txt" \
+    > "${report_dir}/$(basename "${lib}")_overrepresented.fasta" \
+    || error_exit "fastqc result parsing failed for ${lib}"
+
+exit 0
diff --git a/parse_fastqc.py b/parse_fastqc.py
new file mode 100755
index 0000000..079a158
--- /dev/null
+++ b/parse_fastqc.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Extract the over-represented sequences from a FastQC data file.
+
+Usage: parse_fastqc.py <fastqc_data.txt>
+
+The records of the "Overrepresented sequences" module are written to standard
+output in fasta format, numbered in their order of appearance and annotated
+with the percentage reported by FastQC.
+"""
+
+import sys
+
+
+def overrepresented(lines):
+    """Yield (sequence, percentage) string pairs from FastQC report *lines*.
+
+    Nothing is yielded when the module is closed right after its opening line
+    (no "#Sequence" header, no record). ValueError is raised when the input is
+    not a FastQC report, or when the module is missing or not terminated,
+    instead of reading past the end of the file forever.
+    """
+    lines = iter(lines)
+    if not next(lines, "").startswith("##FastQC"):
+        raise ValueError("not a FastQC data file")
+    for line in lines:
+        if line.startswith(">>Overrepresented sequences"):
+            break
+    else:
+        raise ValueError("no 'Overrepresented sequences' module")
+    header = next(lines, "")
+    if header.startswith(">>END_MODULE"):
+        return
+    if header.strip().split("\t")[0] != "#Sequence":
+        raise ValueError("unexpected header: %r" % header)
+    for line in lines:
+        if line.startswith(">>END_MODULE"):
+            return
+        seq, _, percent, _ = line.strip().split("\t")
+        yield seq, percent
+    raise ValueError("'Overrepresented sequences' module is not terminated")
+
+
+def main(argv):
+    """Print the sequences of the report named by argv[1]; return exit code."""
+    if len(argv) < 2:
+        sys.stderr.write("usage: %s <fastqc_data.txt>\n" % argv[0])
+        return 2
+    try:
+        with open(argv[1], "r") as fqc:
+            for order, (seq, percent) in enumerate(overrepresented(fqc), 1):
+                print(">Over_represented_%d (%s)\n%s" % (order, percent, seq))
+    except (OSError, ValueError) as err:
+        sys.stderr.write("%s: %s\n" % (argv[0], err))
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
-- 
GitLab