From da94c9ce1c200ea42893ec804206a291d8165401 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li@normalesup.org>
Date: Tue, 13 Dec 2016 14:12:40 +0100
Subject: [PATCH] Added scripts to run and parse fastqc.

---
 do_qc.sh        | 35 +++++++++++++++++++++++++++++++++++
 parse_fastqc.py | 22 ++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100755 do_qc.sh
 create mode 100755 parse_fastqc.py

diff --git a/do_qc.sh b/do_qc.sh
new file mode 100755
index 0000000..54956f9
--- /dev/null
+++ b/do_qc.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Usage: do_qc.sh <lib>
+#
+# Runs fastqc on <lib>.fastq.gz unless a report (zipped or unpacked) is
+# already there, unpacks the zipped report if needed, and writes the
+# over-represented sequences it lists as fasta in
+# <lib>_fastqc/<name>_overrepresented.fasta, where <name> is <lib>
+# without its leading directories.
+
+# http://linuxcommand.org/wss0150.php
+PROGNAME=$(basename "$0")
+
+function error_exit
+{
+#	----------------------------------------------------------------
+#	Function for exit due to fatal program error
+#		Accepts 1 argument:
+#			string containing descriptive error message
+#	----------------------------------------------------------------
+    echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
+    exit 1
+}
+
+[ -n "${1:-}" ] || error_exit "usage: ${PROGNAME} <lib> (reads <lib>.fastq.gz)"
+
+lib="${1}"
+report_dir="${lib}_fastqc"
+
+# Quoted throughout so that paths containing spaces survive word splitting.
+if [ ! -e "${report_dir}.zip" ] && [ ! -e "${report_dir}" ]
+then
+    cmd=(fastqc "${lib}.fastq.gz")
+    nice -n 19 ionice -c2 -n7 "${cmd[@]}" || error_exit "${cmd[*]} failed"
+fi
+
+if [ ! -e "${report_dir}" ]
+then
+    # The archive is expected to hold a top-level *_fastqc directory: unpack it
+    # beside the zip file (not in the current directory) so that it lands at
+    # ${report_dir} even when <lib> is given with a directory.
+    unzip "${report_dir}.zip" -d "$(dirname "${report_dir}")" \
+        || error_exit "could not unzip ${report_dir}.zip"
+fi
+
+# Only the file name of the library is used to name the fasta file, so that
+# a library given with a directory does not need sub-directories in the report.
+parse_fastqc.py "${report_dir}/fastqc_data.txt" \
+    > "${report_dir}/$(basename "${lib}")_overrepresented.fasta" \
+    || error_exit "fastqc result parsing failed for ${lib}"
+
+exit 0
diff --git a/parse_fastqc.py b/parse_fastqc.py
new file mode 100755
index 0000000..079a158
--- /dev/null
+++ b/parse_fastqc.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Write the over-represented sequences of a fastqc report as fasta.
+
+Usage: parse_fastqc.py <fastqc_data.txt>
+
+Each sequence listed in the ">>Overrepresented sequences" module is printed
+on standard output as a fasta record named after its rank in the module and
+annotated with its percentage.
+"""
+
+import sys
+
+MODULE_START = ">>Overrepresented sequences"
+MODULE_END = ">>END_MODULE"
+
+
+def overrepresented(lines):
+    """Yield (sequence, percentage) string pairs from fastqc report *lines*.
+
+    Nothing is yielded for a module without data lines, instead of failing on
+    its end marker. ValueError is raised when the report does not start with
+    "##FastQC", lacks the module or its end marker, or has a malformed data line.
+    """
+    lines = iter(lines)
+    if not next(lines, "").startswith("##FastQC"):
+        raise ValueError("not a fastqc data file")
+    # Stop at end of file instead of reading empty lines forever when the
+    # module is missing.
+    for line in lines:
+        if line.startswith(MODULE_START):
+            break
+    else:
+        raise ValueError("no %s module" % MODULE_START)
+    for line in lines:
+        if line.startswith(MODULE_END):
+            return
+        if line.startswith("#"):
+            # Column names, not a sequence.
+            continue
+        # Only the line end is stripped, so that an empty last column
+        # does not change the number of fields.
+        seq, _, percent, _ = line.rstrip("\r\n").split("\t")
+        yield seq, percent
+    raise ValueError("%s module is not terminated" % MODULE_START)
+
+
+def main(path):
+    """Print the over-represented sequences of the report at *path* as fasta."""
+    with open(path, "r") as fqc:
+        for order, (seq, percent) in enumerate(overrepresented(fqc), start=1):
+            print(">Over_represented_%d (%s)\n%s" % (order, percent, seq))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit("usage: %s <fastqc_data.txt>" % sys.argv[0])
+    try:
+        main(sys.argv[1])
+    except ValueError as err:
+        sys.exit("%s: %s" % (sys.argv[1], err))
+    sys.exit(0)
-- 
GitLab