Skip to content
Snippets Groups Projects
Commit 234a0676 authored by Blaise Li
Browse files

Add 'gtf2bed/' from commit '1059f83a'

git-subtree-dir: gtf2bed
git-subtree-mainline: 0f82a98e
git-subtree-split: 1059f83a
parents 0f82a98e 1059f83a
No related branches found
No related tags found
No related merge requests found
-- import System.Environment (getArgs)
-- https://mail.haskell.org/pipermail/beginners/2011-October/008787.html
-- import Data.List.Utils (join, split)
-- import qualified Data.Text.Lazy as T
-- faster than Lazy
-- import qualified Data.Text as T
import qualified Data.ByteString.Lazy.Char8 as C
-- import Numeric (readDec)
-- fastRead :: T.Text -> Int
-- fastRead t = case readDec (T.unpack t) of
-- [(n, "")] -> n
-- Parse the integer at the start of a field.
-- C.readInt only looks at the leading integer, so whatever follows the
-- digits is ignored. A field that does not start with an integer aborts
-- with an error that names the offending field, rather than an anonymous
-- irrefutable-pattern failure.
fastRead :: C.ByteString -> Int
fastRead t = case C.readInt t of
  Just (n, _) -> n
  Nothing -> error ("fastRead: not an integer: " ++ show t)
-- Break one input line into its tab-delimited columns.
splitFields :: C.ByteString -> [C.ByteString]
splitFields line = C.split tab line
  where
    tab = '\t'
-- Split the attribute column into its individual "key value" entries.
-- Entries are separated by ';'. The spaces that follow each ';' are
-- skipped with C.dropWhile (== ' ').
-- Splitting on ';' leaves an empty entry after a trailing ';'. Such empty
-- entries are filtered out rather than blindly dropping the last
-- character of the column, so a column without a terminating ';' keeps
-- its final character and an empty column yields no entries.
splitAnnots :: C.ByteString -> [C.ByteString]
splitAnnots = filter (not . C.null) . map (C.dropWhile (== ' ')) . C.split ';'
-- Split one "key value" attribute entry into a (key, value) pair.
-- The split happens at the first space only, so a value that itself
-- contains spaces (e.g. a quoted multi-word name) stays in one piece.
-- An entry with no space yields the whole entry as key and an empty value.
splitAnnot :: C.ByteString -> (C.ByteString, C.ByteString)
splitAnnot t = (k, v)
  where
    (k, rest) = C.break (== ' ') t
    -- Drop the separating space(s) in front of the value.
    v = C.dropWhile (== ' ') rest
-- True for every (key, value) attribute whose key is not "gene_id".
notGeneId :: (C.ByteString, C.ByteString) -> Bool
notGeneId (key, _) = key /= geneIdKey
  where
    geneIdKey = C.pack "gene_id"
-- Glue output columns back together with a single tab between each.
joinFields :: [C.ByteString] -> C.ByteString
joinFields columns = C.intercalate tab columns
  where
    tab = C.singleton '\t'
-- Convert one tab-separated annotation record into a six-column BED line:
-- chrom, start - 1, end, gene id, score, strand.
-- Input columns used: 1 (chrom), 4 (start), 5 (end), 6 (score),
-- 7 (strand) and 9 (attributes); columns 2, 3, 8 and any after the 9th
-- are ignored.
-- Aborts with a descriptive error when the line has fewer than nine
-- columns or when the attribute column has no gene_id entry.
processLine :: C.ByteString -> C.ByteString
processLine l =
  case splitFields l of
    (chrom : _ : _ : start : end : score : strand : _ : annotField : _) ->
      joinFields [chrom, bedStart start, end, geneId annotField, score, strand]
    _ ->
      error ("processLine: expected at least 9 tab-separated fields: " ++ C.unpack l)
  where
    -- BED starts are 0-based while the input start is 1-based, hence the -1.
    bedStart start = C.pack (show (fastRead start - 1))
    -- Value of the first gene_id attribute, without its surrounding quotes.
    geneId annotField =
      case dropWhile notGeneId (map splitAnnot (splitAnnots annotField)) of
        ((_, value) : _) -> unquote value
        [] -> error ("processLine: no gene_id attribute: " ++ C.unpack l)
    -- Strip one pair of surrounding double quotes; leave anything else as is
    -- so that an unquoted value does not lose its first and last character.
    unquote value
      | C.length value >= 2 && C.head value == '"' && C.last value == '"' =
          C.init (C.tail value)
      | otherwise = value
-- Filter from standard input to standard output: every annotation record
-- read becomes one BED line.
-- The work is done on lazy ByteStrings via C.interact, line by line.
-- Blank lines and lines starting with '#' (comment/header lines) carry no
-- record and are skipped instead of being handed to processLine.
main :: IO ()
main = C.interact (C.unlines . map processLine . filter isRecord . C.lines)
  where
    isRecord line = not (C.null line) && C.head line /= '#'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment