Select Git revision
      
  sqlAnnotToTxt.py
              Thomas  BIGOT authored 
   sqlAnnotToTxt.py  1.62 KiB 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob,re,argparse,os,sqlite3
from taxadb.accessionid import AccessionID
from taxadb.taxid import TaxID
from collections import defaultdict
# Command-line interface: three required positionals plus an optional
# prefix that is prepended to every generated annotation file name.
parser = argparse.ArgumentParser()
for positional, description in (
    ("sqlite", "sqlite DB"),
    ("txtDir", "directory in which produce annotations"),
    ("taxadb", "taxadb file"),
):
    parser.add_argument(positional, help=description)
parser.add_argument(
    "--prefix",
    default="",
    type=str,
    help="file prefix. Eg: “FAM”, default none",
)
args = parser.parse_args()
# Export one plain-text annotation file per row of the `family` table.
#
# Each output file ({txtDir}/{prefix}{family id}.txt) contains:
#   LENGTH / LCA / NBSEQ header lines (tab separated),
#   a KEYWORDS: section listing "keyword<TAB>frequency" by decreasing frequency,
#   a SEQUENCES: section listing the FASTA header lines (without the leading
#   ">") read from aligned/FAM<zero-padded family id>.fasta.
conn = sqlite3.connect(args.sqlite)
try:
    tdb_taxid = TaxID(dbtype='sqlite', dbname=args.taxadb)
    # Two cursors on the same connection: the family query is still being
    # iterated while the keyword query is executed for each family.
    ccf = conn.cursor()
    ckw = conn.cursor()
    for currFamR in ccf.execute('SELECT * FROM family'):
        print("b")  # per-family progress marker (presumably "begin")
        # The family table must yield exactly four columns, in this order;
        # otherwise the unpacking below raises ValueError.
        currF, currSize, currNbseq, currLCA = currFamR

        # Resolve the lineage once and reuse the result. A NULL LCA is treated
        # like an unresolved taxid: the LCA field is written empty.
        lineage = None
        if currLCA is not None:
            lineage = tdb_taxid.lineage_name(int(currLCA), reverse=True)
        lca = "::".join(lineage) if lineage else ""

        outPath = "{dir}/{prefix}{id}.txt".format(dir=args.txtDir, prefix=args.prefix, id=currF)
        # The alignment path is relative to the current working directory and
        # always uses the "FAM" prefix, independently of --prefix.
        fastaPath = "aligned/FAM{famName}.fasta".format(famName=str(currF).zfill(6))

        # `with` guarantees the annotation file is flushed and closed even if
        # the keyword query or the FASTA file raises.
        with open(outPath, "w") as o:
            o.write("LENGTH\t{length}\nLCA\t{lca}\nNBSEQ\t{nbseq}\nKEYWORDS:\n".format(length=currSize, lca=lca, nbseq=currNbseq))
            for currFreq, currKw in ckw.execute('select freq,str from fam_kw JOIN keyword ON fam_kw.kwId = keyword.id WHERE fam_kw.famID=? order by freq desc', (currF,)):
                # Strip any leading/trailing brackets and parentheses from the keyword.
                o.write("{kw}\t{count}\n".format(kw=currKw.strip("[]()"), count=currFreq))
            o.write("SEQUENCES:\n")
            with open(fastaPath) as f:
                for line in f:
                    if line.startswith(">"):
                        # Keep the header text (and its newline), drop the ">".
                        o.write(line[1:])
        print("e")  # per-family progress marker (presumably "end")
finally:
    # Always release the SQLite connection, including on failure.
    conn.close()