Skip to content
Snippets Groups Projects
Select Git revision
  • fbd575542881a044a50a57a068fdd8ee12abd638
  • master default protected
2 results

compute_genes_exon_lengths.py

Blame
  • user avatar
    Blaise Li authored
    fbd57554
    History
    compute_genes_exon_lengths.py 1.62 KiB
    #!/usr/bin/env python3
    # vim: set fileencoding=utf-8 :
    """This script reads a gtf file and extract transcripts of different biotypes in separate bed files."""
    
    import argparse
    import os
    import sys
    import pandas as pd
    from libhts import gtf_2_genes_exon_lengths, repeat_bed_2_lengths, spikein_gtf_2_lengths
    
    
    # Short module-level aliases for standard functions.
    # OPJ is used in main() to build paths inside the annotation directory.
    OPJ = os.path.join
    # NOTE(review): STRIP and SPLIT are not used in the visible part of this
    # file -- confirm whether they are still needed.
    STRIP = str.strip
    SPLIT = str.split
    
    
    def main():
        """Write the combined lengths table for an annotation directory.

        The directory is taken from the required -a/--annot_dir command-line
        option. The annotation files it is expected to contain are each passed
        to the matching libhts helper, the results are concatenated with
        pandas.concat (in the order listed below), and the concatenation is
        written tab-separated to "union_exon_lengths.txt" in the same directory.

        Returns 0, which the caller uses as the process exit status.
        """
        cli = argparse.ArgumentParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            description=__doc__)
        cli.add_argument(
            "-a", "--annot_dir",
            help="Directory in which to read annotations and write the resulting file.",
            required=True)
        annot_dir = cli.parse_args().annot_dir

        def in_annot_dir(filename):
            """Return the path of *filename* inside the annotation directory."""
            return OPJ(annot_dir, filename)

        # gtf-based inputs come first: genes, spike-ins, then mCherry.
        tables = [
            gtf_2_genes_exon_lengths(in_annot_dir("genes.gtf")),
            spikein_gtf_2_lengths(in_annot_dir("spike_ins.gtf")),
            gtf_2_genes_exon_lengths(in_annot_dir("mCherry.gtf")),
        ]
        # bed-based repeat inputs follow, all handled by the same helper.
        repeat_beds = (
            "DNA_transposons_rmsk.bed",
            "RNA_transposons_rmsk.bed",
            "satellites_rmsk.bed",
            "simple_repeats_rmsk.bed",
        )
        for bed_name in repeat_beds:
            tables.append(repeat_bed_2_lengths(in_annot_dir(bed_name)))
        # single output file, written next to the inputs
        combined = pd.concat(tables)
        combined.to_csv(in_annot_dir("union_exon_lengths.txt"), sep="\t")
        return 0
    
    
    if __name__ == "__main__":
        sys.exit(main())