Skip to content
Snippets Groups Projects
Select Git revision
  • 1f2f3aeacb9493addfc2b1bd617ad2b3a6b87411
  • master default protected
  • jkende-master-patch-56415
3 results

rebase.py

Blame
  • rebase.py 1.37 KiB
    #!/usr/bin/env python3
    
    def rebase_parser(rebase_file):
        """
        Lazily parse a rebase file, one enzyme per line.

        Each non-blank line is split on whitespace: the first field is used
        as the enzyme name and the third field as the binding site. Every
        character of the site that is not one of 'A', 'C', 'G', 'T' is
        dropped. Blank (or whitespace-only) lines are skipped.

        :param rebase_file: the rebase file to parse
        :type rebase_file: file object (any iterable of str lines works)
        :return: at each call yields a tuple (str enz name, str binding site)
        :rtype: iterator
        :raise IndexError: if a non-blank line has fewer than three fields
        """
        def clean_seq(seq):
            """
            remove each character which is not a base
            """
            # join over a generator instead of repeated '+=' concatenation
            return ''.join(char for char in seq if char in 'ACGT')

        for line in rebase_file:
            fields = line.split()
            if not fields:
                # blank line: nothing to parse, and fields[0] would raise
                continue
            name = fields[0]
            seq = clean_seq(fields[2])
            yield (name, seq)
    
    
    def rebase2dict(rebase_path):
        """
        Load a rebase file into a dictionary.

        The file is opened in text mode and handed to rebase_parser; every
        (name, site) pair it yields is stored in the result. If the same
        enzyme name is yielded more than once, the last site wins.

        :param rebase_path: the path to rebase file to parse
        :type rebase_path: str
        :return: a dict with items (str enz name, str binding site)
        """
        enzymes = {}
        with open(rebase_path, 'r') as handle:
            for enz_name, site in rebase_parser(handle):
                enzymes[enz_name] = site
        return enzymes
    
    
    if __name__ == '__main__':
        import os.path
        import sys

        # Exactly one command-line argument is expected: the rebase file path.
        cli_args = sys.argv[1:]
        if len(cli_args) != 1:
            sys.exit("Usage: rebase.py rebase_file")

        rebase_path = cli_args[0]
        # Bail out with a readable message rather than a traceback from open().
        if not os.path.exists(rebase_path):
            sys.exit("No such file: {}".format(rebase_path))

        # Parse the file and dump the resulting name -> site mapping.
        enz_dict = rebase2dict(rebase_path)
        print(enz_dict)