diff --git a/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py new file mode 100644 index 0000000000000000000000000000000000000000..a2f7eb30499755806a4b7414c0b25166c75842eb --- /dev/null +++ b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py @@ -0,0 +1,324 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: April 2018 +Version: 2 + + + +1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES + +2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning) + - Number of models + - Alternative atomic locations + - Heteroatoms + + + + +Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE) +------------------------------------------------------------------------------- + +Arguments: +------------ +[PROTEIN/PROTEIN COMPLEX]: + Input (.pdb format) + Protein/protein complex + +(DISTANCE)- optionnal + Input (float or integer) + Distance threshold between the target and the partner + (6 angstroms - by defaut) + + +Return: +------------ +[PROTEIN/PROTEIN INTERFACE RESIDUES]: + Output (.txt format) "PDB_Chain1-Chain2_distance.txt" + (chain ID, residue name and residue ID included) + +[PROTEIN CHAIN]: + Output (.pdb format) "PDB_Chain1.pdb" + (heteroatom not included) +""" + +# ============================================================================= + +import os +import sys +import argparse +import textwrap +import logging +import time + +from Bio.PDB import Select, PDBIO +from Bio.PDB.PDBParser import PDBParser +import itertools + +# ============================================================================= + +LOG = logging.getLogger("IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES") + +MODEL_FILENAME = 'pdb_multiple_models' +ALTLOC_FILENAME = 'pdb_altloc' +NODIMER_FILENAME = 'pdb_not_dimer' + +# ============================================================================= + +def main(pdb, distance): + start = time.time() + + pdb_code = file.strip().split('.pdb')[:-1] + structure = PDBParser().get_structure(pdb_code, file) + model = structure[0] + + # Check if multiple models available + if len(list(structure.get_models())) != 0: + try: + if not os.path.exists('{}.txt'.format(MODEL_FILENAME)): + with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile: + outfile.write(pdb_code) + else: + with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError, e: + LOG.error('{}'.format(e)) + sys.exit(1) + + # Check if 3D structure contains alternative atomic locations + if get_altloc(model) != 0: + try: + if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)): + with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile: + outfile.write(str(pdb_code) + '\n') + else: + with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError, e: + LOG.error('{}'.format(e)) + sys.exit(1) + else: + + # Get the first copy of each chain + target_partner_chain = select_first_chain(get_protein_assembly(structure)) + print(target_partner_chain) + + # Check if 3D structure contains more than two molecule IDs + if len(target_partner_chain) > 2: + try: + if not os.path.exists('{}.txt'.format(NODIMER_FILENAME_FILENAME)): + with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'w') as outfile: + outfile.write(pdb_code) + else: + with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError, e: + LOG.error('{}'.format(e)) + sys.exit(1) + + + # Remove heteroatom (protein/protein complex, no ligand) + for chain in target_partner_chain: + residue_to_remove = remove_hetatm(model, chain) + for residue in residue_to_remove: + model[chain].detach_child(residue[1]) + + + # Get all chain-chain combinations + subset = permutation(target_partner_chain) + + + # Calculate chain-chain distance (interaction patch) + for chain_pair in subset: + target = chain_pair[0] + partner = chain_pair[1] + + if len(model[target].get_list()) > 3 and \ + len(model[partner].get_list()) > 3: + + get_interface_residues(target, partner, model, distance) + + + # Save each chain in different file + for chain in target_partner_chain: + io = PDBIO() + io.set_structure(model) + io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain), \ + preserve_atom_numbering = True) + + + + end = time.time() + print('TIME: {}'.format(end - start)) + + +class ChainSelection(Select): + def __init__(self, chain): + self.chain = chain + + def accept_chain(self, chain): + if chain.get_id() == self.chain: + return 1 + else: + return 0 + + +def get_altloc(model): + altloc = 0 + for chain in model.child_list: + for residue in chain.get_list(): + if residue.is_disordered(): + altloc = altloc + 1 + break + return altloc + + +def get_protein_assembly(structure): + assembly = {} + for molecule in structure.header['compound'].keys(): + assembly[molecule] = [] + for chain in structure.header['compound'][molecule]['chain'].split(','): + assembly[molecule].append(chain.strip().upper()) + return assembly + # {'1': ['A', 'C', 'E'], '2': ['B', 'D']} + + +def select_first_chain(assembly): + chain_id = [] + for molecule in assembly.keys(): + chain_id.append(assembly[molecule][0]) + return chain_id + # ['A', 'B'] + + +def permutation(list_id): + subset = [] + for permutation in itertools.permutations(list_id, 2): + subset.append(permutation) + print subset + return subset + # [(['A'], ['B']), (['B'], ['A'])] + + +def remove_hetatm(model, chain): + residue_to_remove = [] + for residue in model[chain].get_residues(): + if residue.id[0] != ' ': + residue_to_remove.append((model[chain].id, residue.id)) + return residue_to_remove + + + +def get_interface_residues(target, partner, model, distance): + ires = [] + with open('{}_{}{}_{}.txt'.format(model.parent.id, \ + target, partner, distance), 'wb') as outfile: + + for rest in model[target].get_residues(): + if ''.join(map(str,(rest.get_parent().get_id(), '.', + rest.get_resname(), rest.get_id()[1]))): + for atomt in rest.get_atoms(): + + for atomp in model[partner].get_atoms(): + xt = atomt.get_coord()[0] + xp = atomp.get_coord()[0] + dist_x = calculate_euclidist_x(xt, xp) + + if dist_x < float(distance): + yt = atomt.get_coord()[1] + yp = atomp.get_coord()[1] + dist_xy = calculate_euclidist_xy(xt, yt, xp, yp) + + if dist_xy < float(distance): + zt = atomt.get_coord()[2] + zp = atomp.get_coord()[2] + dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp) + + if dist_xyz < float(distance): + ires.append(''.join(map(str, \ + (rest.get_parent().get_id(), '.', \ + rest.get_resname(), rest.get_id()[1]))) + + for element in ires: + outfile.write(element + '\n') + + + +def calculate_euclidist_x(x1, x2): + """ + Calculate the distance x between two 1D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinate x of target atom + coordinate x of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2) ** 0.5 + return dist + + +def calculate_euclidist_xy(x1, y1, x2, y2): + """ + Calculate the distance xy between two 2D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xy of target atom + coordinates xy of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5 + return dist + + +def calculate_euclidist_xyz(x1, y1, z1, x2, y2, z2): + """ + Calculate the euclidian distance xyz between two 3D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xyz of target atom + coordinates xyz of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5 + return dist + + + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ + %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + +# ============================================================================= + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description = + textwrap.dedent(''' + 1) Format protein/protein complex PDB structure (no ligand) + 1.a) Number of models + 1.b) Alternative atomic locations + 1.c) Heteroatoms + 2) Identify protein/protein interface residues + (for combinations of first chain of each protein) + 3) Save each chain in separate files''') + parser.add_argument('pdb', help = + textwrap.dedent(''' Input [.pdb file]: PBD 3D structure + (with HEADER included)''')) + parser.add_argument('-d', dest = 'distance', help = + textwrap.dedent('''Input [float or integer]: distance threshold \ + {by default, 6 Angstroms}''') + options = parser.parse_args() + + if options.distance is None: + options.distance = 6.0 + + setlogger() + main(options.file, options.distance)