diff --git a/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py index a2f7eb30499755806a4b7414c0b25166c75842eb..668838501684cf00b2e0c79846a473d4ecfd5391 100644 --- a/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py +++ b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py @@ -10,37 +10,37 @@ Version: 2 1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES 2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning) - - Number of models - - Alternative atomic locations - - Heteroatoms + - Number of models + - Alternative atomic locations + - Heteroatoms -Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE) +Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE) ------------------------------------------------------------------------------- Arguments: ------------ [PROTEIN/PROTEIN COMPLEX]: - Input (.pdb format) - Protein/protein complex + Input (.pdb format) + Protein/protein complex (DISTANCE)- optionnal - Input (float or integer) - Distance threshold between the target and the partner - (6 angstroms - by defaut) + Input (float or integer) + Distance threshold between the target and the partner + (6 angstroms - by defaut) Return: ------------ [PROTEIN/PROTEIN INTERFACE RESIDUES]: - Output (.txt format) "PDB_Chain1-Chain2_distance.txt" - (chain ID, residue name and residue ID included) + Output (.txt format) "PDB_Chain1-Chain2_distance.txt" + (chain ID, residue name and residue ID included) [PROTEIN CHAIN]: - Output (.pdb format) "PDB_Chain1.pdb" - (heteroatom not included) + Output (.pdb format) "PDB_Chain1.pdb" + (heteroatom not included) """ # ============================================================================= @@ -67,258 +67,255 @@ NODIMER_FILENAME = 'pdb_not_dimer' # ============================================================================= def main(pdb, distance): - start = time.time() - - pdb_code = file.strip().split('.pdb')[:-1] - structure = PDBParser().get_structure(pdb_code, file) - model = structure[0] - - # Check if multiple models available - if len(list(structure.get_models())) != 0: - try: - if not os.path.exists('{}.txt'.format(MODEL_FILENAME)): - with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile: - outfile.write(pdb_code) - else: - with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile: - outfile.write(str(pdb_code) + '\n') - except IOError, e: - LOG.error('{}'.format(e)) - sys.exit(1) - - # Check if 3D structure contains alternative atomic locations - if get_altloc(model) != 0: - try: - if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)): - with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile: - outfile.write(str(pdb_code) + '\n') - else: - with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile: - outfile.write(str(pdb_code) + '\n') - except IOError, e: - LOG.error('{}'.format(e)) - sys.exit(1) - else: - - # Get the first copy of each chain - target_partner_chain = select_first_chain(get_protein_assembly(structure)) - print(target_partner_chain) - - # Check if 3D structure contains more than two molecule IDs - if len(target_partner_chain) > 2: - try: - if not os.path.exists('{}.txt'.format(NODIMER_FILENAME_FILENAME)): - with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'w') as outfile: - outfile.write(pdb_code) - else: - with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'a') as outfile: - outfile.write(str(pdb_code) + '\n') - except IOError, e: - LOG.error('{}'.format(e)) - sys.exit(1) - - - # Remove heteroatom (protein/protein complex, no ligand) - for chain in target_partner_chain: - residue_to_remove = remove_hetatm(model, chain) - for residue in residue_to_remove: - model[chain].detach_child(residue[1]) - - - # Get all chain-chain combinations - subset = permutation(target_partner_chain) - - - # Calculate chain-chain distance (interaction patch) - for chain_pair in subset: - target = chain_pair[0] - partner = chain_pair[1] - - if len(model[target].get_list()) > 3 and \ - len(model[partner].get_list()) > 3: - - get_interface_residues(target, partner, model, distance) - - - # Save each chain in different file - for chain in target_partner_chain: - io = PDBIO() - io.set_structure(model) - io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain), \ - preserve_atom_numbering = True) - - - - end = time.time() - print('TIME: {}'.format(end - start)) + start = time.time() + + pdb_code = pdb.strip().split('.pdb')[:-1] + structure = PDBParser().get_structure(pdb_code, pdb) + model = structure[0] + + # Check if multiple models available + if len(list(structure.get_models())) != 0: + try: + if not os.path.exists('{}.txt'.format(MODEL_FILENAME)): + with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile: + outfile.write(pdb_code) + else: + with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError as e: + LOG.error('{}'.format(e)) + sys.exit(1) + + # Check if 3D structure contains alternative atomic locations + if get_altloc(model) != 0: + try: + if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)): + with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile: + outfile.write(str(pdb_code) + '\n') + else: + with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError as e: + LOG.error('{}'.format(e)) + sys.exit(1) + else: + + # Get the first copy of each chain + target_partner_chain = select_first_chain(get_protein_assembly(structure)) + print(target_partner_chain) + + # Check if 3D structure contains more than two molecule IDs + if len(target_partner_chain) > 2: + try: + if not os.path.exists('{}.txt'.format(NODIMER_FILENAME)): + with open('{}.txt'.format(NODIMER_FILENAME), 'w') as outfile: + outfile.write(pdb_code) + else: + with open('{}.txt'.format(NODIMER_FILENAME), 'a') as outfile: + outfile.write(str(pdb_code) + '\n') + except IOError as e: + LOG.error('{}'.format(e)) + sys.exit(1) + + + # Remove heteroatom (protein/protein complex, no ligand) + for chain in target_partner_chain: + residue_to_remove = remove_hetatm(model, chain) + for residue in residue_to_remove: + model[chain].detach_child(residue[1]) + + + # Get all chain-chain combinations + subset = permutation(target_partner_chain) + + + # Calculate chain-chain distance (interaction patch) + for chain_pair in subset: + target = chain_pair[0] + partner = chain_pair[1] + + if len(model[target].get_list()) > 3 and len(model[partner].get_list()) > 3: + + get_interface_residues(target, partner, model, distance) + + + # Save each chain in different file + for chain in target_partner_chain: + io = PDBIO() + io.set_structure(model) + io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain), + preserve_atom_numbering = True) + + + + end = time.time() + print('TIME: {}'.format(end - start)) class ChainSelection(Select): - def __init__(self, chain): - self.chain = chain + def __init__(self, chain): + self.chain = chain - def accept_chain(self, chain): - if chain.get_id() == self.chain: - return 1 - else: - return 0 + def accept_chain(self, chain): + if chain.get_id() == self.chain: + return 1 + else: + return 0 def get_altloc(model): - altloc = 0 - for chain in model.child_list: - for residue in chain.get_list(): - if residue.is_disordered(): - altloc = altloc + 1 - break - return altloc + altloc = 0 + for chain in model.child_list: + for residue in chain.get_list(): + if residue.is_disordered(): + altloc = altloc + 1 + break + return altloc def get_protein_assembly(structure): - assembly = {} - for molecule in structure.header['compound'].keys(): - assembly[molecule] = [] - for chain in structure.header['compound'][molecule]['chain'].split(','): - assembly[molecule].append(chain.strip().upper()) - return assembly - # {'1': ['A', 'C', 'E'], '2': ['B', 'D']} + assembly = {} + for molecule in structure.header['compound'].keys(): + assembly[molecule] = [] + for chain in structure.header['compound'][molecule]['chain'].split(','): + assembly[molecule].append(chain.strip().upper()) + return assembly + # {'1': ['A', 'C', 'E'], '2': ['B', 'D']} def select_first_chain(assembly): - chain_id = [] - for molecule in assembly.keys(): - chain_id.append(assembly[molecule][0]) - return chain_id - # ['A', 'B'] + chain_id = [] + for molecule in assembly.keys(): + chain_id.append(assembly[molecule][0]) + return chain_id + # ['A', 'B'] def permutation(list_id): - subset = [] - for permutation in itertools.permutations(list_id, 2): - subset.append(permutation) - print subset - return subset - # [(['A'], ['B']), (['B'], ['A'])] + subset = [] + for permutation in itertools.permutations(list_id, 2): + subset.append(permutation) + print(subset) + return subset + # [(['A'], ['B']), (['B'], ['A'])] def remove_hetatm(model, chain): - residue_to_remove = [] - for residue in model[chain].get_residues(): - if residue.id[0] != ' ': - residue_to_remove.append((model[chain].id, residue.id)) - return residue_to_remove + residue_to_remove = [] + for residue in model[chain].get_residues(): + if residue.id[0] != ' ': + residue_to_remove.append((model[chain].id, residue.id)) + return residue_to_remove def get_interface_residues(target, partner, model, distance): - ires = [] - with open('{}_{}{}_{}.txt'.format(model.parent.id, \ - target, partner, distance), 'wb') as outfile: + ires = [] + with open('{}_{}{}_{}.txt'.format(model.parent.id, target, partner, distance), 'wb') as outfile: - for rest in model[target].get_residues(): - if ''.join(map(str,(rest.get_parent().get_id(), '.', - rest.get_resname(), rest.get_id()[1]))): - for atomt in rest.get_atoms(): - - for atomp in model[partner].get_atoms(): - xt = atomt.get_coord()[0] - xp = atomp.get_coord()[0] - dist_x = calculate_euclidist_x(xt, xp) + for rest in model[target].get_residues(): + if ''.join(map(str,(rest.get_parent().get_id(), '.', + rest.get_resname(), rest.get_id()[1]))): + for atomt in rest.get_atoms(): + + for atomp in model[partner].get_atoms(): + xt = atomt.get_coord()[0] + xp = atomp.get_coord()[0] + dist_x = calculate_euclidist_x(xt, xp) - if dist_x < float(distance): - yt = atomt.get_coord()[1] - yp = atomp.get_coord()[1] - dist_xy = calculate_euclidist_xy(xt, yt, xp, yp) + if dist_x < float(distance): + yt = atomt.get_coord()[1] + yp = atomp.get_coord()[1] + dist_xy = calculate_euclidist_xy(xt, yt, xp, yp) - if dist_xy < float(distance): - zt = atomt.get_coord()[2] - zp = atomp.get_coord()[2] - dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp) + if dist_xy < float(distance): + zt = atomt.get_coord()[2] + zp = atomp.get_coord()[2] + dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp) - if dist_xyz < float(distance): - ires.append(''.join(map(str, \ - (rest.get_parent().get_id(), '.', \ - rest.get_resname(), rest.get_id()[1]))) + if dist_xyz < float(distance): + ires.append(''.join(map(str, + (rest.get_parent().get_id(), '.', + rest.get_resname(), rest.get_id()[1])))) - for element in ires: - outfile.write(element + '\n') + for element in ires: + outfile.write(element + '\n') def calculate_euclidist_x(x1, x2): - """ - Calculate the distance x between two 1D points - ---------------------------------------------------------------------- - Arguments: - [float]: coordinate x of target atom - coordinate x of partnet atom - Return: - [float]: distance - """ - dist = ((x1-x2)**2) ** 0.5 - return dist + """ + Calculate the distance x between two 1D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinate x of target atom + coordinate x of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2) ** 0.5 + return dist def calculate_euclidist_xy(x1, y1, x2, y2): - """ - Calculate the distance xy between two 2D points - ---------------------------------------------------------------------- - Arguments: - [float]: coordinates xy of target atom - coordinates xy of partnet atom - Return: - [float]: distance - """ - dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5 - return dist - - + """ + Calculate the distance xy between two 2D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xy of target atom + coordinates xy of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5 + return dist + + def calculate_euclidist_xyz(x1, y1, z1, x2, y2, z2): - """ - Calculate the euclidian distance xyz between two 3D points - ---------------------------------------------------------------------- - Arguments: - [float]: coordinates xyz of target atom - coordinates xyz of partnet atom - Return: - [float]: distance - """ - dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5 - return dist + """ + Calculate the euclidian distance xyz between two 3D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xyz of target atom + coordinates xyz of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5 + return dist def setlogger(): - LOG.setLevel(logging.INFO) - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ - %(levelname)s - %(message)s') - ch.setFormatter(formatter) - LOG.addHandler(ch) + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) # ============================================================================= if __name__ == '__main__': - parser = argparse.ArgumentParser(description = - textwrap.dedent(''' - 1) Format protein/protein complex PDB structure (no ligand) - 1.a) Number of models - 1.b) Alternative atomic locations - 1.c) Heteroatoms - 2) Identify protein/protein interface residues - (for combinations of first chain of each protein) - 3) Save each chain in separate files''') - parser.add_argument('pdb', help = - textwrap.dedent(''' Input [.pdb file]: PBD 3D structure - (with HEADER included)''')) - parser.add_argument('-d', dest = 'distance', help = - textwrap.dedent('''Input [float or integer]: distance threshold \ - {by default, 6 Angstroms}''') - options = parser.parse_args() - - if options.distance is None: - options.distance = 6.0 - - setlogger() - main(options.file, options.distance) + parser = argparse.ArgumentParser(description = + textwrap.dedent(''' + 1) Format protein/protein complex PDB structure (no ligand) + 1.a) Number of models + 1.b) Alternative atomic locations + 1.c) Heteroatoms + 2) Identify protein/protein interface residues + (for combinations of first chain of each protein) + 3) Save each chain in separate files''')) + parser.add_argument('pdb', help = + textwrap.dedent(''' Input [.pdb file]: PBD 3D structure + (with HEADER included)''')) + parser.add_argument('-d', dest = 'distance', help = + textwrap.dedent('''Input [float or integer]: distance threshold + {by default, 6 Angstroms}''')) + options = parser.parse_args() + + if options.distance is None: + options.distance = 6.0 + + setlogger() + main(options.file, options.distance)