Skip to content
Snippets Groups Projects
Commit 1e790032 authored by Fabien  MAREUIL's avatar Fabien MAREUIL
Browse files

reformat

Former-commit-id: dbc51f36f32ea66e9d4c7f9ceda01e300ca1db4a
parent a6a52657
No related branches found
No related tags found
No related merge requests found
......@@ -10,37 +10,37 @@ Version: 2
1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES
2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning)
- Number of models
- Alternative atomic locations
- Heteroatoms
- Number of models
- Alternative atomic locations
- Heteroatoms
Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
-------------------------------------------------------------------------------
Arguments:
------------
[PROTEIN/PROTEIN COMPLEX]:
Input (.pdb format)
Protein/protein complex
Input (.pdb format)
Protein/protein complex
(DISTANCE)- optionnal
Input (float or integer)
Distance threshold between the target and the partner
(6 angstroms - by defaut)
Input (float or integer)
Distance threshold between the target and the partner
(6 angstroms - by defaut)
Return:
------------
[PROTEIN/PROTEIN INTERFACE RESIDUES]:
Output (.txt format) "PDB_Chain1-Chain2_distance.txt"
(chain ID, residue name and residue ID included)
Output (.txt format) "PDB_Chain1-Chain2_distance.txt"
(chain ID, residue name and residue ID included)
[PROTEIN CHAIN]:
Output (.pdb format) "PDB_Chain1.pdb"
(heteroatom not included)
Output (.pdb format) "PDB_Chain1.pdb"
(heteroatom not included)
"""
# =============================================================================
......@@ -67,258 +67,255 @@ NODIMER_FILENAME = 'pdb_not_dimer'
# =============================================================================
def main(pdb, distance):
start = time.time()
pdb_code = file.strip().split('.pdb')[:-1]
structure = PDBParser().get_structure(pdb_code, file)
model = structure[0]
# Check if multiple models available
if len(list(structure.get_models())) != 0:
try:
if not os.path.exists('{}.txt'.format(MODEL_FILENAME)):
with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile:
outfile.write(pdb_code)
else:
with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError, e:
LOG.error('{}'.format(e))
sys.exit(1)
# Check if 3D structure contains alternative atomic locations
if get_altloc(model) != 0:
try:
if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)):
with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile:
outfile.write(str(pdb_code) + '\n')
else:
with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError, e:
LOG.error('{}'.format(e))
sys.exit(1)
else:
# Get the first copy of each chain
target_partner_chain = select_first_chain(get_protein_assembly(structure))
print(target_partner_chain)
# Check if 3D structure contains more than two molecule IDs
if len(target_partner_chain) > 2:
try:
if not os.path.exists('{}.txt'.format(NODIMER_FILENAME_FILENAME)):
with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'w') as outfile:
outfile.write(pdb_code)
else:
with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError, e:
LOG.error('{}'.format(e))
sys.exit(1)
# Remove heteroatom (protein/protein complex, no ligand)
for chain in target_partner_chain:
residue_to_remove = remove_hetatm(model, chain)
for residue in residue_to_remove:
model[chain].detach_child(residue[1])
# Get all chain-chain combinations
subset = permutation(target_partner_chain)
# Calculate chain-chain distance (interaction patch)
for chain_pair in subset:
target = chain_pair[0]
partner = chain_pair[1]
if len(model[target].get_list()) > 3 and \
len(model[partner].get_list()) > 3:
get_interface_residues(target, partner, model, distance)
# Save each chain in different file
for chain in target_partner_chain:
io = PDBIO()
io.set_structure(model)
io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain), \
preserve_atom_numbering = True)
end = time.time()
print('TIME: {}'.format(end - start))
start = time.time()
pdb_code = pdb.strip().split('.pdb')[:-1]
structure = PDBParser().get_structure(pdb_code, pdb)
model = structure[0]
# Check if multiple models available
if len(list(structure.get_models())) != 0:
try:
if not os.path.exists('{}.txt'.format(MODEL_FILENAME)):
with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile:
outfile.write(pdb_code)
else:
with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError as e:
LOG.error('{}'.format(e))
sys.exit(1)
# Check if 3D structure contains alternative atomic locations
if get_altloc(model) != 0:
try:
if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)):
with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile:
outfile.write(str(pdb_code) + '\n')
else:
with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError as e:
LOG.error('{}'.format(e))
sys.exit(1)
else:
# Get the first copy of each chain
target_partner_chain = select_first_chain(get_protein_assembly(structure))
print(target_partner_chain)
# Check if 3D structure contains more than two molecule IDs
if len(target_partner_chain) > 2:
try:
if not os.path.exists('{}.txt'.format(NODIMER_FILENAME)):
with open('{}.txt'.format(NODIMER_FILENAME), 'w') as outfile:
outfile.write(pdb_code)
else:
with open('{}.txt'.format(NODIMER_FILENAME), 'a') as outfile:
outfile.write(str(pdb_code) + '\n')
except IOError as e:
LOG.error('{}'.format(e))
sys.exit(1)
# Remove heteroatom (protein/protein complex, no ligand)
for chain in target_partner_chain:
residue_to_remove = remove_hetatm(model, chain)
for residue in residue_to_remove:
model[chain].detach_child(residue[1])
# Get all chain-chain combinations
subset = permutation(target_partner_chain)
# Calculate chain-chain distance (interaction patch)
for chain_pair in subset:
target = chain_pair[0]
partner = chain_pair[1]
if len(model[target].get_list()) > 3 and len(model[partner].get_list()) > 3:
get_interface_residues(target, partner, model, distance)
# Save each chain in different file
for chain in target_partner_chain:
io = PDBIO()
io.set_structure(model)
io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain),
preserve_atom_numbering = True)
end = time.time()
print('TIME: {}'.format(end - start))
class ChainSelection(Select):
def __init__(self, chain):
self.chain = chain
def __init__(self, chain):
self.chain = chain
def accept_chain(self, chain):
if chain.get_id() == self.chain:
return 1
else:
return 0
def accept_chain(self, chain):
if chain.get_id() == self.chain:
return 1
else:
return 0
def get_altloc(model):
altloc = 0
for chain in model.child_list:
for residue in chain.get_list():
if residue.is_disordered():
altloc = altloc + 1
break
return altloc
altloc = 0
for chain in model.child_list:
for residue in chain.get_list():
if residue.is_disordered():
altloc = altloc + 1
break
return altloc
def get_protein_assembly(structure):
assembly = {}
for molecule in structure.header['compound'].keys():
assembly[molecule] = []
for chain in structure.header['compound'][molecule]['chain'].split(','):
assembly[molecule].append(chain.strip().upper())
return assembly
# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
assembly = {}
for molecule in structure.header['compound'].keys():
assembly[molecule] = []
for chain in structure.header['compound'][molecule]['chain'].split(','):
assembly[molecule].append(chain.strip().upper())
return assembly
# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
def select_first_chain(assembly):
chain_id = []
for molecule in assembly.keys():
chain_id.append(assembly[molecule][0])
return chain_id
# ['A', 'B']
chain_id = []
for molecule in assembly.keys():
chain_id.append(assembly[molecule][0])
return chain_id
# ['A', 'B']
def permutation(list_id):
subset = []
for permutation in itertools.permutations(list_id, 2):
subset.append(permutation)
print subset
return subset
# [(['A'], ['B']), (['B'], ['A'])]
subset = []
for permutation in itertools.permutations(list_id, 2):
subset.append(permutation)
print(subset)
return subset
# [(['A'], ['B']), (['B'], ['A'])]
def remove_hetatm(model, chain):
residue_to_remove = []
for residue in model[chain].get_residues():
if residue.id[0] != ' ':
residue_to_remove.append((model[chain].id, residue.id))
return residue_to_remove
residue_to_remove = []
for residue in model[chain].get_residues():
if residue.id[0] != ' ':
residue_to_remove.append((model[chain].id, residue.id))
return residue_to_remove
def get_interface_residues(target, partner, model, distance):
ires = []
with open('{}_{}{}_{}.txt'.format(model.parent.id, \
target, partner, distance), 'wb') as outfile:
ires = []
with open('{}_{}{}_{}.txt'.format(model.parent.id, target, partner, distance), 'wb') as outfile:
for rest in model[target].get_residues():
if ''.join(map(str,(rest.get_parent().get_id(), '.',
rest.get_resname(), rest.get_id()[1]))):
for atomt in rest.get_atoms():
for atomp in model[partner].get_atoms():
xt = atomt.get_coord()[0]
xp = atomp.get_coord()[0]
dist_x = calculate_euclidist_x(xt, xp)
for rest in model[target].get_residues():
if ''.join(map(str,(rest.get_parent().get_id(), '.',
rest.get_resname(), rest.get_id()[1]))):
for atomt in rest.get_atoms():
for atomp in model[partner].get_atoms():
xt = atomt.get_coord()[0]
xp = atomp.get_coord()[0]
dist_x = calculate_euclidist_x(xt, xp)
if dist_x < float(distance):
yt = atomt.get_coord()[1]
yp = atomp.get_coord()[1]
dist_xy = calculate_euclidist_xy(xt, yt, xp, yp)
if dist_x < float(distance):
yt = atomt.get_coord()[1]
yp = atomp.get_coord()[1]
dist_xy = calculate_euclidist_xy(xt, yt, xp, yp)
if dist_xy < float(distance):
zt = atomt.get_coord()[2]
zp = atomp.get_coord()[2]
dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp)
if dist_xy < float(distance):
zt = atomt.get_coord()[2]
zp = atomp.get_coord()[2]
dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp)
if dist_xyz < float(distance):
ires.append(''.join(map(str, \
(rest.get_parent().get_id(), '.', \
rest.get_resname(), rest.get_id()[1])))
if dist_xyz < float(distance):
ires.append(''.join(map(str,
(rest.get_parent().get_id(), '.',
rest.get_resname(), rest.get_id()[1]))))
for element in ires:
outfile.write(element + '\n')
for element in ires:
outfile.write(element + '\n')
def calculate_euclidist_x(x1, x2):
"""
Calculate the distance x between two 1D points
----------------------------------------------------------------------
Arguments:
[float]: coordinate x of target atom
coordinate x of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2) ** 0.5
return dist
"""
Calculate the distance x between two 1D points
----------------------------------------------------------------------
Arguments:
[float]: coordinate x of target atom
coordinate x of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2) ** 0.5
return dist
def calculate_euclidist_xy(x1, y1, x2, y2):
"""
Calculate the distance xy between two 2D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xy of target atom
coordinates xy of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5
return dist
"""
Calculate the distance xy between two 2D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xy of target atom
coordinates xy of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5
return dist
def calculate_euclidist_xyz(x1, y1, z1, x2, y2, z2):
"""
Calculate the euclidian distance xyz between two 3D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xyz of target atom
coordinates xyz of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5
return dist
"""
Calculate the euclidian distance xyz between two 3D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xyz of target atom
coordinates xyz of partnet atom
Return:
[float]: distance
"""
dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5
return dist
def setlogger():
LOG.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \
%(levelname)s - %(message)s')
ch.setFormatter(formatter)
LOG.addHandler(ch)
LOG.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
LOG.addHandler(ch)
# =============================================================================
if __name__ == '__main__':
parser = argparse.ArgumentParser(description =
textwrap.dedent('''
1) Format protein/protein complex PDB structure (no ligand)
1.a) Number of models
1.b) Alternative atomic locations
1.c) Heteroatoms
2) Identify protein/protein interface residues
(for combinations of first chain of each protein)
3) Save each chain in separate files''')
parser.add_argument('pdb', help =
textwrap.dedent(''' Input [.pdb file]: PBD 3D structure
(with HEADER included)'''))
parser.add_argument('-d', dest = 'distance', help =
textwrap.dedent('''Input [float or integer]: distance threshold \
{by default, 6 Angstroms}''')
options = parser.parse_args()
if options.distance is None:
options.distance = 6.0
setlogger()
main(options.file, options.distance)
parser = argparse.ArgumentParser(description =
textwrap.dedent('''
1) Format protein/protein complex PDB structure (no ligand)
1.a) Number of models
1.b) Alternative atomic locations
1.c) Heteroatoms
2) Identify protein/protein interface residues
(for combinations of first chain of each protein)
3) Save each chain in separate files'''))
parser.add_argument('pdb', help =
textwrap.dedent(''' Input [.pdb file]: PBD 3D structure
(with HEADER included)'''))
parser.add_argument('-d', dest = 'distance', help =
textwrap.dedent('''Input [float or integer]: distance threshold
{by default, 6 Angstroms}'''))
options = parser.parse_args()
if options.distance is None:
options.distance = 6.0
setlogger()
main(options.file, options.distance)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment