Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
ippidb-web
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
iPPIDB
ippidb-web
Commits
a6a52657
Commit
a6a52657
authored
7 years ago
by
Fabien MAREUIL
Browse files
Options
Downloads
Patches
Plain Diff
new script alexa
Former-commit-id: e36ec1fe4be85cdf789737bc3123acf1809d6ea7
parent
7f8036d0
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
+324
-0
324 additions, 0 deletions
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
with
324 additions
and
0 deletions
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
0 → 100644
+
324
−
0
View file @
a6a52657
# -*- coding: utf-8 -*-
"""
Author: Alexandra Moine-Franel
Date: April 2018
Version: 2
1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES
2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning)
- Number of models
- Alternative atomic locations
- Heteroatoms
Usage: [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
-------------------------------------------------------------------------------
Arguments:
------------
[PROTEIN/PROTEIN COMPLEX]:
Input (.pdb format)
Protein/protein complex
(DISTANCE)- optionnal
Input (float or integer)
Distance threshold between the target and the partner
(6 angstroms - by defaut)
Return:
------------
[PROTEIN/PROTEIN INTERFACE RESIDUES]:
Output (.txt format)
"
PDB_Chain1-Chain2_distance.txt
"
(chain ID, residue name and residue ID included)
[PROTEIN CHAIN]:
Output (.pdb format)
"
PDB_Chain1.pdb
"
(heteroatom not included)
"""
# =============================================================================
import
os
import
sys
import
argparse
import
textwrap
import
logging
import
time
from
Bio.PDB
import
Select
,
PDBIO
from
Bio.PDB.PDBParser
import
PDBParser
import
itertools
# =============================================================================
LOG
=
logging
.
getLogger
(
"
IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES
"
)
MODEL_FILENAME
=
'
pdb_multiple_models
'
ALTLOC_FILENAME
=
'
pdb_altloc
'
NODIMER_FILENAME
=
'
pdb_not_dimer
'
# =============================================================================
def
main
(
pdb
,
distance
):
start
=
time
.
time
()
pdb_code
=
file
.
strip
().
split
(
'
.pdb
'
)[:
-
1
]
structure
=
PDBParser
().
get_structure
(
pdb_code
,
file
)
model
=
structure
[
0
]
# Check if multiple models available
if
len
(
list
(
structure
.
get_models
()))
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Check if 3D structure contains alternative atomic locations
if
get_altloc
(
model
)
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
else
:
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
else
:
# Get the first copy of each chain
target_partner_chain
=
select_first_chain
(
get_protein_assembly
(
structure
))
print
(
target_partner_chain
)
# Check if 3D structure contains more than two molecule IDs
if
len
(
target_partner_chain
)
>
2
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Remove heteroatom (protein/protein complex, no ligand)
for
chain
in
target_partner_chain
:
residue_to_remove
=
remove_hetatm
(
model
,
chain
)
for
residue
in
residue_to_remove
:
model
[
chain
].
detach_child
(
residue
[
1
])
# Get all chain-chain combinations
subset
=
permutation
(
target_partner_chain
)
# Calculate chain-chain distance (interaction patch)
for
chain_pair
in
subset
:
target
=
chain_pair
[
0
]
partner
=
chain_pair
[
1
]
if
len
(
model
[
target
].
get_list
())
>
3
and
\
len
(
model
[
partner
].
get_list
())
>
3
:
get_interface_residues
(
target
,
partner
,
model
,
distance
)
# Save each chain in different file
for
chain
in
target_partner_chain
:
io
=
PDBIO
()
io
.
set_structure
(
model
)
io
.
save
(
'
{}_{}.pdb
'
.
format
(
pdb_code
,
chain
),
select
=
ChainSelection
(
chain
),
\
preserve_atom_numbering
=
True
)
end
=
time
.
time
()
print
(
'
TIME: {}
'
.
format
(
end
-
start
))
class
ChainSelection
(
Select
):
def
__init__
(
self
,
chain
):
self
.
chain
=
chain
def
accept_chain
(
self
,
chain
):
if
chain
.
get_id
()
==
self
.
chain
:
return
1
else
:
return
0
def
get_altloc
(
model
):
altloc
=
0
for
chain
in
model
.
child_list
:
for
residue
in
chain
.
get_list
():
if
residue
.
is_disordered
():
altloc
=
altloc
+
1
break
return
altloc
def
get_protein_assembly
(
structure
):
assembly
=
{}
for
molecule
in
structure
.
header
[
'
compound
'
].
keys
():
assembly
[
molecule
]
=
[]
for
chain
in
structure
.
header
[
'
compound
'
][
molecule
][
'
chain
'
].
split
(
'
,
'
):
assembly
[
molecule
].
append
(
chain
.
strip
().
upper
())
return
assembly
# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
def
select_first_chain
(
assembly
):
chain_id
=
[]
for
molecule
in
assembly
.
keys
():
chain_id
.
append
(
assembly
[
molecule
][
0
])
return
chain_id
# ['A', 'B']
def
permutation
(
list_id
):
subset
=
[]
for
permutation
in
itertools
.
permutations
(
list_id
,
2
):
subset
.
append
(
permutation
)
print
subset
return
subset
# [(['A'], ['B']), (['B'], ['A'])]
def
remove_hetatm
(
model
,
chain
):
residue_to_remove
=
[]
for
residue
in
model
[
chain
].
get_residues
():
if
residue
.
id
[
0
]
!=
'
'
:
residue_to_remove
.
append
((
model
[
chain
].
id
,
residue
.
id
))
return
residue_to_remove
def
get_interface_residues
(
target
,
partner
,
model
,
distance
):
ires
=
[]
with
open
(
'
{}_{}{}_{}.txt
'
.
format
(
model
.
parent
.
id
,
\
target
,
partner
,
distance
),
'
wb
'
)
as
outfile
:
for
rest
in
model
[
target
].
get_residues
():
if
''
.
join
(
map
(
str
,(
rest
.
get_parent
().
get_id
(),
'
.
'
,
rest
.
get_resname
(),
rest
.
get_id
()[
1
]))):
for
atomt
in
rest
.
get_atoms
():
for
atomp
in
model
[
partner
].
get_atoms
():
xt
=
atomt
.
get_coord
()[
0
]
xp
=
atomp
.
get_coord
()[
0
]
dist_x
=
calculate_euclidist_x
(
xt
,
xp
)
if
dist_x
<
float
(
distance
):
yt
=
atomt
.
get_coord
()[
1
]
yp
=
atomp
.
get_coord
()[
1
]
dist_xy
=
calculate_euclidist_xy
(
xt
,
yt
,
xp
,
yp
)
if
dist_xy
<
float
(
distance
):
zt
=
atomt
.
get_coord
()[
2
]
zp
=
atomp
.
get_coord
()[
2
]
dist_xyz
=
calculate_euclidist_xyz
(
xt
,
yt
,
zt
,
xp
,
yp
,
zp
)
if
dist_xyz
<
float
(
distance
):
ires
.
append
(
''
.
join
(
map
(
str
,
\
(
rest
.
get_parent
().
get_id
(),
'
.
'
,
\
rest
.
get_resname
(),
rest
.
get_id
()[
1
])))
for
element
in
ires
:
outfile
.
write
(
element
+
'
\n
'
)
def
calculate_euclidist_x
(
x1
,
x2
):
"""
Calculate the distance x between two 1D points
----------------------------------------------------------------------
Arguments:
[float]: coordinate x of target atom
coordinate x of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
)
**
0.5
return
dist
def
calculate_euclidist_xy
(
x1
,
y1
,
x2
,
y2
):
"""
Calculate the distance xy between two 2D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xy of target atom
coordinates xy of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
)
**
0.5
return
dist
def
calculate_euclidist_xyz
(
x1
,
y1
,
z1
,
x2
,
y2
,
z2
):
"""
Calculate the euclidian distance xyz between two 3D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xyz of target atom
coordinates xyz of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
+
(
z1
-
z2
)
**
2
)
**
0.5
return
dist
def
setlogger
():
LOG
.
setLevel
(
logging
.
INFO
)
ch
=
logging
.
StreamHandler
()
ch
.
setLevel
(
logging
.
INFO
)
formatter
=
logging
.
Formatter
(
'
%(asctime)s - %(name)s - %(funcName)s -
\
%(levelname)s - %(message)s
'
)
ch
.
setFormatter
(
formatter
)
LOG
.
addHandler
(
ch
)
# =============================================================================
if
__name__
==
'
__main__
'
:
parser
=
argparse
.
ArgumentParser
(
description
=
textwrap
.
dedent
(
'''
1) Format protein/protein complex PDB structure (no ligand)
1.a) Number of models
1.b) Alternative atomic locations
1.c) Heteroatoms
2) Identify protein/protein interface residues
(for combinations of first chain of each protein)
3) Save each chain in separate files
'''
)
parser
.
add_argument
(
'
pdb
'
,
help
=
textwrap
.
dedent
(
'''
Input [.pdb file]: PBD 3D structure
(with HEADER included)
'''
))
parser
.
add_argument
(
'
-d
'
,
dest
=
'
distance
'
,
help
=
textwrap
.
dedent
(
'''
Input [float or integer]: distance threshold
\
{by default, 6 Angstroms}
'''
)
options
=
parser
.
parse_args
()
if
options
.
distance
is
None
:
options
.
distance
=
6.0
setlogger
()
main
(
options
.
file
,
options
.
distance
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment