Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
ippidb-web
Manage
Activity
Members
Labels
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
iPPIDB
ippidb-web
Commits
1e790032
Commit
1e790032
authored
7 years ago
by
Fabien MAREUIL
Browse files
Options
Downloads
Patches
Plain Diff
reformat
Former-commit-id: dbc51f36f32ea66e9d4c7f9ceda01e300ca1db4a
parent
a6a52657
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
+221
-224
221 additions, 224 deletions
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
with
221 additions
and
224 deletions
pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
+
221
−
224
View file @
1e790032
...
...
@@ -10,37 +10,37 @@ Version: 2
1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES
2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning)
- Number of models
- Alternative atomic locations
- Heteroatoms
- Number of models
- Alternative atomic locations
- Heteroatoms
Usage:
[Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
Usage:
[Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
-------------------------------------------------------------------------------
Arguments:
------------
[PROTEIN/PROTEIN COMPLEX]:
Input (.pdb format)
Protein/protein complex
Input (.pdb format)
Protein/protein complex
(DISTANCE)- optionnal
Input (float or integer)
Distance threshold between the target and the partner
(6 angstroms - by defaut)
Input (float or integer)
Distance threshold between the target and the partner
(6 angstroms - by defaut)
Return:
------------
[PROTEIN/PROTEIN INTERFACE RESIDUES]:
Output (.txt format)
"
PDB_Chain1-Chain2_distance.txt
"
(chain ID, residue name and residue ID included)
Output (.txt format)
"
PDB_Chain1-Chain2_distance.txt
"
(chain ID, residue name and residue ID included)
[PROTEIN CHAIN]:
Output (.pdb format)
"
PDB_Chain1.pdb
"
(heteroatom not included)
Output (.pdb format)
"
PDB_Chain1.pdb
"
(heteroatom not included)
"""
# =============================================================================
...
...
@@ -67,258 +67,255 @@ NODIMER_FILENAME = 'pdb_not_dimer'
# =============================================================================
def
main
(
pdb
,
distance
):
start
=
time
.
time
()
pdb_code
=
file
.
strip
().
split
(
'
.pdb
'
)[:
-
1
]
structure
=
PDBParser
().
get_structure
(
pdb_code
,
file
)
model
=
structure
[
0
]
# Check if multiple models available
if
len
(
list
(
structure
.
get_models
()))
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Check if 3D structure contains alternative atomic locations
if
get_altloc
(
model
)
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
else
:
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
else
:
# Get the first copy of each chain
target_partner_chain
=
select_first_chain
(
get_protein_assembly
(
structure
))
print
(
target_partner_chain
)
# Check if 3D structure contains more than two molecule IDs
if
len
(
target_partner_chain
)
>
2
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
,
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Remove heteroatom (protein/protein complex, no ligand)
for
chain
in
target_partner_chain
:
residue_to_remove
=
remove_hetatm
(
model
,
chain
)
for
residue
in
residue_to_remove
:
model
[
chain
].
detach_child
(
residue
[
1
])
# Get all chain-chain combinations
subset
=
permutation
(
target_partner_chain
)
# Calculate chain-chain distance (interaction patch)
for
chain_pair
in
subset
:
target
=
chain_pair
[
0
]
partner
=
chain_pair
[
1
]
if
len
(
model
[
target
].
get_list
())
>
3
and
\
len
(
model
[
partner
].
get_list
())
>
3
:
get_interface_residues
(
target
,
partner
,
model
,
distance
)
# Save each chain in different file
for
chain
in
target_partner_chain
:
io
=
PDBIO
()
io
.
set_structure
(
model
)
io
.
save
(
'
{}_{}.pdb
'
.
format
(
pdb_code
,
chain
),
select
=
ChainSelection
(
chain
),
\
preserve_atom_numbering
=
True
)
end
=
time
.
time
()
print
(
'
TIME: {}
'
.
format
(
end
-
start
))
start
=
time
.
time
()
pdb_code
=
pdb
.
strip
().
split
(
'
.pdb
'
)[:
-
1
]
structure
=
PDBParser
().
get_structure
(
pdb_code
,
pdb
)
model
=
structure
[
0
]
# Check if multiple models available
if
len
(
list
(
structure
.
get_models
()))
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
MODEL_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
as
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Check if 3D structure contains alternative atomic locations
if
get_altloc
(
model
)
!=
0
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
else
:
with
open
(
'
{}.txt
'
.
format
(
ALTLOC_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
as
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
else
:
# Get the first copy of each chain
target_partner_chain
=
select_first_chain
(
get_protein_assembly
(
structure
))
print
(
target_partner_chain
)
# Check if 3D structure contains more than two molecule IDs
if
len
(
target_partner_chain
)
>
2
:
try
:
if
not
os
.
path
.
exists
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME
)):
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME
),
'
w
'
)
as
outfile
:
outfile
.
write
(
pdb_code
)
else
:
with
open
(
'
{}.txt
'
.
format
(
NODIMER_FILENAME
),
'
a
'
)
as
outfile
:
outfile
.
write
(
str
(
pdb_code
)
+
'
\n
'
)
except
IOError
as
e
:
LOG
.
error
(
'
{}
'
.
format
(
e
))
sys
.
exit
(
1
)
# Remove heteroatom (protein/protein complex, no ligand)
for
chain
in
target_partner_chain
:
residue_to_remove
=
remove_hetatm
(
model
,
chain
)
for
residue
in
residue_to_remove
:
model
[
chain
].
detach_child
(
residue
[
1
])
# Get all chain-chain combinations
subset
=
permutation
(
target_partner_chain
)
# Calculate chain-chain distance (interaction patch)
for
chain_pair
in
subset
:
target
=
chain_pair
[
0
]
partner
=
chain_pair
[
1
]
if
len
(
model
[
target
].
get_list
())
>
3
and
len
(
model
[
partner
].
get_list
())
>
3
:
get_interface_residues
(
target
,
partner
,
model
,
distance
)
# Save each chain in different file
for
chain
in
target_partner_chain
:
io
=
PDBIO
()
io
.
set_structure
(
model
)
io
.
save
(
'
{}_{}.pdb
'
.
format
(
pdb_code
,
chain
),
select
=
ChainSelection
(
chain
),
preserve_atom_numbering
=
True
)
end
=
time
.
time
()
print
(
'
TIME: {}
'
.
format
(
end
-
start
))
class
ChainSelection
(
Select
):
def
__init__
(
self
,
chain
):
self
.
chain
=
chain
def
__init__
(
self
,
chain
):
self
.
chain
=
chain
def
accept_chain
(
self
,
chain
):
if
chain
.
get_id
()
==
self
.
chain
:
return
1
else
:
return
0
def
accept_chain
(
self
,
chain
):
if
chain
.
get_id
()
==
self
.
chain
:
return
1
else
:
return
0
def
get_altloc
(
model
):
altloc
=
0
for
chain
in
model
.
child_list
:
for
residue
in
chain
.
get_list
():
if
residue
.
is_disordered
():
altloc
=
altloc
+
1
break
return
altloc
altloc
=
0
for
chain
in
model
.
child_list
:
for
residue
in
chain
.
get_list
():
if
residue
.
is_disordered
():
altloc
=
altloc
+
1
break
return
altloc
def
get_protein_assembly
(
structure
):
assembly
=
{}
for
molecule
in
structure
.
header
[
'
compound
'
].
keys
():
assembly
[
molecule
]
=
[]
for
chain
in
structure
.
header
[
'
compound
'
][
molecule
][
'
chain
'
].
split
(
'
,
'
):
assembly
[
molecule
].
append
(
chain
.
strip
().
upper
())
return
assembly
# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
assembly
=
{}
for
molecule
in
structure
.
header
[
'
compound
'
].
keys
():
assembly
[
molecule
]
=
[]
for
chain
in
structure
.
header
[
'
compound
'
][
molecule
][
'
chain
'
].
split
(
'
,
'
):
assembly
[
molecule
].
append
(
chain
.
strip
().
upper
())
return
assembly
# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
def
select_first_chain
(
assembly
):
chain_id
=
[]
for
molecule
in
assembly
.
keys
():
chain_id
.
append
(
assembly
[
molecule
][
0
])
return
chain_id
# ['A', 'B']
chain_id
=
[]
for
molecule
in
assembly
.
keys
():
chain_id
.
append
(
assembly
[
molecule
][
0
])
return
chain_id
# ['A', 'B']
def
permutation
(
list_id
):
subset
=
[]
for
permutation
in
itertools
.
permutations
(
list_id
,
2
):
subset
.
append
(
permutation
)
print
subset
return
subset
# [(['A'], ['B']), (['B'], ['A'])]
subset
=
[]
for
permutation
in
itertools
.
permutations
(
list_id
,
2
):
subset
.
append
(
permutation
)
print
(
subset
)
return
subset
# [(['A'], ['B']), (['B'], ['A'])]
def
remove_hetatm
(
model
,
chain
):
residue_to_remove
=
[]
for
residue
in
model
[
chain
].
get_residues
():
if
residue
.
id
[
0
]
!=
'
'
:
residue_to_remove
.
append
((
model
[
chain
].
id
,
residue
.
id
))
return
residue_to_remove
residue_to_remove
=
[]
for
residue
in
model
[
chain
].
get_residues
():
if
residue
.
id
[
0
]
!=
'
'
:
residue_to_remove
.
append
((
model
[
chain
].
id
,
residue
.
id
))
return
residue_to_remove
def
get_interface_residues
(
target
,
partner
,
model
,
distance
):
ires
=
[]
with
open
(
'
{}_{}{}_{}.txt
'
.
format
(
model
.
parent
.
id
,
\
target
,
partner
,
distance
),
'
wb
'
)
as
outfile
:
ires
=
[]
with
open
(
'
{}_{}{}_{}.txt
'
.
format
(
model
.
parent
.
id
,
target
,
partner
,
distance
),
'
wb
'
)
as
outfile
:
for
rest
in
model
[
target
].
get_residues
():
if
''
.
join
(
map
(
str
,(
rest
.
get_parent
().
get_id
(),
'
.
'
,
rest
.
get_resname
(),
rest
.
get_id
()[
1
]))):
for
atomt
in
rest
.
get_atoms
():
for
atomp
in
model
[
partner
].
get_atoms
():
xt
=
atomt
.
get_coord
()[
0
]
xp
=
atomp
.
get_coord
()[
0
]
dist_x
=
calculate_euclidist_x
(
xt
,
xp
)
for
rest
in
model
[
target
].
get_residues
():
if
''
.
join
(
map
(
str
,(
rest
.
get_parent
().
get_id
(),
'
.
'
,
rest
.
get_resname
(),
rest
.
get_id
()[
1
]))):
for
atomt
in
rest
.
get_atoms
():
for
atomp
in
model
[
partner
].
get_atoms
():
xt
=
atomt
.
get_coord
()[
0
]
xp
=
atomp
.
get_coord
()[
0
]
dist_x
=
calculate_euclidist_x
(
xt
,
xp
)
if
dist_x
<
float
(
distance
):
yt
=
atomt
.
get_coord
()[
1
]
yp
=
atomp
.
get_coord
()[
1
]
dist_xy
=
calculate_euclidist_xy
(
xt
,
yt
,
xp
,
yp
)
if
dist_x
<
float
(
distance
):
yt
=
atomt
.
get_coord
()[
1
]
yp
=
atomp
.
get_coord
()[
1
]
dist_xy
=
calculate_euclidist_xy
(
xt
,
yt
,
xp
,
yp
)
if
dist_xy
<
float
(
distance
):
zt
=
atomt
.
get_coord
()[
2
]
zp
=
atomp
.
get_coord
()[
2
]
dist_xyz
=
calculate_euclidist_xyz
(
xt
,
yt
,
zt
,
xp
,
yp
,
zp
)
if
dist_xy
<
float
(
distance
):
zt
=
atomt
.
get_coord
()[
2
]
zp
=
atomp
.
get_coord
()[
2
]
dist_xyz
=
calculate_euclidist_xyz
(
xt
,
yt
,
zt
,
xp
,
yp
,
zp
)
if
dist_xyz
<
float
(
distance
):
ires
.
append
(
''
.
join
(
map
(
str
,
\
(
rest
.
get_parent
().
get_id
(),
'
.
'
,
\
rest
.
get_resname
(),
rest
.
get_id
()[
1
])))
if
dist_xyz
<
float
(
distance
):
ires
.
append
(
''
.
join
(
map
(
str
,
(
rest
.
get_parent
().
get_id
(),
'
.
'
,
rest
.
get_resname
(),
rest
.
get_id
()[
1
])))
)
for
element
in
ires
:
outfile
.
write
(
element
+
'
\n
'
)
for
element
in
ires
:
outfile
.
write
(
element
+
'
\n
'
)
def
calculate_euclidist_x
(
x1
,
x2
):
"""
Calculate the distance x between two 1D points
----------------------------------------------------------------------
Arguments:
[float]: coordinate x of target atom
coordinate x of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
)
**
0.5
return
dist
"""
Calculate the distance x between two 1D points
----------------------------------------------------------------------
Arguments:
[float]: coordinate x of target atom
coordinate x of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
)
**
0.5
return
dist
def
calculate_euclidist_xy
(
x1
,
y1
,
x2
,
y2
):
"""
Calculate the distance xy between two 2D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xy of target atom
coordinates xy of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
)
**
0.5
return
dist
"""
Calculate the distance xy between two 2D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xy of target atom
coordinates xy of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
)
**
0.5
return
dist
def
calculate_euclidist_xyz
(
x1
,
y1
,
z1
,
x2
,
y2
,
z2
):
"""
Calculate the euclidian distance xyz between two 3D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xyz of target atom
coordinates xyz of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
+
(
z1
-
z2
)
**
2
)
**
0.5
return
dist
"""
Calculate the euclidian distance xyz between two 3D points
----------------------------------------------------------------------
Arguments:
[float]: coordinates xyz of target atom
coordinates xyz of partnet atom
Return:
[float]: distance
"""
dist
=
((
x1
-
x2
)
**
2
+
(
y1
-
y2
)
**
2
+
(
z1
-
z2
)
**
2
)
**
0.5
return
dist
def
setlogger
():
LOG
.
setLevel
(
logging
.
INFO
)
ch
=
logging
.
StreamHandler
()
ch
.
setLevel
(
logging
.
INFO
)
formatter
=
logging
.
Formatter
(
'
%(asctime)s - %(name)s - %(funcName)s -
\
%(levelname)s - %(message)s
'
)
ch
.
setFormatter
(
formatter
)
LOG
.
addHandler
(
ch
)
LOG
.
setLevel
(
logging
.
INFO
)
ch
=
logging
.
StreamHandler
()
ch
.
setLevel
(
logging
.
INFO
)
formatter
=
logging
.
Formatter
(
'
%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s
'
)
ch
.
setFormatter
(
formatter
)
LOG
.
addHandler
(
ch
)
# =============================================================================
if
__name__
==
'
__main__
'
:
parser
=
argparse
.
ArgumentParser
(
description
=
textwrap
.
dedent
(
'''
1) Format protein/protein complex PDB structure (no ligand)
1.a) Number of models
1.b) Alternative atomic locations
1.c) Heteroatoms
2) Identify protein/protein interface residues
(for combinations of first chain of each protein)
3) Save each chain in separate files
'''
)
parser
.
add_argument
(
'
pdb
'
,
help
=
textwrap
.
dedent
(
'''
Input [.pdb file]: PBD 3D structure
(with HEADER included)
'''
))
parser
.
add_argument
(
'
-d
'
,
dest
=
'
distance
'
,
help
=
textwrap
.
dedent
(
'''
Input [float or integer]: distance threshold
\
{by default, 6 Angstroms}
'''
)
options
=
parser
.
parse_args
()
if
options
.
distance
is
None
:
options
.
distance
=
6.0
setlogger
()
main
(
options
.
file
,
options
.
distance
)
parser
=
argparse
.
ArgumentParser
(
description
=
textwrap
.
dedent
(
'''
1) Format protein/protein complex PDB structure (no ligand)
1.a) Number of models
1.b) Alternative atomic locations
1.c) Heteroatoms
2) Identify protein/protein interface residues
(for combinations of first chain of each protein)
3) Save each chain in separate files
'''
)
)
parser
.
add_argument
(
'
pdb
'
,
help
=
textwrap
.
dedent
(
'''
Input [.pdb file]: PBD 3D structure
(with HEADER included)
'''
))
parser
.
add_argument
(
'
-d
'
,
dest
=
'
distance
'
,
help
=
textwrap
.
dedent
(
'''
Input [float or integer]: distance threshold
{by default, 6 Angstroms}
'''
)
)
options
=
parser
.
parse_args
()
if
options
.
distance
is
None
:
options
.
distance
=
6.0
setlogger
()
main
(
options
.
file
,
options
.
distance
)
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment