diff --git a/ippisite/ippidb/forms.py b/ippisite/ippidb/forms.py
index f69c0084babdf7570157ce594dfd0d61f2c2fd31..fe0f196ddd522d9ceaee8e161137219f3cea4b26 100644
--- a/ippisite/ippidb/forms.py
+++ b/ippisite/ippidb/forms.py
@@ -102,17 +102,13 @@ class ComplexCompositionForm(forms.Form):
 
     def __init__(self,*args,**kwargs):
         super(ComplexCompositionForm, self).__init__(*args, **kwargs)
+        """ TODO : Filter based on submited PDBid"""
         self.fields['complex_protein'].queryset=Protein.objects.filter(pk__in=[569,570])
 
 
-class BaseProteinDomainComplexFormSet(BaseModelFormSet):
-    def __init__(self, *args, **kwargs):
-        super(BaseProteinDomainComplexFormSet,self).__init__(*args, **kwargs)
-        self.queryset = ProteinDomainComplex.objects.none()
-
-ProteinDomainComplexFormSet = modelformset_factory(ProteinDomainComplex, form=ProteinDomainComplexForm, formset=BaseProteinDomainComplexFormSet, extra=2)
-formset= ProteinDomainComplexFormSet()
-#print(formset)
+ComplexCompositionFormSet = formset_factory(ComplexCompositionForm, extra=2)
+formset= ComplexCompositionFormSet()
+print(formset.is_valid())
 
 class PpiForm(ModelForm):
 
@@ -154,7 +150,7 @@ class BaseCompoundFormSet(BaseFormSet):
         form.fields["molecule"] = forms.CharField(widget=forms.Select(choices=TYPE_MOLECULE))
         form.fields["molecule_comp"] = forms.CharField(widget=forms.TextInput(attrs={'placeholder':'Molecule composition', 'required':'required'}))
 
-CompoundFormSet = formset_factory(CompoundForm, formset=BaseCompoundFormSet, extra=2, max_num=500, can_delete=True)
+CompoundFormSet = formset_factory(CompoundForm, formset=BaseCompoundFormSet, max_num=500, can_delete=True)
 formset = CompoundFormSet()
 
 
diff --git a/ippisite/ippidb/static/css/main.css b/ippisite/ippidb/static/css/main.css
index 65e3e8031daa4781f36be8cbea839ccdcd93b9ef..e4d17a2fd3acc82fcba74ee7d465f969d0b049b9 100644
--- a/ippisite/ippidb/static/css/main.css
+++ b/ippisite/ippidb/static/css/main.css
@@ -13,6 +13,9 @@ Description: IPPI-DB Theme
 
 .clear_all{
     font-size: 14px;
+    color: #007bff;
+    text-decoration: none;
+    font-family: "BrandonGrotesqueReg"
 }
 
 #main-header{
@@ -616,7 +619,6 @@ font-size: 42px;
     padding: 10px;
     min-height: 150px;
     text-align: center;
-    min-height: 350px;
     border-top: 1px solid #2D96FA;
 }
 
diff --git a/ippisite/ippidb/templates/BibliographyForm.html b/ippisite/ippidb/templates/BibliographyForm.html
index 7b48a540fc92d1a52524d42ef76a8ac5926c1f96..1cb7263bdde8a066512551c4456988a7bf98e312 100644
--- a/ippisite/ippidb/templates/BibliographyForm.html
+++ b/ippisite/ippidb/templates/BibliographyForm.html
@@ -80,7 +80,6 @@
 			{% endif %}
 			</table>
 			<input class="submit_button" type="submit" value="{% trans "Next step" %}"/>
-			<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/CompoundForm.html b/ippisite/ippidb/templates/CompoundForm.html
index db55d706c174baedb8257ee61c1b2b24639c6fb4..fd72c2eee2dd93dced600e9452950acd9295c65b 100644
--- a/ippisite/ippidb/templates/CompoundForm.html
+++ b/ippisite/ippidb/templates/CompoundForm.html
@@ -60,7 +60,7 @@
 							</div>
 						</div>
 				{% endfor %}
-			</div>
+				</div>
 			<!--<div class="add_form">
 				<div id="{{ form.compound_name.id_for_label }}" onclick="">Add compound<div>
 			</div>-->
@@ -68,7 +68,6 @@
 			<input type="hidden" name="ippi_wizard-current_step" value="CompoundForm" id="id_ippi_wizard-current_step"/>
 			</table>
 				<input type="submit" value="{% trans "Next step" %}"/>
-				<a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/IdForm.html b/ippisite/ippidb/templates/IdForm.html
index 91446507da7c12e6ddc6517477572652c17fe095..ef8eeef8065dd15fba5d406b940e62917a19955c 100644
--- a/ippisite/ippidb/templates/IdForm.html
+++ b/ippisite/ippidb/templates/IdForm.html
@@ -51,7 +51,6 @@
 			{% endif %}
 			</table>
 			<input class="submit_button" type="submit" value="{% trans "Get Infos" %}"/>
-			<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/PDBForm.html b/ippisite/ippidb/templates/PDBForm.html
index f9259f85d672e84f5cd04a774ff91f4f7156f731..50626d6d7e959970287c432aaab4383aac692e3a 100644
--- a/ippisite/ippidb/templates/PDBForm.html
+++ b/ippisite/ippidb/templates/PDBForm.html
@@ -40,7 +40,6 @@
 			{% endif %}
 			</table>
 			<input class="submit_button" type="submit" value="{% trans "Next step" %}"/>
-			<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/PpiForm.html b/ippisite/ippidb/templates/PpiForm.html
index a49c8de059a1d7a288e99afb4c5283f1f38079fe..a632afd5021f807635311214332c77eedabac082 100644
--- a/ippisite/ippidb/templates/PpiForm.html
+++ b/ippisite/ippidb/templates/PpiForm.html
@@ -45,11 +45,9 @@
 			</div>
 			{% endif %}
 			</table>
-
-			<input type="hidden" name="ippi_wizard-current_step" value="PpiAndPpiComplexForm" id="id_ippi_wizard-current_step"/>
+			<input type="hidden" name="ippi_wizard-current_step" value="PpiForm" id="id_ippi_wizard-current_step"/>
 			<input class="submit_button" type="submit" name="{{ wizard.steps.next }}" value="{% trans "Next step" %}"/>
-			<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
 		</form>
-	<div>
+	</div>
 </div>
 {% endblock %}
\ No newline at end of file
diff --git a/ippisite/ippidb/templates/ProteinDomainComplexForm.html b/ippisite/ippidb/templates/ProteinDomainComplexForm.html
index dab073f0ecc6ad6995a9c431389ac0a4819f4b05..f6bfa0ee19a27cb845ecb16e6474c77809732f03 100644
--- a/ippisite/ippidb/templates/ProteinDomainComplexForm.html
+++ b/ippisite/ippidb/templates/ProteinDomainComplexForm.html
@@ -28,23 +28,22 @@
 		<form action="" method="post">
 			{% csrf_token %}
 				<table>
-			{{ wizard.management_form }}
-	    	{{ wizard.form.non_field_errors}}		
+			{{ wizard.form.errors}}
+	    	{{ wizard.form.non_field_errors}}	
 			{% if wizard.form.forms %}
 				{{ wizard.form.management_form }}
 				{% for form in wizard.form.forms %}
-					
-				{% endfor %}
-			{% else %}
 				<div class="inline_box_complex_long">
 					<div class="input_field"> {{ form.complex_type }}</div>
 					<div class="input_field"> {{ form.complex_protein }}</div>
 					<div class="input_field"> {{ form.complex_domain }}</div>
 					<div class="input_field"> {{ form.ppc_copy_nb }}</div>
 				</div>
+				{% endfor %}
 			{% endif %}
-			<input type="hidden" name="ippi_wizard-current_step" value="ProteinDomainComplexForm" id="id_ippi_wizard-current_step"/>
 			</table>
+			<input type="hidden" name="ippi_wizard-current_step" value="ProteinDomainComplexForm" id="id_ippi_wizard-current_step"/>
+			<input class="submit_button" type="submit" value="{% trans "Next step" %}"/>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/ProteinDomainComplexTypeForm.html b/ippisite/ippidb/templates/ProteinDomainComplexTypeForm.html
index 49a026f750cebf03597a26bdabb9f651e7934b05..f2c72bcc5a8e7143c2a884ca9958bd82808aa569 100644
--- a/ippisite/ippidb/templates/ProteinDomainComplexTypeForm.html
+++ b/ippisite/ippidb/templates/ProteinDomainComplexTypeForm.html
@@ -90,8 +90,8 @@ If your PPI complex is not among them, please select « Custom »</p>
 				</div>
 			{% endif %}
 			</table>
-				<input class="submit_button" type="submit" value="{% trans "Next step" %}"/>
-				<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
+			<input type="hidden" name="ippi_wizard-current_step" value="ProteinDomainComplexTypeForm" id="id_ippi_wizard-current_step"/>
+			<input class="submit_button" type="submit" value="{% trans "Next step" %}"/>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/TestsForm.html b/ippisite/ippidb/templates/TestsForm.html
index 769a7ecc607667b770108b0b7a28c0cd84c187f0..4713e834bb1c8cd394a9e2a96149654af635bf64 100644
--- a/ippisite/ippidb/templates/TestsForm.html
+++ b/ippisite/ippidb/templates/TestsForm.html
@@ -41,7 +41,6 @@
 			{% endif %}
 			</table>
 				<input type="submit" value="{% trans "submit" %}"/>
-				<a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a>
 		</form>
 	</div>
 </div>
diff --git a/ippisite/ippidb/templates/add.html b/ippisite/ippidb/templates/add.html
index 389d23e55f11b97367c67006c8599a6ea5796cfc..d6ee91be8c140257b799b9e1e508d49ff1b8bc8d 100644
--- a/ippisite/ippidb/templates/add.html
+++ b/ippisite/ippidb/templates/add.html
@@ -57,6 +57,7 @@
 													{% block form %}{% endblock %}
 													
 												</div>
+												<div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
 											</div>
 										</div>
 									</div>
diff --git a/ippisite/ippidb/templates/admin-session.html b/ippisite/ippidb/templates/admin-session.html
index 68811903efd41a04da755a28df06cbb452cd8a31..21d659b6f8a30b79caf04fed17ccdefe7c339b78 100644
--- a/ippisite/ippidb/templates/admin-session.html
+++ b/ippisite/ippidb/templates/admin-session.html
@@ -46,8 +46,6 @@
                                                 <div class="view-content">
                                                                                                             
                                                 </div>
-                                                <input class="submit_button" type="submit" name="{{ wizard.steps.next }}" value="{% trans "Next step" %}"/>
-            <div class="clear_button"><a href="{% url 'ippidb' %}?reset" class="clear_all">Clear all</a></div>
                                             </div>
                                         </div>
                                     </div>
diff --git a/ippisite/ippidb/views.py b/ippisite/ippidb/views.py
index e8436ce5382a8422e95299b87cf1a6297472cb07..923955e731909e60704b425dbd6767cdcd1c078a 100644
--- a/ippisite/ippidb/views.py
+++ b/ippisite/ippidb/views.py
@@ -4,7 +4,7 @@ from django.shortcuts import render
 from django.http import HttpResponseRedirect, Http404
 from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
 from formtools.wizard.views import SessionWizardView, NamedUrlSessionWizardView
-from .forms import IdForm, BibliographyForm, PDBForm, ProteinForm, ComplexCompositionForm, ProteinDomainComplexTypeForm, ProteinDomainComplexForm, ProteinDomainComplexFormSet, PpiForm, PpiComplexForm, ProteinFormSet,TestsForm, CompoundForm, CompoundFormSet
+from .forms import IdForm, BibliographyForm, PDBForm, ProteinForm, ComplexCompositionForm, ComplexCompositionFormSet, ProteinDomainComplexTypeForm, ProteinDomainComplexForm, PpiForm, PpiComplexForm, ProteinFormSet,TestsForm, CompoundForm, CompoundFormSet
 from .models import Protein, Bibliography, ProteinDomainComplex, ProteinDomainBoundComplex, RefCompoundBiblio, TestActivityDescription, Compound, Ppi, Disease, Taxonomy
 from .ws import get_pdb_uniprot_mapping
 
@@ -59,7 +59,7 @@ FORMS = [("IdForm", ippidb.forms.IdForm),
          ("PDBForm", ippidb.forms.PDBForm),
          ("ProteinDomainComplexTypeForm",
           ippidb.forms.ProteinDomainComplexTypeForm),
-         ("ProteinDomainComplexForm", ippidb.forms.ComplexCompositionForm),
+         ("ProteinDomainComplexForm", ippidb.forms.ComplexCompositionFormSet),
          ("PpiForm", ippidb.forms.PpiForm),
          ("CompoundForm", ippidb.forms.CompoundFormSet),
          ("TestsForm", ippidb.forms.TestsForm),]
diff --git a/pyScripts/get_pdb_structure.py b/pyScripts/get_pdb_structure.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5dd27838698a9550b60550d5b1d87dab7252d36
--- /dev/null
+++ b/pyScripts/get_pdb_structure.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: March 2018
+Version: 1
+
+
+DOWNLOAD PDB 3D STRUCTURE FILES FROM PDB 
+
+
+Usage: [Script.py] [pdb_id]
+------------------------------------------------------------------
+Argument:
+	[file]: list of PDB IDs (txt format)
+
+Return:
+	[file]: PDB files, stored in 'PDB' folder
+"""
+
+# =============================================================================
+
+import os
+import argparse
+import logging
+import csv
+import Bio
+from Bio.PDB import PDBList
+
+# =============================================================================
+
+LOG = logging.getLogger("DOWNLOAD PDB 3D STRUCTURE FILES")
+
+FOLDER = 'PDB'
+
+# =============================================================================
+
+def main(pdb_id):
+	"""Download the structures whose IDs are listed in the file `pdb_id`, then rename them."""
+	get_pdb_file(get_pdbID(pdb_id))
+	rename_pdb_file()
+	
+def get_pdbID(txtfile):
+	pdb_id = []
+	with open(txtfile, 'rb') as pdb_txtfile:
+		for line in csv.reader(pdb_txtfile):	
+			pdb_id.append(line[0].upper())
+	return pdb_id		
+
+
+def get_pdb_file(pdbID):
+	for i in pdbID:
+		PDBList().retrieve_pdb_file(i, pdir = FOLDER, file_format = 'pdb')
+	
+
+def rename_pdb_file():
+	working_dir = os.path.dirname(__file__)
+	pdb_dir = os.path.abspath(os.path.join(working_dir, FOLDER))	
+	os.chdir(pdb_dir)
+	for filename in os.listdir(pdb_dir):
+		os.rename(filename, filename.replace('pdb', '').replace('ent', 'pdb'))
+	
+
+def setlogger():
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
+
+# =============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 
+					'DOWNLOAD PDB 3D STRUCTURE FILES FROM PDB')
+    parser.add_argument('pdb_id', help = 'Input [.txt file]: PDB IDs')
+    options = parser.parse_args()
+    setlogger()
+    main(options.pdb_id)
diff --git a/pyScripts/get_ppc_V2.py b/pyScripts/get_ppc_V2.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d801b9c9a1e30ed70137e206681968e4bca6a66
--- /dev/null
+++ b/pyScripts/get_ppc_V2.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: March 2018
+Version: 2
+
+
+
+IDENTIFY PROTEIN-PROTEIN COMPLEXES FROM PDBe
+
+
+Usage: [Script.py] [PDBe]
+------------------------------------------------------------------
+Argument:
+	[PDBe]:
+    Input (json format)
+    PDBe (with [assembly_composition] and [assembly_type] information included)
+
+Return:
+	[list(PPC)]:
+    Output (txt format)
+    Protein/protein complex PDB IDs
+	
+	[json(PPC)]:
+	Output (json format)
+	Protein/protein complex information
+	
+	
+Warning:
+------- 
+PDBs with unique and repeated assembly composition annotation are both included!
+[unique annotation: 'protein/protein complex']
+[repeated annotation: 'protein/protein complex, protein/protein complex']
+"""
+
+# =============================================================================
+
+import os
+import sys
+import argparse
+import logging
+import json
+
+# =============================================================================
+
+LOG = logging.getLogger('Get PPC PDB IDs from PDBe')
+FILENAME = 'PPC-DIMER_3-26-18PDBe'
+
+XRAY = ['X-ray diffraction', 'X-ray powder diffraction']
+
+RESOLUTION = 3.5
+DIFFR = 0.05
+
+# =============================================================================
+
+def main(pdbe):
+	with open(pdbe) as infile:
+		structure = json.load(infile)
+		dimer = get_dimer(get_ppc(structure))
+		dimer_filt = filter_rfactor(filter_resolution(filter_annotation(dimer)))
+		to_json(dimer_filt, FILENAME)
+		to_txt(get_id(dimer_filt), FILENAME)
+
+	if os.path.exists('{}.txt'.format(FILENAME)):
+		LOG.info('Finished!')
+	else:
+		LOG.error('Warning! [Output] Not found!')
+		sys.exit(1)
+
+
+
+def get_ppc(pdbe):
+	ppc_unique = []
+	ppc_repeated = []
+	for i in xrange(len(pdbe)):
+		if 'assembly_composition' in pdbe[i].keys():
+			assembly_composition = pdbe[i]['assembly_composition']
+			if len(assembly_composition) == 1 and \
+			assembly_composition == ['protein/protein complex']:
+				ppc_unique.append(pdbe[i])
+			elif len(assembly_composition) > 1:
+				if assembly_composition.count('protein/protein complex') >= 1 and \
+				assembly_composition.count(
+				assembly_composition[0]) == len(assembly_composition):
+					ppc_repeated.append(pdbe[i])
+	ppc_all = ppc_unique + ppc_repeated
+	return ppc_all
+
+
+def get_dimer(pdbe):
+	dimer_unique = []
+	dimer_repeated = []
+	for i in xrange(len(pdbe)):
+		if 'assembly_type' in pdbe[i].keys():
+			assembly_type = pdbe[i]['assembly_type']
+			if len(assembly_type) == 1 and \
+			assembly_type == ['dimer']:
+				dimer_unique.append(pdbe[i])
+			elif len(assembly_type) > 1:
+				if assembly_type.count('dimer') >= 1 and \
+				assembly_type.count(assembly_type[0]) == len(assembly_type):
+					dimer_repeated.append(pdbe[i])
+	dimer_all = dimer_unique + dimer_repeated
+	return dimer_all
+
+
+def filter_annotation(pdbe):
+	annotation = []
+	for i in xrange(len(pdbe)):
+		if len(pdbe[i]['experimental_method']) == 1:
+			annotation.append(pdbe[i])
+	return annotation
+
+def filter_resolution(pdbe):
+	quality = []
+	for i in xrange(len(pdbe)):
+		if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1:
+			if 'resolution' in pdbe[i].keys() and \
+			float(pdbe[i]['resolution']) <= RESOLUTION:
+				quality.append(pdbe[i])
+	return quality
+
+def filter_rfactor(pdbe):
+	quality = []
+	for i in xrange(len(pdbe)):
+		if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1:
+			if 'r_free' in pdbe[i].keys() and 'r_factor' in pdbe[i].keys():
+				if float(pdbe[i]['r_free'] - pdbe[i]['r_factor']) <= abs(DIFFR):
+					quality.append(pdbe[i])
+	return quality
+
+
+def get_id(pdbe):
+	pdb_id = []
+	for i in xrange(len(pdbe)):
+		pdb_id.append(pdbe[i]['pdb_id'])
+	return pdb_id
+
+
+def to_json(data, outfile):
+	with open('{}.json'.format(outfile), 'wb') as jsonfile:
+		json.dump(data, jsonfile)
+
+
+def to_txt(data, outfile):
+	with open('{}.txt'.format(outfile), 'wb') as txtfile:
+		for pdb_id in data:
+			txtfile.write(pdb_id + '\n')
+
+
+
+def setlogger():
+	LOG.setLevel(logging.INFO)
+	ch = logging.StreamHandler()
+	ch.setLevel(logging.INFO)
+	formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \
+	%(levelname)s - %(message)s')
+	ch.setFormatter(formatter)
+	LOG.addHandler(ch)
+
+# =============================================================================
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description = 'Get protein/protein \
+	complex PDB IDs from PDBe')
+	parser.add_argument('pdbe', help = 'Input [.json file]: \
+	PBDe (with assembly and structure quality information included')
+	options = parser.parse_args()
+	setlogger()
+	main(options.pdbe)
diff --git a/pyScripts/get_ppc_id_from_pdbe.py b/pyScripts/get_ppc_id_from_pdbe.py
new file mode 100644
index 0000000000000000000000000000000000000000..e41157a8c94a05634d5bcdfaea385670bb4a5abc
--- /dev/null
+++ b/pyScripts/get_ppc_id_from_pdbe.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: February 2018
+Version: 1
+
+
+
+IDENTIFY PROTEIN-PROTEIN COMPLEXES FROM PDBe
+
+
+Usage: [Script.py] [PDBe]
+------------------------------------------------------------------
+Argument:
+	[PDBe]:
+    Input (json format)
+    PDBe (with assembly information included)
+
+Return:
+	[list(PPC)]:
+    Output (txt format)
+    Protein/protein complex PDB IDs
+	
+	[json(PPC)]:
+	Output (json format)
+	Protein/protein complex information
+	
+	
+Warning:
+------- 
+PDBs with unique and repeated assembly composition annotation are both included!
+[unique annotation: 'protein/protein complex']
+[repeated annotation: 'protein/protein complex, protein/protein complex']
+"""
+
+# =============================================================================
+
+import argparse
+import logging
+import json
+import pandas as pd
+
+# =============================================================================
+
+LOG = logging.getLogger('Get protein/protein complex PDB IDs from PDBe')
+
+# =============================================================================
+
+def setlogger():
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
+
+
+
+def main(pdbe):
+	structure = pd.read_json(pdbe, orient = 'columns')
+	ppc_to_txt(get_ppc_id(structure))
+	ppc_to_json(get_ppc_data(structure))
+
+
+
+def get_ppc_id(pdbe):
+	ppc_unique = []
+	ppc_repeated = []
+	for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])):
+		assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition']
+		if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']:
+			ppc_unique.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['pdb_id'])
+		elif len(assembly_composition) > 1:
+			if assembly_composition.count('protein/protein complex') >= 1 and \
+			assembly_composition.count(assembly_composition[0]) == len(assembly_composition):
+				ppc_repeated.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['pdb_id'])
+	ppc_all = ppc_unique + ppc_repeated
+	return ppc_all
+
+
+
+def get_ppc_data(pdbe):
+	ppc_unique_data = []
+	ppc_repeated_data = []
+	for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])):
+		assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition']
+		if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']:
+			ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+		elif len(assembly_composition) > 1:
+			if assembly_composition.count('protein/protein complex') >= 1 and \
+			assembly_composition.count(assembly_composition[0]) == len(assembly_composition):
+				ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+	ppc_all_data = ppc_unique_data + ppc_repeated_data
+	return ppc_all_data
+
+
+
+def ppc_to_txt(ppc):
+	with open('PPC_2-20-18PDBe.txt', 'wb') as txtfile:
+		for pdb_id in ppc:
+			txtfile.write(pdb_id + '\n')
+
+
+
+def ppc_to_json(ppc):
+	with open('PPC_2-20-18PDBe.json', 'wb') as jsonfile:
+		json.dump(ppc, jsonfile)
+
+
+
+# ==============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 
+					'Get protein/protein complex PDB IDs from PDBe')
+    parser.add_argument('pdbe', help = 'Input [.json file]: \
+					PBDe (with assembly information included')
+    options = parser.parse_args()
+    setlogger()
+    main(options.pdbe)
diff --git a/pyScripts/get_ppc_ligand_V2.py b/pyScripts/get_ppc_ligand_V2.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed1f1576c33a4f3be066d1a2326152198654130b
--- /dev/null
+++ b/pyScripts/get_ppc_ligand_V2.py
@@ -0,0 +1,268 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: March 2018
+Version: 3
+
+
+
+IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS (with or without including ions)
+INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe
+
+
+Usage: [Script.py] [PDBe] [IONS]
+------------------------------------------------------------------
+Arguments:
+	[PDBe]:
+	Input (json format)
+	PDBe (with [assembly_composition], [compound_id] and [uniprot_id] included)
+
+	[IONS]:
+	Input (txt format)
+	Compounds considered as ions 
+
+Return:
+	[list(protein)]:
+	Output (txt format)
+	Proteins with compounds associated with a protein/protein complex PDB IDs
+
+	[json(protein)]:
+	Output (json format)
+	Proteins with compounds associated with a protein/protein complex dataset
+
+	Compounds:
+	---------
+	** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID']
+			or  [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION']
+
+
+    [list(protein)]:
+    Output (txt format)
+    Proteins with compounds associated with a protein/protein complex PDB IDs
+    IONS ONLY EXCLUDED
+
+    [json(protein)]:
+    Output (json format)        
+    Proteins with compounds associated with a protein/protein complex dataset
+    IONS ONLY EXCLUDED
+
+	Compounds:
+	---------
+	** EXAMPLE: [u'FE : FE (III) ION']
+	
+
+
+!Warning!:
+------- 
+PDBs with unique and repeated assembly composition annotation are both included!
+[unique annotation: 'protein/protein complex']
+[repeated annotation: 'protein/protein complex, protein/protein complex']
+"""
+
+# =============================================================================
+
+import os
+import sys
+import argparse
+import logging
+import json
+
+# =============================================================================
+
+LOG = logging.getLogger('Get PPC PDB IDs with ligand')
+FILENAME_with_ions = 'PPC-PROT-COMPOUNDS_3-26-18PDBe'
+FILENAME_without_ions = 'PPC-PROT-COMPOUNDS_noIONS_3-26-18PDBe'
+
+XRAY = ['X-ray diffraction', 'X-ray powder diffraction']
+
+RESOLUTION = 3.5
+DIFFR = 0.05
+
+# =============================================================================
+
+def main(pdbe, ion):
+	with open(pdbe) as infile:
+		structure = json.load(infile)
+
+		# Get protein/protein complex (PPC)
+		ppc = filter_rfactor(filter_resolution(filter_annotation(
+		get_ppc(structure))))
+		to_txt(get_id(ppc), 'PPC_3-26-18PDBe')
+
+		# Get protein structure with [compounds]
+		prot_compound = filter_rfactor(filter_resolution(filter_annotation(
+		get_with_compound(get_prot(structure)))))
+		
+	# Collect PPC UniProt
+	ppc_uniprot = get_uniprot(ppc)
+
+	# Get PPC-associated proteins with compounds
+	ppc_ligand = get_prot_compound_ppc(prot_compound, ppc_uniprot)
+	to_json(ppc_ligand, FILENAME_with_ions)
+	to_txt(get_id(ppc_ligand), FILENAME_with_ions)
+
+	# Get PPC-associated proteins with compounds (ions only not included)
+	ions = []
+	with open(ion, 'r') as ion_file:
+		for i in ion_file:
+			ions.append(i.strip().upper())
+
+	prot_compound_without_ions = get_without_ions(prot_compound, ions)
+	ppc_ligand_without_ions = get_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot)
+	to_json(ppc_ligand_without_ions, FILENAME_without_ions)
+	to_txt(get_id(ppc_ligand_without_ions), FILENAME_without_ions)
+
+
+	if os.path.exists('{}.txt'.format(FILENAME_with_ions)) and \
+	os.path.exists('{}.txt'.format(FILENAME_without_ions)):
+		LOG.info('Finished!')
+	else:
+		LOG.error('Warning! [Outputs] Not found!')
+		sys.exit(1)
+
+
+
+def get_ppc(pdbe):
+	ppc_unique = []
+	ppc_repeated = []
+	for i in xrange(len(pdbe)):
+		if 'assembly_composition' in pdbe[i].keys():
+			assembly_composition = pdbe[i]['assembly_composition']
+			if len(assembly_composition) == 1 and \
+			assembly_composition == ['protein/protein complex']:
+				ppc_unique.append(pdbe[i])
+			elif len(assembly_composition) > 1:
+				if assembly_composition.count('protein/protein complex') >= 1 and \
+				assembly_composition.count(
+				assembly_composition[0]) == len(assembly_composition):
+					ppc_repeated.append(pdbe[i])
+	ppc_all = ppc_unique + ppc_repeated
+	return ppc_all
+
+
+def get_prot(pdbe):
+	prot_unique = []
+	prot_repeated = []
+	for i in xrange(len(pdbe)):
+		if 'assembly_composition' in pdbe[i].keys():
+			assembly_composition = pdbe[i]['assembly_composition']
+			if len(assembly_composition) == 1 and \
+			assembly_composition == ['protein structure']:
+				prot_unique.append(pdbe[i])
+			elif len(assembly_composition) > 1:
+				if assembly_composition.count('protein structure') >= 1 and \
+				assembly_composition.count(
+				assembly_composition[0]) == len(assembly_composition):
+					prot_repeated.append(pdbe[i])
+	prot_all = prot_unique + prot_repeated
+	return prot_all
+
+
+def filter_annotation(pdbe):
+	annotation = []
+	for i in xrange(len(pdbe)):
+		if len(pdbe[i]['experimental_method']) == 1:
+			annotation.append(pdbe[i])
+	return annotation
+
+def filter_resolution(pdbe):
+	quality = []
+	for i in xrange(len(pdbe)):
+		if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1:
+			if 'resolution' in pdbe[i].keys() and \
+			float(pdbe[i]['resolution']) <= RESOLUTION:
+				quality.append(pdbe[i])
+	return quality
+
+def filter_rfactor(pdbe):
+	quality = []
+	for i in xrange(len(pdbe)):
+		if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1:
+			if 'r_free' in pdbe[i].keys() and 'r_factor' in pdbe[i].keys():
+				if float(pdbe[i]['r_free'] - pdbe[i]['r_factor']) <= abs(DIFFR):
+					quality.append(pdbe[i])
+	return quality
+
+
+def get_with_compound(pdbe):
+	compound = []
+	for i in xrange(len(pdbe)):
+		if 'compound_id' in pdbe[i].keys():
+			compound.append(pdbe[i])
+	return compound
+
+
+def get_uniprot(pdbe):
+	uniprot = []
+	for i in xrange(len(pdbe)):
+		if 'uniprot_id' in pdbe[i].keys():
+			uniprot.extend(pdbe[i]['uniprot_id'])
+	return uniprot
+
+
+def get_prot_compound_ppc(protein, uniprot):
+	prot_compound_ppc = []
+	for i in xrange(len(protein)):
+		if 'uniprot_id' in protein[i].keys():
+			if len([e for e in protein[i]['uniprot_id'] if e in uniprot]) >= 1:
+				prot_compound_ppc.append(protein[i])
+	return prot_compound_ppc
+
+
+def get_without_ions(protein, ions):
+	without_ions = []
+	with_ions_not_only = []
+	for i in xrange(len(protein)):
+		if 'compound_id' in protein[i].keys():
+			if len([e for e in protein[i]['compound_id'] if e in ions]) == 0:
+				without_ions.append(protein[i])
+			elif len([e for e in protein[i]['compound_id'] if e in ions]) >= 1:
+				if len([e for e in protein[i]['compound_id'] if e in ions]) \
+				!= len(protein[i]['compound_id']):
+					with_ions_not_only.append(protein[i])
+	print len(without_ions), len(with_ions_not_only)
+	compound_ions = without_ions + with_ions_not_only
+	return compound_ions
+
+
+def get_id(pdbe):
+	pdb_id = []
+	for i in xrange(len(pdbe)):
+		pdb_id.append(pdbe[i]['pdb_id'])
+	return pdb_id
+
+
+def to_json(data, outfile):
+	with open('{}.json'.format(outfile), 'wb') as jsonfile:
+		json.dump(data, jsonfile)
+
+
+def to_txt(data, outfile):
+	with open('{}.txt'.format(outfile), 'wb') as txtfile:
+		for pdb_id in data:
+			txtfile.write(pdb_id + '\n')
+
+
+
+def setlogger():
+	LOG.setLevel(logging.INFO)
+	ch = logging.StreamHandler()
+	ch.setLevel(logging.INFO)
+	formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \
+	%(levelname)s - %(message)s')
+	ch.setFormatter(formatter)
+	LOG.addHandler(ch)
+
+# =============================================================================
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description = 'Get protein/protein \
+	complex PDB IDs from PDBe')
+	parser.add_argument('pdbe', help = 'Input [.json file]: \
+	PBDe (with assembly and structure quality information included')
+	parser.add_argument('ion', help = 'Input [.txt file]: Compounds considered \
+	as ions')
+	options = parser.parse_args()
+	setlogger()
+	main(options.pdbe, options.ion)
diff --git a/pyScripts/get_ppc_ligand_compound_from_pdbe.py b/pyScripts/get_ppc_ligand_compound_from_pdbe.py
new file mode 100644
index 0000000000000000000000000000000000000000..a91caa2d8a00c3559d5e14213294b8308dff1b43
--- /dev/null
+++ b/pyScripts/get_ppc_ligand_compound_from_pdbe.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: February 2018
+Version: 2
+
+
+
+IDENTIFY INTERACTING LIGANDS AND COMPOUNDS OF PROTEIN-PROTEIN COMPLEXES FROM PDBe
+
+
+Usage: [Script.py] [PDBe]
+------------------------------------------------------------------
+Argument:
+	[PDBe]:
+    Input (json format)
+    PDBe (with assembly information included)
+
+Return:
+	[list(ligands)]:
+    Output (txt format)
+    Interacting ligands of protein/protein complexes
+
+	[list(compounds)]:
+    Output (txt format)	
+    Compounds of protein/protein complexes
+
+	
+Warning:
+------- 
+PDBs with unique and repeated assembly composition annotation are both included!
+[unique annotation: 'protein/protein complex']
+[repeated annotation: 'protein/protein complex, protein/protein complex']
+"""
+
+# =============================================================================
+
+import argparse
+import logging
+import json
+import pandas as pd
+
+# =============================================================================
+
+# Module-level logger; its level and handler are set up by setlogger()
+LOG = logging.getLogger('Get ligands and compounds of protein/protein complexes from PDBe')
+
+# =============================================================================
+
+def setlogger():
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
+
+
+
+def main(pdbe):
+	structure = pd.read_json(pdbe, orient = 'columns')
+	ligand_to_txt(get_ligand(get_ppc_data(structure)))
+	compound_to_txt(get_compound(get_ppc_data(structure)))
+
+def get_ppc_data(pdbe):
+	ppc_unique_data = []
+	ppc_repeated_data = []
+	for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])):
+		assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition']
+		if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']:
+			ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+		elif len(assembly_composition) > 1:
+			if assembly_composition.count('protein/protein complex') >= 1 and \
+			assembly_composition.count(assembly_composition[0]) == len(assembly_composition):
+				ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+	ppc_all_data = ppc_unique_data + ppc_repeated_data
+	return ppc_all_data
+
+
+
+def get_ligand(ppc):
+	ppc_ligand = []
+	unique_ligand = []
+	for i in xrange(len(ppc)):
+		if 'interacting_ligands' in ppc[i].keys():
+			ppc_ligand.append(ppc[i]['interacting_ligands'][0].split(' :')[0])
+
+	for j in ppc_ligand:
+		if j not in unique_ligand:
+			unique_ligand.append(j)
+			
+	#print len(unique_ligand)
+	return unique_ligand
+
+
+
+def get_compound(ppc):
+        ppc_compound = []
+        unique_compound = []
+	for i in xrange(len(ppc)):
+                if 'compound_id' in ppc[i].keys():
+			ppc_compound.extend(ppc[i]['compound_id'])
+
+	for j in ppc_compound:
+		if j not in unique_compound:
+			unique_compound.append(j)
+
+	#print len(unique_compound)
+	return unique_compound
+
+
+def ligand_to_txt(ligand):
+	with open('PPC_LIGANDS_2-20-18PDBe.txt', 'wb') as txtfile:
+		for lig in ligand:
+			txtfile.write(lig + '\n')
+
+def compound_to_txt(compound):
+        with open('PPC_COMPOUNDS_2-20-18PDBe.txt', 'wb') as txtfile:
+                for c in compound:
+                        txtfile.write(c + '\n')
+
+# ==============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 
+					'Get ligands and compounds of protein/protein complexes from PDBe')
+    parser.add_argument('pdbe', help = 'Input [.json file]: \
+					PBDe (with interacting ligand and compound information included')
+    options = parser.parse_args()
+    setlogger()
+    main(options.pdbe)
diff --git a/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py b/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py
new file mode 100644
index 0000000000000000000000000000000000000000..59a2c0ac99f63241860924a3b5001e069f73a7ac
--- /dev/null
+++ b/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py
@@ -0,0 +1,321 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: February 2018
+Version: 2
+
+
+
+IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS (with or without including ions)
+INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe
+
+
+Usage: [Script.py] [PDBe]
+------------------------------------------------------------------
+Argument:
+	[PDBe]:
+    Input (json format)
+    PDBe (with [assembly_composition], [compound_id] and [uniprot_id] included)
+
+Return:
+	[list(protein)]:
+    Output (txt format)
+    Proteins with compounds associated with a protein/protein complex PDB IDs
+	
+	[json(protein)]:
+    Output (json format)	
+    Proteins with compounds associated with a protein/protein complex dataset
+
+	Compounds:
+	---------
+	** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID']
+			or  [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION']
+
+
+
+    [list(protein)]:
+    Output (txt format)
+    Proteins with compounds associated with a protein/protein complex PDB IDs
+    IONS ONLY EXCLUDED
+
+    [json(protein)]:
+    Output (json format)        
+    Proteins with compounds associated with a protein/protein complex dataset
+    IONS ONLY EXCLUDED
+
+	Compounds:
+	---------
+	** EXAMPLE: [u'FE : FE (III) ION']
+	
+
+
+!Warning!:
+------- 
+PDBs with unique and repeated assembly composition annotation are both included!
+[unique annotation: 'protein/protein complex']
+[repeated annotation: 'protein/protein complex, protein/protein complex']
+"""
+
+# =============================================================================
+
+import argparse
+import logging
+import json
+import pandas as pd
+
+# =============================================================================
+
+# Module-level logger; its level and handler are set up by setlogger()
+LOG = logging.getLogger('Proteins associated with a PPC - Compounds')
+
+# =============================================================================
+
+def setlogger():
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
+
+
+def main(pdbe):
+	structure = pd.read_json(pdbe, orient = 'columns')
+
+	# Get protein/protein complex [assembly_composition]
+	ppc = get_ppc_data(structure)
+
+	# Get protein structure [assembly_composition] with 'compounds'
+	prot_compound = get_compound_id(get_prot_data(structure))
+
+	# Collect PPC UniProt
+	ppc_uniprot = get_uniprot(ppc)
+
+	# Identify protein structure with compounds associated to PPC 
+	to_txt(get_id_prot_compound_ppc(prot_compound, ppc_uniprot))
+	to_json(get_data_prot_compound_ppc(prot_compound, ppc_uniprot))
+
+	
+	# Get protein structure [assembly_composition] with compounds (ions only not included)
+	ion = get_ion(get_compound_name(prot_compound))
+	prot_compound_without_ions = get_compound_without_ion(prot_compound, ion)
+	to_txt_ion(get_id_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot))
+	to_json_ion(get_data_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot))
+
+
+
+
+def get_ppc_data(pdbe):
+	ppc_unique_data = []
+	ppc_repeated_data = []
+	for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])):
+		assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition']
+		if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']:
+			ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+		elif len(assembly_composition) > 1:
+			if assembly_composition.count('protein/protein complex') >= 1 and \
+			assembly_composition.count(assembly_composition[0]) == len(assembly_composition):
+				ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+	ppc_all_data = ppc_unique_data + ppc_repeated_data
+	return ppc_all_data
+
+
+
+def get_prot_data(pdbe):
+	prot_unique_data = []
+	prot_repeated_data = []
+	for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])):
+		assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition']
+		if len(assembly_composition) == 1 and assembly_composition == ['protein structure']:
+			prot_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+		elif len(assembly_composition) > 1:
+			if assembly_composition.count('protein structure') >= 1 and \
+			assembly_composition.count(assembly_composition[0]) == len(assembly_composition):
+				prot_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0])
+	prot_all_data = prot_unique_data + prot_repeated_data
+	return prot_all_data
+
+
+
+def get_compound_id(data):
+	'''
+	Get 3D structure with compound [compound_id]
+	--------------------------------------------------
+	Argument:
+		[json]: protein structures (dataset)
+
+	Return:
+		[json]: protein structures with compounds (dataset)
+	'''
+	compound = []
+	for i in xrange(len(data)):
+                if 'compound_id' in data[i].keys():
+					compound.append(data[i])
+	return compound
+
+
+
+def get_uniprot(data):
+	'''
+	Get UniProt
+	-------------------------------------------------
+	Argument:
+		[json]: protein/protein complexes (dataset)
+
+	Return:
+		[list]: protein/protein complexes Uniprot IDs
+	'''
+	uniprot = []
+	for i in xrange(len(data)):
+		if 'uniprot_id' in data[i].keys():
+			uniprot.extend(data[i]['uniprot_id'])
+	return uniprot
+
+
+def get_id_prot_compound_ppc(data, uniprot):
+	'''
+	Get PDB IDs only of protein structure with compounds associated with a protein/protein complex
+	-----------------------------------------------------------------------------------------------
+	Arguments:
+		[json]: protein structures with compounds (dataset)
+		[list]: protein/protein complex uniprot id
+
+	Return:
+		[list]: protein structure with compounds associated with a protein/protein complex PDB IDs
+	''' 
+	prot_compound_pcc = []
+	for i in xrange(len(data)):
+		if 'uniprot_id' in data[i].keys():
+			if len([e for e in data[i]['uniprot_id'] if e in uniprot]) >= 1:
+				prot_compound_pcc.append(data[i]['pdb_id'])
+	return prot_compound_pcc
+				
+
+
+def get_data_prot_compound_ppc(data, uniprot):
+	'''
+	Get data only of protein structure with compounds associated with a protein/protein complex
+	-----------------------------------------------------------------------------------------------
+	Arguments:
+		[json]: protein structures with compounds (dataset)
+		[list]: protein/protein complex uniprot id
+
+	Return:
+		[json]: protein structure with compounds associated with a protein/protein complex (dataset)
+	''' 
+	prot_compound_pcc_data = []
+	for i in xrange(len(data)):
+		if 'uniprot_id' in data[i].keys():
+			if len([e for e in data[i]['uniprot_id'] if e in uniprot]) >= 1:
+					prot_compound_pcc_data.append(data[i])
+	return prot_compound_pcc_data
+
+
+
+
+def get_compound_name(data):
+	'''
+	Get protein compound names
+	--------------------------------------------------
+	Argument:
+		[json]: protein structures with compounds (dataset)
+
+	Return:
+		[list]: compound names
+		** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION']
+	'''
+	compound = []
+	for i in xrange(len(data)):
+		if 'compound_name' in data[i].keys():
+			compound.extend(data[i]['compound_name'])
+	return compound
+
+		
+
+def get_ion(compound_name):
+	'''
+	Get protein ions
+	--------------------------------------------------
+	Argument:
+		[list]: compound names
+		** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION']
+
+	Return:
+		[list]: ions
+		** EXAMPLE: ['FE']
+	'''
+	ion = []
+	ion_unique = []
+	for i in xrange(len(compound_name)):
+		if 'ION' in compound_name[i].split(' :')[1].split(' '):
+			ion.append(compound_name[i].split(' :')[0])
+	for j in ion:
+		if j not in ion_unique:
+			ion_unique.append(j)
+	print ion_unique
+	return ion_unique
+
+
+
+def get_compound_without_ion(data, ions):
+	'''
+	Get 3D structure with compound [compound_id], ion excluded
+	----------------------------------------------------------
+	Arguments:
+		[json]: protein structures (dataset)
+		[list]: ions
+
+	Return:
+		[json]: protein structures with compounds != ions (dataset)
+				
+	!Warning!
+	---------
+	Protein structures either with only compounds, or compounds and ions are included
+	** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID']
+			or  [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION']
+
+	Only protein structures with only ions are excluded
+	** EXAMPLE: [u'FE : FE (III) ION']				
+	'''
+	compound_without_ion = []
+	compound_with_ion = []
+	for i in xrange(len(data)):
+		if 'compound_id' in data[i].keys():
+			if len([e for e in data[i]['compound_id'] if e in ions]) == 0:
+				compound_without_ion.append(data[i])
+			elif len([e for e in data[i]['compound_id'] if e in ions]) >= 1:
+				if len([e for e in data[i]['compound_id'] if e in ions]) != len(data[i]['compound_id']):
+					compound_with_ion.append(data[i])
+	compound_ion = compound_without_ion + compound_with_ion				
+	return compound_ion
+
+
+
+def to_txt(index):
+	with open('PROT-COMPOUNDS_PPC_2-20-18PDBe.txt', 'wb') as txtfile:
+		for i in index:
+			txtfile.write(i + '\n')
+
+def to_json(index):
+	with open('PROT-COMPOUNDS_PPC_2-20-18PDBe.json', 'wb') as jsonfile:
+		json.dump(index, jsonfile)
+
+def to_txt_ion(index):
+	with open('PROT-COMPOUNDS-noIONS_PPC_2-20-18PDBe.txt', 'wb') as txtfile:
+		for i in index:
+			txtfile.write(i + '\n')
+
+def to_json_ion(index):
+	with open('PROT-COMPOUNDS-noIONS_PPC_2-20-18PDBe.json', 'wb') as jsonfile:
+		json.dump(index, jsonfile)
+
+# ==============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 
+					'IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe')
+    parser.add_argument('pdbe', help = 'Input [.json file]: \
+					PBDe (with [assembly_composition], [compound_id] and [uniprot_id] included)')
+    options = parser.parse_args()
+    setlogger()
+    main(options.pdbe)
diff --git a/pyScripts/iPPI-DB_descriptors_V2.py b/pyScripts/iPPI-DB_descriptors_V2.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef516ce6ee41b26aecaada0d83730c0d57572e08
--- /dev/null
+++ b/pyScripts/iPPI-DB_descriptors_V2.py
@@ -0,0 +1,320 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: January 2018
+Version: 2
+
+
+
+CALCULATE 2D/3D POCKET DESCRIPTORS FROM VOLSITE
+
+
+Usage: [Script.py] [PDB_descriptor.txt] [PDB_2D-3Ddescriptors.csv]
+------------------------------------------------------------------
+
+[PDB_descriptor.txt]:
+    Input (txt format)
+    Pocket descriptors calculated by VolSite
+	
+[PDB_2D-3Ddescriptors.csv]:
+    Output (csv format)
+    Pocket descriptors including:
+    * 89 descriptors from VolSite
+    * 10 descriptors using a linear combination of VolSite descriptors
+      (Kuenemann, 2016 Scientific Reports)
+    * 10 geometric descriptors from RDKit toolkit
+      (http://www.rdkit.org/docs/Overview.html, version 2017.09.1)
+"""
+
+# =============================================================================
+
+import os
+import sys
+import argparse
+import logging
+import csv
+import pandas as pd
+from rdkit import Chem
+from rdkit.Chem import Descriptors3D
+
+# =============================================================================
+
+# Module-level logger; its level and handler are set up by setlogger()
+LOG = logging.getLogger("CALCULATE 2D/3D POCKET DESCRIPTORS FROM VOLSITE")
+
+
+# Header row that descriptor_volsite() writes on top of the VolSite csv:
+# 'Volume', the 8 probe labels, then 10 range columns per probe label
+# (89 names in total), followed by 'POCKET'.
+VOLSITE_HEADER = [
+    'Volume', 'CZ', 'CA', 'O', 'OD1', 'OG', 'N', 'NZ', 'DU',
+    'CZ40', 'CZ40-50', 'CZ50-60', 'CZ60-70', 'CZ70-80', 'CZ80-90', 'CZ90-100', 'CZ100-110', 'CZ110-120', 'CZ120',
+    'CA40', 'CA40-50', 'CA50-60', 'CA60-70', 'CA70-80', 'CA80-90', 'CA90-100', 'CA100-110', 'CA110-120', 'CA120',
+    'O40', 'O40-50', 'O50-60', 'O60-70', 'O70-80', 'O80-90', 'O90-100', 'O100-110', 'O110-120', 'O120',
+    'OD140', 'OD140-50', 'OD150-60', 'OD160-70', 'OD170-80', 'OD180-90', 'OD190-100', 'OD1100-110', 'OD1110-120',
+    'OD1120',
+    'OG40', 'OG40-50', 'OG50-60', 'OG60-70', 'OG70-80', 'OG80-90', 'OG90-100', 'OG100-110', 'OG110-120', 'OG120',
+    'N40', 'N40-50', 'N50-60', 'N60-70', 'N70-80', 'N80-90', 'N90-100', 'N100-110', 'N110-120', 'N120',
+    'NZ40', 'NZ40-50', 'NZ50-60', 'NZ60-70', 'NZ70-80', 'NZ80-90', 'NZ90-100', 'NZ100-110', 'NZ110-120', 'NZ120',
+    'DU40', 'DU40-50', 'DU50-60', 'DU60-70', 'DU70-80', 'DU80-90', 'DU90-100', 'DU100-110', 'DU110-120', 'DU120',
+    'POCKET'
+]
+
+# Combined descriptors: each key is an output column of
+# descriptor_combination(), each value lists the VOLSITE_HEADER columns
+# that are summed, pocket by pocket, to produce it.
+# NOTE(review): the module docstring announces 10 combined descriptors but
+# only 9 keys are defined here (there is no 'T120') - confirm.
+DESC_COMBINATION = {
+    'T40': ['CZ40', 'CA40', 'O40', 'OD140', 'OG40', 'N40', 'NZ40', 'DU40'],
+    'T40-50': ['CZ40-50', 'CA40-50', 'O40-50', 'OD140-50', 'OG40-50', 'N40-50', 'NZ40-50', 'DU40-50'],
+    'T50-60': ['CZ50-60', 'CA50-60', 'O50-60', 'OD150-60', 'OG50-60', 'N50-60', 'NZ50-60', 'DU50-60'],
+    'T60-70': ['CZ60-70', 'CA60-70', 'O60-70', 'OD160-70', 'OG60-70', 'N60-70', 'NZ60-70', 'DU60-70'],
+    'T70-80': ['CZ70-80', 'CA70-80', 'O70-80', 'OD170-80', 'OG70-80', 'N70-80', 'NZ70-80', 'DU70-80'],
+    'T80-90': ['CZ80-90', 'CA80-90', 'O80-90', 'OD180-90', 'OG80-90', 'N80-90', 'NZ80-90', 'DU80-90'],
+    'T90-100': ['CZ90-100', 'CA90-100', 'O90-100', 'OD190-100', 'OG90-100', 'N90-100', 'NZ90-100', 'DU90-100'],
+    'T100-110': ['CZ100-110', 'CA100-110', 'O100-110', 'OD1100-110', 'OG100-110', 'N100-110', 'NZ100-110', 'DU100-110'],
+    'T110-120': ['CZ110-120', 'CA110-120', 'O110-120', 'OD1110-120', 'OG110-120', 'N110-120', 'NZ110-120', 'DU110-120'],
+}
+
+# Header row of the geometric descriptor csv written by
+# descriptor_geometric(); the order matches the list returned by
+# calculate_shape_descriptor() ('Rgyr' is the RadiusOfGyration value).
+DESC_GEOM = [
+    'PMI1', 'PMI2', 'PMI3', 'NPR1', 'NPR2', 'Rgyr',
+    'Asphericity', 'SpherocityIndex', 'Eccentricity', 'InertialShapeFactor'
+]
+
+# =============================================================================
+
+def setlogger():
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \
+								   %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
+
+
+def main(volsitedescriptor, pocketdescriptor):
+    volsite_csv = descriptor_volsite(volsitedescriptor)
+    combination_csv = descriptor_combination(volsite_csv)
+    volsite_combination_csv = "{}volsite+combinaison.csv".format(
+							  volsite_csv.rsplit("volsite.csv")[0])
+    merge_csv(volsite_csv, combination_csv, volsite_combination_csv)
+    geometric_csv = descriptor_geometric(volsitedescriptor)
+    merge_csv(volsite_combination_csv, geometric_csv, pocketdescriptor)
+
+    if os.path.exists(pocketdescriptor):
+        os.remove(volsite_csv)
+        os.remove(combination_csv)
+        os.remove(volsite_combination_csv)
+        os.remove(geometric_csv)
+        LOG.info('Finished!')
+    else:
+        LOG.error('Error: 2D/3D Pocket Descriptor file not found')
+        sys.exit(1)
+
+
+def descriptor_volsite(txtfile):
+    """
+    Convert VolSite output file from txt format to csv format
+    (descriptor headers added)
+    ---------------------------------------------------------
+    Argument:
+        [file]: VolSite descriptors (txt format)
+
+    Return:
+        [file]: VolSite descriptors (csv format)
+                with descriptor headers
+    """
+    try:
+        with open(txtfile, 'rb') as infile:
+            volsite_csv = "{}_volsite.csv".format(os.path.splitext(txtfile)[0])
+            with open(volsite_csv, 'wb') as outfile:
+                in_text = csv.reader(infile, delimiter=' ')
+                out_csv = csv.writer(outfile)
+                out_csv.writerow(VOLSITE_HEADER)
+                for line in in_text:
+                    out_csv.writerow(line)
+    except IOError, e:
+        LOG.error('{}'.format(e))
+        sys.exit(1)
+    return volsite_csv
+
+
+def descriptor_combination(volsite_descriptor_csv):
+    """
+    Calculate and write linear combinations of VolSite descriptors to csv file
+   (Kuenemann, 2016 Scientific Reports)
+    ---------------------------------------------------------------------------
+    Argument:
+        [file]: VolSite descriptors (csv format)
+
+    Return:
+        [file]: linear combinations of VolSite descriptors (csv format)
+    """
+    with open(volsite_descriptor_csv, 'rb') as infile:
+        volsite = csv.reader(infile, delimiter = ',')
+        descriptors = volsite.next()
+
+        # Create a dictionnary
+        # key(descriptor): values(list of all pocket values)
+        data = {}
+        # Count the number of pockets
+        numbpocket = 0
+        for header in descriptors:
+            data[header] = []
+        for pocket in volsite:
+            numbpocket += + 1
+            for header, value in zip(descriptors, pocket):
+                data[header].append(value)
+        combination_csv = "{}combination.csv".format(
+						  volsite_descriptor_csv.rsplit("volsite.csv")[0])
+        with open(combination_csv, 'wb') as outfile:
+            combivolsite = csv.writer(outfile, delimiter = ',', 
+									  lineterminator = '\n')
+            # Descriptor headers and values have to be in variables
+            # to write them in a new file column by column (sep = comma)
+            descname = []
+            cdata = []
+            for dch in DESC_COMBINATION.keys():
+                # dch = descriptor combination header (column)
+                bsum = []
+                for p in range(numbpocket):
+                    # p = pocket (row)
+                    buriedness_values = []
+                    for colname in DESC_COMBINATION[dch]:
+                        # Get buriedness value by descriptors
+                        buriedness_values.append(float(data[colname][p]))
+                    # Get sum of buriedness value by descriptor combinations
+                    bsum.append(sum(buriedness_values))
+                descname.append(dch)
+                cdata.append(bsum)
+            combivolsite.writerow(descname)
+            datatransposed = zip(*cdata)
+            for row in datatransposed:
+                combivolsite.writerow(row)
+    return combination_csv
+
+
+def descriptor_geometric(txtfile):
+    """
+    Write geometric descriptor from the negative image of binding pockets 
+    detected by VolSite to csv file
+    ---------------------------------------------------------------------------
+    Argument:
+        [file]: VolSite descriptors (txt format)
+
+    Return:
+        [file]: shape descriptors (csv format)
+    """
+    with open(txtfile, 'rb') as infile:
+        pocketvolsite = csv.reader(infile, delimiter = ',')
+        # Get the number of pockets
+        numbpocket = 0
+        for pocket in pocketvolsite:
+            numbpocket += + 1
+        geometric_csv = "{}geometric.csv".format(os.path.splitext(txtfile)[0])
+        with open(geometric_csv, 'wb') as outfile:
+            shape = csv.writer(outfile, delimiter = ',', lineterminator = '\n')
+            shape.writerow(DESC_GEOM)
+            # Calculate geometric descriptors pocket by pockets
+            for cavity in range(1, numbpocket + 1):
+				pocket_mol2 = "{}_CAVITY_N{}_ALL.mol2".format(
+							  geometric_csv.rsplit("_")[0], cavity)	
+				replace_mol2atom(pocket_mol2)
+				shape.writerow(calculate_shape_descriptor(
+				"temp_{}_CAVITY_N{}_ALL.mol2".format(
+				geometric_csv.rsplit("_")[0], cavity)))
+				os.remove("temp_{}_CAVITY_N{}_ALL.mol2".format(
+				geometric_csv.rsplit("_")[0], cavity))
+	return geometric_csv
+		
+
+def replace_mol2atom(cavity):
+    """
+    Replace the atom name assigned by VolSite to the probes by 'C' (carbone)
+	
+	Warning 1
+	Fixing the RDKit error: 'atom' with a degree > 1
+	
+	Warning 2
+	RDKit needs the output file [PDB_CAVITY_Nx_ALL_temp.mol2]
+	NOT the link
+	------------------------------------------------------------------
+    Argument:
+        [file]: negative image of a binding pocket (mol2 format)
+
+    Return:
+        [file]: negative image of a binding pocket with carbone as atom name
+				(mol2 format, temporary file used as input by RDKit)
+    """
+    header = True
+    parsing = False	
+    with open(cavity, 'rb') as infile:
+	with open("temp_{}.mol2".format(cavity.rsplit('.')[0]), 'wb') as tempf:
+		for line in infile:
+			if line.startswith('@<TRIPOS>ATOM'):
+				tempf.write(line)# + '\n')
+				header = False
+				parsing = True
+				continue
+			elif line.startswith('@<TRIPOS>BOND'):
+				parsing = False
+					
+			if header:
+				tempf.write(line)
+			if parsing:
+				tempf.write(line.replace(line.split()[5], 'C'))
+	#return tempf
+	
+			
+def calculate_shape_descriptor(cavity):
+    """
+    Calculate geometric descriptors with RDKit 
+    ------------------------------------------
+    Geometric descriptor details:
+    http://www.rdkit.org/Python_Docs/rdkit.Chem.Descriptors3D-module.html
+
+    Argument:
+        [file]: negative image of a binding pocket (mol2 format)
+
+    Return:
+        [list]: geometric descriptors
+    """
+    mol2file = Chem.MolFromMol2File(cavity, sanitize = False, removeHs = False)
+    geom_val = []
+    geom_val.append(Descriptors3D.PMI1(mol2file))
+    geom_val.append(Descriptors3D.PMI2(mol2file))
+    geom_val.append(Descriptors3D.PMI3(mol2file))
+    geom_val.append(Descriptors3D.NPR1(mol2file))
+    geom_val.append(Descriptors3D.NPR2(mol2file))
+    geom_val.append(Descriptors3D.RadiusOfGyration(mol2file))
+    geom_val.append(Descriptors3D.Asphericity(mol2file))
+    geom_val.append(Descriptors3D.SpherocityIndex(mol2file))
+    geom_val.append(Descriptors3D.Eccentricity(mol2file))
+    geom_val.append(Descriptors3D.InertialShapeFactor(mol2file))
+    return geom_val
+
+
+def merge_csv(csv_infile1, csv_infile2, csv_outfile):
+    """
+    Concatenate two csv files
+    -------------------------
+    Arguments:
+        [file]: input (csv format)
+        [file]: input (csv format)
+        [file]: output filename (csv format)
+    """
+    csv1 = pd.read_csv(csv_infile1)
+    csv2 = pd.read_csv(csv_infile2)
+    csv3 = pd.concat([csv1, csv2], axis = 1)
+    return csv3.to_csv(csv_outfile, sep = ',', index = False)
+
+# ==============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 
+					'Calculate 2D/3D Pocket Descriptors from VolSite')
+    parser.add_argument('volsitedescriptor', help = 'Input [.txt file]: \
+					Pocket descriptors calculated by VolSite')
+    parser.add_argument('pocketdescriptor', help = 'Output [.csv filename]: \
+					Pocket descriptors including \
+					89 descriptors from VolSite, \
+					10 descriptors using a combination of VolSite descriptors \
+					and 10 geometric descriptors')
+
+    options = parser.parse_args()
+    setlogger()
+    main(options.volsitedescriptor, options.pocketdescriptor)
diff --git a/pyScripts/ion.pdf b/pyScripts/ion.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..46ecfa3582acda4ea7252068a70a9f9a1259d6f8
Binary files /dev/null and b/pyScripts/ion.pdf differ
diff --git a/pyScripts/ion.txt b/pyScripts/ion.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2ec67982891e76539ee9654375ae4c8acb73de8
--- /dev/null
+++ b/pyScripts/ion.txt
@@ -0,0 +1,63 @@
+
+3NI
+4MO
+4PU
+4TI
+6MO
+AG
+AL
+AU
+AU3
+AUC
+BA
+BR
+BS3
+CA
+CD
+CE
+CF
+CL
+CO
+CR
+CS
+CU
+CU1
+ER3
+EU
+EU3
+F
+FE
+FE2
+GA
+HG
+IOD
+IR
+IR3
+K
+LA
+LI
+LU
+MG
+MN
+MN3
+MOO
+NA
+NI
+PD
+PR
+PT
+PT4
+RB
+RU
+SB
+SE4
+SM
+SR
+TB
+TH
+THE
+TL
+W
+ZN
+ZR
+
diff --git a/pyScripts/ppiinterface_dist.py b/pyScripts/ppiinterface_dist.py
new file mode 100644
index 0000000000000000000000000000000000000000..e79e69ac85a5c9070b319f5dfa89d29c2570756f
--- /dev/null
+++ b/pyScripts/ppiinterface_dist.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author: Alexandra Moine-Franel
+Date: February 2018
+Version: 4
+
+
+IDENTIFY PROTEIN INTERFACE / POCKET RESIDUES
+
+Determine protein residues at a specific distance either from protein partner, 
+ligand (small molecule) or negative image of the pocket generated by VolSite.
+
+
+
+Usage:	[Script.py] [TARGET] [PARTNER] -d (DISTANCE)
+-------------------------------------------------------------------------------
+[TARGET]:
+	Input (.mol2 or .pdb format)
+	Protein CHAIN
+
+[PARTNER]:
+	Input (.mol2 or .pdb format)
+	Negative image of the pocket generated by VolSite (.mol2 format)
+	OR Ligand (.pdb format)
+	OR Protein (.pdb format)
+	
+(DISTANCE) - optional
+	Input (float or integer)
+	Distance threshold between the target and the partner
+	* Distance target-protein/ligand = 5 angstroms - by default
+	* Distance target-cavity = 3.5 angstroms - by default
+	
+------------------	
+	
+[POCKET_RESIDUES]: 
+	Output (.txt format)
+	'PDB-targetchain-partnerchain_distance.txt' (target-protein/ligand)
+	OR 'PDB-targetchain-CAVITY_Nx_ALL_distance.txt' (target-cavity)
+"""
+
+# =============================================================================
+
+import csv
+import os
+import sys
+import argparse
+import logging
+#import time
+from biopandas.mol2 import PandasMol2
+from biopandas.pdb import PandasPdb
+
+# =============================================================================
+
+LOG = logging.getLogger("IDENTIFY PROTEIN INTERFACE / POCKET RESIDUES")  # configured by setlogger()
+
+# =============================================================================
+
+def main(target, partner, distance):
+	"""
+	Run the residue search suited to the input formats and check its output
+	---------------------------------------------------------------------------
+	Arguments:
+		[file]: protein target (.mol2 or .pdb format)
+		[file]: cavity (.mol2 format) OR ligand / protein partner (.pdb format)
+		[float]: distance threshold
+	Return:
+		None; the residue list is written to a .txt file by the search function
+	Target and partner must share the same format. A .pdb partner without
+	ATOM records is processed as a ligand, any other .pdb partner as a protein.
+	Exits with status 1 if the formats are unsupported or the output is missing.
+	"""
+	if not target:
+		return
+	if not partner:
+		# `parser` is the module-level ArgumentParser created under __main__
+		return parser.error('Error: [PARTNER: protein, ligand or cavity] is required')
+	# Dispatch on the pair of file extensions
+	formats = (os.path.splitext(target)[-1], os.path.splitext(partner)[-1])
+	if formats == ('.mol2', '.mol2'):
+		mol2target = parse_mol2(target)
+		mol2partner = parse_mol2(partner)
+		mol2_respocket(mol2target, mol2partner, distance)
+		expected = '{}-{}_{}.txt'.format(mol2target.code,
+			mol2partner.code.rsplit('.mol2')[0], distance)
+	elif formats == ('.pdb', '.pdb'):
+		pdbtarget = parse_pdb(target)
+		pdbpartner = parse_pdb(partner)
+		if pdbpartner.df['ATOM'].empty:
+			# No ATOM records: handled as a ligand described by HETATM records
+			pdb_resinterfacePL(pdbtarget, pdbpartner, distance)
+			partnertag = pdbpartner.df['HETATM']['residue_name'][0]
+		else:
+			pdb_resinterfacePP(pdbtarget, pdbpartner, distance)
+			partnertag = pdbpartner.df['ATOM']['chain_id'][0]
+		expected = '{}-{}-{}_{}.txt'.format(pdbtarget.code,
+			pdbtarget.df['ATOM']['chain_id'][0], partnertag, distance)
+	else:
+		LOG.error('Error! [TARGET] or [PARTNER] file not in .mol2 or .pdb format')
+		sys.exit(1)
+	# The search functions name their output file with this same pattern
+	if os.path.exists(expected):
+		LOG.info('Finished!')
+	else:
+		LOG.error('Warning! [Output] Not found!')
+		sys.exit(1)
+	
+	
+
+				
+def parse_mol2(mol2file):
+	"""
+	Parse .mol2 file; on IOError, log the error and exit with status 1
+	---------------------------------------------------------------------------
+	Argument: [file]: .mol2 format
+	Return: [PandasMol2]: parsed molecule (callers read its .df and .code)
+	"""
+	try:
+		pmol = PandasMol2().read_mol2(mol2file)
+	except IOError as e:
+		LOG.error('{}'.format(e))
+		sys.exit(1)
+	else:
+		return pmol
+
+
+def parse_pdb(pdbfile):
+	"""
+	Parse .pdb file; on IOError, log the error and exit with status 1
+	---------------------------------------------------------------------------
+	Argument: [file]: .pdb format
+	Return: [PandasPdb]: parsed structure (callers read its .df and .code)
+	"""
+	try:
+		ppdb = PandasPdb().read_pdb(pdbfile)
+	except IOError as e:
+		LOG.error('{}'.format(e))
+		sys.exit(1)
+	else:
+		return ppdb
+
+
+def mol2_respocket(mol2target, mol2partner, distance):
+	"""
+	Identify protein target residues at a specified distance from its partner
+	(i.e. the cavity negative image generated by VolSite).
+	---------------------------------------------------------------------------
+	Arguments:
+		[file]: protein target (.mol2 format)
+		[file]: negative image of the binding pocket (.mol2 format)
+		[float]: distance threshold (by default, 3.5 angstroms)
+	Return:
+		[file]: pocket residues (.txt format), one per line, first found first
+	"""
+	ppires, found = [], set()  # found mirrors ppires for O(1) membership tests
+	with open('{}-{}_{}.txt'.format(mol2target.code,
+			mol2partner.code.rsplit('.mol2')[0], distance), 'wb') as outfile:
+		# Loop invariants, fetched once instead of once per atom pair
+		cutoff = float(distance)
+		resnames = mol2target.df.subst_name
+		tx, ty, tz = [mol2target.df[axis] for axis in 'xyz']
+		cx, cy, cz = [mol2partner.df[axis] for axis in 'xyz']
+		for probe in xrange(len(mol2partner.df.atom_id)):
+			xc, yc, zc = cx[probe], cy[probe], cz[probe]
+			for atom in xrange(len(resnames)):
+				resname = resnames[atom]
+				if resname in found:
+					continue
+				xp = tx[atom]
+				if x_dist(xp, xc) < cutoff:  # cheap 1D, then 2D, pre-filters
+					yp = ty[atom]
+					if (xy_dist(xp, yp, xc, yc) < cutoff and
+							euclidian_dist(xp, yp, tz[atom], xc, yc, zc) < cutoff):
+						ppires.append(resname)
+						found.add(resname)
+		outfile.writelines(resp + '\n' for resp in ppires)
+
+
+def pdb_resinterfacePP(pdbtarget, pdbpartner, distance):
+	"""
+	Identify protein target residues at a specified distance from its partner
+	(i.e. protein)
+	---------------------------------------------------------------------------
+	Arguments:
+		[file]: protein target (.pdb format)
+		[file]: protein partner (.pdb format)
+		[float]: distance threshold (by default, 5 angstroms)
+	Return:
+		[file]: interface residues (.txt format), written one per line as
+		residue name + residue number, in order of first detection
+	"""
+	ppires, found = [], set()  # found mirrors ppires for O(1) membership tests
+	target, partner = pdbtarget.df['ATOM'], pdbpartner.df['ATOM']
+	with open('{}-{}-{}_{}.txt'.format(pdbtarget.code, target['chain_id'][0],
+			partner['chain_id'][0], distance), 'wb') as outfile:
+		# Loop invariants, fetched once instead of once per atom pair
+		cutoff = float(distance)
+		resnames, resnums = target['residue_name'], target['residue_number']
+		tx, ty, tz = [target[axis + '_coord'] for axis in 'xyz']
+		cx, cy, cz = [partner[axis + '_coord'] for axis in 'xyz']
+		# atomt walks the partner atoms, atomp walks the target atoms
+		for atomt in xrange(len(partner['atom_number'])):
+			xc, yc, zc = cx[atomt], cy[atomt], cz[atomt]
+			for atomp in xrange(len(target['atom_number'])):
+				residue = str(resnames[atomp]) + str(resnums[atomp])
+				if residue in found:
+					continue
+				xp = tx[atomp]
+				if x_dist(xp, xc) < cutoff:  # cheap 1D, then 2D, pre-filters
+					yp = ty[atomp]
+					if (xy_dist(xp, yp, xc, yc) < cutoff and
+							euclidian_dist(xp, yp, tz[atomp], xc, yc, zc) < cutoff):
+						ppires.append(residue)
+						found.add(residue)
+		outfile.writelines(resp + '\n' for resp in ppires)
+
+
+def pdb_resinterfacePL(pdbtarget, pdbpartner, distance):
+	"""
+	Identify protein target residues at a specified distance from its partner
+	(i.e. ligand)
+	---------------------------------------------------------------------------
+	Arguments:
+		[file]: protein target (.pdb format)
+		[file]: ligand (.pdb format), read from its HETATM records
+		[float]: distance threshold (by default, 5 angstroms)
+	Return:
+		[file]: pocket residues (.txt format), written one per line as
+		residue name + residue number, in order of first detection
+	The output file name uses the residue name of the first HETATM record.
+	"""
+	ppires, found = [], set()  # found mirrors ppires for O(1) membership tests
+	target, ligand = pdbtarget.df['ATOM'], pdbpartner.df['HETATM']
+	with open('{}-{}-{}_{}.txt'.format(pdbtarget.code, target['chain_id'][0],
+			ligand['residue_name'][0], distance), 'wb') as outfile:
+		# Loop invariants, fetched once instead of once per atom pair
+		cutoff = float(distance)
+		resnames, resnums = target['residue_name'], target['residue_number']
+		tx, ty, tz = [target[axis + '_coord'] for axis in 'xyz']
+		cx, cy, cz = [ligand[axis + '_coord'] for axis in 'xyz']
+		# atomt walks the ligand atoms, atomp walks the target atoms
+		for atomt in xrange(len(ligand['atom_number'])):
+			xc, yc, zc = cx[atomt], cy[atomt], cz[atomt]
+			for atomp in xrange(len(target['atom_number'])):
+				residue = str(resnames[atomp]) + str(resnums[atomp])
+				if residue in found:
+					continue
+				xp = tx[atomp]
+				if x_dist(xp, xc) < cutoff:  # cheap 1D, then 2D, pre-filters
+					yp = ty[atomp]
+					if (xy_dist(xp, yp, xc, yc) < cutoff and
+							euclidian_dist(xp, yp, tz[atomp], xc, yc, zc) < cutoff):
+						ppires.append(residue)
+						found.add(residue)
+		outfile.writelines(resp + '\n' for resp in ppires)
+
+
+def x_dist(x1, x2):
+	"""
+	Return the unsigned separation of two coordinates taken on a single axis
+	----------------------------------------------------------------------
+	Arguments:
+		[float]: coordinate x of target atom
+		[float]: coordinate x of partner atom
+	Return:
+		[float]: distance (same unit as the inputs)
+	"""
+	delta = x1 - x2
+	return (delta ** 2) ** 0.5
+
+
+def xy_dist(x1, y1, x2, y2):
+	"""
+	Return the distance between two points projected on the xy plane
+	----------------------------------------------------------------------
+	Arguments:
+		[float]: coordinates x, y of target atom
+		[float]: coordinates x, y of partner atom
+	Return:
+		[float]: distance (same unit as the inputs)
+	"""
+	dx, dy = x1 - x2, y1 - y2
+	return (dx ** 2 + dy ** 2) ** 0.5
+
+	
+def euclidian_dist(x1, y1, z1, x2, y2, z2):
+	"""
+	Return the straight-line (euclidean) distance between two 3D points
+	----------------------------------------------------------------------
+	Arguments:
+		[float]: coordinates x, y, z of target atom
+		[float]: coordinates x, y, z of partner atom
+	Return:
+		[float]: distance (same unit as the inputs)
+	"""
+	dx, dy, dz = x1 - x2, y1 - y2, z1 - z2
+	return (dx ** 2 + dy ** 2 + dz ** 2) ** 0.5
+	
+	
+	
+def setlogger():
+	"""Attach an INFO-level stream handler with a timestamped format to LOG."""
+	LOG.setLevel(logging.INFO)
+	ch = logging.StreamHandler()
+	ch.setLevel(logging.INFO)
+	ch.setFormatter(logging.Formatter(  # single literal: no stray tab in the output
+		'%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s'))
+	LOG.addHandler(ch)
+
+
+def default_distance(options):
+	"""Set and return options.distance by partner suffix (.mol2: 3.5, .pdb: 5.0)."""
+	suffix = os.path.splitext(options.partner)[-1]
+	if suffix in ('.mol2', '.pdb'):
+		options.distance = 3.5 if suffix == '.mol2' else 5.0
+	return options.distance  # left unchanged for any other suffix
+
+# ==============================================================================
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description = 'Identify interface/pocket residues')
+	parser.add_argument('target', \
+	help = 'Input [.mol2 or .pdb file]: protein target')
+	parser.add_argument('partner', \
+	help = 'Input [.pdb file]: protein partner or ligand; OR [.mol2 file]: cavity')
+	parser.add_argument('-d', dest = 'distance', type = float, \
+	help = 'Input [float or integer]: distance threshold \
+	{by default, target-protein/ligand = 5A; target-cavity = 3.5A}')
+	options = parser.parse_args()
+	# No -d given: pick the default threshold from the partner file extension
+	if options.distance is None:
+		default_distance(options)
+	# `parser` must stay a module-level name: main() calls parser.error()
+	setlogger()
+	main(options.target, options.partner, options.distance)