diff --git a/ippisite/db.sqlite3.REMOVED.git-id b/ippisite/db.sqlite3.REMOVED.git-id index b63f9dc481dd16289e6aba0b596ae5129a6a3db1..d23a7914fd588b82f000b3e0267b142b66545d6d 100644 --- a/ippisite/db.sqlite3.REMOVED.git-id +++ b/ippisite/db.sqlite3.REMOVED.git-id @@ -1 +1 @@ -6def815c597d9d9c8c7e97e6b7e8fd71da7d64ac \ No newline at end of file +b3413ac6f067ab89b2dad63c444b3021ffeef640 \ No newline at end of file diff --git a/ippisite/ippidb/management/commands/import_v1_data.py b/ippisite/ippidb/management/commands/import_v1_data.py index 6e4a63edca6ae664edc8b58df560beebd89540c2..e525b46f460e13f0a66b0eef7a9fd99d9b27e9a5 100644 --- a/ippisite/ippidb/management/commands/import_v1_data.py +++ b/ippisite/ippidb/management/commands/import_v1_data.py @@ -560,6 +560,7 @@ select distinct protein.NumUniprot, domain.PfamNumAccession , complexe.NbCopy, for row in rows: try: compound = Compound() + compound.id = row[0] compound.canonical_smile = row[1] compound.is_macrocycle = (row[4] == 'Y') compound.aromatic_ratio = row[5] diff --git a/pyScripts/get_pdb_structure.py b/pyScripts/get_pdb_structure.py new file mode 100644 index 0000000000000000000000000000000000000000..d5dd27838698a9550b60550d5b1d87dab7252d36 --- /dev/null +++ b/pyScripts/get_pdb_structure.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: March 2018 +Version: 1 + + +DOWNLOAD PDB 3D STRUCTURE FILES FROM PDB + + +Usage: [Script.py] [pdb_id] +------------------------------------------------------------------ +Argument: + [file]: list of PDB IDs (txt format) + +Return: + [file]: PDB files, stored in 'PDB' folder +""" + +# ============================================================================= + +import os +import argparse +import logging +import csv +import Bio +from Bio.PDB import PDBList + +# ============================================================================= + +LOG = logging.getLogger("DOWNLOAD PDB 3D STRUCTURE FILES") + +FOLDER = 'PDB' + +# ============================================================================= + +def main(pdb_id): + get_pdb_file(get_pdbID(pdb_id)) + rename_pdb_file() + + +def get_pdbID(txtfile): + pdb_id = [] + with open(txtfile, 'rb') as pdb_txtfile: + for line in csv.reader(pdb_txtfile): + pdb_id.append(line[0].upper()) + return pdb_id + + +def get_pdb_file(pdbID): + for i in pdbID: + PDBList().retrieve_pdb_file(i, pdir = FOLDER, file_format = 'pdb') + + +def rename_pdb_file(): + working_dir = os.path.dirname(__file__) + pdb_dir = os.path.abspath(os.path.join(working_dir, FOLDER)) + os.chdir(pdb_dir) + for filename in os.listdir(pdb_dir): + os.rename(filename, filename.replace('pdb', '').replace('ent', 'pdb')) + + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + +# ============================================================================= + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = + 'DOWNLOAD PDB 3D STRUCTURE FILES FROM PDB') + parser.add_argument('pdb_id', help = 'Input [.txt file]: PDB IDs') + options = parser.parse_args() + setlogger() + main(options.pdb_id) diff --git a/pyScripts/get_ppc_V2.py b/pyScripts/get_ppc_V2.py new file mode 100644 index 0000000000000000000000000000000000000000..6d801b9c9a1e30ed70137e206681968e4bca6a66 --- /dev/null +++ b/pyScripts/get_ppc_V2.py @@ -0,0 +1,171 @@ +# -*- coding: 
utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: March 2018 +Version: 2 + + + +IDENTIFY PROTEIN-PROTEIN COMPLEXES FROM PDBe + + +Usage: [Script.py] [PDBe] +------------------------------------------------------------------ +Argument: + [PDBe]: + Input (json format) + PDBe (with [assembly_composition] and [assembly_type] information included) + +Return: + [list(PPC)]: + Output (txt format) + Protein/protein complex PDB IDs + + [json(PPC)]: + Output (json format) + Protein/protein complex information + + +Warning: +------- +PDBs with unique and repeated assembly composition annotation are both included! +[unique annotation: 'protein/protein complex'] +[repeated annotation: 'protein/protein complex, protein/protein complex'] +""" + +# ============================================================================= + +import os +import sys +import argparse +import logging +import json + +# ============================================================================= + +LOG = logging.getLogger('Get PPC PDB IDs from PDBe') +FILENAME = 'PPC-DIMER_3-26-18PDBe' + +XRAY = ['X-ray diffraction', 'X-ray powder diffraction'] + +RESOLUTION = 3.5 +DIFFR = 0.05 + +# ============================================================================= + +def main(pdbe): + with open(pdbe) as infile: + structure = json.load(infile) + dimer = get_dimer(get_ppc(structure)) + dimer_filt = filter_rfactor(filter_resolution(filter_annotation(dimer))) + to_json(dimer_filt, FILENAME) + to_txt(get_id(dimer_filt), FILENAME) + + if os.path.exists('{}.txt'.format(FILENAME)): + LOG.info('Finished!') + else: + LOG.error('Warning! [Output] Not found!') + sys.exit(1) + + + +def get_ppc(pdbe): + ppc_unique = [] + ppc_repeated = [] + for i in xrange(len(pdbe)): + if 'assembly_composition' in pdbe[i].keys(): + assembly_composition = pdbe[i]['assembly_composition'] + if len(assembly_composition) == 1 and \ + assembly_composition == ['protein/protein complex']: + ppc_unique.append(pdbe[i]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count( + assembly_composition[0]) == len(assembly_composition): + ppc_repeated.append(pdbe[i]) + ppc_all = ppc_unique + ppc_repeated + return ppc_all + + +def get_dimer(pdbe): + dimer_unique = [] + dimer_repeated = [] + for i in xrange(len(pdbe)): + if 'assembly_type' in pdbe[i].keys(): + assembly_type = pdbe[i]['assembly_type'] + if len(assembly_type) == 1 and \ + assembly_type == ['dimer']: + dimer_unique.append(pdbe[i]) + elif len(assembly_type) > 1: + if assembly_type.count('dimer') >= 1 and \ + assembly_type.count(assembly_type[0]) == len(assembly_type): + dimer_repeated.append(pdbe[i]) + dimer_all = dimer_unique + dimer_repeated + return dimer_all + + +def filter_annotation(pdbe): + annotation = [] + for i in xrange(len(pdbe)): + if len(pdbe[i]['experimental_method']) == 1: + annotation.append(pdbe[i]) + return annotation + +def filter_resolution(pdbe): + quality = [] + for i in xrange(len(pdbe)): + if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1: + if 'resolution' in pdbe[i].keys() and \ + float(pdbe[i]['resolution']) <= RESOLUTION: + quality.append(pdbe[i]) + return quality + +def filter_rfactor(pdbe): + quality = [] + for i in xrange(len(pdbe)): + if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1: + if 'r_free' in pdbe[i].keys() and 'r_factor' in pdbe[i].keys(): + if float(pdbe[i]['r_free'] - pdbe[i]['r_factor']) <= abs(DIFFR): + quality.append(pdbe[i]) + return quality 
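Editor's note on the three quality filters above (filter_annotation, filter_resolution, filter_rfactor): the committed check "float(pdbe[i]['r_free'] - pdbe[i]['r_factor']) <= abs(DIFFR)" accepts any entry whose R-free is smaller than its R-factor, however large the gap, because abs() is applied to the constant rather than to the difference. The following is a minimal Python 3 sketch of an equivalent single-pass filter, not the committed code; it assumes the same PDBe entry dictionaries and keys ('experimental_method', 'resolution', 'r_free', 'r_factor') that this script already uses, and it applies abs() to the gap itself on the assumption that that is the intended test.

# Hedged sketch, not part of the diff: one-pass quality filter over PDBe entry dicts.
XRAY = ['X-ray diffraction', 'X-ray powder diffraction']
RESOLUTION = 3.5
DIFFR = 0.05

def filter_quality(entries, max_resolution=RESOLUTION, max_r_diff=DIFFR):
    """Keep entries solved by a single X-ray method, at or below the
    resolution cut-off, whose |R-free - R-factor| gap is small."""
    quality = []
    for entry in entries:
        methods = entry.get('experimental_method', [])
        if len(methods) != 1 or methods[0] not in XRAY:
            continue
        if float(entry.get('resolution', float('inf'))) > max_resolution:
            continue
        if 'r_free' in entry and 'r_factor' in entry:
            # abs() on the difference is an assumption about the intent;
            # the committed code compares the raw difference to abs(DIFFR).
            if abs(float(entry['r_free']) - float(entry['r_factor'])) <= max_r_diff:
                quality.append(entry)
    return quality

If entries with R-free below R-factor should always pass regardless of the gap, the original comparison can be kept and only the placement of abs() documented.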
+ + +def get_id(pdbe): + pdb_id = [] + for i in xrange(len(pdbe)): + pdb_id.append(pdbe[i]['pdb_id']) + return pdb_id + + +def to_json(data, outfile): + with open('{}.json'.format(outfile), 'wb') as jsonfile: + json.dump(data, jsonfile) + + +def to_txt(data, outfile): + with open('{}.txt'.format(outfile), 'wb') as txtfile: + for pdb_id in data: + txtfile.write(pdb_id + '\n') + + + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ + %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + +# ============================================================================= + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'Get protein/protein \ + complex PDB IDs from PDBe') + parser.add_argument('pdbe', help = 'Input [.json file]: \ + PBDe (with assembly and structure quality information included') + options = parser.parse_args() + setlogger() + main(options.pdbe) diff --git a/pyScripts/get_ppc_id_from_pdbe.py b/pyScripts/get_ppc_id_from_pdbe.py new file mode 100644 index 0000000000000000000000000000000000000000..e41157a8c94a05634d5bcdfaea385670bb4a5abc --- /dev/null +++ b/pyScripts/get_ppc_id_from_pdbe.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: February 2018 +Version: 1 + + + +IDENTIFY PROTEIN-PROTEIN COMPLEXES FROM PDBe + + +Usage: [Script.py] [PDBe] +------------------------------------------------------------------ +Argument: + [PDBe]: + Input (json format) + PDBe (with assembly information included) + +Return: + [list(PPC)]: + Output (txt format) + Protein/protein complex PDB IDs + + [json(PPC)]: + Output (json format) + Protein/protein complex information + + +Warning: +------- +PDBs with unique and repeated assembly composition annotation are both included! 
+[unique annotation: 'protein/protein complex'] +[repeated annotation: 'protein/protein complex, protein/protein complex'] +""" + +# ============================================================================= + +import argparse +import logging +import json +import pandas as pd + +# ============================================================================= + +LOG = logging.getLogger('Get protein/protein complex PDB IDs from PDBe') + +# ============================================================================= + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + + + +def main(pdbe): + structure = pd.read_json(pdbe, orient = 'columns') + ppc_to_txt(get_ppc_id(structure)) + ppc_to_json(get_ppc_data(structure)) + + + +def get_ppc_id(pdbe): + ppc_unique = [] + ppc_repeated = [] + for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])): + assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition'] + if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']: + ppc_unique.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['pdb_id']) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count(assembly_composition[0]) == len(assembly_composition): + ppc_repeated.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['pdb_id']) + ppc_all = ppc_unique + ppc_repeated + return ppc_all + + + +def get_ppc_data(pdbe): + ppc_unique_data = [] + ppc_repeated_data = [] + for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])): + assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition'] + if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']: + ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count(assembly_composition[0]) == len(assembly_composition): + ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + ppc_all_data = ppc_unique_data + ppc_repeated_data + return ppc_all_data + + + +def ppc_to_txt(ppc): + with open('PPC_2-20-18PDBe.txt', 'wb') as txtfile: + for pdb_id in ppc: + txtfile.write(pdb_id + '\n') + + + +def ppc_to_json(ppc): + with open('PPC_2-20-18PDBe.json', 'wb') as jsonfile: + json.dump(ppc, jsonfile) + + + +# ============================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = + 'Get protein/protein complex PDB IDs from PDBe') + parser.add_argument('pdbe', help = 'Input [.json file]: \ + PBDe (with assembly information included') + options = parser.parse_args() + setlogger() + main(options.pdbe) diff --git a/pyScripts/get_ppc_ligand_V2.py b/pyScripts/get_ppc_ligand_V2.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1f1576c33a4f3be066d1a2326152198654130b --- /dev/null +++ b/pyScripts/get_ppc_ligand_V2.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: March 2018 +Version: 3 + + + +IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS (with or without including ions) 
+INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe + + +Usage: [Script.py] [PDBe] [IONS] +------------------------------------------------------------------ +Arguments: + [PDBe]: + Input (json format) + PDBe (with [assembly_composition], [compound_id] and [uniprot_id] included) + + [IONS]: + Input (txt format) + Compounds considered as ions + +Return: + [list(protein)]: + Output (txt format) + Proteins with compounds associated with a protein/protein complex PDB IDs + + [json(protein)]: + Output (json format) + Proteins with compounds associated with a protein/protein complex dataset + + Compounds: + --------- + ** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID'] + or [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION'] + + + [list(protein)]: + Output (txt format) + Proteins with compounds associated with a protein/protein complex PDB IDs + IONS ONLY EXCLUDED + + [json(protein)]: + Output (json format) + Proteins with compounds associated with a protein/protein complex dataset + IONS ONLY EXCLUDED + + Compounds: + --------- + ** EXAMPLE: [u'FE : FE (III) ION'] + + + +!Warning!: +------- +PDBs with unique and repeated assembly composition annotation are both included! +[unique annotation: 'protein/protein complex'] +[repeated annotation: 'protein/protein complex, protein/protein complex'] +""" + +# ============================================================================= + +import os +import sys +import argparse +import logging +import json + +# ============================================================================= + +LOG = logging.getLogger('Get PPC PDB IDs with ligand') +FILENAME_with_ions = 'PPC-PROT-COMPOUNDS_3-26-18PDBe' +FILENAME_without_ions = 'PPC-PROT-COMPOUNDS_noIONS_3-26-18PDBe' + +XRAY = ['X-ray diffraction', 'X-ray powder diffraction'] + +RESOLUTION = 3.5 +DIFFR = 0.05 + +# ============================================================================= + +def main(pdbe, ion): + with open(pdbe) as infile: + structure = json.load(infile) + + # Get protein/protein complex (PPC) + ppc = filter_rfactor(filter_resolution(filter_annotation( + get_ppc(structure)))) + to_txt(get_id(ppc), 'PPC_3-26-18PDBe') + + # Get protein structure with [compounds] + prot_compound = filter_rfactor(filter_resolution(filter_annotation( + get_with_compound(get_prot(structure))))) + + # Collect PPC UniProt + ppc_uniprot = get_uniprot(ppc) + + # Get PPC-associated proteins with compounds + ppc_ligand = get_prot_compound_ppc(prot_compound, ppc_uniprot) + to_json(ppc_ligand, FILENAME_with_ions) + to_txt(get_id(ppc_ligand), FILENAME_with_ions) + + # Get PPC-associated proteins with compounds (ions only not included) + ions = [] + with open(ion, 'r') as ion_file: + for i in ion_file: + ions.append(i.strip().upper()) + + prot_compound_without_ions = get_without_ions(prot_compound, ions) + ppc_ligand_without_ions = get_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot) + to_json(ppc_ligand_without_ions, FILENAME_without_ions) + to_txt(get_id(ppc_ligand_without_ions), FILENAME_without_ions) + + + if os.path.exists('{}.txt'.format(FILENAME_with_ions)) and \ + os.path.exists('{}.txt'.format(FILENAME_without_ions)): + LOG.info('Finished!') + else: + LOG.error('Warning! 
[Outputs] Not found!') + sys.exit(1) + + + +def get_ppc(pdbe): + ppc_unique = [] + ppc_repeated = [] + for i in xrange(len(pdbe)): + if 'assembly_composition' in pdbe[i].keys(): + assembly_composition = pdbe[i]['assembly_composition'] + if len(assembly_composition) == 1 and \ + assembly_composition == ['protein/protein complex']: + ppc_unique.append(pdbe[i]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count( + assembly_composition[0]) == len(assembly_composition): + ppc_repeated.append(pdbe[i]) + ppc_all = ppc_unique + ppc_repeated + return ppc_all + + +def get_prot(pdbe): + prot_unique = [] + prot_repeated = [] + for i in xrange(len(pdbe)): + if 'assembly_composition' in pdbe[i].keys(): + assembly_composition = pdbe[i]['assembly_composition'] + if len(assembly_composition) == 1 and \ + assembly_composition == ['protein structure']: + prot_unique.append(pdbe[i]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein structure') >= 1 and \ + assembly_composition.count( + assembly_composition[0]) == len(assembly_composition): + prot_repeated.append(pdbe[i]) + prot_all = prot_unique + prot_repeated + return prot_all + + +def filter_annotation(pdbe): + annotation = [] + for i in xrange(len(pdbe)): + if len(pdbe[i]['experimental_method']) == 1: + annotation.append(pdbe[i]) + return annotation + +def filter_resolution(pdbe): + quality = [] + for i in xrange(len(pdbe)): + if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1: + if 'resolution' in pdbe[i].keys() and \ + float(pdbe[i]['resolution']) <= RESOLUTION: + quality.append(pdbe[i]) + return quality + +def filter_rfactor(pdbe): + quality = [] + for i in xrange(len(pdbe)): + if len([e for e in pdbe[i]['experimental_method'] if e in XRAY]) == 1: + if 'r_free' in pdbe[i].keys() and 'r_factor' in pdbe[i].keys(): + if float(pdbe[i]['r_free'] - pdbe[i]['r_factor']) <= abs(DIFFR): + quality.append(pdbe[i]) + return quality + + +def get_with_compound(pdbe): + compound = [] + for i in xrange(len(pdbe)): + if 'compound_id' in pdbe[i].keys(): + compound.append(pdbe[i]) + return compound + + +def get_uniprot(pdbe): + uniprot = [] + for i in xrange(len(pdbe)): + if 'uniprot_id' in pdbe[i].keys(): + uniprot.extend(pdbe[i]['uniprot_id']) + return uniprot + + +def get_prot_compound_ppc(protein, uniprot): + prot_compound_ppc = [] + for i in xrange(len(protein)): + if 'uniprot_id' in protein[i].keys(): + if len([e for e in protein[i]['uniprot_id'] if e in uniprot]) >= 1: + prot_compound_ppc.append(protein[i]) + return prot_compound_ppc + + +def get_without_ions(protein, ions): + without_ions = [] + with_ions_not_only = [] + for i in xrange(len(protein)): + if 'compound_id' in protein[i].keys(): + if len([e for e in protein[i]['compound_id'] if e in ions]) == 0: + without_ions.append(protein[i]) + elif len([e for e in protein[i]['compound_id'] if e in ions]) >= 1: + if len([e for e in protein[i]['compound_id'] if e in ions]) \ + != len(protein[i]['compound_id']): + with_ions_not_only.append(protein[i]) + print len(without_ions), len(with_ions_not_only) + compound_ions = without_ions + with_ions_not_only + return compound_ions + + +def get_id(pdbe): + pdb_id = [] + for i in xrange(len(pdbe)): + pdb_id.append(pdbe[i]['pdb_id']) + return pdb_id + + +def to_json(data, outfile): + with open('{}.json'.format(outfile), 'wb') as jsonfile: + json.dump(data, jsonfile) + + +def to_txt(data, outfile): + with open('{}.txt'.format(outfile), 'wb') as 
txtfile: + for pdb_id in data: + txtfile.write(pdb_id + '\n') + + + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ + %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + +# ============================================================================= + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'Get protein/protein \ + complex PDB IDs from PDBe') + parser.add_argument('pdbe', help = 'Input [.json file]: \ + PBDe (with assembly and structure quality information included') + parser.add_argument('ion', help = 'Input [.txt file]: Compounds considered \ + as ions') + options = parser.parse_args() + setlogger() + main(options.pdbe, options.ion) diff --git a/pyScripts/get_ppc_ligand_compound_from_pdbe.py b/pyScripts/get_ppc_ligand_compound_from_pdbe.py new file mode 100644 index 0000000000000000000000000000000000000000..a91caa2d8a00c3559d5e14213294b8308dff1b43 --- /dev/null +++ b/pyScripts/get_ppc_ligand_compound_from_pdbe.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: February 2018 +Version: 2 + + + +IDENTIFY INTERACTING LIGANDS AND COMPOUNDS OF PROTEIN-PROTEIN COMPLEXES FROM PDBe + + +Usage: [Script.py] [PDBe] +------------------------------------------------------------------ +Argument: + [PDBe]: + Input (json format) + PDBe (with assembly information included) + +Return: + [list(ligands)]: + Output (txt format) + Interacting ligands of protein/protein complexes + + [list(compounds)]: + Output (txt format) + Compounds of protein/protein complexes + + +Warning: +------- +PDBs with unique and repeated assembly composition annotation are both included! 
+[unique annotation: 'protein/protein complex'] +[repeated annotation: 'protein/protein complex, protein/protein complex'] +""" + +# ============================================================================= + +import argparse +import logging +import json +import pandas as pd + +# ============================================================================= + +LOG = logging.getLogger('Get ligands and compounds of protein/protein complexes from PDBe') + +# ============================================================================= + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + + + +def main(pdbe): + structure = pd.read_json(pdbe, orient = 'columns') + ligand_to_txt(get_ligand(get_ppc_data(structure))) + compound_to_txt(get_compound(get_ppc_data(structure))) + +def get_ppc_data(pdbe): + ppc_unique_data = [] + ppc_repeated_data = [] + for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])): + assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition'] + if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']: + ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count(assembly_composition[0]) == len(assembly_composition): + ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + ppc_all_data = ppc_unique_data + ppc_repeated_data + return ppc_all_data + + + +def get_ligand(ppc): + ppc_ligand = [] + unique_ligand = [] + for i in xrange(len(ppc)): + if 'interacting_ligands' in ppc[i].keys(): + ppc_ligand.append(ppc[i]['interacting_ligands'][0].split(' :')[0]) + + for j in ppc_ligand: + if j not in unique_ligand: + unique_ligand.append(j) + + #print len(unique_ligand) + return unique_ligand + + + +def get_compound(ppc): + ppc_compound = [] + unique_compound = [] + for i in xrange(len(ppc)): + if 'compound_id' in ppc[i].keys(): + ppc_compound.extend(ppc[i]['compound_id']) + + for j in ppc_compound: + if j not in unique_compound: + unique_compound.append(j) + + #print len(unique_compound) + return unique_compound + + +def ligand_to_txt(ligand): + with open('PPC_LIGANDS_2-20-18PDBe.txt', 'wb') as txtfile: + for lig in ligand: + txtfile.write(lig + '\n') + +def compound_to_txt(compound): + with open('PPC_COMPOUNDS_2-20-18PDBe.txt', 'wb') as txtfile: + for c in compound: + txtfile.write(c + '\n') + +# ============================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = + 'Get ligands and compounds of protein/protein complexes from PDBe') + parser.add_argument('pdbe', help = 'Input [.json file]: \ + PBDe (with interacting ligand and compound information included') + options = parser.parse_args() + setlogger() + main(options.pdbe) diff --git a/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py b/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py new file mode 100644 index 0000000000000000000000000000000000000000..59a2c0ac99f63241860924a3b5001e069f73a7ac --- /dev/null +++ b/pyScripts/get_ppc_protcomp-ion_ppc_from_pdbe.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: February 2018 
+Version: 2 + + + +IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS (with or without including ions) +INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe + + +Usage: [Script.py] [PDBe] +------------------------------------------------------------------ +Argument: + [PDBe]: + Input (json format) + PDBe (with [assembly_composition], [compound_id] and [uniprot_id] included) + +Return: + [list(protein)]: + Output (txt format) + Proteins with compounds associated with a protein/protein complex PDB IDs + + [json(protein)]: + Output (json format) + Proteins with compounds associated with a protein/protein complex dataset + + Compounds: + --------- + ** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID'] + or [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION'] + + + + [list(protein)]: + Output (txt format) + Proteins with compounds associated with a protein/protein complex PDB IDs + IONS ONLY EXCLUDED + + [json(protein)]: + Output (json format) + Proteins with compounds associated with a protein/protein complex dataset + IONS ONLY EXCLUDED + + Compounds: + --------- + ** EXAMPLE: [u'FE : FE (III) ION'] + + + +!Warning!: +------- +PDBs with unique and repeated assembly composition annotation are both included! +[unique annotation: 'protein/protein complex'] +[repeated annotation: 'protein/protein complex, protein/protein complex'] +""" + +# ============================================================================= + +import argparse +import logging +import json +import pandas as pd + +# ============================================================================= + +LOG = logging.getLogger('Proteins associated with a PPC - Compounds') + +# ============================================================================= + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + + +def main(pdbe): + structure = pd.read_json(pdbe, orient = 'columns') + + # Get protein/protein complex [assembly_composition] + ppc = get_ppc_data(structure) + + # Get protein structure [assembly_composition] with 'compounds' + prot_compound = get_compound_id(get_prot_data(structure)) + + # Collect PPC UniProt + ppc_uniprot = get_uniprot(ppc) + + # Identify protein structure with compounds associated to PPC + to_txt(get_id_prot_compound_ppc(prot_compound, ppc_uniprot)) + to_json(get_data_prot_compound_ppc(prot_compound, ppc_uniprot)) + + + # Get protein structure [assembly_composition] with compounds (ions only not included) + ion = get_ion(get_compound_name(prot_compound)) + prot_compound_without_ions = get_compound_without_ion(prot_compound, ion) + to_txt_ion(get_id_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot)) + to_json_ion(get_data_prot_compound_ppc(prot_compound_without_ions, ppc_uniprot)) + + + + +def get_ppc_data(pdbe): + ppc_unique_data = [] + ppc_repeated_data = [] + for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])): + assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition'] + if len(assembly_composition) == 1 and assembly_composition == ['protein/protein complex']: + ppc_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein/protein complex') >= 1 and \ + assembly_composition.count(assembly_composition[0]) == len(assembly_composition): 
+ ppc_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + ppc_all_data = ppc_unique_data + ppc_repeated_data + return ppc_all_data + + + +def get_prot_data(pdbe): + prot_unique_data = [] + prot_repeated_data = [] + for i in xrange(len(pdbe['grouped']['pdb_id']['groups'])): + assembly_composition = pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]['assembly_composition'] + if len(assembly_composition) == 1 and assembly_composition == ['protein structure']: + prot_unique_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + elif len(assembly_composition) > 1: + if assembly_composition.count('protein structure') >= 1 and \ + assembly_composition.count(assembly_composition[0]) == len(assembly_composition): + prot_repeated_data.append(pdbe['grouped']['pdb_id']['groups'][i]['doclist']['docs'][0]) + prot_all_data = prot_unique_data + prot_repeated_data + return prot_all_data + + + +def get_compound_id(data): + ''' + Get 3D structure with compound [compound_id] + -------------------------------------------------- + Argument: + [json]: protein structures (dataset) + + Return: + [json]: protein structures with compounds (dataset) + ''' + compound = [] + for i in xrange(len(data)): + if 'compound_id' in data[i].keys(): + compound.append(data[i]) + return compound + + + +def get_uniprot(data): + ''' + Get UniProt + ------------------------------------------------- + Argument: + [json]: protein/protein complexes (dataset) + + Return: + [list]: protein/protein complexes Uniprot IDs + ''' + uniprot = [] + for i in xrange(len(data)): + if 'uniprot_id' in data[i].keys(): + uniprot.extend(data[i]['uniprot_id']) + return uniprot + + +def get_id_prot_compound_ppc(data, uniprot): + ''' + Get PDB IDs only of protein structure with compounds associated with a protein/protein complex + ----------------------------------------------------------------------------------------------- + Arguments: + [json]: protein structures with compounds (dataset) + [list]: protein/protein complex uniprot id + + Return: + [list]: protein structure with compounds associated with a protein/protein complex PDB IDs + ''' + prot_compound_pcc = [] + for i in xrange(len(data)): + if 'uniprot_id' in data[i].keys(): + if len([e for e in data[i]['uniprot_id'] if e in uniprot]) >= 1: + prot_compound_pcc.append(data[i]['pdb_id']) + return prot_compound_pcc + + + +def get_data_prot_compound_ppc(data, uniprot): + ''' + Get data only of protein structure with compounds associated with a protein/protein complex + ----------------------------------------------------------------------------------------------- + Arguments: + [json]: protein structures with compounds (dataset) + [list]: protein/protein complex uniprot id + + Return: + [json]: protein structure with compounds associated with a protein/protein complex (dataset) + ''' + prot_compound_pcc_data = [] + for i in xrange(len(data)): + if 'uniprot_id' in data[i].keys(): + if len([e for e in data[i]['uniprot_id'] if e in uniprot]) >= 1: + prot_compound_pcc_data.append(data[i]) + return prot_compound_pcc_data + + + + +def get_compound_name(data): + ''' + Get protein compound names + -------------------------------------------------- + Argument: + [json]: protein structures with compounds (dataset) + + Return: + [list]: compound names + ** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION'] + ''' + compound = [] + for i in xrange(len(data)): + if 'compound_name' in data[i].keys(): + 
compound.extend(data[i]['compound_name']) + return compound + + + +def get_ion(compound_name): + ''' + Get protein ions + -------------------------------------------------- + Argument: + [list]: compound names + ** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION'] + + Return: + [list]: ions + ** EXAMPLE: ['FE'] + ''' + ion = [] + ion_unique = [] + for i in xrange(len(compound_name)): + if 'ION' in compound_name[i].split(' :')[1].split(' '): + ion.append(compound_name[i].split(' :')[0]) + for j in ion: + if j not in ion_unique: + ion_unique.append(j) + print ion_unique + return ion_unique + + + +def get_compound_without_ion(data, ions): + ''' + Get 3D structure with compound [compound_id], ion excluded + ---------------------------------------------------------- + Arguments: + [json]: protein structures (dataset) + [list]: ions + + Return: + [json]: protein structures with compounds != ions (dataset) + + !Warning! + --------- + Protein structures either with only compounds, or compounds and ions are included + ** EXAMPLE: [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID'] + or [u'FHB : 3-FLUORO-4-HYDROXYBENZOIC ACID', u'FE : FE (III) ION'] + + Only protein structures with only ions are excluded + ** EXAMPLE: [u'FE : FE (III) ION'] + ''' + compound_without_ion = [] + compound_with_ion = [] + for i in xrange(len(data)): + if 'compound_id' in data[i].keys(): + if len([e for e in data[i]['compound_id'] if e in ions]) == 0: + compound_without_ion.append(data[i]) + elif len([e for e in data[i]['compound_id'] if e in ions]) >= 1: + if len([e for e in data[i]['compound_id'] if e in ions]) != len(data[i]['compound_id']): + compound_with_ion.append(data[i]) + compound_ion = compound_without_ion + compound_with_ion + return compound_ion + + + +def to_txt(index): + with open('PROT-COMPOUNDS_PPC_2-20-18PDBe.txt', 'wb') as txtfile: + for i in index: + txtfile.write(i + '\n') + +def to_json(index): + with open('PROT-COMPOUNDS_PPC_2-20-18PDBe.json', 'wb') as jsonfile: + json.dump(index, jsonfile) + +def to_txt_ion(index): + with open('PROT-COMPOUNDS-noIONS_PPC_2-20-18PDBe.txt', 'wb') as txtfile: + for i in index: + txtfile.write(i + '\n') + +def to_json_ion(index): + with open('PROT-COMPOUNDS-noIONS_PPC_2-20-18PDBe.json', 'wb') as jsonfile: + json.dump(index, jsonfile) + +# ============================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = + 'IDENTIFY PROTEIN STRUCTURE WITH COMPOUNDS INVOLVED IN PROTEIN-PROTEIN INTERACTION FROM PDBe') + parser.add_argument('pdbe', help = 'Input [.json file]: \ + PBDe (with [assembly_composition], [compound_id] and [uniprot_id] included)') + options = parser.parse_args() + setlogger() + main(options.pdbe) diff --git a/pyScripts/iPPI-DB_descriptors_V2.py b/pyScripts/iPPI-DB_descriptors_V2.py new file mode 100644 index 0000000000000000000000000000000000000000..ef516ce6ee41b26aecaada0d83730c0d57572e08 --- /dev/null +++ b/pyScripts/iPPI-DB_descriptors_V2.py @@ -0,0 +1,320 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: January 2018 +Version: 2 + + + +CALCULATE 2D/3D POCKET DESCRIPTORS FROM VOLSITE + + +Usage: [Script.py] [PDB_descriptor.txt] [PDB_2D-3Ddescriptors.csv] +------------------------------------------------------------------ + +[PDB_descriptor.txt]: + Input (txt format) + Pocket descriptors calculated by VolSite + +[PDB_2D-3Ddescriptors.csv]: + Output (csv format) + Pocket descriptors including: + * 89 descriptors from VolSite + * 10 
descriptors using a linear combination of VolSite descriptors + (Kuenemann, 2016 Scientific Reports) + * 10 geometric descriptors from RDKit toolkit + (http://www.rdkit.org/docs/Overview.html, version 2017.09.1) +""" + +# ============================================================================= + +import os +import sys +import argparse +import logging +import csv +import pandas as pd +from rdkit import Chem +from rdkit.Chem import Descriptors3D + +# ============================================================================= + +LOG = logging.getLogger("CALCULATE 2D/3D POCKET DESCRIPTORS FROM VOLSITE") + + +VOLSITE_HEADER = [ + 'Volume', 'CZ', 'CA', 'O', 'OD1', 'OG', 'N', 'NZ', 'DU', + 'CZ40', 'CZ40-50', 'CZ50-60', 'CZ60-70', 'CZ70-80', 'CZ80-90', 'CZ90-100', 'CZ100-110', 'CZ110-120', 'CZ120', + 'CA40', 'CA40-50', 'CA50-60', 'CA60-70', 'CA70-80', 'CA80-90', 'CA90-100', 'CA100-110', 'CA110-120', 'CA120', + 'O40', 'O40-50', 'O50-60', 'O60-70', 'O70-80', 'O80-90', 'O90-100', 'O100-110', 'O110-120', 'O120', + 'OD140', 'OD140-50', 'OD150-60', 'OD160-70', 'OD170-80', 'OD180-90', 'OD190-100', 'OD1100-110', 'OD1110-120', + 'OD1120', + 'OG40', 'OG40-50', 'OG50-60', 'OG60-70', 'OG70-80', 'OG80-90', 'OG90-100', 'OG100-110', 'OG110-120', 'OG120', + 'N40', 'N40-50', 'N50-60', 'N60-70', 'N70-80', 'N80-90', 'N90-100', 'N100-110', 'N110-120', 'N120', + 'NZ40', 'NZ40-50', 'NZ50-60', 'NZ60-70', 'NZ70-80', 'NZ80-90', 'NZ90-100', 'NZ100-110', 'NZ110-120', 'NZ120', + 'DU40', 'DU40-50', 'DU50-60', 'DU60-70', 'DU70-80', 'DU80-90', 'DU90-100', 'DU100-110', 'DU110-120', 'DU120', + 'POCKET' +] + +DESC_COMBINATION = { + 'T40': ['CZ40', 'CA40', 'O40', 'OD140', 'OG40', 'N40', 'NZ40', 'DU40'], + 'T40-50': ['CZ40-50', 'CA40-50', 'O40-50', 'OD140-50', 'OG40-50', 'N40-50', 'NZ40-50', 'DU40-50'], + 'T50-60': ['CZ50-60', 'CA50-60', 'O50-60', 'OD150-60', 'OG50-60', 'N50-60', 'NZ50-60', 'DU50-60'], + 'T60-70': ['CZ60-70', 'CA60-70', 'O60-70', 'OD160-70', 'OG60-70', 'N60-70', 'NZ60-70', 'DU60-70'], + 'T70-80': ['CZ70-80', 'CA70-80', 'O70-80', 'OD170-80', 'OG70-80', 'N70-80', 'NZ70-80', 'DU70-80'], + 'T80-90': ['CZ80-90', 'CA80-90', 'O80-90', 'OD180-90', 'OG80-90', 'N80-90', 'NZ80-90', 'DU80-90'], + 'T90-100': ['CZ90-100', 'CA90-100', 'O90-100', 'OD190-100', 'OG90-100', 'N90-100', 'NZ90-100', 'DU90-100'], + 'T100-110': ['CZ100-110', 'CA100-110', 'O100-110', 'OD1100-110', 'OG100-110', 'N100-110', 'NZ100-110', 'DU100-110'], + 'T110-120': ['CZ110-120', 'CA110-120', 'O110-120', 'OD1110-120', 'OG110-120', 'N110-120', 'NZ110-120', 'DU110-120'], +} + +DESC_GEOM = [ + 'PMI1', 'PMI2', 'PMI3', 'NPR1', 'NPR2', 'Rgyr', + 'Asphericity', 'SpherocityIndex', 'Eccentricity', 'InertialShapeFactor' +] + +# ============================================================================= + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ + %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + + +def main(volsitedescriptor, pocketdescriptor): + volsite_csv = descriptor_volsite(volsitedescriptor) + combination_csv = descriptor_combination(volsite_csv) + volsite_combination_csv = "{}volsite+combinaison.csv".format( + volsite_csv.rsplit("volsite.csv")[0]) + merge_csv(volsite_csv, combination_csv, volsite_combination_csv) + geometric_csv = descriptor_geometric(volsitedescriptor) + merge_csv(volsite_combination_csv, geometric_csv, pocketdescriptor) + + if os.path.exists(pocketdescriptor): + 
os.remove(volsite_csv) + os.remove(combination_csv) + os.remove(volsite_combination_csv) + os.remove(geometric_csv) + LOG.info('Finished!') + else: + LOG.error('Error: 2D/3D Pocket Descriptor file not found') + sys.exit(1) + + +def descriptor_volsite(txtfile): + """ + Convert VolSite output file from txt format to csv format + (descriptor headers added) + --------------------------------------------------------- + Argument: + [file]: VolSite descriptors (txt format) + + Return: + [file]: VolSite descriptors (csv format) + with descriptor headers + """ + try: + with open(txtfile, 'rb') as infile: + volsite_csv = "{}_volsite.csv".format(os.path.splitext(txtfile)[0]) + with open(volsite_csv, 'wb') as outfile: + in_text = csv.reader(infile, delimiter=' ') + out_csv = csv.writer(outfile) + out_csv.writerow(VOLSITE_HEADER) + for line in in_text: + out_csv.writerow(line) + except IOError, e: + LOG.error('{}'.format(e)) + sys.exit(1) + return volsite_csv + + +def descriptor_combination(volsite_descriptor_csv): + """ + Calculate and write linear combinations of VolSite descriptors to csv file + (Kuenemann, 2016 Scientific Reports) + --------------------------------------------------------------------------- + Argument: + [file]: VolSite descriptors (csv format) + + Return: + [file]: linear combinations of VolSite descriptors (csv format) + """ + with open(volsite_descriptor_csv, 'rb') as infile: + volsite = csv.reader(infile, delimiter = ',') + descriptors = volsite.next() + + # Create a dictionnary + # key(descriptor): values(list of all pocket values) + data = {} + # Count the number of pockets + numbpocket = 0 + for header in descriptors: + data[header] = [] + for pocket in volsite: + numbpocket += + 1 + for header, value in zip(descriptors, pocket): + data[header].append(value) + combination_csv = "{}combination.csv".format( + volsite_descriptor_csv.rsplit("volsite.csv")[0]) + with open(combination_csv, 'wb') as outfile: + combivolsite = csv.writer(outfile, delimiter = ',', + lineterminator = '\n') + # Descriptor headers and values have to be in variables + # to write them in a new file column by column (sep = comma) + descname = [] + cdata = [] + for dch in DESC_COMBINATION.keys(): + # dch = descriptor combination header (column) + bsum = [] + for p in range(numbpocket): + # p = pocket (row) + buriedness_values = [] + for colname in DESC_COMBINATION[dch]: + # Get buriedness value by descriptors + buriedness_values.append(float(data[colname][p])) + # Get sum of buriedness value by descriptor combinations + bsum.append(sum(buriedness_values)) + descname.append(dch) + cdata.append(bsum) + combivolsite.writerow(descname) + datatransposed = zip(*cdata) + for row in datatransposed: + combivolsite.writerow(row) + return combination_csv + + +def descriptor_geometric(txtfile): + """ + Write geometric descriptor from the negative image of binding pockets + detected by VolSite to csv file + --------------------------------------------------------------------------- + Argument: + [file]: VolSite descriptors (txt format) + + Return: + [file]: shape descriptors (csv format) + """ + with open(txtfile, 'rb') as infile: + pocketvolsite = csv.reader(infile, delimiter = ',') + # Get the number of pockets + numbpocket = 0 + for pocket in pocketvolsite: + numbpocket += + 1 + geometric_csv = "{}geometric.csv".format(os.path.splitext(txtfile)[0]) + with open(geometric_csv, 'wb') as outfile: + shape = csv.writer(outfile, delimiter = ',', lineterminator = '\n') + shape.writerow(DESC_GEOM) + # Calculate geometric 
descriptors pocket by pockets + for cavity in range(1, numbpocket + 1): + pocket_mol2 = "{}_CAVITY_N{}_ALL.mol2".format( + geometric_csv.rsplit("_")[0], cavity) + replace_mol2atom(pocket_mol2) + shape.writerow(calculate_shape_descriptor( + "temp_{}_CAVITY_N{}_ALL.mol2".format( + geometric_csv.rsplit("_")[0], cavity))) + os.remove("temp_{}_CAVITY_N{}_ALL.mol2".format( + geometric_csv.rsplit("_")[0], cavity)) + return geometric_csv + + +def replace_mol2atom(cavity): + """ + Replace the atom name assigned by VolSite to the probes by 'C' (carbone) + + Warning 1 + Fixing the RDKit error: 'atom' with a degree > 1 + + Warning 2 + RDKit needs the output file [PDB_CAVITY_Nx_ALL_temp.mol2] + NOT the link + ------------------------------------------------------------------ + Argument: + [file]: negative image of a binding pocket (mol2 format) + + Return: + [file]: negative image of a binding pocket with carbone as atom name + (mol2 format, temporary file used as input by RDKit) + """ + header = True + parsing = False + with open(cavity, 'rb') as infile: + with open("temp_{}.mol2".format(cavity.rsplit('.')[0]), 'wb') as tempf: + for line in infile: + if line.startswith('@<TRIPOS>ATOM'): + tempf.write(line)# + '\n') + header = False + parsing = True + continue + elif line.startswith('@<TRIPOS>BOND'): + parsing = False + + if header: + tempf.write(line) + if parsing: + tempf.write(line.replace(line.split()[5], 'C')) + #return tempf + + +def calculate_shape_descriptor(cavity): + """ + Calculate geometric descriptors with RDKit + ------------------------------------------ + Geometric descriptor details: + http://www.rdkit.org/Python_Docs/rdkit.Chem.Descriptors3D-module.html + + Argument: + [file]: negative image of a binding pocket (mol2 format) + + Return: + [list]: geometric descriptors + """ + mol2file = Chem.MolFromMol2File(cavity, sanitize = False, removeHs = False) + geom_val = [] + geom_val.append(Descriptors3D.PMI1(mol2file)) + geom_val.append(Descriptors3D.PMI2(mol2file)) + geom_val.append(Descriptors3D.PMI3(mol2file)) + geom_val.append(Descriptors3D.NPR1(mol2file)) + geom_val.append(Descriptors3D.NPR2(mol2file)) + geom_val.append(Descriptors3D.RadiusOfGyration(mol2file)) + geom_val.append(Descriptors3D.Asphericity(mol2file)) + geom_val.append(Descriptors3D.SpherocityIndex(mol2file)) + geom_val.append(Descriptors3D.Eccentricity(mol2file)) + geom_val.append(Descriptors3D.InertialShapeFactor(mol2file)) + return geom_val + + +def merge_csv(csv_infile1, csv_infile2, csv_outfile): + """ + Concatenate two csv files + ------------------------- + Arguments: + [file]: input (csv format) + [file]: input (csv format) + [file]: output filename (csv format) + """ + csv1 = pd.read_csv(csv_infile1) + csv2 = pd.read_csv(csv_infile2) + csv3 = pd.concat([csv1, csv2], axis = 1) + return csv3.to_csv(csv_outfile, sep = ',', index = False) + +# ============================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = + 'Calculate 2D/3D Pocket Descriptors from VolSite') + parser.add_argument('volsitedescriptor', help = 'Input [.txt file]: \ + Pocket descriptors calculated by VolSite') + parser.add_argument('pocketdescriptor', help = 'Output [.csv filename]: \ + Pocket descriptors including \ + 89 descriptors from VolSite, \ + 10 descriptors using a combination of VolSite descriptors \ + and 10 geometric descriptors') + + options = parser.parse_args() + setlogger() + main(options.volsitedescriptor, options.pocketdescriptor) diff --git 
a/pyScripts/ion.pdf b/pyScripts/ion.pdf new file mode 100644 index 0000000000000000000000000000000000000000..46ecfa3582acda4ea7252068a70a9f9a1259d6f8 Binary files /dev/null and b/pyScripts/ion.pdf differ diff --git a/pyScripts/ion.txt b/pyScripts/ion.txt new file mode 100644 index 0000000000000000000000000000000000000000..c2ec67982891e76539ee9654375ae4c8acb73de8 --- /dev/null +++ b/pyScripts/ion.txt @@ -0,0 +1,63 @@ + +3NI +4MO +4PU +4TI +6MO +AG +AL +AU +AU3 +AUC +BA +BR +BS3 +CA +CD +CE +CF +CL +CO +CR +CS +CU +CU1 +ER3 +EU +EU3 +F +FE +FE2 +GA +HG +IOD +IR +IR3 +K +LA +LI +LU +MG +MN +MN3 +MOO +NA +NI +PD +PR +PT +PT4 +RB +RU +SB +SE4 +SM +SR +TB +TH +THE +TL +W +ZN +ZR + diff --git a/pyScripts/ppiinterface_dist.py b/pyScripts/ppiinterface_dist.py new file mode 100644 index 0000000000000000000000000000000000000000..e79e69ac85a5c9070b319f5dfa89d29c2570756f --- /dev/null +++ b/pyScripts/ppiinterface_dist.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- + +""" +Author: Alexandra Moine-Franel +Date: February 2018 +Version: 4 + + +IDENTIFY PROTEIN INTERFACE / POCKET RESIDUES + +Determine protein residues at a specific distance either from protein partner, +ligand (small molecule) or negative image of the pocket generated by VolSite. + + + +Usage: [Script.py] [TARGET] [PARTNER] -d (DISTANCE) +------------------------------------------------------------------------------- +[TARGET]: + Input (.mol2 or .pdb format) + Protein CHAIN + +[PARTNER]: + Input (.mol2 or .pdb format) + Negative image of the pocket generated by VolSite (.mol2 format) + OR Ligand (.pdb format) + OR Protein (.pdb format) + +(DISTANCE)- optionnal + Input (float or integer) + Distance threshold between the target and the partner + * Distance target-protein/ligand = 5 angstroms - by defaut + * Distance target-cavity = 3.5 angstroms - by default + +------------------ + +[POCKET_RESIDUES]: + Output (.txt format) + 'PDB-targetchain-partnerchain_distance.txt' (target-protein/ligand) + OR 'PDB-targetchain-CAVITY_Nx_ALL_distance.txt' (target-cavity) +""" + +# ============================================================================= + +import csv +import os +import sys +import argparse +import logging +#import time +from biopandas.mol2 import PandasMol2 +from biopandas.pdb import PandasPdb + +# ============================================================================= + +LOG = logging.getLogger("IDENTIFY PROTEIN INTERFACE / POCKET RESIDUES") + +# ============================================================================= + +def main(target, partner, distance): + #start = time.time() + if target and partner: + if os.path.splitext(target)[-1] == '.mol2' and \ + os.path.splitext(partner)[-1] == '.mol2': + mol2target = parse_mol2(target) + mol2partner = parse_mol2(partner) + mol2_respocket(mol2target, mol2partner, distance) + if os.path.exists('{}-{}_{}.txt'.format( \ + mol2target.code, mol2partner.code.rsplit('.mol2')[0], \ + distance)): + LOG.info('Finished!') + #end = time.time() + #print(end - start) + else: + LOG.error('Warning! 
[Output] Not found!') + sys.exit(1) + elif os.path.splitext(target)[-1] == '.pdb' and \ + os.path.splitext(partner)[-1] == '.pdb': + pdbtarget = parse_pdb(target) + pdbpartner = parse_pdb(partner) + if pdbpartner.df['ATOM'].empty == True: + pdb_resinterfacePL(pdbtarget, pdbpartner, distance) + if os.path.exists('{}-{}-{}_{}.txt'.format(pdbtarget.code, \ + pdbtarget.df['ATOM']['chain_id'][0], pdbpartner.df['HETATM']['residue_name'][0], \ + distance)): + LOG.info('Finished!') + #end = time.time() + #print(end - start) + else: + LOG.error('Warning! [Output] Not found!') + sys.exit(1) + else: + pdb_resinterfacePP(pdbtarget, pdbpartner, distance) + if os.path.exists('{}-{}-{}_{}.txt'.format(pdbtarget.code, \ + pdbtarget.df['ATOM']['chain_id'][0], pdbpartner.df['ATOM']['chain_id'][0], \ + distance)): + LOG.info('Finished!') + #end = time.time() + #print(end - start) + else: + LOG.error('Warning! [Output] Not found!') + sys.exit(1) + else: + LOG.error('Error! [TARGET] or [PARTNER] file not in .mol2 or .pdb format') + sys.exit(1) + elif target and not partner: + parser.error('Error: [PARTNER: protein, ligand or cavity] is required') + + + + +def parse_mol2(mol2file): + """ + Parse .mol2 file + --------------------------------------------------------------------------- + Argument: [file]: .mol2 format + Return: [dataframe] + """ + try: + pmol = PandasMol2().read_mol2(mol2file) + except(IOError), e: + LOG.error('{}'.format(e)) + sys.exit(1) + else: + return pmol + + +def parse_pdb(pdbfile): + """ + Parse .pdb file + --------------------------------------------------------------------------- + Argument: [file]: .pdb format + Return: [dataframe] + """ + try: + ppdb = PandasPdb().read_pdb(pdbfile) + except(IOError), e: + LOG.error('{}'.format(e)) + sys.exit(1) + else: + return ppdb + + +def mol2_respocket(mol2target, mol2partner, distance): + """ + Identify protein target residues at a specified distance from its partner + (i.e. the cavity negative image generated by VolSite). + --------------------------------------------------------------------------- + Arguments: + [file]: protein target (.mol2 format) + [file]: negative image of the binding pocket (.mol2 format) + [float]: distance threshold (by default, 3.5 angstroms) + Return: + [file]: pocket residues (.txt format) + """ + ppires = [] + with open('{}-{}_{}.txt'.format(mol2target.code, \ + mol2partner.code.rsplit('.mol2')[0], \ + distance), 'wb') as outfile: + for probe in xrange(len(mol2partner.df.atom_id)): + for atom in xrange(len(mol2target.df.atom_id)): + if mol2target.df.subst_name[atom] not in ppires: + xp = mol2target.df['x'][atom] + xc = mol2partner.df['x'][probe] + distX = x_dist(xp, xc) + if distX < float(distance): + yp = mol2target.df['y'][atom] + yc = mol2partner.df['y'][probe] + distXY = xy_dist(xp, yp, xc, yc) + if distXY < float(distance): + zp = mol2target.df['z'][atom] + zc = mol2partner.df['z'][probe] + distXYZ = euclidian_dist(xp, yp, zp, xc, yc, zc) + if distXYZ < float(distance): + ppires.append(mol2target.df.subst_name[atom]) + for resp in ppires: + outfile.write(resp + '\n') + + +def pdb_resinterfacePP(pdbtarget, pdbpartner, distance): + """ + Identify protein target residues at a specified distance from its partner + (i.e. 
protein) + --------------------------------------------------------------------------- + Arguments: + [file]: protein target (.pdb format) + [file]: protein partner (.pdb format) + [float]: distance threshold (by default, 5 angstroms) + Return: + [file]: interface residues (.txt format) + """ + ppires = [] + with open('{}-{}-{}_{}.txt'.format(pdbtarget.code, \ + pdbtarget.df['ATOM']['chain_id'][0], pdbpartner.df['ATOM']['chain_id'][0], \ + distance), 'wb') as outfile: + for atomt in xrange(len(pdbpartner.df['ATOM']['atom_number'])): + for atomp in xrange(len(pdbtarget.df['ATOM']['atom_number'])): + if ''.join(map(str, (pdbtarget.df['ATOM']['residue_name'][atomp], \ + pdbtarget.df['ATOM']['residue_number'][atomp]))) not in ppires: + xp = pdbtarget.df['ATOM']['x_coord'][atomp] + xc = pdbpartner.df['ATOM']['x_coord'][atomt] + distX = x_dist(xp, xc) + if distX < float(distance): + yp = pdbtarget.df['ATOM']['y_coord'][atomp] + yc = pdbpartner.df['ATOM']['y_coord'][atomt] + distXY = xy_dist(xp, yp, xc, yc) + if distXY < float(distance): + zp = pdbtarget.df['ATOM']['z_coord'][atomp] + zc = pdbpartner.df['ATOM']['z_coord'][atomt] + distXYZ = euclidian_dist(xp, yp, zp, xc, yc, zc) + if distXYZ < float(distance): + ppires.append(''.join(map(str, \ + (pdbtarget.df['ATOM']['residue_name'][atomp], \ + pdbtarget.df['ATOM']['residue_number'][atomp])))) + for resp in ppires: + outfile.write(resp + '\n') + + +def pdb_resinterfacePL(pdbtarget, pdbpartner, distance): + """ + Identify protein target residues at a specified distance from its partner + (i.e. ligand) + --------------------------------------------------------------------------- + Arguments: + [file]: protein target (.pdb format) + [file]: ligand (.pdb format) + [float]: distance threshold (by default, 5 angstroms) + Return: + [file]: pocket residues (.txt format) + """ + ppires = [] + with open('{}-{}-{}_{}.txt'.format(pdbtarget.code, \ + pdbtarget.df['ATOM']['chain_id'][0], pdbpartner.df['HETATM']['residue_name'][0], \ + distance), 'wb') as outfile: + for atomt in xrange(len(pdbpartner.df['HETATM']['atom_number'])): + for atomp in xrange(len(pdbtarget.df['ATOM']['atom_number'])): + if ''.join(map(str, (pdbtarget.df['ATOM']['residue_name'][atomp], \ + pdbtarget.df['ATOM']['residue_number'][atomp]))) not in ppires: + xp = pdbtarget.df['ATOM']['x_coord'][atomp] + xc = pdbpartner.df['HETATM']['x_coord'][atomt] + distX = x_dist(xp, xc) + if distX < float(distance): + yp = pdbtarget.df['ATOM']['y_coord'][atomp] + yc = pdbpartner.df['HETATM']['y_coord'][atomt] + + distXY = xy_dist(xp, yp, xc, yc) + if distXY < float(distance): + zp = pdbtarget.df['ATOM']['z_coord'][atomp] + zc = pdbpartner.df['HETATM']['z_coord'][atomt] + distXYZ = euclidian_dist(xp, yp, zp, xc, yc, zc) + if distXYZ < float(distance): + ppires.append(''.join(map(str, \ + (pdbtarget.df['ATOM']['residue_name'][atomp], \ + pdbtarget.df['ATOM']['residue_number'][atomp])))) + for resp in ppires: + outfile.write(resp + '\n') + + +def x_dist(x1, x2): + """ + Calculate the distance x between two 1D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinate x of target atom + coordinate x of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2) ** 0.5 + return dist + + +def xy_dist(x1, y1, x2, y2): + """ + Calculate the distance xy between two 2D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xy of target atom + coordinates xy of partnet atom + Return: + 
[float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5 + return dist + + +def euclidian_dist(x1, y1, z1, x2, y2, z2): + """ + Calculate the euclidian distance xyz between two 3D points + ---------------------------------------------------------------------- + Arguments: + [float]: coordinates xyz of target atom + coordinates xyz of partnet atom + Return: + [float]: distance + """ + dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5 + return dist + + + +def setlogger(): + LOG.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \ + %(levelname)s - %(message)s') + ch.setFormatter(formatter) + LOG.addHandler(ch) + + +def default_distance(options): + if os.path.splitext(options.partner)[-1] == '.mol2': + options.distance = 3.5 + elif os.path.splitext(options.partner)[-1] == '.pdb': + options.distance = 5.0 + return options.distance + +# ============================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'Identify interface/pocket residues') + parser.add_argument('target', \ + help = 'Input [.mol2 or .pdb file]: protein target') + parser.add_argument('partner', \ + help = 'Input [.pdb file]: protein partner or ligand; OR [.mol2 file]: cavity') + parser.add_argument('-d', dest = 'distance', type = float, \ + help = 'Input [float or integer]: distance threshold \ + {by default, target-protein/ligand = 5A; target-cavity = 3.5A}') + options = parser.parse_args() + + if options.distance is None: + default_distance(options) + + setlogger() + main(options.target, options.partner, options.distance)
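Editor's note on ppiinterface_dist.py: the staged x / xy / xyz distance checks inside three nested Python loops can also be expressed as a single vectorised pairwise-distance computation. The sketch below is an illustration under stated assumptions, not part of the committed scripts: it assumes numpy and scipy are available in addition to biopandas, reuses the column names the script already relies on ('x_coord', 'y_coord', 'z_coord', 'residue_name', 'residue_number'), and the file names in the usage comment are hypothetical. It returns the same residue labels (residue name concatenated with residue number) as pdb_resinterfacePP, sorted rather than in order of discovery.

# Hedged sketch, not part of the diff: vectorised protein-protein interface search.
import numpy as np
from scipy.spatial.distance import cdist
from biopandas.pdb import PandasPdb

def interface_residues(target_pdb, partner_pdb, distance=5.0):
    """Return residue labels (e.g. 'ARG123') of the target chain that have
    at least one atom within `distance` angstroms of the partner chain."""
    target = PandasPdb().read_pdb(target_pdb).df['ATOM']
    partner = PandasPdb().read_pdb(partner_pdb).df['ATOM']
    t_xyz = target[['x_coord', 'y_coord', 'z_coord']].to_numpy()
    p_xyz = partner[['x_coord', 'y_coord', 'z_coord']].to_numpy()
    # Boolean mask over target atoms: any partner atom closer than the cut-off.
    close = (cdist(t_xyz, p_xyz) < distance).any(axis=1)
    labels = (target['residue_name'] + target['residue_number'].astype(str))[close]
    return sorted(set(labels))

# Example usage (hypothetical file names):
# print(interface_residues('1abc_A.pdb', '1abc_B.pdb', distance=5.0))

The design trade-off is memory for speed: cdist materialises the full target-by-partner distance matrix, which is fine for single chains but could be chunked for very large assemblies.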