From 1e790032d20d60f9bcb0492853c825dc6c22c77a Mon Sep 17 00:00:00 2001
From: Fabien Mareuil <fmareuil@pasteur.fr>
Date: Thu, 12 Apr 2018 11:03:39 +0200
Subject: [PATCH] reformat

Former-commit-id: dbc51f36f32ea66e9d4c7f9ceda01e300ca1db4a
---
 ...nterface_residues_ppc_dimer_IN_PROGRESS.py | 445 +++++++++---------
 1 file changed, 221 insertions(+), 224 deletions(-)

diff --git a/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
index a2f7eb30..66883850 100644
--- a/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
+++ b/pyScripts/get_interface_residues_ppc_dimer_IN_PROGRESS.py
@@ -10,37 +10,37 @@ Version: 2
 1) IDENTIFY PROTEIN/PROTEIN INTERFACE RESIDUES
 
 2) SAVE EACH PROTEIN FIRST CHAIN IN SEPARATE FILE (after cleaning)
-	- Number of models
-	- Alternative atomic locations
-	- Heteroatoms
+    - Number of models
+    - Alternative atomic locations
+    - Heteroatoms
 
 
 
 
-Usage:	[Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
+Usage:    [Script.py] [PROTEIN/PROTEIN COMPLEX] -d (DISTANCE)
 -------------------------------------------------------------------------------
 
 Arguments:
 ------------
 [PROTEIN/PROTEIN COMPLEX]:
-	Input (.pdb format)
-	Protein/protein complex
+    Input (.pdb format)
+    Protein/protein complex
 
 (DISTANCE)- optionnal
-	Input (float or integer)
-	Distance threshold between the target and the partner
-	(6 angstroms - by defaut)
+    Input (float or integer)
+    Distance threshold between the target and the partner
+    (6 angstroms - by defaut)
 
 
 Return:
 ------------
 [PROTEIN/PROTEIN INTERFACE RESIDUES]: 
-	Output (.txt format) "PDB_Chain1-Chain2_distance.txt"
-	(chain ID, residue name and residue ID included)
+    Output (.txt format) "PDB_Chain1-Chain2_distance.txt"
+    (chain ID, residue name and residue ID included)
 
 [PROTEIN CHAIN]: 
-	Output (.pdb format) "PDB_Chain1.pdb"
-	(heteroatom not included)
+    Output (.pdb format) "PDB_Chain1.pdb"
+    (heteroatom not included)
 """
 
 # =============================================================================
@@ -67,258 +67,255 @@ NODIMER_FILENAME = 'pdb_not_dimer'
 # =============================================================================
 
 def main(pdb, distance):
-	start = time.time()
-
-	pdb_code = file.strip().split('.pdb')[:-1]
-	structure = PDBParser().get_structure(pdb_code, file)
-	model = structure[0]
-
-	# Check if multiple models available
-	if len(list(structure.get_models())) != 0:
-		try:
-			if not os.path.exists('{}.txt'.format(MODEL_FILENAME)):
-				with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile:
-					outfile.write(pdb_code)
-			else:
-				with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile:
-					outfile.write(str(pdb_code) + '\n')
-		except IOError, e:
-			LOG.error('{}'.format(e))
-			sys.exit(1)
-			
-	# Check if 3D structure contains alternative atomic locations
-	if get_altloc(model) != 0: 
-		try:
-			if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)):
-				with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile:
-					outfile.write(str(pdb_code) + '\n')
-			else:
-				with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile:
-					outfile.write(str(pdb_code) + '\n')
-		except IOError, e:
-			LOG.error('{}'.format(e))
-			sys.exit(1)	
-	else:
-
-		# Get the first copy of each chain
-		target_partner_chain = select_first_chain(get_protein_assembly(structure))
-		print(target_partner_chain)
-
-		# Check if 3D structure contains more than two molecule IDs
-		if len(target_partner_chain) > 2:
-			try:
-				if not os.path.exists('{}.txt'.format(NODIMER_FILENAME_FILENAME)):
-					with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'w') as outfile:
-						outfile.write(pdb_code)
-				else:
-					with open('{}.txt'.format(NODIMER_FILENAME_FILENAME), 'a') as outfile:
-						outfile.write(str(pdb_code) + '\n')
-			except IOError, e:
-				LOG.error('{}'.format(e))
-				sys.exit(1)
-
-		
-		# Remove heteroatom (protein/protein complex, no ligand)
-		for chain in target_partner_chain:
-			residue_to_remove = remove_hetatm(model, chain)
-			for residue in residue_to_remove:
-				model[chain].detach_child(residue[1])
-
-
-		# Get all chain-chain combinations
-		subset = permutation(target_partner_chain)
-
-
-		# Calculate chain-chain distance (interaction patch)
-		for chain_pair in subset:
-			target = chain_pair[0]
-			partner = chain_pair[1]	
-
-			if len(model[target].get_list()) > 3 and \
-			len(model[partner].get_list()) > 3:
-			
-				get_interface_residues(target, partner, model, distance)
-
-
-		# Save each chain in different file
-		for chain in target_partner_chain:
-			io = PDBIO()
-			io.set_structure(model)
-			io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain), \
-			preserve_atom_numbering = True)
-
-			
-			
-		end = time.time()
-		print('TIME: {}'.format(end - start))
+    start = time.time()
+
+    pdb_code = pdb.strip().split('.pdb')[:-1]
+    structure = PDBParser().get_structure(pdb_code, pdb)
+    model = structure[0]
+
+    # Check if multiple models available
+    if len(list(structure.get_models())) != 0:
+        try:
+            if not os.path.exists('{}.txt'.format(MODEL_FILENAME)):
+                with open('{}.txt'.format(MODEL_FILENAME), 'w') as outfile:
+                    outfile.write(pdb_code)
+            else:
+                with open('{}.txt'.format(MODEL_FILENAME), 'a') as outfile:
+                    outfile.write(str(pdb_code) + '\n')
+        except IOError as e:
+            LOG.error('{}'.format(e))
+            sys.exit(1)
+            
+    # Check if 3D structure contains alternative atomic locations
+    if get_altloc(model) != 0: 
+        try:
+            if not os.path.exists('{}.txt'.format(ALTLOC_FILENAME)):
+                with open('{}.txt'.format(ALTLOC_FILENAME), 'w') as outfile:
+                    outfile.write(str(pdb_code) + '\n')
+            else:
+                with open('{}.txt'.format(ALTLOC_FILENAME), 'a') as outfile:
+                    outfile.write(str(pdb_code) + '\n')
+        except IOError as e:
+            LOG.error('{}'.format(e))
+            sys.exit(1)    
+    else:
+
+        # Get the first copy of each chain
+        target_partner_chain = select_first_chain(get_protein_assembly(structure))
+        print(target_partner_chain)
+
+        # Check if 3D structure contains more than two molecule IDs
+        if len(target_partner_chain) > 2:
+            try:
+                if not os.path.exists('{}.txt'.format(NODIMER_FILENAME)):
+                    with open('{}.txt'.format(NODIMER_FILENAME), 'w') as outfile:
+                        outfile.write(pdb_code)
+                else:
+                    with open('{}.txt'.format(NODIMER_FILENAME), 'a') as outfile:
+                        outfile.write(str(pdb_code) + '\n')
+            except IOError as e:
+                LOG.error('{}'.format(e))
+                sys.exit(1)
+
+        
+        # Remove heteroatom (protein/protein complex, no ligand)
+        for chain in target_partner_chain:
+            residue_to_remove = remove_hetatm(model, chain)
+            for residue in residue_to_remove:
+                model[chain].detach_child(residue[1])
+
+
+        # Get all chain-chain combinations
+        subset = permutation(target_partner_chain)
+
+
+        # Calculate chain-chain distance (interaction patch)
+        for chain_pair in subset:
+            target = chain_pair[0]
+            partner = chain_pair[1]    
+
+            if len(model[target].get_list()) > 3 and len(model[partner].get_list()) > 3:
+            
+                get_interface_residues(target, partner, model, distance)
+
+
+        # Save each chain in different file
+        for chain in target_partner_chain:
+            io = PDBIO()
+            io.set_structure(model)
+            io.save('{}_{}.pdb'.format(pdb_code, chain), select = ChainSelection(chain),
+                    preserve_atom_numbering = True)
+
+            
+            
+        end = time.time()
+        print('TIME: {}'.format(end - start))
 
 
 class ChainSelection(Select):
-	def __init__(self, chain):
-		self.chain = chain
+    def __init__(self, chain):
+        self.chain = chain
 
-	def accept_chain(self, chain):
-		if chain.get_id() == self.chain:
-			return 1
-		else:
-			return 0
+    def accept_chain(self, chain):
+        if chain.get_id() == self.chain:
+            return 1
+        else:
+            return 0
 
 
 def get_altloc(model):
-	altloc = 0
-	for chain in model.child_list:
-		for residue in chain.get_list():
-			if residue.is_disordered():
-				altloc = altloc + 1
-				break
-	return altloc
+    altloc = 0
+    for chain in model.child_list:
+        for residue in chain.get_list():
+            if residue.is_disordered():
+                altloc = altloc + 1
+                break
+    return altloc
 
 
 def get_protein_assembly(structure):
-	assembly = {}
-	for molecule in structure.header['compound'].keys():
-		assembly[molecule] = []
-		for chain in structure.header['compound'][molecule]['chain'].split(','):
-			assembly[molecule].append(chain.strip().upper())
-	return assembly
-	# {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
+    assembly = {}
+    for molecule in structure.header['compound'].keys():
+        assembly[molecule] = []
+        for chain in structure.header['compound'][molecule]['chain'].split(','):
+            assembly[molecule].append(chain.strip().upper())
+    return assembly
+    # {'1': ['A', 'C', 'E'], '2': ['B', 'D']}
 
 
 def select_first_chain(assembly):
-	chain_id = []
-	for molecule in assembly.keys():
-		chain_id.append(assembly[molecule][0])
-	return chain_id
-	# ['A', 'B']
+    chain_id = []
+    for molecule in assembly.keys():
+        chain_id.append(assembly[molecule][0])
+    return chain_id
+    # ['A', 'B']
 
 
 def permutation(list_id):
-	subset = []
-	for permutation in itertools.permutations(list_id, 2):
-		subset.append(permutation)
-	print subset
-	return subset
-	# [(['A'], ['B']), (['B'], ['A'])]
+    subset = []
+    for permutation in itertools.permutations(list_id, 2):
+        subset.append(permutation)
+    print(subset)
+    return subset
+    # [(['A'], ['B']), (['B'], ['A'])]
 
 
 def remove_hetatm(model, chain):
-	residue_to_remove = []
-	for residue in model[chain].get_residues():
-		if residue.id[0] != ' ': 
-			residue_to_remove.append((model[chain].id, residue.id))
-	return residue_to_remove
+    residue_to_remove = []
+    for residue in model[chain].get_residues():
+        if residue.id[0] != ' ': 
+            residue_to_remove.append((model[chain].id, residue.id))
+    return residue_to_remove
 
 
 
 def get_interface_residues(target, partner, model, distance):
-	ires = []
-	with open('{}_{}{}_{}.txt'.format(model.parent.id, \
-	target, partner, distance), 'wb') as outfile:			
+    ires = []
+    with open('{}_{}{}_{}.txt'.format(model.parent.id, target, partner, distance), 'wb') as outfile:
  
-			for rest in model[target].get_residues():
-				if ''.join(map(str,(rest.get_parent().get_id(), '.',
-				rest.get_resname(), rest.get_id()[1]))):
-					for atomt in rest.get_atoms():
-						
-						for atomp in model[partner].get_atoms():
-							xt = atomt.get_coord()[0]
-							xp = atomp.get_coord()[0]
-							dist_x = calculate_euclidist_x(xt, xp) 
+            for rest in model[target].get_residues():
+                if ''.join(map(str,(rest.get_parent().get_id(), '.',
+                rest.get_resname(), rest.get_id()[1]))):
+                    for atomt in rest.get_atoms():
+                        
+                        for atomp in model[partner].get_atoms():
+                            xt = atomt.get_coord()[0]
+                            xp = atomp.get_coord()[0]
+                            dist_x = calculate_euclidist_x(xt, xp) 
 
-							if dist_x < float(distance):
-								yt = atomt.get_coord()[1]
-								yp = atomp.get_coord()[1]
-								dist_xy = calculate_euclidist_xy(xt, yt, xp, yp)
+                            if dist_x < float(distance):
+                                yt = atomt.get_coord()[1]
+                                yp = atomp.get_coord()[1]
+                                dist_xy = calculate_euclidist_xy(xt, yt, xp, yp)
 
-								if dist_xy < float(distance):
-									zt = atomt.get_coord()[2]
-									zp = atomp.get_coord()[2]
-									dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp)
+                                if dist_xy < float(distance):
+                                    zt = atomt.get_coord()[2]
+                                    zp = atomp.get_coord()[2]
+                                    dist_xyz = calculate_euclidist_xyz(xt, yt, zt, xp, yp, zp)
 
-									if dist_xyz < float(distance):
-										ires.append(''.join(map(str, \
-										(rest.get_parent().get_id(), '.', \
-										rest.get_resname(), rest.get_id()[1])))
+                                    if dist_xyz < float(distance):
+                                        ires.append(''.join(map(str,
+                                                                (rest.get_parent().get_id(), '.',
+                                                                 rest.get_resname(), rest.get_id()[1]))))
 
-	for element in ires:
-			outfile.write(element + '\n')
+    for element in ires:
+            outfile.write(element + '\n')
 
 
 
 def calculate_euclidist_x(x1, x2):
-	"""
-	Calculate the distance x between two 1D points
-	----------------------------------------------------------------------
-	Arguments:
-		[float]: coordinate x of target atom
-				 coordinate x of partnet atom
-	Return:
-		[float]: distance 
-	"""
-	dist = ((x1-x2)**2) ** 0.5
-	return dist
+    """
+    Calculate the distance x between two 1D points
+    ----------------------------------------------------------------------
+    Arguments:
+        [float]: coordinate x of target atom
+                 coordinate x of partnet atom
+    Return:
+        [float]: distance 
+    """
+    dist = ((x1-x2)**2) ** 0.5
+    return dist
 
 
 def calculate_euclidist_xy(x1, y1, x2, y2):
-	"""
-	Calculate the distance xy between two 2D points
-	----------------------------------------------------------------------
-	Arguments:
-		[float]: coordinates xy of target atom
-				 coordinates xy of partnet atom
-	Return:
-		[float]: distance 
-	"""
-	dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5	
-	return dist
-
-	
+    """
+    Calculate the distance xy between two 2D points
+    ----------------------------------------------------------------------
+    Arguments:
+        [float]: coordinates xy of target atom
+                 coordinates xy of partnet atom
+    Return:
+        [float]: distance 
+    """
+    dist = ((x1-x2)**2 + (y1-y2)**2) ** 0.5    
+    return dist
+
+    
 def calculate_euclidist_xyz(x1, y1, z1, x2, y2, z2):
-	"""
-	Calculate the euclidian distance xyz between two 3D points
-	----------------------------------------------------------------------
-	Arguments:
-		[float]: coordinates xyz of target atom
-				 coordinates xyz of partnet atom
-	Return:
-		[float]: distance 
-	"""
-	dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5
-	return dist
+    """
+    Calculate the euclidian distance xyz between two 3D points
+    ----------------------------------------------------------------------
+    Arguments:
+        [float]: coordinates xyz of target atom
+                 coordinates xyz of partnet atom
+    Return:
+        [float]: distance 
+    """
+    dist = ((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2) ** 0.5
+    return dist
 
 
 
 def setlogger():
-	LOG.setLevel(logging.INFO)
-	ch = logging.StreamHandler()
-	ch.setLevel(logging.INFO)
-	formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - \
-	%(levelname)s - %(message)s')
-	ch.setFormatter(formatter)
-	LOG.addHandler(ch)
+    LOG.setLevel(logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    LOG.addHandler(ch)
 
 # =============================================================================
 
 if __name__ == '__main__':
-	parser = argparse.ArgumentParser(description = 
-	textwrap.dedent('''
-	1) Format protein/protein complex PDB structure (no ligand)
-		1.a) Number of models
-		1.b) Alternative atomic locations
-		1.c) Heteroatoms
-	2) Identify protein/protein interface residues 
-	   (for combinations of first chain of each protein)
-	3) Save each chain in separate files''')
-	parser.add_argument('pdb', help = 
-	textwrap.dedent(''' Input [.pdb file]: PBD 3D structure
-	(with HEADER included)'''))
-	parser.add_argument('-d', dest = 'distance', help = 
-	textwrap.dedent('''Input [float or integer]: distance threshold \
-	{by default, 6 Angstroms}''')
-	options = parser.parse_args()
-	
-	if options.distance is None:
-		options.distance = 6.0
-				
-	setlogger()
-	main(options.file, options.distance)
+    parser = argparse.ArgumentParser(description = 
+    textwrap.dedent('''
+    1) Format protein/protein complex PDB structure (no ligand)
+        1.a) Number of models
+        1.b) Alternative atomic locations
+        1.c) Heteroatoms
+    2) Identify protein/protein interface residues 
+       (for combinations of first chain of each protein)
+    3) Save each chain in separate files'''))
+    parser.add_argument('pdb', help = 
+    textwrap.dedent(''' Input [.pdb file]: PBD 3D structure
+    (with HEADER included)'''))
+    parser.add_argument('-d', dest = 'distance', help = 
+    textwrap.dedent('''Input [float or integer]: distance threshold
+    {by default, 6 Angstroms}'''))
+    options = parser.parse_args()
+    
+    if options.distance is None:
+        options.distance = 6.0
+                
+    setlogger()
+    main(options.file, options.distance)
-- 
GitLab