diff --git a/ippisite/ippidb/utils.py b/ippisite/ippidb/utils.py index dd72f6b1b59cc72e1d62567b72c0d0fd44dbf249..b931863521a387c128a6496eb2a172aa35af0717 100644 --- a/ippisite/ippidb/utils.py +++ b/ippisite/ippidb/utils.py @@ -11,12 +11,30 @@ except ImportError: import openbabel as ob -def mol2smi(mol_string): +def mol2smi(mol_string: str) -> str: + """ + Convert a compound structure from MOL to SMILES format + using Open Babel + + :param mol_string: structure for the compound in MOL format + :type mol_string: str + :return: structure for the compound in SMILES format + :rtype: str + """ m = pybel.readstring("mol", mol_string) return m.write(format="smi").strip() -def smi2mol(smi_string): +def smi2mol(smi_string: str) -> str: + """ + Convert a compound structure from SMILES to MOL format + using Open Babel + + :param smi_string: structure for the compound in SMILES format + :type smi_string: str + :return: structure for the compound in MOL format + :rtype: str + """ m = pybel.readstring("smi", smi_string) # generate 2D coordinates for MarvinJS layout # NB: the line below should be replaced as soon as the new version of openbabel @@ -31,7 +49,16 @@ smi2inchi_conv = ob.OBConversion() smi2inchi_conv.SetInAndOutFormats("smi", "inchi") -def smi2inchi(smi_string): +def smi2inchi(smi_string: str) -> str: + """ + Convert a compound structure from SMILES to InChI format + using Open Babel + + :param smi_string: structure for the compound in SMILES format + :type smi_string: str + :return: structure for the compound in InChI format + :rtype: str + """ mol = ob.OBMol() smi2inchi_conv.ReadString(mol, smi_string) return smi2inchi_conv.WriteString(mol).strip() @@ -42,7 +69,16 @@ smi2inchikey_conv.SetInAndOutFormats("smi", "inchi") smi2inchikey_conv.SetOptions("K", smi2inchikey_conv.OUTOPTIONS) -def smi2inchikey(smi_string): +def smi2inchikey(smi_string: str) -> str: + """ + Convert a compound structure from SMILES to InChIKey format + using Open Babel + + :param smi_string: 
structure for the compound in SMILES format + :type smi_string: str + :return: structure for the compound in InChIKey format + :rtype: str + """ mol = ob.OBMol() smi2inchikey_conv.ReadString(mol, smi_string) return smi2inchikey_conv.WriteString(mol).strip() @@ -52,7 +88,16 @@ smi2sdf_conv = ob.OBConversion() smi2sdf_conv.SetInAndOutFormats("smi", "sdf") -def smi2sdf(smi_dict): +def smi2sdf(smi_dict: dict) -> str: + """ + Convert a series of compound structures from SMILES to SDF format + using Open Babel + + :param smi_dict: a {compound id: SMILES} dictionary of the compounds + :type smi_dict: dict + :return: structures for the compounds in SDF format + :rtype: str + """ sdf_string = "" for id, smiles in smi_dict.items(): mol = ob.OBMol() @@ -63,37 +108,67 @@ def smi2sdf(smi_dict): class FingerPrinter(object): - def __init__(self, name="FP4"): + def __init__(self, name: str = "FP4"): + """ + :param name: name of the FingerPrint type to use, defaults to FP4 + :type name: str + """ self.fingerprinter = ob.OBFingerprint.FindFingerprint(name) self._smiles_parser = ob.OBConversion() self._smiles_parser.SetInFormat("smi") - def parse_smiles(self, smiles): - "parse a SMILES into a molecule" + def parse_smiles(self, smiles: str) -> ob.OBMol: + """ + Parse a SMILES into a molecule + + :param smiles: compound SMILES + :type smiles: str + :return: compound openbabel object + :rtype: ob.OBMol + """ mol = ob.OBMol() self._smiles_parser.ReadString(mol, smiles) return mol - def fp(self, smiles): - "generate a fingerprint from a SMILES string" + def fp(self, smiles: str) -> ob.vectorUnsignedInt: + """ + Generate a fingerprint from a SMILES string + + :param smiles: compound SMILES + :type smiles: str + :return: fingerprint + :rtype: ob.vectorUnsignedInt + """ fp = ob.vectorUnsignedInt() self.fingerprinter.GetFingerprint(self.parse_smiles(smiles), fp) return fp - def fp_dict(self, smiles_dict): + def fp_dict(self, smiles_dict: dict) -> dict: """ - generate a dict of {compound id: 
fingerprint} from a dict of + Generate a dict of {compound id: fingerprint} from a dict of {compound id: fingerprint} + + :param smiles_dict: a {compound id: SMILES} dictionary + :type smiles_dict: dict + :return: the corresponding {compound id: fingerprint} dictionary + :rtype: dict """ return { compound_id: self.fp(smiles_entry) for compound_id, smiles_entry in smiles_dict.items() } - def tanimoto_fps(self, smiles_query, fp_dict): + def tanimoto_fps(self, smiles_query: str, fp_dict: dict) -> dict: """ - perform a tanimoto similarity search using a smiles query string + Perform a tanimoto similarity search using a smiles query string on a dict of {compound id: fingerprint} + + :param smiles_query: SMILES string of the query compound + :type smiles_query: str + :param fp_dict: a {compound id: fingerprint} dictionary + :type fp_dict: dict + :return: the {compound id: tanimoto value} dictionary for this query + :rtype: dict """ fp_query = self.fp(smiles_query) return { @@ -101,10 +176,17 @@ class FingerPrinter(object): for compound_id, fp_entry in fp_dict.items() } - def tanimoto_smiles(self, query_smiles, smiles_dict): + def tanimoto_smiles(self, query_smiles: str, smiles_dict: dict) -> dict: """ - perform a tanimoto similarity search using a smiles query on a + Perform a tanimoto similarity search using a smiles query on a dict of {compound id: SMILES} + + :param query_smiles: SMILES string of the query compound + :type query_smiles: str + :param smiles_dict: a {compound id: SMILES} dictionary + :type smiles_dict: dict + :return: the {compound id: tanimoto value} dictionary for this query + :rtype: dict """ fp_dict = self.fp_dict(smiles_dict) return self.tanimoto_fps(query_smiles, fp_dict)