[chemfp] chemfp-1.0 is out!

Greg Landrum greg at landrumdecker.com
Fri Nov 4 00:32:25 EDT 2011


Hi Andrew,
On Wed, Sep 21, 2011 at 1:32 AM, Andrew Dalke <dalke at dalkescientific.com> wrote:

> I've spent a lot of the last few weeks working on test
> cases and documentation. That's finally done!
>
> I've put the source code up at
>  http://code.google.com/p/chem-fingerprints/


Due to vacation, the RDKit release, and work travel, it's taken me an
appallingly long time to do this, but this morning I finally
managed to try out chemfp. The interface takes some getting used to,
but the docs are good and after a few minutes I was happily doing
searches.

The next step was to add support for the RDKit Morgan fingerprints
(they've been available as bit vectors for a while). This also didn't
take too long; a diff is attached. I will try to find the time to get
the topological-torsion and atom-pair fingerprints in there as well
in the not-too-distant future.

One comment on the code: as I was adding the Morgan fingerprints, it
seemed like I had to add the parameter names and their types or default
values way too often. I don't have a concrete suggestion yet, but this
could stand to be refactored a bit.

-greg
-------------- next part --------------
diff -r 777d2cdf002c chemfp/commandline/rdkit2fps.py
--- a/chemfp/commandline/rdkit2fps.py Tue Sep 20 23:08:08 2011 +0100
+++ b/chemfp/commandline/rdkit2fps.py Fri Nov 04 05:19:11 2011 +0100
@@ -44,6 +44,22 @@
"--useHs", type=int, default=1,
help="information about the number of hydrogens on each atom")

+morgan_group = parser.add_argument_group("RDKit morgan fingerprints")
+morgan_group.add_argument("--morgan", action="store_true",
+ help="generate morgan fingerprints")
+morgan_group.add_argument(
+ "--radius", type=int, metavar="INT", default=rdkit.RADIUS,
+ help="radius for the morgan algorithm (default=%d)" % rdkit.RADIUS)
+morgan_group.add_argument(
+ "--useFeatures", type=int, metavar="INT", default=rdkit.USE_FEATURES,
+ help="use chemical-feature invariants (default=%d)" % rdkit.USE_FEATURES)
+morgan_group.add_argument(
+ "--useChirality", type=int, metavar="INT", default=rdkit.USE_CHIRALITY,
+ help="include information about chirality (default=%d)" % rdkit.USE_CHIRALITY)
+morgan_group.add_argument(
+ "--useBondTypes", type=int, metavar="INT", default=rdkit.USE_BOND_TYPES,
+ help="include information about bond types (default=%d)" % rdkit.USE_BOND_TYPES)
+
maccs_group = parser.add_argument_group("166 bit MACCS substructure keys")
maccs_group.add_argument(
"--maccs166", action="store_true", help="generate MACCS fingerprints")
diff -r 777d2cdf002c chemfp/rdkit.py
--- a/chemfp/rdkit.py Tue Sep 20 23:08:08 2011 +0100
+++ b/chemfp/rdkit.py Fri Nov 04 05:19:11 2011 +0100
@@ -11,6 +11,7 @@

import rdkit
from rdkit import Chem
+from rdkit.Chem import rdMolDescriptors
import rdkit.rdBase
from rdkit.Chem.MACCSkeys import GenMACCSKeys

@@ -276,17 +277,65 @@
return decoders.from_binary_lsb(fp.ToBitString())[1]
return rdk_fingerprinter

+########### The MACCS fingerprinter
+
+
+def maccs166_fingerprinter(mol):
+ fp = GenMACCSKeys(mol)
+ # In RDKit the first bit is always bit 1 .. bit 0 is empty (?!?!)
+ bitstring_with_167_bits = fp.ToBitString()
+ return decoders.from_binary_lsb(bitstring_with_167_bits[1:])[1]
+
+def make_maccs166_fingerprinter():
+ return maccs166_fingerprinter
+
+
+########### The morgan fingerprinter
+
+# Some constants shared by the fingerprinter and the command-line code.
+
+# NUM_BITS borrowed from above
+RADIUS = 2
+USE_FEATURES = 0
+USE_CHIRALITY = 0
+USE_BOND_TYPES = 1
+
+def make_morgan_fingerprinter(fpSize=NUM_BITS,
+ radius=RADIUS,
+ useFeatures=USE_FEATURES,
+ useChirality=USE_CHIRALITY,
+ useBondTypes=USE_BOND_TYPES):
+ if not (fpSize > 0):
+ raise ValueError("fpSize must be positive")
+ if not (radius >= 0):
+ raise ValueError("radius cannot be negative")
+
+ def morgan_fingerprinter(mol):
+ fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
+ mol, radius, nBits=fpSize, useChirality=useChirality,
+ useBondTypes=useBondTypes,useFeatures=useFeatures)
+ return decoders.from_binary_lsb(fp.ToBitString())[1]
+ return morgan_fingerprinter
+

_fingerprint_decoders = {"minPath": int,
"maxPath": int,
"fpSize": int,
"nBitsPerHash": int,
- "useHs": int}
+ "useHs": int,
+ "radius":int,
+ "useFeatures":int,
+ "useChirality":int,
+ "useBondTypes":int}
_fingerprint_defaults = {"minPath": 1,
"maxPath": 7,
"fpSize": 2048,
"nBitsPerHash": 4,
- "useHs": 1}
+ "useHs": 1,
+ "radius":2,
+ "useFeatures":0,
+ "useChirality":0,
+ "useBondTypes":1}

def decode_fingerprint_parameters(parameters):
fingerprinter_kwargs = _fingerprint_defaults.copy()
@@ -297,18 +346,8 @@
fingerprinter_kwargs[name] = decoder(value)
return fingerprinter_kwargs

-########### The MACCS fingerprinter


-def maccs166_fingerprinter(mol):
- fp = GenMACCSKeys(mol)
- # In RDKit the first bit is always bit 1 .. bit 0 is empty (?!?!)
- bitstring_with_167_bits = fp.ToBitString()
- return decoders.from_binary_lsb(bitstring_with_167_bits[1:])[1]
-
-def make_maccs166_fingerprinter():
- return maccs166_fingerprinter
-
####################

class _RDKitFingerprinter(types.Fingerprinter):
@@ -345,3 +384,21 @@
return cls(kwargs)

_get_fingerprinter = staticmethod(make_rdk_fingerprinter)
+
+class RDKitMorganFingerprinter_v1(_RDKitFingerprinter):
+ name = "RDKit-MorganFingerprint/1"
+ format_string = (
+ "radius=%(radius)d fpSize=%(fpSize)s useFeatures=%(useFeatures)d "
+ "useChirality=%(useChirality)d useBondTypes=%(useBondTypes)d")
+ software = SOFTWARE
+ def __init__(self, kwargs):
+ self.num_bits = kwargs["fpSize"]
+ super(RDKitMorganFingerprinter_v1, self).__init__(kwargs)
+
+ @classmethod
+ def from_parameters(cls, parameters):
+ kwargs = decode_fingerprint_parameters(parameters)
+ return cls(kwargs)
+
+ _get_fingerprinter = staticmethod(make_morgan_fingerprinter)
+


More information about the chemfp mailing list