RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MorganGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_MORGANGEN_H_2018_07
13#define RD_MORGANGEN_H_2018_07
14
16#include <cstdint>
17namespace RDKit {
18
20
21/**
22 \brief Default atom invariants generator for Morgan fingerprint, generates
23 ECFP-type invariants
24
25 */
28 bool df_includeRingMembership;
29
30 public:
31 /**
32 \brief Construct a new MorganAtomInvGenerator object
33
34 \param includeRingMembership : if set, whether or not the atom is in a ring
35 will be used in the invariant list.
36 */
37 MorganAtomInvGenerator(const bool includeRingMembership = true);
38
39 std::vector<std::uint32_t> *getAtomInvariants(
40 const ROMol &mol) const override;
41
42 std::string infoString() const override;
43 void toJSON(boost::property_tree::ptree &pt) const override;
44 void fromJSON(const boost::property_tree::ptree &) override;
45 MorganAtomInvGenerator *clone() const override;
46};
47
48/**
49 \brief Alternative atom invariants generator for Morgan fingerprint, generates
50 FCFP-type invariants
51
52 */
55 void cleanUpPatterns();
56 std::vector<const ROMol *> *dp_patterns = nullptr;
57
58 public:
59 /**
60 \brief Construct a new MorganFeatureAtomInvGenerator object
61
62 \param patterns : if provided, should contain the queries used to assign
63 atom-types. If not provided, feature definitions adapted from reference:
64 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
65 Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
66 */
68 const std::vector<const ROMol *> *patterns = nullptr);
70
71 std::vector<std::uint32_t> *getAtomInvariants(
72 const ROMol &mol) const override;
73
74 std::string infoString() const override;
75 void toJSON(boost::property_tree::ptree &pt) const override;
76 void fromJSON(const boost::property_tree::ptree &) override;
78};
79
80/**
81 \brief Bond invariants generator for Morgan fingerprint
82
83 */
86 bool df_useBondTypes;
87 bool df_useChirality;
88
89 public:
90 /**
91 \brief Construct a new MorganBondInvGenerator object
92
93 \param useBondTypes : if set, bond types will be included as a part of the
94 bond invariants
95 \param useChirality : if set, chirality information will be included as a
96 part of the bond invariants
97 */
98 MorganBondInvGenerator(const bool useBondTypes = true,
99 const bool useChirality = false);
100
101 std::vector<std::uint32_t> *getBondInvariants(
102 const ROMol &mol) const override;
103
104 std::string infoString() const override;
105 void toJSON(boost::property_tree::ptree &pt) const override;
106 void fromJSON(const boost::property_tree::ptree &pt) override;
107 MorganBondInvGenerator *clone() const override;
108 ~MorganBondInvGenerator() override = default;
109};
110
111/**
112 \brief Class for holding Morgan fingerprint specific arguments
113
114 */
116 public:
118 unsigned int d_radius = 3;
120 bool df_useBondTypes = true;
121
122 std::string infoString() const override;
123 void toJSON(boost::property_tree::ptree &pt) const override;
124 void fromJSON(const boost::property_tree::ptree &pt) override;
125
126 /**
127 \brief Construct a new MorganArguments object
128
129 \param radius the number of iterations to grow the fingerprint
130 \param countSimulation if set, use count simulation while generating the
131 fingerprint
132 \param includeChirality if set, chirality information will be added to the
133 generated bit id, independently from bond invariants
134 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
135 have a nonzero invariant
136 \param countBounds boundaries for count simulation, corresponding bit will
137 be set if the count is higher than the number provided for that spot
138 \param fpSize size of the generated fingerprint, does not affect the sparse
139 versions
140 \param includeRedundantEnvironments if set redundant environments will be
141 included in the fingerprint
142 \param useBondTypes if set bond types will be included in the fingerprint
143 */
144 MorganArguments(unsigned int radius = 3, bool countSimulation = false,
145 bool includeChirality = false,
146 bool onlyNonzeroInvariants = false,
147 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
148 std::uint32_t fpSize = 2048,
149 bool includeRedundantEnvironments = false,
150 bool useBondTypes = true)
151 : FingerprintArguments(countSimulation, countBounds, fpSize, 1,
152 includeChirality),
153 df_onlyNonzeroInvariants(onlyNonzeroInvariants),
154 d_radius(radius),
155 df_includeRedundantEnvironments(includeRedundantEnvironments),
156 df_useBondTypes(useBondTypes) {};
157};
158
159/**
160 \brief Class for holding the bit-id created from Morgan fingerprint
161 environments and the additional data necessary for extra outputs
162
163 */
164template <typename OutputType>
166 : public AtomEnvironment<OutputType> {
167 const OutputType d_code;
168 const unsigned int d_atomId;
169 const unsigned int d_layer;
170 const ROMol *d_mol = nullptr;
171
172 public:
173 OutputType getBitId(
174 FingerprintArguments *arguments, // unused
175 const std::vector<std::uint32_t> *atomInvariants, // unused
176 const std::vector<std::uint32_t> *bondInvariants, // unused
177 AdditionalOutput *additionalOutput, // unused
178 const bool hashResults = false, // unused
179 const std::uint64_t fpSize = 0 // unused
180 ) const override;
182 size_t bitId) const override;
183
184 /**
185 \brief Construct a new MorganAtomEnv object
186
187 \param code bit id generated from this environment
188 \param atomId atom id of the atom at the center of this environment
189 \param layer radius of this environment
190 */
191 MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
192 const unsigned int layer, const ROMol *mol)
193 : d_code(code), d_atomId(atomId), d_layer(layer), d_mol(mol) {}
194};
195
196/**
197 \brief Class that generates atom environments for Morgan fingerprint
198
199 */
200template <typename OutputType>
202 : public AtomEnvironmentGenerator<OutputType> {
203 public:
204 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
205 const ROMol &mol, FingerprintArguments *arguments,
206 const std::vector<std::uint32_t> *fromAtoms,
207 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
208 const AdditionalOutput *additionalOutput,
209 const std::vector<std::uint32_t> *atomInvariants,
210 const std::vector<std::uint32_t> *bondInvariants,
211 const bool hashResults = false) const override;
212
213 std::string infoString() const override;
214 void toJSON(boost::property_tree::ptree &pt) const override;
215 void fromJSON(const boost::property_tree::ptree &pt) override;
216
217 OutputType getResultSize() const override;
218};
219
220/**
221 \brief Get a fingerprint generator for Morgan fingerprint
222
223 \tparam OutputType determines the size of the bitIds and the result, can be 32
224 or 64 bit unsigned integer
225
226 \param radius the number of iterations to grow the fingerprint
227
228 \param countSimulation if set, use count simulation while generating the
229 fingerprint
230
231 \param includeChirality if set, chirality information will be added to the
232 generated bit id, independently from bond invariants. The flag is applied to
233 both MorganArguments and the default bond generator MorganBondInvGenerator
234
235 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
236 have a nonzero invariant
237
238 \param countBounds boundaries for count simulation, corresponding bit will be
239 set if the count is higher than the number provided for that spot
240
241 \param fpSize size of the generated fingerprint, does not affect the sparse
242 versions
243
244 \param useBondTypes if set, bond types will be included as a part of the
245 default bond invariants
250 \param includeRedundantEnvironments if set redundant environments will be
251 included in the fingerprint
252 \param atomInvariantsGenerator custom atom invariants generator to use
253 \param bondInvariantsGenerator custom bond invariants generator to use
254 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
255 fingerprint generator
256 \param ownsBondInvGen if set bond invariants generator is destroyed with the
257 fingerprint generator
258
259 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
260
261This generator supports the following \c AdditionalOutput types:
262 - \c atomToBits : which bits each atom is the central atom for
263 - \c atomCounts : how many bits each atom sets
264 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
265
266 */
267template <typename OutputType>
269 unsigned int radius, bool countSimulation, bool includeChirality,
270 bool useBondTypes, bool onlyNonzeroInvariants,
271 bool includeRedundantEnvironments,
272 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
273 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
274 std::uint32_t fpSize = 2048,
275 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
276 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
277//! \overload
278template <typename OutputType>
280 const MorganArguments &args,
281 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
282 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
283 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
284
285/**
286 \brief Get a fingerprint generator for Morgan fingerprint
287
288 \tparam OutputType determines the size of the bitIds and the result, can be 32
289 or 64 bit unsigned integer
290
291 \param radius the number of iterations to grow the fingerprint
292
293 \param countSimulation if set, use count simulation while generating the
294 fingerprint
295
296 \param includeChirality if set, chirality information will be added to the
297 generated bit id, independently from bond invariants. The flag is applied to
298 both MorganArguments and the default bond generator MorganBondInvGenerator
299
300 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
301 have a nonzero invariant
302
303 \param countBounds boundaries for count simulation, corresponding bit will be
304 set if the count is higher than the number provided for that spot
305
306 \param fpSize size of the generated fingerprint, does not affect the sparse
307 versions
308
309 \param useBondTypes if set, bond types will be included as a part of the
310 default bond invariants
315 \param atomInvariantsGenerator custom atom invariants generator to use
316 \param bondInvariantsGenerator custom bond invariants generator to use
317 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
318 fingerprint generator
319 \param ownsBondInvGen if set bond invariants generator is destroyed with the
320 fingerprint generator
321
322 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
323
324This generator supports the following \c AdditionalOutput types:
325 - \c atomToBits : which bits each atom is the central atom for
326 - \c atomCounts : how many bits each atom sets
327 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
328
329 */
330template <typename OutputType>
332 unsigned int radius, bool countSimulation = false,
333 bool includeChirality = false, bool useBondTypes = true,
334 bool onlyNonzeroInvariants = false,
335 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
336 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
337 std::uint32_t fpSize = 2048,
338 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
339 bool ownsAtomInvGen = false, bool ownsBondInvGen = false) {
341 radius, countSimulation, includeChirality, useBondTypes,
342 onlyNonzeroInvariants, false, atomInvariantsGenerator,
343 bondInvariantsGenerator, fpSize, countBounds, ownsAtomInvGen,
344 ownsBondInvGen);
345};
346
347} // namespace MorganFingerprint
348} // namespace RDKit
349
350#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
FingerprintArguments(bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature=1, bool includeChirality=false)
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
void toJSON(boost::property_tree::ptree &pt) const override
void fromJSON(const boost::property_tree::ptree &pt) override
MorganArguments(unsigned int radius=3, bool countSimulation=false, bool includeChirality=false, bool onlyNonzeroInvariants=false, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, bool includeRedundantEnvironments=false, bool useBondTypes=true)
Construct a new MorganArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer, const ROMol *mol)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
void fromJSON(const boost::property_tree::ptree &) override
MorganAtomInvGenerator * clone() const override
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
void toJSON(boost::property_tree::ptree &pt) const override
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
void fromJSON(const boost::property_tree::ptree &pt) override
Class that generates atom environments for Morgan fingerprint.
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
void fromJSON(const boost::property_tree::ptree &pt) override
void fromJSON(const boost::property_tree::ptree &) override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
MorganFeatureAtomInvGenerator * clone() const override
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
MorganFeatureAtomInvGenerator(const std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:193
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(unsigned int radius, bool countSimulation, bool includeChirality, bool useBondTypes, bool onlyNonzeroInvariants, bool includeRedundantEnvironments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, std::uint32_t fpSize=2048, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, bool ownsAtomInvGen=false, bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.