RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <cassert>
23#include <cstring>
24#include <vector>
25
26// #define VERBOSE_CANON 1
27
28namespace RDKit {
29namespace Canon {
30struct canon_atom;
31
34 unsigned int bondStereo{
35 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
36 unsigned int nbrSymClass{0};
37 unsigned int nbrIdx{0};
39 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
40 const std::string *p_symbol{
41 nullptr}; // if provided, this is used to order bonds
42 unsigned int bondIdx{0};
43
46 unsigned int nsc, unsigned int bidx)
47 : bondType(bt),
48 bondStereo(static_cast<unsigned int>(bs)),
49 nbrSymClass(nsc),
50 nbrIdx(ni),
51 bondIdx(bidx) {}
52 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
53 unsigned int nsc, unsigned int bidx)
54 : bondType(bt),
55 bondStereo(bs),
56 nbrSymClass(nsc),
57 nbrIdx(ni),
58 bondIdx(bidx) {}
59
60 int compareStereo(const bondholder &o) const;
61
62 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
63 static bool greater(const bondholder &lhs, const bondholder &rhs) {
64 return compare(lhs, rhs) > 0;
65 }
66
67 static int compare(const bondholder &x, const bondholder &y,
68 unsigned int div = 1) {
69 if (x.p_symbol && y.p_symbol) {
70 if ((*x.p_symbol) < (*y.p_symbol)) {
71 return -1;
72 } else if ((*x.p_symbol) > (*y.p_symbol)) {
73 return 1;
74 }
75 }
76 if (x.bondType < y.bondType) {
77 return -1;
78 } else if (x.bondType > y.bondType) {
79 return 1;
80 }
81 if (x.bondStereo < y.bondStereo) {
82 return -1;
83 } else if (x.bondStereo > y.bondStereo) {
84 return 1;
85 }
86 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
87 if (scdiv) {
88 return scdiv;
89 }
90 if (x.bondStereo && y.bondStereo) {
91 auto cs = x.compareStereo(y);
92 if (cs) {
93 return cs;
94 }
95 }
96 return 0;
97 }
98};
100 const Atom *atom{nullptr};
101 int index{-1};
102 unsigned int degree{0};
103 unsigned int totalNumHs{0};
104 bool hasRingNbr{false};
105 bool isRingStereoAtom{false};
106 unsigned int whichStereoGroup{0};
108 std::unique_ptr<int[]> nbrIds;
109 const std::string *p_symbol{
110 nullptr}; // if provided, this is used to order atoms
111 std::vector<int> neighborNum;
112 std::vector<int> revistedNeighbors;
113 std::vector<bondholder> bonds;
114};
115
117 canon_atom *atoms, std::vector<bondholder> &nbrs);
118
120 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
121 std::vector<std::pair<unsigned int, unsigned int>> &result);
122
123/*
124 * Different types of atom compare functions:
125 *
126 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
127 *dependent chirality
128 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
129 *highly symmetrical graphs/molecules
130 * - AtomCompareFunctor: Basic atom compare function which also allows to
131 *include neighbors within the ranking
132 */
133
135 public:
137 const ROMol *dp_mol{nullptr};
138 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
139 *dp_bondsInPlay{nullptr};
140
143 Canon::canon_atom *atoms, const ROMol &m,
144 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
145 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
146 : dp_atoms(atoms),
147 dp_mol(&m),
148 dp_atomsInPlay(atomsInPlay),
149 dp_bondsInPlay(bondsInPlay) {}
150 int operator()(int i, int j) const {
151 PRECONDITION(dp_atoms, "no atoms");
152 PRECONDITION(dp_mol, "no molecule");
153 PRECONDITION(i != j, "bad call");
154 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
155 return 0;
156 }
157
158 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
160 }
161 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
163 }
164 for (unsigned int ii = 0;
165 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
166 int cmp =
167 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
168 if (cmp) {
169 return cmp;
170 }
171 }
172
173 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
174 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
175 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
176 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
177 }
178 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
179 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
180 }
181
182 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
183 int cmp = swapsi[ii].second - swapsj[ii].second;
184
185 if (cmp) {
186 return cmp;
187 }
188 }
189
190 return 0;
191 }
192};
193
195 public:
197 const ROMol *dp_mol{nullptr};
198 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
199 *dp_bondsInPlay{nullptr};
200
203 Canon::canon_atom *atoms, const ROMol &m,
204 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
205 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
206 : dp_atoms(atoms),
207 dp_mol(&m),
208 dp_atomsInPlay(atomsInPlay),
209 dp_bondsInPlay(bondsInPlay) {}
210 int operator()(int i, int j) const {
211 PRECONDITION(dp_atoms, "no atoms");
212 PRECONDITION(dp_mol, "no molecule");
213 PRECONDITION(i != j, "bad call");
214 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
215 return 0;
216 }
217
218 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
219 return -1;
220 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
221 return 1;
222 }
223
224 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
225 return -1;
226 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
227 return 1;
228 }
229
230 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
232 }
233 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
235 }
236 for (unsigned int ii = 0;
237 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
238 int cmp =
239 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
240 if (cmp) {
241 return cmp;
242 }
243 }
244
245 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
246 return -1;
247 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
248 return 1;
249 }
250 return 0;
251 }
252};
253
254namespace {
255unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
256 unsigned int i) {
257 unsigned int res = 0;
258 std::vector<unsigned int> perm;
259 perm.reserve(dp_atoms[i].atom->getDegree());
260 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
261 auto rnk = dp_atoms[nbr->getIdx()].index;
262 // make sure we don't have duplicate ranks
263 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
264 break;
265 } else {
266 perm.push_back(rnk);
267 }
268 }
269 if (perm.size() == dp_atoms[i].atom->getDegree()) {
270 auto ctag = dp_atoms[i].atom->getChiralTag();
273 auto sortedPerm = perm;
274 std::sort(sortedPerm.begin(), sortedPerm.end());
275 auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
276 res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
277 if (nswaps % 2) {
278 res = res == 2 ? 1 : 2;
279 }
280 }
281 }
282 return res;
283}
284} // namespace
286 unsigned int getAtomRingNbrCode(unsigned int i) const {
287 if (!dp_atoms[i].hasRingNbr) {
288 return 0;
289 }
290
291 auto nbrs = dp_atoms[i].nbrIds.get();
292 unsigned int code = 0;
293 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
294 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
295 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
296 }
297 }
298 return code;
299 }
300
301 int basecomp(int i, int j) const {
302 unsigned int ivi, ivj;
303
304 // always start with the current class:
305 ivi = dp_atoms[i].index;
306 ivj = dp_atoms[j].index;
307 if (ivi < ivj) {
308 return -1;
309 } else if (ivi > ivj) {
310 return 1;
311 }
312
314 // use the non-stereo ranks if they were assigned
315 int rankingNumber_i = 0;
316 int rankingNumber_j = 0;
317 dp_atoms[i].atom->getPropIfPresent(
319 dp_atoms[j].atom->getPropIfPresent(
321 if (rankingNumber_i < rankingNumber_j) {
322 return -1;
323 } else if (rankingNumber_i > rankingNumber_j) {
324 return 1;
325 }
326 }
327
329 // use the atom-mapping numbers if they were assigned
330 int molAtomMapNumber_i = 0;
331 int molAtomMapNumber_j = 0;
332 if (df_useAtomMaps ||
333 (df_useAtomMapsOnDummies && dp_atoms[i].atom->getAtomicNum() == 0)) {
334 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
335 molAtomMapNumber_i);
336 }
337 if (df_useAtomMaps ||
338 (df_useAtomMapsOnDummies && dp_atoms[j].atom->getAtomicNum() == 0)) {
339 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
340 molAtomMapNumber_j);
341 }
342 if (molAtomMapNumber_i < molAtomMapNumber_j) {
343 return -1;
344 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
345 return 1;
346 }
347 }
348 // start by comparing degree
349 ivi = dp_atoms[i].degree;
350 ivj = dp_atoms[j].degree;
351 if (ivi < ivj) {
352 return -1;
353 } else if (ivi > ivj) {
354 return 1;
355 }
356 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
357 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
358 return -1;
359 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
360 return 1;
361 } else {
362 return 0;
363 }
364 }
365
366 // move onto atomic number
367 ivi = dp_atoms[i].atom->getAtomicNum();
368 ivj = dp_atoms[j].atom->getAtomicNum();
369 if (ivi < ivj) {
370 return -1;
371 } else if (ivi > ivj) {
372 return 1;
373 }
374 // isotopes if we're using them
375 if (df_useIsotopes) {
376 ivi = dp_atoms[i].atom->getIsotope();
377 ivj = dp_atoms[j].atom->getIsotope();
378 if (ivi < ivj) {
379 return -1;
380 } else if (ivi > ivj) {
381 return 1;
382 }
383 }
384
385 // nHs
386 ivi = dp_atoms[i].totalNumHs;
387 ivj = dp_atoms[j].totalNumHs;
388 if (ivi < ivj) {
389 return -1;
390 } else if (ivi > ivj) {
391 return 1;
392 }
393 // charge
394 ivi = dp_atoms[i].atom->getFormalCharge();
395 ivj = dp_atoms[j].atom->getFormalCharge();
396 if (ivi < ivj) {
397 return -1;
398 } else if (ivi > ivj) {
399 return 1;
400 }
401 // presence of specified chirality if it's being used
403 ivi =
404 dp_atoms[i].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
405 ivj =
406 dp_atoms[j].atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED;
407 if (ivi < ivj) {
408 return -1;
409 } else if (ivi > ivj) {
410 return 1;
411 }
412 }
413 // chirality if we're using it
414 if (df_useChirality) {
415 // look at enhanced stereo - whichStereoGroup == 0 means no stereo
416 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
417 // it's set then we're in an SG
418 ivj = dp_atoms[j].whichStereoGroup;
419 if (ivi || ivj) {
420 if (ivi && !ivj) {
421 return 1;
422 } else if (ivj && !ivi) {
423 return -1;
424 } else if (ivi && ivj) {
425 auto iType = dp_atoms[i].typeOfStereoGroup;
426 auto jType = dp_atoms[j].typeOfStereoGroup;
427 if (iType < jType) {
428 return -1;
429 } else if (iType > jType) {
430 return 1;
431 }
432 if (ivi != ivj) {
433 // now check the current classes of the other members of the SG
434 std::set<unsigned int> sgi;
435 for (const auto sgat :
436 dp_mol->getStereoGroups()[ivi - 1].getAtoms()) {
437 sgi.insert(dp_atoms[sgat->getIdx()].index);
438 }
439 std::set<unsigned int> sgj;
440 for (const auto sgat :
441 dp_mol->getStereoGroups()[ivj - 1].getAtoms()) {
442 sgj.insert(dp_atoms[sgat->getIdx()].index);
443 }
444 if (sgi < sgj) {
445 return -1;
446 } else if (sgi > sgj) {
447 return 1;
448 }
449 } else { // same stereo group
451 ivi = getChiralRank(dp_mol, dp_atoms, i);
452 ivj = getChiralRank(dp_mol, dp_atoms, j);
453 if (ivi < ivj) {
454 return -1;
455 } else if (ivi > ivj) {
456 return 1;
457 }
458 }
459 }
460 }
461 } else {
462 // if there's no stereogroup, then use whatever atom stereochem is
463 // specfied:
464 ivi = 0;
465 ivj = 0;
466 // can't actually use values here, because they are arbitrary
467 ivi = dp_atoms[i].atom->getChiralTag() != 0;
468 ivj = dp_atoms[j].atom->getChiralTag() != 0;
469 if (ivi < ivj) {
470 return -1;
471 } else if (ivi > ivj) {
472 return 1;
473 }
474 // stereo set
475 if (ivi && ivj) {
476 if (ivi) {
477 ivi = getChiralRank(dp_mol, dp_atoms, i);
478 }
479 if (ivj) {
480 ivj = getChiralRank(dp_mol, dp_atoms, j);
481 }
482 if (ivi < ivj) {
483 return -1;
484 } else if (ivi > ivj) {
485 return 1;
486 }
487 }
488 }
489 }
490
492 // ring stereochemistry
493 ivi = getAtomRingNbrCode(i);
494 ivj = getAtomRingNbrCode(j);
495 if (ivi < ivj) {
496 return -1;
497 } else if (ivi > ivj) {
498 return 1;
499 } // bond stereo is taken care of in the neighborhood comparison
500 }
501 return 0;
502 }
503
504 public:
506 const ROMol *dp_mol{nullptr};
507 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
508 *dp_bondsInPlay{nullptr};
509 bool df_useNbrs{false};
510 bool df_useIsotopes{true};
511 bool df_useChirality{true};
513 bool df_useAtomMaps{true};
517
520 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
521 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
522 : dp_atoms(atoms),
523 dp_mol(&m),
524 dp_atomsInPlay(atomsInPlay),
525 dp_bondsInPlay(bondsInPlay) {}
526
527 int operator()(int i, int j) const {
528 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
529 return 0;
530 }
531 int v = basecomp(i, j);
532 if (v) {
533 return v;
534 }
535
536 if (df_useNbrs) {
537 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
539 }
540 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
542 }
543
544 for (unsigned int ii = 0;
545 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
546 ++ii) {
547 int cmp =
548 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
549 if (cmp) {
550 return cmp;
551 }
552 }
553
554 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
555 return -1;
556 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
557 return 1;
558 }
559 }
560 return 0;
561 }
562};
563
564/*
565 * A compare function to discriminate chiral atoms, similar to the CIP rules.
566 * This functionality is currently not used.
567 */
568
569const unsigned int ATNUM_CLASS_OFFSET = 10000;
571 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
572 for (unsigned j = 0; j < nbrs.size(); ++j) {
573 unsigned int nbrIdx = nbrs[j].nbrIdx;
574 if (nbrIdx == ATNUM_CLASS_OFFSET) {
575 // Ignore the Hs
576 continue;
577 }
578 const Atom *nbr = dp_atoms[nbrIdx].atom;
579 nbrs[j].nbrSymClass =
580 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
581 }
582 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
583 // FIX: don't want to be doing this long-term
584 }
585
586 int basecomp(int i, int j) const {
587 PRECONDITION(dp_atoms, "no atoms");
588 unsigned int ivi, ivj;
589
590 // always start with the current class:
591 ivi = dp_atoms[i].index;
592 ivj = dp_atoms[j].index;
593 if (ivi < ivj) {
594 return -1;
595 } else if (ivi > ivj) {
596 return 1;
597 }
598
599 // move onto atomic number
600 ivi = dp_atoms[i].atom->getAtomicNum();
601 ivj = dp_atoms[j].atom->getAtomicNum();
602 if (ivi < ivj) {
603 return -1;
604 } else if (ivi > ivj) {
605 return 1;
606 }
607
608 // isotopes:
609 ivi = dp_atoms[i].atom->getIsotope();
610 ivj = dp_atoms[j].atom->getIsotope();
611 if (ivi < ivj) {
612 return -1;
613 } else if (ivi > ivj) {
614 return 1;
615 }
616
617 // atom stereochem:
618 ivi = 0;
619 ivj = 0;
620 std::string cipCode;
621 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
622 cipCode)) {
623 ivi = cipCode == "R" ? 2 : 1;
624 }
625 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
626 cipCode)) {
627 ivj = cipCode == "R" ? 2 : 1;
628 }
629 if (ivi < ivj) {
630 return -1;
631 } else if (ivi > ivj) {
632 return 1;
633 }
634
635 // bond stereo is taken care of in the neighborhood comparison
636 return 0;
637 }
638
639 public:
641 const ROMol *dp_mol{nullptr};
642 bool df_useNbrs{false};
645 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
646 int operator()(int i, int j) const {
647 PRECONDITION(dp_atoms, "no atoms");
648 PRECONDITION(dp_mol, "no molecule");
649 PRECONDITION(i != j, "bad call");
650 int v = basecomp(i, j);
651 if (v) {
652 return v;
653 }
654
655 if (df_useNbrs) {
656 getAtomNeighborhood(dp_atoms[i].bonds);
657 getAtomNeighborhood(dp_atoms[j].bonds);
658
659 // we do two passes through the neighbor lists. The first just uses the
660 // atomic numbers (by passing the optional 10000 to bondholder::compare),
661 // the second takes the already-computed index into account
662 for (unsigned int ii = 0;
663 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
664 ++ii) {
665 int cmp = bondholder::compare(
666 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
667 if (cmp) {
668 return cmp;
669 }
670 }
671 for (unsigned int ii = 0;
672 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
673 ++ii) {
674 int cmp =
675 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
676 if (cmp) {
677 return cmp;
678 }
679 }
680 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
681 return -1;
682 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
683 return 1;
684 }
685 }
686 return 0;
687 }
688};
689
690/*
691 * Basic canonicalization function to organize the partitions which will be
692 * sorted next.
693 * */
694
695template <typename CompareFunc>
696void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
697 int mode, std::vector<int> &order,
698 std::vector<int> &count, int &activeset,
699 std::vector<int> &next, std::vector<int> &changed,
700 std::vector<char> &touchedPartitions) {
701 unsigned int nAtoms = mol.getNumAtoms();
702 int partition;
703 int symclass = 0;
704 int offset;
705 int index;
706 int len;
707 int i;
708 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
709
710 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
711 while (activeset != -1) {
712 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
713 // std::cerr<<" next: ";
714 // for(unsigned int ii=0;ii<nAtoms;++ii){
715 // std::cerr<<ii<<":"<<next[ii]<<" ";
716 // }
717 // std::cerr<<std::endl;
718 // for(unsigned int ii=0;ii<nAtoms;++ii){
719 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
720 // "<<atoms[order[ii]].index<<std::endl;
721 // }
722
723 partition = activeset;
724 activeset = next[partition];
725 next[partition] = -2;
726
727 len = count[partition];
728 offset = atoms[partition].index;
729 auto start = std::span<int>(&order[offset], len);
730 // std::cerr<<"\n\n**************************************************************"<<std::endl;
731 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len:
732 // "<<len<<":"; for(unsigned int ii=0;ii<len;++ii){
733 // std::cerr<<" "<<order[offset+ii]+1;
734 // }
735 // std::cerr<<std::endl;
736 // for(unsigned int ii=0;ii<nAtoms;++ii){
737 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
738 // "<<atoms[order[ii]].index<<std::endl;
739 // }
740 hanoisort(start, count, changed, compar);
741 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
742 // std::cerr<<" result:";
743 // for(unsigned int ii=0;ii<nAtoms;++ii){
744 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
745 // "<<atoms[order[ii]].index<<std::endl;
746 // }
747 for (int k = 0; k < len; ++k) {
748 changed[start[k]] = 0;
749 }
750
751 index = start[0];
752 // std::cerr<<" len:"<<len<<" index:"<<index<<"
753 // count:"<<count[index]<<std::endl;
754 for (i = count[index]; i < len; i++) {
755 index = start[i];
756 if (count[index]) {
757 symclass = offset + i;
758 }
759 atoms[index].index = symclass;
760 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
761 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
762 // activeset=index;
763 //}
764 for (unsigned j = 0; j < atoms[index].degree; ++j) {
765 changed[atoms[index].nbrIds[j]] = 1;
766 }
767 }
768 // std::cerr<<std::endl;
769
770 if (mode) {
771 index = start[0];
772 for (i = count[index]; i < len; i++) {
773 index = start[i];
774 for (unsigned j = 0; j < atoms[index].degree; ++j) {
775 unsigned int nbor = atoms[index].nbrIds[j];
776 touchedPartitions[atoms[nbor].index] = 1;
777 }
778 }
779 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
780 if (touchedPartitions[ii]) {
781 partition = order[ii];
782 if ((count[partition] > 1) && (next[partition] == -2)) {
783 next[partition] = activeset;
784 activeset = partition;
785 }
786 touchedPartitions[ii] = 0;
787 }
788 }
789 }
790 }
791} // end of RefinePartitions()
792
793template <typename CompareFunc>
794void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
795 int mode, std::vector<int> &order, std::vector<int> &count,
796 int &activeset, std::vector<int> &next,
797 std::vector<int> &changed,
798 std::vector<char> &touchedPartitions) {
799 unsigned int nAtoms = mol.getNumAtoms();
800 int partition;
801 int offset;
802 int index;
803 int len;
804 int oldPart = 0;
805
806 for (unsigned int i = 0; i < nAtoms; i++) {
807 partition = order[i];
808 oldPart = atoms[partition].index;
809 while (count[partition] > 1) {
810 len = count[partition];
811 offset = atoms[partition].index + len - 1;
812 index = order[offset];
813 atoms[index].index = offset;
814 count[partition] = len - 1;
815 count[index] = 1;
816
817 // test for ions, water molecules with no
818 if (atoms[index].degree < 1) {
819 continue;
820 }
821 for (unsigned j = 0; j < atoms[index].degree; ++j) {
822 unsigned int nbor = atoms[index].nbrIds[j];
823 touchedPartitions[atoms[nbor].index] = 1;
824 changed[nbor] = 1;
825 }
826
827 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
828 if (touchedPartitions[ii]) {
829 int npart = order[ii];
830 if ((count[npart] > 1) && (next[npart] == -2)) {
831 next[npart] = activeset;
832 activeset = npart;
833 }
834 touchedPartitions[ii] = 0;
835 }
836 }
837 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
838 changed, touchedPartitions);
839 }
840 // not sure if this works each time
841 if (atoms[partition].index != oldPart) {
842 i -= 1;
843 }
844 }
845} // end of BreakTies()
846
848 std::vector<int> &order,
849 std::vector<int> &count,
850 canon_atom *atoms);
851
853 unsigned int nAtoms, std::vector<int> &order, std::vector<int> &count,
854 int &activeset, std::vector<int> &next, std::vector<int> &changed);
855
856//! Note that atom maps on dummy atoms will always be used
858 const ROMol &mol, std::vector<unsigned int> &res, bool breakTies = true,
859 bool includeChirality = true, bool includeIsotopes = true,
860 bool includeAtomMaps = true, bool includeChiralPresence = false,
861 bool includeStereoGroups = true, bool useNonStereoRanks = false,
862 bool includeRingStereo = true);
863
864//! Note that atom maps on dummy atoms will always be used
866 const ROMol &mol, std::vector<unsigned int> &res,
867 const boost::dynamic_bitset<> &atomsInPlay,
868 const boost::dynamic_bitset<> &bondsInPlay,
869 const std::vector<std::string> *atomSymbols,
870 const std::vector<std::string> *bondSymbols, bool breakTies,
871 bool includeChirality, bool includeIsotope, bool includeAtomMaps,
872 bool includeChiralPresence, bool includeRingStereo = true);
873
874//! Note that atom maps on dummy atoms will always be used
876 const ROMol &mol, std::vector<unsigned int> &res,
877 const boost::dynamic_bitset<> &atomsInPlay,
878 const boost::dynamic_bitset<> &bondsInPlay,
879 const std::vector<std::string> *atomSymbols = nullptr,
880 bool breakTies = true, bool includeChirality = true,
881 bool includeIsotopes = true, bool includeAtomMaps = true,
882 bool includeChiralPresence = false, bool includeRingStereo = true) {
883 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
884 breakTies, includeChirality, includeIsotopes,
885 includeAtomMaps, includeChiralPresence, includeRingStereo);
886};
887
889 std::vector<unsigned int> &res);
890
892 std::vector<Canon::canon_atom> &atoms,
893 bool includeChirality = true,
894 bool includeStereoGroups = true);
895
896namespace detail {
898 std::vector<Canon::canon_atom> &atoms,
899 bool includeChirality,
900 const std::vector<std::string> *atomSymbols,
901 const std::vector<std::string> *bondSymbols,
902 const boost::dynamic_bitset<> &atomsInPlay,
903 const boost::dynamic_bitset<> &bondsInPlay,
904 bool needsInit);
905template <typename T>
906void rankWithFunctor(T &ftor, bool breakTies, std::vector<int> &order,
907 bool useSpecial = false, bool useChirality = false,
908 bool includeRingStereo = true,
909 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
910 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
911
912} // namespace detail
913
914} // namespace Canon
915} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:74
int getAtomicNum() const
returns our atomic number
Definition Atom.h:145
@ CHI_TETRAHEDRAL_CW
tetrahedral: clockwise rotation (SMILES @@)
Definition Atom.h:107
@ CHI_UNSPECIFIED
chirality that hasn't been specified
Definition Atom.h:106
@ CHI_TETRAHEDRAL_CCW
tetrahedral: counter-clockwise rotation (SMILES
Definition Atom.h:108
BondType
the type of Bond
Definition Bond.h:55
@ UNSPECIFIED
Definition Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition Bond.h:94
@ STEREONONE
Definition Bond.h:95
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:508
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:519
int operator()(int i, int j) const
Definition new_canon.h:527
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:507
Canon::canon_atom * dp_atoms
Definition new_canon.h:505
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition new_canon.h:644
int operator()(int i, int j) const
Definition new_canon.h:646
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:138
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:139
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:142
const boost::dynamic_bitset * dp_bondsInPlay
Definition new_canon.h:199
const boost::dynamic_bitset * dp_atomsInPlay
Definition new_canon.h:198
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition new_canon.h:202
unsigned int getNumAtoms() const
returns our number of atoms
Definition ROMol.h:507
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition ROMol.h:370
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:257
void rankWithFunctor(T &ftor, bool breakTies, std::vector< int > &order, bool useSpecial=false, bool useChirality=false, bool includeRingStereo=true, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true, bool includeStereoGroups=true)
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, std::vector< int > &order, std::vector< int > &count, int &activeset, std::vector< int > &next, std::vector< int > &changed, std::vector< char > &touchedPartitions)
Definition new_canon.h:696
const unsigned int ATNUM_CLASS_OFFSET
Definition new_canon.h:569
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, std::vector< int > &order, std::vector< int > &count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, std::vector< int > &order, std::vector< int > &count, int &activeset, std::vector< int > &next, std::vector< int > &changed)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true, bool includeAtomMaps=true, bool includeChiralPresence=false, bool includeStereoGroups=true, bool useNonStereoRanks=false, bool includeRingStereo=true)
Note that atom maps on dummy atoms will always be used.
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, std::vector< int > &order, std::vector< int > &count, int &activeset, std::vector< int > &next, std::vector< int > &changed, std::vector< char > &touchedPartitions)
Definition new_canon.h:794
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope, bool includeAtomMaps, bool includeChiralPresence, bool includeRingStereo=true)
Note that atom maps on dummy atoms will always be used.
constexpr std::string_view _CanonicalRankingNumber
Definition types.h:75
constexpr std::string_view _CIPCode
Definition types.h:71
constexpr std::string_view molAtomMapNumber
Definition types.h:160
Std stuff.
StereoGroupType
Definition StereoGroup.h:30
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition hanoiSort.h:154
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition utils.h:54
const std::string * p_symbol
Definition new_canon.h:40
Bond::BondType bondType
Definition new_canon.h:33
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition new_canon.h:63
bool operator<(const bondholder &o) const
Definition new_canon.h:62
Bond::BondStereo stype
Definition new_canon.h:38
const canon_atom * controllingAtoms[4]
Definition new_canon.h:39
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:52
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition new_canon.h:45
unsigned int bondStereo
Definition new_canon.h:34
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition new_canon.h:67
unsigned int nbrSymClass
Definition new_canon.h:36
std::vector< bondholder > bonds
Definition new_canon.h:113
StereoGroupType typeOfStereoGroup
Definition new_canon.h:107
std::unique_ptr< int[]> nbrIds
Definition new_canon.h:108
std::vector< int > revistedNeighbors
Definition new_canon.h:112
std::vector< int > neighborNum
Definition new_canon.h:111
unsigned int totalNumHs
Definition new_canon.h:103
const std::string * p_symbol
Definition new_canon.h:109
unsigned int whichStereoGroup
Definition new_canon.h:106