From 22bef3f38631aae76cffedaf899394d2687a2def Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 11:32:40 -0400 Subject: [PATCH 01/22] Unit test for drawing poly-dentate species. For now it leaves behind a bunch of .png files so you can see if they look ok. --- test/rmgpy/molecule/drawTest.py | 110 +++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index 03e68a0c21..afe560801b 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -33,9 +33,9 @@ import os import os.path +import itertools - -from rmgpy.molecule import Molecule +from rmgpy.molecule import Molecule, Atom, Bond from rmgpy.molecule.draw import MoleculeDrawer from rmgpy.species import Species @@ -144,3 +144,109 @@ def test_draw_hydrogen_bond_adsorbate(self): from cairo import PDFSurface surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf") assert isinstance(surface, PDFSurface) + + def test_draw_bidentate_adsorbate(self): + try: + from cairocffi import ImageSurface + except ImportError: + from cairo import ImageSurface + + test_molecules = [ + Molecule().from_adjacency_list( + """ +1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S} +2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S} +3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S} +4 X u0 p0 c0 {1,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {1,S} +7 H u0 p0 c0 {2,S} +8 X u0 p0 c0 {2,S} +9 H u0 p0 c0 {3,S} +10 H u0 p0 c0 {3,S} +11 H u0 p0 c0 {3,S} + """), + Molecule().from_adjacency_list( +""" +1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S} +2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S} +3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S} +4 X u0 p0 c0 {1,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {1,S} +7 H u0 p0 c0 {2,S} +8 H u0 p0 c0 {2,S} +9 H u0 p0 c0 {3,S} +10 H u0 p0 c0 {3,S} +11 X u0 p0 c0 {3,S} +"""), + ] + for molecule in [Molecule(smiles="CC(=O)CCO"), Molecule(smiles="C1CC=CC1CC"), Molecule(smiles="C=CCC(O)=O")]: + bondable = [a for a in molecule.atoms if a.is_non_hydrogen() and any(b.is_hydrogen() for b in a.bonds)] + for b1, b2 in itertools.combinations(bondable, 2): + # find a hydrogen atom bonded to each of the two atoms + for h1 in b1.bonds: + if h1.is_hydrogen(): + break + for h2 in b2.bonds: + if h2.is_hydrogen(): + break + molecule.remove_atom(h1) + molecule.remove_atom(h2) + x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0) + x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0) + molecule.add_atom(x1) + molecule.add_atom(x2) + molecule.add_bond(Bond(b1, x1, order=1)) + molecule.add_bond(Bond(b2, x2, order=1)) + test_molecules.append(molecule.copy(deep=True)) + molecule.remove_atom(x1) + molecule.remove_atom(x2) + molecule.add_atom(h1) + molecule.add_atom(h2) + molecule.add_bond(Bond(b1, h1, order=1)) + molecule.add_bond(Bond(b2, h2, order=1)) + + for b1, b2, b3 in itertools.combinations(bondable, 3): + # find a hydrogen atom bonded to each of the two atoms + for h1 in b1.bonds: + if h1.is_hydrogen(): + break + for h2 in b2.bonds: + if h2.is_hydrogen(): + break + for h3 in b3.bonds: + if h3.is_hydrogen(): + break + molecule.remove_atom(h1) + molecule.remove_atom(h2) + molecule.remove_atom(h3) + x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0) + x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0) + x3 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0) + molecule.add_atom(x1) + molecule.add_atom(x2) + molecule.add_atom(x3) + molecule.add_bond(Bond(b1, x1, order=1)) + molecule.add_bond(Bond(b2, x2, order=1)) + molecule.add_bond(Bond(b3, x3, order=1)) + test_molecules.append(molecule.copy(deep=True)) + molecule.remove_atom(x1) + molecule.remove_atom(x2) + molecule.remove_atom(x3) + molecule.add_atom(h1) + molecule.add_atom(h2) + molecule.add_atom(h3) + molecule.add_bond(Bond(b1, h1, order=1)) + molecule.add_bond(Bond(b2, h2, order=1)) + molecule.add_bond(Bond(b3, h3, order=1)) + + for number, molecule in enumerate(test_molecules, 1): + path = f"test_polydentate_{number}.png" + if os.path.exists(path): + os.unlink(path) + self.drawer.clear() + surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path) + assert os.path.exists(path), "File doesn't exist" + # os.unlink(path) + assert isinstance(surface, ImageSurface) \ No newline at end of file From 9c6a43d7d3a20fd0027259e60a838120f1014461 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 14:00:50 -0400 Subject: [PATCH 02/22] One more test molecule for drawing a 4-dentate adsorbate. --- test/rmgpy/molecule/drawTest.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index afe560801b..e1ecc8f200 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -240,6 +240,24 @@ def test_draw_bidentate_adsorbate(self): molecule.add_bond(Bond(b1, h1, order=1)) molecule.add_bond(Bond(b2, h2, order=1)) molecule.add_bond(Bond(b3, h3, order=1)) + + test_molecules.append(Molecule().from_adjacency_list( +""" +1 C u0 p0 c0 {2,S} {3,S} {9,S} {10,S} +2 X u0 p0 c0 {1,S} +3 C u0 p0 c0 {1,S} {4,S} {5,S} {11,S} +4 X u0 p0 c0 {3,S} +5 C u0 p0 c0 {3,S} {6,S} {7,S} {12,S} +6 X u0 p0 c0 {5,S} +7 C u0 p0 c0 {5,S} {8,S} {13,S} {14,S} +8 X u0 p0 c0 {7,S} +9 H u0 p0 c0 {1,S} +10 H u0 p0 c0 {1,S} +11 H u0 p0 c0 {3,S} +12 H u0 p0 c0 {5,S} +13 H u0 p0 c0 {7,S} +14 H u0 p0 c0 {7,S} +""")) for number, molecule in enumerate(test_molecules, 1): path = f"test_polydentate_{number}.png" From aded41d104550a19063481964a9c238d44e8ae68 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 11:34:25 -0400 Subject: [PATCH 03/22] Draw bidentate adsorbates better. By temporarily connecting the surface sites to each other, the algorithm for drawing cycles kicks in and puts the sites near each other, instead of having one site floating away far from another. --- rmgpy/molecule/draw.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index e18611b92a..dc3878539e 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -60,7 +60,7 @@ import numpy as np from rdkit.Chem import AllChem -from rmgpy.molecule.molecule import Atom, Molecule +from rmgpy.molecule.molecule import Atom, Molecule, Bond from rmgpy.qm.molecule import Geometry @@ -207,7 +207,9 @@ def draw(self, molecule, file_format, target=None): # replace the bonds after generating coordinates. This avoids # bugs with RDKit old_bond_dictionary = self._make_single_bonds() + self._connect_surface_sites() self._generate_coordinates() + self._disconnect_surface_sites() self._replace_bonds(old_bond_dictionary) # Generate labels to use @@ -1624,6 +1626,38 @@ def _replace_bonds(self, bond_order_dictionary): for bond, order in bond_order_dictionary.items(): bond.set_order_num(order) + def _connect_surface_sites(self): + """ + Creates single bonds between atoms that are surface sites. + This makes bidentate adsorbates look better. + Not well tested for things with n>2 surface sites. + """ + for atom1 in self.molecule.atoms: + if atom1.is_surface_site(): + for atom2 in self.molecule.atoms: + if atom2.is_surface_site() and atom1 != atom2: + # if atom2 is already bonded to a surface site, don't also bond it to atom1 + for atom3 in atom2.bonds: + if atom3.is_surface_site(): + break + else: # didn't break + bond = atom1.bonds.get(atom2) + if bond is None: + bond = Bond(atom1, atom2, 1) + atom1.bonds[atom2] = bond + atom2.bonds[atom1] = bond + + def _disconnect_surface_sites(self): + """ + Removes all bonds between atoms that are surface sites. + """ + for atom1 in self.molecule.atoms: + if atom1.is_surface_site(): + for atom2 in list(atom1.bonds.keys()): # make a list copy so we can delete from the dict + if atom2.is_surface_site(): + del atom1.bonds[atom2] + del atom2.bonds[atom1] + ################################################################################ From f4930217c31c5abc25cc03e73208fe326a804f06 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 13:59:41 -0400 Subject: [PATCH 04/22] Better drawing of polydentate (n>2) adsorbates. Surface sites are connected to each other in a better order, based on how close they are in the molecule. --- rmgpy/molecule/draw.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index dc3878539e..6eb34453bf 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -61,6 +61,7 @@ from rdkit.Chem import AllChem from rmgpy.molecule.molecule import Atom, Molecule, Bond +from rmgpy.molecule.pathfinder import find_shortest_path from rmgpy.qm.molecule import Geometry @@ -1634,18 +1635,15 @@ def _connect_surface_sites(self): """ for atom1 in self.molecule.atoms: if atom1.is_surface_site(): - for atom2 in self.molecule.atoms: - if atom2.is_surface_site() and atom1 != atom2: - # if atom2 is already bonded to a surface site, don't also bond it to atom1 - for atom3 in atom2.bonds: - if atom3.is_surface_site(): - break - else: # didn't break - bond = atom1.bonds.get(atom2) - if bond is None: - bond = Bond(atom1, atom2, 1) - atom1.bonds[atom2] = bond - atom2.bonds[atom1] = bond + other_sites = [a for a in self.molecule.atoms if a.is_surface_site() and a != atom1] + if not other_sites: break + nearest_sites = sorted(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) + atom2 = nearest_sites[0] + bond = atom1.bonds.get(atom2) + if bond is None: + bond = Bond(atom1, atom2, 1) + atom1.bonds[atom2] = bond + atom2.bonds[atom1] = bond def _disconnect_surface_sites(self): """ From a43619cf70a3b1d20e72bc29e032e12757a0b1df Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 11:43:29 -0400 Subject: [PATCH 05/22] Rotate bidentates so the sites are horizontally aligned --- rmgpy/molecule/draw.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 6eb34453bf..8073c12806 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -480,6 +480,16 @@ def _generate_coordinates(self): index = atoms.index(site) coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit coordinates[index, 0] = coordinates[:, 0].mean() # and center it + sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()] + if len(sites) == 2: + # rotate so the sites are horizontal + vector0 = coordinates[atoms.index(sites[1]), :] - coordinates[atoms.index(sites[0]), :] + angle = math.atan2(vector0[0], vector0[1]) - math.pi / 2 + rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) + self.coordinates = coordinates = np.dot(coordinates, rot) + # if sites are above the middle, flip it + if coordinates[atoms.index(sites[0]), 1] > coordinates[:, 1].mean(): + coordinates[:, 1] *= -1 def _find_cyclic_backbone(self): """ From 84fd6a1f068f12f0e0e6c50c575fe365610150c9 Mon Sep 17 00:00:00 2001 From: Richard West Date: Tue, 30 Jul 2024 11:57:11 -0400 Subject: [PATCH 06/22] Clean up files after testing drawings. --- test/rmgpy/molecule/drawTest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index e1ecc8f200..3bfa734cc8 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -266,5 +266,5 @@ def test_draw_bidentate_adsorbate(self): self.drawer.clear() surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path) assert os.path.exists(path), "File doesn't exist" - # os.unlink(path) + os.unlink(path) assert isinstance(surface, ImageSurface) \ No newline at end of file From 46ad07f554a6c15ac87014c1001b4eab23fdc742 Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 31 Jul 2024 19:29:07 -0400 Subject: [PATCH 07/22] Small optimization. Don't make a list if the any() can break early from a generator. --- rmgpy/molecule/draw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 8073c12806..e7d5f0901b 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -867,7 +867,7 @@ def _generate_functional_group_coordinates(self, atom0, atom1): # Check to see if atom1 is in any cycles in the molecule ring_system = None for ring_sys in self.ringSystems: - if any([atom1 in ring for ring in ring_sys]): + if any(atom1 in ring for ring in ring_sys): ring_system = ring_sys if ring_system is not None: From 852e55d18ff25867b33fca324b90735d70d43dec Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 31 Jul 2024 19:30:19 -0400 Subject: [PATCH 08/22] Don't bond together sites that are attached to the main ring system. This is an attempt to address complications with drawing fused rings, when you join together two surface sites that are adsorbing things in a ring. --- rmgpy/molecule/draw.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index e7d5f0901b..e4584336b5 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -1643,17 +1643,24 @@ def _connect_surface_sites(self): This makes bidentate adsorbates look better. Not well tested for things with n>2 surface sites. """ - for atom1 in self.molecule.atoms: - if atom1.is_surface_site(): - other_sites = [a for a in self.molecule.atoms if a.is_surface_site() and a != atom1] - if not other_sites: break - nearest_sites = sorted(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) - atom2 = nearest_sites[0] - bond = atom1.bonds.get(atom2) - if bond is None: - bond = Bond(atom1, atom2, 1) - atom1.bonds[atom2] = bond - atom2.bonds[atom1] = bond + + ring_backbone = self._find_cyclic_backbone()[0] if self.cycles else [] + if ring_backbone: + print('Ring backbone found: {0}'.format(ring_backbone)) + sites = [a for a in self.molecule.atoms if a.is_surface_site()] + for atom1 in sites: + if atom1.bonds and next(iter(atom1.bonds)) in ring_backbone: + print(f"Atom {atom1} is attached to an atom in the ring backbone, so don't connect it.") + continue + other_sites = [a for a in sites if a != atom1] + if not other_sites: break + nearest_sites = sorted(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) + atom2 = nearest_sites[0] + bond = atom1.bonds.get(atom2) + if bond is None: + bond = Bond(atom1, atom2, 1) + atom1.bonds[atom2] = bond + atom2.bonds[atom1] = bond def _disconnect_surface_sites(self): """ From cb948036b7098b6d3c0b07c79038286a07105d6d Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 31 Jul 2024 19:33:43 -0400 Subject: [PATCH 09/22] If you have more than 3 sites, rotate molecule and draw sites vertically below. --- rmgpy/molecule/draw.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index e4584336b5..d5242f31e0 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -490,6 +490,33 @@ def _generate_coordinates(self): # if sites are above the middle, flip it if coordinates[atoms.index(sites[0]), 1] > coordinates[:, 1].mean(): coordinates[:, 1] *= -1 + elif len(sites) > 2: + # find atoms bonded to sites + bonded = [] + for site in sites: + for atom in site.bonds: + if atom not in sites: + bonded.append(atom) + # find the best fit line through the bonded atoms + x = coordinates[[atoms.index(atom) for atom in bonded], 0] + y = coordinates[[atoms.index(atom) for atom in bonded], 1] + A = np.vstack([x, np.ones(len(x))]).T + m, c = np.linalg.lstsq(A, y, rcond=None)[0] + # rotate so the line is horizontal + angle = -math.atan(m) + rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) + self.coordinates = coordinates = np.dot(coordinates, rot) + # if the line is above the middle, flip it + if c > coordinates[[atoms.index(a) for a in atoms if not a.is_surface_site()], 1].mean(): + coordinates[:, 1] *= -1 + x = coordinates[[atoms.index(atom) for atom in bonded], 0] + y = coordinates[[atoms.index(atom) for atom in bonded], 1] + site_y_pos = min(y) - 0.8 + for site, x_pos in zip(sites, x): + index = atoms.index(site) + coordinates[index, 1] = site_y_pos + coordinates[index, 0] = x_pos + def _find_cyclic_backbone(self): """ From d30b496008d6fa89118c1d966c3b37f958cb00da Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 31 Jul 2024 19:49:03 -0400 Subject: [PATCH 10/22] Use the new rotating method for all polydentates. --- rmgpy/molecule/draw.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index d5242f31e0..992cc1e60e 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -481,16 +481,7 @@ def _generate_coordinates(self): coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit coordinates[index, 0] = coordinates[:, 0].mean() # and center it sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()] - if len(sites) == 2: - # rotate so the sites are horizontal - vector0 = coordinates[atoms.index(sites[1]), :] - coordinates[atoms.index(sites[0]), :] - angle = math.atan2(vector0[0], vector0[1]) - math.pi / 2 - rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) - self.coordinates = coordinates = np.dot(coordinates, rot) - # if sites are above the middle, flip it - if coordinates[atoms.index(sites[0]), 1] > coordinates[:, 1].mean(): - coordinates[:, 1] *= -1 - elif len(sites) > 2: + if len(sites) >= 2: # find atoms bonded to sites bonded = [] for site in sites: From d2f9ab60d93cda5d1d26133b0161c12a444148f0 Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 11:39:15 -0400 Subject: [PATCH 11/22] Simplify drawing of ring-adsorbates The whole procedure of checking if an adatom is part of a ring was not really helping the graphics, and adding complication, so I removed it. --- rmgpy/molecule/draw.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 992cc1e60e..f3c8e5228d 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -1661,19 +1661,11 @@ def _connect_surface_sites(self): This makes bidentate adsorbates look better. Not well tested for things with n>2 surface sites. """ - - ring_backbone = self._find_cyclic_backbone()[0] if self.cycles else [] - if ring_backbone: - print('Ring backbone found: {0}'.format(ring_backbone)) sites = [a for a in self.molecule.atoms if a.is_surface_site()] for atom1 in sites: - if atom1.bonds and next(iter(atom1.bonds)) in ring_backbone: - print(f"Atom {atom1} is attached to an atom in the ring backbone, so don't connect it.") - continue other_sites = [a for a in sites if a != atom1] if not other_sites: break - nearest_sites = sorted(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) - atom2 = nearest_sites[0] + atom2 = min(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) bond = atom1.bonds.get(atom2) if bond is None: bond = Bond(atom1, atom2, 1) From 6c7f39e804367fe22aae457fec7b0d93e10e77d5 Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 11:40:08 -0400 Subject: [PATCH 12/22] Lower the sites a bit for some molecules. If the non-adatom part of the molecule pushed below the level of the sites, it looked bad. Now the sites are moved down a bit. --- rmgpy/molecule/draw.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index f3c8e5228d..a3893f2c6f 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -498,11 +498,12 @@ def _generate_coordinates(self): rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) self.coordinates = coordinates = np.dot(coordinates, rot) # if the line is above the middle, flip it - if c > coordinates[[atoms.index(a) for a in atoms if not a.is_surface_site()], 1].mean(): + not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()] + if c > coordinates[not_site_indices, 1].mean(): coordinates[:, 1] *= -1 x = coordinates[[atoms.index(atom) for atom in bonded], 0] y = coordinates[[atoms.index(atom) for atom in bonded], 1] - site_y_pos = min(y) - 0.8 + site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5) for site, x_pos in zip(sites, x): index = atoms.index(site) coordinates[index, 1] = site_y_pos From 4452f1f78e4181af1d1efe95d7eb953ca996a79d Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 11:59:46 -0400 Subject: [PATCH 13/22] Small optimization --- rmgpy/molecule/draw.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index a3893f2c6f..7a02f850bb 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -483,14 +483,11 @@ def _generate_coordinates(self): sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()] if len(sites) >= 2: # find atoms bonded to sites - bonded = [] - for site in sites: - for atom in site.bonds: - if atom not in sites: - bonded.append(atom) + bonded = [next(iter(site.bonds)) for site in sites] + bonded_indices = [atoms.index(atom) for atom in bonded] # find the best fit line through the bonded atoms - x = coordinates[[atoms.index(atom) for atom in bonded], 0] - y = coordinates[[atoms.index(atom) for atom in bonded], 1] + x = coordinates[bonded_indices, 0] + y = coordinates[bonded_indices, 1] A = np.vstack([x, np.ones(len(x))]).T m, c = np.linalg.lstsq(A, y, rcond=None)[0] # rotate so the line is horizontal @@ -501,8 +498,8 @@ def _generate_coordinates(self): not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()] if c > coordinates[not_site_indices, 1].mean(): coordinates[:, 1] *= -1 - x = coordinates[[atoms.index(atom) for atom in bonded], 0] - y = coordinates[[atoms.index(atom) for atom in bonded], 1] + x = coordinates[bonded_indices, 0] + y = coordinates[bonded_indices, 1] site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5) for site, x_pos in zip(sites, x): index = atoms.index(site) From 813da5d4d25e987e7974d290a6cd57dadfa880ed Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 12:28:46 -0400 Subject: [PATCH 14/22] Tidying up the new coordinate code a bit. Reduce redundancy. --- rmgpy/molecule/draw.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 7a02f850bb..adf31901b5 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -460,28 +460,27 @@ def _generate_coordinates(self): coordinates[:, 0] = temp[:, 1] coordinates[:, 1] = temp[:, 0] - # For surface species, rotate them so the site is at the bottom. + # For surface species if self.molecule.contains_surface_site(): if len(self.molecule.atoms) == 1: return coordinates - for site in self.molecule.atoms: - if site.is_surface_site(): - break - else: - raise Exception("Can't find surface site") - if site.bonds: - adsorbate = next(iter(site.bonds)) - vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :] - angle = math.atan2(vector0[0], vector0[1]) - math.pi - rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) - self.coordinates = coordinates = np.dot(coordinates, rot) - else: - # van der waals - index = atoms.index(site) - coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit - coordinates[index, 0] = coordinates[:, 0].mean() # and center it sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()] - if len(sites) >= 2: + if len(sites) == 1: + # rotate them so the site is at the bottom. + site = sites[0] + if site.bonds: + adsorbate = next(iter(site.bonds)) + vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :] + angle = math.atan2(vector0[0], vector0[1]) - math.pi + rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) + self.coordinates = coordinates = np.dot(coordinates, rot) + else: + # van der waals + index = atoms.index(site) + coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit + coordinates[index, 0] = coordinates[:, 0].mean() # and center it + else: # len(sites) >= 2: + # Rotate so the line of best fit through the adatoms is horizontal. # find atoms bonded to sites bonded = [next(iter(site.bonds)) for site in sites] bonded_indices = [atoms.index(atom) for atom in bonded] @@ -496,7 +495,7 @@ def _generate_coordinates(self): self.coordinates = coordinates = np.dot(coordinates, rot) # if the line is above the middle, flip it not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()] - if c > coordinates[not_site_indices, 1].mean(): + if coordinates[bonded_indices, 1].mean() > coordinates[not_site_indices, 1].mean(): coordinates[:, 1] *= -1 x = coordinates[bonded_indices, 0] y = coordinates[bonded_indices, 1] @@ -505,7 +504,6 @@ def _generate_coordinates(self): index = atoms.index(site) coordinates[index, 1] = site_y_pos coordinates[index, 0] = x_pos - def _find_cyclic_backbone(self): """ @@ -1656,13 +1654,13 @@ def _replace_bonds(self, bond_order_dictionary): def _connect_surface_sites(self): """ Creates single bonds between atoms that are surface sites. - This makes bidentate adsorbates look better. - Not well tested for things with n>2 surface sites. + This is to help make multidentate adsorbates look better. """ sites = [a for a in self.molecule.atoms if a.is_surface_site()] for atom1 in sites: other_sites = [a for a in sites if a != atom1] if not other_sites: break + # connect to the nearest site atom2 = min(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) bond = atom1.bonds.get(atom2) if bond is None: From af72ade2dfa1489c32c49bebffe3eb821f56ec0b Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 14:51:52 -0400 Subject: [PATCH 15/22] Don't mess with drawing surface sites if there are more than 4. Things just look bad when you try drawing 5 or more sites all in a row at the bottom. You're better off imagining you've got a birds'-eye view not a side view. --- rmgpy/molecule/draw.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index adf31901b5..875f3082f8 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -208,9 +208,12 @@ def draw(self, molecule, file_format, target=None): # replace the bonds after generating coordinates. This avoids # bugs with RDKit old_bond_dictionary = self._make_single_bonds() - self._connect_surface_sites() - self._generate_coordinates() - self._disconnect_surface_sites() + if molecule.contains_surface_site(): + self._connect_surface_sites() + self._generate_coordinates() + self._disconnect_surface_sites() + else: + self._generate_coordinates() self._replace_bonds(old_bond_dictionary) # Generate labels to use @@ -479,7 +482,7 @@ def _generate_coordinates(self): index = atoms.index(site) coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit coordinates[index, 0] = coordinates[:, 0].mean() # and center it - else: # len(sites) >= 2: + elif len(sites) <= 4: # Rotate so the line of best fit through the adatoms is horizontal. # find atoms bonded to sites bonded = [next(iter(site.bonds)) for site in sites] @@ -504,6 +507,9 @@ def _generate_coordinates(self): index = atoms.index(site) coordinates[index, 1] = site_y_pos coordinates[index, 0] = x_pos + else: + # more than 4 surface sites? leave them alone + pass def _find_cyclic_backbone(self): """ @@ -1657,6 +1663,8 @@ def _connect_surface_sites(self): This is to help make multidentate adsorbates look better. """ sites = [a for a in self.molecule.atoms if a.is_surface_site()] + if len(sites) > 4: + return for atom1 in sites: other_sites = [a for a in sites if a != atom1] if not other_sites: break From 1eae699862cadb96816a6d4de3198480671610b7 Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 15:14:09 -0400 Subject: [PATCH 16/22] Test: Draw a crazy big polydentate adsorbate --- test/rmgpy/molecule/drawTest.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index 3bfa734cc8..6e314fd545 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -258,7 +258,35 @@ def test_draw_bidentate_adsorbate(self): 13 H u0 p0 c0 {7,S} 14 H u0 p0 c0 {7,S} """)) - + test_molecules.append(Molecule().from_adjacency_list( +""" +1 O u0 p2 c0 {4,S} {5,S} +2 O u0 p2 c0 {5,S} {6,S} +3 O u0 p2 c0 {6,S} {26,S} +4 C u0 p0 c0 {1,S} {7,S} {8,S} {19,S} +5 C u0 p0 c0 {1,S} {2,S} {10,S} {21,S} +6 C u0 p0 c0 {2,S} {3,S} {9,S} {20,S} +7 C u0 p0 c0 {4,S} {15,S} {16,S} {24,S} +8 C u0 p0 c0 {4,S} {17,S} {18,S} {25,S} +9 C u0 p0 c0 {6,S} {11,S} {12,S} {22,S} +10 C u0 p0 c0 {5,S} {13,S} {14,S} {23,S} +11 H u0 p0 c0 {9,S} +12 H u0 p0 c0 {9,S} +13 H u0 p0 c0 {10,S} +14 H u0 p0 c0 {10,S} +15 H u0 p0 c0 {7,S} +16 H u0 p0 c0 {7,S} +17 H u0 p0 c0 {8,S} +18 H u0 p0 c0 {8,S} +19 X u0 p0 c0 {4,S} +20 X u0 p0 c0 {6,S} +21 X u0 p0 c0 {5,S} +22 X u0 p0 c0 {9,S} +23 X u0 p0 c0 {10,S} +24 X u0 p0 c0 {7,S} +25 X u0 p0 c0 {8,S} +26 X u0 p0 c0 {3,S} +""")) for number, molecule in enumerate(test_molecules, 1): path = f"test_polydentate_{number}.png" if os.path.exists(path): From 6b1e3f35525c51e562dd3b677ae511f3e29b16d1 Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 20:26:45 -0400 Subject: [PATCH 17/22] Changing terminology. adatoms An adatom is an atom that is adsorbed to the surface. --- rmgpy/molecule/draw.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 875f3082f8..1d0ec996a4 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -472,24 +472,24 @@ def _generate_coordinates(self): # rotate them so the site is at the bottom. site = sites[0] if site.bonds: - adsorbate = next(iter(site.bonds)) - vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :] + adatom = next(iter(site.bonds)) + vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adatom), :] angle = math.atan2(vector0[0], vector0[1]) - math.pi rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float) self.coordinates = coordinates = np.dot(coordinates, rot) else: - # van der waals + # van der Waals index = atoms.index(site) coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit coordinates[index, 0] = coordinates[:, 0].mean() # and center it elif len(sites) <= 4: # Rotate so the line of best fit through the adatoms is horizontal. # find atoms bonded to sites - bonded = [next(iter(site.bonds)) for site in sites] - bonded_indices = [atoms.index(atom) for atom in bonded] + adatoms = [next(iter(site.bonds)) for site in sites] + adatom_indices = [atoms.index(a) for a in adatoms] # find the best fit line through the bonded atoms - x = coordinates[bonded_indices, 0] - y = coordinates[bonded_indices, 1] + x = coordinates[adatom_indices, 0] + y = coordinates[adatom_indices, 1] A = np.vstack([x, np.ones(len(x))]).T m, c = np.linalg.lstsq(A, y, rcond=None)[0] # rotate so the line is horizontal @@ -498,10 +498,10 @@ def _generate_coordinates(self): self.coordinates = coordinates = np.dot(coordinates, rot) # if the line is above the middle, flip it not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()] - if coordinates[bonded_indices, 1].mean() > coordinates[not_site_indices, 1].mean(): + if coordinates[adatom_indices, 1].mean() > coordinates[not_site_indices, 1].mean(): coordinates[:, 1] *= -1 - x = coordinates[bonded_indices, 0] - y = coordinates[bonded_indices, 1] + x = coordinates[adatom_indices, 0] + y = coordinates[adatom_indices, 1] site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5) for site, x_pos in zip(sites, x): index = atoms.index(site) From 9a95820330a3851b39b9ee60bf9143673da9919f Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 1 Aug 2024 20:27:18 -0400 Subject: [PATCH 18/22] Change terminology in _[dis]connect_surface_sites --- rmgpy/molecule/draw.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 1d0ec996a4..dc10e58253 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -1665,27 +1665,27 @@ def _connect_surface_sites(self): sites = [a for a in self.molecule.atoms if a.is_surface_site()] if len(sites) > 4: return - for atom1 in sites: - other_sites = [a for a in sites if a != atom1] + for site1 in sites: + other_sites = [a for a in sites if a != site1] if not other_sites: break # connect to the nearest site - atom2 = min(other_sites, key=lambda a: len(find_shortest_path(atom1, a))) - bond = atom1.bonds.get(atom2) + site2 = min(other_sites, key=lambda a: len(find_shortest_path(site1, a))) + bond = site1.bonds.get(site2) if bond is None: - bond = Bond(atom1, atom2, 1) - atom1.bonds[atom2] = bond - atom2.bonds[atom1] = bond + bond = Bond(site1, site2, 1) + site1.bonds[site2] = bond + site2.bonds[site1] = bond def _disconnect_surface_sites(self): """ Removes all bonds between atoms that are surface sites. """ - for atom1 in self.molecule.atoms: - if atom1.is_surface_site(): - for atom2 in list(atom1.bonds.keys()): # make a list copy so we can delete from the dict - if atom2.is_surface_site(): - del atom1.bonds[atom2] - del atom2.bonds[atom1] + for site1 in self.molecule.atoms: + if site1.is_surface_site(): + for site2 in list(site1.bonds.keys()): # make a list copy so we can delete from the dict + if site2.is_surface_site(): + del site1.bonds[site2] + del site2.bonds[site1] ################################################################################ From dfcfa062e13155228a76ce9dc63ef8fbcdfd68ec Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 7 Aug 2024 10:11:37 -0400 Subject: [PATCH 19/22] If more than 3 surface sites, don't connect sites that aren't neighbors. Otherwise *CC(*)(C*)C* is misleading May undo this commit. --- rmgpy/molecule/draw.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index dc10e58253..1e2656c12a 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -1670,6 +1670,10 @@ def _connect_surface_sites(self): if not other_sites: break # connect to the nearest site site2 = min(other_sites, key=lambda a: len(find_shortest_path(site1, a))) + if len(find_shortest_path(site1, site2)) > 2 and len(sites) > 3: + # if there are more than 3 sites, don't connect sites that aren't neighbors + continue + bond = site1.bonds.get(site2) if bond is None: bond = Bond(site1, site2, 1) From b9c7cfce637972c2dbd512be6245c5e772f56c1d Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 7 Aug 2024 17:07:57 -0400 Subject: [PATCH 20/22] If adsorbate site drawing causes problems, do it without. If two surface sites overlap, or if a site-adatom bond is too long, then give up on the fancy algorithm, and just let RDkit place the X atoms wherever, without them being bonded. --- rmgpy/molecule/draw.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index 1e2656c12a..cc74aa56c6 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -49,6 +49,7 @@ import math import os.path import re +import itertools try: import cairocffi as cairo @@ -97,6 +98,12 @@ def create_new_surface(file_format, target=None, width=1024, height=768): ################################################################################ +class AdsorbateDrawingError(Exception): + """ + When something goes wrong trying to draw an adsorbate. + """ + pass + class MoleculeDrawer(object): """ This class provides functionality for drawing the skeletal formula of @@ -209,9 +216,13 @@ def draw(self, molecule, file_format, target=None): # bugs with RDKit old_bond_dictionary = self._make_single_bonds() if molecule.contains_surface_site(): - self._connect_surface_sites() - self._generate_coordinates() - self._disconnect_surface_sites() + try: + self._connect_surface_sites() + self._generate_coordinates() + self._disconnect_surface_sites() + except AdsorbateDrawingError as e: + self._disconnect_surface_sites() + self._generate_coordinates(fix_surface_sites=False) else: self._generate_coordinates() self._replace_bonds(old_bond_dictionary) @@ -329,11 +340,13 @@ def _find_ring_groups(self): if not found: self.ringSystems.append([cycle]) - def _generate_coordinates(self): + def _generate_coordinates(self, fix_surface_sites=True): """ Generate the 2D coordinates to be used when drawing the current molecule. The function uses rdKits 2D coordinate generation. Updates the self.coordinates Array in place. + If `fix_surface_sites` is True, then the surface sites are placed + at the bottom of the molecule. """ atoms = self.molecule.atoms natoms = len(atoms) @@ -464,7 +477,7 @@ def _generate_coordinates(self): coordinates[:, 1] = temp[:, 0] # For surface species - if self.molecule.contains_surface_site(): + if fix_surface_sites and self.molecule.contains_surface_site(): if len(self.molecule.atoms) == 1: return coordinates sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()] @@ -503,10 +516,16 @@ def _generate_coordinates(self): x = coordinates[adatom_indices, 0] y = coordinates[adatom_indices, 1] site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5) + if max(y) - site_y_pos > 1.5: + raise AdsorbateDrawingError("Adsorbate bond too long") + for x1, x2 in itertools.combinations(x, 2): + if abs(x1 - x2) < 0.2: + raise AdsorbateDrawingError("Sites overlapping") for site, x_pos in zip(sites, x): index = atoms.index(site) coordinates[index, 1] = site_y_pos coordinates[index, 0] = x_pos + else: # more than 4 surface sites? leave them alone pass From 89990aded8fe3827b5a6b9338832ef8de1235ff6 Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 7 Aug 2024 17:08:24 -0400 Subject: [PATCH 21/22] Remove some now-redundant imports. Previous commit put the import at the top of the file. --- rmgpy/molecule/draw.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index cc74aa56c6..3792772af3 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -409,7 +409,6 @@ def _generate_coordinates(self, fix_surface_sites=True): # If two atoms lie on top of each other, push them apart a bit # This is ugly, but at least the mess you end up with isn't as misleading # as leaving everything piled on top of each other at the origin - import itertools for atom1, atom2 in itertools.combinations(backbone, 2): i1, i2 = atoms.index(atom1), atoms.index(atom2) if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5: @@ -421,7 +420,6 @@ def _generate_coordinates(self, fix_surface_sites=True): # If two atoms lie on top of each other, push them apart a bit # This is ugly, but at least the mess you end up with isn't as misleading # as leaving everything piled on top of each other at the origin - import itertools for atom1, atom2 in itertools.combinations(backbone, 2): i1, i2 = atoms.index(atom1), atoms.index(atom2) if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5: From fd5bdfa78c91afadcf20c3eb9d152804e8030845 Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 7 Aug 2024 17:09:47 -0400 Subject: [PATCH 22/22] Two more test molecules for adsorbate drawing. --- test/rmgpy/molecule/drawTest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index 6e314fd545..6c416f171d 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -287,6 +287,8 @@ def test_draw_bidentate_adsorbate(self): 25 X u0 p0 c0 {8,S} 26 X u0 p0 c0 {3,S} """)) + test_molecules.append(Molecule(smiles="*CC(*)(C*)OCC#*")) + test_molecules.append(Molecule(smiles="*CC(*)(C*)C*")) for number, molecule in enumerate(test_molecules, 1): path = f"test_polydentate_{number}.png" if os.path.exists(path):