diff --git a/doc/source/tutorials/images/1choFH_mapping_pymol.png b/doc/source/tutorials/images/1choFH_mapping_pymol.png new file mode 100644 index 00000000..f7b8a54d Binary files /dev/null and b/doc/source/tutorials/images/1choFH_mapping_pymol.png differ diff --git a/doc/source/tutorials/images/custom_roi_no_map.png b/doc/source/tutorials/images/custom_roi_no_map.png new file mode 100644 index 00000000..6f5339d7 Binary files /dev/null and b/doc/source/tutorials/images/custom_roi_no_map.png differ diff --git a/doc/source/tutorials/images/custom_roi_ring_break_map.png b/doc/source/tutorials/images/custom_roi_ring_break_map.png new file mode 100644 index 00000000..8f5a5fa4 Binary files /dev/null and b/doc/source/tutorials/images/custom_roi_ring_break_map.png differ diff --git a/doc/source/tutorials/protein_mutations.rst b/doc/source/tutorials/protein_mutations.rst index 28af5b8c..dd066f1e 100644 --- a/doc/source/tutorials/protein_mutations.rst +++ b/doc/source/tutorials/protein_mutations.rst @@ -198,7 +198,7 @@ figure out the residue index of our residue of interest (ROI): We can see that the residue with the index value of 8 are different between the two proteins. Let’s pass this value to the -```BioSimSpace.Align.matchAtoms`` `__ +`BioSimSpace.Align.matchAtoms `__ function: .. code:: ipython3 @@ -398,3 +398,169 @@ simulations `__. + +.. code:: ipython3 + + wt = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_wt_flare_processed.pdb") + )[0] + mut = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_mut_flare_processed.pdb") + )[0] + + wt = BSS.Parameters.ff14SB(wt, ensure_compatible=False).getMolecule() + mut = BSS.Parameters.ff14SB(mut, ensure_compatible=False).getMolecule() + + + +Comparing the residues between two proteins shows us that the residues +at index 15 are different between the proteins + +.. code:: ipython3 + + roi = [] + for i, res in enumerate(wt.getResidues()): + if res.name() != mut.getResidues()[i].name(): + print(res, mut.getResidues()[i]) + roi.append(res.index()) + + +.. parsed-literal:: + + + + +By default, the +`BioSimSpace.Align.matchAtoms `__ +would fail to create a mapping for the ROI region, as the underlying +RDKit MCS algorithm would be unable to determine a mapping between two +molecular graphs with a fundamental topological mismatch. Because +Proline’s sidechain forms a cyclic system with the backbone, its atoms +exist in a ring topology. The algorithm restricts the mapping of cyclic +atoms to acyclic atoms (a behavior governed by parameters like +``ringMatchesRingOnly``) to preserve the chemical integrity of the +substructure. Consequently, a 1:1 mapping between the ring-bound +sidechain of Proline and the acyclic sidechain of the Leucine residue +cannot be determined. + +Instead we can use the ``custom_roi_map`` argument of the +`BioSimSpace.Align.matchAtoms `__ +to override the RDKit MCS mapping. For example we can force an empty +mapping between the two residues: + +.. code:: ipython3 + + mapping = BSS.Align.matchAtoms(molecule0=wt, molecule1=mut, roi=[15], custom_roi_map={}) + +.. code:: ipython3 + + BSS.Align.viewMapping(wt, mut, mapping, roi=15, pixels=500) + + + +.. image:: images/custom_roi_no_map.png + :width: 800 + + +If we know the correct 1:1 atom mapping between the two residues, we can +pass that to the ``custom_roi_map`` which will allows us to setup an +alchemical bond transformation for mutating the leucine residue to +proline. Note that **absolute atom indices need to be passed, i.e the +indices of the residues in the context of the whole protein**. We can +use something like PyMol to help us map the atoms in the right order: + +.. image:: images/1choFH_mapping_pymol.png + :width: 800 + +.. code:: ipython3 + + mapping = BSS.Align.matchAtoms(molecule0=wt, molecule1=mut, roi=[15], custom_roi_map={204:204,205:205,203:203,202:202,211:208,206:206,213:210,207:207,214:211,210:213}) + +.. code:: ipython3 + + BSS.Align.viewMapping(wt, mut, mapping, roi=15, pixels=500) + + + +.. image:: images/custom_roi_ring_break_map.png + :width: 800 + + +We can then use ``allow_ring_breaking=True`` argument of the +`BioSimSpace.Align.merge `__ +to create the required alchemical transformation: + +.. code:: ipython3 + + aligned_wt = BSS.Align.rmsdAlign(molecule0=wt, mapping=mapping, molecule1=mut) + merged_protein = BSS.Align.merge(aligned_wt, mut, mapping, allow_ring_breaking=True, roi=[15]) + +But how do we actually know that the merge has built a perturbable +molecule that now has a bond annihilation or creation involved? We can +use Sire’s conversion features to check what kinds of alchemical +modifications are happening in our perturbable molecule. You can check +out the corresponding `Sire +tutorial `__ +for more details. + +.. code:: ipython3 + + merged_protein_sire = merged_protein._sire_object + pert = merged_protein_sire.perturbation() + pert_omm = pert.to_openmm(map={"coordinates":"coordinates0"}) + + pert_omm.changed_bonds(to_pandas=True) + + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
bondlength0length1k0k1
0N:203-H:2110.10100.1449363171.2282001.6
1CG:208-H:2110.15260.15260.0259408.0
+
+ + + +By comparing the ``k0`` and ``k1`` values in the changed bond dataframe, +we can see that the transformation is going to result in a bond being +created. diff --git a/src/BioSimSpace/Align/_align.py b/src/BioSimSpace/Align/_align.py index 1e551c8c..2602bb5c 100644 --- a/src/BioSimSpace/Align/_align.py +++ b/src/BioSimSpace/Align/_align.py @@ -713,6 +713,7 @@ def matchAtoms( complete_rings_only=True, max_scoring_matches=1000, roi=None, + custom_roi_map=None, prune_perturbed_constraints=False, prune_crossing_constraints=False, prune_atom_types=False, @@ -778,6 +779,12 @@ def matchAtoms( The region of interest to match. Consists of a list of ROI residue indices. + custom_roi_map : dict + A dictionary that maps atom indices in molecule0 to those in + molecule1 within the region of interest. This allows the user + override the MCS mapping within the region of interest. + Only relevant when 'roi' is not None. + prune_perturbed_constraints : bool Whether to remove hydrogen atoms that are perturbed to heavy atoms from the mapping. This option should be True when creating mappings @@ -851,6 +858,11 @@ def matchAtoms( >>> import BioSimSpace as BSS >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12, 13, 14]) + + Find the mapping between two molecules with a custom region of interest mapping. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12], custom_roi_map={0 : 10, 3 : 7}) """ if roi is None: @@ -872,9 +884,10 @@ def matchAtoms( ) else: return _roiMatch( - molecule0=molecule0, - molecule1=molecule1, - roi=roi, + molecule0, + molecule1, + roi, + custom_roi_map, prune_perturbed_constraints=prune_perturbed_constraints, prune_crossing_constraints=prune_crossing_constraints, prune_atom_types=prune_atom_types, @@ -1215,6 +1228,7 @@ def _roiMatch( molecule0, molecule1, roi, + custom_roi_map, prune_perturbed_constraints=False, prune_crossing_constraints=False, prune_atom_types=False, @@ -1240,6 +1254,11 @@ def _roiMatch( The region of interest to match. Consists of a list of ROI residue indices + custom_roi_map : dict + A dictionary that maps atom indices in molecule0 to those in + molecule1 within the region of interest. This allows the user + override the MCS mapping within the region of interest. + prune_perturbed_constraints : bool Whether to remove hydrogen atoms that are perturbed to heavy atoms from the mapping. This option should be True when creating mappings @@ -1308,6 +1327,12 @@ def _roiMatch( >>> import BioSimSpace as BSS >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) + + Find the mapping between two molecules with a custom region of interest mapping. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12], custom_roi_map={0 : 10, 3 : 7}) + """ from .._SireWrappers import Molecule as _Molecule @@ -1413,22 +1438,41 @@ def _roiMatch( res0_extracted, res1_extracted, kartograf_kwargs ) mapping = kartograf_mapping.componentA_to_componentB + + # Prevent the MCS mapping from being generated if a custom ROI mapping is provided. + elif custom_roi_map is not None: + # Validate custom_roi_map is a dict and is inside the ROI region + if not isinstance(custom_roi_map, dict): + raise TypeError("'custom_roi_map' must be of type 'dict'") + for k, v in custom_roi_map.items(): + if k not in res0_idx: + raise ValueError( + f"Key {k} in 'custom_roi_map' is not in the ROI region of molecule0." + ) + if v not in res1_idx: + raise ValueError( + f"Value {v} in 'custom_roi_map' is not in the ROI region of molecule1." + ) + mapping = None else: mapping = matchAtoms( res0_extracted, res1_extracted, ) - # Look up the absolute atom indices in the molecule - res0_lookup_table = list(mapping.keys()) - absolute_mapped_atoms_res0 = [res0_idx[i] for i in res0_lookup_table] + # Look up the absolute atom indices in the molecule if not using a custom ROI mapping. + if custom_roi_map is not None: + absolute_roi_mapping = custom_roi_map + else: + res0_lookup_table = list(mapping.keys()) + absolute_mapped_atoms_res0 = [res0_idx[i] for i in res0_lookup_table] - res1_lookup_table = list(mapping.values()) - absolute_mapped_atoms_res1 = [res1_idx[i] for i in res1_lookup_table] + res1_lookup_table = list(mapping.values()) + absolute_mapped_atoms_res1 = [res1_idx[i] for i in res1_lookup_table] - absolute_roi_mapping = dict( - zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) - ) + absolute_roi_mapping = dict( + zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) + ) # If we are at the last residue of interest, we don't need to worry # too much about the after ROI region as this region will be all of the diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 23edc071..be01565e 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -671,6 +671,105 @@ def test_roi_flex_align(protein_inputs): assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) +def test_empty_custom_roi_mapping(): + # mut contains a proline mutation at position 15 + wt = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_wt_flare_processed.pdb") + )[0] + mut = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_mut_flare_processed.pdb") + )[0] + + # use the custom_roi_map to specify that residue 15 in the WT protein should be + # excluded from the ROI mapping, even though it is in the ROI list + roi_res_idx = [a.index() for a in wt.getResidues()[15].getAtoms()] + mapping = BSS.Align.matchAtoms( + molecule0=wt, molecule1=mut, roi=[15], custom_roi_map={} + ) + + # check that the mapping does not contain any atoms of the region of interest of WT protein + for atom_idx in roi_res_idx: + assert atom_idx not in mapping.keys() + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_custom_roi_ring_break_merge(): + # wt contains a leucine at position 15 + # mut contains a proline at position 15 + wt = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_wt_flare_processed.pdb") + )[0] + mut = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_mut_flare_processed.pdb") + )[0] + + wt = BSS.Parameters.ff14SB(wt, ensure_compatible=False).getMolecule() + mut = BSS.Parameters.ff14SB(mut, ensure_compatible=False).getMolecule() + + # use the custom_roi_map to specify that residue 15 in the WT protein should be + # excluded from the ROI mapping, even though it is in the ROI list + mapping = BSS.Align.matchAtoms( + molecule0=wt, + molecule1=mut, + roi=[15], + custom_roi_map={ + 204: 204, + 205: 205, + 203: 203, + 202: 202, + 211: 208, + 206: 206, + 213: 210, + 207: 207, + 214: 211, + 210: 213, + }, + ) + + aligned_wt = BSS.Align.rmsdAlign(molecule0=wt, mapping=mapping, molecule1=mut) + merged_protein = BSS.Align.merge( + aligned_wt, mut, mapping, allow_ring_breaking=True, roi=[15] + ) + + merged_protein_sire = merged_protein._sire_object + pert = merged_protein_sire.perturbation() + pert_omm = pert.to_openmm(map={"coordinates": "coordinates0"}) + + changed_bonds_df = pert_omm.changed_bonds(to_pandas=True) + n_bonds_created = (changed_bonds_df["k0"] == 0).sum() + n_bonds_annihilated = (changed_bonds_df["k1"] == 0).sum() + + # assert that exactly one bond is being created, as mutating + # from leucine to proline should create a new bond in the ring of proline + assert n_bonds_created == 1 + assert n_bonds_annihilated == 0 + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_custom_roi_map_invalid_outside_roi(): + wt = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_wt_flare_processed.pdb") + )[0] + mut = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"1choFH_apo_mut_flare_processed.pdb") + )[0] + + wt = BSS.Parameters.ff14SB(wt, ensure_compatible=False).getMolecule() + mut = BSS.Parameters.ff14SB(mut, ensure_compatible=False).getMolecule() + + # provide some invalid mapping that is outside of the ROI, which should raise an error + with pytest.raises(ValueError): + mapping = BSS.Align.matchAtoms( + molecule0=wt, + molecule1=mut, + roi=[15], + + custom_roi_map={ + 0: 0, + 1: 1, + 2: 2, + }, + ) + + @pytest.mark.skipif(has_amber is False, reason="Requires AMBER and to be installed.") def test_roi_merge(protein_inputs): proteins, protein_mapping, roi = protein_inputs @@ -1214,9 +1313,9 @@ def test_ring_breaking_cross_bond_cleanup(): mol_info.atom_idx(p.atom3()).value(), } for a, b in changing: - assert not (a in atoms and b in atoms), ( - f"improper{suffix} spans absent bond ({a},{b})" - ) + assert not ( + a in atoms and b in atoms + ), f"improper{suffix} spans absent bond ({a},{b})" # Check that the ring-breaking and ring-making bond properties are set. def _read_pairs(prop_name): @@ -1227,9 +1326,9 @@ def _read_pairs(prop_name): stored_breaking = _read_pairs("ring_breaking_bonds") stored_making = _read_pairs("ring_making_bonds") - assert stored_breaking == ring_breaking, ( - f"ring_breaking_bonds property mismatch: {stored_breaking} != {ring_breaking}" - ) - assert stored_making == ring_making, ( - f"ring_making_bonds property mismatch: {stored_making} != {ring_making}" - ) + assert ( + stored_breaking == ring_breaking + ), f"ring_breaking_bonds property mismatch: {stored_breaking} != {ring_breaking}" + assert ( + stored_making == ring_making + ), f"ring_making_bonds property mismatch: {stored_making} != {ring_making}"