diff --git a/.img/loch.png b/.img/loch.png
index df1fbc2..09622d0 100644
Binary files a/.img/loch.png and b/.img/loch.png differ
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0043ca4..61e8edf 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
 
   # Python formatting and linting
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.4
+    rev: v0.15.4
     hooks:
       # Run the formatter
       - id: ruff-format
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6a4b6be..50de0be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,18 @@
 Changelog
 =========
 
+[2026.1.0](https://github.com/openbiosim/loch/compare/2025.2.0...2026.1.0) - Jun 2026
+-------------------------------------------------------------------------------------
+
+* Add support for getting and restoring sampling statistics.
+* Handle XED force field virtual sites [#17](https://github.com/OpenBioSim/loch/pull/17).
+* Add support for long-range Lennard-Jones dispersion correction [#18](https://github.com/OpenBioSim/loch/pull/18).
+* Add support for Beutler soft-core Lennard-Jones form [#18](https://github.com/OpenBioSim/loch/pull/18).
+* Fixed type check for ``water_template`` [#19](https://github.com/OpenBioSim/loch/pull/19).
+* Add support for simulations in the osmotic ensemble [#22](https://github.com/OpenBioSim/loch/pull/22).
+* Fixed non-uniform bulk insertion positions caused by use of normal rather than uniform random numbers [#24](https://github.com/OpenBioSim/loch/pull/24).
+* Add methods to update the system with the current water state and return system without ghost waters [#26](https://github.com/OpenBioSim/loch/pull/26).
+
 [2025.2.0](https://github.com/openbiosim/loch/compare/2025.1.0...2025.2.0) - Feb 2026
 -------------------------------------------------------------------------------------
 
diff --git a/README.md b/README.md
index 800f504..e6691e0 100644
--- a/README.md
+++ b/README.md
@@ -51,11 +51,12 @@ pip install -e .
 > [!Note]
 > Pixi does not run conda post-link scripts, so the `ocl-icd-system`
 > symlink needed for OpenCL won't be created automatically. After
-> creating the environment, run the following once to fix this:
+> creating the environment (or after a pixi update), run the following
+> to fix this:
 >
 > ```bash
 > pixi shell
-> ln -s /etc/OpenCL/vendors "${CONDA_PREFIX}/etc/OpenCL/vendors/ocl-icd-system"
+> ln -sfn /etc/OpenCL/vendors "${CONDA_PREFIX}/etc/OpenCL/vendors/ocl-icd-system"
 > ```
 
 ### Installing from source (full OpenBioSim development)
diff --git a/pixi.toml b/pixi.toml
index 446a378..16c6cff 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -5,7 +5,10 @@ platforms = ["linux-64", "osx-arm64", "win-64"]
 
 [dependencies]
 python = ">=3.10"
-biosimspace = "*"
+# main
+biosimspace = ">=2026.1.0,<2026.2.0"
+# devel
+#biosimspace = "==2026.2.0.dev"
 loguru = "*"
 pyopencl = "*"
 
diff --git a/recipes/loch/recipe.yaml b/recipes/loch/recipe.yaml
index d840361..1e854de 100644
--- a/recipes/loch/recipe.yaml
+++ b/recipes/loch/recipe.yaml
@@ -19,7 +19,10 @@ requirements:
     - setuptools
     - versioningit
   run:
-    - biosimspace
+    # main
+    - biosimspace >=2026.1.0,<2026.2.0
+    # devel
+    #- biosimspace ==2026.2.0.dev
     - loguru
     - pyopencl
     - python
diff --git a/src/loch/__init__.py b/src/loch/__init__.py
index 45a2c0c..2c8d0e7 100644
--- a/src/loch/__init__.py
+++ b/src/loch/__init__.py
@@ -20,5 +20,6 @@
 #####################################################################
 
 from ._sampler import *
+from ._softcore import *
 from ._utils import *
 from ._version import __version__
diff --git a/src/loch/_kernels.py b/src/loch/_kernels.py
index ba20ebb..8959791 100644
--- a/src/loch/_kernels.py
+++ b/src/loch/_kernels.py
@@ -272,7 +272,8 @@
             GLOBAL float* water_position,
             int is_target,
             GLOBAL float* randoms_rotation,
-            GLOBAL float* randoms_position,
+            GLOBAL float* randoms_position_sphere,
+            GLOBAL float* randoms_position_bulk,
             GLOBAL float* randoms_radius,
             GLOBAL const float* cell_matrix)
         {
@@ -319,9 +320,9 @@
                 if (is_target == 1)
                 {
                     // Generate a random position around the target using pre-generated normals.
-                    xyz[0] = randoms_position[tidx * 3];
-                    xyz[1] = randoms_position[tidx * 3 + 1];
-                    xyz[2] = randoms_position[tidx * 3 + 2];
+                    xyz[0] = randoms_position_sphere[tidx * 3];
+                    xyz[1] = randoms_position_sphere[tidx * 3 + 1];
+                    xyz[2] = randoms_position_sphere[tidx * 3 + 2];
 
                     float norm = sqrtf(xyz[0] * xyz[0] + xyz[1] * xyz[1] + xyz[2] * xyz[2]);
                     xyz[0] /= norm;
@@ -337,9 +338,9 @@
                 {
                     // Use pre-generated uniform randoms for bulk sampling.
                     float r[3];
-                    r[0] = randoms_position[tidx * 3];
-                    r[1] = randoms_position[tidx * 3 + 1];
-                    r[2] = randoms_position[tidx * 3 + 2];
+                    r[0] = randoms_position_bulk[tidx * 3];
+                    r[1] = randoms_position_bulk[tidx * 3 + 1];
+                    r[2] = randoms_position_bulk[tidx * 3 + 2];
 
                     for (int i = 0; i < 3; i++)
                     {
@@ -394,9 +395,11 @@
             float rf_cutoff,
             float rf_kappa,
             float rf_correction,
-            float sc_coulomb_power,
+            int softcore_form,
             float sc_shift_coulomb,
-            float sc_shift_delta)
+            float sc_shift_delta,
+            int sc_taylor_power,
+            float sc_beutler_alpha)
         {
             // Work out the atom index.
             const int idx_atom = GET_GLOBAL_ID(0);
@@ -538,7 +541,7 @@
                                 energy_coul[idx] += (q0 * q1) * (r_inv + (rf_kappa * r2) - rf_correction);
                             }
 
-                            // Zacharias soft-core potential.
+                            // Soft-core potential for ghost atoms.
                             else
                             {
                                 // Store required parameters.
@@ -549,35 +552,49 @@
                                 const float e = sqrtf(e0 * e1);
                                 const float a = alpha[idx_atom];
 
-                                // Compute the distance between the atoms.
-                                float r = sqrtf(r2);
+                                // Clamp r2 to avoid singularities.
+                                const float r2_sc = (r2 < 1e-6f) ? 1e-6f : r2;
 
-                                // Truncate the distance.
-                                if (r < 0.001f)
+                                // Precompute r^6 and sigma^6 using r2 directly (avoids sqrtf and powf).
+                                const float r6 = r2_sc * r2_sc * r2_sc;
+                                const float s2 = s * s;
+                                const float s6_val = s2 * s2 * s2;
+
+                                // Compute the LJ interaction using the chosen soft-core form.
+                                float sig6;
+                                float lj_prefactor = 1.0f;
+                                if (softcore_form == 1)
                                 {
-                                    r = 0.001f;
+                                    // Taylor soft-core LJ:
+                                    //   sig6 = sigma^6 / (alpha^m * sigma^6 + r^6)
+                                    const float alpha_m = (sc_taylor_power == 1) ? a
+                                        : (sc_taylor_power == 0) ? 1.0f
+                                        : powf(a, (float)sc_taylor_power);
+                                    sig6 = s6_val / (alpha_m * s6_val + r6);
                                 }
-
-                                // Compute the Lennard-Jones interaction.
-                                const float delta_lj = sc_shift_delta * a;
-                                const float s6 = powf(s, 6.0f) / powf((s * delta_lj) + (r * r), 3.0f);
-                                energy_lj[idx] += 4.0f * e * s6 * (s6 - 1.0f);
-
-                                // Compute the Coulomb power expression.
-                                float cpe;
-                                if (sc_coulomb_power == 0.0f)
+                                else if (softcore_form == 2)
                                 {
-                                    cpe = 1.0f;
+                                    // Beutler soft-core LJ:
+                                    //   sig6 = sigma^6 / (sc_beutler_alpha * sigma^6 * alpha + r^6)
+                                    //   V_LJ = (1 - alpha) * 4 * epsilon * sig6 * (sig6 - 1)
+                                    sig6 = s6_val / (sc_beutler_alpha * s6_val * a + r6);
+                                    lj_prefactor = 1.0f - a;
                                 }
                                 else
                                 {
-                                    cpe = powf((1.0f - a), sc_coulomb_power);
+                                    // Zacharias soft-core LJ:
+                                    //   sig6 = sigma^6 / (sigma*delta + r^2)^3
+                                    //   delta = shift_delta * alpha
+                                    const float delta_lj = sc_shift_delta * a;
+                                    const float denom = (s * delta_lj) + r2_sc;
+                                    sig6 = s6_val / (denom * denom * denom);
                                 }
+                                energy_lj[idx] += lj_prefactor * 4.0f * e * sig6 * (sig6 - 1.0f);
 
                                 // Compute the Coulomb interaction.
                                 energy_coul[idx] += (q0 * q1) *
-                                    ((cpe / sqrtf((sc_shift_coulomb * sc_shift_coulomb * a)
-                                    + (r * r))) + (rf_kappa * r2) - rf_correction);
+                                    ((1.0f / sqrtf((sc_shift_coulomb * sc_shift_coulomb * a)
+                                    + r2_sc)) + (rf_kappa * r2) - rf_correction);
 
                             }
                         }
diff --git a/src/loch/_platforms/_base.py b/src/loch/_platforms/_base.py
index 1584a4f..27b5380 100644
--- a/src/loch/_platforms/_base.py
+++ b/src/loch/_platforms/_base.py
@@ -55,6 +55,7 @@ def __init__(
 
         Parameters
         ----------
+
         device : int
             The GPU device index to use.
 
@@ -89,6 +90,7 @@ def compile_kernels(self) -> _Dict[str, _Callable]:
 
         Returns
         -------
+
         dict
             Dictionary mapping kernel names to callable kernel functions.
             Expected keys: 'update_water', 'deletion', 'water', 'energy',
@@ -103,11 +105,13 @@ def to_gpu(self, array: _np.ndarray) -> _Any:
 
         Parameters
         ----------
+
         array : numpy.ndarray
             Array to transfer to GPU.
 
         Returns
         -------
+
         GPU buffer
             Platform-specific GPU buffer object.
         """
@@ -120,6 +124,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Parameters
         ----------
+
         shape : tuple
             Shape of the array to allocate.
 
@@ -128,6 +133,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Returns
         -------
+
         GPU buffer
             Platform-specific GPU buffer object.
         """
@@ -140,11 +146,13 @@ def from_gpu(self, buffer: _Any) -> _np.ndarray:
 
         Parameters
         ----------
+
         buffer : GPU buffer
             Platform-specific GPU buffer to transfer from.
 
         Returns
         -------
+
         numpy.ndarray
             Array containing the data from GPU.
         """
@@ -185,6 +193,7 @@ def platform_name(self) -> str:
 
         Returns
         -------
+
         str
             Platform name ('cuda' or 'opencl').
         """
@@ -197,6 +206,7 @@ def cache_hit(self) -> bool:
 
         Returns
         -------
+
         bool
             True if kernels were loaded from cache, False if freshly compiled.
         """
@@ -209,6 +219,7 @@ def compiler_log(self) -> str:
 
         Returns
         -------
+
         str
             Compiler log output, or empty string if no warnings/messages.
         """
diff --git a/src/loch/_platforms/_cuda.py b/src/loch/_platforms/_cuda.py
index 4047115..ee220f2 100644
--- a/src/loch/_platforms/_cuda.py
+++ b/src/loch/_platforms/_cuda.py
@@ -68,6 +68,7 @@ def __init__(
 
         Parameters
         ----------
+
         device : int
             The CUDA device index to use.
 
@@ -113,6 +114,7 @@ def __init__(
         # Use the primary context (shared with OpenMM and other CUDA users).
         self._pycuda_context = self._cuda_device.retain_primary_context()
         self._pycuda_context.push()
+        self._push_count = 1
 
         self._device = self._pycuda_context.get_device()
 
@@ -134,6 +136,7 @@ def compile_kernels(self) -> _Dict[str, _Callable]:
 
         Returns
         -------
+
         dict
             Dictionary mapping kernel names to callable kernel functions.
         """
@@ -198,11 +201,13 @@ def to_gpu(self, array: _np.ndarray) -> _Any:
 
         Parameters
         ----------
+
         array : numpy.ndarray
             Array to transfer to GPU.
 
         Returns
         -------
+
         pycuda.gpuarray.GPUArray
             GPU array containing the data.
         """
@@ -214,6 +219,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Parameters
         ----------
+
         shape : tuple
             Shape of the array to allocate.
 
@@ -222,6 +228,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Returns
         -------
+
         pycuda.gpuarray.GPUArray
             Allocated GPU array.
         """
@@ -233,11 +240,13 @@ def from_gpu(self, buffer: _Any) -> _np.ndarray:
 
         Parameters
         ----------
+
         buffer : pycuda.gpuarray.GPUArray
             GPU array to transfer from.
 
         Returns
         -------
+
         numpy.ndarray
             Array containing the data from GPU.
         """
@@ -248,22 +257,26 @@ def push_context(self):
         Push the primary context onto the calling thread's context stack.
         """
         self._pycuda_context.push()
+        self._push_count += 1
 
     def pop_context(self):
         """
         Pop the primary context from the calling thread's context stack.
         """
         self._pycuda_context.pop()
+        self._push_count -= 1
 
     def cleanup(self):
         """
-        Clean up CUDA resources and pop the context pushed during __init__.
+        Clean up CUDA resources and pop all outstanding context pushes.
         """
         if self._pycuda_context is not None:
-            try:
-                self._pycuda_context.pop()
-            except Exception:
-                pass
+            for _ in range(self._push_count):
+                try:
+                    self._pycuda_context.pop()
+                except Exception:
+                    pass
+            self._push_count = 0
             self._pycuda_context = None
 
     @property
@@ -273,6 +286,7 @@ def platform_name(self) -> str:
 
         Returns
         -------
+
         str
             Platform name ('cuda').
         """
diff --git a/src/loch/_platforms/_opencl.py b/src/loch/_platforms/_opencl.py
index 6395c43..29f4e31 100644
--- a/src/loch/_platforms/_opencl.py
+++ b/src/loch/_platforms/_opencl.py
@@ -64,6 +64,7 @@ def __init__(
 
         Parameters
         ----------
+
         device : int
             The OpenCL device index to use.
 
@@ -131,6 +132,7 @@ def compile_kernels(self) -> _Dict[str, _Callable]:
 
         Returns
         -------
+
         dict
             Dictionary mapping kernel names to callable kernel functions.
         """
@@ -240,11 +242,13 @@ def to_gpu(self, array: _np.ndarray) -> _Any:
 
         Parameters
         ----------
+
         array : numpy.ndarray
             Array to transfer to GPU.
 
         Returns
         -------
+
         pyopencl.array.Array
             GPU array containing the data.
         """
@@ -256,6 +260,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Parameters
         ----------
+
         shape : tuple
             Shape of the array to allocate.
 
@@ -264,6 +269,7 @@ def empty(self, shape, dtype) -> _Any:
 
         Returns
         -------
+
         pyopencl.array.Array
             Allocated GPU array.
         """
@@ -275,11 +281,13 @@ def from_gpu(self, buffer: _Any) -> _np.ndarray:
 
         Parameters
         ----------
+
         buffer : pyopencl.array.Array
             GPU array to transfer from.
 
         Returns
         -------
+
         numpy.ndarray
             Array containing the data from GPU.
         """
@@ -299,6 +307,7 @@ def platform_name(self) -> str:
 
         Returns
         -------
+
         str
             Platform name ('opencl').
         """
diff --git a/src/loch/_platforms/_rng.py b/src/loch/_platforms/_rng.py
index 42699f4..26ae59c 100644
--- a/src/loch/_platforms/_rng.py
+++ b/src/loch/_platforms/_rng.py
@@ -38,18 +38,26 @@ class BatchRandoms:
 
     Attributes
     ----------
+
     rotation : numpy.ndarray
         Shape (batch_size, 3) array of uniform [0,1) randoms for water rotation.
-    position : numpy.ndarray
-        Shape (batch_size, 3) array of normal randoms for position direction.
+
+    position_sphere : numpy.ndarray
+        Shape (batch_size, 3) array of normal randoms for sphere position direction.
+
+    position_bulk : numpy.ndarray
+        Shape (batch_size, 3) array of uniform [0,1) randoms for bulk box sampling.
+
     radius : numpy.ndarray
         Shape (batch_size,) array of uniform [0,1) randoms for radial distance.
+
     acceptance : numpy.ndarray
         Shape (batch_size,) array of uniform [0,1) randoms for Metropolis acceptance.
     """
 
     rotation: _np.ndarray
-    position: _np.ndarray
+    position_sphere: _np.ndarray
+    position_bulk: _np.ndarray
     radius: _np.ndarray
     acceptance: _np.ndarray
 
@@ -75,6 +83,7 @@ def __init__(self, batch_size: int, seed: _Optional[int] = None) -> None:
 
         Parameters
         ----------
+
         batch_size : int
             Number of parallel trials in a batch.
 
@@ -96,7 +105,10 @@ def _generate_batch(self) -> BatchRandoms:
             rotation=self._rng.uniform(0, 1, size=(self._batch_size, 3)).astype(
                 _np.float32
             ),
-            position=self._rng.normal(0, 1, size=(self._batch_size, 3)).astype(
+            position_sphere=self._rng.normal(0, 1, size=(self._batch_size, 3)).astype(
+                _np.float32
+            ),
+            position_bulk=self._rng.uniform(0, 1, size=(self._batch_size, 3)).astype(
                 _np.float32
             ),
             radius=self._rng.uniform(0, 1, size=self._batch_size).astype(_np.float32),
@@ -117,6 +129,7 @@ def get_batch_randoms(self) -> BatchRandoms:
 
         Returns
         -------
+
         BatchRandoms
             Container with rotation, position, radius, and acceptance arrays.
         """
diff --git a/src/loch/_sampler.py b/src/loch/_sampler.py
index 0ee4f11..0c17461 100644
--- a/src/loch/_sampler.py
+++ b/src/loch/_sampler.py
@@ -37,6 +37,17 @@
 
 from ._platforms import create_backend as _create_backend
 from ._platforms._rng import RNGManager as _RNGManager
+from ._softcore import SoftcoreForm as _SoftcoreForm
+
+import sys as _sys
+
+try:
+    "μ".encode(_sys.stdout.encoding or "utf-8")
+    _mu_sym = "μ"
+except (UnicodeEncodeError, LookupError):
+    _mu_sym = "mu"
+
+del _sys
 
 
 def _as_float32(arr: _np.ndarray) -> _np.ndarray:
@@ -64,6 +75,7 @@ def __init__(
         excess_chemical_potential: str = "-6.09 kcal/mol",
         standard_volume: str = "30.543 A^3",
         temperature: str = "298 K",
+        pressure: _Optional[str] = None,
         adams_shift: _Union[int, float] = 0.0,
         num_ghost_waters: int = 20,
         batch_size: int = 1000,
@@ -78,9 +90,11 @@ def __init__(
         lambda_value: float = 0.0,
         rest2_scale: float = 1.0,
         rest2_selection: _Optional[str] = None,
-        coulomb_power: float = 0.0,
         shift_coulomb: str = "1 A",
         shift_delta: str = "1.5 A",
+        softcore_form: str = "zacharias",
+        taylor_power: int = 1,
+        beutler_alpha: float = 0.5,
         swap_end_states: bool = False,
         restart: bool = False,
         overwrite: bool = False,
@@ -123,6 +137,12 @@ def __init__(
         temperature: str
             The temperature of the system.
 
+        pressure: str
+            The pressure for NPT simulation, e.g. "1 atm". If None (default),
+            the simulation runs in the μVT ensemble with a fixed box. When set,
+            the box is updated from the context at each move and the Adams value
+            is recomputed accordingly, sampling the μpT (osmotic) ensemble.
+
         adams_shift: float
             The Adams shift.
 
@@ -207,6 +227,22 @@ def __init__(
             The soft-core shift-delta parameter. This is used to soften the
             Lennard-Jones interaction.
 
+        softcore_form : str
+            The soft-core potential form to use for alchemical interactions.
+            Valid options are 'zacharias' (default), 'taylor', and 'beutler'.
+            The Beutler form is recommended for ABFE calculations.
+
+        taylor_power : int
+            The power to use for the alpha term in the Taylor soft-core LJ
+            expression, i.e. sig6 = sigma^6 / (alpha^m * sigma^6 + r^6).
+            Must be between 0 and 4. The default is 1. Only used when
+            softcore_form is 'taylor'.
+
+        beutler_alpha : float
+            The dimensionless scale factor for the r^6 shift in the Beutler
+            soft-core form. Must be >= 0. The default is 0.5. Only used when
+            softcore_form is 'beutler'.
+
         swap_end_states: bool
             Whether to swap the end states of the alchemical systems.
 
@@ -273,6 +309,11 @@ def __init__(
         else:
             self._is_pme = False
 
+        # LRC state: initialised lazily on first move() call.
+        self._has_gcmc_lrc = False
+        self._lrc_w_solute = 0.0
+        self._lrc_ww_half = 0.0
+
         try:
             self._radius = self._validate_sire_unit("radius", radius, _sr.u("A"))
         except Exception as e:
@@ -306,6 +347,16 @@ def __init__(
         except Exception as e:
             raise ValueError(f"Could not validate the 'temperature': {e}")
 
+        if pressure is not None:
+            try:
+                self._pressure = self._validate_sire_unit(
+                    "pressure", pressure, _sr.u("atm")
+                )
+            except Exception as e:
+                raise ValueError(f"Could not validate the 'pressure': {e}")
+        else:
+            self._pressure = None
+
         if not isinstance(num_ghost_waters, int):
             raise ValueError("'num_ghost_waters' must be of type 'int'")
         self._num_ghost_waters = num_ghost_waters
@@ -475,12 +526,6 @@ def __init__(
                 )
         self._rest2_selection = rest2_selection
 
-        try:
-            coulomb_power = float(coulomb_power)
-        except Exception:
-            raise ValueError("'coulomb_power' must be of type 'float'")
-        self._coulomb_power = float(coulomb_power)
-
         try:
             self._shift_coulomb = self._validate_sire_unit(
                 "shift_coulomb", shift_coulomb, _sr.u("A")
@@ -495,10 +540,41 @@ def __init__(
         except Exception as e:
             raise ValueError(f"Could not validate the 'shift_delta': {e}")
 
+        if not isinstance(softcore_form, str):
+            raise TypeError("'softcore_form' must be of type 'str'")
+        softcore_form = softcore_form.lower().replace(" ", "")
+        _valid_softcore_forms = {m.name.lower(): m for m in _SoftcoreForm}
+        if softcore_form not in _valid_softcore_forms:
+            raise ValueError(
+                f"'softcore_form' not recognised. Valid forms are: "
+                f"{', '.join(_valid_softcore_forms)}"
+            )
+        self._softcore_form = _valid_softcore_forms[softcore_form]
+
+        if not isinstance(taylor_power, int):
+            try:
+                taylor_power = int(taylor_power)
+            except Exception:
+                raise ValueError("'taylor_power' must be of type 'int'")
+        if not 0 <= taylor_power <= 4:
+            raise ValueError("'taylor_power' must be between 0 and 4")
+        self._taylor_power = taylor_power
+
+        try:
+            beutler_alpha = float(beutler_alpha)
+        except Exception:
+            raise ValueError("'beutler_alpha' must be of type 'float'")
+        if beutler_alpha < 0.0:
+            raise ValueError("'beutler_alpha' must be >= 0")
+        self._beutler_alpha = beutler_alpha
+
         if not isinstance(swap_end_states, bool):
             raise ValueError("'swap_end_states' must be of type 'bool'")
         self._swap_end_states = swap_end_states
 
+        if swap_end_states and self._lambda_schedule is not None:
+            self._lambda_schedule = self._lambda_schedule.reverse()
+
         # Check for waters and validate the template.
         try:
             self._water_template = system["water and not property is_perturbable"][0]
@@ -532,6 +608,39 @@ def __init__(
         except Exception as e:
             raise ValueError(f"Could not prepare the system for GCMC sampling: {e}")
 
+        # Compute per-molecule virtual site information. Virtual sites are
+        # appended after the real atoms of each molecule in the OpenMM system,
+        # so all subsequent molecules have their OpenMM particle indices shifted
+        # by the cumulative number of virtual sites in preceding molecules.
+        (
+            self._total_vsites,
+            self._vsite_atom_offsets,
+            self._mol_vsite_charges,
+        ) = self._get_vsite_offsets(self._system)
+
+        # Keep a copy of the Sire atom indices before applying the OpenMM
+        # offset. These are needed by any operation that works on the Sire
+        # topology (e.g. _flag_ghost_waters, ghost_residues), which has no
+        # knowledge of virtual site particles.
+        self._water_indices_sire = self._water_indices.copy()
+
+        if self._total_vsites > 0:
+            # Offset water oxygen indices from Sire atom indices to OpenMM
+            # particle indices.
+            self._water_indices = (
+                self._water_indices + self._vsite_atom_offsets[self._water_indices]
+            )
+
+            # Apply the same correction to the reference atom indices.
+            if self._reference is not None:
+                self._reference_indices = (
+                    self._reference_indices
+                    + self._vsite_atom_offsets[self._reference_indices]
+                )
+
+        # Update the total atom count to include virtual site particles.
+        self._num_atoms = self._system.num_atoms() + self._total_vsites
+
         # Validate the platform parameter.
         valid_platforms = {"auto", "cuda", "opencl"}
 
@@ -603,27 +712,8 @@ def __init__(
             * _openmm.unit.kelvin
         )
 
-        # Work out the volume of the system and GCMC sphere.
-        volume = self._space.volume().value()
-        gcmc_volume = (4.0 * _np.pi * self._radius.value() ** 3) / 3.0
-
-        # Work out the Adams value.
-        B = (
-            self._beta * self._excess_chemical_potential.value()
-            + _np.log(gcmc_volume / self._standard_volume.value())
-        ) + self._adams_shift
-
-        # Work out the bulk Adams value.
-        B_bulk = (
-            self._beta * self._excess_chemical_potential.value()
-            + _np.log(volume / self._standard_volume.value())
-        ) + self._adams_shift
-
-        # Store the exponentials for the Adams values.
-        self._exp_B = _np.exp(B)
-        self._exp_minus_B = _np.exp(-B)
-        self._exp_B_bulk = _np.exp(B_bulk)
-        self._exp_minus_B_bulk = _np.exp(-B_bulk)
+        # Compute the Adams values from the current box and sphere volume.
+        self._compute_adams_values(self._system)
 
         # Coulomb energy prefactor.
         self._prefactor = 1.0 / (4.0 * _np.pi * _sr.units.epsilon0.value())
@@ -658,9 +748,6 @@ def __init__(
                     self._log_file, level=self._log_level.upper(), filter="loch"
                 )
 
-        # Log the Adams value.
-        _logger.debug(f"Adams value: {B:.6f}")
-
         import atexit
 
         # Register the cleanup function.
@@ -690,6 +777,7 @@ def __str__(self) -> str:
             f"excess_chemical_potential={self._excess_chemical_potential}, "
             f"standard_volume={self._standard_volume}, "
             f"temperature={self._temperature}, "
+            f"pressure={self._pressure}, "
             f"num_ghost_waters={self._num_ghost_waters}, "
             f"adams_shift={self._adams_shift}, "
             f"batch_size={self._batch_size}, "
@@ -708,7 +796,6 @@ def __str__(self) -> str:
             f"restart={self._restart}, "
             f"rest2_scale={self._rest2_scale}, "
             f"rest2_selection={self._rest2_selection}, "
-            f"coulomb_power={self._coulomb_power}, "
             f"shift_coulomb={self._shift_coulomb}, "
             f"shift_delta={self._shift_delta}, "
             f"overwrite={self._overwrite}, "
@@ -776,6 +863,136 @@ def system(self) -> _Any:
         """
         return self._system.clone()
 
+    def _system_from_context(self, context: _Optional[_openmm.Context]) -> _Any:
+        """
+        Clone the internal system and, if a context is provided, update
+        atomic coordinates and the periodic box from it.
+        """
+        system = self._system.clone()
+
+        if context is not None:
+            if not isinstance(context, _openmm.Context):
+                raise ValueError("'context' must be of type 'openmm.Context'")
+
+            from sire.legacy.IO import setCoordinates
+
+            state = context.getState(getPositions=True)
+            positions = (
+                state.getPositions(asNumpy=True) / _openmm.unit.angstrom
+            ).tolist()
+
+            try:
+                system._system = setCoordinates(self._system._system, positions)
+            except Exception as e:
+                raise ValueError(
+                    f"Could not update the system with the current positions: {e}"
+                )
+
+            # Update the periodic box from the context.
+            box = state.getPeriodicBoxVectors()
+            v0 = [10 * box[0].x, 10 * box[0].y, 10 * box[0].z]
+            v1 = [10 * box[1].x, 10 * box[1].y, 10 * box[1].z]
+            v2 = [10 * box[2].x, 10 * box[2].y, 10 * box[2].z]
+            space = _sr.vol.TriclinicBox(
+                _sr.maths.Vector(*v0),
+                _sr.maths.Vector(*v1),
+                _sr.maths.Vector(*v2),
+            )
+            system.set_property("space", space)
+
+        return system
+
+    def update_system(self, context: _Optional[_openmm.Context] = None) -> _Any:
+        """
+        Update the system with the current water state and (optionally) positions.
+
+        Parameters
+        ----------
+
+        context: openmm.Context
+            The OpenMM context containing the current positions.
+
+        Returns
+        -------
+
+        system: sire.system.System
+            The updated GCMC system.
+        """
+
+        # Clone the system and update coordinates and box from the context.
+        system = self._system_from_context(context)
+
+        # Flag the ghost waters in the system according to the current water state.
+        system = self._flag_ghost_waters(system)
+
+        # Turn OFF interactions for ghost waters in the system. This is done
+        # by setting the charges and Lennard-Jones parameters of ghost waters
+        # to zero.
+        for mol in system["property is_ghost_water"].molecules():
+            cursor = mol.cursor()
+            for atom in cursor.atoms():
+                LJ = atom["LJ"]
+                atom["charge"] = 0.0 * _sr.units.mod_electron
+                atom["LJ"] = _sr.legacy.MM.LJParameter(
+                    LJ.sigma(), 0.0 * _sr.units.kcal_per_mol
+                )
+            mol = cursor.commit()
+            system.update(mol)
+
+        # Turn ON any ghost buffer waters that were inserted during sampling.
+        # These are waters in the last _num_ghost_waters slots that now have state=1.
+        first_ghost_buffer = self._num_waters - self._num_ghost_waters
+        for i in range(first_ghost_buffer, self._num_waters):
+            if self._water_state[i] == 1:
+                mol = system[
+                    system.atoms()[int(self._water_indices_sire[i])].molecule()
+                ]
+                cursor = mol.cursor()
+                for j, atom in enumerate(cursor.atoms()):
+                    atom["charge"] = self._water_charge[j] * _sr.units.mod_electron
+                    atom["LJ"] = _sr.legacy.MM.LJParameter(
+                        self._water_sigma[j] * _sr.units.angstrom,
+                        self._water_epsilon[j] * _sr.units.kcal_per_mol,
+                    )
+                mol = cursor.commit()
+                system.update(mol)
+
+        return system
+
+    def finalise_system(self, context: _Optional[_openmm.Context] = None) -> _Any:
+        """
+        Return the system with ghost waters removed and (optionally) positions
+        updated from an OpenMM context.
+
+        Unlike :meth:`update_system`, which retains ghost waters with zeroed
+        parameters, this method deletes them entirely so the returned system is
+        suitable for use as input to non-GCMC simulations or analyses.
+
+        Parameters
+        ----------
+
+        context: openmm.Context
+            The OpenMM context containing the current positions.
+
+        Returns
+        -------
+
+        system: sire.system.System
+            The system with ghost waters removed.
+        """
+
+        # Clone the system and update coordinates and box from the context.
+        system = self._system_from_context(context)
+
+        # Collect all ghost water molecules before removing any, since each
+        # removal shifts atom indices.
+        ghost_oxygens = self._water_indices_sire[self._get_ghost_waters()]
+        ghost_mols = [system[system.atoms()[int(i)].molecule()] for i in ghost_oxygens]
+        for mol in ghost_mols:
+            system.remove(mol)
+
+        return system
+
     def compiler_log(self) -> str:
         """
         Return the GPU kernel compiler log.
@@ -791,6 +1008,55 @@ def compiler_log(self) -> str:
         """
         return self._backend.compiler_log
 
+    def _compute_adams_values(self, system: _Any) -> None:
+        """
+        Compute the Adams values from the current box and sphere volume.
+
+        Updates self._exp_B, self._exp_minus_B, self._exp_B_bulk, and
+        self._exp_minus_B_bulk.
+
+        Parameters
+        ----------
+
+        system: sire.system.System, openmm.Context, openmm.State
+            The molecular system, OpenMM context, or OpenMM state.
+        """
+        if isinstance(system, _sr.system.System):
+            volume = system.property("space").volume().value()
+        elif isinstance(system, _openmm.Context):
+            box = system.getState().getPeriodicBoxVectors(asNumpy=True)
+            volume = _np.linalg.det(box / _openmm.unit.angstrom)
+        elif isinstance(system, _openmm.State):
+            box = system.getPeriodicBoxVectors(asNumpy=True)
+            volume = _np.linalg.det(box / _openmm.unit.angstrom)
+        else:
+            raise ValueError(
+                "'system' must be of type 'sire.system.System', 'openmm.Context', "
+                "or 'openmm.State'"
+            )
+
+        gcmc_volume = (4.0 * _np.pi * self._radius.value() ** 3) / 3.0
+
+        B = (
+            self._beta * self._excess_chemical_potential.value()
+            + _np.log(gcmc_volume / self._standard_volume.value())
+        ) + self._adams_shift
+
+        B_bulk = (
+            self._beta * self._excess_chemical_potential.value()
+            + _np.log(volume / self._standard_volume.value())
+        ) + self._adams_shift
+
+        self._exp_B = _np.exp(B)
+        self._exp_minus_B = _np.exp(-B)
+        self._exp_B_bulk = _np.exp(B_bulk)
+        self._exp_minus_B_bulk = _np.exp(-B_bulk)
+
+        _logger.debug(f"Adams value: {B:.6f}")
+
+        # Store the box volume in nm^3 for reuse in LRC corrections.
+        self._v_nm3 = volume / 1000.0
+
     def set_box(self, system: _Any) -> None:
         """
         Set the box information.
@@ -798,8 +1064,8 @@ def set_box(self, system: _Any) -> None:
         Parameters
         ----------
 
-        system: sire.system.System, openmm.Context
-            The molecular system, or OpenMM context.
+        system: sire.system.System, openmm.Context, openmm.State
+            The molecular system, OpenMM context, or OpenMM state.
         """
 
         # Get the space property from the system.
@@ -809,8 +1075,12 @@ def set_box(self, system: _Any) -> None:
             except Exception:
                 raise ValueError("'system' must contain a 'space' property")
         # Create a Sire TriclinicBox from the OpenMM box vectors.
-        elif isinstance(system, _openmm.Context):
-            box = system.getState().getPeriodicBoxVectors()
+        elif isinstance(system, (_openmm.Context, _openmm.State)):
+            box = (
+                system.getState().getPeriodicBoxVectors()
+                if isinstance(system, _openmm.Context)
+                else system.getPeriodicBoxVectors()
+            )
             v0 = [10 * box[0].x, 10 * box[0].y, 10 * box[0].z]
             v1 = [10 * box[1].x, 10 * box[1].y, 10 * box[1].z]
             v2 = [10 * box[2].x, 10 * box[2].y, 10 * box[2].z]
@@ -819,7 +1089,8 @@ def set_box(self, system: _Any) -> None:
             )
         else:
             raise ValueError(
-                "'system' must be of type 'sire.system.System' or 'openmm.Context'"
+                "'system' must be of type 'sire.system.System', 'openmm.Context', "
+                "or 'openmm.State'"
             )
 
         # Get the box information.
@@ -912,7 +1183,7 @@ def delete_waters(self, context: _openmm.Context) -> None:
         # Set the number of waters in the GCMC sphere to zero.
         self._N = 0
 
-    def num_waters(self) -> int:
+    def num_waters(self, context=None) -> int:
         """
         Return the number of waters in the GCMC region.
 
@@ -921,18 +1192,29 @@ def num_waters(self) -> int:
 
         num_waters: int
             The number of waters.
+
+        context: openmm.Context, optional
+            The OpenMM context to use for counting the waters. If None, then the
+            internal context will be used if available.
         """
 
-        # The last move was a bulk sampling move, so we need to recalculate
-        # the number of waters in the GCMC sphere.
-        if self._reference is not None and self._is_bulk:
-            if not self._openmm_context:
-                msg = "OpenMM context is not set!"
-                _logger.error(msg)
-                raise RuntimeError(msg)
+        # Whether we need to recalculate the number of waters in the GCMC sphere.
+        recalculate = context is not None or (
+            self._reference is not None and self._is_bulk
+        )
+
+        # We need to recalculate the number of waters.
+        if recalculate:
+            if context is None:
+                if not self._openmm_context:
+                    msg = "OpenMM context is not set!"
+                    _logger.error(msg)
+                    raise RuntimeError(msg)
+                else:
+                    context = self._openmm_context
 
             # Get the OpenMM state.
-            state = self._openmm_context.getState(getPositions=True)
+            state = context.getState(getPositions=True)
 
             # Get the current positions in Angstrom.
             positions = state.getPositions(asNumpy=True) / _openmm.unit.angstrom
@@ -1007,7 +1289,8 @@ def water_state(self) -> _np.ndarray:
 
     def move_acceptance_probability(self) -> float:
         """
-        Return the acceptance probability.
+        Return the acceptance probability. Note that this can be greater than
+        1, since multiple insertions/deletions can be accepter per move.
 
         Returns
         -------
@@ -1074,10 +1357,49 @@ def reset(self) -> None:
         # Clear the OpenMM context.
         self._openmm_context = None
 
+    def restore_stats(self, stats: dict) -> None:
+        """
+        Restore sampler statistics from a dictionary.
+
+        Parameters
+        ----------
+
+        stats : dict
+            Dictionary of sampler statistics as returned by ``get_stats()``.
+        """
+        self._num_moves = stats["num_moves"]
+        self._num_accepted = stats["num_accepted"]
+        self._num_insertions = stats["num_insertions"]
+        self._num_deletions = stats["num_deletions"]
+        self._num_accepted_attempts = stats["num_accepted_attempts"]
+        self._num_accepted_insertions = stats["num_accepted_insertions"]
+        self._num_accepted_deletions = stats["num_accepted_deletions"]
+
+    def get_stats(self) -> dict:
+        """
+        Return the current sampler statistics as a dictionary.
+
+        Returns
+        -------
+
+        dict
+            Dictionary of sampler statistics.
+        """
+        return {
+            "num_moves": self._num_moves,
+            "num_accepted": self._num_accepted,
+            "num_insertions": self._num_insertions,
+            "num_deletions": self._num_deletions,
+            "num_accepted_attempts": self._num_accepted_attempts,
+            "num_accepted_insertions": self._num_accepted_insertions,
+            "num_accepted_deletions": self._num_accepted_deletions,
+        }
+
     def ghost_residues(self) -> _np.ndarray:
         """
-        Return the current indices of the ghost water residues in the OpenMM
-        context.
+        Return the residue indices of the current ghost waters in the input
+        topology. These are Sire/BioSimSpace residue indices and do not
+        include any virtual site particles that were added on context creation.
 
         Returns
         -------
@@ -1155,6 +1477,10 @@ def move(self, context: _openmm.Context) -> list[int]:
                 # We only need to get the positions and initial energy for the first
                 # batch. These will be updated dynamically as moves are accepted.
                 if num_batches == 1:
+                    # Detect GCMC LRC parameters from context on first call.
+                    if not self._has_gcmc_lrc:
+                        self._init_gcmc_lrc(context)
+
                     # Get the OpenMM state.
                     state = context.getState(getPositions=True, getEnergy=self._is_pme)
 
@@ -1168,6 +1494,13 @@ def move(self, context: _openmm.Context) -> list[int]:
                     else:
                         initial_energy = None
 
+                    # For NPT (μpT), update the box and recompute Adams values
+                    # from the current context state so that volume fluctuations
+                    # driven by the barostat are reflected in the acceptance criterion.
+                    if self._pressure is not None:
+                        self.set_box(state)
+                        self._compute_adams_values(state)
+
                     # Sample within the GCMC sphere.
                     if self._reference is not None and not self._is_bulk:
                         target = self._get_target_position(positions_angstrom).astype(
@@ -1276,7 +1609,10 @@ def move(self, context: _openmm.Context) -> list[int]:
             # Get pre-computed random numbers for this batch.
             batch_randoms = self._rng_manager.get_batch_randoms()
             randoms_rotation = self._backend.to_gpu(batch_randoms.rotation)
-            randoms_position = self._backend.to_gpu(batch_randoms.position)
+            randoms_position_sphere = self._backend.to_gpu(
+                batch_randoms.position_sphere
+            )
+            randoms_position_bulk = self._backend.to_gpu(batch_randoms.position_bulk)
             randoms_radius = self._backend.to_gpu(batch_randoms.radius)
 
             # Generate the random water positions and orientations.
@@ -1289,7 +1625,8 @@ def move(self, context: _openmm.Context) -> list[int]:
                 self._water_positions,
                 is_target,
                 randoms_rotation,
-                randoms_position,
+                randoms_position_sphere,
+                randoms_position_bulk,
                 randoms_radius,
                 self._gpu_cell_matrix,
                 block=(self._num_threads, 1, 1),
@@ -1323,9 +1660,11 @@ def move(self, context: _openmm.Context) -> list[int]:
                 self._rf_cutoff,
                 self._rf_kappa,
                 self._rf_correction,
-                self._sc_coulomb_power,
+                self._sc_softcore_form,
                 self._sc_shift_coulomb,
                 self._sc_shift_delta,
+                self._sc_taylor_power,
+                self._sc_beutler_alpha,
                 block=(self._num_threads, 1, 1),
                 grid=(self._atom_blocks, self._batch_size, 1),
             )
@@ -1401,6 +1740,10 @@ def move(self, context: _openmm.Context) -> list[int]:
 
                 # Insertion move.
                 if is_deletion[idx] == 0:
+                    # Capture n_w before the insertion for LRC delta calculation.
+                    if self._has_gcmc_lrc:
+                        n_w_before_insert = context.getParameter("n_w")
+
                     # Accept the move.
                     self._accept_insertion(
                         idx, idx_water, positions_openmm, positions_angstrom, context
@@ -1428,6 +1771,19 @@ def move(self, context: _openmm.Context) -> list[int]:
                             getEnergy=True
                         ).getPotentialEnergy()
 
+                        # Add the analytic LRC delta so the PME correction sees only
+                        # the RF→PME electrostatic difference.
+                        if self._has_gcmc_lrc:
+                            dLRC = (
+                                (
+                                    self._lrc_w_solute
+                                    + 2.0 * n_w_before_insert * self._lrc_ww_half
+                                )
+                                / self._v_nm3
+                                * _openmm.unit.kilojoules_per_mole
+                            )
+                            dE_RF += dLRC
+
                         # Compute the PME acceptance correction.
                         acc_prob = _np.exp(
                             min(
@@ -1488,6 +1844,10 @@ def move(self, context: _openmm.Context) -> list[int]:
 
                 # Deletion move.
                 else:
+                    # Capture n_w before the deletion for LRC delta calculation.
+                    if self._has_gcmc_lrc:
+                        n_w_before_delete = context.getParameter("n_w")
+
                     # Accept the move.
                     self._accept_deletion(candidates[idx], context)
 
@@ -1513,6 +1873,20 @@ def move(self, context: _openmm.Context) -> list[int]:
                             getEnergy=True
                         ).getPotentialEnergy()
 
+                        # Add the analytic LRC delta.
+                        if self._has_gcmc_lrc:
+                            dLRC = (
+                                -(
+                                    self._lrc_w_solute
+                                    + 2.0
+                                    * (n_w_before_delete - 1.0)
+                                    * self._lrc_ww_half
+                                )
+                                / self._v_nm3
+                                * _openmm.unit.kilojoules_per_mole
+                            )
+                            dE_RF += dLRC
+
                         # Compute the PME acceptance correction.
                         acc_prob = _np.exp(
                             min(
@@ -1626,8 +2000,8 @@ def _validate_sire_unit(parameter: str, value: str, unit: _Any) -> _Any:
         parameter: str
             The name of the parameter.
 
-        value: str
-            The value to validate.
+        value: str, sire.units.GeneralUnit
+            The value or GeneralUnit to validate.
 
         unit: str
             The unit to validate.
@@ -1639,8 +2013,10 @@ def _validate_sire_unit(parameter: str, value: str, unit: _Any) -> _Any:
             The validated unit.
         """
 
-        if not isinstance(value, str):
-            raise ValueError(f"'{parameter}' must be of type 'str'")
+        if not isinstance(value, (str, _sr.units.GeneralUnit)):
+            raise ValueError(
+                f"'{parameter}' must be of type 'str' or 'sire.units.GeneralUnit'"
+            )
 
         try:
             u = _sr.u(value)
@@ -1708,6 +2084,75 @@ def _get_box_information(self, space):
 
         return cell_matrix, cell_matrix_inverse, M
 
+    @staticmethod
+    def _get_vsite_offsets(system):
+        """
+        Compute per-atom OpenMM index offsets due to virtual sites.
+
+        In OpenMM, virtual site particles are appended after the real atoms
+        of each molecule. Molecules that appear after a molecule with virtual
+        sites therefore have their OpenMM particle indices shifted relative
+        to their Sire atom indices.
+
+        Parameters
+        ----------
+
+        system: sire.system.System
+            The molecular system.
+
+        Returns
+        -------
+
+        total_vsites: int
+            Total number of virtual site particles in the system.
+
+        atom_offsets: numpy.ndarray
+            Array of shape (num_sire_atoms,) where entry i is the
+            cumulative number of virtual sites in all molecules that
+            precede the molecule containing Sire atom i. Adding this
+            offset to a Sire atom index yields the corresponding OpenMM
+            particle index.
+
+        mol_vsite_charges: dict
+            Mapping from molecule number to a list of virtual site charges in
+            units of elementary charge. Only molecules that carry virtual sites
+            appear as keys; all other molecules are absent from the dict.
+        """
+        n_sire_atoms = system.num_atoms()
+        atom_offsets = _np.zeros(n_sire_atoms, dtype=_np.int32)
+        mol_vsite_charges = {}
+        total_vsites = 0
+
+        try:
+            vsite_mols = system["property n_virtual_sites"].molecules()
+        except Exception:
+            # No molecules carry virtual sites.
+            return 0, atom_offsets, mol_vsite_charges
+
+        all_atoms = system.atoms()
+
+        for mol in vsite_mols:
+            n_vs = int(mol.property("n_virtual_sites"))
+            if n_vs <= 0:
+                continue
+
+            # Locate where this molecule's atoms sit in the global index space,
+            # then shift every subsequent atom's offset by n_vs in one operation.
+            mol_start = int(_np.array(all_atoms.find(mol.atoms()))[0])
+            mol_end = mol_start + mol.num_atoms()
+            atom_offsets[mol_end:] += n_vs
+            total_vsites += n_vs
+
+            try:
+                raw_charges = mol.property("vs_charges")
+                vs_charges = [float(raw_charges[k]) for k in range(n_vs)]
+            except Exception:
+                vs_charges = [0.0] * n_vs
+
+            mol_vsite_charges[mol.number()] = vs_charges
+
+        return total_vsites, atom_offsets, mol_vsite_charges
+
     @staticmethod
     def _get_reference_indices(system, reference):
         """
@@ -1860,6 +2305,10 @@ def _initialise_gpu_memory(self):
                     for q in mol.property("charge"):
                         charges[i] = q.value()
                         i += 1
+                    # Append virtual site charges (zero LJ, non-zero charge).
+                    for vc in self._mol_vsite_charges.get(mol.number(), []):
+                        charges[i] = vc
+                        i += 1
 
                 # Convert to a GPU array.
                 charges = self._backend.to_gpu(charges.astype(_np.float32))
@@ -1877,6 +2326,12 @@ def _initialise_gpu_memory(self):
                         sigmas[i] = lj.sigma().value()
                         epsilons[i] = lj.epsilon().value()
                         i += 1
+                    # Virtual sites have zero LJ. Use sigma=1.0 Å as a
+                    # nominal placeholder (epsilon=0 so it has no effect).
+                    for _ in self._mol_vsite_charges.get(mol.number(), []):
+                        sigmas[i] = 1.0
+                        epsilons[i] = 0.0
+                        i += 1
 
                 # Convert to GPU arrays.
                 sigmas = self._backend.to_gpu(sigmas.astype(_np.float32))
@@ -1900,6 +2355,15 @@ def _initialise_gpu_memory(self):
             # Link to the reference state.
             mols = _sr.morph.link_to_reference(self._system)
 
+            # Build map of extra options for the dynamics object.
+            _map = {}
+            if self._softcore_form == _SoftcoreForm.TAYLOR:
+                _map["use_taylor_softening"] = True
+                _map["taylor_power"] = self._taylor_power
+            elif self._softcore_form == _SoftcoreForm.BEUTLER:
+                _map["use_beutler_softening"] = True
+                _map["beutler_alpha"] = self._beutler_alpha
+
             # Create a dynamics object.
             d = mols.dynamics(
                 cutoff_type=self._cutoff,
@@ -1914,9 +2378,10 @@ def _initialise_gpu_memory(self):
                 rest2_selection=self._rest2_selection,
                 swap_end_states=self._swap_end_states,
                 platform="cpu",
+                map=_map,
             )
 
-            # Flags for the required force.
+            # Flag for the required force.
             has_gng = False
 
             # Find the required forces.
@@ -1946,7 +2411,7 @@ def _initialise_gpu_memory(self):
                 charges[i] = q
                 # Rescale and convert units.
                 sigmas[i] = _sr.u(f"{2.0 * half_sigma} nm").to("angstrom")
-                epsilons[i] = _sr.u(f"{(0.5 * two_sqrt_epsilon)**2} kJ/mol").to(
+                epsilons[i] = _sr.u(f"{(0.5 * two_sqrt_epsilon) ** 2} kJ/mol").to(
                     "kcal/mol"
                 )
                 # Store the softening parameter.
@@ -1979,8 +2444,9 @@ def _initialise_gpu_memory(self):
 
                         # This is a null LJ parameter.
                         if _np.isclose(lj.epsilon().value(), 0.0):
-                            idx = atoms.find(atom)
-                            is_ghost_fep[idx] = 1
+                            sire_idx = atoms.find(atom)
+                            omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
+                            is_ghost_fep[omm_idx] = 1
 
                     # The charge at the perturbed state is zero.
                     elif _np.isclose(charge1, 0.0):
@@ -1989,8 +2455,9 @@ def _initialise_gpu_memory(self):
 
                         # This is a null LJ parameter.
                         if _np.isclose(lj.epsilon().value(), 0.0):
-                            idx = atoms.find(atom)
-                            is_ghost_fep[idx] = 1
+                            sire_idx = atoms.find(atom)
+                            omm_idx = sire_idx + int(self._vsite_atom_offsets[sire_idx])
+                            is_ghost_fep[omm_idx] = 1
 
             # Convert to GPU array.
             is_ghost_fep = self._backend.to_gpu(is_ghost_fep.astype(_np.int32))
@@ -2057,9 +2524,11 @@ def _initialise_gpu_memory(self):
         )
 
         # Store soft-core parameters as scalars.
-        self._sc_coulomb_power = _np.float32(self._coulomb_power)
+        self._sc_softcore_form = _np.int32(int(self._softcore_form))
         self._sc_shift_coulomb = _np.float32(self._shift_coulomb.value())
         self._sc_shift_delta = _np.float32(self._shift_delta.value())
+        self._sc_taylor_power = _np.int32(self._taylor_power)
+        self._sc_beutler_alpha = _np.float32(self._beutler_alpha)
 
         # Store immutable per-atom buffers on GPU.
         self._gpu_sigma = sigmas
@@ -2109,6 +2578,15 @@ def _initialise_gpu_memory(self):
             (1, self._num_waters), _np.int32
         )
 
+    def _init_gcmc_lrc(self, context):
+        """Detect and cache GCMC LRC parameters from the OpenMM context."""
+        try:
+            self._lrc_w_solute = context.getParameter("lrc_w_solute")
+            self._lrc_ww_half = context.getParameter("lrc_ww_half")
+            self._has_gcmc_lrc = True
+        except Exception:
+            self._has_gcmc_lrc = False
+
     def _accept_insertion(
         self, idx, idx_water, positions_openmm, positions_angstrom, context
     ):
@@ -2205,6 +2683,10 @@ def _accept_insertion(
         # Update the number of waters in the sampling volume.
         self._N += 1
 
+        # Update the GCMC LRC water count in the context.
+        if self._has_gcmc_lrc:
+            context.setParameter("n_w", context.getParameter("n_w") + 1.0)
+
     def _accept_deletion(self, idx, context):
         """
         Accept a deletion move.
@@ -2275,6 +2757,10 @@ def _accept_deletion(self, idx, context):
         # Update the number of waters in the sampling volume.
         self._N -= 1
 
+        # Update the GCMC LRC water count in the context.
+        if self._has_gcmc_lrc:
+            context.setParameter("n_w", context.getParameter("n_w") - 1.0)
+
     def _reject_deletion(self, idx, context):
         """
         Reject a deletion move.
@@ -2348,6 +2834,10 @@ def _reject_deletion(self, idx, context):
         # Update the number of waters in the sampling volume.
         self._N += 1
 
+        # Update the GCMC LRC water count in the context.
+        if self._has_gcmc_lrc:
+            context.setParameter("n_w", context.getParameter("n_w") + 1.0)
+
     def _set_water_state(self, context, indices=None, states=None, force=False):
         """
         Update the state for a list of waters. This can be used by external
@@ -2525,10 +3015,11 @@ def _set_nonbonded_forces(self, context):
                     self._nonbonded_force = force
                 elif self._is_fep and force.getName() == "GhostNonGhostNonbondedForce":
                     self._custom_nonbonded_force = force
-                elif "Barostat" in force.getName():
+                elif self._pressure is None and "Barostat" in force.getName():
                     msg = (
-                        f"GCMC must be used at constant volume: "
-                        f"'{force.getName()}' is not supported."
+                        f"A barostat was detected but no pressure was set: "
+                        f"'{force.getName()}' is not supported in the {_mu_sym}VT ensemble. "
+                        f"Pass pressure= to enable {_mu_sym}pT (osmotic) sampling."
                     )
                     _logger.error(msg)
                     raise TypeError(msg)
@@ -2730,8 +3221,12 @@ def _flag_ghost_waters(self, system):
         if not isinstance(system, _sr.system.System):
             raise ValueError("'system' must be a Sire system")
 
-        # Now extract the oxygen indices using cached ghost water indices.
-        ghost_oxygens = self._water_indices[self._get_ghost_waters()]
+        # Clone the system so that we don't modify the original.
+        system = system.clone()
+
+        # Use the Sire atom indices (no vsite offset) so that lookups into the
+        # input topology are correct regardless of virtual sites in the context.
+        ghost_oxygens = self._water_indices_sire[self._get_ghost_waters()]
 
         # Loop over the ghost waters and set the is_ghost property.
         for i in ghost_oxygens:
diff --git a/src/loch/_softcore.py b/src/loch/_softcore.py
new file mode 100644
index 0000000..40af129
--- /dev/null
+++ b/src/loch/_softcore.py
@@ -0,0 +1,32 @@
+######################################################################
+# Loch: GPU accelerated GCMC water sampling engine.
+#
+# Copyright: 2025-2026
+#
+# Authors: The OpenBioSim Team <team@openbiosim.org>
+#
+# Loch is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Loch is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Loch. If not, see <http://www.gnu.org/licenses/>.
+#####################################################################
+
+__all__ = ["SoftcoreForm"]
+
+from enum import IntEnum
+
+
+class SoftcoreForm(IntEnum):
+    """Enum for the soft-core potential form used for alchemical interactions."""
+
+    ZACHARIAS = 0
+    TAYLOR = 1
+    BEUTLER = 2
diff --git a/src/loch/_utils.py b/src/loch/_utils.py
index 3defbac..5f6fe85 100644
--- a/src/loch/_utils.py
+++ b/src/loch/_utils.py
@@ -36,6 +36,7 @@ def excess_chemical_potential(
     runtime: str = "5 ns",
     num_lambda: int = 24,
     replica_exchange: bool = False,
+    use_dispersion_correction: bool = False,
     work_dir: _Optional[str] = None,
 ) -> float:
     """
@@ -66,6 +67,9 @@ def excess_chemical_potential(
     replica_exchange: bool, optional
         Whether to use replica exchange during the calculation (default is False).
 
+    use_dispersion_correction: bool, optional
+        Whether to include the long-range dispersion correction (default is False).
+
     work_dir: str, optional
         Working directory for the decoupling simulation (default is None,
         which uses a temporary directory).
@@ -120,6 +124,9 @@ def excess_chemical_potential(
     if not isinstance(replica_exchange, bool):
         raise TypeError("'replica_exchange' must be a of type 'bool'.")
 
+    if not isinstance(use_dispersion_correction, bool):
+        raise TypeError("'use_dispersion_correction' must be a of type 'bool'.")
+
     if work_dir is not None:
         if not isinstance(work_dir, str):
             raise TypeError("'work_dir' must be a of type 'str'.")
@@ -154,7 +161,9 @@ def excess_chemical_potential(
             runtime=runtime,
             timestep="2 fs",
             h_mass_factor=1,
-            shift_delta="2.25 A",
+            lambda_schedule="decouple",
+            softcore_form="beutler",
+            use_dispersion_correction=use_dispersion_correction,
             output_directory=work_dir,
         )
     except Exception as e:
@@ -173,33 +182,6 @@ def excess_chemical_potential(
     system.update(mol)
     system = sr.morph.link_to_reference(system)
 
-    # Get the lambda schedule from the molecule.
-    s = mol.property("schedule")
-
-    # Avoid scaling kappa during decouple stage.
-    s.set_equation(stage="decouple", lever="kappa", force="ghost/ghost", equation=0)
-    s.set_equation(stage="decouple", lever="kappa", force="ghost-14", equation=0)
-
-    # Add new discharging stage.
-    s.set_equation(stage="decouple", lever="charge", equation=s.final())
-    s.prepend_stage("decharge", s.initial())
-    s.set_equation(
-        stage="decharge",
-        lever="charge",
-        equation=s.lam() * s.final() + s.initial() * (1 - s.lam()),
-    )
-    s.set_equation(stage="decharge", force="ghost/ghost", equation=s.initial())
-    s.set_equation(stage="decharge", force="ghost-14", equation=s.initial())
-    s.set_equation(
-        stage="decharge", lever="kappa", force="ghost/ghost", equation=-s.lam() + 1
-    )
-    s.set_equation(
-        stage="decharge", lever="kappa", force="ghost-14", equation=-s.lam() + 1
-    )
-
-    # Update the schedule in the configuration.
-    config.lambda_schedule = s
-
     # Set up the runner.
     try:
         runner = Runner(system, config)
@@ -229,6 +211,7 @@ def standard_volume(
     cutoff: str = "10 A",
     num_samples: int = 5000,
     sample_interval: str = "1 ps",
+    use_dispersion_correction: bool = False,
 ) -> float:
     """
     Calculate the standard volume of water at the given temperature and pressure.
@@ -239,21 +222,24 @@ def standard_volume(
     system: sire.system.System
         The bulk water system.
 
-    temperature : str, optional
+    temperature: str, optional
         Temperature of the system (default is "298 K").
 
-    pressure : str, optional
+    pressure: str, optional
         Pressure of the system (default is "1 bar").
 
-    cutoff : str, optional
+    cutoff: str, optional
         Non-bonded interaction cutoff distance (default is "10 A").
 
-    num_samples : int, optional
+    num_samples: int, optional
         Number of volume samples to collect (default is 5000).
 
-    sample_interval : str, optional
+    sample_interval: str, optional
         Interval at which to sample the volume (default is "1 ps").
 
+    use_dispersion_correction: bool, optional
+        Whether to include the long-range dispersion correction (default is False).
+
     Returns
     -------
 
@@ -304,6 +290,9 @@ def standard_volume(
     if not u.has_same_units(sr.units.picosecond):
         raise ValueError("'sample_interval' has incorrect units.")
 
+    if not isinstance(use_dispersion_correction, bool):
+        raise TypeError("'use_dispersion_correction' must be a of type 'bool'.")
+
     # Disable the dynamics progress bar.
     sr.base.ProgressBar.set_silent()
 
@@ -319,7 +308,12 @@ def standard_volume(
 
     # Set up the NPT simulation.
     try:
-        d = system.dynamics(temperature=temperature, pressure=pressure, timestep="2 fs")
+        d = system.dynamics(
+            temperature=temperature,
+            pressure=pressure,
+            timestep="2 fs",
+            map={"use_dispersion_correction": use_dispersion_correction},
+        )
     except Exception as e:
         raise ValueError(f"Unable to set up NPT dynamics: {e}")
 
diff --git a/tests/test_energy.py b/tests/test_energy.py
index b2721dc..703a8a2 100644
--- a/tests/test_energy.py
+++ b/tests/test_energy.py
@@ -1,11 +1,13 @@
 import math
 import openmm
+import openmm.unit as omm_unit
 import os
 import pytest
 
+import numpy as np
 import sire as sr
 
-from loch import GCMCSampler
+from loch import GCMCSampler, SoftcoreForm
 
 
 @pytest.mark.skipif(
@@ -13,8 +15,17 @@
     reason="Requires CUDA enabled GPU.",
 )
 @pytest.mark.parametrize("platform", ["cuda", "opencl"])
-@pytest.mark.parametrize("fixture", ["water_box", "bpti", "sd12"])
-def test_energy(fixture, platform, request):
+@pytest.mark.parametrize(
+    "fixture,softcore_form",
+    [
+        ("water_box", "zacharias"),
+        ("bpti", "zacharias"),
+        ("sd12", "zacharias"),
+        ("sd12", "taylor"),
+        ("sd12", "beutler"),
+    ],
+)
+def test_energy(fixture, softcore_form, platform, request):
     """
     Test that the RF energy difference agrees with OpenMM.
     """
@@ -36,6 +47,7 @@ def test_energy(fixture, platform, request):
         reference=reference,
         lambda_schedule=schedule,
         lambda_value=lambda_value,
+        softcore_form=softcore_form,
         log_level="debug",
         ghost_file=None,
         log_file=None,
@@ -43,6 +55,14 @@ def test_energy(fixture, platform, request):
         platform=platform,
     )
 
+    # Build map of extra options for the dynamics object.
+    dyn_map = {}
+    if sampler._softcore_form == SoftcoreForm.TAYLOR:
+        dyn_map["use_taylor_softening"] = True
+    elif sampler._softcore_form == SoftcoreForm.BEUTLER:
+        dyn_map["use_beutler_softening"] = True
+        dyn_map["beutler_alpha"] = sampler._beutler_alpha
+
     # Create a dynamics object using the modified GCMC system.
     d = sampler.system().dynamics(
         cutoff_type="rf",
@@ -53,10 +73,10 @@ def test_energy(fixture, platform, request):
         timestep="2 fs",
         schedule=schedule,
         lambda_value=lambda_value,
-        coulomb_power=sampler._coulomb_power,
         shift_coulomb=str(sampler._shift_coulomb),
         shift_delta=str(sampler._shift_delta),
         platform=platform,
+        map=dyn_map,
     )
 
     # Loop until we accept an insertion move.
@@ -212,7 +232,6 @@ def test_platform_consistency(fixture, request):
         timestep="2 fs",
         schedule=schedule,
         lambda_value=lambda_value,
-        coulomb_power=cuda_sampler._coulomb_power,
         shift_coulomb=str(cuda_sampler._shift_coulomb),
         shift_delta=str(cuda_sampler._shift_delta),
         platform="cuda",
@@ -227,7 +246,6 @@ def test_platform_consistency(fixture, request):
         timestep="2 fs",
         schedule=schedule,
         lambda_value=lambda_value,
-        coulomb_power=opencl_sampler._coulomb_power,
         shift_coulomb=str(opencl_sampler._shift_coulomb),
         shift_delta=str(opencl_sampler._shift_delta),
         platform="opencl",
@@ -264,22 +282,22 @@ def test_platform_consistency(fixture, request):
     relative_diff = abs(cuda_energy - opencl_energy) / max(
         abs(cuda_energy), abs(opencl_energy), 1.0
     )
-    assert (
-        relative_diff < 0.001
-    ), f"Platform energies differ: CUDA={cuda_energy:.6f}, OpenCL={opencl_energy:.6f}, relative_diff={relative_diff:.6f}"
+    assert relative_diff < 0.001, (
+        f"Platform energies differ: CUDA={cuda_energy:.6f}, OpenCL={opencl_energy:.6f}, relative_diff={relative_diff:.6f}"
+    )
 
 
-# Reference energy values captured with seed=42 on the original kernel implementation.
+# Reference energy values captured with seed=42.
 # These anchor the kernel output to exact values so that refactors (e.g. moving from
 # __device__ static arrays to buffer arguments) can be validated.
 _REFERENCE_ENERGIES = {
     "water_box": {
-        "energy_coul": -9.45853172201302,
-        "energy_lj": 3.2191088,
+        "energy_coul": -4.674884,
+        "energy_lj": 0.82380486,
     },
     "bpti": {
-        "energy_coul": -15.377882774621897,
-        "energy_lj": -0.58867246,
+        "energy_coul": -13.205343,
+        "energy_lj": 5.061536,
     },
 }
 
@@ -332,7 +350,6 @@ def test_energy_regression(fixture, platform, request):
         timestep="2 fs",
         schedule=schedule,
         lambda_value=lambda_value,
-        coulomb_power=sampler._coulomb_power,
         shift_coulomb=str(sampler._shift_coulomb),
         shift_delta=str(sampler._shift_delta),
         platform=platform,
@@ -351,12 +368,12 @@ def test_energy_regression(fixture, platform, request):
 
     # Check against reference values.
     ref = _REFERENCE_ENERGIES[fixture]
-    assert math.isclose(
-        energy_coul, ref["energy_coul"], abs_tol=1e-4
-    ), f"Coulomb energy changed: {energy_coul!r} != {ref['energy_coul']!r}"
-    assert math.isclose(
-        energy_lj, ref["energy_lj"], abs_tol=1e-4
-    ), f"LJ energy changed: {energy_lj!r} != {ref['energy_lj']!r}"
+    assert math.isclose(energy_coul, ref["energy_coul"], abs_tol=1e-4), (
+        f"Coulomb energy changed: {energy_coul!r} != {ref['energy_coul']!r}"
+    )
+    assert math.isclose(energy_lj, ref["energy_lj"], abs_tol=1e-4), (
+        f"LJ energy changed: {energy_lj!r} != {ref['energy_lj']!r}"
+    )
 
 
 @pytest.mark.skipif(
@@ -399,7 +416,6 @@ def _create_and_run(seed):
             timestep="2 fs",
             schedule=schedule,
             lambda_value=0.5,
-            coulomb_power=sampler._coulomb_power,
             shift_coulomb=str(sampler._shift_coulomb),
             shift_delta=str(sampler._shift_delta),
             platform=platform,
@@ -438,9 +454,165 @@ def _create_and_run(seed):
     energy2_coul = sampler2._debug["energy_coul"]
     energy2_lj = sampler2._debug["energy_lj"]
 
-    assert math.isclose(
-        energy1_coul, energy2_coul, abs_tol=1e-4
-    ), f"Coulomb energy mismatch: {energy1_coul!r} vs {energy2_coul!r}"
-    assert math.isclose(
-        energy1_lj, energy2_lj, abs_tol=1e-4
-    ), f"LJ energy mismatch: {energy1_lj!r} vs {energy2_lj!r}"
+    assert math.isclose(energy1_coul, energy2_coul, abs_tol=1e-4), (
+        f"Coulomb energy mismatch: {energy1_coul!r} vs {energy2_coul!r}"
+    )
+    assert math.isclose(energy1_lj, energy2_lj, abs_tol=1e-4), (
+        f"LJ energy mismatch: {energy1_lj!r} vs {energy2_lj!r}"
+    )
+
+
+@pytest.mark.skipif(
+    "CUDA_VISIBLE_DEVICES" not in os.environ,
+    reason="Requires CUDA enabled GPU.",
+)
+@pytest.mark.parametrize("platform", ["cuda", "opencl"])
+def test_update_system_insertion(platform, water_box):
+    """
+    After an accepted insertion of a ghost buffer water, update_system() must:
+      - restore real LJ/charge parameters on the inserted water, and
+      - reflect the current OpenMM coordinates for that water.
+    """
+    mols, reference = water_box
+
+    sampler = GCMCSampler(
+        mols,
+        cutoff_type="rf",
+        cutoff="10 A",
+        reference=reference,
+        log_level="debug",
+        ghost_file=None,
+        log_file=None,
+        test=True,
+        platform=platform,
+    )
+
+    d = sampler.system().dynamics(
+        cutoff_type="rf",
+        cutoff="10 A",
+        temperature="298 K",
+        pressure=None,
+        constraint="h_bonds",
+        timestep="2 fs",
+        platform=platform,
+    )
+
+    ctx = d.context()
+    first_ghost_buffer = sampler._num_waters - sampler._num_ghost_waters
+
+    # Loop until a ghost buffer water is inserted.
+    inserted_idx = None
+    while inserted_idx is None:
+        state_before = sampler.water_state()
+        moves = sampler.move(ctx)
+        if len(moves) == 0 or moves[0] != 0:
+            continue
+        state_after = sampler.water_state()
+        # Find a buffer-slot water that changed from 0 → 1.
+        for i in range(first_ghost_buffer, sampler._num_waters):
+            if state_before[i] == 0 and state_after[i] == 1:
+                inserted_idx = i
+                break
+
+    assert inserted_idx is not None, "No buffer water was inserted"
+
+    # Call update_system and get the returned system.
+    system = sampler.update_system(ctx)
+
+    # Get the sire atom index (no vsite offset) of the inserted water's oxygen.
+    o_idx = int(sampler._water_indices_sire[inserted_idx])
+    inserted_mol = system[system.atoms()[o_idx].molecule()]
+
+    # Check parameters are real (non-zero).
+    for j, atom in enumerate(inserted_mol.atoms()):
+        charge = atom.property("charge").value()
+        lj = atom.property("LJ")
+        epsilon = lj.epsilon().value()
+
+        assert charge == pytest.approx(sampler._water_charge[j], abs=1e-6), (
+            f"Atom {j} charge not restored: got {charge}, expected {sampler._water_charge[j]}"
+        )
+        assert epsilon == pytest.approx(sampler._water_epsilon[j], abs=1e-6), (
+            f"Atom {j} epsilon not restored: got {epsilon}, expected {sampler._water_epsilon[j]}"
+        )
+
+    # Check coordinates match the OpenMM context.
+    omm_positions = (
+        ctx.getState(getPositions=True, enforcePeriodicBox=False)
+        .getPositions(asNumpy=True)
+        .value_in_unit(omm_unit.angstrom)
+    )
+
+    # _water_indices uses the vsite-offset index into the OpenMM atom list.
+    omm_o_idx = int(sampler._water_indices[inserted_idx])
+
+    for j, atom in enumerate(inserted_mol.atoms()):
+        sire_coord = atom.property("coordinates")
+        sire_xyz = np.array(
+            [sire_coord.x().value(), sire_coord.y().value(), sire_coord.z().value()]
+        )
+        omm_xyz = omm_positions[omm_o_idx + j]
+        assert np.allclose(sire_xyz, omm_xyz, atol=1e-3), (
+            f"Atom {j} coordinates mismatch: sire={sire_xyz}, omm={omm_xyz}"
+        )
+
+
+@pytest.mark.skipif(
+    "CUDA_VISIBLE_DEVICES" not in os.environ,
+    reason="Requires CUDA enabled GPU.",
+)
+@pytest.mark.parametrize("platform", ["cuda", "opencl"])
+def test_finalise_system(platform, water_box):
+    """
+    After an accepted insertion of a ghost buffer water, finalise_system() must
+    return a system whose water count equals the number of real (non-ghost)
+    waters in the sampler.
+    """
+    mols, reference = water_box
+
+    sampler = GCMCSampler(
+        mols,
+        cutoff_type="rf",
+        cutoff="10 A",
+        reference=reference,
+        log_level="debug",
+        ghost_file=None,
+        log_file=None,
+        test=True,
+        platform=platform,
+    )
+
+    d = sampler.system().dynamics(
+        cutoff_type="rf",
+        cutoff="10 A",
+        temperature="298 K",
+        pressure=None,
+        constraint="h_bonds",
+        timestep="2 fs",
+        platform=platform,
+    )
+
+    ctx = d.context()
+    first_ghost_buffer = sampler._num_waters - sampler._num_ghost_waters
+
+    # Loop until a ghost buffer water is inserted.
+    inserted = False
+    while not inserted:
+        state_before = sampler.water_state()
+        moves = sampler.move(ctx)
+        if len(moves) == 0 or moves[0] != 0:
+            continue
+        state_after = sampler.water_state()
+        for i in range(first_ghost_buffer, sampler._num_waters):
+            if state_before[i] == 0 and state_after[i] == 1:
+                inserted = True
+                break
+
+    system = sampler.finalise_system(ctx)
+
+    expected_waters = int(sampler._get_non_ghost_waters().size)
+    actual_waters = system["water"].num_molecules()
+
+    assert actual_waters == expected_waters, (
+        f"Water count mismatch: got {actual_waters}, expected {expected_waters}"
+    )
diff --git a/tests/test_osmotic.py b/tests/test_osmotic.py
new file mode 100644
index 0000000..840801a
--- /dev/null
+++ b/tests/test_osmotic.py
@@ -0,0 +1,70 @@
+import os
+
+import openmm
+import pytest
+
+from loch import GCMCSampler
+
+
+@pytest.mark.skipif(
+    "CUDA_VISIBLE_DEVICES" not in os.environ,
+    reason="Requires CUDA enabled GPU.",
+)
+@pytest.mark.parametrize("platform", ["cuda", "opencl"])
+def test_osmotic_ensemble(water_box, platform):
+    """
+    When pressure is set, move() updates box parameters and Adams values
+    from the current OpenMM context state after each call.
+
+    Two moves are performed with the box scaled between them. After the
+    second move, _v_nm3, _exp_B_bulk, and _exp_minus_B_bulk must all
+    reflect the new volume.
+    """
+    mols, _ = water_box
+
+    sampler = GCMCSampler(
+        mols,
+        cutoff_type="rf",
+        cutoff="10 A",
+        pressure="1 atm",
+        ghost_file=None,
+        log_file=None,
+        test=True,
+        platform=platform,
+        seed=42,
+    )
+
+    # NPT dynamics so the context carries a barostat (bypasses the muVT guard).
+    d = sampler.system().dynamics(
+        cutoff_type="rf",
+        cutoff="10 A",
+        temperature="298 K",
+        pressure="1 atm",
+        constraint="h_bonds",
+        timestep="2 fs",
+        platform=platform,
+    )
+    context = d.context()
+
+    # First move: record initial box-dependent values.
+    sampler.move(context)
+    v_nm3_before = sampler._v_nm3
+    exp_B_bulk_before = sampler._exp_B_bulk
+    exp_minus_B_bulk_before = sampler._exp_minus_B_bulk
+
+    # Scale the box uniformly to simulate a barostat volume move.
+    scale = 1.1
+    box = context.getState().getPeriodicBoxVectors(asNumpy=True)
+    box_nm = box.value_in_unit(openmm.unit.nanometer)
+    context.setPeriodicBoxVectors(
+        openmm.Vec3(*box_nm[0] * scale) * openmm.unit.nanometer,
+        openmm.Vec3(*box_nm[1] * scale) * openmm.unit.nanometer,
+        openmm.Vec3(*box_nm[2] * scale) * openmm.unit.nanometer,
+    )
+
+    # Second move: box parameters must now reflect the scaled volume.
+    sampler.move(context)
+
+    assert sampler._v_nm3 == pytest.approx(v_nm3_before * scale**3, rel=1e-5)
+    assert sampler._exp_B_bulk != exp_B_bulk_before
+    assert sampler._exp_minus_B_bulk != exp_minus_B_bulk_before
diff --git a/tests/test_platform.py b/tests/test_platform.py
new file mode 100644
index 0000000..81649b4
--- /dev/null
+++ b/tests/test_platform.py
@@ -0,0 +1,93 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Skip the entire module if PyCUDA is not installed.
+pytest.importorskip("pycuda")
+
+
+def _make_backend(mock_driver):
+    """Instantiate CUDAPlatform with a mocked PyCUDA driver."""
+    from loch._platforms._cuda import CUDAPlatform
+
+    mock_driver.Device.count.return_value = 1
+    mock_context = MagicMock()
+    mock_driver.Device.return_value.retain_primary_context.return_value = mock_context
+
+    backend = CUDAPlatform(
+        device=0,
+        num_points=3,
+        num_batch=10,
+        num_waters=5,
+        num_atoms=100,
+        num_threads=32,
+    )
+    return backend, mock_context
+
+
+class TestCUDAPushCount:
+    """Tests for CUDAPlatform context push-count tracking."""
+
+    def test_initial_push_count(self):
+        """Push count starts at 1 after __init__ (one push for the lifetime context)."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, mock_context = _make_backend(mock_driver)
+            assert backend._push_count == 1
+            mock_context.push.assert_called_once()
+
+    def test_push_increments_count(self):
+        """push_context() increments _push_count."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, _ = _make_backend(mock_driver)
+            backend.push_context()
+            assert backend._push_count == 2
+            backend.push_context()
+            assert backend._push_count == 3
+
+    def test_pop_decrements_count(self):
+        """pop_context() decrements _push_count."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, _ = _make_backend(mock_driver)
+            backend.push_context()
+            backend.pop_context()
+            assert backend._push_count == 1
+
+    def test_cleanup_pops_once_normally(self):
+        """cleanup() pops exactly once when no extra pushes are outstanding."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, mock_context = _make_backend(mock_driver)
+            backend.cleanup()
+            assert mock_context.pop.call_count == 1
+            assert backend._push_count == 0
+            assert backend._pycuda_context is None
+
+    def test_cleanup_pops_all_outstanding(self):
+        """cleanup() pops all outstanding pushes, simulating a crash mid-move."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, mock_context = _make_backend(mock_driver)
+            # Simulate two push_context() calls that were never popped (e.g.
+            # two GCMC moves crashed before their paired pop_context()).
+            backend.push_context()
+            backend.push_context()
+            assert backend._push_count == 3
+            backend.cleanup()
+            assert mock_context.pop.call_count == 3
+            assert backend._push_count == 0
+            assert backend._pycuda_context is None
+
+    def test_cleanup_handles_pop_exception(self):
+        """cleanup() continues safely if pop() raises (e.g. stack already empty)."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, mock_context = _make_backend(mock_driver)
+            mock_context.pop.side_effect = Exception("context stack is empty")
+            backend.cleanup()
+            assert backend._pycuda_context is None
+
+    def test_cleanup_idempotent(self):
+        """Calling cleanup() a second time is a no-op."""
+        with patch("loch._platforms._cuda._cuda") as mock_driver:
+            backend, mock_context = _make_backend(mock_driver)
+            backend.cleanup()
+            pop_count_after_first = mock_context.pop.call_count
+            backend.cleanup()
+            assert mock_context.pop.call_count == pop_count_after_first
diff --git a/tests/test_rng.py b/tests/test_rng.py
index cbb514f..6dda0a8 100644
--- a/tests/test_rng.py
+++ b/tests/test_rng.py
@@ -11,12 +11,14 @@ def test_dataclass_fields(self):
         """Test that BatchRandoms has the expected fields."""
         batch = BatchRandoms(
             rotation=np.zeros((10, 3)),
-            position=np.zeros((10, 3)),
+            position_sphere=np.zeros((10, 3)),
+            position_bulk=np.zeros((10, 3)),
             radius=np.zeros(10),
             acceptance=np.zeros(10),
         )
         assert hasattr(batch, "rotation")
-        assert hasattr(batch, "position")
+        assert hasattr(batch, "position_sphere")
+        assert hasattr(batch, "position_bulk")
         assert hasattr(batch, "radius")
         assert hasattr(batch, "acceptance")
 
@@ -32,7 +34,8 @@ def test_batch_shapes(self):
         batch = rng.get_batch_randoms()
 
         assert batch.rotation.shape == (batch_size, 3)
-        assert batch.position.shape == (batch_size, 3)
+        assert batch.position_sphere.shape == (batch_size, 3)
+        assert batch.position_bulk.shape == (batch_size, 3)
         assert batch.radius.shape == (batch_size,)
         assert batch.acceptance.shape == (batch_size,)
 
@@ -45,7 +48,8 @@ def test_batch_dtypes(self):
         batch = rng.get_batch_randoms()
 
         assert batch.rotation.dtype == np.float32
-        assert batch.position.dtype == np.float32
+        assert batch.position_sphere.dtype == np.float32
+        assert batch.position_bulk.dtype == np.float32
         assert batch.radius.dtype == np.float32
         assert batch.acceptance.dtype == np.float32
 
@@ -60,6 +64,7 @@ def test_uniform_range(self):
             batch = rng.get_batch_randoms()
 
             assert np.all(batch.rotation >= 0) and np.all(batch.rotation < 1)
+            assert np.all(batch.position_bulk >= 0) and np.all(batch.position_bulk < 1)
             assert np.all(batch.radius >= 0) and np.all(batch.radius < 1)
             assert np.all(batch.acceptance >= 0) and np.all(batch.acceptance < 1)
 
@@ -73,7 +78,7 @@ def test_normal_distribution(self):
         samples = []
         for _ in range(10):
             batch = rng.get_batch_randoms()
-            samples.append(batch.position.flatten())
+            samples.append(batch.position_sphere.flatten())
 
         all_samples = np.concatenate(samples)
 
@@ -92,7 +97,8 @@ def test_reproducibility_with_seed(self):
         batch2 = rng2.get_batch_randoms()
 
         np.testing.assert_array_equal(batch1.rotation, batch2.rotation)
-        np.testing.assert_array_equal(batch1.position, batch2.position)
+        np.testing.assert_array_equal(batch1.position_sphere, batch2.position_sphere)
+        np.testing.assert_array_equal(batch1.position_bulk, batch2.position_bulk)
         np.testing.assert_array_equal(batch1.radius, batch2.radius)
         np.testing.assert_array_equal(batch1.acceptance, batch2.acceptance)
 
diff --git a/tests/test_vsites.py b/tests/test_vsites.py
new file mode 100644
index 0000000..225b491
--- /dev/null
+++ b/tests/test_vsites.py
@@ -0,0 +1,210 @@
+import os
+
+import pytest
+import sire as sr
+
+from loch import GCMCSampler
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _add_vsites_to_mol(mol, n_vs):
+    """
+    Return a copy of mol with n_vs zero-charge virtual sites all parented to
+    atom 0.
+
+    Using atom 0 for all parent references works for any molecule regardless
+    of its size. Virtual sites have zero charge so they do not affect energies,
+    making it possible to compare sampler results with and without virtual sites.
+    """
+    n_atoms = mol.num_atoms()
+
+    # LocalCoordinatesSite requires three parent atoms; clamp to valid range so
+    # this helper works even for single-atom molecules (e.g. ions).
+    p1 = min(1, n_atoms - 1)
+    p2 = min(2, n_atoms - 1)
+
+    vsite_dict = {
+        str(k): {
+            "vs_indices": [0, p1, p2],
+            "vs_ows": [1, 0, 0],
+            "vs_xs": [1, -1, 0],
+            "vs_ys": [0, 1, -1],
+            # Offset each vsite slightly so their positions are distinct.
+            "vs_local": [(k + 1) * 0.03, 0, 0],
+        }
+        for k in range(n_vs)
+    }
+
+    # All vsites are children of atom 0.
+    parents = {str(i): [] for i in range(n_atoms)}
+    parents["0"] = list(range(n_vs))
+
+    cursor = mol.cursor()
+    cursor.set("n_virtual_sites", n_vs)
+    cursor.set("vs_charges", [0.0] * n_vs)
+    cursor.set("virtual_sites", vsite_dict)
+    cursor.set("parents", parents)
+    return cursor.commit()
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for _get_vsite_offsets (no GPU required)
+# ---------------------------------------------------------------------------
+
+
+def test_get_vsite_offsets_no_vsites():
+    """
+    _get_vsite_offsets on a system with no virtual sites returns all-zero
+    offsets and empty per-molecule charge lists.
+    """
+    mols = sr.load_test_files("bpti.prm7", "bpti.rst7")
+
+    total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(mols)
+
+    assert total_vsites == 0
+    assert offsets.shape == (mols.num_atoms(),)
+    assert (offsets == 0).all()
+    assert mol_vsite_charges == {}
+
+
+def test_get_vsite_offsets_with_vsites():
+    """
+    Adding N virtual sites to molecule 0 gives a zero offset for every atom
+    inside that molecule and an offset of N for all atoms in subsequent
+    molecules.
+    """
+    mols = sr.load_test_files("bpti.prm7", "bpti.rst7")
+
+    n_vs = 2
+    n_atoms_mol0 = mols[0].num_atoms()
+
+    mols_with_vs = mols.clone()
+    mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))
+
+    total_vsites, offsets, mol_vsite_charges = GCMCSampler._get_vsite_offsets(
+        mols_with_vs
+    )
+
+    assert total_vsites == n_vs
+
+    # Atoms in molecule 0 precede no vsite-bearing molecule, so offset is 0.
+    assert (offsets[:n_atoms_mol0] == 0).all()
+
+    # All atoms in molecules 1..N follow molecule 0 and are shifted by n_vs.
+    assert (offsets[n_atoms_mol0:] == n_vs).all()
+
+    # Only molecule 0 appears in the dict, with n_vs zero charges.
+    assert len(mol_vsite_charges) == 1
+    assert list(mol_vsite_charges.values())[0] == [0.0] * n_vs
+
+
+# ---------------------------------------------------------------------------
+# Integration tests (GPU required)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.skipif(
+    "CUDA_VISIBLE_DEVICES" not in os.environ,
+    reason="Requires CUDA enabled GPU.",
+)
+@pytest.mark.parametrize("platform", ["cuda", "opencl"])
+def test_vsite_index_offsets(bpti, platform):
+    """
+    GCMCSampler correctly offsets _water_indices and _reference_indices when
+    virtual sites are present on a preceding molecule.
+
+    In BPTI the protein is molecule 0 and all water molecules follow it.
+    Adding N vsites to the protein shifts every water's OpenMM particle index
+    by N. The reference atoms also live in the protein, so their OpenMM
+    indices are not shifted (no vsites precede molecule 0).
+    """
+    mols, reference = bpti
+
+    common_kwargs = dict(
+        reference=reference,
+        cutoff_type="rf",
+        cutoff="10 A",
+        ghost_file=None,
+        log_file=None,
+        test=True,
+        platform=platform,
+        seed=42,
+    )
+
+    # Baseline: no virtual sites.
+    baseline = GCMCSampler(mols, **common_kwargs)
+
+    # Add 2 zero-charge virtual sites to molecule 0 (the protein). All
+    # water molecules follow the protein, so their OpenMM indices shift by 2.
+    n_vs = 2
+    mols_with_vs = mols.clone()
+    mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))
+
+    sampler = GCMCSampler(mols_with_vs, **common_kwargs)
+
+    # Total vsite count must match what we added.
+    assert sampler._total_vsites == n_vs
+
+    # _num_atoms must include the virtual site particles.
+    assert sampler._num_atoms == baseline._num_atoms + n_vs
+
+    # Every water oxygen index is shifted by n_vs (waters follow the protein).
+    assert (sampler._water_indices == baseline._water_indices + n_vs).all()
+
+    # Reference atoms are inside molecule 0 (the protein), which has no
+    # preceding vsites, so their OpenMM indices are unchanged.
+    assert (sampler._reference_indices == baseline._reference_indices).all()
+
+
+@pytest.mark.skipif(
+    "CUDA_VISIBLE_DEVICES" not in os.environ,
+    reason="Requires CUDA enabled GPU.",
+)
+@pytest.mark.parametrize("platform", ["cuda", "opencl"])
+def test_flag_ghost_waters_sire_indices(bpti, platform):
+    """
+    _flag_ghost_waters must use Sire atom indices, not OpenMM particle indices.
+
+    With virtual sites present, OpenMM indices are larger than the corresponding
+    Sire atom indices. Using the OpenMM indices to look up atoms in the Sire
+    topology raises SireError::invalid_index. This test confirms that the
+    Sire-side indices are used and that _flag_ghost_waters completes without
+    error.
+    """
+    mols, reference = bpti
+
+    n_vs = 2
+    mols_with_vs = mols.clone()
+    mols_with_vs.update(_add_vsites_to_mol(mols_with_vs[0], n_vs))
+
+    sampler = GCMCSampler(
+        mols_with_vs,
+        reference=reference,
+        cutoff_type="rf",
+        cutoff="10 A",
+        ghost_file=None,
+        log_file=None,
+        test=True,
+        platform=platform,
+        seed=42,
+    )
+
+    n_sire_atoms = sampler.system().num_atoms()
+
+    # Sire indices must be within the topology's atom range.
+    assert (sampler._water_indices_sire < n_sire_atoms).all()
+
+    # OpenMM indices exceed the Sire range by n_vs, which is exactly the
+    # bug that would have caused SireError::invalid_index.
+    assert (sampler._water_indices == sampler._water_indices_sire + n_vs).all()
+
+    # _flag_ghost_waters must complete without raising an index error.
+    flagged = sampler._flag_ghost_waters(sampler.system())
+
+    # Every initially-ghost water molecule should be flagged.
+    n_flagged = len(flagged["property is_ghost_water"].molecules())
+    assert n_flagged == sampler._num_ghost_waters