diff --git a/.github/workflows/devel.yaml b/.github/workflows/devel.yaml index d25fb76b..c88ed73a 100644 --- a/.github/workflows/devel.yaml +++ b/.github/workflows/devel.yaml @@ -3,7 +3,7 @@ name: Release Devel on: workflow_dispatch: push: - branches: [ devel ] + branches: [devel] jobs: build: @@ -18,11 +18,13 @@ jobs: - { name: "linux", os: "ubuntu-latest", shell: "bash -l {0}" } - { name: "macos", os: "macos-latest", shell: "bash -l {0}" } exclude: - # Exclude all but the latest Python from all - # but Linux + # Exclude all but the latest Python from macOS - platform: { name: "macos", os: "macos-latest", shell: "bash -l {0}" } - python-version: "3.12" # MacOS can't run 3.12 yet... We want 3.10 and 3.11 + python-version: "3.10" + - platform: + { name: "macos", os: "macos-latest", shell: "bash -l {0}" } + python-version: "3.11" environment: name: somd2-build defaults: @@ -32,30 +34,43 @@ jobs: SIRE_DONT_PHONEHOME: 1 SIRE_SILENT_PHONEHOME: 1 steps: - - uses: conda-incubator/setup-miniconda@v3 + # + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + # + - name: Compute version info + shell: bash + run: python actions/update_recipe.py + # + - name: Create sdist + run: pip install build && python -m build --sdist && mv dist/*.tar.gz somd2-source.tar.gz + working-directory: ${{ github.workspace }} + # + - name: Install pixi + uses: prefix-dev/setup-pixi@v0.9.4 with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - activate-environment: somd2_build - miniforge-version: latest -# - - name: Clone the devel branch - run: git clone -b devel https://github.com/openbiosim/somd2 -# - - name: Setup Conda - run: conda install -y -c conda-forge boa anaconda-client packaging -# - - name: Update Conda recipe - run: python ${{ github.workspace }}/somd2/actions/update_recipe.py -# - - name: Prepare build location - run: mkdir ${{ github.workspace }}/build -# - - name: Build Conda package using conda build - run: conda build -c conda-forge -c 
openbiosim/label/dev ${{ github.workspace }}/somd2/recipes/somd2 -# - - name: Upload Conda package - run: python ${{ github.workspace }}/somd2/actions/upload_package.py + run-install: false + # + - name: Install rattler-build + shell: bash + run: pixi global install rattler-build + # + - name: Write Python variant config + shell: bash + run: printf 'python:\n - "${{ matrix.python-version }}"\n' > "${{ github.workspace }}/python_variant.yaml" + # + - name: Build package using rattler-build + shell: bash + run: rattler-build build --recipe "${{ github.workspace }}/recipes/somd2" -c conda-forge -c openbiosim/label/dev --variant-config "${{ github.workspace }}/python_variant.yaml" + # + - name: Install anaconda-client + shell: bash + run: python -m pip install anaconda-client + # + - name: Upload package + shell: bash + run: python actions/upload_package.py env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} ANACONDA_LABEL: dev diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 088186ed..a2e2fa50 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -20,10 +20,7 @@ jobs: platform: - { name: "linux", os: "ubuntu-latest", shell: "bash -l {0}" } - { name: "macos", os: "macos-latest", shell: "bash -l {0}" } - exclude: - - platform: - { name: "macos", os: "macos-latest", shell: "bash -l {0}" } - python-version: "3.12" # MacOS can't run 3.12 yet... 
+ # No exclusions - release builds all combinations environment: name: somd2-build defaults: @@ -33,30 +30,45 @@ jobs: SIRE_DONT_PHONEHOME: 1 SIRE_SILENT_PHONEHOME: 1 steps: - - uses: conda-incubator/setup-miniconda@v3 + # + - uses: actions/checkout@v4 with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - activate-environment: somd2_build - miniforge-version: latest -# - - name: Clone the main branch - run: git clone -b main https://github.com/openbiosim/somd2 -# - - name: Setup Conda - run: conda install -y -c conda-forge boa anaconda-client packaging -# - - name: Update Conda recipe - run: python ${{ github.workspace }}/somd2/actions/update_recipe.py -# - - name: Prepare build location - run: mkdir ${{ github.workspace }}/build -# - - name: Build Conda package using conda build - run: conda build -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/somd2/recipes/somd2 -# - - name: Upload Conda package - run: python ${{ github.workspace }}/somd2/actions/upload_package.py + ref: main + fetch-depth: 0 + # + - name: Compute version info + shell: bash + run: python actions/update_recipe.py + # + - name: Create sdist + run: pip install build && python -m build --sdist && mv dist/*.tar.gz somd2-source.tar.gz + working-directory: ${{ github.workspace }} + # + - name: Install pixi + uses: prefix-dev/setup-pixi@v0.9.4 + with: + run-install: false + # + - name: Install rattler-build + shell: bash + run: pixi global install rattler-build + # + - name: Write Python variant config + shell: bash + run: printf 'python:\n - "${{ matrix.python-version }}"\n' > "${{ github.workspace }}/python_variant.yaml" + # + - name: Build package using rattler-build + shell: bash + run: rattler-build build --recipe "${{ github.workspace }}/recipes/somd2" -c conda-forge -c openbiosim/label/main --variant-config "${{ github.workspace }}/python_variant.yaml" + # + - name: Install anaconda-client + shell: bash + run: python -m pip install anaconda-client + if: 
github.event.inputs.upload_packages == 'true' + # + - name: Upload package + shell: bash + run: python actions/upload_package.py env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} ANACONDA_LABEL: main diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 6191ca56..47cbc9f1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,14 +17,13 @@ jobs: - { name: "linux", os: "ubuntu-latest", shell: "bash -l {0}" } - { name: "macos", os: "macos-latest", shell: "bash -l {0}" } exclude: - # Exclude all but the latest Python from all - # but Linux + # Exclude all but the latest Python from macOS - platform: { name: "macos", os: "macos-latest", shell: "bash -l {0}" } python-version: "3.10" - platform: { name: "macos", os: "macos-latest", shell: "bash -l {0}" } - python-version: "3.12" # MacOS can't run 3.12 yet... + python-version: "3.11" environment: name: somd2-build defaults: @@ -33,31 +32,39 @@ jobs: env: SIRE_DONT_PHONEHOME: 1 SIRE_SILENT_PHONEHOME: 1 - REPO: "${{ github.event.pull_request.head.repo.full_name || github.repository }}" steps: - - uses: conda-incubator/setup-miniconda@v3 + # + - uses: actions/checkout@v4 with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - activate-environment: somd2_build - miniforge-version: latest -# - - name: Clone the feature branch - run: git clone -b ${{ github.head_ref }} --single-branch https://github.com/${{ env.REPO }} somd2 -# - - name: Setup Conda - run: conda install -y -c conda-forge boa anaconda-client packaging -# - - name: Update Conda recipe - run: python ${{ github.workspace }}/somd2/actions/update_recipe.py -# - - name: Prepare build location - run: mkdir ${{ github.workspace }}/build -# - - name: Build Conda package using conda build using main channel + fetch-depth: 0 + # + - name: Compute version info + shell: bash + run: python actions/update_recipe.py + # + - name: Create sdist + run: pip install build && python -m build --sdist && mv 
dist/*.tar.gz somd2-source.tar.gz + working-directory: ${{ github.workspace }} + # + - name: Install pixi + uses: prefix-dev/setup-pixi@v0.9.4 + with: + run-install: false + # + - name: Install rattler-build + shell: bash + run: pixi global install rattler-build + # + - name: Write Python variant config + shell: bash + run: printf 'python:\n - "${{ matrix.python-version }}"\n' > "${{ github.workspace }}/python_variant.yaml" + # + - name: Build package using rattler-build (main channel) if: ${{ github.base_ref == 'main' }} - run: conda build -c conda-forge -c openbiosim/label/main ${{ github.workspace }}/somd2/recipes/somd2 -# - - name: Build Conda package using conda build using dev channel + shell: bash + run: rattler-build build --recipe "${{ github.workspace }}/recipes/somd2" -c conda-forge -c openbiosim/label/main --variant-config "${{ github.workspace }}/python_variant.yaml" + # + - name: Build package using rattler-build (dev channel) if: ${{ github.base_ref != 'main' }} - run: conda build -c conda-forge -c openbiosim/label/dev ${{ github.workspace }}/somd2/recipes/somd2 + shell: bash + run: rattler-build build --recipe "${{ github.workspace }}/recipes/somd2" -c conda-forge -c openbiosim/label/dev --variant-config "${{ github.workspace }}/python_variant.yaml" diff --git a/.gitignore b/.gitignore index e1a8d070..00840853 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,6 @@ setup.err dist/ build/ somd2.egg-info -src/somd2/_version.py # Test output. 
output.yaml @@ -35,5 +34,5 @@ output.yaml # VSCode config .vscode/ -# Conda recipe (it is auto-generated) -recipes/somd2/meta.yaml +# Auto-generated version file +src/somd2/_version.py diff --git a/.img/somd2.png b/.img/somd2.png new file mode 100644 index 00000000..b2e9c40d Binary files /dev/null and b/.img/somd2.png differ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..61e8edfb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +files: ^(src|tests)/ +exclude: ^tests/(input|output)/ + +repos: + # General file quality checks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + args: [--maxkb=1000] # Prevent files larger than 1MB + - id: check-merge-conflict + + # Python formatting and linting + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.4 + hooks: + # Run the formatter + - id: ruff-format + # Run the linter (optional - remove if too strict) + - id: ruff + args: [--fix, --exit-zero] # Auto-fix but don't block commits diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..1df7d6ff --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +Changelog +========= + +[2026.1.0](https://github.com/openbiosim/somd2/compare/2025.1.0...2026.1.0) - Jun 2026 +-------------------------------------------------------------------------------------- + +* Improve constraint handling during minimisation and equilibration [#80](https://github.com/OpenBioSim/somd2/pull/80) +* Add support for GCMC on the OpenCL platform [#115](https://github.com/OpenBioSim/somd2/pull/115) +* Expose ring-breaking/making lambda schedules [#129](https://github.com/OpenBioSim/somd2/pull/129) +* Add support for Terminal Flip Monte Carlo [#138](https://github.com/OpenBioSim/somd2/pull/138) +* Add support for per-force energy decomposition [#143](https://github.com/OpenBioSim/somd2/pull/143) +* Add 
support for long-range dispersion correction and Beutler softcore [#147](https://github.com/OpenBioSim/somd2/pull/147) +* Add support for GCMC in the osmotic ensemble [#151](https://github.com/OpenBioSim/somd2/pull/151) +* Improve handling of simulation restarts via a `.done` sentinel file [#153](https://github.com/OpenBioSim/somd2/pull/153) +* Reduce checkpoint memory footprint by storing `NumPy` arrays in the replica exchange state pickle file [#155](https://github.com/OpenBioSim/somd2/pull/155) +* Remove redundant `s3` checkpoint files [#157](https://github.com/OpenBioSim/somd2/pull/157) +* Unconditionally apply AMBER water topology conversion to ensure fully rigid water constraints [#163](https://github.com/OpenBioSim/somd2/pull/163) + +[2025.1.0](https://github.com/OpenBioSim/somd2/releases/tag/2025.1.0) - Nov 2025 +------------------------------------------------------------------------------- + +* Initial public release. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..96737d30 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +graft tests diff --git a/README.md b/README.md index ab45be7b..6ec3b904 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +

+ + SOMD + +

+ # SOMD2 [![GitHub Actions](https://github.com/openbiosim/somd2/actions/workflows/devel.yaml/badge.svg)](https://github.com/openbiosim/somd2/actions/workflows/devel.yaml) @@ -9,42 +15,102 @@ simulations. Built on top of [Sire](https://github.com/OpenBioSim/sire) and [Ope ## Installation -First create a conda environment using the provided environment file: +### Conda package + +Install `somd2` directly from the `openbiosim` channel: ``` -conda env create -f environment.yaml +conda install -c conda-forge -c openbiosim somd2 ``` -(We recommend using [Miniforge](https://github.com/conda-forge/miniforge).) +Or, for the development version: -> [!NOTE] -> On macOS, you will need to use the `environment_macos.yaml` file instead. +``` +conda install -c conda-forge -c openbiosim/label/dev somd2 +``` -Now install `somd2` into the environment: +### Installing from source (standalone) + +To install from source using [pixi](https://pixi.sh), which will +automatically create an environment with all required dependencies +(including pre-built [Sire](https://github.com/OpenBioSim/sire), +[BioSimSpace](https://github.com/OpenBioSim/biosimspace), +[Ghostly](https://github.com/OpenBioSim/ghostly), and +[Loch](https://github.com/OpenBioSim/loch)): ``` -conda activate somd2 -pip install --editable . +git clone https://github.com/openbiosim/somd2 +cd somd2 +pixi install +pixi shell +pip install -e . ``` -Alternatively, to install into an existing conda environment: +### Installing from source (full OpenBioSim development) + +If you are developing across the full OpenBioSim stack, first install +[Sire](https://github.com/OpenBioSim/sire) from source by following the +instructions [here](https://github.com/OpenBioSim/sire#installation), then +activate its pixi environment: ``` -conda install -c conda-forge -c openbiosim somd2 +pixi shell --manifest-path /path/to/sire/pixi.toml -e dev ``` -Or, for the development version: +You may also need to install other packages from source, e.g. 
+[BioSimSpace](https://github.com/OpenBioSim/biosimspace), +[Ghostly](https://github.com/OpenBioSim/ghostly), and +[Loch](https://github.com/OpenBioSim/loch): ``` -conda install -c conda-forge -c openbiosim/label/dev somd2 +pip install -e /path/to/biosimspace +pip install -e /path/to/ghostly +pip install -e /path/to/loch +``` + +Then install `somd2` into the environment: + +``` +pip install -e . ``` +> [!Note] +> Pixi does not run conda post-link scripts, so the `ocl-icd-system` +> symlink needed for OpenCL won't be created automatically. After +> creating the environment (or after a pixi update), run the following +> to fix this: +> +> ```bash +> pixi shell +> ln -sfn /etc/OpenCL/vendors "${CONDA_PREFIX}/etc/OpenCL/vendors/ocl-icd-system" +> ``` + +### Testing + You should now have a `somd2` executable in your path. To test, run: ``` somd2 --help ``` +## Development + +Pre-commit hooks are used to ensure consistent code formatting and linting. +To set up pre-commit in your development environment: + +``` +pixi shell -e dev +pre-commit install +``` + +This will run [ruff](https://docs.astral.sh/ruff/) formatting and linting +checks automatically on each commit. To run the checks manually against all +files: + +``` +pre-commit run --all-files +``` + ## Usage In order to run an alchemical free-energy simulation you will need to @@ -145,13 +211,95 @@ somd2 --help | grep -A2 ' --gcmc' ``` > [!NOTE] -> GCMC is currently only supported when using the CUDA platform and isn't -> available on macOS, where the `pycuda` package is not available. +> GCMC is only supported when using the CUDA or OpenCL platforms. + +When using the CUDA platform, make sure that `nvcc` is in your `PATH`. If you +require a different `nvcc` to that provided by conda, you can set the +`PYCUDA_NVCC` environment variable to point to the desired `nvcc` binary. +Depending on your setup, you may also need to install the `cuda-nvvm` package +from `conda-forge`. 
+ +## Terminal ring flip Monte Carlo + +SOMD2 supports terminal ring flip Monte Carlo (MC) moves to improve sampling +of terminal aromatic rings in perturbable ligands, as described in +[this paper](https://chemrxiv.org/doi/full/10.26434/chemrxiv-2025-2zkx5). +Each move attempts a discrete rotation of a terminal ring around the bond +connecting it to the rest of the molecule, accepted or rejected via the +Metropolis criterion. Terminal ring groups are detected automatically from +the molecular connectivity of perturbable molecules. + +To enable terminal flip MC, set the frequency at which moves are attempted: + +``` +somd2 perturbable_system.bss --terminal-flip-frequency "1 ps" +``` + +The flip angle for each group is determined automatically from the ring +geometry. To override this for all groups: + +``` +somd2 perturbable_system.bss --terminal-flip-frequency "1 ps" --terminal-flip-angle "180 degrees" +``` + +## Debugging with energy components + +To help diagnose simulation instabilities, `SOMD2` can record the potential +energy contribution from each OpenMM force group. This is enabled with the +`--save-energy-components` flag: + +``` +somd2 perturbable_system.bss --save-energy-components +``` + +One Parquet file per λ window is written to the output directory, named +`energy_components_<lambda>.parquet`. Times are in nanoseconds and energies in +kcal/mol; these units are stored as schema metadata in the file. + +The recording interval depends on the runner and active samplers: + +- **Replica exchange**: always `energy-frequency` +- **Standard runner, no MC**: `energy-frequency` +- **Standard runner, with MC**: the shortest active MC frequency, i.e. 
+ `gcmc-frequency`, `terminal-flip-frequency`, or the smaller of the two + when both are active + +> [!NOTE] +> Energy components are written more frequently than checkpoint files and are +> not guarded by the file lock, so they may lead the checkpoint files by up +> to one `checkpoint-frequency` interval when copying output mid-simulation. -Make sure that `nvcc` is in your `PATH`. If you require a different `nvcc` to that -provided by conda, you can set the `PYCUDA_NVCC` environment variable to point -to the desired `nvcc` binary. Depending on your setup, you may also need to install -the `cuda-nvvm` package from `conda-forge`. +## Copying output files during a simulation + +When `SOMD2` writes checkpoint files it acquires an exclusive +[file lock](https://py-filelock.readthedocs.io) on `somd2.lock` inside the output +directory. This guarantees that checkpoint files are always in a consistent +state on disk. + +If you want to copy the output directory while a simulation is running (for +example, to create a backup or to inspect intermediate results), acquire the +same lock first so that you do not copy files mid-write. On Linux/macOS this +can be done with the `flock` command: + +```bash +flock /path/to/output/somd2.lock cp -r /path/to/output /destination +``` + +Or from Python using the [filelock](https://pypi.org/project/filelock/) package +(which `somd2` already depends on): + +```python +from filelock import FileLock + +with FileLock("/path/to/output/somd2.lock"): + # copy files here + ... +``` + +> [!NOTE] +> The `--timeout` option (default: `300 s`) controls how long `SOMD2` will +> wait for the lock while your copy is holding it. If you hold the lock +> for longer than this, the simulation will raise a `Timeout` error. 
## Analysis diff --git a/actions/update_recipe.py b/actions/update_recipe.py index ac3ff966..629369a0 100644 --- a/actions/update_recipe.py +++ b/actions/update_recipe.py @@ -1,50 +1,58 @@ -import sys +"""Compute git version info for rattler-build. + +This script computes GIT_DESCRIBE_TAG and GIT_DESCRIBE_NUMBER from the +git history and outputs them in GitHub Actions format for setting +environment variables. + +It also writes a _version.py file so that versioningit has a fallback +when .git is not available (e.g., when rattler-build excludes it). +""" + import os import subprocess +import sys -# Get the name of the script. script = os.path.abspath(sys.argv[0]) - -# we want to import the 'get_requirements' package from this directory -sys.path.insert(0, os.path.dirname(script)) - -# go up one directories to get the source directory -# (this script is in BioSimSpace/actions/) srcdir = os.path.dirname(os.path.dirname(script)) - -condadir = os.path.join(srcdir, "recipes", "somd2") - -print(f"conda recipe in {condadir}") - -# Store the name of the recipe and template YAML files. -recipe = os.path.join(condadir, "meta.yaml") -template = os.path.join(condadir, "template.yaml") - gitdir = os.path.join(srcdir, ".git") def run_cmd(cmd): - p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - return str(p.stdout.read().decode("utf-8")).lstrip().rstrip() - - -# Get the remote. -remote = run_cmd( - f"git --git-dir={gitdir} --work-tree={srcdir} config --get remote.origin.url" -) -print(remote) - -# Get the branch. 
-branch = run_cmd( - f"git --git-dir={gitdir} --work-tree={srcdir} rev-parse --abbrev-ref HEAD" -) -print(branch) - -lines = open(template, "r").readlines() - -with open(recipe, "w") as FILE: - for line in lines: - line = line.replace("SOMD2_REMOTE", remote) - line = line.replace("SOMD2_BRANCH", branch) - - FILE.write(line) + p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, _ = p.communicate() + return stdout.decode("utf-8").strip() + + +# Get the full git describe output (e.g., "2024.1.0-5-gabcdef" or "2024.1.0") +describe = run_cmd(f"git --git-dir={gitdir} --work-tree={srcdir} describe --tags") + +if "-" in describe: + # Format: tag-number-hash (e.g., "2024.1.0-5-gabcdef") + parts = describe.rsplit("-", 2) + tag = parts[0] + number = parts[1] + rev = parts[2] # e.g., "gabcdef" + version = f"{tag}+{number}.{rev}" +else: + # Exactly on a tag + tag = describe + number = "0" + version = tag + +print(f"GIT_DESCRIBE_TAG={tag}") +print(f"GIT_DESCRIBE_NUMBER={number}") +print(f"Version={version}") + +# Write to GITHUB_ENV if running in GitHub Actions +github_env = os.environ.get("GITHUB_ENV") +if github_env: + with open(github_env, "a") as f: + f.write(f"GIT_DESCRIBE_TAG={tag}\n") + f.write(f"GIT_DESCRIBE_NUMBER={number}\n") + print("Exported to GITHUB_ENV") + +# Write _version.py for versioningit fallback +version_file = os.path.join(srcdir, "src", "somd2", "_version.py") +with open(version_file, "w") as f: + f.write(f'__version__ = "{version}"\n') +print(f"Wrote {version_file}") diff --git a/actions/upload_package.py b/actions/upload_package.py index 799638cb..22547adc 100644 --- a/actions/upload_package.py +++ b/actions/upload_package.py @@ -1,16 +1,18 @@ +"""Upload built packages to the openbiosim Anaconda Cloud channel.""" + import os import sys import glob +import subprocess script = os.path.abspath(sys.argv[0]) -# go up one directories to get the source directory -# (this script is in somd2/actions/) +# Go up one 
directory to get the source directory. srcdir = os.path.dirname(os.path.dirname(script)) print(f"SOMD2 source is in {srcdir}\n") -# Get the anaconda token to authorise uploads +# Get the anaconda token to authorise uploads. if "ANACONDA_TOKEN" in os.environ: conda_token = os.environ["ANACONDA_TOKEN"] else: @@ -22,42 +24,30 @@ else: conda_label = "dev" -# get the root conda directory -conda = os.environ["CONDA"] - -# Set the path to the conda-bld directory. -conda_bld = os.path.join(conda, "envs", "somd2_build", "conda-bld") - -print(f"conda_bld = {conda_bld}") +# Search for rattler-build output first. +packages = glob.glob(os.path.join("output", "**", "*.conda"), recursive=True) -# Find the packages to upload -somd2_pkg = glob.glob(os.path.join(conda_bld, "*-*", "somd2-*.tar.bz2")) +# Fall back to conda-bld output. +if not packages: + if "CONDA" in os.environ: + conda = os.environ["CONDA"] + conda_bld = os.path.join(conda, "envs", "somd2_build", "conda-bld") + packages = glob.glob( + os.path.join(conda_bld, "**", "somd2-*.tar.bz2"), recursive=True + ) -if len(somd2_pkg) == 0: +if not packages: print("No somd2 packages to upload?") sys.exit(-1) -packages = somd2_pkg - -print(f"Uploading packages:") -print(" * ", "\n * ".join(packages)) - -packages = " ".join(packages) - - -def run_cmd(cmd): - import subprocess - - p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - return str(p.stdout.read().decode("utf-8")).lstrip().rstrip() +print("Uploading packages:") +for pkg in packages: + print(f" * {pkg}") - -gitdir = os.path.join(srcdir, ".git") - -tag = run_cmd(f"git --git-dir={gitdir} --work-tree={srcdir} tag --contains") +packages_str = " ".join(packages) # Upload the packages to the openbiosim channel on Anaconda Cloud. 
-cmd = f"anaconda --token {conda_token} upload --user openbiosim --label {conda_label} --force {packages}" +cmd = f"anaconda --token {conda_token} upload --user openbiosim --label {conda_label} --force {packages_str}" print(f"\nUpload command:\n\n{cmd}\n") @@ -65,8 +55,12 @@ def run_cmd(cmd): print("Not uploading as the ANACONDA_TOKEN is not set!") sys.exit(-1) -output = run_cmd(cmd) -print(output) +def run_cmd(cmd): + p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) + return str(p.stdout.read().decode("utf-8")).lstrip().rstrip() + +output = run_cmd(cmd) +print(output) print("Package uploaded!") diff --git a/demo/bubble_tyk2.ipynb b/demo/bubble_tyk2.ipynb deleted file mode 100644 index e8f4aeb3..00000000 --- a/demo/bubble_tyk2.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "d086ec3c-3d0c-45b1-949f-0224e68043fe", - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2e8df62-7bcd-4028-a97e-51ed67530ce0", - "metadata": {}, - "outputs": [], - "source": [ - "mols = sr.stream.load(\"bound_31_42.bss\")\n", - "mols.add_shared_property(\"space\", mols.property(\"space\"))\n", - "timestep = \"1fs\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "169dbf60-fb80-469f-9859-d36ce6187ab4", - "metadata": {}, - "outputs": [], - "source": [ - "# link reference properties to main properties\n", - "for mol in mols.molecules(\"molecule property is_perturbable\"):\n", - " mols.update(mol.perturbation().link_to_reference().commit())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ccde94f0", - "metadata": {}, - "outputs": [], - "source": [ - "ligand = mols[\"molecule with property is_perturbable\"]\n", - "ligand" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8dbad46", - "metadata": {}, - "outputs": [], - "source": [ - "ligand_center = 
ligand.evaluate().center()\n", - "radius = \"15 A\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0e3a9d11", - "metadata": {}, - "outputs": [], - "source": [ - "restraints = sr.restraints.positional(\n", - " mols,\n", - " f\"residues within {radius} of {ligand_center}\",\n", - " position=ligand_center,\n", - " r0=radius,\n", - " k=\"10 kcal mol-1 A-2\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f711a43", - "metadata": {}, - "outputs": [], - "source": [ - "restraints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72927a60", - "metadata": {}, - "outputs": [], - "source": [ - "mols = (\n", - " mols.minimisation(\n", - " fixed=f\"not (residues within {radius} of {ligand_center})\",\n", - " restraints=restraints,\n", - " map={\"ignore_perturbations\": True},\n", - " )\n", - " .run()\n", - " .commit()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "648ece01", - "metadata": {}, - "outputs": [], - "source": [ - "d = mols.dynamics(\n", - " timestep=timestep,\n", - " temperature=\"25oC\",\n", - " restraints=restraints,\n", - " fixed=f\"not (residues within {radius} of {ligand_center})\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "460f5a9c", - "metadata": {}, - "outputs": [], - "source": [ - "d.run(\"200ps\", save_frequency=\"1ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c297b9d", - "metadata": {}, - "outputs": [], - "source": [ - "mols = d.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3af4a74", - "metadata": {}, - "outputs": [], - "source": [ - "sr.save(mols.trajectory()[0], \"tyk2_bubble\", format=[\"pdb\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c39e1ee9", - "metadata": {}, - "outputs": [], - "source": [ - "sr.save(mols.trajectory(), \"tyk2_bubble\", format=[\"DCD\"])" - ] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo/first_example.ipynb b/demo/first_example.ipynb deleted file mode 100644 index 1d1290e6..00000000 --- a/demo/first_example.ipynb +++ /dev/null @@ -1,226 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Perturbable System first example\n", - "\n", - "This notebook will outline the basics of the new sire OpenMM functionality." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import BioSimSpace as BSS\n", - "import sire as sr" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Merged Molecules\n", - "This section will demonstrate the creation and visualisation of perturbations using BioSimSpace and sire, the system in this case will be a simple ethane → methanol transformation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ethane = BSS.Parameters.gaff(\"CC\").getMolecule()\n", - "methanol = BSS.Parameters.gaff(\"CO\").getMolecule()\n", - "mapping = BSS.Align.matchAtoms(ethane, methanol)\n", - "ethane = BSS.Align.rmsdAlign(ethane, methanol, mapping)\n", - "merged = BSS.Align.merge(ethane, methanol, mapping)\n", - "\n", - "solvated = BSS.Solvent.tip3p(molecule=merged, box=3 * [3 * BSS.Units.Length.nanometer])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert BioSimSpace to sire\n", - "sire_system = sr.convert.to(solvated, \"sire\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "for mol in sire_system.molecules():\n", - " if mol.is_perturbable():\n", - " temp = mol\n", - "\n", - "temp.perturbation().view()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Running simulations of perturbed systems\n", - "\n", - "#### Once a perturbed molecule has been created `sire` can be used directly to run simulations and extract energy information.\n", - "\n", - "Here we will run a single simulation of the above perturbation at a lambda value of 0.5\n", - "By default, lambda behaviour is controlled by a simple morph, the same as `SOMD1`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Perturbable sire systems can be minimised directly at any chosen lambda value, functionality here is a wrapper around openmm minimisation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "m = sire_system.minimisation(lambda_val=0.5)\n", - "sire_system = m.run().commit()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Production simulations can also be run using sire dynamics - this is a simple wrapper around openMM, it adds convenience such as trajectory saving & automated calculation of energies\n", - "\n", - "Here, the `lambda_values` array is used to specify all lambda values at which the potential is to be calculated." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = sire_system.dynamics(lambda_value=0.5)\n", - "d.run(\"10ps\", energy_frequency=\"0.1ps\", lambda_windows=[0.0, 1.0])\n", - "sire_system = d.commit()\n", - "sire_system.energy_trajectory()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# OpenMM functionality\n", - "\n", - "Alternatively, the perturbable sire system can be converted to openMM, resulting in a `SOMMContext`, a simple wrapper around the `OpenMM::context` class containing information on the perturbation " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm = sr.convert.to(sire_system, \"openmm\")\n", - "omm" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additional information regarding lambda can be set and called directly with this context" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm.set_lambda(0.5)\n", - "omm.get_lambda()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Simulations can then be run directly using this context, in precisely the same manner as any other openMM context" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm.getIntegrator().step(1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm.get_potential_energy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm.set_lambda(0.0)\n", - "omm.get_potential_energy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sireDEV", - "language": "python", - "name": "python3" - }, 
- "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demo/lambda_schedule.ipynb b/demo/lambda_schedule.ipynb deleted file mode 100644 index 1e8a6197..00000000 --- a/demo/lambda_schedule.ipynb +++ /dev/null @@ -1,272 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr\n", - "import BioSimSpace as BSS" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Merged Molecules\n", - "This section will demonstrate the creation and visualisation of perturbations using BioSimSpace and sire, the system in this case will be a simple ethane → methanol transformation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ethane = BSS.Parameters.gaff(\"CC\").getMolecule()\n", - "methanol = BSS.Parameters.gaff(\"CO\").getMolecule()\n", - "mapping = BSS.Align.matchAtoms(ethane, methanol)\n", - "ethane = BSS.Align.rmsdAlign(ethane, methanol, mapping)\n", - "merged = BSS.Align.merge(ethane, methanol, mapping)\n", - "\n", - "solvated = BSS.Solvent.tip3p(molecule=merged, box=3 * [3 * BSS.Units.Length.nanometer])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Extract the sire system\n", - "sire_system = sr.system.System(solvated._sire_object)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for mol in sire_system.molecules():\n", - " if mol.is_perturbable():\n", - " temp = mol\n", - "\n", - "temp.perturbation().view()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Custom lambda scheduling\n", - "This section 
will demonstrate the creation and implementation of custom lambda scheduling in sire. This will exploit the new functionality of `sr.cas.LambdaSchedule`\n", - "\n", - "First, create an empty lambda schedule" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "l = sr.cas.LambdaSchedule()\n", - "l" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add a simple morph to the lambda schedule" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "l.add_stage(\"morphing\", (1 - l.lam()) * l.initial() + l.lam() * l.final())\n", - "l" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This lambda schedule can then be converted to a dataframe and visualised" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = l.get_lever_values(to_pandas=True, initial=0, final=1, num_lambda=10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df.plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "More complex schedules can be created by adding levers to specific properties using add_lever " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a new lambda schedule\n", - "l_complex = sr.cas.LambdaSchedule()\n", - "\n", - "# Add multiple stages. 
The charging stages currently act only to set parameters equal to their initial/final values.\n", - "l_complex.add_stage(\"de-charging\", l_complex.initial())\n", - "l_complex.add_stage(\n", - " \"morphing\",\n", - " (1 - l_complex.lam()) * l_complex.initial() + l_complex.lam() * l_complex.final(),\n", - ")\n", - "l_complex.add_stage(\"re-charging\", l_complex.final())\n", - "\n", - "# By adding levers the de-charging and re-charging stages can be applied to specific properties, in this case charge\n", - "l_complex.add_lever(\"charge\")\n", - "l_complex.set_equation(\n", - " \"de-charging\", \"charge\", (1.0 - 0.8 * l_complex.lam()) * l_complex.final()\n", - ")\n", - "l_complex.set_equation(\n", - " \"re-charging\", \"charge\", (0.2 + 0.8 * l_complex.lam()) * l_complex.final()\n", - ")\n", - "\n", - "# We also need to morph the charges scaled by 0.2 (since we scale down to 0.2)\n", - "l_complex.set_equation(\n", - " \"morphing\",\n", - " \"charge\",\n", - " 0.2 * (l_complex.final()),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_complex = l_complex.get_lever_values(initial=0.0, final=1, num_lambda=100)\n", - "df_complex.plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lambda schedules can be injected directly in to sire dynamics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "m = sire_system.minimisation()\n", - "sire_system = m.run().commit()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = sire_system.dynamics()\n", - "d.set_schedule(l_complex)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "for lam in np.arange(0, 1.1, 0.1):\n", - " d.set_lambda(lam)\n", - " print(f\"lambda = {lam}, energy = 
{d.current_potential_energy()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, the SOMMContext can be extracted and lambda schedules set within it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "omm = sr.convert.to(\n", - " sire_system, \"openmm\", map={\"cutoff\": sr.u(\"7.5A\"), \"cutoff_type\": \"PME\"}\n", - ")\n", - "omm.set_lambda_schedule(l_complex)\n", - "omm.set_lambda(0.0)\n", - "omm.get_potential_energy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for lam in np.arange(0, 1.1, 0.1):\n", - " omm.set_lambda(lam)\n", - " print(f\"Lambda = {lam}, energy = {omm.get_potential_energy()}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sireDEV", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demo/repex.ipynb b/demo/repex.ipynb deleted file mode 100644 index d7fdb7b9..00000000 --- a/demo/repex.ipynb +++ /dev/null @@ -1,352 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "510fe067-ffb7-45d1-a18e-8aaf27f8036d", - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr" - ] - }, - { - "cell_type": "markdown", - "id": "83fcc9a0", - "metadata": {}, - "source": [ - "# Replica Exchange\n", - "\n", - "The ease with which multiple simulations can be handled simultaneously allows for a simple implementation of replica exchange." 
- ] - }, - { - "cell_type": "markdown", - "id": "a2912617-c78d-40c1-b846-c3d4633bd7d9", - "metadata": {}, - "source": [ - "Load an example perturbable system" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "957ecaa8-6885-441a-9c5a-c59671522c9b", - "metadata": {}, - "outputs": [], - "source": [ - "mols = sr.load_test_files(\"merged_molecule.s3\")" - ] - }, - { - "cell_type": "markdown", - "id": "e393e257-a50d-4204-97d2-0ddd9a2d60b4", - "metadata": {}, - "source": [ - "Create two replicas of the system, at two different lambda values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75b9681f-59cf-424e-ba00-b7d63415c396", - "metadata": {}, - "outputs": [], - "source": [ - "rep0 = mols.dynamics(timestep=\"4fs\", temperature=\"25oC\", lambda_value=0.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7f24c6f-8c69-475c-bd0b-c576ee8419f5", - "metadata": {}, - "outputs": [], - "source": [ - "rep1 = mols.dynamics(timestep=\"4fs\", temperature=\"25oC\", lambda_value=0.2)" - ] - }, - { - "cell_type": "markdown", - "id": "573869dd", - "metadata": {}, - "source": [ - "### Implementation of a minimal `replica_exchange` function\n", - "\n", - "This function takes in a pair of sire `dynamics` objects and performs a Hamiltonian replica exchange move, returning the two systems as well as a boolean that indicates whether or not the move was accepted" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0304cd61", - "metadata": {}, - "outputs": [], - "source": [ - "def replica_exchange(replica0, replica1):\n", - " # Retrieve the information we need for each replica from the dynamics objects\n", - " lam0 = replica0.get_lambda()\n", - " lam1 = replica1.get_lambda()\n", - "\n", - " ensemble0 = replica0.ensemble()\n", - " ensemble1 = replica1.ensemble()\n", - "\n", - " temperature0 = ensemble0.temperature()\n", - " temperature1 = ensemble1.temperature()\n", - "\n", - " # The lambda_values argument 
allows us to retrieve the potential energy from both objects at both lambda values\n", - " nrgs0 = replica0.current_potential_energy(lambda_values=[lam0, lam1])\n", - " nrgs1 = replica1.current_potential_energy(lambda_values=[lam0, lam1])\n", - "\n", - " from sire.units import k_boltz\n", - "\n", - " beta0 = 1.0 / (k_boltz * temperature0)\n", - " beta1 = 1.0 / (k_boltz * temperature1)\n", - "\n", - " # Check properties of the ensemble to see if we need to include a pressure term\n", - " if not ensemble0.is_constant_pressure():\n", - " delta = beta1 * (nrgs1[0] - nrgs1[1]) + beta0 * (nrgs0[0] - nrgs0[1])\n", - " else:\n", - " volume0 = replica0.current_space().volume()\n", - " volume1 = replica1.current_space().volume()\n", - "\n", - " pressure0 = ensemble0.pressure()\n", - " pressure1 = ensemble1.pressure()\n", - "\n", - " delta = beta1 * (\n", - " nrgs1[0] - nrgs1[1] + pressure1 * (volume1 - volume0)\n", - " ) + beta0 * (nrgs0[0] - nrgs0[1] + pressure0 * (volume0 - volume1))\n", - "\n", - " from math import exp\n", - " import random\n", - "\n", - " move_passed = delta > 0 or (exp(delta) >= random.random())\n", - "\n", - " if move_passed:\n", - " if lam0 != lam1:\n", - " replica0.set_lambda(lam1)\n", - " replica1.set_lambda(lam0)\n", - " return (replica1, replica0, True)\n", - "\n", - " else:\n", - " return (replica0, replica1, False)" - ] - }, - { - "cell_type": "markdown", - "id": "38ebb8b1-7056-4873-b87a-1a1d8754472c", - "metadata": {}, - "source": [ - "Run dynamics on both replicas. We'll minimise each replica first, to prevent NaN errors. The error catching will mostly catch these and auto-minimise if found (i.e. 
you could comment out the minimisation lines)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7f26b29-f270-4b0a-aeba-7200c9439945", - "metadata": {}, - "outputs": [], - "source": [ - "rep0.minimise()\n", - "rep0.run(\"5ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "158c49b5-4a42-4697-8e59-4e682f1905de", - "metadata": {}, - "outputs": [], - "source": [ - "rep1.minimise()\n", - "rep1.run(\"5ps\")" - ] - }, - { - "cell_type": "markdown", - "id": "38d21dfe-212f-480a-9d0e-2a1934fa93af", - "metadata": {}, - "source": [ - "Perform a replica exchange move between these two replicas. If the move passes, then the replicas are swapped (by swapping their lambda values). They are returned from this function in the same lambda order as they went in (i.e. in increasing lambda order)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6abfb021-1fd5-4faa-8b5c-f5631f88931c", - "metadata": {}, - "outputs": [], - "source": [ - "(rep0, rep1, swapped) = replica_exchange(rep0, rep1)" - ] - }, - { - "cell_type": "markdown", - "id": "dc19d9c6-14ed-4807-bc05-741b09219370", - "metadata": {}, - "source": [ - "Was the move successful?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9ac37cac-a278-431d-a50d-ce0d45f0dd10", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Swapped?\", swapped)" - ] - }, - { - "cell_type": "markdown", - "id": "cfd648a1-9f90-4a5f-92a6-7453c2a1e6fe", - "metadata": {}, - "source": [ - "Even if they were swapped, the order of lambda is preserved" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12a7e248-881b-4ec4-be65-9c65d25541cc", - "metadata": {}, - "outputs": [], - "source": [ - "print(rep0.get_lambda(), rep1.get_lambda())" - ] - }, - { - "cell_type": "markdown", - "id": "e6ef979f", - "metadata": {}, - "source": [ - "#### This functionality also exists within the current version of sire (the sire version also supports temperature-based repex) and can be accessed with `sire.morph.replica_exchange`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b8f77f2", - "metadata": {}, - "outputs": [], - "source": [ - "(rep0, rep1, swapped) = sr.morph.replica_exchange(rep0, rep1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c349f46", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Swapped?\", swapped)\n", - "print(rep0.get_lambda(), rep1.get_lambda())" - ] - }, - { - "cell_type": "markdown", - "id": "c9e0078a", - "metadata": {}, - "source": [ - "# Non-equilibrium switching\n", - "\n", - "Direct access to the lambda value of dyamics objects allows it to be changed on-the-fly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e578418", - "metadata": {}, - "outputs": [], - "source": [ - "d = mols.dynamics(\n", - " timestep=\"4fs\", temperature=\"25oC\", lambda_value=0.0, energy_frequency=sr.u(\"1ps\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef51e597", - "metadata": {}, - "outputs": [], - "source": [ - "d.minimise()\n", - "d.run(\"5ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, 
- "id": "f43f45d4", - "metadata": {}, - "outputs": [], - "source": [ - "d.get_lambda()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c40db3ed", - "metadata": {}, - "outputs": [], - "source": [ - "d.set_lambda(1.0)\n", - "d.run(\"5ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b50b17e", - "metadata": {}, - "outputs": [], - "source": [ - "d.get_lambda()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7910c234", - "metadata": {}, - "outputs": [], - "source": [ - "df = d.energy_trajectory(to_pandas=True)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4cb7b999", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo/sire_restraints.ipynb b/demo/sire_restraints.ipynb deleted file mode 100644 index e70a164b..00000000 --- a/demo/sire_restraints.ipynb +++ /dev/null @@ -1,354 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "d086ec3c-3d0c-45b1-949f-0224e68043fe", - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr" - ] - }, - { - "cell_type": "markdown", - "id": "4721068a", - "metadata": {}, - "source": [ - "# Positional Restraints\n", - "This section of the notebook will demonstrate new sire positional restraint functionality, building to a system in which all molecules outside a defined 'bubble' are fixed in place " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2e8df62-7bcd-4028-a97e-51ed67530ce0", - "metadata": {}, - "outputs": [], - "source": [ - "mols = 
sr.load_test_files(\"ala.top\", \"ala.crd\")\n", - "mols.make_whole()\n", - "mols.view()" - ] - }, - { - "cell_type": "markdown", - "id": "39a6ea9f", - "metadata": {}, - "source": [ - "Use the new sire `restraints` functionality to create a `restraints` object, defining the restraints which are to be applied to the simulated system. In this case a simple positional restraint will be added to the alpha carbon of our alanine dipeptide" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "169dbf60-fb80-469f-9859-d36ce6187ab4", - "metadata": {}, - "outputs": [], - "source": [ - "restraints = sr.restraints.positional(mols, \"resname ALA and not element H\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "526b6399-68a3-4564-9ff7-72eb48748933", - "metadata": {}, - "outputs": [], - "source": [ - "restraints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f8ae7c2", - "metadata": {}, - "outputs": [], - "source": [ - "print(mols[0].atoms([8]))" - ] - }, - { - "cell_type": "markdown", - "id": "f1a2165d", - "metadata": {}, - "source": [ - "`Restraint` can be further expanded to set the force constant `k` and the half-harmonic width `r0`, as well as set a restraint position" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0f8acb9", - "metadata": {}, - "outputs": [], - "source": [ - "restraints = sr.restraints.positional(\n", - " mols,\n", - " \"resname ALA and not element H\",\n", - " k=\"100 kcal mol-1 A-2\",\n", - " r0=\"0.0 A\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "528b6d5f-05db-40ba-a85d-0f6a8dac8484", - "metadata": {}, - "outputs": [], - "source": [ - "mols = (\n", - " mols.minimisation(\n", - " restraints=restraints,\n", - " )\n", - " .run()\n", - " .commit()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f3b87c7-641b-48fe-825e-390e5a08a26e", - "metadata": {}, - "outputs": [], - "source": [ - "d = 
mols.dynamics(\n", - " timestep=\"4fs\",\n", - " temperature=\"25oC\",\n", - " restraints=restraints,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6df854a-7180-4b1f-88cf-331383747c35", - "metadata": {}, - "outputs": [], - "source": [ - "d.run(\"20ps\", frame_frequency=\"0.5ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "47b25cfd-5282-4427-8588-7b28612d8a62", - "metadata": {}, - "outputs": [], - "source": [ - "mols = d.commit()\n", - "mols.view()" - ] - }, - { - "cell_type": "markdown", - "id": "5e1a166c", - "metadata": {}, - "source": [ - "This functionality can be expanded to, for example, freeze all atoms outside a given distance of the ligand, effectively truncating the simulated region (note that this currently doesn't improve performance, a future update will add the ability to approximate the contributions of the frozen atoms).\n", - "\n", - "This is a two-part process, first we restrain the atoms within the bubble itself in order to prevent them from leaving it, this is achieved with the `restraints.positional` functionality seen above:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9200af9", - "metadata": {}, - "outputs": [], - "source": [ - "restraints_bubble = sr.restraints.positional(\n", - " mols,\n", - " \"molecules within 7.5 of resname ALA\",\n", - " position=mols[\"resname ALA\"].coordinates(),\n", - " r0=sr.u(\"10 A\"),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "2a71791c", - "metadata": {}, - "source": [ - "Next, we pass the `fixed` argument in to both minimisation and dynamics (alternatively `fixed` can be specified along with all other simulation options within `map`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9375540", - "metadata": {}, - "outputs": [], - "source": [ - "mols = (\n", - " mols.minimisation(\n", - " restraints=restraints_bubble, fixed=\"not (molecules within 7.5 of molidx 0)\"\n", - " )\n", 
- " .run()\n", - " .commit()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91377b33", - "metadata": {}, - "outputs": [], - "source": [ - "map = {\n", - " \"restraints\": restraints_bubble,\n", - " \"fixed\": \"not (molecules within 7.5 of molidx 0)\",\n", - " \"temperature\": 300 * sr.units.kelvin,\n", - "}\n", - "d = mols.dynamics(map=map)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "650106cd", - "metadata": {}, - "outputs": [], - "source": [ - "d.run(\"20ps\", frame_frequency=\"0.5ps\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdce4dbd", - "metadata": {}, - "outputs": [], - "source": [ - "mols = d.commit()\n", - "mols.view()" - ] - }, - { - "cell_type": "markdown", - "id": "0e53a900", - "metadata": {}, - "source": [ - "At a lower level, restraints can be passed in the `map` argument of `sire.convert`. This creates an openMM context.\n", - "\n", - "Openmm wants a list of indexes for fixed\n", - "\n", - "The key difference here is the form in which the mask of the `fixed` flag is passed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23810ad7", - "metadata": {}, - "outputs": [], - "source": [ - "mask_sire = mols[\"not (molecules within 7.5 of molidx 0)\"].atoms()\n", - "mask_openmm = [i.number().value() for i in mask_sire]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3c14fc9", - "metadata": {}, - "outputs": [], - "source": [ - "omm = sr.convert.to(\n", - " mols, \"openmm\", map={\"restraints\": restraints, \"fixed\": mask_openmm}\n", - ")\n", - "omm" - ] - }, - { - "cell_type": "markdown", - "id": "b23f2c32", - "metadata": {}, - "source": [ - "# Combining restraints and lambda levers" - ] - }, - { - "cell_type": "markdown", - "id": "175be802", - "metadata": {}, - "source": [ - "Restraints can be perturbed in the same manner as any other potential using `sr.cas.lambdaschedule`" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "a7276e8b", - "metadata": {}, - "outputs": [], - "source": [ - "restraints = sr.restraints.positional(\n", - " mols,\n", - " \"resname ALA and not element H\",\n", - " k=\"100 kcal mol-1 A-2\",\n", - " r0=\"0.0 A\",\n", - " name=\"positional\",\n", - ")\n", - "dst_rest = sr.restraints.distance(mols, atoms0=0, atoms1=1, name=\"distance\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37d59f38", - "metadata": {}, - "outputs": [], - "source": [ - "l = sr.cas.LambdaSchedule()\n", - "l.add_stage(\"distance_restraints\", 0)\n", - "l.add_stage(\"positional_restraints\", 1)\n", - "l.set_equation(\"distance_restraints\", \"distance\", l.lam() * l.initial())\n", - "l.set_equation(\"positional_restraints\", \"positional\", l.lam() * l.initial())\n", - "l" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b098278", - "metadata": {}, - "outputs": [], - "source": [ - "l.get_lever_values(initial=1.0, final=1.0, levers=[\"distance\", \"positional\"]).plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e186d9f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/environment.yaml b/environment.yaml deleted file mode 100644 index e509e02b..00000000 --- a/environment.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: somd2 - -channels: - - conda-forge - - openbiosim/label/dev - -dependencies: - - biosimspace - - ghostly - - filelock - - loch - - loguru - - numba - - nvidia-ml-py - - versioningit diff --git a/environment_macos.yaml b/environment_macos.yaml deleted 
file mode 100644 index 750a5f24..00000000 --- a/environment_macos.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: somd2 - -channels: - - conda-forge - - openbiosim/label/dev - -dependencies: - - biosimspace - - git - - filelock - - loguru - - numba - - pip: - - git+https://github.com/openbiosim/ghostly diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 00000000..e77834c8 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,33 @@ +[workspace] +name = "somd2" +channels = ["conda-forge", "openbiosim/label/dev"] +platforms = ["linux-64", "osx-arm64"] + +[dependencies] +python = ">=3.10" +# main +biosimspace = ">=2026.1.0,<2026.2.0" +# devel +#biosimspace = "==2026.2.0.dev" +filelock = "*" +ghostly = "*" +loch = "*" +loguru = "*" +numba = "*" +nvidia-ml-py = "*" + +[feature.test.dependencies] +pytest = "*" + +[feature.lint.dependencies] +pre-commit = "*" +rattler-build = "*" +ruff = "*" + +[environments] +default = [] +test = ["test"] +dev = ["test", "lint"] + +[activation] +scripts = ["pixi.sh"] diff --git a/pyproject.toml b/pyproject.toml index fd326a62..10a39fcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,3 +35,10 @@ distance-dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" [tool.versioningit.write] file = "src/somd2/_version.py" + +[tool.ruff.lint] +ignore = ["E402"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["F841"] + diff --git a/recipes/somd2/conda_build_config.yaml b/recipes/somd2/conda_build_config.yaml deleted file mode 100644 index 3e8e203d..00000000 --- a/recipes/somd2/conda_build_config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -pin_run_as_build: - sire: - max_pin: x.x diff --git a/recipes/somd2/recipe.yaml b/recipes/somd2/recipe.yaml new file mode 100644 index 00000000..7303d8ed --- /dev/null +++ b/recipes/somd2/recipe.yaml @@ -0,0 +1,59 @@ +context: + name: somd2 + +package: + name: ${{ name }} + version: ${{ env.get('GIT_DESCRIBE_TAG', default='PR') }} + +source: + path: ../../somd2-source.tar.gz + +build: + number: ${{ 
env.get('GIT_DESCRIBE_NUMBER', default='0') }} + script: python -m pip install . --no-deps --ignore-installed -vv + +requirements: + host: + - pip + - python + - setuptools + - versioningit + run: + # main + - biosimspace >=2026.1.0,<2026.2.0 + # devel + #- biosimspace ==2026.2.0.dev + - filelock + - ghostly + - loch + - loguru + - numba + - nvidia-ml-py + - python + +tests: + - if: linux + then: + - python: + imports: + - somd2 + - script: + - PYTHONPATH=. pytest -vvv --color=yes --import-mode=importlib ./tests + files: + source: + - tests/ + requirements: + run: + - pytest + +about: + homepage: https://github.com/openbiosim/somd2 + license: GPL-3.0-or-later + license_file: LICENSE + summary: "GPU accelerated free-energy perturbation simulation engine." + repository: https://github.com/openbiosim/somd2 + documentation: https://github.com/openbiosim/somd2 + +extra: + recipe-maintainers: + - lohedges diff --git a/recipes/somd2/template.yaml b/recipes/somd2/template.yaml deleted file mode 100644 index 0cd298a6..00000000 --- a/recipes/somd2/template.yaml +++ /dev/null @@ -1,80 +0,0 @@ -{% set name = "somd2" %} - -package: - name: {{ name }} - version: {{ environ.get('GIT_DESCRIBE_TAG', 'PR') }} - -source: - git_url: SOMD2_REMOTE - git_tag: SOMD2_BRANCH - -build: - number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} - script: {{ PYTHON }} -m pip install . 
--no-deps --ignore-installed -vv - -requirements: - host: - - biosimspace - - ghostly - - loch # [linux] - - loguru - - pip - - python - - numba - - nvidia-ml-py # [linux] - - setuptools - - sire - - versioningit - run: - - biosimspace - - ghostly - - loch # [linux] - - loguru - - numba - - nvidia-ml-py # [linux] - - python - - sire - -test: - script_env: - - SIRE_DONT_PHONEHOME - - SIRE_SILENT_PHONEHOME - requires: - - black == 25 # [linux and x86_64 and py==311] - - pytest - - pytest-black # [linux and x86_64 and py==311] - imports: - - somd2 # [linux] - source_files: - - src/somd2 - - tests - commands: - - pytest -vvv --color=yes --black src/somd2 # [linux and x86_64 and py==311] - - pytest -vvv --color=yes --import-mode=importlib tests # [linux] - -about: - home: https://github.com/openbiosim/somd2 - license: GPL-3.0-or-later - license_file: '{{ environ["RECIPE_DIR"] }}/LICENSE' - summary: "GPU accelerated free-energy pertubation simulation engine" - dev_url: https://github.com/openbiosim/somd2 - doc_url: https://github.com/openbiosim/somd2 - description: | - somd2 is an open-source GPU accelerated molecular dynamics engine for - alchemical free-energy calculations. Built on top of Sire, BioSimSpace, - and OpenMM. 
- - `conda install -c conda-forge -c openbiosim somd2` - - To install the development version: - - `conda install -c conda-forge -c openbiosim/label/dev somd2` - - When updating the development version it is generally advised to - update Sire at the same time: - - `conda install -c conda-forge -c openbiosim/label/dev somd2 sire` - -extra: - recipe-maintainers: - - lohedges diff --git a/scripts/Atoms_finding_sire_test.ipynb b/scripts/Atoms_finding_sire_test.ipynb deleted file mode 100644 index 5fa1f8f0..00000000 --- a/scripts/Atoms_finding_sire_test.ipynb +++ /dev/null @@ -1,206 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A simple jupyter notebook used to find heavy atoms within 12 and 15 angstrom of a ligand. Assumes that files 1a~1b.prm7 and 1a~1b.rst7 are in the same directory." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import sire as sr\n", - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "OSError", - "evalue": "Cannot find file '1a~1b.prm7'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m root \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m1a~1b\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m mols \u001b[39m=\u001b[39m sr\u001b[39m.\u001b[39;49mload(\u001b[39m\"\u001b[39;49m\u001b[39m%s\u001b[39;49;00m\u001b[39m.prm7\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39m%\u001b[39;49m root , \u001b[39m\"\u001b[39;49m\u001b[39m%s\u001b[39;49;00m\u001b[39m.rst7\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39m%\u001b[39;49m root)\n", - "File 
\u001b[0;32m~/mambaforge/envs/openbiosim/lib/python3.10/site-packages/sire/_load.py:399\u001b[0m, in \u001b[0;36mload\u001b[0;34m(path, show_warnings, silent, directory, gromacs_path, parallel, map, *args, **kwargs)\u001b[0m\n\u001b[1;32m 395\u001b[0m p \u001b[39m=\u001b[39m []\n\u001b[1;32m 397\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m0\u001b[39m, \u001b[39mlen\u001b[39m(paths)):\n\u001b[1;32m 398\u001b[0m \u001b[39m# resolve the paths, downloading as needed\u001b[39;00m\n\u001b[0;32m--> 399\u001b[0m p \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m _resolve_path(paths[i], directory\u001b[39m=\u001b[39;49mdirectory, silent\u001b[39m=\u001b[39;49msilent)\n\u001b[1;32m 401\u001b[0m paths \u001b[39m=\u001b[39m p\n\u001b[1;32m 403\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(paths) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n", - "File \u001b[0;32m~/mambaforge/envs/openbiosim/lib/python3.10/site-packages/sire/_load.py:261\u001b[0m, in \u001b[0;36m_resolve_path\u001b[0;34m(path, directory, silent)\u001b[0m\n\u001b[1;32m 257\u001b[0m paths \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m _resolve_path(match, directory\u001b[39m=\u001b[39mdirectory, silent\u001b[39m=\u001b[39msilent)\n\u001b[1;32m 259\u001b[0m \u001b[39mreturn\u001b[39;00m paths\n\u001b[0;32m--> 261\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mIOError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCannot find file \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mpath\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n", - "\u001b[0;31mOSError\u001b[0m: Cannot find file '1a~1b.prm7'" - ] - } - ], - "source": [ - "root = \"1a~1b\"\n", - "mols = sr.load(\"%s.prm7\" % root, \"%s.rst7\" % root)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Change \"root\" variable according to your input file names" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sr.search.set_token(\"lnd\", \"resname LIG\")\n", - "# sr.search.set_token(\"lnd\",\"count(atoms) > 1 and not (protein or water)\")\n", - "ligand = mols[\"lnd\"]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Uses a sire search token to define the criteria for the ligand, in this case the ligand is simply anything within a residue with residue name ``LIG``\n", - "\n", - "Can use some alternative search mechanism like ``sr.search.set_token(\"lnd\",\"count(atoms) > 1 and not (protein or water)\")``. This doesn't work in this case because the some parts of the truncated protein are not identified by ``protein``. A possible solution to this could be to instead search for ``not amino acid`` instead, need to check sire search functionality to see exactly how this is done." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ligand.view()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use ``view`` to check that the ligand and only the ligand is captured." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "residues = mols[\n", - " \"((atoms within 15 of lnd) and (not atoms within 12 of lnd)) and protein\"\n", - "]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finds all atoms between 12 and 15 angstrom from the ligand. ``atoms`` can be swapped with ``residues`` if only complete residues are required. \n", - "This still includes hydrogen atoms - need only heavy atoms." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "residues.view()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "heavy = residues[\"not atomname /H*/\"]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Remove hydrogen atoms," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "heavy.view()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "an = heavy.numbers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nums = []\n", - "for at in an:\n", - " nums.append(int(re.findall(r\"\\d+\", str(at))[0]))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Write atom numbers to a list in the form required by the addanchors script." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(nums)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sireDEV", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/scripts/addanchors.py b/scripts/addanchors.py deleted file mode 100644 index d759969f..00000000 --- a/scripts/addanchors.py +++ /dev/null @@ -1,148 +0,0 @@ -import sire as sr -import os -import re -import argparse -import csv - -parser = argparse.ArgumentParser( - prog="Add Anchors", - description="A scipt to generate positional restraints for use in SOMD", -) - -parser.add_argument( - "-i", - "--input", - help="Input files - should contain sire compatible coordinate and topology files.", - nargs=2, - required=True, -) - -parser.add_argument( - "-r", - "--restrained", - help="Optional file - take in a .csv file that contains a list of restrained\ - atoms and use those. Bypasses sire operations used to find atoms. Assumes that the file is a single line containing only the list of atom numbers", - type=str, -) - -parser.add_argument( - "-d", - "--distances", - help="The distances between which to restrain atoms. Default is between 12.0 and 15.0 angstrom.", - nargs=2, - type=float, - default=[12.0, 15.0], -) - -parser.add_argument( - "-o", - "--outname", - help="Name of output file. 
If not defined the output file will be {name of first input file}_restrained.{filetype}", - type=str, -) - -args = parser.parse_args() - -mols = sr.load(args.input[0], args.input[1]) - -if not args.restrained: - if args.distances[0] >= args.distances[1]: - raise ValueError( - "Restraint distances are in the wrong order, please order -d small big" - ) - # Use sire to find all heavy atoms between 12 and 15 angstrom of the ligand - sr.search.set_token("lnd", "resname LIG") - ligand = mols["lnd"] - residues = mols[ - "((atoms within %s of lnd) and (not atoms within %s of lnd)) and protein" - % (args.distances[1], args.distances[0]) - ] - heavy = residues["not atomname /H*/"] - an = heavy.numbers() - restrained_atoms = [] - for at in an: - restrained_atoms.append(int(re.findall(r"\d+", str(at))[0])) - -else: - with open(args.restrained) as f: - reader = csv.reader(f) - data = list(reader) - restrained_atoms = [int(x) for x in data[0]] - -print("Atomnums for restrained atoms:") -print(restrained_atoms) -# restrained_atoms = [18612, 18613, 18614, 18615, 18616, 18617, 18618] - -n_existing_atoms = mols.num_atoms() -n_existing_residues = mols.num_residues() - -newmol = sr.mol.Molecule("dummies") - -editor = newmol.edit() - -# Create a residue -editor = ( - editor.add(sr.mol.ResName("Re")) - .renumber(sr.mol.ResNum(n_existing_residues + 1)) - .molecule() -) - -for i in range(0, len(restrained_atoms)): - editor = ( - editor.add(sr.mol.AtomName("Re")) - .renumber(sr.mol.AtomNum(n_existing_atoms + i + 1)) - .reparent(sr.mol.ResIdx(0)) - .molecule() - ) - -mol = editor.commit() - -cursor = mol.cursor()["atomname Re"] - -# need to set the properties to the correct type... 
-cursor[0]["charge"] = 1 * sr.units.mod_electron -cursor[0]["mass"] = 1 * sr.units.g_per_mol - -for i in range(0, len(cursor)): - atom = cursor.atom(i) - restrained_at = mols["atomnum %i" % restrained_atoms[i]] - atom["coordinates"] = restrained_at.property("coordinates") - atom["charge"] = 0 * sr.units.mod_electron - atom["element"] = sr.mol.Element(0) - atom["mass"] = 0 * sr.units.g_per_mol - atom["atomtype"] = "DM" - atom["LJ"] = sr.mm.LJParameter(1 * sr.units.angstrom, 0 * sr.units.kcal_per_mol) - -mol = cursor.molecule().commit() - -mols.add(mol) - -if args.outname: - f = sr.save(mols, args.outname, format=["PDB"]) - f = sr.save(mols, args.outname, format=["PRM7", "RST7"]) - -else: - origname = args.input[0].split(".")[0] - f = sr.save(mols, origname + "_restrained", format=["PDB"]) - f = sr.save(mols, origname + "_restrained", format=["PRM7", "RST7"]) - -# load to check -mols = sr.load(f) - -# list of tuples of atoms to save, will be used to specify restraints -# in somd-freenrg -paired_atoms = {} - -last = mols[-1] -for i in range(0, len(last.atoms())): - atom = last.atom(i) - restrained_at = mols["atomnum %i" % restrained_atoms[i]] - paired_atoms[atom.number().value()] = restrained_at.number().value() - # print(atom, atom.property("charge"), atom.property("LJ"), atom.residue()) - -ofile = open("restraint.cfg", "w") -ofile.write("use restraints = True\n") -ofile.write("restrained atoms = %s\n" % paired_atoms) -ofile.close() -print("Atom pairs {dummy atom: original atom}:") -print(paired_atoms) diff --git a/scripts/addanchors_exampleinputs.txt b/scripts/addanchors_exampleinputs.txt deleted file mode 100644 index fc1d2235..00000000 --- a/scripts/addanchors_exampleinputs.txt +++ /dev/null @@ -1,4 +0,0 @@ -To set up restraints with heavy atoms between 10 and 15 angstroms restrained, with input files called 1a~1b.prm7 and 1a~1b.rst7 output files named example.{ext}: -python addanchors.py -i 1a~1b.rst7 1a~1b.prm7 -d 10 15 -o example -To do the same, but now 
reading atoms from a file called "restrained.csv" -python addanchors.py -i 1a~1b.rst7 1a~1b.prm7 -r restrained.csv -o example diff --git a/setup.py b/setup.py deleted file mode 100644 index 60684932..00000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() diff --git a/src/somd2/__init__.py b/src/somd2/__init__.py index 6a45080f..b3070df9 100644 --- a/src/somd2/__init__.py +++ b/src/somd2/__init__.py @@ -37,3 +37,32 @@ # Store the sire version. from sire import __version__ as _sire_version from sire import __revisionid__ as _sire_revisionid + +# Store the BioSimSpace version. +from BioSimSpace import __version__ as _biosimspace_version + +# Store the ghostly version. +from ghostly import __version__ as _ghostly_version + +# Store the loch version. +from loch import __version__ as _loch_version + + +def get_versions(): + """ + Return the versions of SOMD2 and the OpenBioSim packages that it depends + on. + + Returns + ------- + + versions: dict + A dictionary mapping package name to version string. + """ + return { + "somd2": __version__, + "sire": f"{_sire_version}+{_sire_revisionid}", + "biosimspace": _biosimspace_version, + "ghostly": _ghostly_version, + "loch": _loch_version, + } diff --git a/src/somd2/_utils/__init__.py b/src/somd2/_utils/__init__.py index ec25a367..0fd4719a 100644 --- a/src/somd2/_utils/__init__.py +++ b/src/somd2/_utils/__init__.py @@ -19,14 +19,19 @@ # along with SOMD2. If not, see . 
##################################################################### -import platform as _platform +import sys as _sys -if _platform.system() == "Windows": - _lam_sym = "lambda" -else: +try: + "λΔ°".encode(_sys.stdout.encoding or "utf-8") _lam_sym = "λ" + _delta_sym = "ΔE" + _degree_sym = "°" +except (UnicodeEncodeError, LookupError): + _lam_sym = "lambda" + _delta_sym = "delta" + _degree_sym = "deg" -del _platform +del _sys def _has_ghost(mol, idxs, is_lambda1=False): diff --git a/src/somd2/_utils/_schedules.py b/src/somd2/_utils/_schedules.py new file mode 100644 index 00000000..4c49a0ea --- /dev/null +++ b/src/somd2/_utils/_schedules.py @@ -0,0 +1,316 @@ +###################################################################### +# SOMD2: GPU accelerated alchemical free-energy engine. +# +# Copyright: 2023-2026 +# +# Authors: The OpenBioSim Team +# +# SOMD2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# SOMD2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with SOMD2. If not, see . +##################################################################### + +__all__ = [ + "annihilate", + "decouple", + "ring_break_morph", + "reverse_ring_break_morph", +] + + +def annihilate(fix_epsilon=True): + """ + Build the ABFE lambda schedule using decharge → annihilate. + + Annihilation removes ALL non-bonded interactions (including intramolecular LJ + between non-bonded pairs). 
+ + Parameters + ---------- + fix_epsilon : bool, optional + If True (default), epsilon is held constant at its real-atom value + throughout the annihilate stage so that the (1-alpha) prefactor of the + Beutler soft-core provides the sole LJ decay pathway. The ghost-LRC + force is then explicitly scaled to zero over the stage to compensate. + If False, epsilon is scaled normally from initial to final and the LRC + follows naturally. + + Returns + ------- + + schedule : sire.legacy.CAS.LambdaSchedule + The lambda schedule. + """ + from sire.cas import LambdaSchedule as _LambdaSchedule + + # Start with the standard decouple schedule and modify the stages and + # equations as needed. This will be folded into Sire in future, but + # we will use this approach for prototyping. + s = _LambdaSchedule.standard_decouple() + + s.remove_stage("decouple") + + s.add_stage("decharge", equation=s.initial()) + s.set_equation( + stage="decharge", + lever="charge", + equation=s.lam() * s.final() + s.initial() * (1 - s.lam()), + ) + s.set_equation(stage="decharge", force="restraint", equation=s.lam() * s.final()) + + s.add_stage( + "annihilate", + equation=(-s.lam() + 1) * s.initial() + s.lam() * s.final(), + ) + s.set_equation(stage="annihilate", lever="charge", equation=s.final()) + s.set_equation(stage="annihilate", force="restraint", equation=s.final()) + + if fix_epsilon: + s.set_equation(stage="annihilate", lever="epsilon", equation=s.initial()) + s.set_equation( + stage="annihilate", + force="ghost-lrc", + lever="lrc_scale", + equation=1 - s.lam(), + ) + + return s + + +def decouple(fix_epsilon=True): + """ + Build the ABFE lambda schedule using decharge → decouple. + + Decoupling removes only INTERMOLECULAR non-bonded interactions; intramolecular + terms are preserved via kappa=0 on ghost/ghost and ghost-14 forces. 
+ + Parameters + ---------- + fix_epsilon : bool, optional + If True (default), epsilon is held constant at its real-atom value + throughout the decouple stage (see annihilate for rationale). The + ghost-LRC force is then explicitly scaled to zero over the stage. + If False, epsilon is scaled normally and the LRC follows naturally. + + Returns + ------- + + schedule : sire.legacy.CAS.LambdaSchedule + The lambda schedule. + """ + from sire.cas import LambdaSchedule as _LambdaSchedule + + # Start with the standard decouple schedule and modify the stages and + # equations as needed. This will be folded into Sire in future, but + # we will use this approach for prototyping. + s = _LambdaSchedule.standard_decouple() + + s.set_equation(stage="decouple", lever="restraint", equation=s.final()) + s.set_equation(stage="decouple", lever="kappa", force="ghost/ghost", equation=0) + s.set_equation(stage="decouple", lever="kappa", force="ghost-14", equation=0) + s.set_equation(stage="decouple", lever="charge", equation=s.final()) + + if fix_epsilon: + s.set_equation(stage="decouple", lever="epsilon", equation=s.initial()) + s.set_equation( + stage="decouple", + force="ghost-lrc", + lever="lrc_scale", + equation=1 - s.lam(), + ) + + s.prepend_stage("decharge", s.initial()) + s.set_equation( + stage="decharge", + lever="charge", + equation=s.lam() * s.final() + s.initial() * (1 - s.lam()), + ) + s.set_equation(stage="decharge", force="ghost/ghost", equation=s.initial()) + s.set_equation(stage="decharge", force="ghost-14", equation=s.initial()) + s.set_equation( + stage="decharge", lever="kappa", force="ghost/ghost", equation=-s.lam() + 1 + ) + s.set_equation( + stage="decharge", lever="kappa", force="ghost-14", equation=-s.lam() + 1 + ) + s.set_equation(stage="decharge", lever="restraint", equation=s.initial() * s.lam()) + + return s + + +def ring_break_morph(): + """ + Build a lambda schedule for ring-breaking perturbations. 
+ + Three stages: potential_swap → restraints_off → morph. + + During restraints_off the Morse restraint ramps off (morse_soft: 1→0) while + the ring-break softcore simultaneously ramps on (alpha: 1→0, kappa: 0→1), + providing a smooth handover with no gap between the two forces. The ring-make + equations mirror ring-break so that ``ring_break_morph().reverse()`` is the + correct schedule for the ring-making direction (used by + :func:`reverse_ring_break_morph`). Because ring_break_morph is only used for + ring-breaking perturbations (no ring-make force present), the ring-make + equations have no effect on forward simulations. + + Returns + ------- + + schedule : sire.legacy.CAS.LambdaSchedule + The lambda schedule. + """ + from sire.cas import LambdaSchedule as _LambdaSchedule + + s = _LambdaSchedule.standard_morph() + + # restraints_off [1/3, 2/3): Morse ramps off while ring-break softcore ramps + # on simultaneously (alpha: 1→0, kappa: 0→1). Bonded terms (angles, torsions) + # interpolate initial→final over the same stage. ring-make mirrors ring-break + # so that after .reverse(), the ring-make softcore ramps off as morse_soft ramps + # on in the reversed restraints_off stage, correct for ring-making perturbations. 
+ s.prepend_stage("restraints_off", s.initial()) + s.set_equation(stage="restraints_off", lever="morse_soft", equation=1 - s.lam()) + s.set_equation(stage="restraints_off", lever="morse_hard", equation=0) + s.set_equation(stage="restraints_off", lever="bond_k", equation=s.final()) + s.set_equation(stage="restraints_off", lever="bond_length", equation=s.final()) + s.set_equation( + stage="restraints_off", + lever="angle_k", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation( + stage="restraints_off", + lever="angle_size", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation( + stage="restraints_off", + lever="torsion_k", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation( + stage="restraints_off", + lever="torsion_phase", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation( + stage="restraints_off", force="ring-break", lever="alpha", equation=1 - s.lam() + ) + s.set_equation( + stage="restraints_off", force="ring-break", lever="kappa", equation=s.lam() + ) + s.set_equation( + stage="restraints_off", force="ring-make", lever="alpha", equation=1 - s.lam() + ) + s.set_equation( + stage="restraints_off", force="ring-make", lever="kappa", equation=s.lam() + ) + + s.prepend_stage("potential_swap", s.initial()) + s.set_equation(stage="potential_swap", lever="morse_hard", equation=1 - s.lam()) + s.set_equation(stage="potential_swap", lever="morse_soft", equation=0 + s.lam()) + s.set_equation( + stage="potential_swap", + lever="bond_k", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation( + stage="potential_swap", + lever="bond_length", + equation=(1 - s.lam()) * s.initial() + s.lam() * s.final(), + ) + s.set_equation(stage="potential_swap", lever="angle_k", equation=s.initial()) + s.set_equation(stage="potential_swap", lever="angle_size", equation=s.initial()) + s.set_equation(stage="potential_swap", lever="torsion_k", 
equation=s.initial()) + s.set_equation(stage="potential_swap", lever="torsion_phase", equation=s.initial()) + # Softcore off throughout potential_swap: explicit constants so the schedule + # visualises correctly regardless of the initial/final values passed by the caller. + s.set_equation( + stage="potential_swap", force="ring-break", lever="alpha", equation=1 + ) + s.set_equation( + stage="potential_swap", force="ring-break", lever="kappa", equation=0 + ) + s.set_equation(stage="potential_swap", force="ring-make", lever="alpha", equation=1) + s.set_equation(stage="potential_swap", force="ring-make", lever="kappa", equation=0) + + # morph [2/3, 1]: standard nonbonded morphing with ring-break/ring-make fixed + # at fully open (kappa=1, alpha=0). ring-make mirrors ring-break so .reverse() + # gives kappa=1 at lam=0 of the reversed morph stage (ring-making start). + s.set_equation(stage="morph", lever="morse_hard", equation=0) + s.set_equation(stage="morph", lever="morse_soft", equation=0) + s.set_equation(stage="morph", lever="bond_k", equation=s.final()) + s.set_equation(stage="morph", lever="bond_length", equation=s.final()) + s.set_equation(stage="morph", lever="angle_k", equation=s.final()) + s.set_equation(stage="morph", lever="angle_size", equation=s.final()) + s.set_equation(stage="morph", lever="torsion_k", equation=s.final()) + s.set_equation(stage="morph", lever="torsion_phase", equation=s.final()) + s.set_equation(stage="morph", force="ring-break", lever="alpha", equation=0) + s.set_equation(stage="morph", force="ring-break", lever="kappa", equation=1) + s.set_equation(stage="morph", force="ring-make", lever="alpha", equation=0) + s.set_equation(stage="morph", force="ring-make", lever="kappa", equation=1) + + # coul_kappa: zero through both bonded stages so the CLJ exception carries no + # charge while atoms are at covalent distances; ramps 0→1 in morph only once + # the softcore has already separated the atoms. 
ring-make mirrors ring-break + # so .reverse() gives coul_kappa ramps 1→0 through the reversed morph stage. + s.set_equation( + stage="potential_swap", force="ring-break", lever="coul_kappa", equation=0 + ) + s.set_equation( + stage="restraints_off", force="ring-break", lever="coul_kappa", equation=0 + ) + s.set_equation( + stage="morph", force="ring-break", lever="coul_kappa", equation=s.lam() + ) + s.set_equation( + stage="potential_swap", force="ring-make", lever="coul_kappa", equation=0 + ) + s.set_equation( + stage="restraints_off", force="ring-make", lever="coul_kappa", equation=0 + ) + s.set_equation( + stage="morph", force="ring-make", lever="coul_kappa", equation=s.lam() + ) + + return s + + +def reverse_ring_break_morph(): + """ + Build a lambda schedule for ring-making perturbations (reverse ring-break). + + Returns ``ring_break_morph().reverse()``: three stages in reversed order + (morph → restraints_off → potential_swap) with all equations reflected about + λ=½ and initial/final end-states swapped. + + This schedule is correct for two equivalent use-cases: + + 1. A ring-making perturbation run with ``swap_end_states=False``: the + ring-make softcore force (kappa=1 at λ=0, ramping to 0) is controlled + directly by the ring-make lever equations. + 2. A ring-breaking perturbation run with ``swap_end_states=True`` (the + runner reverses the schedule automatically, yielding the same effective + schedule): the ring-make softcore — which now controls the original + ring-breaking bond after the end-state swap — is handled identically. + + The energy symmetry invariant holds for both cases: + ``E_ring_make_reverse(λ) == E_ring_break_forward(1-λ)`` at any fixed + geometry. + + Returns + ------- + + schedule : sire.legacy.CAS.LambdaSchedule + The lambda schedule. 
+ """ + return ring_break_morph().reverse() diff --git a/src/somd2/_utils/_somd1.py b/src/somd2/_utils/_somd1.py index aa379f7b..826c6b25 100644 --- a/src/somd2/_utils/_somd1.py +++ b/src/somd2/_utils/_somd1.py @@ -19,7 +19,12 @@ # along with SOMD2. If not, see . ##################################################################### -__all__ = ["apply_pert", "make_compatible", "reconstruct_system"] +__all__ = [ + "apply_pert", + "make_compatible", + "reconstruct_intrascale", + "reconstruct_system", +] from sire.system import System as _System from sire.legacy.System import System as _LegacySystem @@ -259,6 +264,19 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): new_bonds0.set(idx0, idx1, p0.function()) new_bonds1.set(idx0, idx1, p1.function()) + # Pass through unique terms that have no ghost in the state they exist in. + for b_idx in bonds0_unique_idx.values(): + p = bonds0[b_idx] + a0, a1 = p.atom0(), p.atom1() + if not _has_ghost(mol, [a0, a1]): + new_bonds0.set(a0, a1, p.function()) + + for b_idx in bonds1_unique_idx.values(): + p = bonds1[b_idx] + a0, a1 = p.atom0(), p.atom1() + if not _has_ghost(mol, [a0, a1], True): + new_bonds1.set(a0, a1, p.function()) + # Set the new bonded terms. edit_mol = edit_mol.set_property("bond0", new_bonds0).molecule() edit_mol = edit_mol.set_property("bond1", new_bonds1).molecule() @@ -361,6 +379,19 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): new_angles0.set(idx0, idx1, idx2, p0.function()) new_angles1.set(idx0, idx1, idx2, p1.function()) + # Pass through unique terms that have no ghost in the state they exist in. 
+ for a_idx in angles0_unique_idx.values(): + p = angles0[a_idx] + a0, a1, a2 = p.atom0(), p.atom1(), p.atom2() + if not _has_ghost(mol, [a0, a1, a2]): + new_angles0.set(a0, a1, a2, p.function()) + + for a_idx in angles1_unique_idx.values(): + p = angles1[a_idx] + a0, a1, a2 = p.atom0(), p.atom1(), p.atom2() + if not _has_ghost(mol, [a0, a1, a2], True): + new_angles1.set(a0, a1, a2, p.function()) + # Set the new angle terms. edit_mol = edit_mol.set_property("angle0", new_angles0).molecule() edit_mol = edit_mol.set_property("angle1", new_angles1).molecule() @@ -474,6 +505,25 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): new_dihedrals0.set(idx0, idx1, idx2, idx3, p0.function()) new_dihedrals1.set(idx0, idx1, idx2, idx3, p1.function()) + # Pass through unique terms that have no ghost in the state they exist in. + for d_idx in dihedrals0_unique_idx.values(): + p = dihedrals0[d_idx] + a0 = info.atom_idx(p.atom0()) + a1 = info.atom_idx(p.atom1()) + a2 = info.atom_idx(p.atom2()) + a3 = info.atom_idx(p.atom3()) + if not _has_ghost(mol, [a0, a1, a2, a3]): + new_dihedrals0.set(a0, a1, a2, a3, p.function()) + + for d_idx in dihedrals1_unique_idx.values(): + p = dihedrals1[d_idx] + a0 = info.atom_idx(p.atom0()) + a1 = info.atom_idx(p.atom1()) + a2 = info.atom_idx(p.atom2()) + a3 = info.atom_idx(p.atom3()) + if not _has_ghost(mol, [a0, a1, a2, a3], True): + new_dihedrals1.set(a0, a1, a2, a3, p.function()) + # Set the new dihedral terms. edit_mol = edit_mol.set_property("dihedral0", new_dihedrals0).molecule() edit_mol = edit_mol.set_property("dihedral1", new_dihedrals1).molecule() @@ -548,7 +598,7 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): for idx0 in impropers0_idx.keys(): if idx1.equivalent(idx0): # Don't store duplicates. 
- if not idx0 in impropers_shared_idx.keys(): + if idx0 not in impropers_shared_idx.keys(): impropers_shared_idx[idx1] = ( impropers0_idx[idx0], impropers1_idx[idx1], @@ -600,6 +650,25 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): new_impropers0.set(idx0, idx1, idx2, idx3, p0.function()) new_impropers1.set(idx0, idx1, idx2, idx3, p1.function()) + # Pass through unique terms that have no ghost in the state they exist in. + for i_idx in impropers0_unique_idx.values(): + p = impropers0[i_idx] + a0 = info.atom_idx(p.atom0()) + a1 = info.atom_idx(p.atom1()) + a2 = info.atom_idx(p.atom2()) + a3 = info.atom_idx(p.atom3()) + if not _has_ghost(mol, [a0, a1, a2, a3]): + new_impropers0.set(a0, a1, a2, a3, p.function()) + + for i_idx in impropers1_unique_idx.values(): + p = impropers1[i_idx] + a0 = info.atom_idx(p.atom0()) + a1 = info.atom_idx(p.atom1()) + a2 = info.atom_idx(p.atom2()) + a3 = info.atom_idx(p.atom3()) + if not _has_ghost(mol, [a0, a1, a2, a3], True): + new_impropers1.set(a0, a1, a2, a3, p.function()) + # Set the new improper terms. edit_mol = edit_mol.set_property("improper0", new_impropers0).molecule() edit_mol = edit_mol.set_property("improper1", new_impropers1).molecule() @@ -611,6 +680,92 @@ def make_compatible(system, fix_perturbable_zero_sigmas=False): return system +def reconstruct_intrascale(system): + """ + Reconstruct end-state connectivity and intrascale matrices for perturbable + molecules from their bonded terms. This is required when a perturbation + file is used with AMBER topology/coordinate input, since the pertfile + assumes unchanged connectivity, which does not hold for ring-breaking or + ring-size-changing perturbations. + + Parameters + ---------- + + system : sire.system.System, sire.legacy.System.System + The system containing the perturbable molecules. 
+ + Returns + ------- + + system : sire.system.System + The updated system with corrected connectivity0, connectivity1, + intrascale0, and intrascale1 properties on each perturbable molecule. + """ + + import sire.legacy.CAS as _SireCAS + + if not isinstance(system, (_System, _LegacySystem)): + raise TypeError( + "'system' must of type 'sire.system.System' or 'sire.legacy.System.System'" + ) + + if isinstance(system, _LegacySystem): + system = _System(system) + + system = system.clone() + + try: + pert_mols = system.molecules("property is_perturbable") + except KeyError: + raise KeyError("No perturbable molecules in the system") + + r = _SireCAS.Symbol("r") + + for mol in pert_mols: + info = mol.info() + + # Build connectivity from bond0 potentials, skipping zero-k bonds. + conn0 = _SireMol.Connectivity(info).edit() + for bond in mol.property("bond0").potentials(): + if _SireMM.AmberBond(bond.function(), r).k() != 0.0: + conn0.connect(bond.atom0(), bond.atom1()) + conn0 = conn0.commit() + + # Build connectivity from bond1 potentials, skipping zero-k bonds. + conn1 = _SireMol.Connectivity(info).edit() + for bond in mol.property("bond1").potentials(): + if _SireMM.AmberBond(bond.function(), r).k() != 0.0: + conn1.connect(bond.atom0(), bond.atom1()) + conn1 = conn1.commit() + + # Get the 1-4 scale factors from the lambda=0 forcefield. + ff = mol.property("forcefield0") + sf14 = _SireMM.CLJScaleFactor( + ff.electrostatic14_scale_factor(), ff.vdw14_scale_factor() + ) + + # Build intrascale matrices from the per-state connectivity. 
+ intra0 = _SireMM.CLJNBPairs(conn0, sf14) + intra1 = _SireMM.CLJNBPairs(conn1, sf14) + + edit_mol = mol.edit() + + if conn0 == conn1: + edit_mol = edit_mol.set_property("connectivity", conn0).molecule() + else: + edit_mol = edit_mol.set_property("connectivity0", conn0).molecule() + edit_mol = edit_mol.set_property("connectivity1", conn1).molecule() + + edit_mol = edit_mol.set_property("intrascale0", intra0).molecule() + edit_mol = edit_mol.set_property("intrascale1", intra1).molecule() + + system.update(edit_mol.commit()) + + from sire import morph as _morph + + return _morph.link_to_reference(system) + + def reconstruct_system(system): """ Reconstruct a perturbable system to its original state, i.e. extract the @@ -659,7 +814,6 @@ def reconstruct_system(system): # Loop over all perturbable molecules. for mol in pert_mols: - # Delete any AmberParams properties. try: cursor = mol.cursor() diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 52babacd..d4482be8 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -68,8 +68,13 @@ class Config: "lambda_schedule": [ "standard_morph", "charge_scaled_morph", + "ring_break_morph", + "reverse_ring_break_morph", + "annihilate", + "decouple", ], "log_level": [level.lower() for level in _logger._core.levels], + "softcore_form": ["zacharias", "taylor", "beutler"], } # A dictionary of nargs for the various options. 
@@ -100,7 +105,6 @@ def __init__( lambda_schedule="standard_morph", charge_scale_factor=0.2, swap_end_states=False, - coulomb_power=0.0, shift_coulomb="1 A", shift_delta="1.5 A", restraints=None, @@ -134,7 +138,11 @@ def __init__( opencl_platform_index=0, oversubscription_factor=1, replica_exchange=False, + randomise_velocities=False, perturbed_system=None, + terminal_flip_frequency=None, + terminal_flip_angle=None, + terminal_flip_max_mobile_atoms=None, gcmc=False, gcmc_frequency=None, gcmc_selection=None, @@ -144,8 +152,13 @@ def __init__( gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, gcmc_tolerance=0.0, + use_dispersion_correction=False, rest2_scale=1.0, rest2_selection=None, + softcore_form="zacharias", + taylor_power=1, + beutler_alpha=0.5, + beutler_fix_epsilon=True, output_directory="output", restart=False, use_backup=False, @@ -153,8 +166,10 @@ def __init__( overwrite=False, somd1_compatibility=False, pert_file=None, + auto_fix_minimise=True, save_crash_report=False, - save_energy_components=False, + save_energy_components=True, + save_xml=False, page_size=None, timeout="300 s", ): @@ -222,10 +237,6 @@ def __init__( swap_end_states: bool Whether to swap the end states of the alchemical system. - coulomb_power : float - Power to use for the soft-core Coulomb interaction. This is used - to soften the electrostatic interaction. - shift_coulomb : str The soft-core shift-coulomb parameter. This is used to soften the Coulomb interaction. @@ -363,11 +374,31 @@ def __init__( Whether to run replica exchange simulation. Currently this can only be used when GPU resources are available. + randomise_velocities: bool + Whether to randomise velocities at the start of each replica exchange cycle + or following a terminal flip Monte Carlo move. + perturbed_system: str The path to a stream file containing a Sire system for the equilibrated perturbed end state (lambda = 1). 
This will be used as the starting conformation all lambda windows > 0.5 when performing a replica exchange simulation. + terminal_flip_frequency: str + Frequency at which to attempt terminal ring flip Monte Carlo moves. If None + (the default), no terminal flip moves will be performed. When set, terminal + ring groups in perturbable molecules are detected automatically using Sire's + native connectivity. This must be a multiple of 'energy_frequency'. + + terminal_flip_angle: str + Override the flip angle used for all terminal ring groups, e.g. + ``"180 degrees"``. If None (the default), the angle is determined + automatically for each group from its geometry. + + terminal_flip_max_mobile_atoms: int or None + Maximum number of mobile atoms allowed in a terminal ring group. + Groups with more mobile atoms than this threshold are skipped during + detection. Defaults to None (no limit). + gcmc: bool Whether to perform Grand Canonical Monte Carlo (GCMC) water insertions/deletions. @@ -407,6 +438,12 @@ def __init__( of acceptance for a move. This can be used to exclude low probability candidates that can cause instabilities or crashes for the MD engine. + use_dispersion_correction: bool + Whether to use the long-range dispersion correction for LJ interactions. + When True, the correction is evaluated analytically via a CustomVolumeForce + and cached per lambda state, avoiding expensive recomputation on every + lambda change. Default False. + rest2_scale: float, list(float) The scaling factor for Replica Exchange with Solute Tempering (REST) simulations. This is the factor by which the temperature of the solute is scaled with respect to @@ -426,6 +463,30 @@ def __init__( those atoms will be considered as part of the REST2 region. This allows REST2 to be applied to protein mutations. + softcore_form: str + The soft-core potential form to use for alchemical interactions. Valid + options are "zacharias" (default), "taylor", and "beutler". 
The Beutler + form is recommended for ABFE calculations. + + taylor_power: int + The power to use for the alpha term in the Taylor soft-core LJ expression, + i.e. sig6 = sigma^6 / (alpha^m * sigma^6 + r^6). Must be between 0 and 4. + The default is 1. Only used when softcore_form is "taylor". + + beutler_alpha: float + The dimensionless scale factor for the r^6 shift in the Beutler soft-core + form. Must be >= 0. The default is 0.5. Only used when softcore_form is + "beutler". + + beutler_fix_epsilon: bool + Whether to hold LJ epsilon fixed at its real-atom value for + ghost-decoupling molecules when softcore_form is "beutler", so that the + Beutler (1-alpha) prefactor provides the sole LJ decay pathway. The + default is True. This is automatically disabled (regardless of this + setting) for any alchemical ion added to maintain a constant charge, + since an ion's persisting atom is a real (non-ghost) mutation and needs + its LJ epsilon to interpolate normally rather than being held fixed. + output_directory: str Path to a directory to store output files. @@ -458,12 +519,24 @@ def __init__( The path to a SOMD1 perturbation file to apply to the reference system. When set, this will automatically set 'somd1_compatibility' to True. + auto_fix_minimise: bool + Whether to attempt to automatically recover from simulation instabilities + by minimising and restarting. Defaults to True. + save_crash_report: bool Whether to save a crash report if the simulation crashes. save_energy_components: bool - Whether to save the energy contribution for each force when checkpointing. - This is useful when debugging crashes. + Whether to save per-force-group energy contributions to a Parquet file + in the output directory. Energies are recorded at every 'energy_frequency' + interval. When not running replica exchange, the interval is instead the + shortest active MC frequency when running with GCMC or terminal flip moves. + Intended for debugging purposes. 
+ + save_xml: bool + Whether to write an XML file for the OpenMM system to the output + directory on startup. This can be useful for debugging or for + use with other tools that can read OpenMM XML files. page_size: int The page size for trajectory handling in megabytes. If None, then Sire @@ -507,7 +580,6 @@ def __init__( self.lambda_schedule = lambda_schedule self.charge_scale_factor = charge_scale_factor self.swap_end_states = swap_end_states - self.coulomb_power = coulomb_power self.shift_coulomb = shift_coulomb self.shift_delta = shift_delta self.restraints = restraints @@ -539,7 +611,11 @@ def __init__( self.opencl_platform_index = opencl_platform_index self.oversubscription_factor = oversubscription_factor self.replica_exchange = replica_exchange + self.randomise_velocities = randomise_velocities self.perturbed_system = perturbed_system + self.terminal_flip_frequency = terminal_flip_frequency + self.terminal_flip_angle = terminal_flip_angle + self.terminal_flip_max_mobile_atoms = terminal_flip_max_mobile_atoms self.gcmc = gcmc self.gcmc_frequency = gcmc_frequency self.gcmc_selection = gcmc_selection @@ -549,14 +625,21 @@ def __init__( self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability self.gcmc_tolerance = gcmc_tolerance + self.use_dispersion_correction = use_dispersion_correction self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart self.use_backup = use_backup + self.softcore_form = softcore_form + self.taylor_power = taylor_power + self.beutler_alpha = beutler_alpha + self.beutler_fix_epsilon = beutler_fix_epsilon self.somd1_compatibility = somd1_compatibility self.pert_file = pert_file + self.auto_fix_minimise = auto_fix_minimise self.save_crash_report = save_crash_report self.save_energy_components = save_energy_components + self.save_xml = save_xml self.timeout = timeout self.num_energy_neighbours = num_energy_neighbours self.null_energy = null_energy @@ -644,20 
+727,39 @@ def as_dict(self, sire_compatible=False): if value is None and sire_compatible: d[attr_l] = False + # Don't include lambda_schedule_name or perturbed_system_file in the dictionary, + # since these are just helper attributes. + d.pop("lambda_schedule_name", None) + d.pop("perturbed_system_file", None) + # Handle the lambda schedule separately so that we can use simplified # keyword options. - if self.lambda_schedule == _LambdaSchedule.standard_morph(): - d["lambda_schedule"] = "standard_morph" - elif self.lambda_schedule == _LambdaSchedule.charge_scaled_morph( - self._charge_scale_factor - ): - d["lambda_schedule"] = "charge_scaled_morph" + + # A keyword exists for this lambda schedule. + if self._lambda_schedule_name is not None: + d["lambda_schedule"] = self._lambda_schedule_name + # Try to match the lambda schedule to a known schedule, if not then convert to hex. + else: + if self.lambda_schedule == _LambdaSchedule.standard_morph(): + d["lambda_schedule"] = "standard_morph" + elif self.lambda_schedule == _LambdaSchedule.charge_scaled_morph( + self._charge_scale_factor + ): + d["lambda_schedule"] = "charge_scaled_morph" + else: + d["lambda_schedule"] = self._to_hex(self.lambda_schedule) + + # Serialise restraints. + if self.restraints is not None: + d["restraints"] = [self._to_hex(restraint) for restraint in self.restraints] # Use the path for the perturbed_system option, since the system # isn't serializable. - if self.perturbed_system is not None: + if ( + self.perturbed_system is not None + and self._perturbed_system_file is not None + ): d["perturbed_system"] = str(self._perturbed_system_file) - d.pop("perturbed_system_file", None) return d @@ -862,6 +964,14 @@ def h_mass_factor(self, h_mass_factor): "This will result in a reduction of the mass of hydrogen atoms, " "and will likely lead to undesired simulation behaviour." ) + if h_mass_factor > 3.0: + raise ValueError( + "Requested hydrogen mass repartitioning factor is greater than 3.0. 
" + "Above this value, heavy atoms bonded to multiple hydrogens can have " + "their mass reduced below the 3.5 g/mol threshold used for hydrogen " + "detection in the OpenMM conversion layer, causing hydrogen bonds not " + "to be constrained." + ) self._h_mass_factor = h_mass_factor @property @@ -981,18 +1091,48 @@ def lambda_schedule(self, lambda_schedule): if isinstance(lambda_schedule, str): # Strip whitespace and convert to lower case. lambda_schedule = lambda_schedule.strip().lower() - if lambda_schedule not in self._choices["lambda_schedule"]: - raise ValueError( - f"Lambda schedule not recognised. Valid lambda schedules are: {self._choices['lambda_schedule']}" - ) if lambda_schedule == "standard_morph": self._lambda_schedule = _LambdaSchedule.standard_morph() + self._lambda_schedule_name = "standard_morph" elif lambda_schedule == "charge_scaled_morph": self._lambda_schedule = _LambdaSchedule.charge_scaled_morph(0.2) + self._lambda_schedule_name = "charge_scaled_morph" + elif lambda_schedule == "ring_break_morph": + from .._utils._schedules import ( + ring_break_morph as _ring_break_morph, + ) + + self._lambda_schedule = _ring_break_morph() + self._lambda_schedule_name = "ring_break_morph" + elif lambda_schedule == "reverse_ring_break_morph": + from .._utils._schedules import ( + reverse_ring_break_morph as _reverse_ring_break_morph, + ) + + self._lambda_schedule = _reverse_ring_break_morph() + self._lambda_schedule_name = "reverse_ring_break_morph" + elif lambda_schedule == "annihilate": + self._lambda_schedule = None + self._lambda_schedule_name = "annihilate" + elif lambda_schedule == "decouple": + self._lambda_schedule = None + self._lambda_schedule_name = "decouple" + else: + try: + self._lambda_schedule = self._from_hex(lambda_schedule) + self._lambda_schedule_name = None + except Exception: + raise ValueError( + "Unable to deserialise 'lambda_schedule'. 
Ensure that this is a " + "hex string representation of a valid LambdaSchedule object, or " + f"one of the following strings: {', '.join(self._choices['lambda_schedule'])}" + ) else: self._lambda_schedule = lambda_schedule + self._lambda_schedule_name = None else: self._lambda_schedule = _LambdaSchedule.standard_morph() + self._lambda_schedule_name = "standard_morph" @property def charge_scale_factor(self): @@ -1022,19 +1162,6 @@ def swap_end_states(self, swap_end_states): raise ValueError("'swap_end_states' must be of type 'bool'") self._swap_end_states = swap_end_states - @property - def coulomb_power(self): - return self._coulomb_power - - @coulomb_power.setter - def coulomb_power(self, coulomb_power): - if not isinstance(coulomb_power, float): - try: - coulomb_power = float(coulomb_power) - except Exception: - raise ValueError("'coulomb_power' must be a of type 'float'") - self._coulomb_power = coulomb_power - @property def shift_coulomb(self): return self._shift_coulomb @@ -1091,12 +1218,27 @@ def restraints(self, restraints): restraints = [restraints] # Check that all restraints are of the correct type. + deserialised_restraints = [] for restraint in restraints: - if not isinstance(restraint, _sr.mm._MM.Restraints): + if isinstance(restraint, _sr.mm._MM.Restraints): + deserialised_restraints.append(restraint) + elif isinstance(restraint, str): + try: + restraint = self._from_hex(restraint) + except Exception: + raise ValueError( + "Unable to deserialise restraint. Ensure that this " + "is a hex string representation of a valid sire.mm._MM.Restraints object." + ) + deserialised_restraints.append(restraint) + else: raise ValueError( "'restraints' must be a sire.mm._MM.Restraints object, or a list of these objects.
) + if len(deserialised_restraints) > 0: + restraints = deserialised_restraints + self._restraints = restraints @property @@ -1456,7 +1598,6 @@ def platform(self): @platform.setter def platform(self, platform): import os as _os - import sys as _sys if not isinstance(platform, str): raise TypeError("'platform' must be of type 'str'") @@ -1617,6 +1758,16 @@ def replica_exchange(self, replica_exchange): raise ValueError("'replica_exchange' must be of type 'bool'") self._replica_exchange = replica_exchange + @property + def randomise_velocities(self): + return self._randomise_velocities + + @randomise_velocities.setter + def randomise_velocities(self, randomise_velocities): + if not isinstance(randomise_velocities, bool): + raise ValueError("'randomise_velocities' must be of type 'bool'") + self._randomise_velocities = randomise_velocities + @property def perturbed_system(self): return self._perturbed_system @@ -1650,6 +1801,80 @@ def perturbed_system(self, perturbed_system): self._perturbed_system = None self._perturbed_system_file = None + @property + def terminal_flip_frequency(self): + return self._terminal_flip_frequency + + @terminal_flip_frequency.setter + def terminal_flip_frequency(self, terminal_flip_frequency): + if terminal_flip_frequency is not None: + if not isinstance(terminal_flip_frequency, str): + raise TypeError("'terminal_flip_frequency' must be of type 'str'") + + from sire.units import picosecond + + try: + t = _sr.u(terminal_flip_frequency) + except Exception: + raise ValueError( + f"Unable to parse 'terminal_flip_frequency' as a Sire GeneralUnit: " + f"{terminal_flip_frequency}" + ) + + if t.value() != 0 and not t.has_same_units(picosecond): + raise ValueError("'terminal_flip_frequency' units are invalid.") + + self._terminal_flip_frequency = t + else: + self._terminal_flip_frequency = None + + @property + def terminal_flip_angle(self): + return self._terminal_flip_angle + + @terminal_flip_angle.setter + def terminal_flip_angle(self, 
terminal_flip_angle): + if terminal_flip_angle is not None: + if not isinstance(terminal_flip_angle, str): + raise TypeError("'terminal_flip_angle' must be of type 'str'") + + from sire.units import degrees + + try: + a = _sr.u(terminal_flip_angle) + except Exception: + raise ValueError( + f"Unable to parse 'terminal_flip_angle' as a Sire GeneralUnit: " + f"{terminal_flip_angle}" + ) + + if not a.has_same_units(degrees): + raise ValueError("'terminal_flip_angle' units are invalid.") + + self._terminal_flip_angle = a + else: + self._terminal_flip_angle = None + + @property + def terminal_flip_max_mobile_atoms(self): + return self._terminal_flip_max_mobile_atoms + + @terminal_flip_max_mobile_atoms.setter + def terminal_flip_max_mobile_atoms(self, terminal_flip_max_mobile_atoms): + if terminal_flip_max_mobile_atoms is not None: + if not isinstance(terminal_flip_max_mobile_atoms, int): + try: + terminal_flip_max_mobile_atoms = int(terminal_flip_max_mobile_atoms) + except Exception: + raise ValueError( + "'terminal_flip_max_mobile_atoms' must be of type 'int'" + ) + if terminal_flip_max_mobile_atoms < 1: + raise ValueError( + "'terminal_flip_max_mobile_atoms' must be greater than 0" + ) + self._terminal_flip_max_mobile_atoms = terminal_flip_max_mobile_atoms + @property def gcmc(self): return self._gcmc @@ -1785,7 +2010,7 @@ def gcmc_radius(self, gcmc_radius): gcmc_r = _sr.u(gcmc_radius) except: raise ValueError( - "Unable to parse 'gcmc_radius' " f"as a Sire GeneralUnit: {gcmc_radius}" + f"Unable to parse 'gcmc_radius' as a Sire GeneralUnit: {gcmc_radius}" ) if not gcmc_r.has_same_units(angstrom): @@ -1825,6 +2050,16 @@ def gcmc_tolerance(self, gcmc_tolerance): raise ValueError("'gcmc_tolerance' must be greater than or equal to 0.0") self._gcmc_tolerance = gcmc_tolerance + @property + def use_dispersion_correction(self): + return self._use_dispersion_correction + + @use_dispersion_correction.setter + def use_dispersion_correction(self, use_dispersion_correction): + if not
isinstance(use_dispersion_correction, bool): + raise TypeError("'use_dispersion_correction' must be of type 'bool'") + self._use_dispersion_correction = use_dispersion_correction + @property def rest2_scale(self): return self._rest2_scale @@ -1871,6 +2106,62 @@ def restart(self, restart): raise ValueError("'restart' must be of type 'bool'") self._restart = restart + @property + def softcore_form(self): + return self._softcore_form + + @softcore_form.setter + def softcore_form(self, softcore_form): + if not isinstance(softcore_form, str): + raise TypeError("'softcore_form' must be of type 'str'") + softcore_form = softcore_form.lower().replace(" ", "") + if softcore_form not in self._choices["softcore_form"]: + raise ValueError( + f"'softcore_form' not recognised. Valid forms are: {', '.join(self._choices['softcore_form'])}" + ) + else: + self._softcore_form = softcore_form + + @property + def taylor_power(self): + return self._taylor_power + + @taylor_power.setter + def taylor_power(self, taylor_power): + if not isinstance(taylor_power, int): + try: + taylor_power = int(taylor_power) + except Exception: + raise ValueError("'taylor_power' must be of type 'int'") + if not 0 <= taylor_power <= 4: + raise ValueError("'taylor_power' must be between 0 and 4") + self._taylor_power = taylor_power + + @property + def beutler_alpha(self): + return self._beutler_alpha + + @beutler_alpha.setter + def beutler_alpha(self, beutler_alpha): + if not isinstance(beutler_alpha, float): + try: + beutler_alpha = float(beutler_alpha) + except Exception: + raise ValueError("'beutler_alpha' must be of type 'float'") + if beutler_alpha < 0.0: + raise ValueError("'beutler_alpha' must be >= 0") + self._beutler_alpha = beutler_alpha + + @property + def beutler_fix_epsilon(self): + return self._beutler_fix_epsilon + + @beutler_fix_epsilon.setter + def beutler_fix_epsilon(self, beutler_fix_epsilon): + if not isinstance(beutler_fix_epsilon, bool): + raise TypeError("'beutler_fix_epsilon' must be 
of type 'bool'") + self._beutler_fix_epsilon = beutler_fix_epsilon + @property def use_backup(self): return self._use_backup @@ -1907,6 +2198,16 @@ def pert_file(self, pert_file): self._pert_file = pert_file + @property + def auto_fix_minimise(self): + return self._auto_fix_minimise + + @auto_fix_minimise.setter + def auto_fix_minimise(self, auto_fix_minimise): + if not isinstance(auto_fix_minimise, bool): + raise ValueError("'auto_fix_minimise' must be of type 'bool'") + self._auto_fix_minimise = auto_fix_minimise + @property def save_crash_report(self): return self._save_crash_report @@ -1927,6 +2228,16 @@ def save_energy_components(self, save_energy_components): raise ValueError("'save_energy_components' must be of type 'bool'") self._save_energy_components = save_energy_components + @property + def save_xml(self): + return self._save_xml + + @save_xml.setter + def save_xml(self, save_xml): + if not isinstance(save_xml, bool): + raise ValueError("'save_xml' must be of type 'bool'") + self._save_xml = save_xml + @property def page_size(self): return self._page_size @@ -2016,18 +2327,32 @@ def output_directory(self, output_directory): output_directory = _Path(output_directory) except Exception as e: raise ValueError(f"Could not convert output path. {e}") - if not _Path(output_directory).exists() or not _Path(output_directory).is_dir(): + # Directory creation and logger setup are deferred until the runner + # is created (see _setup_output_directory), since this setter can be + # called multiple times via the Python API (e.g. before the user + # overrides the default) and doing it here would create stale + # directories and duplicate logger sinks. + self._output_directory = output_directory + + def _setup_output_directory(self): + """ + Internal method to create the output directory (if needed) and + configure the logger to write to it. Called once a runner is + created, by which point the user's final choice of output + directory is known. 
+ """ + + output_directory = self._output_directory + + if not output_directory.exists() or not output_directory.is_dir(): try: - _Path(output_directory).mkdir(parents=True, exist_ok=True) + output_directory.mkdir(parents=True, exist_ok=True) except: raise ValueError( f"Output directory {output_directory} does not exist and cannot be created" ) - if self.log_file is not None: - # Can now add the log file - _logger.add(output_directory / self.log_file, level=self.log_level.upper()) - _logger.debug(f"Logging to {output_directory / self.log_file}") - self._output_directory = output_directory + + self._reset_logger(_logger) @property def write_config(self): @@ -2080,6 +2405,68 @@ def overwrite(self, overwrite): raise ValueError("'overwrite' must be of type 'bool'") self._overwrite = overwrite + @staticmethod + def _to_hex(obj): + """ + Internal method to serialise a Sire object to a hex string representation + for storage in the YAML config file. + + Parameters + ---------- + + obj: object + The Sire object to serialise. + + Returns + -------- + + hex: + The hex string representation of the Sire object. + """ + + from sire.stream import save + from sire.legacy.Qt import QByteArray + + try: + hex = QByteArray(save(obj)).to_hex().data() + except Exception as e: + raise ValueError(f"Unable to serialise object: {e}") + + return hex + + @staticmethod + def _from_hex(hex): + """ + Internal method to deserialise a Sire object from a hex string representation. + + Parameters + ---------- + + hex: str + The hex string representation of the Sire object. + + Returns + ------- + + obj: + The deserialised Sire object. + """ + from sire.stream import load + from sire.legacy.Qt import QByteArray + + try: + # Convert StringProperty to string. 
+ try: + hex = hex.value() + except Exception: + pass + hex_byte_arrary = QByteArray.from_raw_data(hex, len(hex)) + obj = load(QByteArray.from_hex(hex_byte_arrary)) + except Exception as e: + raise ValueError(f"Unable to deserialise object: {e}") + + return obj + @classmethod def _create_parser(cls): """ @@ -2188,8 +2575,9 @@ def _reset_logger(self, logger): """ Internal method to reset the logger. - This can be used when a parallel process is spawned to ensure that - the logger is correctly configured. + Removes any existing sinks and re-adds them based on the current + config state. Used both when a parallel process is spawned, and + when the output directory is finalised for the main process. """ import sys @@ -2197,6 +2585,6 @@ def _reset_logger(self, logger): logger.remove() logger.add(sys.stderr, level=self.log_level.upper(), enqueue=True) if self.log_file is not None and self.output_directory is not None: - logger.add( - self.output_directory / self.log_file, level=self.log_level.upper() - ) + log_path = self.output_directory / self.log_file + logger.add(log_path, level=self.log_level.upper()) + logger.debug(f"Logging to {log_path}") diff --git a/src/somd2/io/_io.py b/src/somd2/io/_io.py index 6b664977..3ccb9eb2 100644 --- a/src/somd2/io/_io.py +++ b/src/somd2/io/_io.py @@ -34,8 +34,15 @@ import pyarrow as _pa import pyarrow.parquet as _pq import pandas as _pd +import warnings as _warnings import yaml as _yaml +# Options that have been removed from the config. Any of these found in a YAML +# config file will be silently dropped after emitting a deprecation warning. 
+_REMOVED_OPTIONS = { + "coulomb_power": "'coulomb_power' has been removed and will be ignored.", +} + def dataframe_to_parquet(df, metadata, filepath=None, filename=None): """ @@ -74,7 +81,7 @@ def dataframe_to_parquet(df, metadata, filepath=None, filename=None): table = table.replace_schema_metadata(combined_meta) if filename is None: if "lambda" in metadata and "temperature" in metadata: - filename = f"Lam_{metadata['lambda'].replace('.','')[:5]}_T_{metadata['temperature']}.parquet" + filename = f"Lam_{metadata['lambda'].replace('.', '')[:5]}_T_{metadata['temperature']}.parquet" else: filename = "output.parquet" if not filename.endswith(".parquet"): @@ -142,6 +149,11 @@ def yaml_to_dict(path): except Exception as e: raise ValueError(f"Could not load YAML file: {e}") + for key, msg in _REMOVED_OPTIONS.items(): + if key in d: + _warnings.warn(msg) + d.pop(key) + return d diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 32ac1097..c9734001 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -81,6 +81,10 @@ def __init__(self, system, config): self._config = config self._config._extra_args = {} + # Create the output directory and configure the logger now that the + # user's final choice of output directory is known. + self._config._setup_output_directory() + if self._config.replica_exchange and self._config.perturbed_system is not None: # Make sure the number of positions is correct. num_atoms = self._system.num_atoms() @@ -94,33 +98,17 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) - # Make sure the coordinates property is linked. - perturbed_system = _sr.morph.link_to_perturbed( - self._config.perturbed_system - ) + # Log the versions of somd2 and its OpenBioSim dependencies. + from somd2 import get_versions as _get_versions - # Store the positions. - self._perturbed_positions = _sr.io.get_coords_array(perturbed_system) - - # Store the box vectors. 
- cell = self._config.perturbed_system.space().box_matrix() - c0 = cell.column0() - c1 = cell.column1() - c2 = cell.column2() - self._perturbed_box = ( - (c0.x().value(), c0.y().value(), c0.z().value()), - (c1.x().value(), c1.y().value(), c1.z().value()), - (c2.x().value(), c2.y().value(), c2.z().value()), - ) - else: - self._perturbed_positions = None - self._perturbed_box = None - - # Log the versions of somd2 and sire. - from somd2 import __version__, _sire_version, _sire_revisionid - - _logger.info(f"somd2 version: {__version__}") - _logger.info(f"sire version: {_sire_version}+{_sire_revisionid}") + versions = _get_versions() + _logger.info(f"somd2 version: {versions['somd2']}") + _logger.info(f"sire version: {versions['sire']}") + _logger.info(f"biosimspace version: {versions['biosimspace']}") + if self._config.ghost_modifications: + _logger.info(f"ghostly version: {versions['ghostly']}") + if self._config.gcmc: + _logger.info(f"loch version: {versions['loch']}") # Flag whether frames are being saved. if ( @@ -159,6 +147,14 @@ def __init__(self, system, config): _logger.error(msg) raise IOError(msg) + # Reconstruct end-state connectivity and intrascale matrices from + # the bonded terms. The lambda=0 reference topology is used as the + # starting point and the pertfile does not express changes in + # connectivity or intrascale directly. + from .._utils._somd1 import reconstruct_intrascale + + self._system = reconstruct_intrascale(self._system) + # If we're not using SOMD1 compatibility, then reconstruct the original # perturbable system. We only need to do this if applying modifications # to ghost atom bonded terms. @@ -182,6 +178,57 @@ def __init__(self, system, config): # Link properties to the lambda = 0 end state. self._system = _sr.morph.link_to_reference(self._system) + # Whether this is a ring-breaking schedule. 
+ if ( + self._config._lambda_schedule_name is not None + and "ring_break" in self._config._lambda_schedule_name + ): + self._is_ring_breaking = True + else: + self._is_ring_breaking = False + + # Check to see if the end-state connectivities are the same. + if not self._is_ring_breaking: + for mol in self._system["property is_perturbable"].molecules(): + has_end_state_connectivity = False + try: + # The molecule will have two connectivity properties if + # the merge detected a change in connectivity. + c0 = mol.property("connectivity0") + c1 = mol.property("connectivity1") + has_end_state_connectivity = True + except: + # No connectivity change detected. + has_end_state_connectivity = False + pass + + # Check the connectivities regardless. + if has_end_state_connectivity: + if c0 != c1: + msg = ( + "End-state connectivities are different. If this is a ring-breaking " + "perturbation, please set 'lambda_schedule' to 'ring_break_morph' or 'reverse_ring_break_morph'." + ) + _logger.warning(msg) + break + + # Check for a periodic space. + self._has_space = self._check_space() + + # Check for water. + try: + # The search will fail if there are no water molecules. + water = self._system["water"].molecules() + self._has_water = True + except: + self._has_water = False + + # Warn if dispersion correction is requested but can't be applied. + if self._config.use_dispersion_correction and not self._has_water: + msg = "Cannot use dispersion correction for vacuum simulations. Disabling!" + _logger.warning(msg) + self._config.use_dispersion_correction = False + # Set the default configuration options. # Restrict the atomic properties used to define light atoms when @@ -196,6 +243,16 @@ def __init__(self, system, config): self._config.fix_perturbable_zero_sigmas ) + # Long-range dispersion correction. + self._config._extra_args["use_dispersion_correction"] = ( + self._config.use_dispersion_correction + ) + + # GCMC LRC map options.
+ if self._config.gcmc and self._config.use_dispersion_correction: + self._config._extra_args["use_gcmc_lrc"] = True + self._config._extra_args["num_gcmc_waters"] = self._config.gcmc_num_waters + # We're running in SOMD1 compatibility mode. if self._config.somd1_compatibility: from .._utils._somd1 import make_compatible @@ -209,54 +266,6 @@ def __init__(self, system, config): ) self._system = _sr.morph.link_to_reference(self._system) - # Next, swap the water topology so that it is in AMBER format. - - try: - waters = self._system["water"] - except: - waters = [] - - if len(waters) > 0: - from sire.legacy.IO import isAmberWater as _isAmberWater - from sire.legacy.IO import setAmberWater as _setAmberWater - - if not _isAmberWater(waters[0]): - num_atoms = waters[0].num_atoms() - - if num_atoms == 3: - # Here we assume TIP3P for any 3-point water model. - model = "tip3p" - elif num_atoms == 4: - # Check for OPC water. - try: - if ( - waters[0] - .search("element Xx") - .atoms()[0] - .charge() - .value() - < -1.1 - ): - model = "opc" - else: - model = "tip4p" - except: - model = "tip4p" - elif num_atoms == 5: - model = "tip5p" - try: - self._system = _System( - _setAmberWater(self._system._system, model) - ) - _logger.info( - "Converting water topology to AMBER format for SOMD1 compatibility." - ) - except Exception as e: - _logger.error( - "Unable to convert water topology to AMBER format for SOMD1 compatibility." - ) - raise e - # Ghost atoms are considered light when adding bond constraints. self._config._extra_args["ghosts_are_light"] = True @@ -271,24 +280,50 @@ def __init__(self, system, config): # Angle optimisation can sometimes fail. 
except Exception as e1: try: - self._system, self._modifications = modify( - self._system, optimise_angles=False - ) + self._system, self._modifications = modify(self._system) except Exception as e2: msg = f"Unable to apply modifications to ghost atom bonded terms: {e1}; {e2}" _logger.error(msg) raise RuntimeError(msg) - # Check for a periodic space. - self._has_space = self._check_space() - - # Check for water. + # Convert water topology to AMBER format if not already done. AMBER + # format adds an explicit H-H bond, giving fully rigid water (O-H and + # H-H constraints) rather than just O-H constraints under h_bonds. try: - # The search will fail if there are no water molecules. - water = self._system["water"].molecules() - self._has_water = True + waters = self._system["water"] except: - self._has_water = False + waters = [] + + if len(waters) > 0: + from sire.legacy.IO import isAmberWater as _isAmberWater + from sire.legacy.IO import setAmberWater as _setAmberWater + + if not _isAmberWater(waters[0]): + num_atoms = waters[0].num_atoms() + + if num_atoms == 3: + model = "tip3p" + elif num_atoms == 4: + try: + if ( + waters[0].search("element Xx").atoms()[0].charge().value() + < -1.1 + ): + model = "opc" + else: + model = "tip4p" + except: + model = "tip4p" + elif num_atoms == 5: + model = "tip5p" + try: + self._system = _System(_setAmberWater(self._system._system, model)) + _logger.info( + f"Converting water topology to AMBER {model.upper()} format." + ) + except Exception as e: + _logger.error("Unable to convert water topology to AMBER format.") + raise e # Check the end state constraints. self._check_end_state_constraints() @@ -338,6 +373,47 @@ def __init__(self, system, config): coalchemical_restraints ) + # Set the soft-core form. 
+ if self._config.softcore_form == "taylor": + self._config._extra_args["use_taylor_softening"] = True + self._config._extra_args["taylor_power"] = self._config.taylor_power + elif self._config.softcore_form == "beutler": + schedule_name = self._config._lambda_schedule_name + if schedule_name not in (None, "annihilate", "decouple"): + raise ValueError( + "The Beutler soft-core form is only supported with the 'annihilate' " + "or 'decouple' lambda schedules, or a custom schedule." + ) + self._config._extra_args["use_beutler_softening"] = True + self._config._extra_args["beutler_alpha"] = self._config.beutler_alpha + + # Build deferred schedules now that the softcore form is known. Epsilon is + # only held fixed (with LJ decay handled entirely by the Beutler soft-core + # prefactor) for molecules undergoing a ghost-atom decoupling/annihilation. + # An alchemical ion is a real (non-ghost) atom mutating identity (e.g. a + # water oxygen turning into Na+), so its LJ epsilon needs to interpolate + # normally; fixing it would leave the ion's persisting atom stuck at its + # initial LJ parameters for the whole stage. Disable fix_epsilon whenever + # an alchemical ion has been added, regardless of the configured value. + fix_epsilon = ( + self._config.softcore_form == "beutler" and self._config.beutler_fix_epsilon + ) + if fix_epsilon and charge_diff != 0: + _logger.info( + "Disabling Beutler 'fix_epsilon' since an alchemical ion has been " + "added: the ion's persisting atom is a real (non-ghost) mutation " + "and needs its LJ epsilon to interpolate normally." 
+ ) + fix_epsilon = False + if self._config._lambda_schedule_name == "annihilate": + from .._utils._schedules import annihilate as _annihilate + + self._config._lambda_schedule = _annihilate(fix_epsilon=fix_epsilon) + elif self._config._lambda_schedule_name == "decouple": + from .._utils._schedules import decouple as _decouple + + self._config._lambda_schedule = _decouple(fix_epsilon=fix_epsilon) + # Set the lambda values. if self._config.lambda_values: self._lambda_values = self._config.lambda_values @@ -392,8 +468,8 @@ def __init__(self, system, config): if len(self._config.rest2_scale) != len(self._lambda_energy): msg = f"Length of 'rest2_scale' must match the number of {_lam_sym} values." if is_missing: - msg += f"If you have omitted some 'lambda_values` from `lambda_energy`, please " - f"add them to `lambda_energy`, along with the corresponding `rest2_scale` values." + msg += "If you have omitted some 'lambda_values` from `lambda_energy`, please " + "add them to `lambda_energy`, along with the corresponding `rest2_scale` values." _logger.error(msg) raise ValueError(msg) # Make sure the end states are close to 1.0. @@ -419,7 +495,6 @@ def __init__(self, system, config): # Make sure the REST2 selection is valid. if self._config.rest2_selection is not None: - try: atoms = _sr.mol.selection_to_atoms( self._system, self._config.rest2_selection @@ -506,9 +581,34 @@ def __init__(self, system, config): # Check the output directories and create names of output files. self._filenames = self._prepare_output() + # Per-window cache of the last saved energy-components time (ns), + # used to skip duplicate rows on restart. + self._last_ec_time = {} + # Store the current system as a reference. self._reference_system = self._system.clone() + # Create a clone of the fully-prepared reference system with the + # perturbed end-state coordinates and periodic space. This is done + # after all system preparation so that the clone inherits the same + # topology and properties. 
It is used to seed starting coordinates + # for lambda > 0.5 replicas. + if self._config.replica_exchange and self._config.perturbed_system is not None: + from sire.legacy.IO import setCoordinates as _setCoordinates + + pert_coords = _sr.io.get_coords_array( + _sr.morph.link_to_perturbed(self._config.perturbed_system) + ) + self._perturbed_system = _sr.system.System( + _setCoordinates(self._system._system, pert_coords.tolist()) + ) + self._perturbed_system.set_space(self._config.perturbed_system.space()) + + # Link properties to the lambda = 0 end state. + self._perturbed_system = _sr.morph.link_to_reference(self._perturbed_system) + else: + self._perturbed_system = None + # Check for a valid restart. if self._config.restart: if self._config.use_backup: @@ -518,7 +618,19 @@ def __init__(self, system, config): self._is_restart = False self._cleanup() - # Save config whenever 'configure' is called to keep it up to date. + if self._config.replica_exchange and self._config.perturbed_system is not None: + # Check whether the perturbed system was loaded from file. If not + # we need to save to the output directory and update the config to + # point to the new file. + if self._config._perturbed_system_file is None: + filename = str( + _Path(self._config.output_directory) / "perturbed_system.s3" + ) + _sr.stream.save(self._config.perturbed_system, filename) + self._config._perturbed_system_file = filename + _logger.info(f"Saving perturbed system to {filename}") + + # Write YAML configuration file to the output directory. if self._config.write_config: _dict_to_yaml( self._config.as_dict(), @@ -530,13 +642,10 @@ def __init__(self, system, config): self._config.checkpoint_frequency / self._config.energy_frequency ) - # Zero the energy sample. - self._nrg_sample = 0 - # GCMC specific validation. if self._config.gcmc: - if self._config.platform != "cuda": - msg = "GCMC simulations require the CUDA platform." 
+ if self._config.platform not in ["cuda", "opencl"]: + msg = "GCMC simulations require the CUDA or OpenCL platform." _logger.error(msg) raise ValueError(msg) @@ -545,11 +654,6 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) - if self._config.pressure != None: - msg = "GCMC simulations must be run in the NVT ensemble." - _logger.error(msg) - raise ValueError(msg) - if isinstance(self._system, list): mols = self._system[0] else: @@ -638,6 +742,59 @@ def __init__(self, system, config): # Store the excess chemcical potential value. self._mu_ex = self._config.gcmc_excess_chemical_potential.value() + # Terminal flip specific validation and setup. + if self._config.terminal_flip_frequency is not None: + from math import isclose + + # Make sure the terminal flip frequency is a multiple of the + # energy frequency. + ratio = ( + self._config.terminal_flip_frequency / self._config.energy_frequency + ).value() + + if not isclose(ratio, round(ratio), abs_tol=1e-4): + msg = "'terminal_flip_frequency' must be a multiple of 'energy_frequency'." + _logger.error(msg) + raise ValueError(msg) + + # Auto-detect terminal ring groups using Sire connectivity. + from ._samplers import detect_terminal_groups + + if isinstance(self._system, list): + mols = self._system[0] + else: + mols = self._system + + flip_angle = ( + self._config.terminal_flip_angle.to("degrees").value() + if self._config.terminal_flip_angle is not None + else None + ) + self._terminal_groups = detect_terminal_groups( + mols, + flip_angle=flip_angle, + max_mobile_atoms=self._config.terminal_flip_max_mobile_atoms, + ) + + if not self._terminal_groups: + _logger.warning( + "No terminal ring groups detected. Terminal flip moves will not " + "be performed." + ) + else: + _logger.info( + f"Detected {len(self._terminal_groups)} terminal ring group(s) " + f"for terminal flip MC." 
+ ) + for i, (angle, indices) in enumerate(self._terminal_groups): + _logger.info( + f" Group {i}: flip angle = {angle}°, " + f"anchor = {indices[0]}, pivot = {indices[1]}, " + f"{len(indices) - 2} mobile atom(s)" + ) + else: + self._terminal_groups = [] + # Store the initial system time. if isinstance(self._system, list): self._initial_time = [] @@ -697,61 +854,69 @@ def __init__(self, system, config): self._initial_constraint = self._config.constraint self._initial_perturbable_constraint = self._config.perturbable_constraint - # Create the default dynamics kwargs dictionary. These can be overloaded - # as needed. + # Common kwargs shared by both dynamics and GCMC sampling. + self._common_kwargs = { + "cutoff": self._config.cutoff, + "cutoff_type": self._config.cutoff_type, + "platform": self._config.platform, + "rest2_selection": self._config.rest2_selection, + "shift_coulomb": self._config.shift_coulomb, + "shift_delta": self._config.shift_delta, + "swap_end_states": self._config.swap_end_states, + "temperature": self._config.temperature, + } + + # Create the default dynamics kwargs dictionary. 
self._dynamics_kwargs = { - "integrator": config.integrator, - "temperature": config.temperature, - "pressure": config.pressure if self._has_water else None, - "surface_tension": config.surface_tension, - "barostat_frequency": config.barostat_frequency, - "timestep": config.timestep, - "restraints": config.restraints, - "cutoff_type": config.cutoff_type, - "cutoff": config.cutoff, - "schedule": config.lambda_schedule, - "platform": config.platform, - "constraint": config.constraint, - "perturbable_constraint": config.perturbable_constraint, - "include_constrained_energies": config.include_constrained_energies, - "dynamic_constraints": config.dynamic_constraints, - "swap_end_states": config.swap_end_states, - "com_reset_frequency": config.com_reset_frequency, + **self._common_kwargs, + "barostat_frequency": self._config.barostat_frequency, + "com_reset_frequency": self._config.com_reset_frequency, + "constraint": self._config.constraint, + "dynamic_constraints": self._config.dynamic_constraints, + "include_constrained_energies": self._config.include_constrained_energies, + "integrator": self._config.integrator, + "map": self._config._extra_args, + "perturbable_constraint": self._config.perturbable_constraint, + "pressure": self._config.pressure if self._has_water else None, + "restraints": self._config.restraints, + "schedule": self._config.lambda_schedule, + "surface_tension": self._config.surface_tension, + "timestep": self._config.timestep, "vacuum": not self._has_space, - "coulomb_power": config.coulomb_power, - "shift_coulomb": config.shift_coulomb, - "shift_delta": config.shift_delta, - "rest2_selection": config.rest2_selection, - "map": config._extra_args, } # Create the GCMC specific kwargs dictionary. 
if self._config.gcmc: self._gcmc_kwargs = { - "reference": self._config.gcmc_selection, + **self._common_kwargs, + "bulk_sampling_probability": self._config.gcmc_bulk_sampling_probability, "excess_chemical_potential": str( self._config.gcmc_excess_chemical_potential ), - "standard_volume": str(self._config.gcmc_standard_volume), - "radius": str(self._config.gcmc_radius), - "num_ghost_waters": self._config.gcmc_num_waters, - "bulk_sampling_probability": self._config.gcmc_bulk_sampling_probability, - "cutoff_type": self._config.cutoff_type, - "cutoff": str(self._config.cutoff), - "temperature": str(self._config.temperature), "lambda_schedule": self._config.lambda_schedule, - "coulomb_power": self._config.coulomb_power, - "shift_coulomb": str(self._config.shift_coulomb), - "shift_delta": str(self._config.shift_delta), - "swap_end_states": self._config.swap_end_states, - "tolerance": self._config.gcmc_tolerance, - "restart": self._is_restart, - "overwrite": self._config.overwrite, "no_logger": True, + "num_ghost_waters": self._config.gcmc_num_waters, + "pressure": self._config.pressure, + "overwrite": self._config.overwrite, + "radius": str(self._config.gcmc_radius), + "reference": self._config.gcmc_selection, + "restart": self._is_restart, + "softcore_form": self._config.softcore_form, + "taylor_power": self._config.taylor_power, + "standard_volume": str(self._config.gcmc_standard_volume), + "tolerance": self._config.gcmc_tolerance, } else: self._gcmc_kwargs = None + # Reverse the lambda schedule when swapping end states so that the + # schedule progresses from the perturbed end state to the reference. + # (The GCMC schedule is reversed inside loch itself.) + if self._config.swap_end_states: + self._dynamics_kwargs["schedule"] = self._dynamics_kwargs[ + "schedule" + ].reverse() + # Limit the number of CPU threads available to Sire when running in parallel. if self._is_gpu: # First get the total number of threads that are available to Sire. 
@@ -1115,13 +1280,16 @@ def increment_filename(base_filename, suffix): lam = f"{lambda_value:.5f}" filenames = {} filenames["checkpoint"] = str(output_directory / f"checkpoint_{lam}.s3") + filenames["checkpoint_state"] = str(output_directory / f"checkpoint_{lam}.npz") filenames["energy_traj"] = str(output_directory / f"energy_traj_{lam}.parquet") filenames["trajectory"] = str(output_directory / f"traj_{lam}.dcd") filenames["trajectory_chunk"] = str(output_directory / f"traj_{lam}_") filenames["energy_components"] = str( - output_directory / f"energy_components_{lam}.txt" + output_directory / f"energy_components_{lam}.parquet" ) filenames["gcmc_ghosts"] = str(output_directory / f"gcmc_ghosts_{lam}.txt") + filenames["sampler_stats"] = str(output_directory / f"sampler_stats_{lam}.pkl") + filenames["xml"] = str(output_directory / f"system_{lam}.xml") if restart: filenames["config"] = str( output_directory / increment_filename("config", "yaml") @@ -1316,6 +1484,7 @@ def _compare_configs(config1, config2): "energy_frequency", "frame_frequency", "save_velocities", + "perturbed_system", "platform", "max_threads", "max_gpus", @@ -1334,6 +1503,80 @@ def _compare_configs(config1, config2): v1 = config1[key] v2 = config2[key] + # None config options stored as a Sire property are converted + # to False, so None and False are equivalent for the purposes of + # comparison. + if v1 is None and not v2: + continue + if v2 is None and not v1: + continue + + # Early exit equivalence check. + if v1 == v2: + continue + + # Custom lambda schedules are stored as a hexadecimal string of + # a serialised object. We need to deserialise them before comparison. + if key == "lambda_schedule": + # Standard schedules are stored as strings, so we can compare these directly.
+ if v1 == v2: + continue + try: + v1 = _Config._from_hex(v1) + except Exception as e: + raise ValueError( + f"Unable to deserialise lambda schedule from config1: {str(e)}" + ) + try: + v2 = _Config._from_hex(v2) + except Exception as e: + raise ValueError( + f"Unable to deserialise lambda schedule from config2: {str(e)}" + ) + if v1 != v2: + raise ValueError( + f"{key} has changed since the last run. This is not " + "allowed when using the restart option." + ) + continue + + # Restraints are stored as a list of hexadecimal strings of serialised objects. + # We need to deserialise them before comparison. + elif key == "restraints": + if v1 and v2: + if len(v1) != len(v2): + raise ValueError( + f"Number of restraints has changed since the last run " + f"({len(v1)} vs {len(v2)}). This is not allowed when " + "using the restart option." + ) + # Deserialise all restraints from both configs. + try: + deserialized_v1 = [_Config._from_hex(r) for r in v1] + except Exception as e: + raise ValueError( + f"Unable to deserialise restraint from config1: {str(e)}" + ) + try: + deserialized_v2 = [_Config._from_hex(r) for r in v2] + except Exception as e: + raise ValueError( + f"Unable to deserialise restraint from config2: {str(e)}" + ) + # Match each restraint in v1 against v2, regardless of order. + unmatched = list(deserialized_v2) + for r1 in deserialized_v1: + for i, r2 in enumerate(unmatched): + if r1 == r2: + unmatched.pop(i) + break + else: + raise ValueError( + f"{key} has changed since the last run. This is not " + "allowed when using the restart option." + ) + continue + # Convert GeneralUnits to strings for comparison. if isinstance(v1, _GeneralUnit): v1 = str(v1) @@ -1343,14 +1586,14 @@ def _compare_configs(config1, config2): # Convert Sire containers to lists for comparison. 
try: v1 = v1.to_list() - except: + except Exception: pass try: v2 = v2.to_list() - except: + except Exception: pass - if (v1 == None and v2 == False) or (v2 == None and v1 == False): + if (v1 is None and v2 == False) or (v2 is None and v1 == False): continue # The GCMC frequency will be automaticall set if None. elif key == "gcmc_frequency" and v1 is None: @@ -1393,18 +1636,22 @@ def get_last_config(output_directory): f"No config files found in {self._config.output_directory}, " "attempting to retrieve config from lambda = 0 checkpoint file." ) - try: - system_temp = _sr.stream.load( - str(self._config.output_directory / "checkpoint_0.00000.s3") - ) - except: - expdir = self._config.output_directory / "checkpoint_0.00000.s3" - _logger.error(f"Unable to load checkpoint file from {expdir}.") - raise + s3_path = self._config.output_directory / "checkpoint_0.00000.s3" + if s3_path.exists(): + try: + system_temp = _sr.stream.load(str(s3_path)) + except: + _logger.error(f"Unable to load checkpoint file from {s3_path}.") + raise + else: + self._last_config = dict(system_temp.property("config")) + config = self._config.as_dict(sire_compatible=True) + del system_temp else: - self._last_config = dict(system_temp.property("config")) - config = self._config.as_dict(sire_compatible=True) - del system_temp + raise OSError( + f"No config file found in {self._config.output_directory}. " + "Cannot validate restart config without a config.yaml file." + ) self._compare_configs(self._last_config, config) @@ -1619,6 +1866,8 @@ def _checkpoint( lambda_energy=None, lambda_grad=None, is_final_block=False, + context=None, + gcmc_sampler=None, ): """ Save a checkpoint file. @@ -1661,31 +1910,40 @@ def _checkpoint( """ try: - from somd2 import __version__, _sire_version, _sire_revisionid + from somd2 import get_versions as _get_versions + + versions = _get_versions() # Get the lambda value. lam = self._lambda_values[index] + # -1 is the sentinel for a post-equilibration checkpoint. 
No + # energies are collected during equilibration, so skip all + # parquet-related work in this case. + is_post_equilibration = block == -1 + # Get the energy trajectory. - df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") + if not is_post_equilibration: + df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") # Set the lambda values at which energies were sampled. if lambda_energy is None: lambda_energy = self._lambda_values # Create the metadata. - metadata = { - "attrs": df.attrs, - "somd2 version": __version__, - "sire version": f"{_sire_version}+{_sire_revisionid}", - "lambda": str(lam), - "speed": speed, - "temperature": str(self._config.temperature.value()), - } - - # Add the lambda gradient if available. - if lambda_grad is not None: - metadata["lambda_grad"] = lambda_grad + if not is_post_equilibration: + metadata = { + "attrs": df.attrs, + "somd2 version": versions["somd2"], + "sire version": versions["sire"], + "lambda": f"{lam:.5f}", + "speed": speed, + "temperature": str(self._config.temperature.value()), + } + + # Add the lambda gradient if available. + if lambda_grad is not None: + metadata["lambda_grad"] = [f"{v:.5f}" for v in lambda_grad] if is_final_block: # Save the end-state GCMC topologies for trajectory analysis and visualisation. @@ -1755,28 +2013,22 @@ def _checkpoint( for chunk in traj_chunks: _Path(chunk).unlink() - # Add config and lambda value to the system properties. - system.set_property( - "config", self._config.as_dict(sire_compatible=True) + # Write the checkpoint system to file. + self._write_checkpoint_system( + system, index, context=context, gcmc_sampler=gcmc_sampler ) - system.set_property("lambda", lam) - - # Delete all frames from the system. - system.delete_all_frames() - - # Stream the final system to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) - # Create the final parquet file. 
- _dataframe_to_parquet( - df, - metadata=metadata, - filename=self._filenames[index]["energy_traj"], - ) + # Append the final block's energy data. If no parquet exists + # yet (e.g. checkpoint_frequency=0), create one from scratch. + _energy_traj = self._filenames[index]["energy_traj"] + if _Path(_energy_traj).exists(): + _parquet_append(_energy_traj, df.iloc[-self._energy_per_block :]) + else: + _dataframe_to_parquet(df, metadata=metadata, filename=_energy_traj) else: # Update the starting block if necessary. - if block == 0: + if block <= 0: block = self._start_block # Save the current trajectory chunk to file. @@ -1792,36 +2044,63 @@ def _checkpoint( format=["DCD"], ) - # Encode the configuration and lambda value as system properties. - system.set_property( - "config", self._config.as_dict(sire_compatible=True) + # Write the checkpoint system to file. + self._write_checkpoint_system( + system, index, context=context, gcmc_sampler=gcmc_sampler ) - system.set_property("lambda", lam) - - # Delete all frames from the system. - system.delete_all_frames() - # Stream the checkpoint to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) + # Skip parquet creation for post-equilibration checkpoints. + if not is_post_equilibration: + # Create the parquet file name. + filename = self._filenames[index]["energy_traj"] - # Create the parquet file name. - filename = self._filenames[index]["energy_traj"] - - # Create the parquet file. - if block == self._start_block: - _dataframe_to_parquet(df, metadata=metadata, filename=filename) - # Append to the parquet file. - else: - _parquet_append( - filename, - df.iloc[-self._energy_per_block :], - ) + # At the start block of a restart, append to the existing + # parquet so that historical data is preserved. For fresh + # runs, overwrite (or create) the parquet file. 
+ if block == self._start_block and not ( + self._is_restart and _Path(filename).exists() + ): + _dataframe_to_parquet(df, metadata=metadata, filename=filename) + else: + _parquet_append( + filename, + df.iloc[-self._energy_per_block :], + ) except Exception as e: return index, e return index, None + def _write_checkpoint_system(self, system, index, context=None, gcmc_sampler=None): + """ + Write the system state to the checkpoint file. + + Subclasses may override this to store state differently, e.g. repex + records the simulation time in the dynamics cache pickle instead of + streaming a per-replica file. + + Parameters + ---------- + + system: :class: `System ` + The committed system to checkpoint. + + index: int + The index of the lambda window. + + context: openmm.Context, optional + The OpenMM context. Unused in the base implementation. + + gcmc_sampler: GCMCSampler, optional + The GCMC sampler. Unused in the base implementation. + """ + lam = self._lambda_values[index] + system.set_property("config", self._config.as_dict(sire_compatible=True)) + system.set_property("lambda", lam) + system.delete_all_frames() + _sr.stream.save(system, self._filenames[index]["checkpoint"]) + def _backup_checkpoint(self, index): """ Create a backup of the previous checkpoint files. @@ -1844,6 +2123,17 @@ def _backup_checkpoint(self, index): self._filenames[index]["checkpoint"], str(self._filenames[index]["checkpoint"]) + ".bak", ) + except Exception as e: + return index, e + + try: + # Backup the existing compact numpy checkpoint file, if it exists. 
+ path = _Path(self._filenames[index]["checkpoint_state"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint_state"], + str(self._filenames[index]["checkpoint_state"]) + ".bak", + ) traj_filename = self._filenames[index]["trajectory"] except Exception as e: return index, e @@ -1859,11 +2149,23 @@ def _backup_checkpoint(self, index): except Exception as e: return index, e + try: + # Backup the existing energy components file, if it exists. + path = _Path(self._filenames[index]["energy_components"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["energy_components"], + str(self._filenames[index]["energy_components"]) + ".bak", + ) + except Exception as e: + return index, e + return index, None - def _save_energy_components(self, index, context): + def _save_energy_components(self, index, context, time_ns): """ - Internal function to save the energy components for each force group to file. + Internal function to save the energy components for each force group to a + Parquet file. Parameters ---------- @@ -1873,44 +2175,62 @@ def _save_energy_components(self, index, context): context : openmm.Context The current OpenMM context. + + time_ns : float + The current simulation time in nanoseconds. """ - from copy import deepcopy + import json as _json import openmm + import pandas as _pd + import pyarrow as _pa + import pyarrow.parquet as _pq_local + + filepath = self._filenames[index]["energy_components"] - # Get the current context and system. - system = deepcopy(context.getSystem()) - - # Add each force to a unique group. - for i, f in enumerate(system.getForces()): - f.setForceGroup(i) - - # Create a new context. - new_context = openmm.Context(system, deepcopy(context.getIntegrator())) - new_context.setPositions(context.getState(getPositions=True).getPositions()) - - header = f"{'# Sample':>10}" - record = f"{self._nrg_sample:>10}" - - # Process the records. 
- for i, f in enumerate(system.getForces()): - state = new_context.getState(getEnergy=True, groups={i}) - name = f.getName() - name_len = len(name) - header += f"{f.getName():>{name_len+2}}" - record += f"{state.getPotentialEnergy().value_in_unit(openmm.unit.kilocalories_per_mole):>{name_len+2}.2f}" - - # Write to file. - if self._nrg_sample == 0: - with open(self._filenames[index]["energy_components"], "w") as f: - f.write(header + "\n") - f.write(record + "\n") + # Lazy-initialise the last saved time for restart deduplication. + # On the first call for this window, read the existing file (if any) + # to find the maximum time already written. + if index not in self._last_ec_time: + path = _Path(filepath) + if path.exists() and path.stat().st_size > 0: + existing = _pq_local.read_table(filepath).to_pandas() + self._last_ec_time[index] = float(existing["time"].max()) + else: + self._last_ec_time[index] = -1.0 + + # Skip rows that have already been written (restart deduplication). + if time_ns <= self._last_ec_time[index]: + return + + # Use the named force groups already assigned by sire_to_openmm_system, + # sorted alphabetically for a consistent column order across runs. + energies = {} + for name, grp in sorted(context._force_group_map.items()): + state = context.getState(getEnergy=True, groups=(1 << grp)) + energies[name] = state.getPotentialEnergy().value_in_unit( + openmm.unit.kilocalories_per_mole + ) + + row = {"time": round(time_ns, 6)} | energies + df = _pd.DataFrame([row]) + + path = _Path(filepath) + if path.exists() and path.stat().st_size > 0: + _parquet_append(filepath, df) else: - with open(self._filenames[index]["energy_components"], "a") as f: - f.write(record + "\n") + # First write: embed units as schema metadata under the "somd2" key, + # consistent with how the energy trajectory parquet files are written. 
+ table = _pa.Table.from_pandas(df) + meta = _json.dumps( + {"time_units": "ns", "energy_units": "kcal/mol"} + ).encode() + table = table.replace_schema_metadata( + {b"somd2": meta, **table.schema.metadata} + ) + _pq_local.write_table(table, filepath) - # Increment the sample number. - self._nrg_sample += 1 + self._last_ec_time[index] = time_ns def _restore_backup_files(self): """ diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index fe7b34bd..7da3ba37 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -52,8 +52,8 @@ def __init__( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, - perturbed_positions=None, - perturbed_box=None, + perturbed_system=None, + xml_filenames=None, ): """ Constructor. @@ -82,13 +82,13 @@ def __init__( output_directory: pathlib.Path The directory for simulation output. - perturbed_positions: numpy.ndarray - The positions for the perturbed state. If None, then the perturbed state - is not used. + perturbed_system: :class: `System ` + The perturbed end-state system used to seed starting coordinates for + lambda > 0.5 replicas. If None, the perturbed state is not used. - perturbed_box: numpy.ndarray - The box vectors for the perturbed state. If None, then the perturbed state - is not used. + xml_filenames: list of str + A list of file paths for the OpenMM XML output, one per replica. + If None, XML files are not written. """ # Warn if the number of replicas is not a multiple of the number of GPUs. 
@@ -102,10 +102,12 @@ def __init__( self._lambdas = lambdas self._rest2_scale_factors = rest2_scale_factors self._states = _np.array(range(len(lambdas))) - self._old_states = _np.array(range(len(lambdas))) + self._time = None self._openmm_states = [None] * len(lambdas) self._gcmc_samplers = [None] * len(lambdas) self._gcmc_states = [None] * len(lambdas) + self._gcmc_stats = [None] * len(lambdas) + self._terminal_flip_stats = [[0, 0]] * len(lambdas) self._num_proposed = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) self._num_accepted = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) self._num_swaps = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) @@ -119,8 +121,8 @@ def __init__( dynamics_kwargs, gcmc_kwargs=gcmc_kwargs, output_directory=output_directory, - perturbed_positions=perturbed_positions, - perturbed_box=perturbed_box, + perturbed_system=perturbed_system, + xml_filenames=xml_filenames, ) def __setstate__(self, state): @@ -130,6 +132,18 @@ def __setstate__(self, state): for key, value in state.items(): setattr(self, key, value) + # Provide defaults for attributes added after the initial release, + # so that old checkpoint files can still be loaded. + n = len(self._lambdas) + if not hasattr(self, "_gcmc_stats"): + self._gcmc_stats = [None] * n + if not hasattr(self, "_gcmc_states"): + self._gcmc_states = [None] * n + if not hasattr(self, "_terminal_flip_stats"): + self._terminal_flip_stats = [[0, 0]] * n + if not hasattr(self, "_time"): + self._time = None + def __getstate__(self): """ Get the state of the object. @@ -140,11 +154,13 @@ def __getstate__(self): "_lambdas": self._lambdas, "_rest2_scale_factors": self._rest2_scale_factors, "_states": self._states, - "_old_states": self._old_states, + "_time": self._time, "_openmm_states": self._openmm_states, # Don't pickle the GCMC samplers since they need to be recreated. 
"_gcmc_samplers": len(self._gcmc_samplers) * [None], "_gcmc_states": self._gcmc_states, + "_gcmc_stats": self._gcmc_stats, + "_terminal_flip_stats": self._terminal_flip_stats, "_num_proposed": self._num_proposed, "_num_accepted": self._num_accepted, "_num_swaps": self._num_swaps, @@ -161,8 +177,8 @@ def _create_dynamics( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, - perturbed_positions=None, - perturbed_box=None, + perturbed_system=None, + xml_filenames=None, ): """ Create the dynamics objects. @@ -191,13 +207,13 @@ def _create_dynamics( output_directory: pathlib.Path The directory for simulation output. - perturbed_positions: numpy.ndarray - The positions for the perturbed state. If None, then the perturbed state - is not used. + perturbed_system: :class: `System ` + The perturbed end-state system used to seed starting coordinates for + lambda > 0.5 replicas. If None, the perturbed state is not used. - perturbed_box: numpy.ndarray - The box vectors for the perturbed state. If None, then the perturbed state - is not used. + xml_filenames: list of str + A list of file paths for the OpenMM XML output, one per replica. + If None, XML files are not written. """ from math import floor @@ -215,35 +231,39 @@ def _create_dynamics( # Initialise the dynamics object list. self._dynamics = [] - # A set of visited device indices. - devices = set() + # Per-device memory tracking for estimation. + device_mem = {} - # Determine whether there is a remainder in the number of replicas. + # Work out how many replicas are assigned to each device. + # Replicas are assigned round-robin, so the first (num_replicas % num_gpus) + # devices get one extra replica. + base = floor(num_replicas / num_gpus) remainder = num_replicas % num_gpus - - # Store the number of contexts for each device. 
The last device will - # have remainder contexts, while all others have - contexts_per_device = num_replicas * [floor(num_replicas / num_gpus)] - - # Set the last device to have the remainder contexts. - contexts_per_device[-1] = remainder + contexts_per_device = [ + base + (1 if i < remainder else 0) for i in range(num_gpus) + ] # Create the dynamics objects in serial. for i, (lam, scale) in enumerate(zip(lambdas, rest2_scale_factors)): # Work out the device index. device = i % num_gpus - # If we've not seen this device before then get the memory statistics - # prior to creating the dynamics object and GCMC sampler. - if device not in devices: - used_mem_before, free_mem_before, total_mem = self._check_device_memory( - device - ) + # Record baseline memory before the first replica on this device. + if device not in device_mem: + used_before, _, total_mem = self._check_device_memory(device) + device_mem[device] = { + "before": used_before, + "total": total_mem, + "count": 0, + } # This is a restart, get the system for this replica. if isinstance(system, list): mols = system[i] - # This is a new simulation. + # This is a new simulation. For lambda > 0.5, use the perturbed + # system to seed the starting coordinates and periodic space. + elif perturbed_system is not None and lam > 0.5: + mols = perturbed_system else: mols = system @@ -284,15 +304,6 @@ def _create_dynamics( f"Created GCMC sampler for lambda {lam:.5f} on device {device}" ) - # Log the initial position of the GCMC sphere. - if self._gcmc_samplers[i]._reference is not None: - positions = _sr.io.get_coords_array(mols) - target = self._gcmc_samplers[i]._get_target_position(positions) - _logger.info( - f"Initial GCMC sphere centre for lambda {lam:.5f} on device {device}: " - f"[{target[0]:.3f}, {target[1]:.3f}, {target[2]:.3f}] A" - ) - # Create the dynamics object. 
try: dynamics = mols.dynamics(**dynamics_kwargs) @@ -301,33 +312,6 @@ def _create_dynamics( _logger.error(msg) raise RuntimeError(msg) from e - # Update the box vectors and positions if the perturbed state is used. - if ( - perturbed_positions is not None - and perturbed_box is not None - and lam > 0.5 - ): - from openmm.unit import angstrom - - # Get the positions from the context. - positions = ( - dynamics.context() - .getState(getPositions=True) - .getPositions(asNumpy=True) - ) / angstrom - - # The positions array also contains the ghost water atoms that - # were added during the GCMC setup. We need to make sure that - # we copy these over to the perturbed positions array. - diff = len(positions) - len(perturbed_positions) - if diff != 0: - perturbed_positions = _np.concatenate( - [perturbed_positions, positions[-diff:]] - ) - - dynamics.context().setPeriodicBoxVectors(*perturbed_box * angstrom) - dynamics.context().setPositions(perturbed_positions * angstrom) - # Bind the GCMC sampler to the dynamics object. This allows the # dynamics object to reset the water state in its internal OpenMM # context following a crash recovery. @@ -337,26 +321,61 @@ def _create_dynamics( # Append the dynamics object. self._dynamics.append(dynamics) - # Check the memory footprint for this device. - if not device in devices: - # Add the device to the set of visited devices. - devices.add(device) - - # Get the current memory usage. - used_mem, free_mem, total_mem = self._check_device_memory(device) - - # Work out the memory used by this dynamics object and GCMC sampler. - mem_used = used_mem - used_mem_before + # Write the OpenMM XML file to the output directory. + if xml_filenames is not None: + _logger.info( + f"Writing OpenMM XML for lambda {lam:.5f} on device {device}" + ) + dynamics.to_xml(xml_filenames[i]) + + # Track memory footprint for this device. 
+ info = device_mem[device] + info["count"] += 1 + num_contexts = contexts_per_device[device] + + # Estimate memory after the first or second replica. + if info["count"] == 1: + used_mem, _, _ = self._check_device_memory(device) + info["after_first"] = used_mem + + if num_contexts == 1: + # Only one replica on this device, use actual measurement. + est_total = used_mem + else: + # Wait for the second replica to get the marginal cost. + est_total = None + + elif info["count"] == 2: + used_mem, _, _ = self._check_device_memory(device) + # The first replica includes one-time context overhead. + # The marginal cost of subsequent replicas is the difference + # between the second and first. + first_cost = info["after_first"] - info["before"] + marginal_cost = used_mem - info["after_first"] + est_total = ( + info["before"] + first_cost + marginal_cost * (num_contexts - 1) + ) + _logger.info( + f"Memory per replica on device {device}: " + f"first = {first_cost / (1024**2):.0f} MiB, " + f"marginal = {marginal_cost / (1024**2):.0f} MiB" + ) + else: + est_total = None - # Work out the estimate for all replicas on this device. - est_total = mem_used * contexts_per_device[device] + if est_total is not None: + total_mem = info["total"] # If this exceeds the total memory, raise an error. if est_total > total_mem: + baseline = info["before"] + replica_cost = first_cost + marginal_cost * (num_contexts - 1) msg = ( f"Not enough memory on device {device} for all assigned replicas. " - f"Estimated memory usage: {est_total / 1e9:.2f} GB, " - f"Available memory: {total_mem / 1e9:.2f} GB." + f"Baseline usage before simulation: {baseline / (1024**3):.2f} GB " + f"Estimated replica memory: {replica_cost / (1024**3):.2f} GB, " + f"Total estimated: {est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." 
) _logger.error(msg) raise MemoryError(msg) @@ -366,8 +385,15 @@ def _create_dynamics( _logger.warning( f"Device {device} will have less than 20% free memory " f"after creating all assigned replicas. " - f"{est_total / 1e9:.2f} GB, " - f"Available memory: {total_mem / 1e9:.2f} GB." + f"{est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." + ) + + else: + _logger.info( + f"Estimated memory usage on device {device} after creating all replicas: " + f"{est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." ) _logger.info( @@ -429,7 +455,6 @@ def save_openmm_state(self, index): index: int The index of the replica. """ - from openmm.unit import angstrom # Get the current OpenMM state. state = ( @@ -438,8 +463,40 @@ def save_openmm_state(self, index): .getState(getPositions=True, getVelocities=True) ) - # Store the state. - self._openmm_states[index] = state + # Store positions, velocities, and box vectors as compact numpy arrays + # rather than the OpenMM State object, which serialises to XML when + # pickled and is orders of magnitude larger. + self._openmm_states[index] = { + "positions": state.getPositions(asNumpy=True), + "velocities": state.getVelocities(asNumpy=True), + "box": state.getPeriodicBoxVectors(asNumpy=True), + } + + @staticmethod + def _apply_openmm_state(context, state): + """ + Apply a saved OpenMM state to a context. + + Parameters + ---------- + + context: openmm.Context + The OpenMM context to update. + + state: dict or openmm.State + The state to apply. Dicts (new format) contain "positions", + "velocities", and "box" numpy arrays. A bare openmm.State is + accepted for backwards compatibility with old checkpoint files. 
+ """ + if isinstance(state, dict): + context.setPositions(state["positions"]) + context.setVelocities(state["velocities"]) + if state["box"] is not None: + context.setPeriodicBoxVectors(*state["box"]) + else: + # Legacy openmm.State from checkpoint files written before this + # format change. + context.setState(state) def save_gcmc_state(self, index): """ @@ -481,16 +538,23 @@ def set_states(self, states): """ self._states = states - def mix_states(self): + def mix_states(self, old_states): """ Mix the states of the dynamics objects. + + Parameters + ---------- + old_states : numpy.ndarray + The state indices from before the last replica mix. """ # Mix the states. for i, state in enumerate(self._states): # The state has changed. if i != state: _logger.debug(f"Replica {i} seeded from state {state}") - self._dynamics[i].context().setState(self._openmm_states[state]) + self._apply_openmm_state( + self._dynamics[i].context(), self._openmm_states[state] + ) # Swap the water state in the GCMCSamplers. if self._gcmc_samplers[i] is not None: @@ -501,19 +565,17 @@ def mix_states(self): # Update the water state in the GCMCSampler. self._gcmc_samplers[i].push() - self._gcmc_samplers[i]._set_water_state( - self._dynamics[i].context(), - indices=water_idxs, - states=self._gcmc_states[state][water_idxs], - ) - self._gcmc_samplers[i].pop() + try: + self._gcmc_samplers[i]._set_water_state( + self._dynamics[i].context(), + indices=water_idxs, + states=self._gcmc_states[state][water_idxs], + ) + finally: + self._gcmc_samplers[i].pop() # Update the swap matrix. - old_state = self._old_states[i] - self._num_swaps[old_state, state] += 1 - - # Store the current states. 
- self._old_states = self._states.copy() + self._num_swaps[old_states[i], state] += 1 def get_proposed(self): """ @@ -534,34 +596,83 @@ def get_swaps(self): return self._num_swaps @staticmethod - def _check_device_memory(index): + def _check_device_memory(device_index=0): """ - Check the memory usage of the specified CUDA device. + Check the memory usage of the specified GPU device. Parameters ---------- index: int - The index of the CUDA device. + The index of the GPU device. """ + + # Try to use pyopencl to detect the GPU vendor. + vendor = None + ocl_device = None try: - from pynvml import ( - nvmlInit, - nvmlShutdown, - nvmlDeviceGetHandleByIndex, - nvmlDeviceGetMemoryInfo, + import pyopencl as cl + + platforms = cl.get_platforms() + all_devices = [] + for platform in platforms: + try: + devices = platform.get_devices(device_type=cl.device_type.GPU) + all_devices.extend(devices) + except Exception: + continue + + if device_index < len(all_devices): + ocl_device = all_devices[device_index] + vendor = ocl_device.vendor + else: + msg = f"Device index {device_index} out of range. Found {len(all_devices)} GPU(s)." + _logger.error(msg) + raise IndexError(msg) + except IndexError: + raise + except Exception: + _logger.warning( + "Could not query GPU platform via OpenCL; falling back to pynvml for NVIDIA detection." ) - nvmlInit() - handle = nvmlDeviceGetHandleByIndex(index) - info = nvmlDeviceGetMemoryInfo(handle) - result = (info.used, info.free, info.total) - nvmlShutdown() - except Exception as e: - msg = f"Could not determine memory usage for device {index}: {e}" - _logger.error(msg) + # NVIDIA: Use pynvml (also used as fallback when OpenCL is unavailable). 
+ if vendor is None or "NVIDIA" in vendor: + try: + import pynvml - return result + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) + memory = pynvml.nvmlDeviceGetMemoryInfo(handle) + pynvml.nvmlShutdown() + return (memory.used, memory.free, memory.total) + except Exception as e: + if vendor is None: + msg = f"Could not get GPU memory info for device {device_index} via OpenCL or pynvml: {e}" + else: + msg = f"Could not get NVIDIA GPU memory info for device {device_index}: {e}" + _logger.error(msg) + raise RuntimeError(msg) from e + + # AMD: Use OpenCL extension. + elif "AMD" in vendor or "Advanced Micro Devices" in vendor: + try: + total = ocl_device.global_mem_size + free_memory_info = ocl_device.get_info(0x4038) + free_kb = ( + free_memory_info[0] + if isinstance(free_memory_info, list) + else free_memory_info + ) + free = free_kb * 1024 + used = total - free + return (used, free, total) + except Exception as e: + msg = ( + f"Could not get AMD GPU memory info for device {device_index}: {e}" + ) + _logger.error(msg) + raise RuntimeError(msg) from e class RepexRunner(_RunnerBase): @@ -601,9 +712,12 @@ def __init__(self, system, config): # Call the base class constructor. super().__init__(system, config) - # Make sure we're using the CUDA platform. - if self._config.platform != "cuda": - msg = "Currently replica exchange simulations can only be run on the CUDA platform." + # Make sure we're using the CUDA or OpenCL platform. + if self._config.platform not in ["cuda", "opencl"]: + msg = ( + "Currently replica exchange simulations can only be " + "run on the CUDA and OpenCL platforms." + ) _logger.error(msg) raise ValueError(msg) @@ -632,6 +746,11 @@ def __init__(self, system, config): # Store the name of the replica exchange swap acceptance matrix. self._repex_matrix = self._config.output_directory / "repex_matrix.txt" + # Sentinel file written only after a fully successful run (dynamics + + # trajectory consolidation + backup cleanup). 
Used to distinguish + # "truly complete" from "complete dynamics but killed during cleanup". + self._done_file = self._config.output_directory / "simulation.done" + # Flag that we haven't equilibrated. self._is_equilibration = False @@ -672,8 +791,18 @@ def __init__(self, system, config): } ) + # On a fresh (non-restart) run, remove any leftover sentinel so that + # a repeated run with --overwrite doesn't immediately exit as complete. + if not self._is_restart and self._done_file.exists(): + self._done_file.unlink() + # Create the dynamics cache. if not self._is_restart: + xml_filenames = ( + [self._filenames[i]["xml"] for i in range(len(self._lambda_values))] + if self._config.save_xml + else None + ) self._dynamics_cache = DynamicsCache( self._system, self._lambda_values, @@ -681,23 +810,17 @@ def __init__(self, system, config): self._num_gpus, dynamics_kwargs, gcmc_kwargs=self._gcmc_kwargs, - perturbed_positions=self._perturbed_positions, - perturbed_box=self._perturbed_box, + perturbed_system=self._perturbed_system, output_directory=self._config.output_directory, + xml_filenames=xml_filenames, ) + else: _logger.debug("Restarting from file") - # Check to see if the simulation is already complete. - time = self._system[0].time() - if time > self._config.runtime - self._config.timestep: - _logger.success("Simulation already complete. Exiting.") - _sys.exit(0) - else: - _logger.info( - f"Restarting at time {time}, time remaining = {self._config.runtime - time}" - ) - + # Load the dynamics cache first so we can read the simulation time + # from it (new format). Old-format restarts with .s3 files fall + # back to reading the time from the loaded Sire system. 
try: with open(self._repex_state, "rb") as f: self._dynamics_cache = _pickle.load(f) @@ -707,6 +830,47 @@ def __init__(self, system, config): ) raise e + # Derive the simulation time: prefer the value stored in the + # pickle (_time is set by the new-format _write_checkpoint_system); + # fall back to the Sire system for old-format checkpoints. + if self._dynamics_cache._time is not None and not isinstance( + self._system, list + ): + time = self._dynamics_cache._time + else: + time = self._system[0].time() + + # Check to see if the simulation is already complete. + if self._done_file.exists(): + # The runtime may have been extended beyond the previous run. + # If so, clear the sentinel and continue. + if time < self._config.runtime - self._config.timestep: + _logger.info( + "Runtime has been extended. Clearing completion sentinel." + ) + self._done_file.unlink() + else: + _logger.success("Simulation already complete. Exiting.") + _sys.exit(0) + + if time > self._config.runtime - self._config.timestep: + # Dynamics finished but the process was killed before cleanup + # completed (e.g. during DCD consolidation or backup removal). + # Consolidate any remaining trajectory chunks and tidy up. + _logger.warning( + "Simulation dynamics are complete but post-run cleanup was " + "not finished. Completing cleanup now." + ) + self._consolidate_trajectories() + self._cleanup() + self._done_file.touch() + _logger.success("Cleanup complete. Exiting.") + _sys.exit(0) + else: + _logger.info( + f"Restarting at time {time}, time remaining = {self._config.runtime - time}" + ) + # Make sure the number of replicas is the same. if len(self._dynamics_cache._lambdas) != self._config.num_lambda: _logger.error( @@ -714,6 +878,11 @@ def __init__(self, system, config): f"does not match the number of replicas in the configuration ({self._config.num_lambda})." ) + # For new-format restarts, set the system time so that dynamics + # objects are initialised with the correct integrator step count. 
+ if not isinstance(self._system, list): + self._system.set_time(time) + # Create the dynamics objects. self._dynamics_cache._create_dynamics( self._system, @@ -729,18 +898,43 @@ def __init__(self, system, config): for i in range(len(self._lambda_values)): dynamics, gcmc_sampler = self._dynamics_cache.get(i) - # Reset the OpenMM state. - dynamics.context().setState(self._dynamics_cache._openmm_states[i]) + # Reset the OpenMM state, applying the last replica exchange + # mixing so the correct post-mix state is restored. + state = self._dynamics_cache._states[i] + DynamicsCache._apply_openmm_state( + dynamics.context(), self._dynamics_cache._openmm_states[state] + ) - # Reset the GCMC water state. + # Reset the GCMC water state and restore statistics. if gcmc_sampler is not None: gcmc_sampler.push() - gcmc_sampler._set_water_state( - dynamics.context(), - states=self._dynamics_cache._gcmc_states[i], - force=True, - ) - gcmc_sampler.pop() + try: + gcmc_sampler._set_water_state( + dynamics.context(), + states=self._dynamics_cache._gcmc_states[state], + force=True, + ) + finally: + gcmc_sampler.pop() + if self._dynamics_cache._gcmc_stats[i] is not None: + gcmc_sampler.restore_stats(self._dynamics_cache._gcmc_stats[i]) + + # Log the GCMC sphere centre for each replica using the actual context + # positions (accurate for both fresh runs and restarts). + import openmm.unit as _omm_unit + + for i, lam in enumerate(self._lambda_values): + dynamics, gcmc_sampler = self._dynamics_cache.get(i) + if gcmc_sampler is not None and gcmc_sampler._reference is not None: + state = dynamics.context().getState(getPositions=True) + positions = state.getPositions(asNumpy=True).value_in_unit( + _omm_unit.angstrom + ) + target = gcmc_sampler._get_target_position(positions) + _logger.info( + f"Initial GCMC sphere centre for lambda {lam:.5f}: " + f"[{target[0]:.3f}, {target[1]:.3f}, {target[2]:.3f}] A" + ) # Conversion factor for reduced potential. 
kT = (_sr.units.k_boltz * self._config.temperature).to(_sr.units.kcal_per_mol) @@ -748,7 +942,14 @@ def __init__(self, system, config): # If restarting, subtract the time already run from the total runtime if self._config.restart: - time = self._system[0].time() + time = ( + self._dynamics_cache._time + if ( + self._dynamics_cache._time is not None + and not isinstance(self._system, list) + ) + else self._system[0].time() + ) self._config.runtime = str(self._config.runtime - time) # Work out the current block number. @@ -761,6 +962,30 @@ def __init__(self, system, config): else: self._start_block = 0 + # Create a terminal flip sampler per replica (if terminal groups were detected). + if self._terminal_groups: + from ._samplers import TerminalFlipSampler + + self._terminal_flip_samplers = [ + TerminalFlipSampler( + self._terminal_groups, + float(self._config.temperature.value()), + ) + for _ in self._lambda_values + ] + _logger.info( + f"Terminal flip samplers ready ({len(self._terminal_groups)} group(s))" + ) + else: + self._terminal_flip_samplers = None + + # Restore terminal flip sampler statistics from checkpoint (deferred + # until here so that _terminal_flip_samplers is always initialised first). + if self._is_restart and self._terminal_flip_samplers is not None: + for i in range(len(self._lambda_values)): + attempted, accepted = self._dynamics_cache._terminal_flip_stats[i] + self._terminal_flip_samplers[i].reset(attempted, accepted) + from threading import Lock # Create a lock to guard the dynamics cache. @@ -821,12 +1046,12 @@ def run(self): frac = 1.0 checkpoint_frequency = self._config.energy_frequency - # Store the number of repex cycles per block. - cycles_per_checkpoint = int(frac) + # Store the number of repex cycles per block (may be fractional). + cycles_per_checkpoint = frac # Otherwise, we don't checkpoint. 
else: - cycles_per_checkpoint = cycles + cycles_per_checkpoint = float(cycles) num_blocks = 1 rem = 0 @@ -886,6 +1111,39 @@ def run(self): _logger.error("Equilibration cancelled. Exiting.") _sys.exit(1) + # Write a checkpoint immediately after equilibration so that a restart + # after an early production crash doesn't need to re-equilibrate. + if self._is_equilibration and not self._is_restart: + lock = _FileLock(self._lock_file) + with lock.acquire(timeout=self._config.timeout.to("seconds")): + for j in range(num_checkpoint_batches): + replicas = replica_list[ + j * num_checkpoint_workers : (j + 1) * num_checkpoint_workers + ] + with ThreadPoolExecutor( + max_workers=num_checkpoint_workers + ) as executor: + try: + for index, error in executor.map( + self._checkpoint, + replicas, + repeat(self._lambda_values), + repeat(-1), + repeat(cycles), + ): + if error is not None: + msg = ( + f"Post-equilibration checkpoint failed for {_lam_sym} = " + f"{self._lambda_values[index]:.5f}:\n{error}" + ) + _logger.error(msg) + raise error + except KeyboardInterrupt: + _logger.error( + "Post-equilibration checkpoint cancelled. Exiting." + ) + _sys.exit(1) + # Current block number. block = self._start_block @@ -908,9 +1166,30 @@ def run(self): else: cycles_per_gcmc = cycles + 1 + # Work out the number of cycles per terminal flip move. + if ( + self._config.terminal_flip_frequency is not None + and self._terminal_flip_samplers is not None + ): + cycles_per_flip = max( + 1, + round( + ( + self._config.terminal_flip_frequency + / self._config.energy_frequency + ).value() + ), + ) + else: + cycles_per_flip = cycles + 1 + + # Initialise the threshold for the next checkpoint cycle. This is a float + # to handle non-integer ratios between the checkpoint and energy frequencies. + next_checkpoint = cycles_per_checkpoint + # Perform the replica exchange simulation. 
for i in range(cycles): - _logger.info(f"Running dynamics for cycle {i+1} of {cycles}") + _logger.info(f"Running dynamics for cycle {i + 1} of {cycles}") # Log the states. This is the replica index for the state (positions # and velocities) used to seed each replica for the current cycle. @@ -924,14 +1203,28 @@ def run(self): # Clear the results list. results = [] - # Whether to checkpoint. - is_checkpoint = i > 0 and i % cycles_per_checkpoint == 0 + # Whether to checkpoint. Use a float threshold to correctly handle + # non-integer ratios between the checkpoint and energy frequencies. + is_checkpoint = (i + 1) >= next_checkpoint - 1e-10 # Whether to perform a GCMC move before the dynamics block. - is_gcmc = i % cycles_per_gcmc == 0 + is_gcmc = (i + 1) % cycles_per_gcmc == 0 + + # Whether to perform a terminal flip move before the dynamics block. + is_terminal_flip = (i + 1) % cycles_per_flip == 0 # Whether a frame is saved at the end of the cycle. - write_gcmc_ghosts = i > 0 and i % cycles_per_frame == 0 + write_gcmc_ghosts = (i + 1) % cycles_per_frame == 0 + + # Current simulation time in ns for energy components saving. + time_ns = ( + ( + self._start_block * checkpoint_frequency + + (i + 1) * self._config.energy_frequency + ).to("ns") + if self._config.save_energy_components + else None + ) # Run a dynamics block for each replica, making sure only each GPU is only # oversubscribed by a factor of self._config.oversubscription_factor. @@ -945,6 +1238,8 @@ def run(self): repeat(self._lambda_values), repeat(is_gcmc), repeat(write_gcmc_ghosts), + repeat(is_terminal_flip), + repeat(time_ns), ): if not result: _logger.error( @@ -968,8 +1263,7 @@ def run(self): for j in range(num_checkpoint_batches): # Get the indices of the replicas in this batch. 
replicas = replica_list[ - j - * num_checkpoint_workers : (j + 1) + j * num_checkpoint_workers : (j + 1) * num_checkpoint_workers ] with ThreadPoolExecutor(max_workers=num_workers) as executor: @@ -992,8 +1286,7 @@ def run(self): for j in range(num_checkpoint_batches): # Get the indices of the replicas in this batch. replicas = replica_list[ - j - * num_checkpoint_workers : (j + 1) + j * num_checkpoint_workers : (j + 1) * num_checkpoint_workers ] with ThreadPoolExecutor(max_workers=num_workers) as executor: @@ -1016,46 +1309,59 @@ def run(self): _logger.error("Checkpoint cancelled. Exiting.") _sys.exit(1) - if i < cycles: - # Assemble and energy matrix from the results. - _logger.info("Assembling energy matrix") - energy_matrix = self._assemble_results(results) - - # Mix the replicas. - _logger.info("Mixing replicas") - self._dynamics_cache.set_states( - self._mix_replicas( - self._config.num_lambda, - energy_matrix, - self._dynamics_cache.get_proposed(), - self._dynamics_cache.get_accepted(), - ) + # Assemble an energy matrix from the results. + _logger.info("Assembling energy matrix") + energy_matrix = self._assemble_results(results) + + # Mix the replicas. + _logger.info("Mixing replicas") + old_states = self._dynamics_cache.get_states() + self._dynamics_cache.set_states( + self._mix_replicas( + self._config.num_lambda, + energy_matrix, + self._dynamics_cache.get_proposed(), + self._dynamics_cache.get_accepted(), ) - self._dynamics_cache.mix_states() - - # This is a checkpoint cycle. - if is_checkpoint: - # Update the block number. - block += 1 - - # Guard the repex state and transition matrix saving with a file lock. - lock = _FileLock(self._lock_file) - with lock.acquire(timeout=self._config.timeout.to("seconds")): - # Save the transition matrix. - _logger.info("Saving replica exchange transition matrix") - self._save_transition_matrix() - - # Backup the dynamics cache pickle file, if it exists. 
- if self._repex_state.exists(): - _copyfile( - self._repex_state, - self._repex_state.with_suffix(".pkl.bak"), - ) + ) + self._dynamics_cache.mix_states(old_states) + + # Snapshot the pre-run state for crash recovery. + if self._config.auto_fix_minimise: + for i, state in enumerate(self._dynamics_cache.get_states()): + self._dynamics_cache._dynamics[i]._d._pre_run_state = ( + self._dynamics_cache._dynamics[i] + .context() + .getState(getPositions=True, getVelocities=True) + ) + + # This is a checkpoint cycle. + if is_checkpoint: + # Update the block number. + block += 1 - # Pickle the dynamics cache. - _logger.info("Saving replica exchange state") - with open(self._repex_state, "wb") as f: - _pickle.dump(self._dynamics_cache, f) + # Advance the checkpoint threshold. + next_checkpoint += cycles_per_checkpoint + + # Guard the repex state and transition matrix saving with a file lock. + lock = _FileLock(self._lock_file) + with lock.acquire(timeout=self._config.timeout.to("seconds")): + # Save the transition matrix. + _logger.info("Saving replica exchange transition matrix") + self._save_transition_matrix() + + # Backup the dynamics cache pickle file, if it exists. + if self._repex_state.exists(): + _copyfile( + self._repex_state, + self._repex_state.with_suffix(".pkl.bak"), + ) + + # Pickle the dynamics cache. + _logger.info("Saving replica exchange state") + self._save_sampler_stats() + with open(self._repex_state, "wb") as f: + _pickle.dump(self._dynamics_cache, f) # Record the end time for the production block. prod_end = time() @@ -1075,6 +1381,11 @@ def run(self): # Pickle final state of the dynamics cache. 
_logger.info("Saving final replica exchange state") + if self._terminal_flip_samplers is not None: + self._dynamics_cache._terminal_flip_stats = [ + [s.num_attempted, s.num_accepted] + for s in self._terminal_flip_samplers + ] with open(self._repex_state, "wb") as f: _pickle.dump(self._dynamics_cache, f) @@ -1098,12 +1409,18 @@ def run(self): # Delete all backup files from the working directory. self._cleanup() + # Write the sentinel file to signal that the run completed fully, + # including trajectory consolidation and cleanup. + self._done_file.touch() + def _run_block( self, index, lambdas, is_gcmc=False, write_gcmc_ghosts=False, + is_terminal_flip=False, + time_ns=None, ): """ Run a dynamics block for a given replica. @@ -1127,6 +1444,14 @@ def _run_block( Whether to write the indices of GCMC ghost residues to file. + is_terminal_flip: bool + Whether a terminal flip MC move should be performed before the + dynamics block. + + time_ns: float or None + The current simulation time in nanoseconds, used when saving energy + components. If None, energy components are not saved. + Returns ------- @@ -1148,24 +1473,50 @@ def _run_block( # Get the dynamics object (and GCMC sampler). dynamics, gcmc_sampler = self._dynamics_cache.get(index) - _logger.info(f"Running dynamics at {_lam_sym} = {lam:.5f}") + # Track whether any MC move changed the context positions so we + # can update _pre_run_state once at the end. Only needed when + # crash recovery is enabled. + needs_pre_run_snapshot = False + auto_fix_minimise = self._config.auto_fix_minimise - # Draw new velocities from the Maxwell-Boltzmann distribution. - dynamics.randomise_velocities() - - # Perform a GCMC move. For repex this needs to be done before the - # dynamics block so that the final energies, which are used in the - # repex acceptance criteria, are correct. - if is_gcmc and gcmc_sampler is not None: - # Push the PyCUDA context on top of the stack. 
+ # Perform the GCMC move before dynamics so that the energies + # computed during dynamics are consistent with the state used + # for replica exchange mixing. + if gcmc_sampler is not None and is_gcmc: gcmc_sampler.push() + try: + _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() + + if auto_fix_minimise: + needs_pre_run_snapshot = True + + # Write ghost residues immediately after the GCMC move so the + # ghost state and frame (saved during dynamics) are consistent. + if write_gcmc_ghosts: + gcmc_sampler.write_ghost_residues() + + # Perform a terminal flip move before dynamics if requested. + if self._terminal_flip_samplers is not None and is_terminal_flip: + _logger.info(f"Performing terminal flip move at {_lam_sym} = {lam:.5f}") + if self._terminal_flip_samplers[index].move(dynamics.context()): + if auto_fix_minimise: + needs_pre_run_snapshot = True + + # Snapshot the context state for crash recovery if any MC move + # changed positions. + if needs_pre_run_snapshot: + dynamics._d._pre_run_state = dynamics.context().getState( + getPositions=True, getVelocities=True + ) - # Perform the GCMC move. - _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") - gcmc_sampler.move(dynamics.context()) + _logger.info(f"Running dynamics at {_lam_sym} = {lam:.5f}") - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() + # Draw new velocities from the Maxwell-Boltzmann distribution. + if self._config.randomise_velocities: + dynamics.randomise_velocities() # Run the dynamics. 
dynamics.run( @@ -1175,7 +1526,7 @@ def _run_block( lambda_windows=lambdas, rest2_scale_factors=self._rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=self._config.num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, @@ -1190,31 +1541,21 @@ def _run_block( ), ) - # Set the state. - self._dynamics_cache.save_openmm_state(index) - - # Save the GCMC state. if gcmc_sampler is not None: + # Save the GCMC state. self._dynamics_cache.save_gcmc_state(index) - # The frame frequency was hit, so write the indices of the - # current ghost water residues to file. - if write_gcmc_ghosts: - gcmc_sampler.write_ghost_residues() + + # Save the OpenMM state. + self._dynamics_cache.save_openmm_state(index) + + # Save the energy contribution for each force. + if self._config.save_energy_components and time_ns is not None: + self._save_energy_components(index, dynamics.context(), time_ns) # Get the energy at each lambda value. - energies = ( - dynamics._d.energy_trajectory() - .to_pandas(to_alchemlyb=True, energy_unit="kcal/mol") - .iloc[-1, :] - .to_numpy() - ) + energies = dynamics._current_energy_array() except Exception as e: - try: - # Save the energy components for debugging purposes. - self._save_energy_components(index, dynamics.context()) - except: - pass return False, index, e # Return the index and the energies. @@ -1252,18 +1593,16 @@ def _minimise(self, index): # Get the dynamics object (and GCMC sampler). dynamics, gcmc_sampler = self._dynamics_cache.get(index) - if gcmc_sampler is not None: - # Push the PyCUDA context on top of the stack. 
+ if gcmc_sampler is not None and not self._is_restart: gcmc_sampler.push() - - _logger.info( - f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" - ) - for i in range(100): - gcmc_sampler.move(dynamics.context()) - - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() + try: + _logger.info( + f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" + ) + for i in range(100): + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() # Minimise. dynamics.minimise(timeout=self._config.timeout) @@ -1346,17 +1685,15 @@ def _equilibrate(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: - # Push the PyCUDA context on top of the stack. gcmc_sampler.push() - - _logger.info( - f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" - ) - for i in range(100): - gcmc_sampler.move(dynamics.context()) - - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() + try: + _logger.info( + f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" + ) + for i in range(100): + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() # Store the current water state. 
water_state = gcmc_sampler.water_state() @@ -1396,6 +1733,7 @@ def _equilibrate(self, index): dynamics_kwargs["device"] = device dynamics_kwargs["lambda_value"] = self._lambda_values[index] dynamics_kwargs["rest2_scale"] = self._rest2_scale_factors[index] + dynamics_kwargs["timestep"] = self._config._equilibration_timestep dynamics_kwargs["constraint"] = constraint dynamics_kwargs["perturbable_constraint"] = perturbable_constraint @@ -1415,7 +1753,7 @@ def _equilibrate(self, index): energy_frequency=0, frame_frequency=0, save_velocities=False, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, save_crash_report=self._config.save_crash_report, ) @@ -1451,6 +1789,14 @@ def _equilibrate(self, index): if gcmc_sampler is not None: self._reset_gcmc_sampler(gcmc_sampler, dynamics) + # Compute the current number of waters in the GCMC sampling + # volume after equilibration. + gcmc_sampler.push() + try: + gcmc_sampler.num_waters(context=dynamics.context()) + finally: + gcmc_sampler.pop() + # Set the new dynamics object. self._dynamics_cache.set(index, dynamics) @@ -1459,11 +1805,6 @@ def _equilibrate(self, index): ) except Exception as e: - try: - # Save the energy components for debugging purposes. - self._save_energy_components(index, dynamics.context()) - except: - pass return False, index, e return True, index, None @@ -1506,14 +1847,18 @@ def _compute_energies(self, index): # Loop over the states. for i in range(self._config.num_lambda): # Set the state. - dynamics.context().setState(self._dynamics_cache._openmm_states[i]) + DynamicsCache._apply_openmm_state( + dynamics.context(), self._dynamics_cache._openmm_states[i] + ) dynamics._d._clear_state() # Compute and store the energy for this state. energies[i] = dynamics.current_potential_energy().value() # Reset the state. 
- dynamics.context().setState(self._dynamics_cache._openmm_states[index]) + DynamicsCache._apply_openmm_state( + dynamics.context(), self._dynamics_cache._openmm_states[index] + ) return index, energies @@ -1538,6 +1883,40 @@ def _assemble_results(self, results): return matrix + def _check_restart(self): + """ + Check the output directory for a valid restart state. + + If per-replica checkpoint stream files (.s3) exist the base class is + used to load them (old format, backwards compatible). Otherwise the + repex state pickle is used and the original input system is returned + directly, since positions and velocities come from the OpenMM states + stored in the pickle. + """ + from pathlib import Path as _Path_local + + checkpoint_path = _Path_local(self._filenames[0]["checkpoint"]) + if checkpoint_path.exists(): + _logger.info("Restarting from legacy stream file checkpoint.") + return super()._check_restart() + + repex_state = self._config.output_directory / "repex_state.pkl" + if not repex_state.exists(): + return False, self._system + + return True, self._system + + def _write_checkpoint_system(self, system, index, context=None, gcmc_sampler=None): + """ + Record the current simulation time in the dynamics cache. + + For repex, per-replica stream files are not written. The simulation + time is stored in the dynamics cache pickle instead, and positions and + velocities are already stored as compact numpy arrays in the OpenMM + state dict. + """ + self._dynamics_cache._time = system.time() + def _checkpoint(self, index, lambdas, block, num_blocks, is_final_block=False): """ Checkpoint the simulation. @@ -1579,10 +1958,6 @@ def _checkpoint(self, index, lambdas, block, num_blocks, is_final_block=False): # Commit the current system. system = dynamics.commit() - # If performing GCMC, then we need to flag the ghost waters. - if gcmc_sampler is not None: - system = gcmc_sampler._flag_ghost_waters(system) - # Get the simulation speed. 
speed = dynamics.time_speed() @@ -1599,24 +1974,43 @@ def _checkpoint(self, index, lambdas, block, num_blocks, is_final_block=False): # dynamics object. dynamics._d._sire_mols.delete_all_frames() - _logger.info( - f"Finished block {block+1} of {self._start_block + num_blocks} " - f"for {_lam_sym} = {lam:.5f}" - ) + if block == -1: + _logger.info( + f"Writing post-equilibration checkpoint for {_lam_sym} = {lam:.5f}" + ) + else: + _logger.info( + f"Finished block {block + 1} of {self._start_block + num_blocks} " + f"for {_lam_sym} = {lam:.5f}" + ) # Log the number of waters within the GCMC sampling volume. if gcmc_sampler is not None: - # Push the PyCUDA context on top of the stack. gcmc_sampler.push() + try: + n_moves = gcmc_sampler._num_moves + acc_str = ( + f", acceptance rate = {gcmc_sampler.move_acceptance_probability():.3f}" + f" (ins = {gcmc_sampler.num_insertions()}, del = {gcmc_sampler.num_deletions()})" + if n_moves > 0 + else "" + ) + _logger.info( + f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " + f"is {gcmc_sampler.num_waters()}{acc_str}" + ) + finally: + gcmc_sampler.pop() + # Log terminal flip acceptance rate for this replica. + if self._terminal_flip_samplers is not None: + sampler = self._terminal_flip_samplers[index] _logger.info( - f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " - f"is {gcmc_sampler.num_waters()}" + f"Terminal flip acceptance rate at {_lam_sym} = {lam:.5f}: " + f"{sampler.acceptance_rate:.3f} " + f"({sampler.num_accepted}/{sampler.num_attempted})" ) - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() - if is_final_block: _logger.success(f"{_lam_sym} = {lam:.5f} complete") @@ -1625,6 +2019,45 @@ def _checkpoint(self, index, lambdas, block, num_blocks, is_final_block=False): except Exception as e: return index, e + def _consolidate_trajectories(self): + """ + Consolidate any remaining trajectory chunk files into the final DCD. 
+ + Called when a restart detects that dynamics completed but the process + was killed before post-run cleanup finished. Safe to call when some + replicas are already fully consolidated (no chunks left) — those are + skipped automatically. + """ + from glob import glob as _glob_local + from pathlib import Path as _Path_local + from shutil import copyfile as _copyfile_local + + if not self._config.save_trajectories: + return + + for i in range(len(self._lambda_values)): + traj_filename = self._filenames[i]["trajectory"] + chunk_pattern = f"{self._filenames[i]['trajectory_chunk']}*" + traj_chunks = sorted(_glob_local(chunk_pattern)) + + # On a restart, prepend an existing final DCD as .prev so frames + # from a previous (possibly partial) consolidation are preserved. + path = _Path_local(traj_filename) + if path.exists() and path.stat().st_size > 0: + prev = f"{traj_filename}.prev" + _copyfile_local(traj_filename, prev) + traj_chunks = [prev] + traj_chunks + + if not traj_chunks: + continue + + topology0 = self._filenames["topology0"] + mols = _sr.load([topology0] + traj_chunks) + _sr.save(mols.trajectory(), traj_filename, format=["DCD"]) + + for chunk in traj_chunks: + _Path_local(chunk).unlink() + @staticmethod @_njit def _mix_replicas(num_replicas, energy_matrix, proposed, accepted): @@ -1686,6 +2119,21 @@ def _mix_replicas(num_replicas, energy_matrix, proposed, accepted): return states + def _save_sampler_stats(self): + """ + Save GCMC and terminal flip sampler statistics to the dynamics cache + prior to pickling. 
+ """ + for i in range(len(self._lambda_values)): + _, gcmc_sampler = self._dynamics_cache.get(i) + if gcmc_sampler is not None: + self._dynamics_cache._gcmc_stats[i] = gcmc_sampler.get_stats() + + if self._terminal_flip_samplers is not None: + self._dynamics_cache._terminal_flip_stats = [ + [s.num_attempted, s.num_accepted] for s in self._terminal_flip_samplers + ] + def _save_transition_matrix(self): """ Internal method to save the replica exchange transition matrix. @@ -1736,14 +2184,12 @@ def _reset_gcmc_sampler(gcmc_sampler, dynamics): # clears the associated OpenMM forces. gcmc_sampler.reset() - # Push the PyCUDA context on top of the stack. gcmc_sampler.push() - - # Set the water state. - gcmc_sampler._set_water_state(dynamics.context(), force=True) - - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() + try: + # Set the water state. + gcmc_sampler._set_water_state(dynamics.context(), force=True) + finally: + gcmc_sampler.pop() # Re-bind the GCMC sampler to the dynamics object. gcmc_sampler.bind_dynamics(dynamics) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 12a637c6..fb3980f6 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -40,11 +40,20 @@ class Runner(_RunnerBase): Standard simulation runner class. (Uncoupled simulations.) """ - from multiprocessing import Manager + _manager = None - _manager = Manager() - _lock = _manager.Lock() - _queue = _manager.Queue() + @classmethod + def _init_manager(cls): + """ + Initialise the shared-memory Manager the first time a Runner is + constructed in the parent process. Deferred from class definition time + so that importing this module does not fork a manager process before + OpenMM threads have been started. + """ + if cls._manager is None: + from multiprocessing import Manager + + cls._manager = Manager() def __init__(self, system, config): """ @@ -73,6 +82,16 @@ def __init__(self, system, config): # Call the base class constructor. 
super().__init__(system, config) + # Initialise the shared-memory manager lazily so that importing this + # module does not fork a manager process before OpenMM threads exist. + Runner._init_manager() + + # Create Lock and Queue as instance attributes so that they are + # pickled as manager proxies and shared correctly across all spawned + # worker processes, preventing race conditions on the GPU pool. + self._lock = Runner._manager.Lock() + self._queue = Runner._manager.Queue() + # Store the array of lambda values for energy sampling. if self._config.lambda_energy is not None: self._lambda_energy = self._config.lambda_energy.copy() @@ -155,7 +174,7 @@ def _initialise_gpu_devices(num_devices, oversubscription_factor=1): Returns ------- - devices : [(str, int)] + devices: [(str, int)] List of available device numbers with oversubscription factor. """ devices = [] @@ -247,6 +266,69 @@ def run(self): # Cleanup backup files. self._cleanup() + def _check_restart(self): + """ + Check the output directory for a valid restart state. + + Detects new-format (.npz) checkpoints and falls back to the legacy + .s3 stream file format when only old checkpoints are present. + """ + from pathlib import Path as _Path + + npz_path = _Path(self._filenames[0]["checkpoint_state"]) + s3_path = _Path(self._filenames[0]["checkpoint"]) + + if npz_path.exists(): + return True, self._system + elif s3_path.exists(): + _logger.info("Restarting from legacy stream file checkpoint.") + return super()._check_restart() + else: + return False, self._system + + def _write_checkpoint_system(self, system, index, context=None, gcmc_sampler=None): + """ + Write the system state to a compact numpy checkpoint file. + + Saves positions, velocities, box vectors, simulation time, and (for + GCMC) ghost water indices to a .npz file. The legacy .s3 stream file + is not written. + + If no context is provided (should not happen in normal operation), + falls back to the base .s3 implementation. 
+ """ + if context is None: + super()._write_checkpoint_system(system, index) + return + + import openmm.unit as _omm_unit + + state = context.getState(getPositions=True, getVelocities=True) + pos = state.getPositions(asNumpy=True).value_in_unit(_omm_unit.nanometer) + vel = state.getVelocities(asNumpy=True).value_in_unit( + _omm_unit.nanometer / _omm_unit.picosecond + ) + time_ps = system.time().to("ps") + + save_kwargs = { + "positions": pos, + "velocities": vel, + "time_ps": _np.array([time_ps]), + } + + box = state.getPeriodicBoxVectors(asNumpy=True) + if box is not None: + save_kwargs["box"] = box.value_in_unit(_omm_unit.nanometer) + + if gcmc_sampler is not None: + # water_state() returns 1 for active, 0 for ghost. + water_state = gcmc_sampler.water_state() + save_kwargs["ghost_water_indices"] = _np.where(water_state == 0)[0].astype( + _np.int32 + ) + + _np.savez(self._filenames[index]["checkpoint_state"], **save_kwargs) + def run_window(self, index): """ Run a single lamdba window. @@ -276,9 +358,16 @@ def run_window(self, index): if self._is_restart: _logger.debug(f"Restarting {_lam_sym} = {lambda_value} from file") - system = self._system[index].clone() - - time = system.time() + if isinstance(self._system, list): + # Old format: system with saved positions loaded from .s3 stream file. + system = self._system[index].clone() + time = system.time() + else: + # New format: original input system; time stored in .npz checkpoint. + system = self._system.clone() + time = _sr.u( + f"{float(_np.load(self._filenames[index]['checkpoint_state'])['time_ps'].item()):.6f} ps" + ) if time > self._config.runtime - self._config.timestep: _logger.success( f"{_lam_sym} = {lambda_value} already complete. Skipping." @@ -379,7 +468,14 @@ def _run( # Check for completion if this is a restart. if is_restart: - time = system.time() + if isinstance(self._system, list): + time = system.time() + else: + # New format: time stored in .npz, not in the Sire system. 
+ time = _sr.u( + f"{float(_np.load(self._filenames[index]['checkpoint_state'])['time_ps'].item()):.6f} ps" + ) + system.set_time(time) if time > self._config.runtime - self._config.timestep: _logger.success( f"{_lam_sym} = {lambda_value} already complete. Skipping." @@ -434,18 +530,24 @@ def generate_lam_vals(lambda_base, increment=0.001): # Get the GCMC system. system = gcmc_sampler.system() - # Log the initial position of the GCMC sphere. - if gcmc_sampler._reference is not None: - positions = _sr.io.get_coords_array(system) - target = gcmc_sampler._get_target_position(positions) - _logger.info( - f"Initial GCMC sphere centre at {_lam_sym} = {lambda_value:.5f}: " - f"[{target[0]:.3f}, {target[1]:.3f}, {target[2]:.3f}] A" - ) - else: gcmc_sampler = None + # Create the terminal flip sampler (if terminal groups were detected). + if self._terminal_groups: + from ._samplers import TerminalFlipSampler + + terminal_flip_sampler = TerminalFlipSampler( + self._terminal_groups, + float(self._config.temperature.value()), + ) + _logger.info( + f"Terminal flip sampler ready at {_lam_sym} = {lambda_value:.5f} " + f"(every {self._config.terminal_flip_frequency})" + ) + else: + terminal_flip_sampler = None + # Minimisation. if self._config.minimise: constraint = self._config.constraint @@ -512,6 +614,13 @@ def generate_lam_vals(lambda_base, increment=0.001): # Create the dynamics object. dynamics = system.dynamics(**dynamics_kwargs) + # Write the OpenMM XML file to the output directory. + if self._config.save_xml and not is_restart: + _logger.info( + f"Writing OpenMM XML for {_lam_sym} = {lambda_value:.5f}" + ) + dynamics.to_xml(self._filenames[index]["xml"]) + # Equilibrate with GCMC moves. if gcmc_sampler is not None: # Bind the GCMC sampler to the dynamics object. 
@@ -521,8 +630,12 @@ def generate_lam_vals(lambda_base, increment=0.001): f"Equilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" ) - for i in range(100): - gcmc_sampler.move(dynamics.context()) + gcmc_sampler.push() + try: + for i in range(100): + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() # Run without saving energies or frames. dynamics.run( @@ -530,7 +643,7 @@ def generate_lam_vals(lambda_base, increment=0.001): energy_frequency=0, frame_frequency=0, save_velocities=False, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, save_crash_report=self._config.save_crash_report, ) @@ -544,10 +657,6 @@ def generate_lam_vals(lambda_base, increment=0.001): system.set_time(_sr.u("0ps")) except Exception as e: - try: - self._save_energy_components(index, dynamics.context()) - except: - pass raise RuntimeError(f"Equilibration failed: {e}") # Work out the lambda values for finite-difference gradient analysis. @@ -597,6 +706,34 @@ def generate_lam_vals(lambda_base, increment=0.001): # Create the dynamics object. dynamics = system.dynamics(**dynamics_kwargs) + # Write the OpenMM XML file to the output directory (only if not already + # written during equilibration). + if self._config.save_xml and not is_restart and not is_equilibrated: + _logger.info(f"Writing OpenMM XML for {_lam_sym} = {lambda_value:.5f}") + dynamics.to_xml(self._filenames[index]["xml"]) + + # For new-format restarts, apply saved positions/velocities/box to context. 
+ _new_format_restart = is_restart and not isinstance(self._system, list) + if _new_format_restart: + import openmm.unit as _omm_unit + + _npz_state = _np.load(self._filenames[index]["checkpoint_state"]) + dynamics.context().setPositions( + _npz_state["positions"] * _omm_unit.nanometer + ) + dynamics.context().setVelocities( + _npz_state["velocities"] * _omm_unit.nanometer / _omm_unit.picosecond + ) + if "box" in _npz_state: + from openmm import Vec3 as _Vec3 + + _box = _npz_state["box"] + dynamics.context().setPeriodicBoxVectors( + _Vec3(*_box[0]) * _omm_unit.nanometer, + _Vec3(*_box[1]) * _omm_unit.nanometer, + _Vec3(*_box[2]) * _omm_unit.nanometer, + ) + # Reset the GCMC sampler. This resets the sampling statistics and clears # the associated OpenMM forces. if gcmc_sampler is not None: @@ -608,36 +745,91 @@ def generate_lam_vals(lambda_base, increment=0.001): # If this is a restart, then we need to reset the GCMC water state # to match that of the restart system. if self._is_restart: - from openmm.unit import angstrom + if isinstance(self._system, list): + # Old format: restore ghost waters from cached indices/positions. + from openmm.unit import angstrom - # First set all waters to non-ghosts. - gcmc_sampler._set_water_state( - dynamics.context(), - states=_np.ones(len(gcmc_sampler._water_indices)), - force=True, - ) + gcmc_sampler.push() + try: + # First set all waters to non-ghosts. + gcmc_sampler._set_water_state( + dynamics.context(), + states=_np.ones(len(gcmc_sampler._water_indices)), + force=True, + ) - # Now set the ghost waters. - gcmc_sampler._set_water_state( - dynamics.context(), - self._restart_ghost_waters[index], - states=_np.zeros(len(gcmc_sampler._water_indices)), - force=True, - ) + # Now set the ghost waters. 
+ gcmc_sampler._set_water_state( + dynamics.context(), + self._restart_ghost_waters[index], + states=_np.zeros(len(gcmc_sampler._water_indices)), + force=True, + ) + finally: + gcmc_sampler.pop() - # Finally, reset the context positions to match the restart system. - dynamics.context().setPositions( - self._restart_positions[index] * angstrom - ) + # Finally, reset the context positions to match the restart system. + dynamics.context().setPositions( + self._restart_positions[index] * angstrom + ) + else: + # New format: positions already applied; restore ghost water state. + ghost_idxs = _npz_state["ghost_water_indices"].tolist() + gcmc_sampler.push() + try: + gcmc_sampler._set_water_state( + dynamics.context(), + states=_np.ones(len(gcmc_sampler._water_indices)), + force=True, + ) + if ghost_idxs: + gcmc_sampler._set_water_state( + dynamics.context(), + ghost_idxs, + states=_np.zeros(len(gcmc_sampler._water_indices)), + force=True, + ) + finally: + gcmc_sampler.pop() # Otherwise, if we've performed equilibration, then we need to reset # the water state in the new context to match the equilibrated system. elif is_equilibrated: # Reset the water state. - gcmc_sampler._set_water_state( - dynamics.context(), - force=True, - ) + gcmc_sampler.push() + try: + gcmc_sampler._set_water_state( + dynamics.context(), + force=True, + ) + gcmc_sampler.num_waters(context=dynamics.context()) + finally: + gcmc_sampler.pop() + + # Restore sampler statistics from a previous run. + if self._is_restart: + stats = self._load_sampler_stats(index) + if stats is not None: + if gcmc_sampler is not None and "gcmc" in stats: + gcmc_sampler.restore_stats(stats["gcmc"]) + if terminal_flip_sampler is not None and "terminal_flip" in stats: + attempted, accepted = stats["terminal_flip"] + terminal_flip_sampler.reset(attempted, accepted) + + # Log the GCMC sphere centre using the actual context positions + # (accurate for both fresh runs and restarts). 
+ if gcmc_sampler is not None and gcmc_sampler._reference is not None: + import openmm.unit as _omm_unit + + state = dynamics.context().getState(getPositions=True) + positions = state.getPositions(asNumpy=True).value_in_unit( + _omm_unit.angstrom + ) + target = gcmc_sampler._get_target_position(positions) + _logger.info( + f"Initial GCMC sphere centre at {_lam_sym} = {lambda_value:.5f}: " + f"[{target[0]:.3f}, {target[1]:.3f}, {target[2]:.3f}] A" + ) # Set the number of neighbours used for the energy calculation. # If not None, then we add one to account for the extra windows @@ -653,19 +845,45 @@ def generate_lam_vals(lambda_base, increment=0.001): # Store the checkpoint time in nanoseconds. checkpoint_interval = checkpoint_frequency.to("ns") + # Write a checkpoint immediately after equilibration so that a restart + # after an early production crash doesn't need to re-equilibrate. + if is_equilibrated: + lock = _FileLock(self._lock_file) + with lock.acquire(timeout=self._config.timeout.to("seconds")): + _, error = self._checkpoint( + system, + index, + block=-1, + speed=0.0, + lambda_energy=lambda_energy, + lambda_grad=lambda_grad, + context=dynamics.context(), + gcmc_sampler=gcmc_sampler, + ) + if error is not None: + msg = ( + f"Post-equilibration checkpoint failed for {_lam_sym} = " + f"{lambda_value:.5f}:\n{error}" + ) + _logger.error(msg) + raise error + _logger.info( + f"Writing post-equilibration checkpoint " + f"for {_lam_sym} = {lambda_value:.5f}" + ) + # Store the start time. start = _timer() # Run the simulation, checkpointing in blocks. if checkpoint_frequency.value() > 0.0: - # Calculate the number of blocks and the remainder time. frac = (time / checkpoint_frequency).value() # Handle the case where the runtime is less than the checkpoint frequency. 
if frac < 1.0: frac = 1.0 - checkpoint_frequency = _sr.u(f"{time} ps") + checkpoint_frequency = time checkpoint_interval = checkpoint_frequency.to("ns") num_blocks = int(frac) @@ -681,30 +899,128 @@ def generate_lam_vals(lambda_base, increment=0.001): # Run the dynamics. try: - # GCMC specific handling. Note that the frame and checkpoint - # frequencies are multiples of the energy frequency so we can - # run in energy frequency blocks with no remainder. - if self._config.gcmc: - # Initialise the run time and time at which the next frame is saved. + # Run in sub-blocks when any MC sampler is active or energy + # components are being saved; otherwise run the full block. + needs_subblock = ( + gcmc_sampler is not None + or terminal_flip_sampler is not None + or self._config.save_energy_components + ) + if needs_subblock: runtime = _sr.u("0ps") - save_frames = self._config.frame_frequency > 0 - next_frame = self._config.frame_frequency + ec_elapsed = _sr.u("0ps") + flip_counter = 0 + save_frames = ( + gcmc_sampler is not None + and self._config.frame_frequency > 0 + ) + next_frame = ( + self._config.frame_frequency if save_frames else None + ) + # Sub-block size: shortest active MC frequency, or + # energy_frequency when only saving energy components. + if ( + gcmc_sampler is not None + and terminal_flip_sampler is not None + ): + block_size = min( + self._config.gcmc_frequency, + self._config.terminal_flip_frequency, + ) + elif gcmc_sampler is not None: + block_size = self._config.gcmc_frequency + elif terminal_flip_sampler is not None: + block_size = self._config.terminal_flip_frequency + else: + block_size = self._config.energy_frequency + # How often to attempt each MC move (in sub-block units). 
+ gcmc_every = ( + max( + 1, + round( + (self._config.gcmc_frequency / block_size).value() + ), + ) + if gcmc_sampler is not None + else None + ) + mc_flip_every = ( + max( + 1, + round( + ( + self._config.terminal_flip_frequency + / block_size + ).value() + ), + ) + if terminal_flip_sampler is not None + else None + ) + + while runtime < checkpoint_frequency: + needs_pre_run_snapshot = False + + # GCMC move. + if ( + gcmc_sampler is not None + and flip_counter % gcmc_every == 0 + ): + _logger.info( + f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" + ) + gcmc_sampler.push() + try: + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() + needs_pre_run_snapshot = self._config.auto_fix_minimise + + # Terminal flip move. + if ( + terminal_flip_sampler is not None + and flip_counter % mc_flip_every == 0 + ): + _logger.info( + f"Performing terminal flip move at " + f"{_lam_sym} = {lambda_value:.5f}" + ) + flip_accepted = terminal_flip_sampler.move( + dynamics.context() + ) + if flip_accepted: + if self._config.auto_fix_minimise: + needs_pre_run_snapshot = True + if self._config.randomise_velocities: + dynamics.randomise_velocities() + + # Snapshot the context state for crash recovery if + # any MC move changed positions. + if needs_pre_run_snapshot: + dynamics._d._pre_run_state = ( + dynamics.context().getState( + getPositions=True, getVelocities=True + ) + ) + + # Write ghost residues immediately before the dynamics + # block if a frame will be saved within it. + if save_frames and runtime + block_size >= next_frame: + gcmc_sampler.write_ghost_residues() + next_frame += self._config.frame_frequency - # Loop until we reach the runtime. - while runtime <= checkpoint_frequency: - # Run the dynamics in blocks of the GCMC frequency. + # Run the dynamics block. 
dynamics.run( - self._config.gcmc_frequency, + block_size, energy_frequency=self._config.energy_frequency, frame_frequency=self._config.frame_frequency, lambda_windows=lambda_array, rest2_scale_factors=rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, - # GCMC specific options. excess_chemical_potential=( self._mu_ex if gcmc_sampler is not None else None ), @@ -715,20 +1031,19 @@ def generate_lam_vals(lambda_base, increment=0.001): ), ) - # Perform a GCMC move. - _logger.info( - f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" - ) - gcmc_sampler.move(dynamics.context()) + runtime += block_size + ec_elapsed += block_size + flip_counter += 1 - # Update the runtime. - runtime += self._config.energy_frequency - - # If a frame is saved, then we need to save current indices - # of the ghost water residues. - if save_frames and runtime >= next_frame: - gcmc_sampler.write_ghost_residues() - next_frame += self._config.frame_frequency + # Save energy components. 
+ if self._config.save_energy_components: + self._save_energy_components( + index, + dynamics.context(), + (block * checkpoint_frequency + ec_elapsed).to( + "ns" + ), + ) else: dynamics.run( @@ -738,33 +1053,21 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_windows=lambda_array, rest2_scale_factors=rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, ) except Exception as e: - try: - self._save_energy_components(index, dynamics.context()) - except: - pass raise RuntimeError( - f"Dynamics block {block+1} for {_lam_sym} = {lambda_value:.5f} failed: {e}" + f"Dynamics block {block + 1} for {_lam_sym} = {lambda_value:.5f} failed: {e}" ) # Checkpoint. try: - # Save the energy contribution for each force. - if self._config.save_energy_components: - self._save_energy_components(index, dynamics.context()) - # Commit the current system. system = dynamics.commit() - # If performing GCMC, then we need to flag the ghost waters. - if gcmc_sampler is not None: - system = gcmc_sampler._flag_ghost_waters(system) - # Record the end time. block_end = _timer() @@ -800,25 +1103,53 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_energy=lambda_energy, lambda_grad=lambda_grad, is_final_block=is_final_block, + context=dynamics.context(), + gcmc_sampler=gcmc_sampler, ) if error is not None: raise error + # Save sampler statistics alongside the checkpoint. + self._save_sampler_stats( + index, gcmc_sampler, terminal_flip_sampler + ) + # Delete all trajectory frames from the Sire system within the # dynamics object. 
dynamics._d._sire_mols.delete_all_frames() _logger.info( - f"Finished block {block+1} of {self._start_block + num_blocks} " + f"Finished block {block + 1} of {self._start_block + num_blocks} " f"for {_lam_sym} = {lambda_value:.5f}" ) # Log the number of waters within the GCMC sampling volume. if gcmc_sampler is not None: + gcmc_sampler.push() + try: + n_moves = gcmc_sampler._num_moves + acc_str = ( + f", acceptance rate = {gcmc_sampler.move_acceptance_probability():.3f}" + f" (ins = {gcmc_sampler.num_insertions()}, del = {gcmc_sampler.num_deletions()})" + if n_moves > 0 + else "" + ) + _logger.info( + f"Current number of waters in GCMC volume at {_lam_sym} = {lambda_value:.5f} " + f"is {gcmc_sampler.num_waters()}{acc_str}" + ) + finally: + gcmc_sampler.pop() + + # Log terminal flip acceptance rate. + if terminal_flip_sampler is not None: _logger.info( - f"Current number of waters in GCMC volume at {_lam_sym} = {lambda_value:.5f} " - f"is {gcmc_sampler.num_waters()}" + f"Terminal flip acceptance rate at " + f"{_lam_sym} = {lambda_value:.5f}: " + f"{terminal_flip_sampler.acceptance_rate:.3f} " + f"({terminal_flip_sampler.num_accepted}/" + f"{terminal_flip_sampler.num_attempted})" ) if is_final_block: @@ -835,6 +1166,18 @@ def generate_lam_vals(lambda_base, increment=0.001): block += 1 block_start = _timer() try: + # Perform one terminal flip at the start of the remainder block. 
+ if terminal_flip_sampler is not None: + _logger.info( + f"Performing terminal flip move at " + f"{_lam_sym} = {lambda_value:.5f}" + ) + if ( + terminal_flip_sampler.move(dynamics.context()) + and self._config.randomise_velocities + ): + dynamics.randomise_velocities() + dynamics.run( rem, energy_frequency=self._config.energy_frequency, @@ -842,19 +1185,21 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_windows=lambda_array, rest2_scale_factors=rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, ) - # Save the energy contribution for each force. - if self._config.save_energy_components: - self._save_energy_components(index, dynamics.context()) - # Commit the current system. system = dynamics.commit() + # Save the energy contribution for each force. + if self._config.save_energy_components: + self._save_energy_components( + index, dynamics.context(), system.time().to("ns") + ) + # Record the end time. 
block_end = _timer() @@ -878,6 +1223,8 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_energy=lambda_energy, lambda_grad=lambda_grad, is_final_block=True, + context=dynamics.context(), + gcmc_sampler=gcmc_sampler, ) # Delete all trajectory frames from the Sire system within the @@ -885,7 +1232,7 @@ def generate_lam_vals(lambda_base, increment=0.001): dynamics._d._sire_mols.delete_all_frames() _logger.info( - f"Finished block {block+1} of {self._start_block + num_blocks} " + f"Finished block {block + 1} of {self._start_block + num_blocks} " f"for {_lam_sym} = {lambda_value:.5f}" ) @@ -893,51 +1240,141 @@ def generate_lam_vals(lambda_base, increment=0.001): f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" ) except Exception as e: - try: - self._save_energy_components(index, dynamics.context()) - except: - pass raise RuntimeError( - f"Final dynamics block for {lam_sym} = {lambda_value:.5f} failed: {e}" + f"Final dynamics block for {_lam_sym} = {lambda_value:.5f} failed: {e}" ) else: try: - if gcmc_sampler is not None: - # Initialise the run time and time at which the next frame is saved. + # Run in sub-blocks when any MC sampler is active or energy + # components are being saved; otherwise run a single block. + needs_subblock = ( + gcmc_sampler is not None + or terminal_flip_sampler is not None + or self._config.save_energy_components + ) + if needs_subblock: runtime = _sr.u("0ps") - save_frames = self._config.frame_frequency > 0 - next_frame = self._config.frame_frequency + ec_elapsed = _sr.u("0ps") + flip_counter = 0 + save_frames = ( + gcmc_sampler is not None and self._config.frame_frequency > 0 + ) + next_frame = self._config.frame_frequency if save_frames else None + # Sub-block size: shortest active MC frequency, or + # energy_frequency when only saving energy components. 
+ if gcmc_sampler is not None and terminal_flip_sampler is not None: + block_size = min( + self._config.gcmc_frequency, + self._config.terminal_flip_frequency, + ) + elif gcmc_sampler is not None: + block_size = self._config.gcmc_frequency + elif terminal_flip_sampler is not None: + block_size = self._config.terminal_flip_frequency + else: + block_size = self._config.energy_frequency + # How often to attempt each MC move (in sub-block units). + gcmc_every = ( + max( + 1, round((self._config.gcmc_frequency / block_size).value()) + ) + if gcmc_sampler is not None + else None + ) + mc_flip_every = ( + max( + 1, + round( + ( + self._config.terminal_flip_frequency / block_size + ).value() + ), + ) + if terminal_flip_sampler is not None + else None + ) + time_base = self._config.runtime - time + + while runtime < time: + needs_pre_run_snapshot = False + + # GCMC move. + if gcmc_sampler is not None and flip_counter % gcmc_every == 0: + _logger.info( + f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" + ) + gcmc_sampler.push() + try: + gcmc_sampler.move(dynamics.context()) + finally: + gcmc_sampler.pop() + needs_pre_run_snapshot = self._config.auto_fix_minimise + + # Terminal flip move. + if ( + terminal_flip_sampler is not None + and flip_counter % mc_flip_every == 0 + ): + _logger.info( + f"Performing terminal flip move at " + f"{_lam_sym} = {lambda_value:.5f}" + ) + flip_accepted = terminal_flip_sampler.move( + dynamics.context() + ) + if flip_accepted: + if self._config.auto_fix_minimise: + needs_pre_run_snapshot = True + if self._config.randomise_velocities: + dynamics.randomise_velocities() + + # Snapshot the context state for crash recovery if + # any MC move changed positions. + if needs_pre_run_snapshot: + dynamics._d._pre_run_state = dynamics.context().getState( + getPositions=True, getVelocities=True + ) - # Loop until we reach the runtime. - while runtime <= time: - # Run the dynamics in blocks of the GCMC frequency. 
+ # Write ghost residues immediately before the dynamics + # block if a frame will be saved within it. + if save_frames and runtime + block_size >= next_frame: + gcmc_sampler.write_ghost_residues() + next_frame += self._config.frame_frequency + + # Run the dynamics block. dynamics.run( - self._config.gcmc_frequency, + block_size, energy_frequency=self._config.energy_frequency, frame_frequency=self._config.frame_frequency, lambda_windows=lambda_array, rest2_scale_factors=rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, + excess_chemical_potential=( + self._mu_ex if gcmc_sampler is not None else None + ), + num_waters=( + _np.sum(gcmc_sampler.water_state()) + if gcmc_sampler is not None + else None + ), ) - # Perform a GCMC move. - _logger.info( - f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" - ) - gcmc_sampler.move(dynamics.context()) + runtime += block_size + ec_elapsed += block_size + flip_counter += 1 - # Update the runtime. - runtime += self._config.energy_frequency + # Save energy components. + if self._config.save_energy_components: + self._save_energy_components( + index, + dynamics.context(), + (time_base + ec_elapsed).to("ns"), + ) - # If a frame is saved, then we need to save current indices - # of the ghost water residues. 
- if save_frames and runtime >= next_frame: - gcmc_sampler.write_ghost_residues() - next_frame += self._config.frame_frequency else: dynamics.run( time, @@ -946,16 +1383,12 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_windows=lambda_array, rest2_scale_factors=rest2_scale_factors, save_velocities=self._config.save_velocities, - auto_fix_minimise=True, + auto_fix_minimise=self._config.auto_fix_minimise, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, save_crash_report=self._config.save_crash_report, ) except Exception as e: - try: - self._save_energy_components(index, dynamics.context()) - except: - pass raise RuntimeError( f"Dynamics for {_lam_sym} = {lambda_value:.5f} failed: {e}" ) @@ -995,6 +1428,8 @@ def generate_lam_vals(lambda_base, increment=0.001): lambda_energy=lambda_energy, lambda_grad=lambda_grad, is_final_block=True, + context=dynamics.context(), + gcmc_sampler=gcmc_sampler, ) if error is not None: @@ -1002,12 +1437,73 @@ def generate_lam_vals(lambda_base, increment=0.001): _logger.error(msg) raise RuntimeError(msg) + # Save sampler statistics alongside the final checkpoint. + self._save_sampler_stats(index, gcmc_sampler, terminal_flip_sampler) + _logger.success( f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" ) return time + def _save_sampler_stats(self, index, gcmc_sampler, terminal_flip_sampler): + """ + Save GCMC and terminal flip sampler statistics to a pickle file. + + Parameters + ---------- + + index : int + The index of the lambda value. + + gcmc_sampler : GCMCSampler or None + The GCMC sampler for this replica. + + terminal_flip_sampler : TerminalFlipSampler or None + The terminal flip sampler for this replica. 
+ """ + import pickle as _pickle + + stats = {} + if gcmc_sampler is not None: + stats["gcmc"] = gcmc_sampler.get_stats() + if terminal_flip_sampler is not None: + stats["terminal_flip"] = [ + terminal_flip_sampler.num_attempted, + terminal_flip_sampler.num_accepted, + ] + with open(self._filenames[index]["sampler_stats"], "wb") as f: + _pickle.dump(stats, f) + + def _load_sampler_stats(self, index): + """ + Load sampler statistics from a pickle file. + + Parameters + ---------- + + index : int + The index of the lambda value. + + Returns + ------- + + dict or None + The sampler statistics, or None if the file does not exist. + """ + import pickle as _pickle + from pathlib import Path as _Path + + path = _Path(self._filenames[index]["sampler_stats"]) + if not path.exists(): + return None + try: + with open(path, "rb") as f: + return _pickle.load(f) + except Exception as e: + _logger.warning(f"Could not load sampler stats for index {index}: {e}") + return None + def _minimisation( self, system, diff --git a/src/somd2/runner/_samplers/__init__.py b/src/somd2/runner/_samplers/__init__.py new file mode 100644 index 00000000..5397014f --- /dev/null +++ b/src/somd2/runner/_samplers/__init__.py @@ -0,0 +1,23 @@ +###################################################################### +# SOMD2: GPU accelerated alchemical free-energy engine. +# +# Copyright: 2023-2026 +# +# Authors: The OpenBioSim Team +# +# SOMD2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# SOMD2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with SOMD2. 
If not, see <http://www.gnu.org/licenses/>. +##################################################################### + +from ._terminal_flip import TerminalFlipSampler as TerminalFlipSampler +from ._terminal_flip import detect_terminal_groups as detect_terminal_groups diff --git a/src/somd2/runner/_samplers/_terminal_flip.py b/src/somd2/runner/_samplers/_terminal_flip.py new file mode 100644 index 00000000..8eba9ac0 --- /dev/null +++ b/src/somd2/runner/_samplers/_terminal_flip.py @@ -0,0 +1,625 @@ +###################################################################### +# SOMD2: GPU accelerated alchemical free-energy engine. +# +# Copyright: 2023-2026 +# +# Authors: The OpenBioSim Team +# +# SOMD2 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# SOMD2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with SOMD2. If not, see <http://www.gnu.org/licenses/>. +##################################################################### + +# Adapted from the terminal ring flip MC implementation in GrandFEP: +# https://github.com/deGrootLab/GrandFEP +# (Released under the MIT License.) +# +# Original method: Wang et al., ChemRxiv, 2025. +# https://doi.org/10.26434/chemrxiv-2025-2zkx5 + +__all__ = ["TerminalFlipSampler", "detect_terminal_groups"] + +import numpy as _np + +import sire.legacy.Mol as _Mol + +from somd2 import _logger +from somd2._utils import _delta_sym, _degree_sym + + +def _auto_flip_angle(mol, anchor_idx, pivot_idx, ring_neighbor_idxs): + """ + Compute the flip angle for a terminal group from the molecular geometry.
+ + The angle is measured between the two ring neighbours of the pivot, + projected onto the plane perpendicular to the rotation axis (anchor → + pivot). For a planar C₂-symmetric ring this is 180°; for higher-symmetry + rings it will be smaller. + + Parameters + ---------- + + mol : sire.legacy.Mol.Molecule + The perturbable molecule. + + anchor_idx : int + Molecule-local index of the anchor atom. + + pivot_idx : int + Molecule-local index of the pivot atom. + + ring_neighbor_idxs : list of int + Molecule-local indices of the two ring atoms directly bonded to the + pivot (i.e. the ortho atoms for a benzene ring). + + Returns + ------- + + float + Raw angle in degrees between the projected ring-neighbour vectors. + """ + + def _coords(idx): + v = mol.atom(_Mol.AtomIdx(idx)).property("coordinates") + return _np.array([v.x().value(), v.y().value(), v.z().value()]) + + anchor = _coords(anchor_idx) + pivot = _coords(pivot_idx) + n1 = _coords(ring_neighbor_idxs[0]) + n2 = _coords(ring_neighbor_idxs[1]) + + # Unit rotation axis from anchor to pivot. + k = pivot - anchor + k = k / _np.linalg.norm(k) + + # Project each ring-neighbour displacement onto the plane perp to k. + v1 = n1 - pivot + v1_perp = v1 - _np.dot(v1, k) * k + + v2 = n2 - pivot + v2_perp = v2 - _np.dot(v2, k) * k + + # Angle between the two projected vectors. + cos_angle = _np.dot(v1_perp, v2_perp) / ( + _np.linalg.norm(v1_perp) * _np.linalg.norm(v2_perp) + ) + return float(_np.degrees(_np.arccos(_np.clip(cos_angle, -1.0, 1.0)))) + + +def _round_to_symmetry_angle(raw_angle, tolerance=10.0): + """ + Round ``raw_angle`` to the nearest crystallographic symmetry angle + (360°/n for n = 2 … 12). Returns ``None`` if the closest match is more + than ``tolerance`` degrees away, indicating that the ring has no useful + rotational symmetry. + + Parameters + ---------- + + raw_angle : float + Measured angle in degrees. + + tolerance : float + Maximum deviation (degrees) from a symmetry angle. Default is 10.0. 
+ + Returns + ------- + + float or None + The nearest symmetry angle in degrees, or None if none is close enough. + """ + symmetry_angles = [360.0 / n for n in range(2, 13)] + diffs = [abs(raw_angle - a) for a in symmetry_angles] + min_idx = int(_np.argmin(diffs)) + if diffs[min_idx] > tolerance: + return None + return symmetry_angles[min_idx] + + +def _detect_for_view(mol, all_atoms, flip_angle, max_mobile_atoms, seen): + """ + Detect terminal ring groups using a single, self-consistent end-state + view of a molecule (i.e. its "connectivity" and "coordinates" properties + both resolve to the same end state). + + Parameters + ---------- + + mol : sire.legacy.Mol.Molecule + The molecule, already linked to one end state (via + :func:`sire.morph.link_to_reference` or + :func:`sire.morph.link_to_perturbed`). + + all_atoms : sire molecule view + All atoms in the system, used to obtain absolute (OpenMM) atom + indices. + + flip_angle : float or None + See :func:`detect_terminal_groups`. + + max_mobile_atoms : int or None + See :func:`detect_terminal_groups`. + + seen : set + Set of (anchor_abs, pivot_abs, frozenset(mobile_abs)) tuples already + found by a previous call (e.g. for the other end state). Updated in + place; groups already present are skipped so the same physical group + is not double-counted when it is identical at both end states. + + Returns + ------- + + list of tuple + Each entry is (angle, [anchor_idx, pivot_idx, mobile_idx_0, ...]) + using absolute atom indices. + """ + groups = [] + + try: + connectivity = mol.property("connectivity") + except Exception: + _logger.warning(f"Molecule {mol} has no 'connectivity' property. Skipping.") + return groups + + num_atoms = mol.num_atoms() + seen_bonds = set() + rdmol = None # lazily initialised if geometric detection fails + + for i in range(num_atoms): + atom_i_idx = _Mol.AtomIdx(i) + + # Only consider non-ring atoms as anchors. + if connectivity.in_ring(atom_i_idx): + continue + + # Skip dead-end atoms (e.g. 
hydrogen bonded only to a ring + # carbon): a valid anchor must be part of a chain, so it needs + # at least two connections (one to the pivot, one elsewhere). + if len(connectivity.connections_to(atom_i_idx)) < 2: + continue + + for neighbor_idx in connectivity.connections_to(atom_i_idx): + j = neighbor_idx.value() + + # Only consider ring atoms as pivots. + if not connectivity.in_ring(_Mol.AtomIdx(j)): + continue + + # Avoid processing the same bond twice. + bond_key = (min(i, j), max(i, j)) + if bond_key in seen_bonds: + continue + seen_bonds.add(bond_key) + + # Collect mobile atoms via BFS from the pivot, not crossing + # the anchor. The pivot itself does not move (it is the + # rotation centre), so it is excluded from the mobile list. + mobile = _bfs_mobile(connectivity, i, j, num_atoms) + + if not mobile: + continue + + # Skip groups with too many mobile atoms. + if max_mobile_atoms is not None and len(mobile) > max_mobile_atoms: + _logger.warning( + f"Terminal group at pivot atom {j} has {len(mobile)} mobile " + f"atoms (max_mobile_atoms={max_mobile_atoms}). Skipping group." + ) + continue + + # Map molecule-local indices to absolute system indices, and + # deduplicate against any group already found for the other end + # state (the same physical group away from a perturbed region + # will typically be identical at both end states). + anchor_abs = all_atoms.find(mol.atom(atom_i_idx)) + pivot_abs = all_atoms.find(mol.atom(_Mol.AtomIdx(j))) + mobile_abs = [all_atoms.find(mol.atom(_Mol.AtomIdx(k))) for k in mobile] + + dedup_key = (anchor_abs, pivot_abs, frozenset(mobile_abs)) + if dedup_key in seen: + continue + seen.add(dedup_key) + + # Determine the flip angle for this group. + if flip_angle is not None: + group_angle = flip_angle + else: + # Find the two ring neighbours of the pivot (mobile atoms + # directly bonded to the pivot that are in the ring). 
+ mobile_set = set(mobile) + pivot_idx_obj = _Mol.AtomIdx(j) + ring_neighbors = [ + n.value() + for n in connectivity.connections_to(pivot_idx_obj) + if n.value() in mobile_set + and connectivity.in_ring(_Mol.AtomIdx(n.value())) + ] + + if len(ring_neighbors) != 2: + _logger.warning( + f"Expected 2 ring neighbours for pivot atom {j}, " + f"found {len(ring_neighbors)}. Skipping group." + ) + continue + + raw = _auto_flip_angle(mol, i, j, ring_neighbors) + group_angle = _round_to_symmetry_angle(raw) + + if group_angle is None: + # Geometric detection failed; fall back to hybridization. + try: + if rdmol is None: + from sire.convert import to_rdkit as _to_rdkit + from rdkit.Chem import HybridizationType as _HybType + + rdmol = _to_rdkit(mol) + hyb = rdmol.GetAtomWithIdx(j).GetHybridization() + if hyb == _HybType.SP2: + group_angle = 180.0 + elif hyb == _HybType.SP3: + group_angle = 120.0 + else: + _logger.warning( + f"Terminal group at pivot atom {j}: geometric " + f"detection gave unrecognised angle " + f"({raw:.1f}{_degree_sym}) and hybridization " + f"({hyb}) has no defined flip angle. Skipping." + ) + continue + _logger.warning( + f"Terminal group at pivot atom {j}: geometric " + f"detection gave unrecognised angle " + f"({raw:.1f}{_degree_sym}), using hybridization-based " + f"angle {group_angle}{_degree_sym} (pivot is {hyb.name})." + ) + except Exception as e: + _logger.warning( + f"Terminal group at pivot atom {j} has no recognised " + f"rotational symmetry (raw angle = {raw:.1f}{_degree_sym}) " + f"and hybridization fallback failed: {e}. Skipping." + ) + continue + + _logger.debug( + f"Terminal group at pivot atom {j}: auto-detected flip " + f"angle = {group_angle}{_degree_sym} (raw = {raw:.1f}{_degree_sym})." 
+ ) + + groups.append((group_angle, [anchor_abs, pivot_abs] + mobile_abs)) + + return groups + + +def detect_terminal_groups(system, flip_angle=None, max_mobile_atoms=None): + """ + Detect terminal ring groups in perturbable molecules using Sire's native + connectivity. + + A terminal ring group is identified by a bond between a non-ring atom + (the anchor) and a ring atom (the pivot), where the ring side of the bond + is connected to the rest of the molecule only through that single bond. + The mobile atoms are all atoms reachable from the pivot when the + anchor-pivot bond is cut. + + Each end state's connectivity is searched independently (rather than + requiring the two end states to share identical connectivity), so a + group that only exists as a genuine terminal ring at one end state (for + example, a ring fused to a second ring that breaks elsewhere in a + ring-breaking perturbation) is still detected. A group found identically + at both end states is only added once. A group detected from one end + state's connectivity may not correspond to a real, rotatable fragment at + the other end state (the rotation axis may still be part of a closed + ring there); attempting such a move is not unsafe, since the resulting + large bond stretch is simply rejected by the Metropolis criterion, but + it does waste a move attempt outside the lambda range where the group is + actually valid. + + Parameters + ---------- + + system : sire system or molecule group + The Sire system containing perturbable molecules. + + flip_angle : float or None + The flip angle in degrees. If None (the default), the angle is + determined automatically from the geometry of each terminal group + by measuring the angle between the two ring neighbours of the pivot + projected perpendicular to the rotation axis, then rounding to the + nearest crystallographic symmetry angle (360°/n for n = 2..12). If + a float is given it overrides the geometric measurement for all + groups. 
+ + max_mobile_atoms : int or None + Maximum number of mobile atoms allowed in a terminal ring group. + Groups with more mobile atoms than this threshold are skipped. + Defaults to None (no limit). + + Returns + ------- + + list of tuple + Each entry is (angle, [anchor_idx, pivot_idx, mobile_idx_0, ...]) + where all indices are absolute atom indices corresponding to OpenMM + atom ordering. + """ + terminal_groups = [] + + # Get the perturbable molecules. + try: + pert_mols = system.molecules("property is_perturbable") + except Exception: + _logger.warning( + "No perturbable molecules found. Terminal flip detection skipped." + ) + return terminal_groups + + # All atoms in the system, used to obtain absolute (OpenMM) atom indices. + all_atoms = system.atoms() + + import sire.morph as _morph + + for mol in pert_mols: + seen = set() + + mol_ref = _morph.link_to_reference(mol) + terminal_groups.extend( + _detect_for_view(mol_ref, all_atoms, flip_angle, max_mobile_atoms, seen) + ) + + mol_pert = _morph.link_to_perturbed(mol) + terminal_groups.extend( + _detect_for_view(mol_pert, all_atoms, flip_angle, max_mobile_atoms, seen) + ) + + return terminal_groups + + +def _bfs_mobile(connectivity, anchor_idx, pivot_idx, num_atoms): + """ + Breadth-first search from ``pivot_idx``, not crossing ``anchor_idx``. + + Returns a sorted list of atom indices for atoms that will be rotated + (all reachable atoms except the anchor and the pivot itself, since the + pivot is the fixed rotation centre). + + Parameters + ---------- + + connectivity : sire.legacy.Mol.Connectivity + The molecular connectivity object. + + anchor_idx : int + Index of the anchor atom (defines the rotation axis start; fixed). + + pivot_idx : int + Index of the pivot atom (rotation centre; fixed). + + num_atoms : int + Total number of atoms in the molecule. + + Returns + ------- + + list of int + Sorted list of mobile atom indices. 
+ """ + visited = {anchor_idx, pivot_idx} + queue = [pivot_idx] + + while queue: + current = queue.pop(0) + for neighbor in connectivity.connections_to(_Mol.AtomIdx(current)): + n = neighbor.value() + if n not in visited: + visited.add(n) + queue.append(n) + + # Exclude the anchor and pivot; only mobile atoms are rotated. + return sorted(visited - {anchor_idx, pivot_idx}) + + +class TerminalFlipSampler: + """ + Monte Carlo sampler for terminal ring flip moves. + + Each move selects one terminal group at random and attempts to rotate + its mobile atoms by ±``flip_angle`` degrees around the bond axis from + the anchor atom to the pivot atom. The move is accepted or rejected + according to the Metropolis criterion. + + The rotation uses Rodrigues' rotation formula:: + + v_rot = v·cos θ + (k × v)·sin θ + k·(k·v)·(1 − cos θ) + + where ``k`` is the unit vector along the rotation axis (anchor → pivot) + and ``v`` is the displacement of a mobile atom from the pivot. + + The sign of ``flip_angle`` is chosen uniformly at random so that the + proposal is symmetric, satisfying detailed balance for any angle. + """ + + def __init__(self, terminal_groups, temperature): + """ + Parameters + ---------- + + terminal_groups : list of tuple + Each entry is (angle, [anchor_idx, pivot_idx, mobile_idx_0, ...]) + where indices are absolute OpenMM atom indices. + + temperature : float + Simulation temperature in Kelvin. + """ + self._terminal_groups = terminal_groups + + # kBT in kJ/mol (R = 8.314462618e-3 kJ mol-1 K-1). + self._kBT = 8.314462618e-3 * temperature + + self._num_attempted = 0 + self._num_accepted = 0 + + def _rotate(self, context, group_idx, angle): + """ + Rotate the mobile atoms of a terminal group by ``angle`` degrees + around the anchor-to-pivot axis, updating the context in place. + + Parameters + ---------- + + context : openmm.Context + The active OpenMM context. + + group_idx : int + Index into ``self._terminal_groups`` selecting the group to rotate. 
+ + angle : float + Rotation angle in degrees. + """ + from openmm import unit as _omm_unit + + _, atom_indices = self._terminal_groups[group_idx] + + positions = ( + context.getState(getPositions=True) + .getPositions(asNumpy=True) + .value_in_unit(_omm_unit.nanometer) + ) + + theta = _np.deg2rad(angle) + cos_t = _np.cos(theta) + sin_t = _np.sin(theta) + + # Anchor (axis start, fixed) and pivot (rotation centre, fixed). + p0 = positions[atom_indices[0]] + p1 = positions[atom_indices[1]] + + # Unit rotation axis from anchor to pivot. + axis = p1 - p0 + axis = axis / _np.linalg.norm(axis) + + # Rotate mobile atoms using Rodrigues' formula. + new_positions = positions.copy() + for atom_idx in atom_indices[2:]: + v = positions[atom_idx] - p1 + new_positions[atom_idx] = ( + p1 + + v * cos_t + + _np.cross(axis, v) * sin_t + + axis * _np.dot(axis, v) * (1.0 - cos_t) + ) + + context.setPositions(new_positions * _omm_unit.nanometer) + + def move(self, context): + """ + Attempt one terminal flip Monte Carlo move. + + A terminal group is chosen at random. The mobile atoms are rotated + by ±``flip_angle`` around the anchor-to-pivot axis. The move is + accepted with Metropolis probability ``min(1, exp(-ΔE / kBT))``. + + Parameters + ---------- + + context : openmm.Context + The active OpenMM context. + + Returns + ------- + + bool + True if the move was accepted, False otherwise. Returns False + immediately if there are no terminal groups. + """ + from openmm import unit as _omm_unit + + if not self._terminal_groups: + return False + + self._num_attempted += 1 + + # Randomly select one terminal group. + group_idx = _np.random.randint(len(self._terminal_groups)) + angle, _ = self._terminal_groups[group_idx] + + # Retrieve current positions and energy before the move. 
+ state = context.getState(getPositions=True, getEnergy=True) + old_positions = state.getPositions(asNumpy=True).value_in_unit( + _omm_unit.nanometer + ) + e_old = state.getPotentialEnergy().value_in_unit(_omm_unit.kilojoule_per_mole) + + # Pick uniformly from the n-1 non-current states, where n = 360 / angle. + # For 180° (n=2) this is equivalent to a random sign; for higher + # symmetry orders it correctly samples any non-current state in one move. + n = round(360.0 / angle) + step = int(_np.random.randint(1, n)) + self._rotate(context, group_idx, step * angle) + + # Evaluate the energy of the proposed configuration. + e_new = ( + context.getState(getEnergy=True) + .getPotentialEnergy() + .value_in_unit(_omm_unit.kilojoule_per_mole) + ) + + # Metropolis acceptance criterion. + delta_e = (e_new - e_old) / self._kBT + if delta_e <= 0.0 or _np.random.random() < _np.exp(-delta_e): + self._num_accepted += 1 + _logger.debug( + f"Terminal flip accepted (group {group_idx}, " + f"{_delta_sym} = {e_new - e_old:.2f} kJ/mol, " + f"acc = {_np.exp(min(0.0, -delta_e)):.3f})" + ) + return True + else: + context.setPositions(old_positions * _omm_unit.nanometer) + _logger.debug( + f"Terminal flip rejected (group {group_idx}, " + f"{_delta_sym} = {e_new - e_old:.2f} kJ/mol, " + f"acc = {_np.exp(-delta_e):.3f})" + ) + return False + + @property + def num_attempted(self): + """Total number of terminal flip moves attempted.""" + return self._num_attempted + + @property + def num_accepted(self): + """Total number of terminal flip moves accepted.""" + return self._num_accepted + + @property + def acceptance_rate(self): + """Fraction of attempted moves that were accepted.""" + if self._num_attempted == 0: + return 0.0 + return self._num_accepted / self._num_attempted + + def reset(self, num_attempted=0, num_accepted=0): + """ + Reset the move counters. + + Parameters + ---------- + + num_attempted : int + Value to restore ``num_attempted`` to. Defaults to 0. 
+ + num_accepted : int + Value to restore ``num_accepted`` to. Defaults to 0. + """ + self._num_attempted = num_attempted + self._num_accepted = num_accepted diff --git a/tests/_utils/test_somd1.py b/tests/_utils/test_somd1.py new file mode 100644 index 00000000..740cb41f --- /dev/null +++ b/tests/_utils/test_somd1.py @@ -0,0 +1,109 @@ +import pytest +import sire.legacy.Mol as _SireMol + + +def _unique_nonghost_terms(mol, term_type, n_atoms, final=False): + """ + Return the set of atom-index tuples for terms in `term_type{0 or 1}` that + are absent from the other end state and involve no ghost atoms in the state + they exist in. + """ + from somd2._utils import _has_ghost + + suffix_own = "1" if final else "0" + suffix_other = "0" if final else "1" + + info = mol.info() + + def potentials(suffix): + return mol.property(f"{term_type}{suffix}").potentials() + + def key(p): + return tuple( + info.atom_idx(getattr(p, f"atom{k}")()).value() for k in range(n_atoms) + ) + + own_keys = {key(p): p for p in potentials(suffix_own)} + other_keys = {key(p) for p in potentials(suffix_other)} + # also consider reversed keys for symmetric terms + other_keys |= {k[::-1] for k in other_keys} + + unique = {} + for k, p in own_keys.items(): + if k not in other_keys: + atoms = [info.atom_idx(getattr(p, f"atom{i}")()) for i in range(n_atoms)] + if not _has_ghost(mol, atoms, final): + unique[k] = p.function() + return unique + + +@pytest.fixture +def mols(request): + return request.getfixturevalue(request.param) + + +def test_make_compatible_ring_break(ring_break_mols): + """ + Verify that make_compatible preserves non-ghost bonded terms that are + unique to one end state, rather than silently dropping them. + + The 6YNGD→intgd perturbation breaks an N-C ring bond. The cross-bond + angles, dihedrals, and impropers that span this bond exist only in + state0 (the ring is intact there) and involve no ghost atoms, so they + must survive make_compatible unchanged. 
+ """ + from somd2._utils._somd1 import make_compatible + + mol_before = ring_break_mols.molecules("property is_perturbable")[0] + + # Collect unique non-ghost terms in state0 before the call. + before = { + term: _unique_nonghost_terms(mol_before, term, n) + for term, n in [("angle", 3), ("dihedral", 4), ("improper", 4)] + } + + # Require that there are actually unique non-ghost terms to test against. + assert any(before[t] for t in before), ( + "No unique non-ghost terms found in state0 — test input may be wrong" + ) + + system_after = make_compatible(ring_break_mols) + mol_after = system_after.molecules("property is_perturbable")[0] + + info = mol_after.info() + + for term, n in [("angle", 3), ("dihedral", 4), ("improper", 4)]: + after_keys = { + tuple(info.atom_idx(getattr(p, f"atom{k}")()).value() for k in range(n)) + for p in mol_after.property(f"{term}0").potentials() + } + for atom_key in before[term]: + assert atom_key in after_keys or atom_key[::-1] in after_keys, ( + f"Unique non-ghost {term}0 term {atom_key} was incorrectly " + f"removed by make_compatible" + ) + + +@pytest.mark.parametrize("mols", ["pert_fwd_mols", "pert_rev_mols"], indirect=True) +def test_reconstruct_intrascale(mols): + """ + Verify that reconstruct_intrascale correctly rebuilds end-state connectivity + and intrascale matrices from bond potentials. + + The forward perturbation has two hydrogen atoms that are real at lambda=0 + and become ghost atoms (du) at lambda=1; the reverse perturbation is the + mirror image. In both cases the reconstructed connectivity objects must + differ between the two end states. 
+ """ + from somd2._utils._somd1 import reconstruct_intrascale + + system = reconstruct_intrascale(mols) + + mol = system.molecules("property is_perturbable")[0] + + conn0 = mol.property("connectivity0") + conn1 = mol.property("connectivity1") + + assert isinstance(conn0, _SireMol.Connectivity) + assert isinstance(conn1, _SireMol.Connectivity) + assert conn0 != conn1 diff --git a/tests/conftest.py b/tests/conftest.py index 02dbb2a8..93f8e0ba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,46 @@ import os +from pathlib import Path + import pytest import sire as sr has_cuda = True if "CUDA_VISIBLE_DEVICES" in os.environ else False +@pytest.fixture(scope="session") +def diphenylethane_mols(): + """ + Load a merged perturbable system built from 1,2-diphenylethane (reference, + lambda = 0) and 1,2-diphenylethanol (perturbed, lambda = 1). + + SMILES: + reference : c1ccccc1CCc1ccccc1 + perturbed : OC(Cc1ccccc1)c1ccccc1 + + Both phenyl rings are terminal, so two terminal ring groups should be + detected. + """ + mols = sr.load_test_files("12diphenylethane_12diphenylethanol.s3") + return sr.morph.link_to_reference(mols) + + +@pytest.fixture(scope="session") +def phenethyl_mols(): + """ + Load a merged perturbable system built from phenethylamine (reference, + lambda = 0) and 2-phenylethanol (perturbed, lambda = 1). + + SMILES: + reference : NCCc1ccccc1 + perturbed : OCCc1ccccc1 + + The phenyl ring is terminal — attached to the aliphatic chain by a single + exocyclic bond — making it the only detectable terminal ring group. 
+ """ + mols = sr.load_test_files("phenethylamine_2phenylethanol.s3") + return sr.morph.link_to_reference(mols) + + @pytest.fixture(scope="session") def ethane_methanol(): mols = sr.load(sr.expand(sr.tutorial_url, "merged_molecule.s3")) @@ -24,3 +60,56 @@ def ethane_methanol_ions(): mols = sr.load(sr.expand(sr.tutorial_url, "merged_molecule_ions.s3")) mols = sr.morph.link_to_reference(mols) return mols + + +@pytest.fixture(scope="session") +def pert_fwd_mols(): + """ + Load the forward perturbation system from AMBER files hosted on the sire + test server and apply the local forward pert file. + """ + from somd2._utils._somd1 import apply_pert + + mols = sr.load_test_files("somd1_forward.prm7", "somd1_forward.rst7") + pert_file = str(Path(__file__).parent / "inputs" / "forward.pert") + return apply_pert(mols, pert_file) + + +@pytest.fixture(scope="session") +def pert_rev_mols(): + """ + Load the reverse perturbation system from AMBER files hosted on the sire + test server and apply the local backward pert file. + """ + from somd2._utils._somd1 import apply_pert + + mols = sr.load_test_files("somd1_backward.prm7", "somd1_backward.rst7") + pert_file = str(Path(__file__).parent / "inputs" / "backward.pert") + return apply_pert(mols, pert_file) + + +@pytest.fixture(scope="session") +def syk_ring_break_mols(): + """ + Load the SYK 5035→5033 ring-breaking perturbation system. + + Reference state (λ=0): SYK-5035 with an intact ring containing a + breaking bond. Perturbed state (λ=1): SYK-5033, the open-chain analogue. + """ + mols = sr.load_test_files("syk_5035_5033.s3") + return sr.morph.link_to_reference(mols) + + +@pytest.fixture(scope="session") +def ring_break_mols(): + """ + Load the 6YNGD→intgd ring-breaking perturbation system. + + Reference state (λ=0): 6YNGD ligand with an intact N-C ring bond. + Perturbed state (λ=1): open-chain analogue (intgd) where that bond + is absent. 
The cross-bond angles, dihedrals, and impropers spanning + the breaking bond are non-ghost unique-to-state0 terms and must be + preserved by make_compatible. + """ + mols = sr.load_test_files("6yngd_to_intgd.s3") + return sr.morph.link_to_reference(mols) diff --git a/tests/inputs/backward.pert b/tests/inputs/backward.pert new file mode 100644 index 00000000..fc9c68fa --- /dev/null +++ b/tests/inputs/backward.pert @@ -0,0 +1,764 @@ +version 1 +molecule LIG + atom + name C + initial_type C1 + final_type C1 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge 0.13695 + final_charge 0.15665 + endatom + atom + name C1CB + initial_type C5 + final_type C3 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.05488 + final_charge 0.03744 + endatom + atom + name C6B9 + initial_type C7 + final_type C5 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.00427 + final_charge -0.03795 + endatom + atom + name C9SX + initial_type C16 + final_type C14 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.11834 + final_charge -0.11662 + endatom + atom + name CC38 + initial_type C14 + final_type C12 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.04315 + final_charge 0.03754 + endatom + atom + name CFNO + initial_type C6 + final_type C4 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.36103 + final_charge -0.33381 + endatom + atom + name CFYG + initial_type C12 + final_type C10 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.15102 + final_charge 0.14521 + endatom + atom + name CG8F + initial_type C4 + final_type C2 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.19576 + final_charge -0.13592 + endatom + atom + name CHBR + initial_type C2 + final_type C16 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge 0.37902 + final_charge -0.12559 + endatom + atom + name CHYF + 
initial_type C15 + final_type C13 + initial_LJ 3.39967 0.21000 + final_LJ 3.39967 0.21000 + initial_charge 0.23216 + final_charge 0.22972 + endatom + atom + name CM80 + initial_type C8 + final_type C6 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.17624 + final_charge -0.16460 + endatom + atom + name CNVG + initial_type C11 + final_type C9 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.17763 + final_charge -0.17388 + endatom + atom + name CO2R + initial_type C9 + final_type C7 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.21908 + final_charge 0.17414 + endatom + atom + name CPOI + initial_type C3 + final_type C15 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge 0.22272 + final_charge 0.22186 + endatom + atom + name CVRJ + initial_type C10 + final_type C8 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.13107 + final_charge 0.12274 + endatom + atom + name CWWQ + initial_type C13 + final_type C11 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.14493 + final_charge -0.14827 + endatom + atom + name DU01 + initial_type du + final_type H5 + initial_LJ 0.00000 0.00000 + final_LJ 2.58323 0.01641 + initial_charge 0.00000 + final_charge 0.04771 + endatom + atom + name DU02 + initial_type du + final_type H7 + initial_LJ 0.00000 0.00000 + final_LJ 2.64454 0.01578 + initial_charge 0.00000 + final_charge 0.05601 + endatom + atom + name F + initial_type F1 + final_type F1 + initial_LJ 3.11815 0.06100 + final_LJ 3.11815 0.06100 + initial_charge -0.12847 + final_charge -0.13081 + endatom + atom + name FKXW + initial_type F4 + final_type H2 + initial_LJ 3.11815 0.06100 + final_LJ 2.64454 0.01578 + initial_charge -0.21530 + final_charge 0.05601 + endatom + atom + name FOGY + initial_type F2 + final_type H6 + initial_LJ 3.11815 0.06100 + final_LJ 2.64454 0.01578 + initial_charge -0.21530 + final_charge 0.05601 + endatom + 
atom + name FR3X + initial_type F3 + final_type F2 + initial_LJ 3.11815 0.06100 + final_LJ 3.11815 0.06100 + initial_charge -0.20706 + final_charge -0.22639 + endatom + atom + name H + initial_type H1 + final_type H1 + initial_LJ 1.10343 0.01409 + final_LJ 1.10343 0.01409 + initial_charge 0.45185 + final_charge 0.45014 + endatom + atom + name HCUZ + initial_type H6 + final_type H11 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.17099 + final_charge 0.16814 + endatom + atom + name HK3Y + initial_type H3 + final_type H8 + initial_LJ 1.10343 0.01409 + final_LJ 1.10343 0.01409 + initial_charge 0.45185 + final_charge 0.45014 + endatom + atom + name HK8P + initial_type H2 + final_type H3 + initial_LJ 0.53454 0.00001 + final_LJ 0.53454 0.00001 + initial_charge 0.42180 + final_charge 0.41410 + endatom + atom + name HQIP + initial_type H10 + final_type H14 + initial_LJ 2.58323 0.01641 + final_LJ 2.58323 0.01641 + initial_charge 0.08414 + final_charge 0.05920 + endatom + atom + name HR9O + initial_type H4 + final_type H9 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.14114 + final_charge 0.13043 + endatom + atom + name HREN + initial_type H7 + final_type H12 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.17060 + final_charge 0.16842 + endatom + atom + name HUDO + initial_type H5 + final_type H10 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.16343 + final_charge 0.15833 + endatom + atom + name HUN5 + initial_type H8 + final_type H13 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.16785 + final_charge 0.16457 + endatom + atom + name HZ3J + initial_type H9 + final_type H4 + initial_LJ 2.58323 0.01641 + final_LJ 2.58323 0.01641 + initial_charge 0.09026 + final_charge 0.04771 + endatom + atom + name N + initial_type N1 + final_type N2 + initial_LJ 3.20688 0.16769 + final_LJ 3.20688 0.16769 + initial_charge -0.33919 + final_charge -0.34137 + 
endatom + atom + name NNRE + initial_type N2 + final_type N1 + initial_LJ 3.20688 0.16769 + final_LJ 3.20688 0.16769 + initial_charge -1.02643 + final_charge -1.02860 + endatom + atom + name O + initial_type O1 + final_type O4 + initial_LJ 2.99716 0.20947 + final_LJ 2.99716 0.20947 + initial_charge -0.56256 + final_charge -0.58176 + endatom + atom + name OAK1 + initial_type O2 + final_type O1 + initial_LJ 3.02511 0.16847 + final_LJ 3.02511 0.16847 + initial_charge -0.25503 + final_charge -0.24315 + endatom + atom + name OMEC + initial_type O3 + final_type O2 + initial_LJ 3.03981 0.21021 + final_LJ 3.03981 0.21021 + initial_charge -0.64869 + final_charge -0.65063 + endatom + atom + name OOSF + initial_type O4 + final_type O3 + initial_LJ 3.03981 0.21021 + final_LJ 3.03981 0.21021 + initial_charge -0.64869 + final_charge -0.65063 + endatom + atom + name S + initial_type S1 + final_type S1 + initial_LJ 3.56359 0.25000 + final_LJ 3.56359 0.25000 + initial_charge 1.54093 + final_charge 1.53779 + endatom + bond + atom0 C + atom1 CHBR + initial_force 215.23769 + initial_equil 1.53368 + final_force 0.00000 + final_equil 1.53368 + endbond + bond + atom0 C + atom1 DU01 + initial_force 357.85825 + initial_equil 1.09398 + final_force 357.85825 + final_equil 1.09398 + endbond + bond + atom0 CHBR + atom1 DU02 + initial_force 357.85825 + initial_equil 1.09398 + final_force 357.85825 + final_equil 1.09398 + endbond + bond + atom0 CHBR + atom1 FKXW + initial_force 287.85918 + initial_equil 1.35926 + final_force 357.85825 + final_equil 1.09398 + endbond + bond + atom0 CHBR + atom1 FOGY + initial_force 287.85918 + initial_equil 1.35926 + final_force 357.85825 + final_equil 1.09398 + endbond + angle + atom0 CHBR + atom1 C + atom2 C1CB + initial_force 90.87409 + initial_equil 1.88728 + final_force 0.00000 + final_equil 1.88728 + endangle + angle + atom0 CHBR + atom1 C + atom2 HZ3J + initial_force 66.83561 + initial_equil 1.91883 + final_force 0.00000 + final_equil 1.91883 + endangle + 
angle + atom0 O + atom1 C + atom2 CHBR + initial_force 66.83561 + initial_equil 1.91883 + final_force 0.00000 + final_equil 1.91883 + endangle + angle + atom0 C + atom1 CHBR + atom2 CPOI + initial_force 90.87409 + initial_equil 1.88728 + final_force 0.00000 + final_equil 1.88728 + endangle + angle + atom0 C + atom1 CHBR + atom2 FKXW + initial_force 66.83561 + initial_equil 1.91883 + final_force 0.00000 + final_equil 1.91883 + endangle + angle + atom0 C + atom1 CHBR + atom2 FOGY + initial_force 66.83561 + initial_equil 1.91883 + final_force 0.00000 + final_equil 1.91883 + endangle + angle + atom0 C1CB + atom1 C + atom2 DU01 + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 HZ3J + atom1 C + atom2 DU01 + initial_force 36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 O + atom1 C + atom2 DU01 + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 C + atom1 C1CB + atom2 CFNO + initial_force 76.84599 + initial_equil 2.26374 + final_force 84.46100 + final_equil 2.09590 + endangle + angle + atom0 C + atom1 C1CB + atom2 CG8F + initial_force 90.87409 + initial_equil 1.88728 + final_force 84.46100 + final_equil 2.09590 + endangle + angle + atom0 CPOI + atom1 CG8F + atom2 C1CB + initial_force 90.87409 + initial_equil 1.88728 + final_force 84.46100 + final_equil 2.09590 + endangle + angle + atom0 CPOI + atom1 CG8F + atom2 CO2R + initial_force 76.84599 + initial_equil 2.26374 + final_force 84.46100 + final_equil 2.09590 + endangle + angle + atom0 CPOI + atom1 CHBR + atom2 DU02 + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 FKXW + atom1 CHBR + atom2 DU02 + initial_force 36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 FOGY + atom1 CHBR + atom2 DU02 + initial_force 
36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 FOGY + atom1 CHBR + atom2 FKXW + initial_force 66.83561 + initial_equil 1.91883 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 CHBR + atom1 CPOI + atom2 CG8F + initial_force 90.87409 + initial_equil 1.88728 + final_force 66.83561 + final_equil 1.91883 + endangle + dihedral + atom0 CHBR + atom1 C + atom2 C1CB + atom3 CFNO + initial_form 0.2173 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CHBR + atom1 C + atom2 C1CB + atom3 CG8F + initial_form 0.2173 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 C1CB + atom1 C + atom2 CHBR + atom3 FKXW + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 C1CB + atom1 C + atom2 CHBR + atom3 FOGY + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 O + atom1 C + atom2 CHBR + atom3 CPOI + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 O + atom1 C + atom2 CHBR + atom3 FKXW + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 O + atom1 C + atom2 CHBR + atom3 FOGY + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CHBR + atom1 C + atom2 O + atom3 HK8P + initial_form 0.0713 1.0 -0.000000 0.3984 3.0 -0.000000 + final_form 0.0000 1.0 -0.000000 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CPOI + atom1 CHBR + atom2 C + atom3 C1CB + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CPOI + atom1 CHBR + atom2 C + atom3 HZ3J + initial_form 0.0887 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 FKXW + atom1 CHBR + atom2 C + atom3 HZ3J + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form 0.0000 
3.0 -0.000000 0.0000 1.0 -0.000000 + enddihedral + dihedral + atom0 FOGY + atom1 CHBR + atom2 C + atom3 HZ3J + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form 0.0000 3.0 -0.000000 0.0000 1.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 CHBR + atom2 CPOI + atom3 CG8F + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 CHBR + atom2 CPOI + atom3 FR3X + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 CHBR + atom2 CPOI + atom3 HQIP + initial_form 0.0887 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 CFNO + atom2 CG8F + atom3 C1CB + initial_form 0.0000 2.0 3.141593 + final_form 1.7667 2.0 3.141593 + enddihedral + dihedral + atom0 CFNO + atom1 C1CB + atom2 C + atom3 DU01 + initial_form 0.0000 3.0 -0.000000 + final_form 0.2173 3.0 -0.000000 + enddihedral + dihedral + atom0 CG8F + atom1 C1CB + atom2 C + atom3 DU01 + initial_form 0.0000 3.0 -0.000000 + final_form 0.2173 3.0 -0.000000 + enddihedral + dihedral + atom0 FKXW + atom1 CHBR + atom2 CPOI + atom3 HQIP + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form 0.2390 3.0 -0.000000 + enddihedral + dihedral + atom0 FOGY + atom1 CHBR + atom2 CPOI + atom3 FR3X + initial_form -0.1944 1.0 3.141593 0.0721 3.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 FOGY + atom1 CHBR + atom2 CPOI + atom3 HQIP + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form 0.2390 3.0 -0.000000 + enddihedral + dihedral + atom0 CG8F + atom1 CPOI + atom2 CHBR + atom3 DU02 + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 FR3X + atom1 CPOI + atom2 CHBR + atom3 DU02 + initial_form 0.0000 3.0 -0.000000 0.0000 1.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 FR3X + atom1 CPOI + 
atom2 CHBR + atom3 FKXW + initial_form -0.1944 1.0 3.141593 0.0721 3.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 HQIP + atom1 CPOI + atom2 CHBR + atom3 DU02 + initial_form 0.0000 3.0 -0.000000 + final_form 0.2390 3.0 -0.000000 + enddihedral + dihedral + atom0 HK8P + atom1 O + atom2 C + atom3 DU01 + initial_form 0.0000 3.0 -0.000000 + final_form 0.3884 3.0 -0.000000 + enddihedral +endmolecule diff --git a/tests/inputs/forward.pert b/tests/inputs/forward.pert new file mode 100644 index 00000000..6017b587 --- /dev/null +++ b/tests/inputs/forward.pert @@ -0,0 +1,764 @@ +version 1 +molecule LIG + atom + name C + initial_type C1 + final_type C1 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge 0.15665 + final_charge 0.13695 + endatom + atom + name C1CB + initial_type C5 + final_type C7 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.03795 + final_charge -0.00427 + endatom + atom + name C6B9 + initial_type C7 + final_type C9 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.17414 + final_charge 0.21908 + endatom + atom + name CAK1 + initial_type C10 + final_type C12 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.14521 + final_charge 0.15102 + endatom + atom + name CFNO + initial_type C6 + final_type C8 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.16460 + final_charge -0.17624 + endatom + atom + name CFYG + initial_type C13 + final_type C15 + initial_LJ 3.39967 0.21000 + final_LJ 3.39967 0.21000 + initial_charge 0.22972 + final_charge 0.23216 + endatom + atom + name CG8F + initial_type C4 + final_type C6 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.33381 + final_charge -0.36103 + endatom + atom + name CHBR + initial_type C2 + final_type C4 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.13592 + final_charge -0.19576 + endatom + 
atom + name CHYF + initial_type C15 + final_type C3 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge 0.22186 + final_charge 0.22272 + endatom + atom + name CM80 + initial_type C8 + final_type C10 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.12274 + final_charge 0.13107 + endatom + atom + name CNVG + initial_type C12 + final_type C14 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.03754 + final_charge 0.04315 + endatom + atom + name CO2R + initial_type C9 + final_type C11 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.17388 + final_charge -0.17763 + endatom + atom + name COGY + initial_type C16 + final_type C2 + initial_LJ 3.37953 0.10884 + final_LJ 3.37953 0.10884 + initial_charge -0.12559 + final_charge 0.37902 + endatom + atom + name CPOI + initial_type C3 + final_type C5 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge 0.03744 + final_charge 0.05488 + endatom + atom + name CVRJ + initial_type C11 + final_type C13 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.14827 + final_charge -0.14493 + endatom + atom + name CWWQ + initial_type C14 + final_type C16 + initial_LJ 3.48065 0.08688 + final_LJ 3.48065 0.08688 + initial_charge -0.11662 + final_charge -0.11834 + endatom + atom + name F + initial_type F1 + final_type F1 + initial_LJ 3.11815 0.06100 + final_LJ 3.11815 0.06100 + initial_charge -0.13081 + final_charge -0.12847 + endatom + atom + name FOSF + initial_type F2 + final_type F3 + initial_LJ 3.11815 0.06100 + final_LJ 3.11815 0.06100 + initial_charge -0.22639 + final_charge -0.20706 + endatom + atom + name H + initial_type H1 + final_type H1 + initial_LJ 1.10343 0.01409 + final_LJ 1.10343 0.01409 + initial_charge 0.45014 + final_charge 0.45185 + endatom + atom + name H1ZX + initial_type H14 + final_type H10 + initial_LJ 2.58323 0.01641 + final_LJ 2.58323 0.01641 + initial_charge 0.05920 + 
final_charge 0.08414 + endatom + atom + name H98Q + initial_type H13 + final_type H8 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.16457 + final_charge 0.16785 + endatom + atom + name HCUZ + initial_type H8 + final_type H3 + initial_LJ 1.10343 0.01409 + final_LJ 1.10343 0.01409 + initial_charge 0.45014 + final_charge 0.45185 + endatom + atom + name HK3Y + initial_type H5 + final_type du + initial_LJ 2.58323 0.01641 + final_LJ 0.00000 0.00000 + initial_charge 0.04771 + final_charge 0.00000 + endatom + atom + name HK8P + initial_type H4 + final_type H9 + initial_LJ 2.58323 0.01641 + final_LJ 2.58323 0.01641 + initial_charge 0.04771 + final_charge 0.09026 + endatom + atom + name HKXW + initial_type H2 + final_type F4 + initial_LJ 2.64454 0.01578 + final_LJ 3.11815 0.06100 + initial_charge 0.05601 + final_charge -0.21530 + endatom + atom + name HNRE + initial_type H3 + final_type H2 + initial_LJ 0.53454 0.00001 + final_LJ 0.53454 0.00001 + initial_charge 0.41410 + final_charge 0.42180 + endatom + atom + name HQIP + initial_type H12 + final_type H7 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.16842 + final_charge 0.17060 + endatom + atom + name HR9O + initial_type H6 + final_type F2 + initial_LJ 2.64454 0.01578 + final_LJ 3.11815 0.06100 + initial_charge 0.05601 + final_charge -0.21530 + endatom + atom + name HREN + initial_type H9 + final_type H4 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.13043 + final_charge 0.14114 + endatom + atom + name HUDO + initial_type H7 + final_type du + initial_LJ 2.64454 0.01578 + final_LJ 0.00000 0.00000 + initial_charge 0.05601 + final_charge 0.00000 + endatom + atom + name HUN5 + initial_type H10 + final_type H5 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 0.15833 + final_charge 0.16343 + endatom + atom + name HZ3J + initial_type H11 + final_type H6 + initial_LJ 2.57258 0.01561 + final_LJ 2.57258 0.01561 + initial_charge 
0.16814 + final_charge 0.17099 + endatom + atom + name N + initial_type N1 + final_type N2 + initial_LJ 3.20688 0.16769 + final_LJ 3.20688 0.16769 + initial_charge -1.02860 + final_charge -1.02643 + endatom + atom + name NR3X + initial_type N2 + final_type N1 + initial_LJ 3.20688 0.16769 + final_LJ 3.20688 0.16769 + initial_charge -0.34137 + final_charge -0.33919 + endatom + atom + name O + initial_type O1 + final_type O2 + initial_LJ 3.02511 0.16847 + final_LJ 3.02511 0.16847 + initial_charge -0.24315 + final_charge -0.25503 + endatom + atom + name O9SX + initial_type O3 + final_type O4 + initial_LJ 3.03981 0.21021 + final_LJ 3.03981 0.21021 + initial_charge -0.65063 + final_charge -0.64869 + endatom + atom + name OC38 + initial_type O2 + final_type O3 + initial_LJ 3.03981 0.21021 + final_LJ 3.03981 0.21021 + initial_charge -0.65063 + final_charge -0.64869 + endatom + atom + name OMEC + initial_type O4 + final_type O1 + initial_LJ 2.99716 0.20947 + final_LJ 2.99716 0.20947 + initial_charge -0.58176 + final_charge -0.56256 + endatom + atom + name S + initial_type S1 + final_type S1 + initial_LJ 3.56359 0.25000 + final_LJ 3.56359 0.25000 + initial_charge 1.53779 + final_charge 1.54093 + endatom + bond + atom0 C + atom1 COGY + initial_force 0.00000 + initial_equil 1.53368 + final_force 215.23769 + final_equil 1.53368 + endbond + bond + atom0 C + atom1 HK3Y + initial_force 357.85825 + initial_equil 1.09398 + final_force 357.85825 + final_equil 1.09398 + endbond + bond + atom0 COGY + atom1 HKXW + initial_force 357.85825 + initial_equil 1.09398 + final_force 287.85918 + final_equil 1.35926 + endbond + bond + atom0 COGY + atom1 HR9O + initial_force 357.85825 + initial_equil 1.09398 + final_force 287.85918 + final_equil 1.35926 + endbond + bond + atom0 COGY + atom1 HUDO + initial_force 357.85825 + initial_equil 1.09398 + final_force 357.85825 + final_equil 1.09398 + endbond + angle + atom0 COGY + atom1 C + atom2 HK8P + initial_force 0.00000 + initial_equil 1.91883 + 
final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 CPOI + atom1 C + atom2 COGY + initial_force 0.00000 + initial_equil 1.88728 + final_force 90.87409 + final_equil 1.88728 + endangle + angle + atom0 OMEC + atom1 C + atom2 COGY + initial_force 0.00000 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 C + atom1 COGY + atom2 CHYF + initial_force 0.00000 + initial_equil 1.88728 + final_force 90.87409 + final_equil 1.88728 + endangle + angle + atom0 C + atom1 COGY + atom2 HKXW + initial_force 0.00000 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 C + atom1 COGY + atom2 HR9O + initial_force 0.00000 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 CPOI + atom1 C + atom2 HK3Y + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 HK8P + atom1 C + atom2 HK3Y + initial_force 36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 OMEC + atom1 C + atom2 HK3Y + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 C6B9 + atom1 CHBR + atom2 CHYF + initial_force 84.46100 + initial_equil 2.09590 + final_force 76.84599 + final_equil 2.26374 + endangle + angle + atom0 CPOI + atom1 CHBR + atom2 CHYF + initial_force 84.46100 + initial_equil 2.09590 + final_force 90.87409 + final_equil 1.88728 + endangle + angle + atom0 CHBR + atom1 CHYF + atom2 COGY + initial_force 66.83561 + initial_equil 1.91883 + final_force 90.87409 + final_equil 1.88728 + endangle + angle + atom0 CHYF + atom1 COGY + atom2 HUDO + initial_force 66.83561 + initial_equil 1.91883 + final_force 66.83561 + final_equil 1.91883 + endangle + angle + atom0 HKXW + atom1 COGY + atom2 HR9O + initial_force 36.66571 + initial_equil 1.89088 + final_force 66.83561 + final_equil 1.91883 + 
endangle + angle + atom0 HKXW + atom1 COGY + atom2 HUDO + initial_force 36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 HR9O + atom1 COGY + atom2 HUDO + initial_force 36.66571 + initial_equil 1.89088 + final_force 36.66571 + final_equil 1.89088 + endangle + angle + atom0 C + atom1 CPOI + atom2 CG8F + initial_force 84.46100 + initial_equil 2.09590 + final_force 76.84599 + final_equil 2.26374 + endangle + angle + atom0 C + atom1 CPOI + atom2 CHBR + initial_force 84.46100 + initial_equil 2.09590 + final_force 90.87409 + final_equil 1.88728 + endangle + dihedral + atom0 C + atom1 CG8F + atom2 CHBR + atom3 CPOI + initial_form 1.7667 2.0 3.141593 + final_form 0.0000 2.0 3.141593 + enddihedral + dihedral + atom0 CPOI + atom1 C + atom2 COGY + atom3 CHYF + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 CPOI + atom1 C + atom2 COGY + atom3 HKXW + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 CPOI + atom1 C + atom2 COGY + atom3 HR9O + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 HK8P + atom1 C + atom2 COGY + atom3 HR9O + initial_form 0.0000 3.0 -0.000000 0.0000 1.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 OMEC + atom1 C + atom2 COGY + atom3 HKXW + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 OMEC + atom1 C + atom2 COGY + atom3 HR9O + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 COGY + atom1 C + atom2 OMEC + atom3 HNRE + initial_form 0.0000 1.0 -0.000000 0.0000 3.0 -0.000000 + final_form 0.0713 1.0 -0.000000 0.3984 3.0 -0.000000 + enddihedral + dihedral + atom0 CHYF + atom1 COGY + atom2 C + atom3 HK8P + initial_form 0.0000 3.0 -0.000000 + final_form 0.0887 3.0 -0.000000 + enddihedral + dihedral 
+ atom0 CHYF + atom1 COGY + atom2 C + atom3 OMEC + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 HKXW + atom1 COGY + atom2 C + atom3 HK8P + initial_form 0.0000 3.0 -0.000000 0.0000 1.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 COGY + atom2 CHYF + atom3 CHBR + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 COGY + atom2 CHYF + atom3 FOSF + initial_form 0.0000 3.0 -0.000000 + final_form 0.1467 3.0 -0.000000 + enddihedral + dihedral + atom0 C + atom1 COGY + atom2 CHYF + atom3 H1ZX + initial_form 0.0000 3.0 -0.000000 + final_form 0.0887 3.0 -0.000000 + enddihedral + dihedral + atom0 CG8F + atom1 CPOI + atom2 C + atom3 COGY + initial_form 0.0000 3.0 -0.000000 + final_form 0.2173 3.0 -0.000000 + enddihedral + dihedral + atom0 CHBR + atom1 CPOI + atom2 C + atom3 COGY + initial_form 0.0000 3.0 -0.000000 + final_form 0.2173 3.0 -0.000000 + enddihedral + dihedral + atom0 CHBR + atom1 CHYF + atom2 COGY + atom3 HUDO + initial_form 0.1467 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 FOSF + atom1 CHYF + atom2 COGY + atom3 HKXW + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form -0.1944 1.0 3.141593 0.0721 3.0 -0.000000 + enddihedral + dihedral + atom0 FOSF + atom1 CHYF + atom2 COGY + atom3 HR9O + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form -0.1944 1.0 3.141593 0.0721 3.0 -0.000000 + enddihedral + dihedral + atom0 FOSF + atom1 CHYF + atom2 COGY + atom3 HUDO + initial_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + final_form 0.0000 3.0 -0.000000 0.0000 1.0 -0.000000 + enddihedral + dihedral + atom0 HKXW + atom1 COGY + atom2 CHYF + atom3 H1ZX + initial_form 0.2390 3.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 HR9O + atom1 COGY + atom2 CHYF + atom3 H1ZX + initial_form 
0.2390 3.0 -0.000000 + final_form 0.0939 3.0 -0.000000 0.4539 1.0 -0.000000 + enddihedral + dihedral + atom0 HUDO + atom1 COGY + atom2 CHYF + atom3 H1ZX + initial_form 0.2390 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CG8F + atom1 CPOI + atom2 C + atom3 HK3Y + initial_form 0.2173 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 CHBR + atom1 CPOI + atom2 C + atom3 HK3Y + initial_form 0.2173 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral + dihedral + atom0 HNRE + atom1 OMEC + atom2 C + atom3 HK3Y + initial_form 0.3884 3.0 -0.000000 + final_form 0.0000 3.0 -0.000000 + enddihedral +endmolecule diff --git a/tests/runner/test_config.py b/tests/runner/test_config.py index cfb14be6..39bd948a 100644 --- a/tests/runner/test_config.py +++ b/tests/runner/test_config.py @@ -1,4 +1,3 @@ -import pytest import tempfile import sire as sr @@ -63,18 +62,22 @@ def test_dynamics_options(): def test_logfile_creation(): - # Test that the logfile is created by either the initialisation of the runner or of a config + # Test that the logfile is only created once a runner is initialised, not + # by the config alone - this is deferred so that a user can change + # output_directory after constructing a Config (e.g. via the Python API) + # without leaving behind a stale directory/duplicate log sink from the + # default value. with tempfile.TemporaryDirectory() as tmpdir: # Load the demo stream file. mols = sr.load(sr.expand(sr.tutorial_url, "merged_molecule.s3")) from pathlib import Path - # Test that a logfile is created once a config object is initialised + # A config object alone should not create the logfile. 
config = Config(output_directory=tmpdir, log_file="test.log") assert config.log_file is not None - assert Path.exists(config.output_directory / config.log_file) + assert not Path.exists(config.output_directory / config.log_file) - # Test that a logfile is created once a runner object is initialised + # Test that a logfile is created once a runner object is initialised. runner = Runner(mols, Config(output_directory=tmpdir, log_file="test1.log")) assert runner._config.log_file is not None assert Path.exists(runner._config.output_directory / runner._config.log_file) diff --git a/tests/runner/test_hmr.py b/tests/runner/test_hmr.py index 998e91f4..4d8a55dd 100644 --- a/tests/runner/test_hmr.py +++ b/tests/runner/test_hmr.py @@ -1,8 +1,59 @@ import math +from collections import defaultdict +import sire as sr + +from somd2._utils import _lam_sym from somd2.runner import Runner +def _masses_by_element(system): + """ + Return atom masses for the perturbable ligand at both end states, grouped + and sorted by element symbol. Dummy atoms (element Xx) are excluded. + """ + result = {} + for label, link_fn in ( + ("lam0", sr.morph.link_to_reference), + ("lam1", sr.morph.link_to_perturbed), + ): + linked = link_fn(system) + mol = next(m for m in linked["not water"].molecules() if m.num_atoms() > 1) + by_elem = defaultdict(list) + for atom in mol.atoms(): + elem = atom.element().symbol() + if elem != "Xx": + by_elem[elem].append(round(atom.mass().value(), 4)) + result[label] = {k: sorted(v) for k, v in by_elem.items()} + return result + + +def test_hmr_pertfile(pert_fwd_mols, pert_rev_mols): + """ + Verify HMR gives consistent masses for forward and reverse perturbations. + + Ligand A is the reference (lambda=0) in the forward perturbation and the + perturbed state (lambda=1) in the reverse perturbation. After HMR, the + same physical atoms must carry the same masses in both input paths. + Likewise for Ligand B. 
+ """ + fwd = Runner._repartition_h_mass(pert_fwd_mols, 1.5) + rev = Runner._repartition_h_mass(pert_rev_mols, 1.5) + + fwd_masses = _masses_by_element(fwd) + rev_masses = _masses_by_element(rev) + + # Ligand A: forward lambda=0 must match reverse lambda=1 + assert fwd_masses["lam0"] == rev_masses["lam1"], ( + f"Ligand A masses differ between forward {_lam_sym}=0 and reverse {_lam_sym}=1 after HMR" + ) + + # Ligand B: forward lambda=1 must match reverse lambda=0 + assert fwd_masses["lam1"] == rev_masses["lam0"], ( + f"Ligand B masses differ between forward {_lam_sym}=1 and reverse {_lam_sym}=0 after HMR" + ) + + def test_hmr(ethane_methanol, ethane_methanol_hmr): """Ensure that we can handle systems that have already been repartioned.""" diff --git a/tests/runner/test_lambda_values.py b/tests/runner/test_lambda_values.py index ec6267f1..ca63d7ab 100644 --- a/tests/runner/test_lambda_values.py +++ b/tests/runner/test_lambda_values.py @@ -1,9 +1,7 @@ from pathlib import Path import tempfile -import pytest -import sire as sr from somd2.runner import Runner from somd2.config import Config diff --git a/tests/runner/test_repex.py b/tests/runner/test_repex.py index a21eb9d7..9336053f 100644 --- a/tests/runner/test_repex.py +++ b/tests/runner/test_repex.py @@ -17,7 +17,6 @@ def test_repex_output(ethane_methanol): Validate that repex specific simulation output is generated. 
""" with tempfile.TemporaryDirectory() as tmpdir: - config = { "runtime": "12fs", "restart": False, @@ -92,7 +91,6 @@ def test_rest2_scale(ethane_methanol, rest2_scale, is_valid): """Validate the REST2 scale factor handling.""" with tempfile.TemporaryDirectory() as tmpdir: - config = { "runtime": "12fs", "restart": False, @@ -130,7 +128,6 @@ def test_rest2_selection(ethane_methanol, rest2_selection, is_valid): """Validate the REST2 selection handling.""" with tempfile.TemporaryDirectory() as tmpdir: - config = { "runtime": "12fs", "restart": False, diff --git a/tests/runner/test_restart.py b/tests/runner/test_restart.py index 5f9639df..fda3fcf5 100644 --- a/tests/runner/test_restart.py +++ b/tests/runner/test_restart.py @@ -48,12 +48,13 @@ def test_restart(mols, request): [str(Path(tmpdir) / "system0.prm7"), str(Path(tmpdir) / "traj_0.00000.dcd")] ) - # Check that both config and lambda have been written - # as properties to the streamed checkpoint file. - checkpoint = sr.stream.load(str(Path(tmpdir) / "checkpoint_0.00000.s3")) - props = checkpoint.property_keys() - assert "config" in props - assert "lambda" in props + # Check that the compact numpy checkpoint file was written. 
+ import numpy as np + + checkpoint_state = np.load(str(Path(tmpdir) / "checkpoint_0.00000.npz")) + assert "positions" in checkpoint_state + assert "velocities" in checkpoint_state + assert "time_ps" in checkpoint_state del runner @@ -123,13 +124,6 @@ def test_restart(mols, request): with pytest.raises(ValueError): runner_constraints = Runner(mols, Config(**config_diffconstraint)) - config_diffcoulombpower = config_new.copy() - config_diffcoulombpower["runtime"] = "36fs" - config_diffcoulombpower["coulomb_power"] = 0.5 - - with pytest.raises(ValueError): - runner_coulombpower = Runner(mols, Config(**config_diffcoulombpower)) - config_diffcutofftype = config_new.copy() config_diffcutofftype["runtime"] = "36fs" config_diffcutofftype["cutoff_type"] = "rf" @@ -206,25 +200,68 @@ def test_restart(mols, request): with pytest.raises(ValueError): runner_swapendstates = Runner(mols, Config(**config_diffswapendstates)) - # Need to test restart from sire checkpoint file - # this needs to be done last as it requires unlinking the config files + # Removing the config yaml should raise an OSError since the new-format + # checkpoint stores no config (the yaml is the sole validation source). for file in Path(tmpdir).glob("*.yaml"): file.unlink() - # This should work as the config is read from the lambda=0 checkpoint file - runner_noconfig = Runner(mols, Config(**config_new)) + with pytest.raises(OSError): + runner_noconfig = Runner(mols, Config(**config_new)) - # remove config again - for file in Path(tmpdir).glob("*.yaml"): - file.unlink() + # Write a config yaml with a wrong pressure value and verify restart fails. 
+ import yaml - # Load the checkpoint file using sire and change the pressure option - sire_checkpoint = sr.stream.load(str(Path(tmpdir) / "checkpoint_0.00000.s3")) - cfg = sire_checkpoint.property("config") - cfg["pressure"] = "0.5 atm" - sire_checkpoint.set_property("config", cfg) - sr.stream.save(sire_checkpoint, str(Path(tmpdir) / "checkpoint_0.00000.s3")) + bad_config = config_new.copy() + bad_config["pressure"] = "0.5 atm" + with open(Path(tmpdir) / "config.yaml", "w") as f: + yaml.dump(bad_config, f) - # Load the new checkpoint file and make sure the restart fails with pytest.raises(ValueError): runner_badconfig = Runner(mols, Config(**config_new)) + + +def test_restart_custom_schedule(ethane_methanol): + """ + Test that a restart works when using a non-standard lambda schedule. + """ + mols = ethane_methanol.clone() + schedule = sr.cas.LambdaSchedule.standard_decouple() + + with tempfile.TemporaryDirectory() as tmpdir: + config = { + "runtime": "12fs", + "restart": False, + "output_directory": tmpdir, + "energy_frequency": "4fs", + "checkpoint_frequency": "4fs", + "frame_frequency": "4fs", + "lambda_schedule": schedule, + "platform": "CPU", + "max_threads": 1, + "num_lambda": 2, + } + + # Instantiate a runner using the config defined above. + runner = Runner(mols, Config(**config)) + + del runner + + config_new = { + "runtime": "24fs", + "restart": True, + "output_directory": tmpdir, + "energy_frequency": "4fs", + "checkpoint_frequency": "4fs", + "frame_frequency": "4fs", + "lambda_schedule": schedule, + "platform": "CPU", + "max_threads": 1, + "num_lambda": 2, + "overwrite": True, + "log_level": "DEBUG", + } + + runner2 = Runner(mols, Config(**config_new)) + + # Run the simulation. 
+ runner2.run() diff --git a/tests/runner/test_terminal_flip.py b/tests/runner/test_terminal_flip.py new file mode 100644 index 00000000..79011214 --- /dev/null +++ b/tests/runner/test_terminal_flip.py @@ -0,0 +1,406 @@ +""" +Tests for terminal ring flip Monte Carlo functionality. + +Two fixtures are used (both defined in conftest.py): + +``phenethyl_mols`` + Merged system from phenethylamine (NCCc1ccccc1) and 2-phenylethanol + (OCCc1ccccc1) via ``sr.load_test_files("phenethylamine_2phenylethanol.s3")``. + Contains one terminal phenyl ring. + +``diphenylethane_mols`` + Merged system from 1,2-diphenylethane (c1ccccc1CCc1ccccc1) and + 1,2-diphenylethanol (OC(Cc1ccccc1)c1ccccc1) via + ``sr.load_test_files("12diphenylethane_12diphenylethanol.s3")``. + Contains two terminal phenyl rings. +""" + +import pytest +import tempfile + +import numpy as np + +from somd2.config import Config +from somd2.runner import Runner +from somd2.runner._samplers import TerminalFlipSampler, detect_terminal_groups + +# --------------------------------------------------------------------------- +# detect_terminal_groups +# --------------------------------------------------------------------------- + + +def test_no_terminal_groups(ethane_methanol): + """ + The ethane → methanol perturbation contains no rings, so no terminal + ring groups should be detected. + """ + groups = detect_terminal_groups(ethane_methanol) + assert groups == [] + + +def test_detect_one_terminal_group(phenethyl_mols): + """ + The phenethyl system has exactly one terminal ring (the phenyl group + attached via the –CH2– chain). H atoms bonded to ring carbons must not + be reported as separate groups. + """ + groups = detect_terminal_groups(phenethyl_mols) + assert len(groups) == 1 + + +def test_terminal_group_flip_angle(phenethyl_mols): + """ + The default flip angle should be 180°. 
+ """ + groups = detect_terminal_groups(phenethyl_mols) + angle, _ = groups[0] + assert angle == pytest.approx(180.0) + + +def test_terminal_group_atom_count(phenethyl_mols): + """ + For a mono-substituted benzene ring: + - 1 anchor atom (aliphatic C adjacent to ring) + - 1 pivot atom (ipso ring C) + - 5 mobile ring carbons + - 5 mobile ring hydrogens + Total indices list length = 12. + """ + groups = detect_terminal_groups(phenethyl_mols) + _, indices = groups[0] + # anchor + pivot + 10 mobile atoms + assert len(indices) == 12 + + +def test_anchor_not_in_mobile(phenethyl_mols): + """ + The anchor index must not appear in the mobile atom list. + """ + groups = detect_terminal_groups(phenethyl_mols) + _, indices = groups[0] + anchor_idx = indices[0] + mobile_indices = indices[2:] + assert anchor_idx not in mobile_indices + + +def test_pivot_not_in_mobile(phenethyl_mols): + """ + The pivot index must not appear in the mobile atom list (the pivot is the + fixed rotation centre). + """ + groups = detect_terminal_groups(phenethyl_mols) + _, indices = groups[0] + pivot_idx = indices[1] + mobile_indices = indices[2:] + assert pivot_idx not in mobile_indices + + +def test_auto_flip_angle_phenethyl(phenethyl_mols): + """ + With no flip_angle override, the angle for a monosubstituted benzene ring + should be auto-detected as 180° (C2 symmetry). + """ + groups = detect_terminal_groups(phenethyl_mols) + angle, _ = groups[0] + assert angle == pytest.approx(180.0) + + +def test_auto_flip_angle_diphenylethane(diphenylethane_mols): + """ + Both terminal phenyl groups in the diphenylethane system should + auto-detect as 180°. + """ + groups = detect_terminal_groups(diphenylethane_mols) + assert len(groups) == 2 + for angle, _ in groups: + assert angle == pytest.approx(180.0) + + +def test_custom_flip_angle(phenethyl_mols): + """ + An explicit flip_angle override should be stored and returned as-is, + bypassing the geometric auto-detection. 
+ """ + groups = detect_terminal_groups(phenethyl_mols, flip_angle=90.0) + angle, _ = groups[0] + assert angle == pytest.approx(90.0) + + +def test_detect_two_terminal_groups(diphenylethane_mols): + """ + 1,2-diphenylethane → 1,2-diphenylethanol has two terminal phenyl rings, + each attached via a non-ring CH2/CH anchor, so exactly two groups should + be detected. + """ + groups = detect_terminal_groups(diphenylethane_mols) + assert len(groups) == 2 + + +def test_multiple_groups_unique_pivots(diphenylethane_mols): + """ + The two terminal groups must have distinct pivot atoms (each ring has its + own ipso carbon). + """ + groups = detect_terminal_groups(diphenylethane_mols) + pivot_indices = [indices[1] for _, indices in groups] + assert len(set(pivot_indices)) == 2 + + +def test_multiple_groups_disjoint_mobile(diphenylethane_mols): + """ + The mobile atom sets of the two terminal groups must be disjoint — each + group owns its own ring atoms. + """ + groups = detect_terminal_groups(diphenylethane_mols) + mobile_0 = set(groups[0][1][2:]) + mobile_1 = set(groups[1][1][2:]) + assert mobile_0.isdisjoint(mobile_1) + + +# --------------------------------------------------------------------------- +# Config validation +# --------------------------------------------------------------------------- + + +def test_config_terminal_flip_frequency_none(): + """terminal_flip_frequency defaults to None (disabled).""" + config = Config() + assert config.terminal_flip_frequency is None + + +def test_config_terminal_flip_frequency_valid(): + """A valid time string is parsed to a Sire GeneralUnit.""" + config = Config(terminal_flip_frequency="1 ps") + assert config.terminal_flip_frequency is not None + assert str(config.terminal_flip_frequency).startswith("1") + + +def test_config_terminal_flip_frequency_bad_units(): + """Non-time units should raise ValueError.""" + with pytest.raises(ValueError, match="units are invalid"): + Config(terminal_flip_frequency="5 A") + + +def 
test_config_terminal_flip_frequency_bad_type(): + """A non-string value should raise TypeError.""" + config = Config() + with pytest.raises(TypeError, match="must be of type 'str'"): + config.terminal_flip_frequency = 5 + + +def test_config_terminal_flip_angle_none(): + """terminal_flip_angle defaults to None (auto-detect).""" + config = Config() + assert config.terminal_flip_angle is None + + +def test_config_terminal_flip_angle_valid(): + """A valid angle string is parsed to a Sire GeneralUnit.""" + config = Config(terminal_flip_angle="180 degrees") + assert config.terminal_flip_angle is not None + + +def test_config_terminal_flip_angle_bad_units(): + """Non-angle units should raise ValueError.""" + with pytest.raises(ValueError, match="units are invalid"): + Config(terminal_flip_angle="5 A") + + +def test_config_terminal_flip_angle_bad_type(): + """A non-string value should raise TypeError.""" + config = Config() + with pytest.raises(TypeError, match="must be of type 'str'"): + config.terminal_flip_angle = 180 + + +# --------------------------------------------------------------------------- +# TerminalFlipSampler +# --------------------------------------------------------------------------- + + +def test_sampler_initial_state(phenethyl_mols): + """ + A freshly constructed sampler should report zero attempts and zero + accepted moves. + """ + groups = detect_terminal_groups(phenethyl_mols) + sampler = TerminalFlipSampler(groups, 300.0) + assert sampler.num_attempted == 0 + assert sampler.num_accepted == 0 + assert sampler.acceptance_rate == 0.0 + + +def test_sampler_move(phenethyl_mols): + """ + After one call to move(), num_attempted should be 1 and the statistics + should be internally consistent. The outcome (accepted or rejected) + depends on the torsional energy around the exocyclic bond and is not + deterministic for an arbitrary starting configuration. 
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + config = Config( + platform="cpu", + output_directory=tmpdir, + num_lambda=1, + lambda_values=[0.0], + terminal_flip_frequency="4fs", + energy_frequency="4fs", + checkpoint_frequency="4fs", + frame_frequency="4fs", + ) + runner = Runner(phenethyl_mols, config) + + # Create a dynamics object to obtain an OpenMM context. + dynamics_kwargs = runner._dynamics_kwargs.copy() + dynamics = runner._system.dynamics(**dynamics_kwargs) + + groups = detect_terminal_groups(phenethyl_mols) + sampler = TerminalFlipSampler(groups, 300.0) + + sampler.move(dynamics.context()) + + assert sampler.num_attempted == 1 + assert sampler.num_accepted in (0, 1) + assert 0.0 <= sampler.acceptance_rate <= 1.0 + + +def test_rotate(phenethyl_mols): + """ + _rotate() must: + - leave the anchor and pivot atoms stationary, + - move all mobile atoms, + - restore all mobile atom positions after two consecutive 180° flips. + """ + from openmm import unit as omm_unit + + with tempfile.TemporaryDirectory() as tmpdir: + config = Config( + platform="cpu", + output_directory=tmpdir, + num_lambda=1, + lambda_values=[0.0], + terminal_flip_frequency="4fs", + energy_frequency="4fs", + checkpoint_frequency="4fs", + frame_frequency="4fs", + ) + runner = Runner(phenethyl_mols, config) + + dynamics_kwargs = runner._dynamics_kwargs.copy() + dynamics = runner._system.dynamics(**dynamics_kwargs) + context = dynamics.context() + + groups = detect_terminal_groups(phenethyl_mols) + sampler = TerminalFlipSampler(groups, 300.0) + + _, indices = groups[0] + anchor_idx = indices[0] + pivot_idx = indices[1] + mobile_indices = indices[2:] + + pos_before = ( + context.getState(getPositions=True) + .getPositions(asNumpy=True) + .value_in_unit(omm_unit.nanometer) + ) + + sampler._rotate(context, 0, 180.0) + + pos_after = ( + context.getState(getPositions=True) + .getPositions(asNumpy=True) + .value_in_unit(omm_unit.nanometer) + ) + + # Anchor and pivot must not move. 
+ np.testing.assert_allclose( + pos_after[anchor_idx], pos_before[anchor_idx], atol=1e-5 + ) + np.testing.assert_allclose( + pos_after[pivot_idx], pos_before[pivot_idx], atol=1e-5 + ) + + # All mobile atoms must have moved. + for idx in mobile_indices: + assert not np.allclose(pos_after[idx], pos_before[idx], atol=1e-5), ( + f"Mobile atom {idx} did not move after 180° rotation" + ) + + # A second 180° flip must restore all mobile atom positions. + sampler._rotate(context, 0, 180.0) + pos_restored = ( + context.getState(getPositions=True) + .getPositions(asNumpy=True) + .value_in_unit(omm_unit.nanometer) + ) + np.testing.assert_allclose( + pos_restored[mobile_indices], pos_before[mobile_indices], atol=1e-5 + ) + + +# --------------------------------------------------------------------------- +# Runner integration +# --------------------------------------------------------------------------- + + +def test_runner_no_terminal_groups(ethane_methanol): + """ + Setting terminal_flip_frequency on a ring-free molecule should succeed + (0 groups detected) and the simulation should complete normally. + """ + with tempfile.TemporaryDirectory() as tmpdir: + config = Config( + runtime="12fs", + output_directory=tmpdir, + energy_frequency="4fs", + checkpoint_frequency="4fs", + frame_frequency="4fs", + platform="cpu", + max_threads=1, + num_lambda=2, + terminal_flip_frequency="4fs", + ) + runner = Runner(ethane_methanol, config) + assert runner._terminal_groups == [] + runner.run() + + +def test_runner_with_terminal_flip(phenethyl_mols): + """ + With terminal_flip_frequency set and a terminal ring present, the runner + should detect one group and complete the simulation successfully. 
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + config = Config( + runtime="12fs", + output_directory=tmpdir, + energy_frequency="4fs", + checkpoint_frequency="4fs", + frame_frequency="4fs", + platform="cpu", + max_threads=1, + num_lambda=2, + terminal_flip_frequency="4fs", + ) + runner = Runner(phenethyl_mols, config) + assert len(runner._terminal_groups) == 1 + runner.run() + + +def test_runner_validation_frequency_multiple(ethane_methanol): + """ + terminal_flip_frequency must be a multiple of energy_frequency. + A non-multiple should raise ValueError during runner initialisation. + """ + with tempfile.TemporaryDirectory() as tmpdir: + config = Config( + output_directory=tmpdir, + platform="cpu", + num_lambda=2, + energy_frequency="4fs", + terminal_flip_frequency="3fs", # not a multiple of 4fs + ) + with pytest.raises( + ValueError, match="must be a multiple of 'energy_frequency'" + ): + Runner(ethane_methanol, config) diff --git a/tests/schedules/test_ring_break.py b/tests/schedules/test_ring_break.py new file mode 100644 index 00000000..bfc01466 --- /dev/null +++ b/tests/schedules/test_ring_break.py @@ -0,0 +1,455 @@ +import pytest +import sire as sr + +pytestmark = pytest.mark.skipif( + "openmm" not in sr.convert.supported_formats(), + reason="openmm support is not available", +) + +# Energy threshold (kcal/mol) for the "active" state: kappa=1 should give +# clearly non-zero CustomBondForce energy. +_ACTIVE_THRESHOLD = 0.1 + + +def _build_dynamics(mols, schedule, swap_end_states): + """ + Construct a dynamics context for the ring-break system with Morse restraints. 
+ """ + from somd2._utils._somd1 import make_compatible + + mols = mols.clone() + + hard_restraints, mols = sr.restraints.morse_potential( + mols, + de="150 kcal/mol", + auto_parametrise=True, + direct_morse_replacement=True, + name="morse_hard", + ) + soft_restraints, _ = sr.restraints.morse_potential( + mols, + atoms0=hard_restraints[0].atom0(), + atoms1=hard_restraints[0].atom1(), + r0=hard_restraints[0].r0(), + k="125 kcal mol-1 A-2", + auto_parametrise=False, + de="50 kcal mol-1", + name="morse_soft", + ) + mols = make_compatible(mols) + + return mols.dynamics( + constraint="h_bonds", + perturbable_constraint="h_bonds_not_heavy_perturbed", + cutoff="10A", + cutoff_type="rf", + schedule=schedule, + dynamic_constraints=True, + include_constrained_energies=False, + swap_end_states=swap_end_states, + map={ + "ghosts_are_light": True, + "check_for_h_by_max_mass": True, + "check_for_h_by_mass": False, + "check_for_h_by_element": False, + "check_for_h_by_ambertype": False, + "fix_perturbable_zero_sigmas": True, + "restraints": [hard_restraints, soft_restraints], + }, + ) + + +def _force_energy_kcal(d, lam, force_name): + """Return the energy (kcal/mol) for *force_name* at *lam*.""" + import openmm + + context = d.context() + d.set_lambda(lam, update_constraints=True) + grp = context._force_group_map[force_name] + state = context.getState(getEnergy=True, groups=(1 << grp)) + return state.getPotentialEnergy().value_in_unit(openmm.unit.kilocalories_per_mole) + + +@pytest.fixture(scope="module") +def forward_dynamics(syk_ring_break_mols): + """Forward ring-breaking dynamics: swap_end_states=False, ring_break_morph.""" + from somd2._utils._schedules import ring_break_morph + + return _build_dynamics( + syk_ring_break_mols, ring_break_morph(), swap_end_states=False + ) + + +@pytest.fixture(scope="module") +def reverse_dynamics(syk_ring_break_mols): + """ + Reverse ring-making dynamics: swap_end_states=True, reverse_ring_break_morph. 
+ + reverse_ring_break_morph() == ring_break_morph().reverse(), so this fixture + also implicitly tests the reversed schedule path used by the runner when a + ring-breaking perturbation is run with swap_end_states=True. + """ + from somd2._utils._schedules import reverse_ring_break_morph + + return _build_dynamics( + syk_ring_break_mols, reverse_ring_break_morph(), swap_end_states=True + ) + + +# ── force-presence tests ────────────────────────────────────────────────────── + + +def test_forward_has_ring_break_not_ring_make(forward_dynamics): + """ + Forward direction: ring-break CustomBondForce is registered; ring-make is absent. + """ + fmap = forward_dynamics.context()._force_group_map + assert "ring-break" in fmap, "ring-break force group missing for forward direction" + assert "ring-make" not in fmap, ( + "ring-make force group should not exist for forward direction" + ) + + +def test_reverse_has_ring_make_not_ring_break(reverse_dynamics): + """ + Reverse direction (swap_end_states=True): ring-make CustomBondForce is + registered; ring-break is absent. + """ + fmap = reverse_dynamics.context()._force_group_map + assert "ring-make" in fmap, "ring-make force group missing for reverse direction" + assert "ring-break" not in fmap, ( + "ring-break force group should not exist for reverse direction" + ) + + +# ── schedule kappa/alpha tests ──────────────────────────────────────────────── +# +# These tests verify kappa and alpha values by calling schedule.morph() directly, +# using the same initial/final values that lambdalever passes in production. +# They are completely independent of Sire's energy formula and will continue to +# work correctly regardless of changes to the softcore implementation. 
+ +# ring_break_morph kappa/alpha points (3 equal stages: [0,1/3), [1/3,2/3), [2/3,1]): +# λ=0.00 potential_swap start kappa=0, alpha=1 +# λ=0.15 potential_swap mid kappa=0, alpha=1 +# λ=1/3 restraints_off start kappa=0, alpha=1 (within-stage lam=0) +# λ=0.45 restraints_off mid kappa=0.35, alpha=0.65 (within-stage lam=0.35) +# λ=0.50 restraints_off mid kappa=0.5, alpha=0.5 (within-stage lam=0.5) +# λ=0.55 restraints_off mid kappa=0.65, alpha=0.35 (within-stage lam=0.65) +# λ=0.60 restraints_off near end kappa=0.8, alpha=0.2 (within-stage lam=0.8) +# λ=2/3 morph start kappa=1, alpha=0 +# λ=0.85 morph mid kappa=1, alpha=0 +# λ=1.00 morph end kappa=1, alpha=0 +_FWD_KAPPA_ALPHA = [ + (0.00, 0.0, 1.0), + (0.15, 0.0, 1.0), + (1 / 3, 0.0, 1.0), + (0.45, 0.35, 0.65), + (0.50, 0.5, 0.5), + (0.55, 0.65, 0.35), + (0.60, 0.8, 0.2), + (2 / 3, 1.0, 0.0), + (0.85, 1.0, 0.0), + (1.00, 1.0, 0.0), +] + +# reverse_ring_break_morph ring-make kappa/alpha points (mirror of forward): +# λ=0.00 reversed morph start kappa=1, alpha=0 +# λ=0.15 reversed morph mid kappa=1, alpha=0 +# λ=1/3 reversed restraints_off start kappa=1, alpha=0 (within-stage lam=0) +# λ=0.45 reversed restraints_off mid kappa=0.65, alpha=0.35 +# λ=0.50 reversed restraints_off mid kappa=0.5, alpha=0.5 +# λ=0.55 reversed restraints_off mid kappa=0.35, alpha=0.65 +# λ=0.60 reversed restraints_off near end kappa=0.2, alpha=0.8 +# λ=2/3 reversed potential_swap start kappa=0, alpha=1 +# λ=0.85 reversed potential_swap mid kappa=0, alpha=1 +# λ=1.00 reversed potential_swap end kappa=0, alpha=1 +_REV_KAPPA_ALPHA = [ + (0.00, 1.0, 0.0), + (0.15, 1.0, 0.0), + (1 / 3, 1.0, 0.0), + (0.45, 0.65, 0.35), + (0.50, 0.5, 0.5), + (0.55, 0.35, 0.65), + (0.60, 0.2, 0.8), + (2 / 3, 0.0, 1.0), + (0.85, 0.0, 1.0), + (1.00, 0.0, 1.0), +] + +# ring_break_morph coul_kappa points (initial=0, final=1): +# λ=0.00–2/3 all pre-morph stages coul_kappa=0 +# λ=2/3 morph start coul_kappa=0 (within-stage lam=0) +# λ=0.85 morph mid coul_kappa=0.55 
((0.85-2/3)*3) +# λ=1.00 morph end coul_kappa=1.0 +_FWD_COUL_KAPPA = [ + (0.00, 0.0), + (0.15, 0.0), + (1 / 3, 0.0), + (0.45, 0.0), + (0.50, 0.0), + (0.55, 0.0), + (0.60, 0.0), + (2 / 3, 0.0), + (0.85, 0.55), + (1.00, 1.0), +] + +# reverse_ring_break_morph ring-make coul_kappa points (initial=1, final=0): +# λ=0.00 reversed morph start coul_kappa=1.0 +# λ=0.15 reversed morph mid coul_kappa=0.55 (1 - 0.15*3) +# λ=1/3 reversed morph end coul_kappa=0.0 +# λ=0.45–1.0 restraints_off/potential_swap coul_kappa=0 +_REV_COUL_KAPPA = [ + (0.00, 1.0), + (0.15, 0.55), + (1 / 3, 0.0), + (0.45, 0.0), + (0.50, 0.0), + (0.60, 0.0), + (2 / 3, 0.0), + (0.85, 0.0), + (1.00, 0.0), +] + + +@pytest.mark.parametrize("lam,expected_kappa,expected_alpha", _FWD_KAPPA_ALPHA) +def test_ring_break_morph_schedule(lam, expected_kappa, expected_alpha): + """ + ring_break_morph() produces the correct ring-break kappa and alpha at each λ. + + Uses lambdalever's initial/final values (kappa: 0→1, alpha: 1→0) to ensure + the test matches production behaviour exactly. + """ + from somd2._utils._schedules import ring_break_morph + + s = ring_break_morph() + kappa = s.morph("ring-break", "kappa", 0.0, 1.0, lam) + alpha = s.morph("ring-break", "alpha", 1.0, 0.0, lam) + assert abs(kappa - expected_kappa) < 1e-10, ( + f"ring-break kappa={kappa:.8f} at λ={lam:.4f}, expected {expected_kappa}" + ) + assert abs(alpha - expected_alpha) < 1e-10, ( + f"ring-break alpha={alpha:.8f} at λ={lam:.4f}, expected {expected_alpha}" + ) + + +@pytest.mark.parametrize("lam,expected_kappa,expected_alpha", _REV_KAPPA_ALPHA) +def test_reverse_ring_break_morph_schedule(lam, expected_kappa, expected_alpha): + """ + reverse_ring_break_morph() produces the correct ring-make kappa and alpha at each λ. + + Uses lambdalever's initial/final values (kappa: 1→0, alpha: 0→1) to ensure + the test matches production behaviour exactly. 
+    """
+    from somd2._utils._schedules import reverse_ring_break_morph
+
+    s = reverse_ring_break_morph()
+    kappa = s.morph("ring-make", "kappa", 1.0, 0.0, lam)
+    alpha = s.morph("ring-make", "alpha", 0.0, 1.0, lam)
+    assert abs(kappa - expected_kappa) < 1e-10, (
+        f"ring-make kappa={kappa:.8f} at λ={lam:.4f}, expected {expected_kappa}"
+    )
+    assert abs(alpha - expected_alpha) < 1e-10, (
+        f"ring-make alpha={alpha:.8f} at λ={lam:.4f}, expected {expected_alpha}"
+    )
+
+
+@pytest.mark.parametrize("lam,expected_coul_kappa", _FWD_COUL_KAPPA)
+def test_ring_break_morph_coul_kappa(lam, expected_coul_kappa):
+    """
+    ring_break_morph() pins coul_kappa=0 through all pre-morph stages and ramps
+    it 0→1 during morph only, so Coulomb only activates once atoms are separated.
+
+    Uses lambdalever's initial/final values (coul_kappa: 0→1).
+
+    Each (lam, expected_coul_kappa) pair is taken from the module-level
+    _FWD_COUL_KAPPA table; the morphed value must agree with the expected one
+    to within an absolute tolerance of 1e-10.
+    """
+    from somd2._utils._schedules import ring_break_morph
+
+    s = ring_break_morph()
+    coul_kappa = s.morph("ring-break", "coul_kappa", 0.0, 1.0, lam)
+    assert abs(coul_kappa - expected_coul_kappa) < 1e-10, (
+        f"ring-break coul_kappa={coul_kappa:.8f} at λ={lam:.4f}, "
+        f"expected {expected_coul_kappa}"
+    )
+
+
+@pytest.mark.parametrize("lam,expected_coul_kappa", _REV_COUL_KAPPA)
+def test_reverse_ring_break_morph_coul_kappa(lam, expected_coul_kappa):
+    """
+    reverse_ring_break_morph() ramps ring-make coul_kappa 1→0 through the
+    reversed morph stage and pins it to 0 in all subsequent stages.
+
+    Uses lambdalever's initial/final values (coul_kappa: 1→0).
+
+    Each (lam, expected_coul_kappa) pair is taken from the module-level
+    _REV_COUL_KAPPA table; the morphed value must agree with the expected one
+    to within an absolute tolerance of 1e-10.
+    """
+    from somd2._utils._schedules import reverse_ring_break_morph
+
+    s = reverse_ring_break_morph()
+    coul_kappa = s.morph("ring-make", "coul_kappa", 1.0, 0.0, lam)
+    assert abs(coul_kappa - expected_coul_kappa) < 1e-10, (
+        f"ring-make coul_kappa={coul_kappa:.8f} at λ={lam:.4f}, "
+        f"expected {expected_coul_kappa}"
+    )
+
+
+# ── energy magnitude tests ────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("lam", [2 / 3, 1.0])
+def test_ring_break_active_in_morph(forward_dynamics, lam):
+    """
+    Ring-break energy is clearly non-zero (kappa=1) throughout the morph stage.
+    """
+    e = _force_energy_kcal(forward_dynamics, lam, "ring-break")
+    assert abs(e) > _ACTIVE_THRESHOLD, (
+        f"ring-break energy {e:.4f} kcal/mol at λ={lam:.4f} is below active "
+        f"threshold {_ACTIVE_THRESHOLD} kcal/mol (kappa should be 1)"
+    )
+
+
+def test_ring_make_active_at_lambda_zero(reverse_dynamics):
+    """
+    Ring-make energy is non-zero at λ=0: the morph stage fixes kappa=1
+    so the ring-make interaction is fully on from the start.
+    """
+    e = _force_energy_kcal(reverse_dynamics, 0.0, "ring-make")
+    assert abs(e) > _ACTIVE_THRESHOLD, (
+        f"ring-make energy {e:.4f} kcal/mol at λ=0 is below active threshold "
+        f"{_ACTIVE_THRESHOLD} kcal/mol (kappa should be 1 in morph stage)"
+    )
+
+
+def test_ring_make_inactive_at_lambda_one(reverse_dynamics):
+    """
+    Ring-make energy is near-zero at λ=1 (potential_swap end, kappa=0).
+
+    At λ=1 the system is at the ring-open end state; the hard-hard correction
+    term in the CustomBondForce is small because the pair is at nonbonded
+    separation, so the absolute energy remains below the active threshold.
+    """
+    e = _force_energy_kcal(reverse_dynamics, 1.0, "ring-make")
+    assert abs(e) < _ACTIVE_THRESHOLD, (
+        f"ring-make energy {e:.4f} kcal/mol at λ=1 exceeds threshold "
+        f"{_ACTIVE_THRESHOLD} kcal/mol (kappa should be 0)"
+    )
+
+
+# ── energy symmetry tests ─────────────────────────────────────────────────────
+#
+# The invariant ring_break_morph().reverse() == reverse_ring_break_morph() means
+# that the softcore kappa/alpha values at (forward, λ) and (reverse, 1-λ) are
+# equal. Both forces act on the same bond (the original ring_breaking_bond,
+# which swap_end_states=True maps to ring_making_pairs), so the energies must
+# also match. The hard-hard correction appears identically on both sides and
+# cancels in the comparison, making this test robust to formula changes.
+#
+# Test points span zero and non-zero energy regions:
+# λ=0.0 → forward kappa=0, reverse at 1-λ=1.0 kappa=0 (both ≈0)
+# λ=0.55 → forward restraints_off (kappa=0.65), reverse restraints_off at 0.45 (kappa=0.65)
+# λ=2/3 → forward morph start (kappa=1), reverse restraints_off start at 1/3 (kappa=1)
+# λ=0.85 → forward morph (kappa=1), reverse reversed-morph at 0.15 (kappa=1)
+# λ=1.0 → forward morph end (kappa=1), reverse at 0.0 reversed-morph (kappa=1)
+
+
+@pytest.mark.parametrize("lam", [0.0, 0.55, 2 / 3, 0.85, 1.0])
+def test_energy_symmetry_forward_reverse(forward_dynamics, reverse_dynamics, lam):
+    """
+    Single-point energy symmetry: E_ring_break_forward(λ) == E_ring_make_reverse(1-λ).
+
+    Verifies that reverse_ring_break_morph() == ring_break_morph().reverse() and
+    that the mirrored kappa/alpha produce identical corrections on the same bond.
+
+    The forward energy at λ and the reverse energy at 1-λ must agree to within
+    an absolute tolerance of 1e-4.
+    """
+    e_fwd = _force_energy_kcal(forward_dynamics, lam, "ring-break")
+    e_rev = _force_energy_kcal(reverse_dynamics, 1.0 - lam, "ring-make")
+    assert abs(e_fwd - e_rev) < 1e-4, (
+        f"Energy symmetry broken at λ={lam:.4f}: "
+        f"ring-break forward = {e_fwd:.6f} kcal/mol, "
+        f"ring-make reverse(1-λ={1 - lam:.4f}) = {e_rev:.6f} kcal/mol, "
+        f"difference = {abs(e_fwd - e_rev):.2e} kcal/mol"
+    )
+
+
+def test_schedule_symmetry():
+    """
+    reverse_ring_break_morph() must equal ring_break_morph().reverse().
+
+    Checks that the simplified implementation produces identical schedules by
+    comparing the kappa, alpha and coul_kappa levers of both the ring-break
+    and ring-make forces on a grid of 21 evenly spaced lambda points in
+    [0, 1], using the initial/final values listed in the table below. Values
+    must agree to within an absolute tolerance of 1e-12.
+    """
+    from somd2._utils._schedules import ring_break_morph, reverse_ring_break_morph
+
+    fwd = ring_break_morph()
+    rev = reverse_ring_break_morph()
+    rev_via_reverse = fwd.reverse()
+
+    # (force, lever, initial, final) combinations to compare. Built once,
+    # outside the lambda loop, because it does not depend on lambda.
+    lever_cases = [
+        ("ring-break", "kappa", 0.0, 1.0),
+        ("ring-break", "alpha", 1.0, 0.0),
+        ("ring-break", "coul_kappa", 0.0, 1.0),
+        ("ring-make", "kappa", 1.0, 0.0),
+        ("ring-make", "alpha", 0.0, 1.0),
+        ("ring-make", "coul_kappa", 1.0, 0.0),
+    ]
+
+    test_lambdas = [i / 20 for i in range(21)]
+    for lam in test_lambdas:
+        for force, lever, init, fin in lever_cases:
+            v_rev = rev.morph(force, lever, init, fin, lam)
+            v_rev2 = rev_via_reverse.morph(force, lever, init, fin, lam)
+            assert abs(v_rev - v_rev2) < 1e-12, (
+                f"Mismatch for {force}/{lever} at λ={lam:.2f}: "
+                f"reverse_ring_break_morph={v_rev}, ring_break_morph().reverse()={v_rev2}"
+            )
+
+
+def test_force_contribution(forward_dynamics, syk_ring_break_mols):
+    """
+    Verify the softcore CustomBondForce contribution at each end state.
+
+    At λ=0 the softcore is fully off (α=1, LJ=0; coul_kappa=0, Coulomb=0) so
+    the total energy should match a system without the force to within numerical
+    precision. At λ=1 the exclusion has morphed away, so without the force the
+    ring-break pair sees a large LJ repulsion at bonded distance; with the force
+    the softcore smooths this repulsion, giving a substantially lower energy.
+
+    The reference system is built from a clone of ``syk_ring_break_mols`` with
+    the ``ring_breaking_bonds`` property removed, so the fixture's own
+    molecules are not edited. The ``forward_dynamics`` fixture is left at λ=1
+    when this test returns.
+    """
+
+    from somd2._utils._schedules import ring_break_morph
+
+    # Delete the ring_breaking_bonds property from the perturbable molecule
+    # so that the soft-core ring-breaking force isn't created.
+    mols = syk_ring_break_mols.clone()
+    mol = mols["perturbable"].molecules()[0]
+    cursor = mol.cursor()
+    del cursor["ring_breaking_bonds"]
+    mols.update(cursor.commit())
+
+    # Build a dynamics object with the same schedule but without the ring-breaking force.
+    d = _build_dynamics(mols, ring_break_morph(), swap_end_states=False)
+
+    # Get the λ=0 energies. At this end state the softcore is fully off (α=1
+    # gives LJ=0; coul_kappa=0 gives Coulomb=0), so the force contributes
+    # nothing and both systems should agree to within numerical precision.
+    d.set_lambda(0.0, update_constraints=True)
+    forward_dynamics.set_lambda(0.0, update_constraints=True)
+    nrg_no_force = d.current_potential_energy().value()
+    nrg_with_force = forward_dynamics.current_potential_energy().value()
+
+    # NOTE(review): the 0.05 tolerance is looser than floating-point round-off;
+    # confirm it is the intended bound for "numerical precision" here.
+    assert abs(nrg_no_force - nrg_with_force) < 0.05, (
+        f"Energy mismatch at λ=0: energy with force = {nrg_with_force:.6f} kcal/mol, "
+        f"energy without force = {nrg_no_force:.6f} kcal/mol, "
+        f"difference = {abs(nrg_with_force - nrg_no_force):.2e} kcal/mol"
+    )
+
+    # Get the λ=1 energies. At this end state the exclusion between the
+    # ring-break atoms has morphed away, so without the force they interact via
+    # the regular NonbondedForce with their perturbed LJ parameters (large sigma)
+    # at bonded distance, giving enormous repulsion. With the softcore force the
+    # repulsion is smoothed, giving a substantially lower energy.
+    d.set_lambda(1.0, update_constraints=True)
+    forward_dynamics.set_lambda(1.0, update_constraints=True)
+    nrg_no_force = d.current_potential_energy().value()
+    nrg_with_force = forward_dynamics.current_potential_energy().value()
+
+    assert nrg_with_force < nrg_no_force, (
+        f"Energy with softcore force ({nrg_with_force:.6f} kcal/mol) should be lower "
+        f"than without ({nrg_no_force:.6f} kcal/mol) at λ=1 with ring-closed geometry: "
+        f"the softcore should smooth the large LJ repulsion when the exclusion morphs away"
+    )