Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 42 additions & 9 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,24 @@

def _determine_iteration_dt(dataset) -> float:
"""
Find out the timestep between each frame in the trajectory.
Determine the time increment between successive iterations
in a MultiStateReporter trajectory.

The timestep is inferred from the serialized MCMC move stored in the
``mcmc_moves`` group of the NetCDF file. Specifically, this assumes the
move defines both a ``timestep`` and ``n_steps`` parameter, such that

dt_iteration = n_steps * timestep

Parameters
----------
dataset : nc.Dataset
Dataset holding the MultiStateReporter generated NetCDF file.
NetCDF dataset produced by ``openmmtools.multistate.MultiStateReporter``.

Returns
-------
float
The timestep in units of picoseconds.
The time between successive iterations, in picoseconds.

Raises
------
Expand Down Expand Up @@ -52,12 +59,14 @@ def _determine_iteration_dt(dataset) -> float:


class FEReader(ReaderBase):
"""A MDAnalysis Reader for NetCDF files created by
`openmmtools.multistate.MultiStateReporter`
"""
MDAnalysis trajectory reader for NetCDF files written by
``openmmtools.multistate.MultiStateReporter``.

Looks along a multistate NetCDF file along one of two axes:
- constant state/lambda (varying replica)
- constant replica (varying lambda)
Exactly one of ``state_id`` or ``replica_id`` must be specified.
"""

_state_id: Optional[int]
Expand All @@ -75,22 +84,29 @@ def __init__(self, filename, convert_units=True, state_id=None, replica_id=None,
Parameters
----------
filename : pathlike or nc.Dataset
path to the .nc file
Path to a MultiStateReporter NetCDF file, or an already-open
``netCDF4.Dataset`` instance.
convert_units : bool
convert positions to Angstrom
If ``True`` (default), positions are converted to Angstroms.
Otherwise, raw OpenMM units (nanometers) are returned.
state_id : Optional[int]
The Hamiltonian state index to extract. Must be defined if
``replica_id`` is not defined. May be negative (see notes below).
replica_id : Optional[int]
The replica index to extract. Must be defined if ``state_id``
is not defined. May be negative (see notes below).

Raises
------
ValueError
If neither or both of ``state_id`` and ``replica_id`` are specified.

Notes
-----
A negative index may be passed to either ``state_id`` or
``replica_id``. This will be interpreted as indexing in reverse
starting from the last state/replica. For example, passing a
value of -2 for ``replica_id`` will select the before last replica.
starting from the last state/replica. For example, ``replica_id=-2``
will select the before last replica.
"""
if not ((state_id is None) ^ (replica_id is None)):
raise ValueError(
Expand Down Expand Up @@ -141,16 +157,31 @@ def n_frames(self) -> int:

@staticmethod
def parse_n_atoms(filename, **kwargs) -> int:
"""
Determine the number of atoms stored in a MultiStateReporter NetCDF file.

Parameters
----------
filename : path-like
Path to the NetCDF file.

Returns
-------
int
Number of atoms in the system.
"""
with nc.Dataset(filename) as ds:
n_atoms = ds.dimensions["atom"].size
return n_atoms

def _read_next_timestep(self, ts=None) -> Timestep:
# Advance the trajectory by one frame.
if (self._frame_index + 1) >= len(self):
raise EOFError
return self._read_frame(self._frame_index + 1)

def _read_frame(self, frame: int) -> Timestep:
# Read a single trajectory frame.
self._frame_index = frame

if self._state_id is not None:
Expand Down Expand Up @@ -187,11 +218,13 @@ def _read_frame(self, frame: int) -> Timestep:

@property
def dt(self) -> float:
# Time difference between successive trajectory frames.
return self._dt

def _reopen(self):
self._frame_index = -1

def close(self):
# Close the underlying NetCDF dataset if owned by this reader.
if self._dataset_owner:
self._dataset.close()
86 changes: 65 additions & 21 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,46 @@


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
"""Makes a Universe and applies some transformations
"""
Construct an MDAnalysis Universe from a MultiState NetCDF trajectory
and apply standard analysis transformations.

The Universe is created using the custom ``FEReader`` to extract a
single state from a multistate simulation.

Identifies two AtomGroups:
- protein, defined as having standard amino acid names, then filtered
down to CA
- ligand, defined as resname UNK

Then applies some transformations.
Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:
- prevents the protein from jumping between periodic images
- moves the ligand to the image closest to the protein
- aligns the entire system to minimise the protein RMSD
(class:`NoJump`)
- moves the ligand to the image closest to the protein (:class:`Minimiser`)
- aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand:
If only a ligand is present:
- prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD

Parameters
----------
top : pathlib.Path or Topology
Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis
topology object.
trj : netCDF4.Dataset
Open NetCDF dataset produced by
``openmmtools.multistate.MultiStateReporter``.
state : int
Thermodynamic state index to extract from the multistate trajectory.

Returns
-------
MDAnalysis.Universe
A Universe with trajectory transformations applied.
"""
u = mda.Universe(
top,
Expand Down Expand Up @@ -72,23 +96,37 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers
def gather_rms_data(
pdb_topology: pathlib.Path, dataset: pathlib.Path, skip: Optional[int] = None
) -> dict[str, list[float]]:
"""Generate structural analysis of RBFE simulation
"""
Compute structural RMSD-based metrics for a multistate RBFE simulation.

For each thermodynamic state (lambda), this function:
- Loads the trajectory using ``FEReader``
- Applies standard PBC-handling and alignment transformations
- Computes protein and ligand structural metrics over time

The following analyses are produced per state:
- 1D protein CA RMSD time series
- 1D ligand RMSD time series
- Ligand center-of-mass displacement from its initial position
(``ligand_wander``)
- Flattened 2D protein RMSD matrix (pairwise RMSD between frames)

Parameters
----------
pdb_topology : pathlib.Path
path to pdb topology
Path to the PDB file defining system topology.
dataset : pathlib.Path
path to nc trajectory
Path to the NetCDF trajectory file produced by a multistate simulation.
skip : int, optional
step at which to progress through the trajectory. by default, selects a
step that produces roughly 500 frames of analysis per replicate

Produces, for each lambda state:
- 1D protein RMSD timeseries 'protein_RMSD'
- ligand RMSD timeseries
- ligand COM motion 'ligand_wander'
- 2D protein RMSD plot
Frame stride for analysis. If ``None``, a stride is chosen such that
approximately 500 frames are analyzed per state.

Returns
-------
dict[str, list]
Dictionary containing per-state analysis results with keys:
``protein_RMSD``, ``ligand_RMSD``, ``ligand_wander``,
``protein_2D_RMSD``, and ``time(ps)``.
"""
output = {
"protein_RMSD": [],
Expand Down Expand Up @@ -183,19 +221,25 @@ def gather_rms_data(


def twoD_RMSD(positions, w: Optional[npt.NDArray]) -> list[float]:
"""2 dimensions RMSD
"""
Compute a flattened 2D RMSD matrix from a trajectory.

For all unique frame pairs ``(i, j)`` with ``i < j``, this function
computes the RMSD between atomic coordinates after optimal alignment.

Parameters
----------
positions : np.ndarray
the protein positions for the entire trajectory
Atomic coordinates for all frames in the trajectory.
w : np.ndarray, optional
weights array
Per-atom weights to use in the RMSD calculation. If ``None``,
all atoms are weighted equally.

Returns
-------
rmsd_matrix : list
a flattened version of the 2d
list of float
Flattened list of RMSD values corresponding to all frame pairs
``(i, j)`` with ``i < j``.
"""
nframes, _, _ = positions.shape

Expand Down
70 changes: 57 additions & 13 deletions src/openfe_analysis/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,32 @@


class NoJump(TransformationBase):
"""Stops an AtomGroup from moving more than half a box length between frames

This transformation prevents an AtomGroup "teleporting" across the box
border between two subsequent frames. This then simplifies the calculation
of motion over time.
"""
Prevent an AtomGroup from jumping between periodic images.

This on-the-fly trajectory transformation removes large apparent
center-of-mass displacements caused by periodic boundary conditions.
If the AtomGroup moves by more than half a box length between
consecutive frames, it is translated by an integer number of box
vectors to keep its motion continuous.

The transformation operates in-place on the AtomGroup coordinates
and is intended to be applied before analyses that rely on smooth
time evolution (e.g. RMSD, COM motion).

Parameters
----------
ag : MDAnalysis.AtomGroup
AtomGroup whose center-of-mass motion should be made continuous.

Notes
-----
- This transformation assumes an orthorhombic unit cell.
- Only translations are applied; no rotations or scaling.
- The correction is based on center-of-mass motion and is therefore
most appropriate for compact groups (e.g. proteins, ligands).
- Must be applied before any alignment transformations to avoid
mixing reference frames.
"""

ag: mda.AtomGroup
Expand Down Expand Up @@ -45,11 +66,31 @@ def _transform(self, ts):


class Minimiser(TransformationBase):
"""Minimises the difference from ags to central_ag by choosing image

This transformation will translate any AtomGroup in *ags* in multiples of
the box vectors in order to minimise the distance between the center of mass
to the center of mass of each ag.
"""
Translate AtomGroups to the nearest periodic image relative to a reference.

This transformation shifts one or more AtomGroups by integer multiples
of the simulation box vectors such that their center of mass is as close
as possible to the center of mass of a reference AtomGroup.

It is commonly used to keep ligands in the same periodic image as a
protein during alchemical or replica-exchange simulations.

Parameters
----------
central_ag : MDAnalysis.AtomGroup
Reference AtomGroup whose center of mass defines the target image.
*ags : MDAnalysis.AtomGroup
One or more AtomGroups to be translated into the closest periodic
image relative to ``central_ag``.

Notes
-----
- This transformation assumes an orthorhombic simulation box.
- Translations are applied independently for each AtomGroup.
- Coordinates are modified in-place.
- This transformation does not prevent inter-frame jumps by itself
and is typically used in combination with :class:`NoJump`.
"""

central_ag: mda.AtomGroup
Expand All @@ -74,10 +115,13 @@ def _transform(self, ts):


class Aligner(TransformationBase):
"""On-the-fly transformation to align a trajectory to minimise RMSD
"""
Align a trajectory to a reference AtomGroup by minimizing RMSD.

centers all coordinates onto origin
rotates **entire universe** to minimise rmsd relative to **ref_ag**
This transformation performs an on-the-fly least-squares alignment
of the entire universe to a reference AtomGroup.
At each frame, the coordinates are translated and rotated to minimize the
RMSD of the atoms relative to their positions in the reference.
"""

ref_pos: npt.NDArray
Expand Down
10 changes: 8 additions & 2 deletions src/openfe_analysis/utils/multistate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def _determine_position_indices(dataset: nc.Dataset) -> NDArray:
indices : NDArray[int]
An ordered array of iteration indices which hold positions.

Raises
------
ValueError
If positions are not written at a consistent interval.

Note
----
This assumes that the indices are equally spaced by a given
Expand Down Expand Up @@ -55,7 +60,8 @@ def _determine_position_indices(dataset: nc.Dataset) -> NDArray:


def _state_to_replica(dataset: nc.Dataset, state_num: int, frame_num: int) -> int:
"""Convert a state index to replica index at a given Dataset frame
"""
Map a thermodynamic state index to the corresponding replica index.

Parameters
----------
Expand Down Expand Up @@ -170,7 +176,7 @@ def _get_unitcell(
dataset: nc.Dataset, replica_index: int, frame_num: int
) -> Optional[Tuple[unit.Quantity]]:
"""
Helper method to extract a unit cell from the stored
Helper method to extract unit cell dimensions from the stored
box vectors in a MultiState reporter generated NetCDF file
at a given state and Dataset frame.

Expand Down
Loading