From 06a36f0bf1b7573a9c1209fe4027a260a5a034c6 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Fri, 8 Aug 2025 14:34:42 +0100
Subject: [PATCH 01/16] Implement running average for force/torque covariance
 matrices: - Replaced per-frame overwrite and post-loop sum/division with
 incremental running average updates, matching the approach used in
 `waterEntropy`. - Added `frame_counts` tracking to compute averages on the
 fly without storing all frame data. - Updated `build_covariance_matrices` and
 `update_force_torque_matrices` to initialize averages on first frame and
 update them in place each step. - Ensures final returned matrices are already
 averaged and representative of all processed timesteps.

---
 CodeEntropy/levels.py | 156 ++++++++++++++++++++++++++++++------------
 1 file changed, 114 insertions(+), 42 deletions(-)

diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py
index 6c58f2d..741b40b 100644
--- a/CodeEntropy/levels.py
+++ b/CodeEntropy/levels.py
@@ -733,40 +733,58 @@ def build_covariance_matrices(
         number_frames,
     ):
         """
-        Construct force and torque covariance matrices for all molecules and levels.
+        Construct average force and torque covariance matrices for all molecules and
+        entropy levels.
 
-        Parameters:
-            entropy_manager (EntropyManager): Instance of the EntropyManager
-            reduced_atom (Universe): The reduced atom selection.
-            number_molecules (int): Number of molecules in the system.
-            levels (list): List of entropy levels per molecule.
-            start (int): Start frame index.
-            end (int): End frame index.
-            step (int): Step size for frame iteration.
-            number_frames (int): Total number of frames to process.
+        Parameters
+        ----------
+        entropy_manager : EntropyManager
+            Instance of the EntropyManager.
+        reduced_atom : Universe
+            The reduced atom selection.
+        levels : dict
+            Dictionary mapping molecule IDs to lists of entropy levels.
+        groups : dict
+            Dictionary mapping group IDs to lists of molecule IDs.
+        start : int
+            Start frame index.
+        end : int
+            End frame index.
+        step : int
+            Step size for frame iteration.
+        number_frames : int
+            Total number of frames to process.
 
-        Returns:
-            tuple: A tuple containing:
-                - force_matrices (dict): Force covariance matrices by level.
-                - torque_matrices (dict): Torque covariance matrices by level.
+        Returns
+        -------
+        tuple
+            force_avg : dict
+                Averaged force covariance matrices by entropy level.
+            torque_avg : dict
+                Averaged torque covariance matrices by entropy level.
         """
         number_groups = len(groups)
-        force_matrices = {
+
+        force_avg = {
             "ua": {},
             "res": [None] * number_groups,
             "poly": [None] * number_groups,
         }
-        torque_matrices = {
+        torque_avg = {
             "ua": {},
             "res": [None] * number_groups,
             "poly": [None] * number_groups,
         }
+        frame_counts = {
+            "ua": {},
+            "res": np.zeros(number_groups, dtype=int),
+            "poly": np.zeros(number_groups, dtype=int),
+        }
 
         for timestep in reduced_atom.trajectory[start:end:step]:
             time_index = timestep.frame - start
 
-            for group_id in groups.keys():
-                molecules = groups[group_id]
+            for group_id, molecules in groups.items():
                 for mol_id in molecules:
                     mol = entropy_manager._get_molecule_container(reduced_atom, mol_id)
                     for level in levels[mol_id]:
@@ -778,11 +796,12 @@ def build_covariance_matrices(
                             levels[mol_id],
                             time_index,
                             number_frames,
-                            force_matrices,
-                            torque_matrices,
+                            force_avg,
+                            torque_avg,
+                            frame_counts,
                         )
 
-        return force_matrices, torque_matrices
+        return force_avg, torque_avg
 
     def update_force_torque_matrices(
         self,
@@ -793,22 +812,55 @@ def update_force_torque_matrices(
         level_list,
         time_index,
         num_frames,
-        force_matrices,
-        torque_matrices,
+        force_avg,
+        torque_avg,
+        frame_counts,
     ):
         """
-        Update force and torque matrices for a given molecule and entropy level.
+        Update the running averages of force and torque covariance matrices
+        for a given molecule and entropy level.
 
-        Parameters:
-            entropy_manager (EntropyManager): Instance of the EntropyManager
-            mol (AtomGroup): The molecule to process.
-            group_id (int): Index of the group.
-            level (str): Current entropy level ("united_atom", "residue", or "polymer").
-            level_list (list): List of levels for the molecule.
-            time_index (int): Index of the current frame.
-            num_frames (int): Total number of frames.
-            force_matrices (dict): Dictionary of force matrices to update.
-            torque_matrices (dict): Dictionary of torque matrices to update.
+        This function computes the force and torque covariance matrices for the
+        current frame and updates the existing averages in-place using the incremental
+        mean formula:
+
+            new_avg = old_avg + (value - old_avg) / n
+
+        where n is the number of frames processed so far for that molecule/level
+        combination. This ensures that the averages are maintained without storing
+        all previous frame data.
+
+        Parameters
+        ----------
+        entropy_manager : EntropyManager
+            Instance of the EntropyManager.
+        mol : AtomGroup
+            The molecule to process.
+        group_id : int
+            Index of the group to which the molecule belongs.
+        level : str
+            Current entropy level ("united_atom", "residue", or "polymer").
+        level_list : list
+            List of entropy levels for the molecule.
+        time_index : int
+            Index of the current frame relative to the start of the trajectory slice.
+        num_frames : int
+            Total number of frames to process.
+        force_avg : dict
+            Dictionary holding the running average force matrices, keyed by entropy
+            level.
+        torque_avg : dict
+            Dictionary holding the running average torque matrices, keyed by entropy
+            level.
+        frame_counts : dict
+            Dictionary holding the count of frames processed for each molecule/level
+            combination.
+
+        Returns
+        -------
+        None
+            Updates are performed in-place on `force_avg`, `torque_avg`, and
+            `frame_counts`.
         """
         highest = level == level_list[-1]
 
@@ -825,11 +877,19 @@ def update_force_torque_matrices(
                     level,
                     num_frames,
                     highest,
-                    force_matrices["ua"].get(key),
-                    torque_matrices["ua"].get(key),
+                    None if key not in force_avg["ua"] else force_avg["ua"][key],
+                    None if key not in torque_avg["ua"] else torque_avg["ua"][key],
                 )
-                force_matrices["ua"][key] = f_mat
-                torque_matrices["ua"][key] = t_mat
+
+                if key not in force_avg["ua"]:
+                    force_avg["ua"][key] = f_mat.copy()
+                    torque_avg["ua"][key] = t_mat.copy()
+                    frame_counts["ua"][key] = 1
+                else:
+                    frame_counts["ua"][key] += 1
+                    n = frame_counts["ua"][key]
+                    force_avg["ua"][key] += (f_mat - force_avg["ua"][key]) / n
+                    torque_avg["ua"][key] += (t_mat - torque_avg["ua"][key]) / n
 
         elif level in ["residue", "polymer"]:
             mol.trajectory[time_index]
@@ -839,11 +899,23 @@ def update_force_torque_matrices(
                 level,
                 num_frames,
                 highest,
-                force_matrices[key][group_id],
-                torque_matrices[key][group_id],
+                None if force_avg[key][group_id] is None else force_avg[key][group_id],
+                (
+                    None
+                    if torque_avg[key][group_id] is None
+                    else torque_avg[key][group_id]
+                ),
             )
-            force_matrices[key][group_id] = f_mat
-            torque_matrices[key][group_id] = t_mat
+
+            if force_avg[key][group_id] is None:
+                force_avg[key][group_id] = f_mat.copy()
+                torque_avg[key][group_id] = t_mat.copy()
+                frame_counts[key][group_id] = 1
+            else:
+                frame_counts[key][group_id] += 1
+                n = frame_counts[key][group_id]
+                force_avg[key][group_id] += (f_mat - force_avg[key][group_id]) / n
+                torque_avg[key][group_id] += (t_mat - torque_avg[key][group_id]) / n
 
     def filter_zero_rows_columns(self, arg_matrix):
         """

From 1497258e99221f6956691489f9a9adabaddccaf1 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Mon, 11 Aug 2025 15:46:49 +0100
Subject: [PATCH 02/16] Refactor matrix averaging logic for modularity: -
 Removed division by `n_frames` in `entropy.py` to retain running averages -
 Replaced `+=` with `=` in `get_matrices` to avoid accumulation across frames
 - Simplified force and torque matrix updates in `levels.py` for consistency

---
 CodeEntropy/entropy.py | 4 ----
 CodeEntropy/levels.py  | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 9451bb3..9296bc5 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -343,11 +343,9 @@ def _process_united_atom_entropy(
 
             f_matrix = force_matrix[key]
             f_matrix = self._level_manager.filter_zero_rows_columns(f_matrix)
-            f_matrix = f_matrix / number_frames
 
             t_matrix = torque_matrix[key]
             t_matrix = self._level_manager.filter_zero_rows_columns(t_matrix)
-            t_matrix = t_matrix / number_frames
 
             S_trans_res = ve.vibrational_entropy_calculation(
                 f_matrix, "force", self._args.temperature, highest
@@ -395,10 +393,8 @@ def _process_vibrational_entropy(
             level.
         """
         force_matrix = self._level_manager.filter_zero_rows_columns(force_matrix)
-        force_matrix = force_matrix / number_frames
 
         torque_matrix = self._level_manager.filter_zero_rows_columns(torque_matrix)
-        torque_matrix = torque_matrix / number_frames
 
         S_trans = ve.vibrational_entropy_calculation(
             force_matrix, "force", self._args.temperature, highest
diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py
index 741b40b..ab28ec5 100644
--- a/CodeEntropy/levels.py
+++ b/CodeEntropy/levels.py
@@ -171,7 +171,7 @@ def get_matrices(
                 f"{force_matrix.shape}, new {force_block.shape}"
             )
         else:
-            force_matrix += force_block
+            force_matrix = force_block
 
         if torque_matrix is None:
             torque_matrix = np.zeros_like(torque_block)
@@ -181,7 +181,7 @@ def get_matrices(
                 f"{torque_matrix.shape}, new {torque_block.shape}"
             )
         else:
-            torque_matrix += torque_block
+            torque_matrix = torque_block
 
         return force_matrix, torque_matrix
 

From d3c85d0054cc42a8fd9dc2272996d45fe86f2b6a Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Mon, 11 Aug 2025 16:09:36 +0100
Subject: [PATCH 03/16] refactor: warn on negative eigenvalues instead of
 halting with error

---
 CodeEntropy/entropy.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 9296bc5..6030402 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -613,12 +613,9 @@ def frequency_calculation(self, lambdas, temp):
         lambdas = np.array(lambdas)  # Ensure input is a NumPy array
         logger.debug(f"Eigenvalues (lambdas): {lambdas}")
 
-        # Check for negatives and raise an error if any are found
-        if np.any(lambdas < 0):
-            logger.error(f"Negative eigenvalues encountered: {lambdas[lambdas < 0]}")
-            raise ValueError(
-                f"Negative eigenvalues encountered: {lambdas[lambdas < 0]}"
-            )
+        # Check for negatives and raise a warning if any are found
+        if np.any(lambdas <= 0):
+            logger.warning(f"Negative eigenvalues encountered: {lambdas[lambdas <= 0]}")
 
         # Compute frequencies safely
         frequencies = 1 / (2 * pi) * np.sqrt(lambdas / kT)

From b516673128e6d5e10726ba49765b2ef3a302a850 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Tue, 12 Aug 2025 09:12:52 +0100
Subject: [PATCH 04/16] Filter out the negative eigenvalues in the frequencies
 calculations and set them to 0

---
 CodeEntropy/entropy.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 6030402..2843f70 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -616,6 +616,7 @@ def frequency_calculation(self, lambdas, temp):
         # Check for negatives and raise a warning if any are found
         if np.any(lambdas <= 0):
             logger.warning(f"Negative eigenvalues encountered: {lambdas[lambdas <= 0]}")
+            lambdas[lambdas <= 0] = 0
 
         # Compute frequencies safely
         frequencies = 1 / (2 * pi) * np.sqrt(lambdas / kT)

From 2e8d34f653291d9fc82c2d398bbf1383ab23a15a Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Wed, 13 Aug 2025 09:44:19 +0100
Subject: [PATCH 05/16] Filter and clean eigenvalues: retain only real,
 strictly positive values: - Added logging for complex and non-positive
 eigenvalues with index and value - Applied mask to filter out invalid values
 using np.isreal, np.isclose, and positivity check - Converted filtered values
 to pure real numbers using .real

---
 CodeEntropy/entropy.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 2843f70..72710e3 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -613,10 +613,19 @@ def frequency_calculation(self, lambdas, temp):
         lambdas = np.array(lambdas)  # Ensure input is a NumPy array
         logger.debug(f"Eigenvalues (lambdas): {lambdas}")
 
-        # Check for negatives and raise a warning if any are found
-        if np.any(lambdas <= 0):
-            logger.warning(f"Negative eigenvalues encountered: {lambdas[lambdas <= 0]}")
-            lambdas[lambdas <= 0] = 0
+        for idx, val in enumerate(lambdas):
+            if not np.isreal(val):
+                logger.warning(f"[Index {idx}] Complex eigenvalue excluded: {val}")
+            elif val <= 0 or np.isclose(val, 0, atol=1e-07):
+                logger.warning(
+                    f"[Index {idx}] "
+                    f"Non-positive or near-zero eigenvalue excluded: {val}"
+                )
+
+        valid_mask = (
+            np.isreal(lambdas) & (lambdas > 0) & (~np.isclose(lambdas, 0, atol=1e-07))
+        )
+        lambdas = lambdas[valid_mask].real
 
         # Compute frequencies safely
         frequencies = 1 / (2 * pi) * np.sqrt(lambdas / kT)

From 81bc1ef2a1b215f9575ea52da13d04e1327d7122 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Wed, 13 Aug 2025 10:24:29 +0100
Subject: [PATCH 06/16] Optimise eigenvalue filtering: single-pass mask with
 summary warning: - Replaced per-element logging with a single summary warning
 based on mask length - Improved performance and readability by avoiding
 redundant loops

---
 CodeEntropy/entropy.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 72710e3..ebe87ae 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -613,18 +613,17 @@ def frequency_calculation(self, lambdas, temp):
         lambdas = np.array(lambdas)  # Ensure input is a NumPy array
         logger.debug(f"Eigenvalues (lambdas): {lambdas}")
 
-        for idx, val in enumerate(lambdas):
-            if not np.isreal(val):
-                logger.warning(f"[Index {idx}] Complex eigenvalue excluded: {val}")
-            elif val <= 0 or np.isclose(val, 0, atol=1e-07):
-                logger.warning(
-                    f"[Index {idx}] "
-                    f"Non-positive or near-zero eigenvalue excluded: {val}"
-                )
-
+        lambdas = np.real_if_close(lambdas, tol=1000)
         valid_mask = (
             np.isreal(lambdas) & (lambdas > 0) & (~np.isclose(lambdas, 0, atol=1e-07))
         )
+
+        if len(lambdas) > np.count_nonzero(valid_mask):
+            logger.warning(
+                f"{len(lambdas) - np.count_nonzero(valid_mask)} "
+                f"invalid eigenvalues excluded (complex, non-positive, or near-zero)."
+            )
+
         lambdas = lambdas[valid_mask].real
 
         # Compute frequencies safely

From 4eb9e940109529d082456c0d026ed58f27dcc7f2 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Wed, 13 Aug 2025 16:19:53 +0100
Subject: [PATCH 07/16] Refactor trajectory iteration to use explicit index
 pairing via zip(): - Replaced direct iteration over trajectory slices with
 `zip(index, frame)` - Ensures consistent indexing and avoids skipping final
 frame

---
 CodeEntropy/levels.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py
index ab28ec5..889845f 100644
--- a/CodeEntropy/levels.py
+++ b/CodeEntropy/levels.py
@@ -781,8 +781,8 @@ def build_covariance_matrices(
             "poly": np.zeros(number_groups, dtype=int),
         }
 
-        for timestep in reduced_atom.trajectory[start:end:step]:
-            time_index = timestep.frame - start
+        indices = list(range(start, end, step))
+        for time_index, _ in zip(indices, reduced_atom.trajectory[start:end:step]):
 
             for group_id, molecules in groups.items():
                 for mol_id in molecules:
@@ -794,7 +794,7 @@ def build_covariance_matrices(
                             group_id,
                             level,
                             levels[mol_id],
-                            time_index,
+                            time_index - start,
                             number_frames,
                             force_avg,
                             torque_avg,

From 8575847353c356ec508e7be69eafdcd42cdbe7a0 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Wed, 13 Aug 2025 16:26:52 +0100
Subject: [PATCH 08/16] Refactor trajectory iteration to use explicit index
 pairing via zip() for `assign_conformation`: - Replaced direct iteration over
 trajectory slices with `zip(index, frame)` - Ensures consistent indexing and
 avoids skipping final frame

---
 CodeEntropy/entropy.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index ebe87ae..f708b08 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -748,8 +748,11 @@ def assign_conformation(
 
         # get the values of the angle for the dihedral
         # dihedral angle values have a range from -180 to 180
-        for timestep in data_container.trajectory[start:end:step]:
-            timestep_index = timestep.frame - start
+        indices = list(range(start, end, step))
+        for timestep_index, _ in zip(
+            indices, data_container.trajectory[start:end:step]
+        ):
+            timestep_index = timestep_index - start
             value = dihedral.value()
             # we want postive values in range 0 to 360 to make the peak assignment
             # work using the fact that dihedrals have circular symetry

From 65237a6e2dcdc4fc18a39d6c79129bdd2fa90ad5 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Thu, 14 Aug 2025 12:21:52 +0100
Subject: [PATCH 09/16] Refactor dihedral computation to skip conformation
 assignment when no dihedrals are present: - Added check to avoid calling
 `assign_conformation` if `get_dihedrals` returns an empty list - Prevented
 unnecessary allocation of empty `conformation` arrays - Ensured `states` are
 only generated when valid dihedrals exist -
 `conformational_entropy_calculation` is only called if the `state` is not
 empty

---
 CodeEntropy/entropy.py | 16 ++++++++++++----
 CodeEntropy/levels.py  | 29 +++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index f708b08..c143e25 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -354,9 +354,12 @@ def _process_united_atom_entropy(
                 t_matrix, "torque", self._args.temperature, highest
             )
 
-            S_conf_res = ce.conformational_entropy_calculation(
-                states[key], number_frames
-            )
+            if any(states[key]):
+                S_conf_res = ce.conformational_entropy_calculation(
+                    states[key], number_frames
+                )
+            else:
+                S_conf_res = 0
 
             S_trans += S_trans_res
             S_rot += S_rot_res
@@ -421,7 +424,12 @@ def _process_conformational_entropy(
             start, end, step (int): Frame bounds.
             n_frames (int): Number of frames used.
         """
-        S_conf = ce.conformational_entropy_calculation(states[group_id], number_frames)
+        if states[group_id]:
+            S_conf = ce.conformational_entropy_calculation(
+                states[group_id], number_frames
+            )
+        else:
+            S_conf = 0
 
         self._data_logger.add_results_data(group_id, level, "Conformational", S_conf)
 
diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py
index 889845f..aa06c40 100644
--- a/CodeEntropy/levels.py
+++ b/CodeEntropy/levels.py
@@ -290,18 +290,27 @@ def compute_dihedral_conformations(
                 - dihedrals (list): List of dihedral angle definitions.
         """
         dihedrals = self.get_dihedrals(selector, level)
-        num_dihedrals = len(dihedrals)
 
-        conformation = np.zeros((num_dihedrals, number_frames))
-        for i, dihedral in enumerate(dihedrals):
-            conformation[i] = ce.assign_conformation(
-                selector, dihedral, number_frames, bin_width, start, end, step
-            )
+        if len(dihedrals) == 0:
+            logger.debug("No dihedrals found; skipping conformation assignment.")
+            states = []
+        else:
+            num_dihedrals = len(dihedrals)
+            conformation = np.zeros((num_dihedrals, number_frames))
 
-        states = [
-            "".join(str(int(conformation[d][f])) for d in range(num_dihedrals))
-            for f in range(number_frames)
-        ]
+            for i, dihedral in enumerate(dihedrals):
+                conformation[i] = ce.assign_conformation(
+                    selector, dihedral, number_frames, bin_width, start, end, step
+                )
+
+            states = [
+                state
+                for state in (
+                    "".join(str(int(conformation[d][f])) for d in range(num_dihedrals))
+                    for f in range(number_frames)
+                )
+                if state
+            ]
 
         return states
 

From 206a3c32de46951c82412f4049d90cf4eecf6332 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Thu, 14 Aug 2025 13:56:36 +0100
Subject: [PATCH 10/16] Refactor entropy computation to handle missing and
 empty state data safely: - Replaced ambiguous truth checks with type-safe
 logic for lists and NumPy arrays - Added guard against NoneType access for
 `states[group_id]` to prevent iteration errors

---
 CodeEntropy/entropy.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 266bc1b..80b15ca 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -354,12 +354,17 @@ def _process_united_atom_entropy(
                 t_matrix, "torque", self._args.temperature, highest
             )
 
-            if any(states[key]):
-                S_conf_res = ce.conformational_entropy_calculation(
-                    states[key], number_frames
-                )
-            else:
-                S_conf_res = 0
+            values = states[key]
+
+            contains_non_empty_states = (
+                np.any(values) if isinstance(values, np.ndarray) else any(values)
+            )
+
+            S_conf_res = (
+                ce.conformational_entropy_calculation(values, number_frames)
+                if contains_non_empty_states
+                else 0
+            )
 
             S_trans += S_trans_res
             S_rot += S_rot_res
@@ -424,12 +429,22 @@ def _process_conformational_entropy(
             start, end, step (int): Frame bounds.
             n_frames (int): Number of frames used.
         """
-        if states[group_id]:
-            S_conf = ce.conformational_entropy_calculation(
-                states[group_id], number_frames
+        group_states = states[group_id] if group_id < len(states) else None
+
+        if group_states is not None:
+            contains_state_data = (
+                group_states.any()
+                if isinstance(group_states, np.ndarray)
+                else any(group_states)
             )
         else:
-            S_conf = 0
+            contains_state_data = False
+
+        S_conf = (
+            ce.conformational_entropy_calculation(group_states, number_frames)
+            if contains_state_data
+            else 0
+        )
 
         self._data_logger.add_results_data(group_id, level, "Conformational", S_conf)
 

From 9d0c8a3c005cc6b3e2600fc75fe19b4d8e2f6c65 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Thu, 14 Aug 2025 15:45:15 +0100
Subject: [PATCH 11/16] fix and update unit tests to reflect changes made
 within this refactor

---
 tests/test_CodeEntropy/test_entropy.py | 71 +++++++++++++++++++++-----
 tests/test_CodeEntropy/test_levels.py  | 26 ++++++++++
 tests/test_CodeEntropy/test_main.py    |  2 +-
 3 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/tests/test_CodeEntropy/test_entropy.py b/tests/test_CodeEntropy/test_entropy.py
index 3d82a32..ff663d4 100644
--- a/tests/test_CodeEntropy/test_entropy.py
+++ b/tests/test_CodeEntropy/test_entropy.py
@@ -805,7 +805,7 @@ def test_frequency_calculation_0(self):
 
         Ensures that the method returns 0 when the input eigenvalue (lambda) is zero.
         """
-        lambdas = 0
+        lambdas = [0]
         temp = 298
 
         run_manager = RunManager("mock_folder")
@@ -815,7 +815,7 @@ def test_frequency_calculation_0(self):
         )
         frequencies = ve.frequency_calculation(lambdas, temp)
 
-        assert frequencies == 0
+        assert np.allclose(frequencies, [0.0])
 
     def test_frequency_calculation_positive(self):
         """
@@ -842,30 +842,75 @@ def test_frequency_calculation_positive(self):
             [1899594266400.4016, 2013894687315.6213, 2195940987139.7097]
         )
 
-    def test_frequency_calculation_negative(self):
+    def test_frequency_calculation_filters_invalid(self):
         """
-        Test `frequency_calculation` with a negative eigenvalue.
+        Test `frequency_calculation` filters out invalid eigenvalues.
 
-        Ensures that the method raises a `ValueError` when any eigenvalue is negative,
-        as this is physically invalid for frequency calculations.
+        Ensures that negative, complex, and near-zero eigenvalues are excluded,
+        and frequencies are calculated only for valid ones.
         """
-        lambdas = np.array([585495.0917897299, -658074.5130064893, 782425.305888707])
+        lambdas = np.array(
+            [585495.0917897299, -658074.5130064893, 0.0, 782425.305888707]
+        )
         temp = 298
 
         # Create a mock RunManager and set return value for get_KT2J
-        run_manager = RunManager("temp_folder")
-        run_manager.get_KT2J
+        run_manager = MagicMock()
+        run_manager.get_KT2J.return_value = 2.479e-21  # example value in Joules
 
         # Instantiate VibrationalEntropy with mocks
         ve = VibrationalEntropy(
             run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
         )
 
-        # Assert that ValueError is raised due to negative eigenvalue
-        with self.assertRaises(ValueError) as context:
-            ve.frequency_calculation(lambdas, temp)
+        # Call the method
+        frequencies = ve.frequency_calculation(lambdas, temp)
+
+        # Expected: only two valid eigenvalues used
+        expected_lambdas = np.array([585495.0917897299, 782425.305888707])
+        expected_frequencies = (
+            1
+            / (2 * np.pi)
+            * np.sqrt(expected_lambdas / run_manager.get_KT2J.return_value)
+        )
+
+        # Assert frequencies match expected
+        np.testing.assert_allclose(frequencies, expected_frequencies, rtol=1e-5)
+
+    def test_frequency_calculation_filters_invalid_with_warning(self):
+        """
+        Test `frequency_calculation` filters out invalid eigenvalues and logs a warning.
 
-        self.assertIn("Negative eigenvalues", str(context.exception))
+        Ensures that negative, complex, and near-zero eigenvalues are excluded,
+        and a warning is logged about the exclusions.
+        """
+        lambdas = np.array(
+            [585495.0917897299, -658074.5130064893, 0.0, 782425.305888707]
+        )
+        temp = 298
+
+        run_manager = MagicMock()
+        run_manager.get_KT2J.return_value = 2.479e-21  # example value
+
+        ve = VibrationalEntropy(
+            run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
+        )
+
+        with self.assertLogs("CodeEntropy.entropy", level="WARNING") as cm:
+            frequencies = ve.frequency_calculation(lambdas, temp)
+
+        # Check that warning was logged
+        warning_messages = "\n".join(cm.output)
+        self.assertIn("invalid eigenvalues excluded", warning_messages)
+
+        # Check that only valid frequencies are returned
+        expected_lambdas = np.array([585495.0917897299, 782425.305888707])
+        expected_frequencies = (
+            1
+            / (2 * np.pi)
+            * np.sqrt(expected_lambdas / run_manager.get_KT2J.return_value)
+        )
+        np.testing.assert_allclose(frequencies, expected_frequencies, rtol=1e-5)
 
     def test_vibrational_entropy_calculation_force_not_highest(self):
         """
diff --git a/tests/test_CodeEntropy/test_levels.py b/tests/test_CodeEntropy/test_levels.py
index 734c7ef..673d6e8 100644
--- a/tests/test_CodeEntropy/test_levels.py
+++ b/tests/test_CodeEntropy/test_levels.py
@@ -257,6 +257,32 @@ def test_get_dihedrals_no_residue(self):
         # Should result in no resdies
         self.assertEqual(result, [])
 
+    def test_compute_dihedral_conformations_no_dihedrals(self):
+        """
+        Test `compute_dihedral_conformations` when no dihedrals are found.
+        Ensures it returns an empty list of states.
+        """
+        level_manager = LevelManager()
+
+        # Mock get_dihedrals to return an empty list
+        level_manager.get_dihedrals = MagicMock(return_value=[])
+
+        selector = MagicMock()
+
+        result = level_manager.compute_dihedral_conformations(
+            selector=selector,
+            level="united_atom",
+            number_frames=10,
+            bin_width=10.0,
+            start=0,
+            end=10,
+            step=1,
+            ce=MagicMock(),
+        )
+
+        # Match current behavior: returns only states (empty list)
+        self.assertEqual(result, [])
+
     def test_get_beads_polymer_level(self):
         """
         Test `get_beads` for 'polymer' level.
diff --git a/tests/test_CodeEntropy/test_main.py b/tests/test_CodeEntropy/test_main.py
index bee18f0..9a16f35 100644
--- a/tests/test_CodeEntropy/test_main.py
+++ b/tests/test_CodeEntropy/test_main.py
@@ -105,7 +105,7 @@ def test_main_entry_point_runs(self):
 
         config_path = os.path.join(self.test_dir, "config.yaml")
         with open(config_path, "w") as f:
-            f.write("run1:\n" "  selection_string: resid 1\n")
+            f.write("run1:\n" "  end: 60\n" "  selection_string: resid 1\n")
 
         result = subprocess.run(
             [

From 5863bdf5f10e3403f095ef9285a4231c3a1454bb Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Thu, 14 Aug 2025 15:48:12 +0100
Subject: [PATCH 12/16] tidy up comments within the unit tests

---
 tests/test_CodeEntropy/test_levels.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_CodeEntropy/test_levels.py b/tests/test_CodeEntropy/test_levels.py
index 673d6e8..2066592 100644
--- a/tests/test_CodeEntropy/test_levels.py
+++ b/tests/test_CodeEntropy/test_levels.py
@@ -264,7 +264,6 @@ def test_compute_dihedral_conformations_no_dihedrals(self):
         """
         level_manager = LevelManager()
 
-        # Mock get_dihedrals to return an empty list
         level_manager.get_dihedrals = MagicMock(return_value=[])
 
         selector = MagicMock()
@@ -280,7 +279,6 @@ def test_compute_dihedral_conformations_no_dihedrals(self):
             ce=MagicMock(),
         )
 
-        # Match current behavior: returns only states (empty list)
         self.assertEqual(result, [])
 
     def test_get_beads_polymer_level(self):

From 927d1aae3deaa820d3c6d97f7ac5c52258e78dbe Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Thu, 14 Aug 2025 15:49:56 +0100
Subject: [PATCH 13/16] change the default grouping configuration to group by
 `molecules` rather than `each`

---
 CodeEntropy/config/arg_config_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CodeEntropy/config/arg_config_manager.py b/CodeEntropy/config/arg_config_manager.py
index 0d809ed..5f62832 100644
--- a/CodeEntropy/config/arg_config_manager.py
+++ b/CodeEntropy/config/arg_config_manager.py
@@ -67,7 +67,7 @@
     "grouping": {
         "type": str,
         "help": "How to group molecules for averaging",
-        "default": "each",
+        "default": "molecules",
     },
 }
 

From 9ea4806cb70b7637f277dc41f4db5602b84d3553 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Fri, 15 Aug 2025 11:08:17 +0100
Subject: [PATCH 14/16] Fix trajectory bounds logic in `_get_trajectory_bounds`
 to correctly interpret -1 as the last frame

---
 CodeEntropy/entropy.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 80b15ca..967dc40 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -247,7 +247,11 @@ def _get_trajectory_bounds(self):
             Tuple of (start, end, step) frame indices.
         """
         start = self._args.start or 0
-        end = self._args.end or -1
+        end = (
+            len(self._universe.trajectory) - 1
+            if self._args.end == -1
+            else self._args.end
+        )
         step = self._args.step or 1
 
         return start, end, step

From 35912a38d9dfa440aab4863a4ff415c05cb684bf Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Fri, 15 Aug 2025 11:19:42 +0100
Subject: [PATCH 15/16] Fix trajectory end index logic to ensure all frames are
 analyzed when end is -1

---
 CodeEntropy/entropy.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
index 967dc40..e3bf58e 100644
--- a/CodeEntropy/entropy.py
+++ b/CodeEntropy/entropy.py
@@ -247,11 +247,7 @@ def _get_trajectory_bounds(self):
             Tuple of (start, end, step) frame indices.
         """
         start = self._args.start or 0
-        end = (
-            len(self._universe.trajectory) - 1
-            if self._args.end == -1
-            else self._args.end
-        )
+        end = len(self._universe.trajectory) if self._args.end == -1 else self._args.end
         step = self._args.step or 1
 
         return start, end, step

From 700f6bb864b5ad1d997ac578788475dea0becb22 Mon Sep 17 00:00:00 2001
From: harryswift01 <harry.swift@stfc.ac.uk>
Date: Fri, 15 Aug 2025 11:27:30 +0100
Subject: [PATCH 16/16] fix `test_get_trajectory_bounds` test to match new
 logic in `_get_trajectory_bounds`

---
 tests/test_CodeEntropy/test_entropy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_CodeEntropy/test_entropy.py b/tests/test_CodeEntropy/test_entropy.py
index ff663d4..a5b6455 100644
--- a/tests/test_CodeEntropy/test_entropy.py
+++ b/tests/test_CodeEntropy/test_entropy.py
@@ -222,7 +222,7 @@ def test_get_trajectory_bounds(self):
         self.assertIsInstance(entropy_manager._args.end, int)
         self.assertIsInstance(entropy_manager._args.step, int)
 
-        self.assertEqual(entropy_manager._get_trajectory_bounds(), (0, -1, 1))
+        self.assertEqual(entropy_manager._get_trajectory_bounds(), (0, 0, 1))
 
     @patch(
         "argparse.ArgumentParser.parse_args",