From 0f8b9767c7332f47e561d83b8af89ef619b4ccd4 Mon Sep 17 00:00:00 2001 From: skfegan Date: Thu, 3 Jul 2025 17:00:24 +0100 Subject: [PATCH 01/23] Switching loops to loop over timesteps first and then molecules when calculating the covariance matrices and separating the entropy calculation from the loop that makes the matrices. Part 1. --- CodeEntropy/entropy.py | 214 ++++++++++++++++++++--------------------- CodeEntropy/levels.py | 54 +++++------ 2 files changed, 131 insertions(+), 137 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 21a7e62..a44261a 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -63,6 +63,7 @@ def execute(self): reduced_atom = self._get_reduced_universe() number_molecules, levels = self._level_manager.select_levels(reduced_atom) + number_residues = len(reduced_atom.residues) ve = VibrationalEntropy( self._run_manager, @@ -79,86 +80,102 @@ def execute(self): self._level_manager, ) - for molecule_id in range(number_molecules): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - - for level in levels[molecule_id]: + # Initialise force and torque matrices + force_matrix_ua = [[] for _ in range(number_residues)] + force_matrix_res = [[] for _ in range(number_molecules)] + force_matrix_poly = [[] for _ in range(number_molecules)] + torque_matrix_ua = [[] for _ in range(number_residues)] + torque_matrix_res = [[] for _ in range(number_molecules)] + torque_matrix_poly = [[] for _ in range(number_molecules)] + + for timestep in reduced_atom.trajectory[start:end:step]: + time_index = timestep.frame - start + + for molecule_id in range(number_molecules): + mol_container = self._get_molecule_container(reduced_atom, molecule_id) + + for level in levels[molecule_id]: + # Identify if level is the highest (molecule) level + highest_level = level == levels[molecule_id][-1] + + # Get matrices for vibrational entropy calculations + if level == "united_atom": + for res_id, residue in enumerate(mol_container.residues): + res_container = self._run_manager.new_U_select_atom( + mol_container, + f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}", + ) + + force_matrix, torque_matrix = self._level_manager.get_matrices( + res_container,level, number_frames, + highest_level, force_matrix_ua[res_id], + torque_matrix_ua[res_id] + ) + force_matrix_ua[res_id] = force_matrix + torque_matrix_ua[res_id] = torque_matrix + + elif level == "residue": + force_matrix, torque_matrix = self._level_manager.get_matrices(mol_container,level,number_frames,highest_level, force_matrix_res[molecule_id],torque_matrix_res[molecule_id]) + force_matrix_res[molecule_id] = force_matrix + torque_matrix_res[molecule_id] = torque_matrix + + elif level =="polymer": + force_matrix, torque_matrix = self._level_manager.get_matrices(mol_container,level,number_frames,highest_level, force_matrix_poly[molecule_id],torque_matrix_poly[molecule_id]) + force_matrix_poly[molecule_id] = force_matrix + torque_matrix_poly[molecule_id] = torque_matrix + + #TODO When function is ready + # Get environment information for orientational entropy + ## if highest_level: + ## number_neighbours = get_neighbours() + + # Get states for conformational entropy calculation + bin_width = self._args.bin_width + for molecule_id in range(number_molecule): + mol_container = self._get_molecule_container(reduced_atom, molecule_id) + for level in levels[molecule_id]: + if level == "united_atom": + for res_id, residue in enumerate(mol_container.residues): + res_container = self._run_manager.new_U_select_atom( + mol_container, + f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}", + ) + heavy_res = self._run_manager.new_U_select_atom( + res_container, "not name H*" + ) + + dihedrals = self._level_manager.get_dihedrals(heavy_res, level) + states_ua[res_id] = ce.assign_conformations(heavy_res, dihedrals, number_frames, bin_width, start, end, step) + + if level == "residue": + dihedrals = self._level_manager.get_dihedrals(mol_container, level) + states_res[molecule_id] = ce.assign_conformations(mol_container, dihedrals, number_frames, bin_width, start, end, step) + + # Do the entropy calculations + for molecule_id in range(number_molecules): + mol_container = self._get_molecule_container(reduced_atom, molecule_id) + for level in levels[molecule_id]: + # Identify if level is the highest (molecule) level highest_level = level == levels[molecule_id][-1] - if level == "united_atom": - self._process_united_atom_level( - molecule_id, - mol_container, - ve, - ce, - level, - start, - end, - step, - number_frames, - highest_level, - ) - - logger.debug( - "%s level: molecule_data: %s", - level, - self._data_logger.molecule_data, - ) - logger.debug( - "%s level: residue_data: %s", - level, - self._data_logger.residue_data, - ) - - elif level in ("polymer", "residue"): - self._process_vibrational_only_levels( - molecule_id, - mol_container, - ve, - level, - start, - end, - step, - number_frames, - highest_level, - ) - logger.debug( - "%s level: molecule_data: %s", - level, - self._data_logger.molecule_data, - ) - logger.debug( - "%s level: residue_data: %s", - level, - self._data_logger.residue_data, - ) + if level == "united_atom": + for res_id, residue in enumerate(mol_container.residues): + self._process_united_atom_level(molecule_id, res_id, ve, ce, force_matrix_ua[res_id], torque_matrix_ua[res_id], states_ua[res_id], highest) - if level == "residue": - self._process_conformational_residue_level( - molecule_id, - mol_container, - ce, - level, - start, - end, - step, - number_frames, - ) + elif level == "residue": + self._process_vibrational_entropy(molecule_id, mol_container, ve, level, force_matrix_res[molecule_id], torque_matrix_res[molecule_id], highest) + self._process_conformational_entropy(molecule_id, mol_container, ce, states_res[molecule_id]) - logger.debug( - "%s level: molecule_data: %s", - level, - self._data_logger.molecule_data, - ) - logger.debug( - "%s level: residue_data: %s", - level, - self._data_logger.residue_data, - ) + elif level == "polymer": + self._process_vibrational_entropy(molecule_id, mol_container, ve, level, force_matrix_poly[molecule_id], torque_matrix_poly[molecule_id], highest) + + #TODO when orientational entropy function is ready + ## if highest: + ## self._process_orientational_entropy() - self._finalize_molecule_results() + self._finalize_molecule_results() - self._data_logger.log_tables() + self._data_logger.log_tables() def _get_trajectory_bounds(self): """ @@ -228,8 +245,8 @@ def _get_molecule_container(self, universe, molecule_id): selection_string = f"index {frag.indices[0]}:{frag.indices[-1]}" return self._run_manager.new_U_select_atom(universe, selection_string) - def _process_united_atom_level( - self, mol_id, mol_container, ve, ce, level, start, end, step, n_frames, highest + def _process_united_atom_entropy( + self, mol_id, res_id, ve, ce, force_matrix, torque_matrix, states, highest ): """ Calculates translational, rotational, and conformational entropy at the @@ -246,21 +263,9 @@ def _process_united_atom_level( highest (bool): Whether this is the highest level of resolution for the molecule. """ - bin_width = self._args.bin_width S_trans, S_rot, S_conf = 0, 0, 0 - for residue_id, residue in enumerate(mol_container.residues): - res_container = self._run_manager.new_U_select_atom( - mol_container, - f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}", - ) - heavy_res = self._run_manager.new_U_select_atom( - res_container, "not name H*" - ) - - force_matrix, torque_matrix = self._level_manager.get_matrices( - res_container, level, start, end, step, n_frames, highest - ) + for residue_id, residue in enumerate(mol_container.residues): S_trans_res = ve.vibrational_entropy_calculation( force_matrix, "force", self._args.temperature, highest ) @@ -268,9 +273,8 @@ def _process_united_atom_level( torque_matrix, "torque", self._args.temperature, highest ) - dihedrals = self._level_manager.get_dihedrals(heavy_res, level) S_conf_res = ce.conformational_entropy_calculation( - heavy_res, dihedrals, bin_width, start, end, step, n_frames + mol_id, states ) S_trans += S_trans_res @@ -297,26 +301,24 @@ def _process_united_atom_level( residue.resname, level, "Conformational", S_conf ) - def _process_vibrational_only_levels( - self, mol_id, mol_container, ve, level, start, end, step, n_frames, highest + def _process_vibrational_entropy( + self, mol_id, mol_container, ve, level, force_matrix, torque_matrix, highest ): """ - Calculates vibrational entropy at levels where conformational entropy is - not considered. + Calculates vibrational entropy. Args: mol_id (int): Molecule ID. mol_container (Universe): Selected molecule's universe. ve: VibrationalEntropy object. - level (str): Current granularity level ('polymer' or 'residue'). - start, end, step (int): Trajectory frame parameters. - n_frames (int): Number of trajectory frames. + level (str): Current granularity level. + force_matrix : Force covariance matrix + torque_matrix : Torque covariance matrix highest (bool): Flag indicating if this is the highest granularity level. """ - force_matrix, torque_matrix = self._level_manager.get_matrices( - mol_container, level, start, end, step, n_frames, highest - ) + force_matrix, torque_matrix = self.force_matrix, self.torque_matrix + S_trans = ve.vibrational_entropy_calculation( force_matrix, "force", self._args.temperature, highest ) @@ -331,9 +333,7 @@ def _process_vibrational_only_levels( residue.resname, level, "Rovibrational", S_rot ) - def _process_conformational_residue_level( - self, mol_id, mol_container, ce, level, start, end, step, n_frames - ): + def _process_conformational_entropy(self, mol_id, mol_container, ce, states): """ Computes conformational entropy at the residue level (whole-molecule dihedral analysis). @@ -346,11 +346,7 @@ def _process_conformational_residue_level( start, end, step (int): Frame bounds. n_frames (int): Number of frames used. """ - bin_width = self._args.bin_width - dihedrals = self._level_manager.get_dihedrals(mol_container, level) - S_conf = ce.conformational_entropy_calculation( - mol_container, dihedrals, bin_width, start, end, step, n_frames - ) + S_conf = ce.conformational_entropy_calculation(states) residue = mol_container.residues[mol_id] self._data_logger.add_results_data( residue.resname, level, "Conformational", S_conf diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index a4be74d..f9b18d6 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -70,7 +70,8 @@ def select_levels(self, data_container): return number_molecules, levels def get_matrices( - self, data_container, level, start, end, step, number_frames, highest_level + self, data_container, level, number_frames, highest_level, + force_matrix, torque_matrix ): """ Function to create the force matrix needed for the transvibrational entropy @@ -82,9 +83,6 @@ def get_matrices( molecule of interest. level : string, which of the polymer, residue, or united atom levels are the matrices for. - start : int, starting frame, default 0 (first frame) - end : int, ending frame, default -1 (last frame) - step : int, step for going through trajectories, default 1 Returns ------- @@ -100,30 +98,26 @@ def get_matrices( # initialize force and torque arrays weighted_forces = [ - [0 for x in range(number_frames)] for y in range(number_beads) - ] + [0 for x in range(number_beads) ] weighted_torques = [ - [0 for x in range(number_frames)] for y in range(number_beads) - ] + [0 for x in range(number_beads) ] # Calculate forces/torques for each bead for bead_index in range(number_beads): - for timestep in data_container.trajectory[start:end:step]: - # Set up axes - # translation and rotation use different axes - # how the axes are defined depends on the level - trans_axes, rot_axes = self.get_axes(data_container, level, bead_index) - - # Sort out coordinates, forces, and torques for each atom in the bead - timestep_index = timestep.frame - start - weighted_forces[bead_index][timestep_index] = self.get_weighted_forces( - data_container, list_of_beads[bead_index], trans_axes, highest_level - ) - weighted_torques[bead_index][timestep_index] = ( - self.get_weighted_torques( - data_container, list_of_beads[bead_index], rot_axes - ) + # Set up axes + # translation and rotation use different axes + # how the axes are defined depends on the level + trans_axes, rot_axes = self.get_axes(data_container, level, bead_index) + + # Sort out coordinates, forces, and torques for each atom in the bead + weighted_forces[bead_index] = self.get_weighted_forces( + data_container, list_of_beads[bead_index], trans_axes, highest_level + ) + weighted_torques[bead_index] = ( + self.get_weighted_torques( + data_container, list_of_beads[bead_index], rot_axes ) + ) # Make covariance matrices - looping over pairs of beads # list of pairs of indices @@ -154,23 +148,27 @@ def get_matrices( torque_submatrix[j][i] = np.transpose(torque_submatrix[i][j]) # use np.block to make submatrices into one matrix - force_matrix = np.block( + force_block = np.block( [ [force_submatrix[i][j] for j in range(number_beads)] for i in range(number_beads) ] ) - torque_matrix = np.block( + torque_block = np.block( [ [torque_submatrix[i][j] for j in range(number_beads)] for i in range(number_beads) ] ) - # fliter zeros to remove any rows/columns that are all zero - force_matrix = self.filter_zero_rows_columns(force_matrix) - torque_matrix = self.filter_zero_rows_columns(torque_matrix) + # # fliter zeros to remove any rows/columns that are all zero + # force_matrix = self.filter_zero_rows_columns(force_matrix) + # torque_matrix = self.filter_zero_rows_columns(torque_matrix) + + # Add forces/torques from this time frame into the matrices + force_matrix = np.add(force_matrix, force_block) + torque_matrix = np.add(torque_matrix, torque_block) logger.debug(f"Force Matrix: {force_matrix}") logger.debug(f"Torque Matrix: {torque_matrix}") From d314f530b51213101a767abcc836be7d5d52d7bb Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 10 Jul 2025 09:57:45 +0100 Subject: [PATCH 02/23] applying pre-commit hooks and linting for code quality --- CodeEntropy/entropy.py | 214 ++++++++++++++++++++++++++++------------- CodeEntropy/levels.py | 29 +++--- 2 files changed, 162 insertions(+), 81 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index a44261a..9ac4c40 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -88,8 +88,11 @@ def execute(self): torque_matrix_res = [[] for _ in range(number_molecules)] torque_matrix_poly = [[] for _ in range(number_molecules)] + states_ua = [] + states_res = [] + for timestep in reduced_atom.trajectory[start:end:step]: - time_index = timestep.frame - start + # time_index = timestep.frame - start for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) @@ -100,82 +103,151 @@ def execute(self): # Get matrices for vibrational entropy calculations if level == "united_atom": - for res_id, residue in enumerate(mol_container.residues): - res_container = self._run_manager.new_U_select_atom( - mol_container, - f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}", - ) - - force_matrix, torque_matrix = self._level_manager.get_matrices( - res_container,level, number_frames, - highest_level, force_matrix_ua[res_id], - torque_matrix_ua[res_id] - ) - force_matrix_ua[res_id] = force_matrix - torque_matrix_ua[res_id] = torque_matrix + for res_id, residue in enumerate(mol_container.residues): + res_container = self._run_manager.new_U_select_atom( + mol_container, + ( + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}" + ), + ) + force_matrix, torque_matrix = ( + self._level_manager.get_matrices( + res_container, + level, + number_frames, + highest_level, + force_matrix_ua[res_id], + torque_matrix_ua[res_id], + ) + ) + force_matrix_ua[res_id] = force_matrix + torque_matrix_ua[res_id] = torque_matrix elif level == "residue": - force_matrix, torque_matrix = self._level_manager.get_matrices(mol_container,level,number_frames,highest_level, force_matrix_res[molecule_id],torque_matrix_res[molecule_id]) + force_matrix, torque_matrix = self._level_manager.get_matrices( + mol_container, + level, + number_frames, + highest_level, + force_matrix_res[molecule_id], + torque_matrix_res[molecule_id], + ) force_matrix_res[molecule_id] = force_matrix torque_matrix_res[molecule_id] = torque_matrix - - elif level =="polymer": - force_matrix, torque_matrix = self._level_manager.get_matrices(mol_container,level,number_frames,highest_level, force_matrix_poly[molecule_id],torque_matrix_poly[molecule_id]) + + elif level == "polymer": + force_matrix, torque_matrix = self._level_manager.get_matrices( + mol_container, + level, + number_frames, + highest_level, + force_matrix_poly[molecule_id], + torque_matrix_poly[molecule_id], + ) force_matrix_poly[molecule_id] = force_matrix torque_matrix_poly[molecule_id] = torque_matrix - #TODO When function is ready + # TODO When function is ready # Get environment information for orientational entropy - ## if highest_level: - ## number_neighbours = get_neighbours() - - # Get states for conformational entropy calculation - bin_width = self._args.bin_width - for molecule_id in range(number_molecule): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - for level in levels[molecule_id]: - if level == "united_atom": - for res_id, residue in enumerate(mol_container.residues): - res_container = self._run_manager.new_U_select_atom( - mol_container, - f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}", - ) - heavy_res = self._run_manager.new_U_select_atom( - res_container, "not name H*" - ) - - dihedrals = self._level_manager.get_dihedrals(heavy_res, level) - states_ua[res_id] = ce.assign_conformations(heavy_res, dihedrals, number_frames, bin_width, start, end, step) - - if level == "residue": - dihedrals = self._level_manager.get_dihedrals(mol_container, level) - states_res[molecule_id] = ce.assign_conformations(mol_container, dihedrals, number_frames, bin_width, start, end, step) - - # Do the entropy calculations - for molecule_id in range(number_molecules): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - for level in levels[molecule_id]: + # if highest_level: + # number_neighbours = get_neighbours() + + # Get states for conformational entropy calculation + bin_width = self._args.bin_width + for molecule_id in range(number_molecules): + mol_container = self._get_molecule_container(reduced_atom, molecule_id) + for level in levels[molecule_id]: + if level == "united_atom": + for res_id, residue in enumerate(mol_container.residues): + res_container = self._run_manager.new_U_select_atom( + mol_container, + ( + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}" + ), + ) + heavy_res = self._run_manager.new_U_select_atom( + res_container, "not name H*" + ) + + dihedrals = self._level_manager.get_dihedrals(heavy_res, level) + states_ua[res_id] = ce.assign_conformations( + heavy_res, + dihedrals, + number_frames, + bin_width, + start, + end, + step, + ) + + if level == "residue": + dihedrals = self._level_manager.get_dihedrals(mol_container, level) + states_res[molecule_id] = ce.assign_conformations( + mol_container, + dihedrals, + number_frames, + bin_width, + start, + end, + step, + ) + + # Do the entropy calculations + for molecule_id in range(number_molecules): + mol_container = self._get_molecule_container(reduced_atom, molecule_id) + for level in levels[molecule_id]: # Identify if level is the highest (molecule) level highest_level = level == levels[molecule_id][-1] - if level == "united_atom": - for res_id, residue in enumerate(mol_container.residues): - self._process_united_atom_level(molecule_id, res_id, ve, ce, force_matrix_ua[res_id], torque_matrix_ua[res_id], states_ua[res_id], highest) + if level == "united_atom": + for res_id, residue in enumerate(mol_container.residues): + self._process_united_atom_entropy( + molecule_id, + mol_container, + res_id, + ve, + ce, + level, + force_matrix_ua[res_id], + torque_matrix_ua[res_id], + states_ua[res_id], + highest_level, + ) + + elif level == "residue": + self._process_vibrational_entropy( + molecule_id, + mol_container, + ve, + level, + force_matrix_res[molecule_id], + torque_matrix_res[molecule_id], + highest_level, + ) + self._process_conformational_entropy( + molecule_id, mol_container, ce, level, states_res[molecule_id] + ) - elif level == "residue": - self._process_vibrational_entropy(molecule_id, mol_container, ve, level, force_matrix_res[molecule_id], torque_matrix_res[molecule_id], highest) - self._process_conformational_entropy(molecule_id, mol_container, ce, states_res[molecule_id]) + elif level == "polymer": + self._process_vibrational_entropy( + molecule_id, + mol_container, + ve, + level, + force_matrix_poly[molecule_id], + torque_matrix_poly[molecule_id], + highest_level, + ) - elif level == "polymer": - self._process_vibrational_entropy(molecule_id, mol_container, ve, level, force_matrix_poly[molecule_id], torque_matrix_poly[molecule_id], highest) - - #TODO when orientational entropy function is ready - ## if highest: - ## self._process_orientational_entropy() + # TODO when orientational entropy function is ready + # if highest_level: + # self._process_orientational_entropy() - self._finalize_molecule_results() + self._finalize_molecule_results() - self._data_logger.log_tables() + self._data_logger.log_tables() def _get_trajectory_bounds(self): """ @@ -246,7 +318,17 @@ def _get_molecule_container(self, universe, molecule_id): return self._run_manager.new_U_select_atom(universe, selection_string) def _process_united_atom_entropy( - self, mol_id, res_id, ve, ce, force_matrix, torque_matrix, states, highest + self, + mol_id, + mol_container, + res_id, + ve, + ce, + level, + force_matrix, + torque_matrix, + states, + highest, ): """ Calculates translational, rotational, and conformational entropy at the @@ -273,9 +355,7 @@ def _process_united_atom_entropy( torque_matrix, "torque", self._args.temperature, highest ) - S_conf_res = ce.conformational_entropy_calculation( - mol_id, states - ) + S_conf_res = ce.conformational_entropy_calculation(mol_id, states) S_trans += S_trans_res S_rot += S_rot_res @@ -333,7 +413,7 @@ def _process_vibrational_entropy( residue.resname, level, "Rovibrational", S_rot ) - def _process_conformational_entropy(self, mol_id, mol_container, ce, states): + def _process_conformational_entropy(self, mol_id, mol_container, ce, level, states): """ Computes conformational entropy at the residue level (whole-molecule dihedral analysis). diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index f9b18d6..1e9e7c4 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -70,8 +70,13 @@ def select_levels(self, data_container): return number_molecules, levels def get_matrices( - self, data_container, level, number_frames, highest_level, - force_matrix, torque_matrix + self, + data_container, + level, + number_frames, + highest_level, + force_matrix, + torque_matrix, ): """ Function to create the force matrix needed for the transvibrational entropy @@ -97,10 +102,8 @@ def get_matrices( number_beads = len(list_of_beads) # initialize force and torque arrays - weighted_forces = [ - [0 for x in range(number_beads) ] - weighted_torques = [ - [0 for x in range(number_beads) ] + weighted_forces = [[0 for x in range(number_beads)]] + weighted_torques = [[0 for x in range(number_beads)]] # Calculate forces/torques for each bead for bead_index in range(number_beads): @@ -113,10 +116,8 @@ def get_matrices( weighted_forces[bead_index] = self.get_weighted_forces( data_container, list_of_beads[bead_index], trans_axes, highest_level ) - weighted_torques[bead_index] = ( - self.get_weighted_torques( - data_container, list_of_beads[bead_index], rot_axes - ) + weighted_torques[bead_index] = self.get_weighted_torques( + data_container, list_of_beads[bead_index], rot_axes ) # Make covariance matrices - looping over pairs of beads @@ -162,10 +163,10 @@ def get_matrices( ] ) - # # fliter zeros to remove any rows/columns that are all zero - # force_matrix = self.filter_zero_rows_columns(force_matrix) - # torque_matrix = self.filter_zero_rows_columns(torque_matrix) - + # # fliter zeros to remove any rows/columns that are all zero + # force_matrix = self.filter_zero_rows_columns(force_matrix) + # torque_matrix = self.filter_zero_rows_columns(torque_matrix) + # Add forces/torques from this time frame into the matrices force_matrix = np.add(force_matrix, force_block) torque_matrix = np.add(torque_matrix, torque_block) From ced888dc1072ca5b21f0506ef15c8b9a12568326 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 11 Jul 2025 16:01:13 +0100 Subject: [PATCH 03/23] Refactor matrix accumulation to handle shape mismatches with zero-padding: - Modified force and torque matrix initializations to use None instead of empty lists, enabling clearer checks for uninitialized matrices. - Updated the logic that accumulates force and torque covariance matrices to handle cases where matrices have different shapes. - Introduced a new helper method `_pad_and_add` that pads two 2D numpy arrays with zeros to the maximum dimensions of both arrays, then returns their element-wise sum. - This method ensures that when covariance matrices from different frames have incompatible shapes, they can still be safely combined without losing data. - Updated `LevelManager.create_force_and_torque_matrix` to use `_pad_and_add` when accumulating matrices with mismatched shapes. - Cleaned up some list initializations and matrix sub-block assembly for clarity and consistency. - Fixed handling of dihedrals in entropy calculations by cycling through each dihedral individually rather than processing them as a batch. This change ensures correct assignment of conformations per dihedral. --- CodeEntropy/entropy.py | 48 +++++++++--------- CodeEntropy/levels.py | 111 ++++++++++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 58 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 9ac4c40..8b067b7 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -81,15 +81,15 @@ def execute(self): ) # Initialise force and torque matrices - force_matrix_ua = [[] for _ in range(number_residues)] - force_matrix_res = [[] for _ in range(number_molecules)] - force_matrix_poly = [[] for _ in range(number_molecules)] - torque_matrix_ua = [[] for _ in range(number_residues)] - torque_matrix_res = [[] for _ in range(number_molecules)] - torque_matrix_poly = [[] for _ in range(number_molecules)] + force_matrix_ua = [None for _ in range(number_residues)] + force_matrix_res = [None for _ in range(number_molecules)] + force_matrix_poly = [None for _ in range(number_molecules)] + torque_matrix_ua = [None for _ in range(number_residues)] + torque_matrix_res = [None for _ in range(number_molecules)] + torque_matrix_poly = [None for _ in range(number_molecules)] - states_ua = [] - states_res = [] + states_ua = [None for _ in range(number_residues)] + states_res = [None for _ in range(number_molecules)] for timestep in reduced_atom.trajectory[start:end:step]: # time_index = timestep.frame - start @@ -172,9 +172,23 @@ def execute(self): ) dihedrals = self._level_manager.get_dihedrals(heavy_res, level) - states_ua[res_id] = ce.assign_conformations( - heavy_res, - dihedrals, + for dihedral in dihedrals: + states_ua[res_id] = ce.assign_conformation( + heavy_res, + dihedral, + number_frames, + bin_width, + start, + end, + step, + ) + + if level == "residue": + dihedrals = self._level_manager.get_dihedrals(mol_container, level) + for dihedral in dihedrals: + states_res[molecule_id] = ce.assign_conformation( + mol_container, + dihedral, number_frames, bin_width, start, @@ -182,18 +196,6 @@ def execute(self): step, ) - if level == "residue": - dihedrals = self._level_manager.get_dihedrals(mol_container, level) - states_res[molecule_id] = ce.assign_conformations( - mol_container, - dihedrals, - number_frames, - bin_width, - start, - end, - step, - ) - # Do the entropy calculations for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 1e9e7c4..81846bb 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -79,20 +79,27 @@ def get_matrices( torque_matrix, ): """ - Function to create the force matrix needed for the transvibrational entropy - calculation and the torque matrix for the rovibrational entropy calculation. + Compute and accumulate force/torque covariance matrices for a given level. - Input - ----- - data_container : MDAnalysis universe type with the information on the - molecule of interest. - level : string, which of the polymer, residue, or united atom levels - are the matrices for. + Parameters + ---------- + data_container : MDAnalysis.Universe + Atom group for a molecule or residue. + level : str + 'polymer', 'residue', or 'united_atom'. + number_frames : int + Number of frames being processed. + highest_level : bool + Whether this is the top (polymer) level. + force_matrix, torque_matrix : np.ndarray or None + Accumulated matrices to add to. Returns ------- - force_matrix : force covariance matrix for transvibrational entropy - torque_matrix : torque convariance matrix for rovibrational entropy + force_matrix : np.ndarray + Accumulated force covariance matrix. + torque_matrix : np.ndarray + Accumulated torque covariance matrix. """ # Make beads @@ -102,8 +109,8 @@ def get_matrices( number_beads = len(list_of_beads) # initialize force and torque arrays - weighted_forces = [[0 for x in range(number_beads)]] - weighted_torques = [[0 for x in range(number_beads)]] + weighted_forces = [None for _ in range(number_beads)] + weighted_torques = [None for _ in range(number_beads)] # Calculate forces/torques for each bead for bead_index in range(number_beads): @@ -125,10 +132,10 @@ def get_matrices( pair_list = [(i, j) for i in range(number_beads) for j in range(number_beads)] force_submatrix = [ - [0 for x in range(number_beads)] for y in range(number_beads) + [0 for _ in range(number_beads)] for _ in range(number_beads) ] torque_submatrix = [ - [0 for x in range(number_beads)] for y in range(number_beads) + [0 for _ in range(number_beads)] for _ in range(number_beads) ] for i, j in pair_list: @@ -137,16 +144,17 @@ def get_matrices( # for [j][i] if i <= j: # calculate the force covariance segment of the matrix - force_submatrix[i][j] = self.create_submatrix( + f_sub = self.create_submatrix( weighted_forces[i], weighted_forces[j], number_frames ) - force_submatrix[j][i] = np.transpose(force_submatrix[i][j]) - - # calculate the torque covariance segment of the matrix - torque_submatrix[i][j] = self.create_submatrix( + t_sub = self.create_submatrix( weighted_torques[i], weighted_torques[j], number_frames ) - torque_submatrix[j][i] = np.transpose(torque_submatrix[i][j]) + + force_submatrix[i][j] = f_sub + force_submatrix[j][i] = f_sub.T + torque_submatrix[i][j] = t_sub + torque_submatrix[j][i] = t_sub.T # use np.block to make submatrices into one matrix force_block = np.block( @@ -163,19 +171,56 @@ def get_matrices( ] ) - # # fliter zeros to remove any rows/columns that are all zero - # force_matrix = self.filter_zero_rows_columns(force_matrix) - # torque_matrix = self.filter_zero_rows_columns(torque_matrix) - - # Add forces/torques from this time frame into the matrices - force_matrix = np.add(force_matrix, force_block) - torque_matrix = np.add(torque_matrix, torque_block) + # Accumulate into full matrices, with shape-safe padding if needed + if force_matrix is None: + force_matrix = np.zeros_like(force_block) + elif force_matrix.shape != force_block.shape: + force_matrix = self._pad_and_add(force_matrix, force_block) + else: + force_matrix += force_block - logger.debug(f"Force Matrix: {force_matrix}") - logger.debug(f"Torque Matrix: {torque_matrix}") + if torque_matrix is None: + torque_matrix = np.zeros_like(torque_block) + elif torque_matrix.shape != torque_block.shape: + torque_matrix = self._pad_and_add(torque_matrix, torque_block) + else: + torque_matrix += torque_block return force_matrix, torque_matrix + def _pad_and_add(self, A, B): + """ + Pads two 2D numpy arrays with zeros to match their largest dimensions + and returns their element-wise sum. + + Both input arrays A and B can have different shapes. This function + creates zero-padded versions of A and B with the shape equal to the + maximum number of rows and columns from both arrays, then adds them. + + Parameters + ---------- + A : np.ndarray + First 2D array to pad and add. + B : np.ndarray + Second 2D array to pad and add. + + Returns + ------- + np.ndarray + A new 2D array containing the element-wise sum of the padded A and B, + with shape (max_rows, max_cols). + """ + max_rows = max(A.shape[0], B.shape[0]) + max_cols = max(A.shape[1], B.shape[1]) + + A_pad = np.zeros((max_rows, max_cols)) + B_pad = np.zeros((max_rows, max_cols)) + + A_pad[: A.shape[0], : A.shape[1]] = A + B_pad[: B.shape[0], : B.shape[1]] = B + + return A_pad + B_pad + def get_dihedrals(self, data_container, level): """ Define the set of dihedrals for use in the conformational entropy function. @@ -657,12 +702,8 @@ def create_submatrix(self, data_i, data_j, number_frames): # For each frame calculate the outer product (cross product) of the data from # the two beads and add the result to the submatrix - for frame in range(number_frames): - outer_product_matrix = np.outer(data_i[frame], data_j[frame]) - submatrix = np.add(submatrix, outer_product_matrix) - - # Divide by the number of frames to get the average - submatrix /= number_frames + outer_product_matrix = np.outer(data_i, data_j) + submatrix = np.add(submatrix, outer_product_matrix) logger.debug(f"Submatrix: {submatrix}") From aa42de480837feee3797a98db4d662a53056ad2a Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Mon, 21 Jul 2025 16:05:24 +0100 Subject: [PATCH 04/23] Refactor get_beads to remove padding and simplify force/torque matrix storage - Removed the `_pad_and_add` function from `get_beads` so it returns only the actual bead selections without added padding. - Changed force and torque matrix storage from large pre-allocated lists with None placeholders to dictionaries keyed by `(molecule_id, residue_id)`. - These changes eliminate unnecessary data padding and improve direct access to relevant matrices. - Result is cleaner data structures and simpler, more reliable indexing. --- CodeEntropy/entropy.py | 41 ++++++++++++++-------------- CodeEntropy/levels.py | 62 +++++++++--------------------------------- 2 files changed, 34 insertions(+), 69 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 8b067b7..df5cc13 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -61,9 +61,10 @@ def execute(self): self._data_logger.residue_data, ) + # Get reduced universe and initialize values reduced_atom = self._get_reduced_universe() number_molecules, levels = self._level_manager.select_levels(reduced_atom) - number_residues = len(reduced_atom.residues) + # number_residues = len(reduced_atom.residues) ve = VibrationalEntropy( self._run_manager, @@ -81,14 +82,14 @@ def execute(self): ) # Initialise force and torque matrices - force_matrix_ua = [None for _ in range(number_residues)] + force_matrix_ua = {} + torque_matrix_ua = {} force_matrix_res = [None for _ in range(number_molecules)] - force_matrix_poly = [None for _ in range(number_molecules)] - torque_matrix_ua = [None for _ in range(number_residues)] torque_matrix_res = [None for _ in range(number_molecules)] + force_matrix_poly = [None for _ in range(number_molecules)] torque_matrix_poly = [None for _ in range(number_molecules)] - states_ua = [None for _ in range(number_residues)] + states_ua = {} states_res = [None for _ in range(number_molecules)] for timestep in reduced_atom.trajectory[start:end:step]: @@ -104,6 +105,8 @@ def execute(self): # Get matrices for vibrational entropy calculations if level == "united_atom": for res_id, residue in enumerate(mol_container.residues): + key = (molecule_id, res_id) + res_container = self._run_manager.new_U_select_atom( mol_container, ( @@ -117,12 +120,13 @@ def execute(self): level, number_frames, highest_level, - force_matrix_ua[res_id], - torque_matrix_ua[res_id], + force_matrix_ua.get(key), + torque_matrix_ua.get(key), ) ) - force_matrix_ua[res_id] = force_matrix - torque_matrix_ua[res_id] = torque_matrix + + force_matrix_ua[key] = force_matrix + torque_matrix_ua[key] = torque_matrix elif level == "residue": force_matrix, torque_matrix = self._level_manager.get_matrices( @@ -148,11 +152,6 @@ def execute(self): force_matrix_poly[molecule_id] = force_matrix torque_matrix_poly[molecule_id] = torque_matrix - # TODO When function is ready - # Get environment information for orientational entropy - # if highest_level: - # number_neighbours = get_neighbours() - # Get states for conformational entropy calculation bin_width = self._args.bin_width for molecule_id in range(number_molecules): @@ -160,6 +159,8 @@ def execute(self): for level in levels[molecule_id]: if level == "united_atom": for res_id, residue in enumerate(mol_container.residues): + key = (molecule_id, res_id) + res_container = self._run_manager.new_U_select_atom( mol_container, ( @@ -173,7 +174,7 @@ def execute(self): dihedrals = self._level_manager.get_dihedrals(heavy_res, level) for dihedral in dihedrals: - states_ua[res_id] = ce.assign_conformation( + states_ua[key] = ce.assign_conformation( heavy_res, dihedral, number_frames, @@ -183,7 +184,7 @@ def execute(self): step, ) - if level == "residue": + elif level == "residue": dihedrals = self._level_manager.get_dihedrals(mol_container, level) for dihedral in dihedrals: states_res[molecule_id] = ce.assign_conformation( @@ -200,11 +201,11 @@ def execute(self): for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) for level in levels[molecule_id]: - # Identify if level is the highest (molecule) level highest_level = level == levels[molecule_id][-1] if level == "united_atom": for res_id, residue in enumerate(mol_container.residues): + key = (molecule_id, res_id) self._process_united_atom_entropy( molecule_id, mol_container, @@ -212,9 +213,9 @@ def execute(self): ve, ce, level, - force_matrix_ua[res_id], - torque_matrix_ua[res_id], - states_ua[res_id], + force_matrix_ua[key], + torque_matrix_ua[key], + states_ua[key], highest_level, ) diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 81846bb..9f00487 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -127,10 +127,7 @@ def get_matrices( data_container, list_of_beads[bead_index], rot_axes ) - # Make covariance matrices - looping over pairs of beads - # list of pairs of indices - pair_list = [(i, j) for i in range(number_beads) for j in range(number_beads)] - + # Create covariance submatrices force_submatrix = [ [0 for _ in range(number_beads)] for _ in range(number_beads) ] @@ -138,32 +135,26 @@ def get_matrices( [0 for _ in range(number_beads)] for _ in range(number_beads) ] - for i, j in pair_list: - # for each pair of beads - # reducing effort because the matrix for [i][j] is the transpose of the one - # for [j][i] - if i <= j: - # calculate the force covariance segment of the matrix + for i in range(number_beads): + for j in range(i, number_beads): f_sub = self.create_submatrix( weighted_forces[i], weighted_forces[j], number_frames ) t_sub = self.create_submatrix( weighted_torques[i], weighted_torques[j], number_frames ) - force_submatrix[i][j] = f_sub force_submatrix[j][i] = f_sub.T torque_submatrix[i][j] = t_sub torque_submatrix[j][i] = t_sub.T - # use np.block to make submatrices into one matrix + # Convert block matrices to full matrix force_block = np.block( [ [force_submatrix[i][j] for j in range(number_beads)] for i in range(number_beads) ] ) - torque_block = np.block( [ [torque_submatrix[i][j] for j in range(number_beads)] @@ -171,56 +162,29 @@ def get_matrices( ] ) - # Accumulate into full matrices, with shape-safe padding if needed + # Enforce consistent shape before accumulation if force_matrix is None: force_matrix = np.zeros_like(force_block) elif force_matrix.shape != force_block.shape: - force_matrix = self._pad_and_add(force_matrix, force_block) + raise ValueError( + f"Inconsistent force matrix shape: existing " + f"{force_matrix.shape}, new {force_block.shape}" + ) else: force_matrix += force_block if torque_matrix is None: torque_matrix = np.zeros_like(torque_block) elif torque_matrix.shape != torque_block.shape: - torque_matrix = self._pad_and_add(torque_matrix, torque_block) + raise ValueError( + f"Inconsistent torque matrix shape: existing " + f"{torque_matrix.shape}, new {torque_block.shape}" + ) else: torque_matrix += torque_block return force_matrix, torque_matrix - def _pad_and_add(self, A, B): - """ - Pads two 2D numpy arrays with zeros to match their largest dimensions - and returns their element-wise sum. - - Both input arrays A and B can have different shapes. This function - creates zero-padded versions of A and B with the shape equal to the - maximum number of rows and columns from both arrays, then adds them. - - Parameters - ---------- - A : np.ndarray - First 2D array to pad and add. - B : np.ndarray - Second 2D array to pad and add. - - Returns - ------- - np.ndarray - A new 2D array containing the element-wise sum of the padded A and B, - with shape (max_rows, max_cols). - """ - max_rows = max(A.shape[0], B.shape[0]) - max_cols = max(A.shape[1], B.shape[1]) - - A_pad = np.zeros((max_rows, max_cols)) - B_pad = np.zeros((max_rows, max_cols)) - - A_pad[: A.shape[0], : A.shape[1]] = A - B_pad[: B.shape[0], : B.shape[1]] = B - - return A_pad + B_pad - def get_dihedrals(self, data_container, level): """ Define the set of dihedrals for use in the conformational entropy function. From 57250c45cd77f92760bbf1f4a14cd874fbdc3195 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Tue, 22 Jul 2025 14:16:10 +0100 Subject: [PATCH 05/23] add in averaging over the `number_frames` for each of the forces and torques matrices --- CodeEntropy/entropy.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index df5cc13..1e51175 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -197,6 +197,22 @@ def execute(self): step, ) + force_matrix_ua = {k: v / number_frames for k, v in force_matrix_ua.items()} + torque_matrix_ua = {k: v / number_frames for k, v in torque_matrix_ua.items()} + + force_matrix_res = [ + f / number_frames if f is not None else None for f in force_matrix_res + ] + torque_matrix_res = [ + t / number_frames if t is not None else None for t in torque_matrix_res + ] + force_matrix_poly = [ + f / number_frames if f is not None else None for f in force_matrix_poly + ] + torque_matrix_poly = [ + t / number_frames if t is not None else None for t in torque_matrix_poly + ] + # Do the entropy calculations for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) From 909772d919b642b4d9d18ad82c9183e28e2dfa99 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 24 Jul 2025 15:35:05 +0100 Subject: [PATCH 06/23] Fix timestep iteration by applying correct slicing to ensure the correct timestep is being used --- CodeEntropy/entropy.py | 53 ++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 1e51175..f31e182 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -93,7 +93,7 @@ def execute(self): states_res = [None for _ in range(number_molecules)] for timestep in reduced_atom.trajectory[start:end:step]: - # time_index = timestep.frame - start + time_index = timestep.frame - start for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) @@ -104,6 +104,7 @@ def execute(self): # Get matrices for vibrational entropy calculations if level == "united_atom": + mol_container.trajectory[time_index] for res_id, residue in enumerate(mol_container.residues): key = (molecule_id, res_id) @@ -114,6 +115,9 @@ def execute(self): f"{residue.atoms.indices[-1]}" ), ) + + res_container.trajectory[time_index] + force_matrix, torque_matrix = ( self._level_manager.get_matrices( res_container, @@ -129,6 +133,8 @@ def execute(self): torque_matrix_ua[key] = torque_matrix elif level == "residue": + mol_container.trajectory[time_index] + force_matrix, torque_matrix = self._level_manager.get_matrices( mol_container, level, @@ -141,6 +147,8 @@ def execute(self): torque_matrix_res[molecule_id] = torque_matrix elif level == "polymer": + mol_container.trajectory[time_index] + force_matrix, torque_matrix = self._level_manager.get_matrices( mol_container, level, @@ -233,6 +241,7 @@ def execute(self): torque_matrix_ua[key], states_ua[key], highest_level, + number_frames, ) elif level == "residue": @@ -246,7 +255,12 @@ def execute(self): highest_level, ) self._process_conformational_entropy( - molecule_id, mol_container, ce, level, states_res[molecule_id] + molecule_id, + mol_container, + ce, + level, + states_res[molecule_id], + number_frames, ) elif level == "polymer": @@ -348,6 +362,7 @@ def _process_united_atom_entropy( torque_matrix, states, highest, + number_frames, ): """ Calculates translational, rotational, and conformational entropy at the @@ -374,7 +389,7 @@ def _process_united_atom_entropy( torque_matrix, "torque", self._args.temperature, highest ) - S_conf_res = ce.conformational_entropy_calculation(mol_id, states) + S_conf_res = ce.conformational_entropy_calculation(states, number_frames) S_trans += S_trans_res S_rot += S_rot_res @@ -416,7 +431,6 @@ def _process_vibrational_entropy( highest (bool): Flag indicating if this is the highest granularity level. """ - force_matrix, torque_matrix = self.force_matrix, self.torque_matrix S_trans = ve.vibrational_entropy_calculation( force_matrix, "force", self._args.temperature, highest @@ -432,7 +446,9 @@ def _process_vibrational_entropy( residue.resname, level, "Rovibrational", S_rot ) - def _process_conformational_entropy(self, mol_id, mol_container, ce, level, states): + def _process_conformational_entropy( + self, mol_id, mol_container, ce, level, states, number_frames + ): """ Computes conformational entropy at the residue level (whole-molecule dihedral analysis). @@ -445,7 +461,7 @@ def _process_conformational_entropy(self, mol_id, mol_container, ce, level, stat start, end, step (int): Frame bounds. n_frames (int): Number of frames used. """ - S_conf = ce.conformational_entropy_calculation(states) + S_conf = ce.conformational_entropy_calculation(states, number_frames) residue = mol_container.residues[mol_id] self._data_logger.add_results_data( residue.resname, level, "Conformational", S_conf @@ -813,9 +829,7 @@ def assign_conformation( return conformations - def conformational_entropy_calculation( - self, data_container, dihedrals, bin_width, start, end, step, number_frames - ): + def conformational_entropy_calculation(self, states, number_frames): """ Function to calculate conformational entropies using eq. (7) in Higham, S.-Y. Chou, F. Gräter and R. H. Henchman, Molecular Physics, 2018, 116, @@ -834,27 +848,6 @@ def conformational_entropy_calculation( S_conf_total = 0 - # For each dihedral, identify the conformation in each frame - num_dihedrals = len(dihedrals) - conformation = np.zeros((num_dihedrals, number_frames)) - index = 0 - for dihedral in dihedrals: - conformation[index] = self.assign_conformation( - data_container, dihedral, number_frames, bin_width, start, end, step - ) - index += 1 - - logger.debug(f"Conformation matrix: {conformation}") - - # For each frame, convert the conformation of all dihedrals into a - # state string - states = ["" for x in range(number_frames)] - for frame_index in range(number_frames): - for index in range(num_dihedrals): - states[frame_index] += str(conformation[index][frame_index]) - - logger.debug(f"States: {states}") - # Count how many times each state occurs, then use the probability # to get the entropy # entropy = sum over states p*ln(p) From a267f47fd5b2fc0e82998dd6fd9884628b1ce04c Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Thu, 24 Jul 2025 16:06:08 +0100 Subject: [PATCH 07/23] Refactor repeated for-loops into a unified loop to eliminate redundancy --- CodeEntropy/entropy.py | 70 ++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index f31e182..305a33e 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -160,11 +160,30 @@ def execute(self): force_matrix_poly[molecule_id] = force_matrix torque_matrix_poly[molecule_id] = torque_matrix - # Get states for conformational entropy calculation bin_width = self._args.bin_width + + force_matrix_ua = {k: v / number_frames for k, v in force_matrix_ua.items()} + torque_matrix_ua = {k: v / number_frames for k, v in torque_matrix_ua.items()} + + force_matrix_res = [ + f / number_frames if f is not None else None for f in force_matrix_res + ] + torque_matrix_res = [ + t / number_frames if t is not None else None for t in torque_matrix_res + ] + force_matrix_poly = [ + f / number_frames if f is not None else None for f in force_matrix_poly + ] + torque_matrix_poly = [ + t / number_frames if t is not None else None for t in torque_matrix_poly + ] + + # Do the entropy calculations for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) for level in levels[molecule_id]: + highest_level = level == levels[molecule_id][-1] + if level == "united_atom": for res_id, residue in enumerate(mol_container.residues): key = (molecule_id, res_id) @@ -192,42 +211,6 @@ def execute(self): step, ) - elif level == "residue": - dihedrals = self._level_manager.get_dihedrals(mol_container, level) - for dihedral in dihedrals: - states_res[molecule_id] = ce.assign_conformation( - mol_container, - dihedral, - number_frames, - bin_width, - start, - end, - step, - ) - - force_matrix_ua = {k: v / number_frames for k, v in force_matrix_ua.items()} - torque_matrix_ua = {k: v / number_frames for k, v in torque_matrix_ua.items()} - - force_matrix_res = [ - f / number_frames if f is not None else None for f in force_matrix_res - ] - torque_matrix_res = [ - t / number_frames if t is not None else None for t in torque_matrix_res - ] - force_matrix_poly = [ - f / number_frames if f is not None else None for f in force_matrix_poly - ] - torque_matrix_poly = [ - t / number_frames if t is not None else None for t in torque_matrix_poly - ] - - # Do the entropy calculations - for molecule_id in range(number_molecules): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - for level in levels[molecule_id]: - highest_level = level == levels[molecule_id][-1] - - if level == "united_atom": for res_id, residue in enumerate(mol_container.residues): key = (molecule_id, res_id) self._process_united_atom_entropy( @@ -245,6 +228,19 @@ def execute(self): ) elif level == "residue": + + dihedrals = self._level_manager.get_dihedrals(mol_container, level) + for dihedral in dihedrals: + states_res[molecule_id] = ce.assign_conformation( + mol_container, + dihedral, + number_frames, + bin_width, + start, + end, + step, + ) + self._process_vibrational_entropy( molecule_id, mol_container, From 32142896949892cc3201fc0f925a96111f142319 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Mon, 28 Jul 2025 14:10:47 +0100 Subject: [PATCH 08/23] Fix indentation and matrix key usage in `_process_united_atom_entropy` - Ensured correct indentation to prevent excessive looping - Passed appropriate `force_matrix`, `torque_matrix`, and `states` within the loop of `_process_united_atom_entropy` - Used correct keys for accessing matrices to maintain consistency and accuracy --- CodeEntropy/entropy.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 305a33e..58e4d4a 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -211,21 +211,19 @@ def execute(self): step, ) - for res_id, residue in enumerate(mol_container.residues): - key = (molecule_id, res_id) - self._process_united_atom_entropy( - molecule_id, - mol_container, - res_id, - ve, - ce, - level, - force_matrix_ua[key], - torque_matrix_ua[key], - states_ua[key], - highest_level, - number_frames, - ) + self._process_united_atom_entropy( + molecule_id, + mol_container, + res_id, + ve, + ce, + level, + force_matrix_ua, + torque_matrix_ua, + states_ua, + highest_level, + number_frames, + ) elif level == "residue": @@ -378,14 +376,19 @@ def _process_united_atom_entropy( S_trans, S_rot, S_conf = 0, 0, 0 for residue_id, residue in enumerate(mol_container.residues): + + key = (mol_id, residue_id) + S_trans_res = ve.vibrational_entropy_calculation( - force_matrix, "force", self._args.temperature, highest + force_matrix[key], "force", self._args.temperature, highest ) S_rot_res = ve.vibrational_entropy_calculation( - torque_matrix, "torque", self._args.temperature, highest + torque_matrix[key], "torque", self._args.temperature, highest ) - S_conf_res = ce.conformational_entropy_calculation(states, number_frames) + S_conf_res = ce.conformational_entropy_calculation( + states[key], number_frames + ) S_trans += S_trans_res S_rot += S_rot_res @@ -457,7 +460,7 @@ def _process_conformational_entropy( start, end, step (int): Frame bounds. n_frames (int): Number of frames used. """ - S_conf = ce.conformational_entropy_calculation(states, number_frames) + S_conf = ce.conformational_entropy_calculation(states[mol_id], number_frames) residue = mol_container.residues[mol_id] self._data_logger.add_results_data( residue.resname, level, "Conformational", S_conf From 0ac02afdda6b4423adb0bb2b46f3dda552c968a0 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Mon, 28 Jul 2025 16:13:25 +0100 Subject: [PATCH 09/23] removal of duplicated timestep slice within the united atom level calculations --- CodeEntropy/entropy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 58e4d4a..f2cd681 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -104,7 +104,6 @@ def execute(self): # Get matrices for vibrational entropy calculations if level == "united_atom": - mol_container.trajectory[time_index] for res_id, residue in enumerate(mol_container.residues): key = (molecule_id, res_id) From d23ec92ae32c4c4761726c3c2c9c9beb4365ea51 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Tue, 29 Jul 2025 10:15:54 +0100 Subject: [PATCH 10/23] Ensuring if the united atom isn't bonded, the axis are reproduceable within expected range --- CodeEntropy/levels.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 9f00487..849c1d3 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -375,15 +375,19 @@ def get_axes(self, data_container, level, index=0): f"not name H* and bonded index {index}" ) - # center at position of heavy atom - atom_group = data_container.select_atoms(f"index {index}") - center = atom_group.positions[0] + if len(atom_set) == 0: + # if no bonds to other residues use pricipal axes of residue + rot_axes = data_container.residues.principal_axes() + else: + # center at position of heavy atom + atom_group = data_container.select_atoms(f"index {index}") + center = atom_group.positions[0] - # get vector for average position of hydrogens - vector = self.get_avg_pos(atom_set, center) + # get vector for average position of hydrogens + vector = self.get_avg_pos(atom_set, center) - # use spherical coordinates function to get rotational axes - rot_axes = self.get_sphCoord_axes(vector) + # use spherical coordinates function to get rotational axes + rot_axes = self.get_sphCoord_axes(vector) logger.debug(f"Translational Axes: {trans_axes}") logger.debug(f"Rotational Axes: {rot_axes}") From ea381c06ade4b020cd38fbedd161b7cd0df740a1 Mon Sep 17 00:00:00 2001 From: skfegan Date: Tue, 29 Jul 2025 15:00:09 +0100 Subject: [PATCH 11/23] filtering matrices to avoid zero eigenvalues --- CodeEntropy/entropy.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index f2cd681..555518e 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -92,6 +92,7 @@ def execute(self): states_ua = {} states_res = [None for _ in range(number_molecules)] + # Looping over timesteps to build the covariance matrices for timestep in reduced_atom.trajectory[start:end:step]: time_index = timestep.frame - start @@ -161,22 +162,6 @@ def execute(self): bin_width = self._args.bin_width - force_matrix_ua = {k: v / number_frames for k, v in force_matrix_ua.items()} - torque_matrix_ua = {k: v / number_frames for k, v in torque_matrix_ua.items()} - - force_matrix_res = [ - f / number_frames if f is not None else None for f in force_matrix_res - ] - torque_matrix_res = [ - t / number_frames if t is not None else None for t in torque_matrix_res - ] - force_matrix_poly = [ - f / number_frames if f is not None else None for f in force_matrix_poly - ] - torque_matrix_poly = [ - t / number_frames if t is not None else None for t in torque_matrix_poly - ] - # Do the entropy calculations for molecule_id in range(number_molecules): mol_container = self._get_molecule_container(reduced_atom, molecule_id) @@ -378,11 +363,19 @@ def _process_united_atom_entropy( key = (mol_id, residue_id) + f_matrix = force_matrix[key] + f_matrix = self._level_manager.filter_zero_rows_columns(f_matrix) + f_matrix = f_matrix / number_frames + + t_matrix = torque_matrix[key] + t_matrix = self._level_manager.filter_zero_rows_columns(t_matrix) + t_matrix = t_matrix / number_frames + S_trans_res = ve.vibrational_entropy_calculation( - force_matrix[key], "force", self._args.temperature, highest + f_matrix, "force", self._args.temperature, highest ) S_rot_res = ve.vibrational_entropy_calculation( - torque_matrix[key], "torque", self._args.temperature, highest + t_matrix, "torque", self._args.temperature, highest ) S_conf_res = ce.conformational_entropy_calculation( @@ -429,7 +422,11 @@ def _process_vibrational_entropy( highest (bool): Flag indicating if this is the highest granularity level. """ - + number_frames = len(mol_container.trajectory) + force_matrix = self._level_manager.filter_zero_rows_columns(force_matrix) + force_matrix = force_matrix / number_frames + torque_matrix = self._level_manager.filter_zero_rows_columns(torque_matrix) + torque_matrix = torque_matrix / number_frames S_trans = ve.vibrational_entropy_calculation( force_matrix, "force", self._args.temperature, highest ) From 3e17391e702712a2cdf118a2152926008fb0db94 Mon Sep 17 00:00:00 2001 From: skfegan Date: Tue, 29 Jul 2025 16:18:21 +0100 Subject: [PATCH 12/23] correcting conformational state string --- CodeEntropy/entropy.py | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 555518e..99f2412 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -64,7 +64,6 @@ def execute(self): # Get reduced universe and initialize values reduced_atom = self._get_reduced_universe() number_molecules, levels = self._level_manager.select_levels(reduced_atom) - # number_residues = len(reduced_atom.residues) ve = VibrationalEntropy( self._run_manager, @@ -169,6 +168,7 @@ def execute(self): highest_level = level == levels[molecule_id][-1] if level == "united_atom": + # get the conformational states for res_id, residue in enumerate(mol_container.residues): key = (molecule_id, res_id) @@ -183,9 +183,15 @@ def execute(self): res_container, "not name H*" ) + # get dihedrals dihedrals = self._level_manager.get_dihedrals(heavy_res, level) + num_dihedrals = len(dihedrals) + + # assign conformation for each dihedral + conformation = np.zeros((num_dihedrals, number_frames)) + dihedral_index = 0 for dihedral in dihedrals: - states_ua[key] = ce.assign_conformation( + conformation[dihedral_index] = ce.assign_conformation( heavy_res, dihedral, number_frames, @@ -194,7 +200,17 @@ def execute(self): end, step, ) + dihedral_index += 1 + + # concatenate conformations into state string + states_ua[key] = ["" for x in range(number_frames)] + for frame_index in range(number_frames): + for dihedral_index in range(num_dihedrals): + states_ua[key][frame_index] += str(conformation[dihedral_index][frame_index]) + logger.debug(f"States UA {states_ua[key]}") + + # Calculate the united atom entropy self._process_united_atom_entropy( molecule_id, mol_container, @@ -211,9 +227,15 @@ def execute(self): elif level == "residue": + # get dihedrals dihedrals = self._level_manager.get_dihedrals(mol_container, level) + num_dihedrals = len(dihedrals) + + # for each dihedral assign conformation + conformation = np.zeros((num_dihedrals, number_frames)) + dihedral_index = 0 for dihedral in dihedrals: - states_res[molecule_id] = ce.assign_conformation( + conformation[dihedral_index] = ce.assign_conformation( mol_container, dihedral, number_frames, @@ -222,6 +244,15 @@ def execute(self): end, step, ) + dihedral_index += 1 + + # concatenate conformations into state string + states_res = ["" for x in range(number_frames)] + for frame_index in range(number_frames): + for dihedral_index in range(num_dihedrals): + states_res[frame_index] += str(conformation[dihedral_index][frame_index]) + logger.debug(f"States UA {states_res}") + self._process_vibrational_entropy( molecule_id, @@ -237,7 +268,7 @@ def execute(self): mol_container, ce, level, - states_res[molecule_id], + states_res, number_frames, ) @@ -423,10 +454,13 @@ def _process_vibrational_entropy( level. """ number_frames = len(mol_container.trajectory) + force_matrix = self._level_manager.filter_zero_rows_columns(force_matrix) force_matrix = force_matrix / number_frames + torque_matrix = self._level_manager.filter_zero_rows_columns(torque_matrix) torque_matrix = torque_matrix / number_frames + S_trans = ve.vibrational_entropy_calculation( force_matrix, "force", self._args.temperature, highest ) From 41834982ec7e19a91050a6e868ca57426620f464 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Tue, 29 Jul 2025 16:51:26 +0100 Subject: [PATCH 13/23] run pre-commit on all files to ensure code consistency --- CodeEntropy/entropy.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 99f2412..6e3f74d 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -201,15 +201,16 @@ def execute(self): step, ) dihedral_index += 1 - + # concatenate conformations into state string states_ua[key] = ["" for x in range(number_frames)] for frame_index in range(number_frames): for dihedral_index in range(num_dihedrals): - states_ua[key][frame_index] += str(conformation[dihedral_index][frame_index]) + states_ua[key][frame_index] += str( + conformation[dihedral_index][frame_index] + ) logger.debug(f"States UA {states_ua[key]}") - # Calculate the united atom entropy self._process_united_atom_entropy( molecule_id, @@ -250,10 +251,11 @@ def execute(self): states_res = ["" for x in range(number_frames)] for frame_index in range(number_frames): for dihedral_index in range(num_dihedrals): - states_res[frame_index] += str(conformation[dihedral_index][frame_index]) + states_res[frame_index] += str( + conformation[dihedral_index][frame_index] + ) logger.debug(f"States UA {states_res}") - self._process_vibrational_entropy( molecule_id, mol_container, @@ -454,10 +456,10 @@ def _process_vibrational_entropy( level. """ number_frames = len(mol_container.trajectory) - + force_matrix = self._level_manager.filter_zero_rows_columns(force_matrix) force_matrix = force_matrix / number_frames - + torque_matrix = self._level_manager.filter_zero_rows_columns(torque_matrix) torque_matrix = torque_matrix / number_frames From 549891671273220527859fec45801164907c3f17 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Wed, 30 Jul 2025 11:25:11 +0100 Subject: [PATCH 14/23] Refactor timestep entropy computation workflow into modular, maintainable components: - Extracted dihedral conformation logic into a new helper method. - Modularized the `_compute_entropies` method to delegate level-specific logic cleanly, including: - United atom level: per-residue dihedral state assignment and entropy processing. - Residue level: molecule-wide dihedral state assignment and entropy processing. - Polymer level: vibrational entropy only. - Introduced `_initialize_molecules`, `_build_covariance_matrices`, and `_update_force_torque_matrices` to encapsulate setup logic and matrix construction. - Implemented `_handle_water_entropy` to isolate water-specific entropy logic and update selection strings accordingly. - Ensured all new methods follow consistent naming and parameter conventions. --- CodeEntropy/entropy.py | 495 +++++++++++++++++++++++++---------------- 1 file changed, 304 insertions(+), 191 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 6e3f74d..620b55a 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -36,35 +36,284 @@ def __init__(self, run_manager, args, universe, data_logger, level_manager): def execute(self): """ - Executes the full entropy computation workflow over selected molecules and - levels. This includes both vibrational and conformational entropy, recorded - per molecule and residue. + Run the full entropy computation workflow. + + This method orchestrates the entire entropy analysis pipeline, including: + - Handling water entropy if present. + - Initializing molecular structures and levels. + - Building force and torque covariance matrices. + - Computing vibrational and conformational entropies. + - Finalizing and logging results. """ start, end, step = self._get_trajectory_bounds() number_frames = self._get_number_frames(start, end, step) + self._handle_water_entropy(start, end, step) + reduced_atom, number_molecules, levels = self._initialize_molecules() + + force_matrices, torque_matrices, states_ua, states_res = ( + self._build_covariance_matrices( + reduced_atom, number_molecules, levels, start, end, step, number_frames + ) + ) + + self._compute_entropies( + reduced_atom, + number_molecules, + levels, + force_matrices, + torque_matrices, + states_ua, + states_res, + number_frames, + start, + end, + step, + ) + + self._finalize_molecule_results() + self._data_logger.log_tables() + + def _handle_water_entropy(self, start, end, step): + """ + Compute and exclude water entropy from the system if applicable. + + If water molecules are present and water entropy calculation is enabled, + this method computes their entropy and updates the selection string to + exclude water from further analysis. + + Parameters: + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + """ has_water = self._universe.select_atoms("water").n_atoms > 0 if has_water and self._args.water_entropy: self._calculate_water_entropy(self._universe, start, end, step) - - if self._args.selection_string != "all": - self._args.selection_string += " and not water" - else: - self._args.selection_string = "not water" - + self._args.selection_string = ( + self._args.selection_string + " and not water" + if self._args.selection_string != "all" + else "not water" + ) logger.debug( - "WaterEntropy: molecule_data: %s", - self._data_logger.molecule_data, + "WaterEntropy: molecule_data: %s", self._data_logger.molecule_data ) logger.debug( - "WaterEntropy: residue_data: %s", - self._data_logger.residue_data, + "WaterEntropy: residue_data: %s", self._data_logger.residue_data ) - # Get reduced universe and initialize values + def _initialize_molecules(self): + """ + Prepare the reduced universe and determine molecule-level configurations. + + Returns: + tuple: A tuple containing: + - reduced_atom (Universe): The reduced atom selection. + - number_molecules (int): Number of molecules in the system. + - levels (list): List of entropy levels per molecule. + """ reduced_atom = self._get_reduced_universe() number_molecules, levels = self._level_manager.select_levels(reduced_atom) + return reduced_atom, number_molecules, levels + + def _build_covariance_matrices( + self, reduced_atom, number_molecules, levels, start, end, step, number_frames + ): + """ + Construct force and torque covariance matrices for all molecules and levels. + + Parameters: + reduced_atom (Universe): The reduced atom selection. + number_molecules (int): Number of molecules in the system. + levels (list): List of entropy levels per molecule. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + number_frames (int): Total number of frames to process. + + Returns: + tuple: A tuple containing: + - force_matrices (dict): Force covariance matrices by level. + - torque_matrices (dict): Torque covariance matrices by level. + - states_ua (dict): Conformational states at the united-atom level. + - states_res (list): Conformational states at the residue level. + """ + force_matrices = { + "ua": {}, + "res": [None] * number_molecules, + "poly": [None] * number_molecules, + } + torque_matrices = { + "ua": {}, + "res": [None] * number_molecules, + "poly": [None] * number_molecules, + } + states_ua = {} + states_res = [None] * number_molecules + + for timestep in reduced_atom.trajectory[start:end:step]: + time_index = timestep.frame - start + + for mol_id in range(number_molecules): + mol = self._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + self._update_force_torque_matrices( + mol, + mol_id, + level, + levels[mol_id], + time_index, + number_frames, + force_matrices, + torque_matrices, + ) + + return force_matrices, torque_matrices, states_ua, states_res + + def _update_force_torque_matrices( + self, + mol, + mol_id, + level, + level_list, + time_index, + num_frames, + force_matrices, + torque_matrices, + ): + """ + Update force and torque matrices for a given molecule and entropy level. + + Parameters: + mol (AtomGroup): The molecule to process. + mol_id (int): Index of the molecule. + level (str): Current entropy level ("united_atom", "residue", or "polymer"). + level_list (list): List of levels for the molecule. + time_index (int): Index of the current frame. + num_frames (int): Total number of frames. + force_matrices (dict): Dictionary of force matrices to update. + torque_matrices (dict): Dictionary of torque matrices to update. + """ + highest = level == level_list[-1] + + if level == "united_atom": + for res_id, residue in enumerate(mol.residues): + key = (mol_id, res_id) + res = self._run_manager.new_U_select_atom( + mol, f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}" + ) + res.trajectory[time_index] + + f_mat, t_mat = self._level_manager.get_matrices( + res, + level, + num_frames, + highest, + force_matrices["ua"].get(key), + torque_matrices["ua"].get(key), + ) + force_matrices["ua"][key] = f_mat + torque_matrices["ua"][key] = t_mat + + elif level in ["residue", "polymer"]: + mol.trajectory[time_index] + key = "res" if level == "residue" else "poly" + f_mat, t_mat = self._level_manager.get_matrices( + mol, + level, + num_frames, + highest, + force_matrices[key][mol_id], + torque_matrices[key][mol_id], + ) + force_matrices[key][mol_id] = f_mat + torque_matrices[key][mol_id] = t_mat + + def _compute_dihedral_conformations( + self, + selector, + level, + ce, + number_frames, + bin_width, + start, + end, + step, + ): + """ + Compute dihedral conformations for a given selector and entropy level. + + Parameters: + selector (AtomGroup): Atom selection to compute dihedrals for. + level (str): Entropy level ("united_atom" or "residue"). + ce (ConformationalEntropy): Conformational entropy calculator. + number_frames (int): Number of frames to process. + bin_width (float): Bin width for dihedral angle discretization. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + + Returns: + tuple: A tuple containing: + - states (list): List of conformation strings per frame. + - dihedrals (list): List of dihedral angle definitions. + """ + dihedrals = self._level_manager.get_dihedrals(selector, level) + num_dihedrals = len(dihedrals) + + conformation = np.zeros((num_dihedrals, number_frames)) + for i, dihedral in enumerate(dihedrals): + conformation[i] = ce.assign_conformation( + selector, dihedral, number_frames, bin_width, start, end, step + ) + states = [ + "".join(str(int(conformation[d][f])) for d in range(num_dihedrals)) + for f in range(number_frames) + ] + + return states, dihedrals + + def _compute_entropies( + self, + reduced_atom, + number_molecules, + levels, + force_matrices, + torque_matrices, + states_ua, + states_res, + number_frames, + start, + end, + step, + ): + """ + Compute vibrational and conformational entropies for all molecules and levels. + + This method iterates over each molecule and its associated entropy levels + (united_atom, residue, polymer), computing the corresponding entropy + contributions using force/torque matrices and dihedral conformations. + + For each level: + - "united_atom": Computes per-residue conformational states and entropy. + - "residue": Computes molecule-level conformational and vibrational entropy. + - "polymer": Computes only vibrational entropy. + + Parameters: + reduced_atom (Universe): The reduced atom selection from the trajectory. + number_molecules (int): Number of molecules in the system. + levels (list): List of entropy levels per molecule. + force_matrices (dict): Precomputed force covariance matrices. + torque_matrices (dict): Precomputed torque covariance matrices. + states_ua (dict): Dictionary to store united-atom conformational states. + states_res (list): List to store residue-level conformational states. + number_frames (int): Total number of trajectory frames to process. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + """ + bin_width = self._args.bin_width ve = VibrationalEntropy( self._run_manager, self._args, @@ -80,194 +329,66 @@ def execute(self): self._level_manager, ) - # Initialise force and torque matrices - force_matrix_ua = {} - torque_matrix_ua = {} - force_matrix_res = [None for _ in range(number_molecules)] - torque_matrix_res = [None for _ in range(number_molecules)] - force_matrix_poly = [None for _ in range(number_molecules)] - torque_matrix_poly = [None for _ in range(number_molecules)] - - states_ua = {} - states_res = [None for _ in range(number_molecules)] - - # Looping over timesteps to build the covariance matrices - for timestep in reduced_atom.trajectory[start:end:step]: - time_index = timestep.frame - start - - for molecule_id in range(number_molecules): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - - for level in levels[molecule_id]: - # Identify if level is the highest (molecule) level - highest_level = level == levels[molecule_id][-1] - - # Get matrices for vibrational entropy calculations - if level == "united_atom": - for res_id, residue in enumerate(mol_container.residues): - key = (molecule_id, res_id) - - res_container = self._run_manager.new_U_select_atom( - mol_container, - ( - f"index {residue.atoms.indices[0]}:" - f"{residue.atoms.indices[-1]}" - ), - ) - - res_container.trajectory[time_index] - - force_matrix, torque_matrix = ( - self._level_manager.get_matrices( - res_container, - level, - number_frames, - highest_level, - force_matrix_ua.get(key), - torque_matrix_ua.get(key), - ) - ) - - force_matrix_ua[key] = force_matrix - torque_matrix_ua[key] = torque_matrix - - elif level == "residue": - mol_container.trajectory[time_index] - - force_matrix, torque_matrix = self._level_manager.get_matrices( - mol_container, - level, - number_frames, - highest_level, - force_matrix_res[molecule_id], - torque_matrix_res[molecule_id], - ) - force_matrix_res[molecule_id] = force_matrix - torque_matrix_res[molecule_id] = torque_matrix - - elif level == "polymer": - mol_container.trajectory[time_index] - - force_matrix, torque_matrix = self._level_manager.get_matrices( - mol_container, - level, - number_frames, - highest_level, - force_matrix_poly[molecule_id], - torque_matrix_poly[molecule_id], - ) - force_matrix_poly[molecule_id] = force_matrix - torque_matrix_poly[molecule_id] = torque_matrix - - bin_width = self._args.bin_width - - # Do the entropy calculations - for molecule_id in range(number_molecules): - mol_container = self._get_molecule_container(reduced_atom, molecule_id) - for level in levels[molecule_id]: - highest_level = level == levels[molecule_id][-1] + for mol_id in range(number_molecules): + mol = self._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + highest = level == levels[mol_id][-1] if level == "united_atom": - # get the conformational states - for res_id, residue in enumerate(mol_container.residues): - key = (molecule_id, res_id) - + for res_id, residue in enumerate(mol.residues): + key = (mol_id, res_id) res_container = self._run_manager.new_U_select_atom( - mol_container, - ( - f"index {residue.atoms.indices[0]}:" - f"{residue.atoms.indices[-1]}" - ), + mol, + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}", ) heavy_res = self._run_manager.new_U_select_atom( res_container, "not name H*" ) - # get dihedrals - dihedrals = self._level_manager.get_dihedrals(heavy_res, level) - num_dihedrals = len(dihedrals) - - # assign conformation for each dihedral - conformation = np.zeros((num_dihedrals, number_frames)) - dihedral_index = 0 - for dihedral in dihedrals: - conformation[dihedral_index] = ce.assign_conformation( - heavy_res, - dihedral, - number_frames, - bin_width, - start, - end, - step, - ) - dihedral_index += 1 - - # concatenate conformations into state string - states_ua[key] = ["" for x in range(number_frames)] - for frame_index in range(number_frames): - for dihedral_index in range(num_dihedrals): - states_ua[key][frame_index] += str( - conformation[dihedral_index][frame_index] - ) - logger.debug(f"States UA {states_ua[key]}") - - # Calculate the united atom entropy + states_ua[key], _ = self._compute_dihedral_conformations( + heavy_res, + level, + ce, + number_frames, + bin_width, + start, + end, + step, + ) + self._process_united_atom_entropy( - molecule_id, - mol_container, + mol_id, + mol, res_id, ve, ce, level, - force_matrix_ua, - torque_matrix_ua, + force_matrices["ua"], + torque_matrices["ua"], states_ua, - highest_level, + highest, number_frames, ) elif level == "residue": - - # get dihedrals - dihedrals = self._level_manager.get_dihedrals(mol_container, level) - num_dihedrals = len(dihedrals) - - # for each dihedral assign conformation - conformation = np.zeros((num_dihedrals, number_frames)) - dihedral_index = 0 - for dihedral in dihedrals: - conformation[dihedral_index] = ce.assign_conformation( - mol_container, - dihedral, - number_frames, - bin_width, - start, - end, - step, - ) - dihedral_index += 1 - - # concatenate conformations into state string - states_res = ["" for x in range(number_frames)] - for frame_index in range(number_frames): - for dihedral_index in range(num_dihedrals): - states_res[frame_index] += str( - conformation[dihedral_index][frame_index] - ) - logger.debug(f"States UA {states_res}") + states_res, _ = self._compute_dihedral_conformations( + mol, level, ce, number_frames, bin_width, start, end, step + ) self._process_vibrational_entropy( - molecule_id, - mol_container, + mol_id, + mol, ve, level, - force_matrix_res[molecule_id], - torque_matrix_res[molecule_id], - highest_level, + force_matrices["res"][mol_id], + torque_matrices["res"][mol_id], + highest, ) + self._process_conformational_entropy( - molecule_id, - mol_container, + mol_id, + mol, ce, level, states_res, @@ -276,23 +397,15 @@ def execute(self): elif level == "polymer": self._process_vibrational_entropy( - molecule_id, - mol_container, + mol_id, + mol, ve, level, - force_matrix_poly[molecule_id], - torque_matrix_poly[molecule_id], - highest_level, + force_matrices["poly"][mol_id], + torque_matrices["poly"][mol_id], + highest, ) - # TODO when orientational entropy function is ready - # if highest_level: - # self._process_orientational_entropy() - - self._finalize_molecule_results() - - self._data_logger.log_tables() - def _get_trajectory_bounds(self): """ Returns the start, end, and step frame indices based on input arguments. From d6cbdc1ad1a8b924b30c2ca33be9791c3e58115f Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Wed, 30 Jul 2025 13:05:20 +0100 Subject: [PATCH 15/23] Further refinements to the per timestep refactor, relocating functions to `LevelManager` - Relocating the following functions as they deal more intrinsically with the levels rather than the entropy calculations - Relocated `compute_dihedral_conformations` to `LevelManager` - Relocated `build_covariance_matrices` to `LevelManager` - Relocated `update_force_torque_matrices` to `LevelManager` --- CodeEntropy/entropy.py | 191 +++++------------------------------------ CodeEntropy/levels.py | 170 ++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 170 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 620b55a..b789285 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -52,8 +52,15 @@ def execute(self): reduced_atom, number_molecules, levels = self._initialize_molecules() force_matrices, torque_matrices, states_ua, states_res = ( - self._build_covariance_matrices( - reduced_atom, number_molecules, levels, start, end, step, number_frames + self._level_manager.build_covariance_matrices( + self, + reduced_atom, + number_molecules, + levels, + start, + end, + step, + number_frames, ) ) @@ -116,164 +123,6 @@ def _initialize_molecules(self): number_molecules, levels = self._level_manager.select_levels(reduced_atom) return reduced_atom, number_molecules, levels - def _build_covariance_matrices( - self, reduced_atom, number_molecules, levels, start, end, step, number_frames - ): - """ - Construct force and torque covariance matrices for all molecules and levels. - - Parameters: - reduced_atom (Universe): The reduced atom selection. - number_molecules (int): Number of molecules in the system. - levels (list): List of entropy levels per molecule. - start (int): Start frame index. - end (int): End frame index. - step (int): Step size for frame iteration. - number_frames (int): Total number of frames to process. - - Returns: - tuple: A tuple containing: - - force_matrices (dict): Force covariance matrices by level. - - torque_matrices (dict): Torque covariance matrices by level. - - states_ua (dict): Conformational states at the united-atom level. - - states_res (list): Conformational states at the residue level. - """ - force_matrices = { - "ua": {}, - "res": [None] * number_molecules, - "poly": [None] * number_molecules, - } - torque_matrices = { - "ua": {}, - "res": [None] * number_molecules, - "poly": [None] * number_molecules, - } - states_ua = {} - states_res = [None] * number_molecules - - for timestep in reduced_atom.trajectory[start:end:step]: - time_index = timestep.frame - start - - for mol_id in range(number_molecules): - mol = self._get_molecule_container(reduced_atom, mol_id) - for level in levels[mol_id]: - self._update_force_torque_matrices( - mol, - mol_id, - level, - levels[mol_id], - time_index, - number_frames, - force_matrices, - torque_matrices, - ) - - return force_matrices, torque_matrices, states_ua, states_res - - def _update_force_torque_matrices( - self, - mol, - mol_id, - level, - level_list, - time_index, - num_frames, - force_matrices, - torque_matrices, - ): - """ - Update force and torque matrices for a given molecule and entropy level. - - Parameters: - mol (AtomGroup): The molecule to process. - mol_id (int): Index of the molecule. - level (str): Current entropy level ("united_atom", "residue", or "polymer"). - level_list (list): List of levels for the molecule. - time_index (int): Index of the current frame. - num_frames (int): Total number of frames. - force_matrices (dict): Dictionary of force matrices to update. - torque_matrices (dict): Dictionary of torque matrices to update. - """ - highest = level == level_list[-1] - - if level == "united_atom": - for res_id, residue in enumerate(mol.residues): - key = (mol_id, res_id) - res = self._run_manager.new_U_select_atom( - mol, f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}" - ) - res.trajectory[time_index] - - f_mat, t_mat = self._level_manager.get_matrices( - res, - level, - num_frames, - highest, - force_matrices["ua"].get(key), - torque_matrices["ua"].get(key), - ) - force_matrices["ua"][key] = f_mat - torque_matrices["ua"][key] = t_mat - - elif level in ["residue", "polymer"]: - mol.trajectory[time_index] - key = "res" if level == "residue" else "poly" - f_mat, t_mat = self._level_manager.get_matrices( - mol, - level, - num_frames, - highest, - force_matrices[key][mol_id], - torque_matrices[key][mol_id], - ) - force_matrices[key][mol_id] = f_mat - torque_matrices[key][mol_id] = t_mat - - def _compute_dihedral_conformations( - self, - selector, - level, - ce, - number_frames, - bin_width, - start, - end, - step, - ): - """ - Compute dihedral conformations for a given selector and entropy level. - - Parameters: - selector (AtomGroup): Atom selection to compute dihedrals for. - level (str): Entropy level ("united_atom" or "residue"). - ce (ConformationalEntropy): Conformational entropy calculator. - number_frames (int): Number of frames to process. - bin_width (float): Bin width for dihedral angle discretization. - start (int): Start frame index. - end (int): End frame index. - step (int): Step size for frame iteration. - - Returns: - tuple: A tuple containing: - - states (list): List of conformation strings per frame. - - dihedrals (list): List of dihedral angle definitions. - """ - dihedrals = self._level_manager.get_dihedrals(selector, level) - num_dihedrals = len(dihedrals) - - conformation = np.zeros((num_dihedrals, number_frames)) - for i, dihedral in enumerate(dihedrals): - conformation[i] = ce.assign_conformation( - selector, dihedral, number_frames, bin_width, start, end, step - ) - - states = [ - "".join(str(int(conformation[d][f])) for d in range(num_dihedrals)) - for f in range(number_frames) - ] - - return states, dihedrals - def _compute_entropies( self, reduced_atom, @@ -346,15 +195,17 @@ def _compute_entropies( res_container, "not name H*" ) - states_ua[key], _ = self._compute_dihedral_conformations( - heavy_res, - level, - ce, - number_frames, - bin_width, - start, - end, - step, + states_ua[key] = ( + self._level_manager.compute_dihedral_conformations( + heavy_res, + level, + ce, + number_frames, + bin_width, + start, + end, + step, + ) ) self._process_united_atom_entropy( @@ -372,7 +223,7 @@ def _compute_entropies( ) elif level == "residue": - states_res, _ = self._compute_dihedral_conformations( + states_res = self._level_manager.compute_dihedral_conformations( mol, level, ce, number_frames, bin_width, start, end, step ) diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 849c1d3..7c18b37 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -261,6 +261,51 @@ def get_dihedrals(self, data_container, level): return dihedrals + def compute_dihedral_conformations( + self, + selector, + level, + ce, + number_frames, + bin_width, + start, + end, + step, + ): + """ + Compute dihedral conformations for a given selector and entropy level. + + Parameters: + selector (AtomGroup): Atom selection to compute dihedrals for. + level (str): Entropy level ("united_atom" or "residue"). + ce (ConformationalEntropy): Conformational entropy calculator. + number_frames (int): Number of frames to process. + bin_width (float): Bin width for dihedral angle discretization. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + + Returns: + tuple: A tuple containing: + - states (list): List of conformation strings per frame. + - dihedrals (list): List of dihedral angle definitions. + """ + dihedrals = self.get_dihedrals(selector, level) + num_dihedrals = len(dihedrals) + + conformation = np.zeros((num_dihedrals, number_frames)) + for i, dihedral in enumerate(dihedrals): + conformation[i] = ce.assign_conformation( + selector, dihedral, number_frames, bin_width, start, end, step + ) + + states = [ + "".join(str(int(conformation[d][f])) for d in range(num_dihedrals)) + for f in range(number_frames) + ] + + return states + def get_beads(self, data_container, level): """ Function to define beads depending on the level in the hierarchy. @@ -677,6 +722,131 @@ def create_submatrix(self, data_i, data_j, number_frames): return submatrix + def build_covariance_matrices( + self, + entropy_manager, + reduced_atom, + number_molecules, + levels, + start, + end, + step, + number_frames, + ): + """ + Construct force and torque covariance matrices for all molecules and levels. + + Parameters: + entropy_manager (EntropyManager): Instance of the EntropyManager + reduced_atom (Universe): The reduced atom selection. + number_molecules (int): Number of molecules in the system. + levels (list): List of entropy levels per molecule. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + number_frames (int): Total number of frames to process. + + Returns: + tuple: A tuple containing: + - force_matrices (dict): Force covariance matrices by level. + - torque_matrices (dict): Torque covariance matrices by level. + - states_ua (dict): Conformational states at the united-atom level. + - states_res (list): Conformational states at the residue level. + """ + force_matrices = { + "ua": {}, + "res": [None] * number_molecules, + "poly": [None] * number_molecules, + } + torque_matrices = { + "ua": {}, + "res": [None] * number_molecules, + "poly": [None] * number_molecules, + } + states_ua = {} + states_res = [None] * number_molecules + + for timestep in reduced_atom.trajectory[start:end:step]: + time_index = timestep.frame - start + + for mol_id in range(number_molecules): + mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + self.update_force_torque_matrices( + entropy_manager, + mol, + mol_id, + level, + levels[mol_id], + time_index, + number_frames, + force_matrices, + torque_matrices, + ) + + return force_matrices, torque_matrices, states_ua, states_res + + def update_force_torque_matrices( + self, + entropy_manager, + mol, + mol_id, + level, + level_list, + time_index, + num_frames, + force_matrices, + torque_matrices, + ): + """ + Update force and torque matrices for a given molecule and entropy level. + + Parameters: + entropy_manager (EntropyManager): Instance of the EntropyManager + mol (AtomGroup): The molecule to process. + mol_id (int): Index of the molecule. + level (str): Current entropy level ("united_atom", "residue", or "polymer"). + level_list (list): List of levels for the molecule. + time_index (int): Index of the current frame. + num_frames (int): Total number of frames. + force_matrices (dict): Dictionary of force matrices to update. + torque_matrices (dict): Dictionary of torque matrices to update. + """ + highest = level == level_list[-1] + + if level == "united_atom": + for res_id, residue in enumerate(mol.residues): + key = (mol_id, res_id) + res = entropy_manager._run_manager.new_U_select_atom( + mol, f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}" + ) + res.trajectory[time_index] + + f_mat, t_mat = self.get_matrices( + res, + level, + num_frames, + highest, + force_matrices["ua"].get(key), + torque_matrices["ua"].get(key), + ) + force_matrices["ua"][key] = f_mat + torque_matrices["ua"][key] = t_mat + + elif level in ["residue", "polymer"]: + mol.trajectory[time_index] + key = "res" if level == "residue" else "poly" + f_mat, t_mat = self.get_matrices( + mol, + level, + num_frames, + highest, + force_matrices[key][mol_id], + torque_matrices[key][mol_id], + ) + force_matrices[key][mol_id] = f_mat + torque_matrices[key][mol_id] = t_mat + def filter_zero_rows_columns(self, arg_matrix): """ function for removing rows and columns that contain only zeros from a matrix From 80ced0c5cfe80e3d480e74a0609ce0b88f29c7a6 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Wed, 30 Jul 2025 15:28:17 +0100 Subject: [PATCH 16/23] fixing and adding additional test cases for the per-timestep switch over --- CodeEntropy/entropy.py | 2 - CodeEntropy/levels.py | 4 +- tests/test_CodeEntropy/test_entropy.py | 291 ++++++++++++++++--------- tests/test_CodeEntropy/test_levels.py | 146 +++++++++++-- 4 files changed, 316 insertions(+), 127 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index b789285..9177eca 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -211,7 +211,6 @@ def _compute_entropies( self._process_united_atom_entropy( mol_id, mol, - res_id, ve, ce, level, @@ -329,7 +328,6 @@ def _process_united_atom_entropy( self, mol_id, mol_container, - res_id, ve, ce, level, diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 7c18b37..c79c68f 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -377,7 +377,7 @@ def get_axes(self, data_container, level, index=0): trans_axes = data_container.atoms.principal_axes() rot_axes = data_container.atoms.principal_axes() - if level == "residue": + elif level == "residue": # Translation # for residues use principal axes of whole molecule for translation trans_axes = data_container.atoms.principal_axes() @@ -409,7 +409,7 @@ def get_axes(self, data_container, level, index=0): # use spherical coordinates function to get rotational axes rot_axes = self.get_sphCoord_axes(vector) - if level == "united_atom": + elif level == "united_atom": # Translation # for united atoms use principal axes of residue for translation trans_axes = data_container.residues.principal_axes() diff --git a/tests/test_CodeEntropy/test_entropy.py b/tests/test_CodeEntropy/test_entropy.py index dd31cf6..0043a71 100644 --- a/tests/test_CodeEntropy/test_entropy.py +++ b/tests/test_CodeEntropy/test_entropy.py @@ -67,63 +67,60 @@ def test_execute_full_workflow(self): run_manager, args, u, data_logger, level_manager ) + # Mock internal methods entropy_manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) entropy_manager._get_number_frames = MagicMock(return_value=11) - entropy_manager._get_reduced_universe = MagicMock( - return_value="reduced_universe" + entropy_manager._handle_water_entropy = MagicMock() + entropy_manager._initialize_molecules = MagicMock( + return_value=("reduced_atom", 3, [["united_atom", "polymer", "residue"]]) ) - entropy_manager._get_molecule_container = MagicMock( - return_value=MagicMock(residues=[1, 2, 3]) + entropy_manager._level_manager.build_covariance_matrices = MagicMock( + return_value=( + "force_matrices", + "torque_matrices", + "states_ua", + "states_res", + ) ) + entropy_manager._compute_entropies = MagicMock() entropy_manager._finalize_molecule_results = MagicMock() entropy_manager._data_logger.log_tables = MagicMock() - entropy_manager._level_manager.select_levels = MagicMock( - return_value=(1, [["united_atom", "polymer", "residue"]]) - ) - - # Patch entropy classes and processing methods - ve = MagicMock() - ce = MagicMock() - with ( - patch("CodeEntropy.entropy.VibrationalEntropy", return_value=ve), - patch("CodeEntropy.entropy.ConformationalEntropy", return_value=ce), - ): - - entropy_manager._process_united_atom_level = MagicMock( - side_effect=lambda *args, **kwargs: data_logger.add_results_data( - "A", "united_atom", "Conformational", 1.0 - ) - ) - entropy_manager._process_vibrational_only_levels = MagicMock( - side_effect=lambda *args, **kwargs: data_logger.add_results_data( - "A", "polymer", "Transvibrational", 2.0 - ) - ) - entropy_manager._process_conformational_residue_level = MagicMock( - side_effect=lambda *args, **kwargs: data_logger.add_residue_data( - 0, "A", "residue", "Conformational", 3.0 - ) - ) - - entropy_manager.execute() + # Execute + entropy_manager.execute() # Assertions - entropy_manager._process_united_atom_level.assert_called_once() - self.assertEqual(entropy_manager._process_vibrational_only_levels.call_count, 2) - entropy_manager._process_conformational_residue_level.assert_called_once() + entropy_manager._get_trajectory_bounds.assert_called_once() + entropy_manager._get_number_frames.assert_called_once_with(0, 10, 1) + entropy_manager._handle_water_entropy.assert_called_once_with(0, 10, 1) + entropy_manager._initialize_molecules.assert_called_once() + entropy_manager._level_manager.build_covariance_matrices.assert_called_once_with + ( + entropy_manager, + "reduced_atom", + 3, + [["united_atom", "polymer", "residue"]], + 0, + 10, + 1, + 11, + ) + entropy_manager._compute_entropies.assert_called_once_with( + "reduced_atom", + 3, + [["united_atom", "polymer", "residue"]], + "force_matrices", + "torque_matrices", + "states_ua", + "states_res", + 11, + 0, + 10, + 1, + ) entropy_manager._finalize_molecule_results.assert_called_once() entropy_manager._data_logger.log_tables.assert_called_once() - # Check molecule-level entropy types - molecule_types = set(entry[2] for entry in data_logger.molecule_data) - self.assertIn("Conformational", molecule_types) - self.assertIn("Transvibrational", molecule_types) - - # Check residue-level entropy types - residue_types = set(entry[3] for entry in data_logger.residue_data) - self.assertIn("Conformational", residue_types) - def test_water_entropy_sets_selection_string_when_all(self): """ Tests that when `selection_string` is initially 'all' and water entropy is @@ -131,7 +128,9 @@ def test_water_entropy_sets_selection_string_when_all(self): calculating water entropy. """ mock_universe = MagicMock() - mock_universe.select_atoms.return_value.n_atoms = 5 + mock_universe.select_atoms.return_value.n_atoms = ( + 5 # Simulate water atoms present + ) args = MagicMock(water_entropy=True, selection_string="all") run_manager = MagicMock() @@ -141,18 +140,23 @@ def test_water_entropy_sets_selection_string_when_all(self): manager = EntropyManager( run_manager, args, mock_universe, data_logger, level_manager ) + + # Mock methods used in execute manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) manager._get_number_frames = MagicMock(return_value=11) manager._calculate_water_entropy = MagicMock() - manager._get_reduced_universe = MagicMock(return_value="reduced") - manager._level_manager.select_levels = MagicMock(return_value=(0, [])) + manager._initialize_molecules = MagicMock(return_value=("reduced", 1, [])) + manager._level_manager.build_covariance_matrices = MagicMock( + return_value=(None, None, None, None) + ) + manager._compute_entropies = MagicMock() manager._finalize_molecule_results = MagicMock() manager._data_logger.log_tables = MagicMock() manager.execute() manager._calculate_water_entropy.assert_called_once() - assert args.selection_string == "not water" + self.assertEqual(args.selection_string, "not water") def test_water_entropy_appends_to_custom_selection_string(self): """ @@ -161,7 +165,9 @@ def test_water_entropy_appends_to_custom_selection_string(self): to the existing selection string after calculating water entropy. """ mock_universe = MagicMock() - mock_universe.select_atoms.return_value.n_atoms = 5 + mock_universe.select_atoms.return_value.n_atoms = ( + 5 # Simulate water atoms present + ) args = MagicMock(water_entropy=True, selection_string="protein") run_manager = MagicMock() @@ -171,18 +177,23 @@ def test_water_entropy_appends_to_custom_selection_string(self): manager = EntropyManager( run_manager, args, mock_universe, data_logger, level_manager ) + + # Mock methods used in execute manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) manager._get_number_frames = MagicMock(return_value=11) manager._calculate_water_entropy = MagicMock() - manager._get_reduced_universe = MagicMock(return_value="reduced") - manager._level_manager.select_levels = MagicMock(return_value=(0, [])) + manager._initialize_molecules = MagicMock(return_value=("reduced", 1, [])) + manager._level_manager.build_covariance_matrices = MagicMock( + return_value=(None, None, None, None) + ) + manager._compute_entropies = MagicMock() manager._finalize_molecule_results = MagicMock() manager._data_logger.log_tables = MagicMock() manager.execute() manager._calculate_water_entropy.assert_called_once() - assert args.selection_string == "protein and not water" + self.assertEqual(args.selection_string, "protein and not water") def test_get_trajectory_bounds(self): """ @@ -409,8 +420,8 @@ def test_get_molecule_container(self, mock_args): def test_process_united_atom_level(self): """ - Tests that `_process_united_atom_level` correctly logs global and residue-level - entropy results for a known molecular system using MDAnalysis. + Tests that `_process_united_atom_entropy` correctly logs global and + residue-level entropy results for a known molecular system using MDAnalysis. """ # Load a known test universe @@ -425,41 +436,48 @@ def test_process_united_atom_level(self): data_logger = DataLogger() manager = EntropyManager(run_manager, args, u, data_logger, level_manager) + # Prepare mock molecule container reduced_atom = manager._get_reduced_universe() mol_container = manager._get_molecule_container(reduced_atom, 0) n_residues = len(mol_container.residues) - ve = VibrationalEntropy(run_manager, args, u, data_logger, level_manager) - ce = ConformationalEntropy(run_manager, args, u, data_logger, level_manager) + # Create dummy matrices and states + force_matrix = {(0, i): np.eye(3) for i in range(n_residues)} + torque_matrix = {(0, i): np.eye(3) * 2 for i in range(n_residues)} + states = {(0, i): np.ones((10, 3)) for i in range(n_residues)} + + # Mock entropy calculators + ve = MagicMock() + ce = MagicMock() + ve.vibrational_entropy_calculation.side_effect = lambda m, t, temp, high: ( + 1.0 if t == "force" else 2.0 + ) + ce.conformational_entropy_calculation.return_value = 3.0 - # Run the function - manager._process_united_atom_level( + # Run the method + manager._process_united_atom_entropy( mol_id=0, mol_container=mol_container, ve=ve, ce=ce, level="united_atom", - start=1, - end=1, - step=1, - n_frames=1, + force_matrix=force_matrix, + torque_matrix=torque_matrix, + states=states, highest=True, + number_frames=10, ) - # Check that results were logged for each entropy type + # Check molecule-level results df = data_logger.molecule_data self.assertEqual(len(df), 3) # Trans, Rot, Conf - # Check that residue-level results were logged + # Check residue-level results residue_df = data_logger.residue_data self.assertEqual(len(residue_df), 3 * n_residues) # 3 types per residue # Check that all expected types are present - expected_types = { - "Transvibrational", - "Rovibrational", - "Conformational", - } + expected_types = {"Transvibrational", "Rovibrational", "Conformational"} actual_types = set(entry[2] for entry in df) self.assertSetEqual(actual_types, expected_types) @@ -469,10 +487,9 @@ def test_process_united_atom_level(self): def test_process_vibrational_only_levels(self): """ - Tests that `_process_vibrational_only_levels` correctly logs vibrational + Tests that `_process_vibrational_entropy` correctly logs vibrational entropy results for a known molecular system using MDAnalysis. """ - # Load a known test universe tprfile = os.path.join(self.test_data_dir, "md_A4_dna.tpr") trrfile = os.path.join(self.test_data_dir, "md_A4_dna_xf.trr") @@ -485,27 +502,29 @@ def test_process_vibrational_only_levels(self): data_logger = DataLogger() manager = EntropyManager(run_manager, args, u, data_logger, level_manager) + # Prepare mock molecule container reduced_atom = manager._get_reduced_universe() mol_container = manager._get_molecule_container(reduced_atom, 0) - # Patch methods to isolate the test - manager._level_manager.get_matrices = MagicMock( - return_value=("mock_force", "mock_torque") - ) + # Simulate trajectory length + mol_container.trajectory = [None] * 10 # 10 frames - ve = VibrationalEntropy(run_manager, args, u, data_logger, level_manager) - ve.vibrational_entropy_calculation = MagicMock(side_effect=[1.11, 2.22]) + # Create dummy matrices + force_matrix = np.eye(3) + torque_matrix = np.eye(3) * 2 - # Run the function - manager._process_vibrational_only_levels( + # Mock entropy calculator + ve = MagicMock() + ve.vibrational_entropy_calculation.side_effect = [1.11, 2.22] + + # Run the method + manager._process_vibrational_entropy( mol_id=0, mol_container=mol_container, ve=ve, level="Vibrational", - start=1, - end=1, - step=1, - n_frames=1, + force_matrix=force_matrix, + torque_matrix=torque_matrix, highest=True, ) @@ -513,25 +532,93 @@ def test_process_vibrational_only_levels(self): df = data_logger.molecule_data self.assertEqual(len(df), 2) # Transvibrational and Rovibrational - expected_types = { - "Transvibrational", - "Rovibrational", - } + expected_types = {"Transvibrational", "Rovibrational"} actual_types = set(entry[2] for entry in df) self.assertSetEqual(actual_types, expected_types) - # Check entropy values results = [entry[3] for entry in df] self.assertIn(1.11, results) self.assertIn(2.22, results) + def test_compute_entropies_polymer_branch(self): + """ + Test _compute_entropies triggers _process_vibrational_entropy for 'polymer' + level. + """ + # Setup the manager + args = MagicMock(bin_width=0.1) + run_manager = MagicMock() + level_manager = MagicMock() + data_logger = DataLogger() + manager = EntropyManager( + run_manager, args, MagicMock(), data_logger, level_manager + ) + + # Setup dummy universe and reduced atom + reduced_atom = MagicMock() + + # Number of molecules = 1 for simplicity + number_molecules = 1 + + # Levels includes 'polymer' + levels = [["polymer"]] + + # Provide dummy force and torque matrices with the expected structure + force_matrices = {"poly": {0: np.eye(3)}} + torque_matrices = {"poly": {0: np.eye(3) * 2}} + + # States dictionaries (could be empty) + states_ua = {} + states_res = [] + + # Setup frames and slicing params + number_frames = 5 + start = 0 + end = 5 + step = 1 + + # Mock the molecule container and its residues + mol_mock = MagicMock() + mol_mock.residues = [] # No residues needed for polymer level + + # Patch _get_molecule_container to return our mock molecule + manager._get_molecule_container = MagicMock(return_value=mol_mock) + + # Patch _process_vibrational_entropy to monitor calls + manager._process_vibrational_entropy = MagicMock() + + # Call the method + manager._compute_entropies( + reduced_atom, + number_molecules, + levels, + force_matrices, + torque_matrices, + states_ua, + states_res, + number_frames, + start, + end, + step, + ) + + # Assert _process_vibrational_entropy called with expected args + manager._process_vibrational_entropy.assert_called_once_with( + 0, # mol_id + mol_mock, + unittest.mock.ANY, # ve instance created internally + "polymer", + force_matrices["poly"][0], + torque_matrices["poly"][0], + True, # highest level (since only one level) + ) + def test_process_conformational_residue_level(self): """ - Tests that `_process_conformational_residue_level` correctly logs conformational + Tests that `_process_conformational_entropy` correctly logs conformational entropy results at the residue level for a known molecular system using MDAnalysis. """ - # Load a known test universe tprfile = os.path.join(self.test_data_dir, "md_A4_dna.tpr") trrfile = os.path.join(self.test_data_dir, "md_A4_dna_xf.trr") @@ -544,26 +631,25 @@ def test_process_conformational_residue_level(self): data_logger = DataLogger() manager = EntropyManager(run_manager, args, u, data_logger, level_manager) + # Prepare mock molecule container reduced_atom = manager._get_reduced_universe() mol_container = manager._get_molecule_container(reduced_atom, 0) - # Patch methods to isolate the test - mock_dihedrals = ["phi", "psi", "chi1"] - manager._level_manager.get_dihedrals = MagicMock(return_value=mock_dihedrals) + # Create dummy states + states = {0: np.ones((10, 3))} - ce = ConformationalEntropy(run_manager, args, u, data_logger, level_manager) - ce.conformational_entropy_calculation = MagicMock(return_value=3.33) + # Mock entropy calculator + ce = MagicMock() + ce.conformational_entropy_calculation.return_value = 3.33 - # Run the function - manager._process_conformational_residue_level( + # Run the method + manager._process_conformational_entropy( mol_id=0, mol_container=mol_container, ce=ce, level="residue", - start=1, - end=1, - step=1, - n_frames=1, + states=states, + number_frames=10, ) # Check that results were logged @@ -574,7 +660,6 @@ def test_process_conformational_residue_level(self): actual_types = set(entry[2] for entry in df) self.assertSetEqual(actual_types, expected_types) - # Check entropy values results = [entry[3] for entry in df] self.assertIn(3.33, results) diff --git a/tests/test_CodeEntropy/test_levels.py b/tests/test_CodeEntropy/test_levels.py index 81166bd..865687e 100644 --- a/tests/test_CodeEntropy/test_levels.py +++ b/tests/test_CodeEntropy/test_levels.py @@ -108,11 +108,10 @@ def test_get_matrices(self): force_matrix, torque_matrix = level_manager.get_matrices( data_container=data_container, level="residue", - start=0, - end=2, - step=1, number_frames=2, - highest_level="polymer", + highest_level=True, + force_matrix=None, + torque_matrix=None, ) # Assertions @@ -123,11 +122,82 @@ def test_get_matrices(self): # Check that internal methods were called self.assertEqual(level_manager.get_beads.call_count, 1) - self.assertEqual(level_manager.get_axes.call_count, 4) # 2 beads × 2 frames + self.assertEqual(level_manager.get_axes.call_count, 2) # 2 beads self.assertEqual( level_manager.create_submatrix.call_count, 6 ) # 3 force + 3 torque + def test_get_matrices_force_shape_mismatch(self): + """ + Test that get_matrices raises a ValueError when the provided force_matrix + has a shape mismatch with the computed force block matrix. + """ + level_manager = LevelManager() + + # Mock internal methods + level_manager.get_beads = MagicMock(return_value=["bead1", "bead2"]) + level_manager.get_axes = MagicMock(return_value=("trans_axes", "rot_axes")) + level_manager.get_weighted_forces = MagicMock( + return_value=np.array([1.0, 2.0, 3.0]) + ) + level_manager.get_weighted_torques = MagicMock( + return_value=np.array([0.5, 1.5, 2.5]) + ) + level_manager.create_submatrix = MagicMock(return_value=np.identity(3)) + + data_container = MagicMock() + + # Incorrect shape for force matrix (should be 6x6 for 2 beads) + bad_force_matrix = np.zeros((3, 3)) + correct_torque_matrix = np.zeros((6, 6)) + + with self.assertRaises(ValueError) as context: + level_manager.get_matrices( + data_container=data_container, + level="residue", + number_frames=2, + highest_level=True, + force_matrix=bad_force_matrix, + torque_matrix=correct_torque_matrix, + ) + + self.assertIn("Inconsistent force matrix shape", str(context.exception)) + + def test_get_matrices_torque_shape_mismatch(self): + """ + Test that get_matrices raises a ValueError when the provided torque_matrix + has a shape mismatch with the computed torque block matrix. + """ + level_manager = LevelManager() + + # Mock internal methods + level_manager.get_beads = MagicMock(return_value=["bead1", "bead2"]) + level_manager.get_axes = MagicMock(return_value=("trans_axes", "rot_axes")) + level_manager.get_weighted_forces = MagicMock( + return_value=np.array([1.0, 2.0, 3.0]) + ) + level_manager.get_weighted_torques = MagicMock( + return_value=np.array([0.5, 1.5, 2.5]) + ) + level_manager.create_submatrix = MagicMock(return_value=np.identity(3)) + + data_container = MagicMock() + + correct_force_matrix = np.zeros((6, 6)) + bad_torque_matrix = np.zeros((3, 3)) # Incorrect shape + + with self.assertRaises(ValueError) as context: + level_manager.get_matrices( + data_container=data_container, + level="residue", + number_frames=2, + highest_level=True, + force_matrix=correct_force_matrix, + torque_matrix=bad_torque_matrix, + ) + + self.assertIn("Inconsistent torque matrix shape", str(context.exception)) + def test_get_dihedrals_united_atom(self): """ Test `get_dihedrals` for 'united_atom' level. @@ -247,6 +317,38 @@ def test_get_beads_united_atom_level(self): data_container.select_atoms.call_count, 4 ) # 1 for heavy_atoms + 3 beads + def test_get_axes_united_atom_no_bonds(self): + """ + Test `get_axes` for 'united_atom' level when no bonded atoms are found. + Ensures that rotational axes fall back to residues' principal axes. + """ + level_manager = LevelManager() + + data_container = MagicMock() + + # Mock principal axes for translation and rotation + mock_rot_axes = MagicMock(name="rot_axes") + + data_container.residues.principal_axes.return_value = mock_rot_axes + data_container.residues.principal_axes.return_value = mock_rot_axes + data_container.residues.principal_axes.return_value = mock_rot_axes # fallback + + # First select_atoms returns empty bonded atom set + atom_set = MagicMock() + atom_set.__len__.return_value = 0 # triggers fallback + + data_container.select_atoms.side_effect = [atom_set] + + trans_axes, rot_axes = level_manager.get_axes( + data_container=data_container, level="united_atom", index=5 + ) + + # Assertions + self.assertEqual(trans_axes, mock_rot_axes) + self.assertEqual(rot_axes, mock_rot_axes) + data_container.residues.principal_axes.assert_called() + self.assertEqual(data_container.select_atoms.call_count, 1) + def test_get_axes_polymer_level(self): """ Test `get_axes` for 'polymer' level. @@ -328,6 +430,8 @@ def test_get_axes_united_atom_level(self): data_container.residues.principal_axes.return_value = "trans_axes" atom_set = MagicMock() + atom_set.__len__.return_value = 1 + atom_group = MagicMock() atom_group.positions = [[1.0, 2.0, 3.0]] @@ -660,30 +764,31 @@ def test_get_weighted_torques_negative_moi_raises(self): "Negative value encountered for moment of inertia", str(context.exception) ) - def test_create_submatrix_basic_outer_product_average(self): + def test_create_submatrix_basic_outer_product(self): """ - Test with known vectors to verify correct average outer product. + Test with known vectors to verify correct outer product. """ level_manager = LevelManager() - data_i = [np.array([1, 0, 0]), np.array([0, 1, 0])] - data_j = [np.array([0, 1, 0]), np.array([1, 0, 0])] - number_frames = 2 - - expected = (np.outer(data_i[0], data_j[0]) + np.outer(data_i[1], data_j[1])) / 2 + data_i = np.array([1, 0, 0]) + data_j = np.array([0, 1, 0]) + number_frames = 1 # Not used in current implementation + expected = np.outer(data_i, data_j) result = level_manager.create_submatrix(data_i, data_j, number_frames) - np.testing.assert_array_almost_equal(result, expected) + + np.testing.assert_array_equal(result, expected) def test_create_submatrix_zero_vectors_returns_zero_matrix(self): """ - Test that all-zero input vectors should return a zero matrix. + Test that all-zero input vectors return a zero matrix. """ level_manager = LevelManager() - data_i = [np.zeros(3) for _ in range(3)] - data_j = [np.zeros(3) for _ in range(3)] - result = level_manager.create_submatrix(data_i, data_j, 3) + data_i = np.zeros(3) + data_j = np.zeros(3) + result = level_manager.create_submatrix(data_i, data_j, 1) + np.testing.assert_array_equal(result, np.zeros((3, 3))) def test_create_submatrix_single_frame(self): @@ -702,12 +807,13 @@ def test_create_submatrix_single_frame(self): def test_create_submatrix_symmetric_result_when_data_equal(self): """ - Test that if data_i == data_j, the result should be symmetric. + Test that if data_i == data_j, the result is symmetric. """ level_manager = LevelManager() - data = [np.array([1, 2, 3]), np.array([4, 5, 6])] - result = level_manager.create_submatrix(data, data, 2) + data = np.array([1, 2, 3]) + result = level_manager.create_submatrix(data, data, 1) + self.assertTrue(np.allclose(result, result.T)) # Check symmetry def test_filter_zero_rows_columns_no_zeros(self): From 218616ce748f915bdb9b2b9d365afc0112e9f8fc Mon Sep 17 00:00:00 2001 From: skfegan Date: Thu, 31 Jul 2025 10:35:32 +0100 Subject: [PATCH 17/23] separating the building of conformational states from the covariance matrices and entropy calculations. --- CodeEntropy/entropy.py | 77 +++++++++++++++++-------------------- CodeEntropy/levels.py | 87 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 9177eca..fd44297 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -47,11 +47,25 @@ def execute(self): """ start, end, step = self._get_trajectory_bounds() number_frames = self._get_number_frames(start, end, step) + ve = VibrationalEntropy( + self._run_manager, + self._args, + self._universe, + self._data_logger, + self._level_manager, + ) + ce = ConformationalEntropy( + self._run_manager, + self._args, + self._universe, + self._data_logger, + self._level_manager, + ) self._handle_water_entropy(start, end, step) reduced_atom, number_molecules, levels = self._initialize_molecules() - force_matrices, torque_matrices, states_ua, states_res = ( + force_matrices, torque_matrices = ( self._level_manager.build_covariance_matrices( self, reduced_atom, @@ -64,6 +78,21 @@ def execute(self): ) ) + states_ua, states_res = ( + self._level_manager.build_conformational_states( + self, + reduced_atom, + number_molecules, + levels, + start, + end, + step, + number_frames, + self._args.bin_width, + ce, + ) + ) + self._compute_entropies( reduced_atom, number_molecules, @@ -76,6 +105,8 @@ def execute(self): start, end, step, + ve, + ce, ) self._finalize_molecule_results() @@ -136,6 +167,8 @@ def _compute_entropies( start, end, step, + ve, + ce, ): """ Compute vibrational and conformational entropies for all molecules and levels. @@ -163,20 +196,6 @@ def _compute_entropies( step (int): Step size for frame iteration. """ bin_width = self._args.bin_width - ve = VibrationalEntropy( - self._run_manager, - self._args, - self._universe, - self._data_logger, - self._level_manager, - ) - ce = ConformationalEntropy( - self._run_manager, - self._args, - self._universe, - self._data_logger, - self._level_manager, - ) for mol_id in range(number_molecules): mol = self._get_molecule_container(reduced_atom, mol_id) @@ -184,30 +203,6 @@ def _compute_entropies( highest = level == levels[mol_id][-1] if level == "united_atom": - for res_id, residue in enumerate(mol.residues): - key = (mol_id, res_id) - res_container = self._run_manager.new_U_select_atom( - mol, - f"index {residue.atoms.indices[0]}:" - f"{residue.atoms.indices[-1]}", - ) - heavy_res = self._run_manager.new_U_select_atom( - res_container, "not name H*" - ) - - states_ua[key] = ( - self._level_manager.compute_dihedral_conformations( - heavy_res, - level, - ce, - number_frames, - bin_width, - start, - end, - step, - ) - ) - self._process_united_atom_entropy( mol_id, mol, @@ -222,10 +217,6 @@ def _compute_entropies( ) elif level == "residue": - states_res = self._level_manager.compute_dihedral_conformations( - mol, level, ce, number_frames, bin_width, start, end, step - ) - self._process_vibrational_entropy( mol_id, mol, diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index c79c68f..582edfa 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -46,7 +46,7 @@ def select_levels(self, data_container): # fragments is MDAnalysis terminology for what chemists would call molecules number_molecules = len(data_container.atoms.fragments) - logger.debug("The number of molecules is {}.".format(number_molecules)) + logger.debug(f"The number of molecules is {number_molecules}.") fragments = data_container.atoms.fragments levels = [[] for _ in range(number_molecules)] @@ -265,12 +265,12 @@ def compute_dihedral_conformations( self, selector, level, - ce, number_frames, bin_width, start, end, step, + ce, ): """ Compute dihedral conformations for a given selector and entropy level. @@ -278,7 +278,6 @@ def compute_dihedral_conformations( Parameters: selector (AtomGroup): Atom selection to compute dihedrals for. level (str): Entropy level ("united_atom" or "residue"). - ce (ConformationalEntropy): Conformational entropy calculator. number_frames (int): Number of frames to process. bin_width (float): Bin width for dihedral angle discretization. start (int): Start frame index. @@ -750,8 +749,6 @@ def build_covariance_matrices( tuple: A tuple containing: - force_matrices (dict): Force covariance matrices by level. - torque_matrices (dict): Torque covariance matrices by level. - - states_ua (dict): Conformational states at the united-atom level. - - states_res (list): Conformational states at the residue level. """ force_matrices = { "ua": {}, @@ -763,8 +760,6 @@ def build_covariance_matrices( "res": [None] * number_molecules, "poly": [None] * number_molecules, } - states_ua = {} - states_res = [None] * number_molecules for timestep in reduced_atom.trajectory[start:end:step]: time_index = timestep.frame - start @@ -784,7 +779,7 @@ def build_covariance_matrices( torque_matrices, ) - return force_matrices, torque_matrices, states_ua, states_res + return force_matrices, torque_matrices def update_force_torque_matrices( self, @@ -896,3 +891,79 @@ def filter_zero_rows_columns(self, arg_matrix): logger.debug(f"arg_matrix: {arg_matrix}") return arg_matrix + + def build_conformational_states( + self, + entropy_manager, + reduced_atom, + number_molecules, + levels, + start, + end, + step, + number_frames, + bin_width, + ce, + ): + """ + Construct the conformational states for each molecule at + relevant levels. + + Parameters: + entropy_manager (EntropyManager): Instance of the EntropyManager + reduced_atom (Universe): The reduced atom selection. + number_molecules (int): Number of molecules in the system. + levels (list): List of entropy levels per molecule. + start (int): Start frame index. + end (int): End frame index. + step (int): Step size for frame iteration. + number_frames (int): Total number of frames to process. + + Returns: + tuple: A tuple containing: + - states_ua (dict): Conformational states at the united-atom level. + - states_res (list): Conformational states at the residue level. + """ + states_ua = {} + states_res = [None] * number_molecules + + for mol_id in range(number_molecules): + mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + if level == "united_atom": + for res_id, residue in enumerate(mol.residues): + key = (mol_id, res_id) + + res_container = entropy_manager._run_manager.new_U_select_atom( + mol, + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}", + ) + heavy_res = entropy_manager._run_manager.new_U_select_atom( + res_container, "not name H*" + ) + + states_ua[key] = self.compute_dihedral_conformations( + heavy_res, + level, + number_frames, + bin_width, + start, + end, + step, + ce, + ) + + if level == "res": + states_res = self.compute_dihedral_conformations( + mol, + level, + number_frames, + bin_width, + start, + end, + step, + ce, + ) + + return states_ua, states_res From fa656696eb78655ec1f7fdc02439defffd83f679 Mon Sep 17 00:00:00 2001 From: skfegan Date: Thu, 31 Jul 2025 17:16:46 +0100 Subject: [PATCH 18/23] adding averaging --- CodeEntropy/config/arg_config_manager.py | 5 + CodeEntropy/entropy.py | 105 +++++++++---------- CodeEntropy/group_molecules.py | 91 ++++++++++++++++ CodeEntropy/levels.py | 128 +++++++++++++---------- CodeEntropy/run.py | 6 ++ 5 files changed, 225 insertions(+), 110 deletions(-) create mode 100644 CodeEntropy/group_molecules.py diff --git a/CodeEntropy/config/arg_config_manager.py b/CodeEntropy/config/arg_config_manager.py index 373dd9d..0d809ed 100644 --- a/CodeEntropy/config/arg_config_manager.py +++ b/CodeEntropy/config/arg_config_manager.py @@ -64,6 +64,11 @@ "help": "If set to False, disables the calculation of water entropy", "default": True, }, + "grouping": { + "type": str, + "help": "How to group molecules for averaging", + "default": "each", + }, } diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index fd44297..20d9806 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -16,7 +16,7 @@ class EntropyManager: molecular dynamics trajectory. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager): + def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): """ Initializes the EntropyManager with required components. @@ -32,6 +32,7 @@ def __init__(self, run_manager, args, universe, data_logger, level_manager): self._universe = universe self._data_logger = data_logger self._level_manager = level_manager + self._group_molecules = group_molecules self._GAS_CONST = 8.3144598484848 def execute(self): @@ -53,6 +54,7 @@ def execute(self): self._universe, self._data_logger, self._level_manager, + self._group_molecules, ) ce = ConformationalEntropy( self._run_manager, @@ -60,17 +62,20 @@ def execute(self): self._universe, self._data_logger, self._level_manager, + self._group_molecules, ) self._handle_water_entropy(start, end, step) - reduced_atom, number_molecules, levels = self._initialize_molecules() + reduced_atom, number_molecules, levels, groups = self._initialize_molecules() + number_groups = len(groups) + number_frames = len(reduced_atom.trajectory) force_matrices, torque_matrices = ( self._level_manager.build_covariance_matrices( self, reduced_atom, - number_molecules, levels, + groups, start, end, step, @@ -84,6 +89,7 @@ def execute(self): reduced_atom, number_molecules, levels, + groups, start, end, step, @@ -95,16 +101,13 @@ def execute(self): self._compute_entropies( reduced_atom, - number_molecules, levels, + groups, force_matrices, torque_matrices, states_ua, states_res, number_frames, - start, - end, - step, ve, ce, ) @@ -152,21 +155,21 @@ def _initialize_molecules(self): """ reduced_atom = self._get_reduced_universe() number_molecules, levels = self._level_manager.select_levels(reduced_atom) - return reduced_atom, number_molecules, levels + grouping = self._args.grouping + groups = self._group_molecules.grouping_molecules(reduced_atom, grouping) + + return reduced_atom, number_molecules, levels, groups def _compute_entropies( self, reduced_atom, - number_molecules, levels, + groups, force_matrices, torque_matrices, states_ua, states_res, number_frames, - start, - end, - step, ve, ce, ): @@ -191,20 +194,15 @@ def _compute_entropies( states_ua (dict): Dictionary to store united-atom conformational states. states_res (list): List to store residue-level conformational states. number_frames (int): Total number of trajectory frames to process. - start (int): Start frame index. - end (int): End frame index. - step (int): Step size for frame iteration. """ - bin_width = self._args.bin_width - - for mol_id in range(number_molecules): - mol = self._get_molecule_container(reduced_atom, mol_id) - for level in levels[mol_id]: - highest = level == levels[mol_id][-1] + for group_id in groups.keys(): + mol = self._get_molecule_container(reduced_atom, groups[group_id][0]) + for level in levels[groups[group_id][0]]: + highest = level == levels[groups[group_id][0]][-1] if level == "united_atom": self._process_united_atom_entropy( - mol_id, + group_id, mol, ve, ce, @@ -218,18 +216,17 @@ def _compute_entropies( elif level == "residue": self._process_vibrational_entropy( - mol_id, - mol, + group_id, + number_frames, ve, level, - force_matrices["res"][mol_id], - torque_matrices["res"][mol_id], + force_matrices["res"][group_id], + torque_matrices["res"][group_id], highest, ) self._process_conformational_entropy( - mol_id, - mol, + group_id, ce, level, states_res, @@ -238,12 +235,12 @@ def _compute_entropies( elif level == "polymer": self._process_vibrational_entropy( - mol_id, - mol, + group_id, + number_frames, ve, level, - force_matrices["poly"][mol_id], - torque_matrices["poly"][mol_id], + force_matrices["poly"][group_id], + torque_matrices["poly"][group_id], highest, ) @@ -317,7 +314,7 @@ def _get_molecule_container(self, universe, molecule_id): def _process_united_atom_entropy( self, - mol_id, + group_id, mol_container, ve, ce, @@ -347,7 +344,7 @@ def _process_united_atom_entropy( for residue_id, residue in enumerate(mol_container.residues): - key = (mol_id, residue_id) + key = (group_id, residue_id) f_matrix = force_matrix[key] f_matrix = self._level_manager.filter_zero_rows_columns(f_matrix) @@ -373,27 +370,27 @@ def _process_united_atom_entropy( S_conf += S_conf_res self._data_logger.add_residue_data( - mol_id, residue.resname, level, "Transvibrational", S_trans_res + residue_id, residue.resname, level, "Transvibrational", S_trans_res ) self._data_logger.add_residue_data( - mol_id, residue.resname, level, "Rovibrational", S_rot_res + residue_id, residue.resname, level, "Rovibrational", S_rot_res ) self._data_logger.add_residue_data( - mol_id, residue.resname, level, "Conformational", S_conf_res + residue_id, residue.resname, level, "Conformational", S_conf_res ) self._data_logger.add_results_data( - residue.resname, level, "Transvibrational", S_trans + group_id, level, "Transvibrational", S_trans ) self._data_logger.add_results_data( - residue.resname, level, "Rovibrational", S_rot + group_id, level, "Rovibrational", S_rot ) self._data_logger.add_results_data( - residue.resname, level, "Conformational", S_conf + group_id, level, "Conformational", S_conf ) def _process_vibrational_entropy( - self, mol_id, mol_container, ve, level, force_matrix, torque_matrix, highest + self, group_id, number_frames, ve, level, force_matrix, torque_matrix, highest ): """ Calculates vibrational entropy. @@ -408,8 +405,6 @@ def _process_vibrational_entropy( highest (bool): Flag indicating if this is the highest granularity level. """ - number_frames = len(mol_container.trajectory) - force_matrix = self._level_manager.filter_zero_rows_columns(force_matrix) force_matrix = force_matrix / number_frames @@ -422,16 +417,16 @@ def _process_vibrational_entropy( S_rot = ve.vibrational_entropy_calculation( torque_matrix, "torque", self._args.temperature, highest ) - residue = mol_container.residues[mol_id] + self._data_logger.add_results_data( - residue.resname, level, "Transvibrational", S_trans + group_id, level, "Transvibrational", S_trans ) self._data_logger.add_results_data( - residue.resname, level, "Rovibrational", S_rot + group_id, level, "Rovibrational", S_rot ) def _process_conformational_entropy( - self, mol_id, mol_container, ce, level, states, number_frames + self, group_id, ce, level, states, number_frames ): """ Computes conformational entropy at the residue level (whole-molecule dihedral @@ -445,10 +440,10 @@ def _process_conformational_entropy( start, end, step (int): Frame bounds. n_frames (int): Number of frames used. """ - S_conf = ce.conformational_entropy_calculation(states[mol_id], number_frames) - residue = mol_container.residues[mol_id] + S_conf = ce.conformational_entropy_calculation(states[group_id], number_frames) + self._data_logger.add_results_data( - residue.resname, level, "Conformational", S_conf + group_id, level, "Conformational", S_conf ) def _finalize_molecule_results(self): @@ -599,12 +594,12 @@ class VibrationalEntropy(EntropyManager): vibrational modes and thermodynamic properties. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager): + def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): """ Initializes the VibrationalEntropy manager with all required components and defines physical constants used in vibrational entropy calculations. """ - super().__init__(run_manager, args, universe, data_logger, level_manager) + super().__init__(run_manager, args, universe, data_logger, level_manager, group_molecules) self._PLANCK_CONST = 6.62607004081818e-34 def frequency_calculation(self, lambdas, temp): @@ -723,12 +718,12 @@ class ConformationalEntropy(EntropyManager): analysis using statistical mechanics principles. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager): + def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): """ Initializes the ConformationalEntropy manager with all required components and sets the gas constant used in conformational entropy calculations. """ - super().__init__(run_manager, args, universe, data_logger, level_manager) + super().__init__(run_manager, args, universe, data_logger, level_manager, group_molecules) def assign_conformation( self, data_container, dihedral, number_frames, bin_width, start, end, step @@ -760,7 +755,7 @@ def assign_conformation( # get the values of the angle for the dihedral # dihedral angle values have a range from -180 to 180 - for timestep in data_container.trajectory[start:end:step]: + for timestep in data_container.trajectory[start:end+1:step]: timestep_index = timestep.frame - start value = dihedral.value() # we want postive values in range 0 to 360 to make the peak assignment diff --git a/CodeEntropy/group_molecules.py b/CodeEntropy/group_molecules.py new file mode 100644 index 0000000..42474ae --- /dev/null +++ b/CodeEntropy/group_molecules.py @@ -0,0 +1,91 @@ +import logging +import MDAnalysis as mda + +logger = logging.getLogger(__name__) + +class GroupMolecules: + """ + Groups molecules for averaging. + """ + + def __init__(self): + """ + Initializes the class with relevant information. + + Args: + run_manager: Manager for universe and selection operations. + args: Argument namespace containing user parameters. + universe: MDAnalysis universe representing the simulation system. + data_logger: Logger for storing and exporting entropy data. + """ + self._molecule_groups = None + + def grouping_molecules(self, universe, grouping): + """ + Grouping molecules by desired level of detail. + """ + + molecule_groups = {} + + if grouping == "each": + molecule_groups = self._by_none(universe) + + if grouping == "molecules": + molecule_groups = self._by_molecules(universe) + + return molecule_groups + + def _by_none(self, universe): + """ + Don't group molecules. Every molecule is in its own group. + """ + + # fragments is MDAnalysis terminology for molecules + number_molecules = len(universe.atoms.fragments) + + molecule_groups = {} + + for molecule_i in range(number_molecules): + molecule_groups[molecule_i] = [molecule_i] + + number_groups = len(molecule_groups) + + logger.info(f"Number of molecule groups: {number_groups}") + logger.debug(f"Molecule groups are: {molecule_groups}") + + return molecule_groups + + def _by_molecules(self, universe): + """ + Group molecules by chemical type. + Based on number of atoms and atom names. + """ + + # fragments is MDAnalysis terminology for molecules + number_molecules = len(universe.atoms.fragments) + fragments = universe.atoms.fragments + + molecule_groups = {} + + for molecule_i in range(number_molecules): + names_i = fragments[molecule_i].names + number_atoms_i = len(names_i) + + for molecule_j in range(number_molecules): + names_j = fragments[molecule_j].names + number_atoms_j = len(names_j) + + if number_atoms_i == number_atoms_j and (names_i == names_j).all: + if molecule_j in molecule_groups.keys(): + molecule_groups[molecule_j].append(molecule_i) + else: + molecule_groups[molecule_j] = [] + molecule_groups[molecule_j].append(molecule_i) + break + + number_groups = len(molecule_groups) + + logger.info(f"Number of molecule groups: {number_groups}") + logger.debug(f"Molecule groups are: {molecule_groups}") + + return molecule_groups diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 582edfa..ad597c9 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -725,8 +725,8 @@ def build_covariance_matrices( self, entropy_manager, reduced_atom, - number_molecules, levels, + groups, start, end, step, @@ -750,34 +750,37 @@ def build_covariance_matrices( - force_matrices (dict): Force covariance matrices by level. - torque_matrices (dict): Torque covariance matrices by level. """ + number_groups = len(groups) force_matrices = { "ua": {}, - "res": [None] * number_molecules, - "poly": [None] * number_molecules, + "res": [None] * number_groups, + "poly": [None] * number_groups, } torque_matrices = { "ua": {}, - "res": [None] * number_molecules, - "poly": [None] * number_molecules, + "res": [None] * number_groups, + "poly": [None] * number_groups, } for timestep in reduced_atom.trajectory[start:end:step]: time_index = timestep.frame - start - for mol_id in range(number_molecules): - mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) - for level in levels[mol_id]: - self.update_force_torque_matrices( - entropy_manager, - mol, - mol_id, - level, - levels[mol_id], - time_index, - number_frames, - force_matrices, - torque_matrices, - ) + for group_id in groups.keys(): + molecules = groups[group_id] + for mol_id in molecules: + mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + self.update_force_torque_matrices( + entropy_manager, + mol, + group_id, + level, + levels[mol_id], + time_index, + number_frames, + force_matrices, + torque_matrices, + ) return force_matrices, torque_matrices @@ -785,7 +788,7 @@ def update_force_torque_matrices( self, entropy_manager, mol, - mol_id, + group_id, level, level_list, time_index, @@ -799,7 +802,7 @@ def update_force_torque_matrices( Parameters: entropy_manager (EntropyManager): Instance of the EntropyManager mol (AtomGroup): The molecule to process. - mol_id (int): Index of the molecule. + group_id (int): Index of the group. level (str): Current entropy level ("united_atom", "residue", or "polymer"). level_list (list): List of levels for the molecule. time_index (int): Index of the current frame. @@ -811,7 +814,7 @@ def update_force_torque_matrices( if level == "united_atom": for res_id, residue in enumerate(mol.residues): - key = (mol_id, res_id) + key = (group_id, res_id) res = entropy_manager._run_manager.new_U_select_atom( mol, f"index {residue.atoms.indices[0]}:{residue.atoms.indices[-1]}" ) @@ -836,11 +839,11 @@ def update_force_torque_matrices( level, num_frames, highest, - force_matrices[key][mol_id], - torque_matrices[key][mol_id], + force_matrices[key][group_id], + torque_matrices[key][group_id], ) - force_matrices[key][mol_id] = f_mat - torque_matrices[key][mol_id] = t_mat + force_matrices[key][group_id] = f_mat + torque_matrices[key][group_id] = t_mat def filter_zero_rows_columns(self, arg_matrix): """ @@ -898,6 +901,7 @@ def build_conformational_states( reduced_atom, number_molecules, levels, + groups, start, end, step, @@ -924,27 +928,47 @@ def build_conformational_states( - states_ua (dict): Conformational states at the united-atom level. - states_res (list): Conformational states at the residue level. """ + number_groups = len(groups) states_ua = {} - states_res = [None] * number_molecules + states_res = [None] * number_groups - for mol_id in range(number_molecules): - mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) - for level in levels[mol_id]: - if level == "united_atom": - for res_id, residue in enumerate(mol.residues): - key = (mol_id, res_id) - - res_container = entropy_manager._run_manager.new_U_select_atom( + for group_id in groups.keys(): + molecules = groups[group_id] + for mol_id in molecules: + mol = entropy_manager._get_molecule_container(reduced_atom, mol_id) + for level in levels[mol_id]: + if level == "united_atom": + for res_id, residue in enumerate(mol.residues): + key = (group_id, res_id) + + res_container = entropy_manager._run_manager.new_U_select_atom( + mol, + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}", + ) + heavy_res = entropy_manager._run_manager.new_U_select_atom( + res_container, "not name H*" + ) + + states = self.compute_dihedral_conformations( + heavy_res, + level, + number_frames, + bin_width, + start, + end, + step, + ce, + ) + + if key in states_ua.keys(): + states_ua[key].append(states) + else: + states_ua[key] = states + + if level == "res": + states = self.compute_dihedral_conformations( mol, - f"index {residue.atoms.indices[0]}:" - f"{residue.atoms.indices[-1]}", - ) - heavy_res = entropy_manager._run_manager.new_U_select_atom( - res_container, "not name H*" - ) - - states_ua[key] = self.compute_dihedral_conformations( - heavy_res, level, number_frames, bin_width, @@ -954,16 +978,10 @@ def build_conformational_states( ce, ) - if level == "res": - states_res = self.compute_dihedral_conformations( - mol, - level, - number_frames, - bin_width, - start, - end, - step, - ce, - ) + states_res[group_id] += states + + + logger.debug(f"states_ua {states_ua}") + logger.debug(f"states_res {states_res}") return states_ua, states_res diff --git a/CodeEntropy/run.py b/CodeEntropy/run.py index e89d259..2c81fae 100644 --- a/CodeEntropy/run.py +++ b/CodeEntropy/run.py @@ -11,6 +11,7 @@ from CodeEntropy.config.logging_config import LoggingConfig from CodeEntropy.entropy import EntropyManager from CodeEntropy.levels import LevelManager +from CodeEntropy.group_molecules import GroupMolecules logger = logging.getLogger(__name__) @@ -140,6 +141,9 @@ def run_entropy_workflow(self): # Create LevelManager instance level_manager = LevelManager() + # Create GroupMolecules instance + group_molecules = GroupMolecules() + # Inject all dependencies into EntropyManager entropy_manager = EntropyManager( run_manager=self, @@ -147,6 +151,8 @@ def run_entropy_workflow(self): universe=u, data_logger=self._data_logger, level_manager=level_manager, + group_molecules=group_molecules, + ) entropy_manager.execute() From 3c38e72a9006ac6d79171ec0c41bd0b931566fab Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 1 Aug 2025 08:37:26 +0100 Subject: [PATCH 19/23] applying pre-commit hooks and linting for code quality --- CodeEntropy/entropy.py | 93 ++++++++++++++++------------------ CodeEntropy/group_molecules.py | 4 +- CodeEntropy/levels.py | 11 ++-- CodeEntropy/run.py | 3 +- 4 files changed, 52 insertions(+), 59 deletions(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index 20d9806..c6c1e26 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -16,7 +16,9 @@ class EntropyManager: molecular dynamics trajectory. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): + def __init__( + self, run_manager, args, universe, data_logger, level_manager, group_molecules + ): """ Initializes the EntropyManager with required components. @@ -67,36 +69,31 @@ def execute(self): self._handle_water_entropy(start, end, step) reduced_atom, number_molecules, levels, groups = self._initialize_molecules() - number_groups = len(groups) number_frames = len(reduced_atom.trajectory) - force_matrices, torque_matrices = ( - self._level_manager.build_covariance_matrices( - self, - reduced_atom, - levels, - groups, - start, - end, - step, - number_frames, - ) + force_matrices, torque_matrices = self._level_manager.build_covariance_matrices( + self, + reduced_atom, + levels, + groups, + start, + end, + step, + number_frames, ) - states_ua, states_res = ( - self._level_manager.build_conformational_states( - self, - reduced_atom, - number_molecules, - levels, - groups, - start, - end, - step, - number_frames, - self._args.bin_width, - ce, - ) + states_ua, states_res = self._level_manager.build_conformational_states( + self, + reduced_atom, + number_molecules, + levels, + groups, + start, + end, + step, + number_frames, + self._args.bin_width, + ce, ) self._compute_entropies( @@ -379,15 +376,9 @@ def _process_united_atom_entropy( residue_id, residue.resname, level, "Conformational", S_conf_res ) - self._data_logger.add_results_data( - group_id, level, "Transvibrational", S_trans - ) - self._data_logger.add_results_data( - group_id, level, "Rovibrational", S_rot - ) - self._data_logger.add_results_data( - group_id, level, "Conformational", S_conf - ) + self._data_logger.add_results_data(group_id, level, "Transvibrational", S_trans) + self._data_logger.add_results_data(group_id, level, "Rovibrational", S_rot) + self._data_logger.add_results_data(group_id, level, "Conformational", S_conf) def _process_vibrational_entropy( self, group_id, number_frames, ve, level, force_matrix, torque_matrix, highest @@ -418,12 +409,8 @@ def _process_vibrational_entropy( torque_matrix, "torque", self._args.temperature, highest ) - self._data_logger.add_results_data( - group_id, level, "Transvibrational", S_trans - ) - self._data_logger.add_results_data( - group_id, level, "Rovibrational", S_rot - ) + self._data_logger.add_results_data(group_id, level, "Transvibrational", S_trans) + self._data_logger.add_results_data(group_id, level, "Rovibrational", S_rot) def _process_conformational_entropy( self, group_id, ce, level, states, number_frames @@ -442,9 +429,7 @@ def _process_conformational_entropy( """ S_conf = ce.conformational_entropy_calculation(states[group_id], number_frames) - self._data_logger.add_results_data( - group_id, level, "Conformational", S_conf - ) + self._data_logger.add_results_data(group_id, level, "Conformational", S_conf) def _finalize_molecule_results(self): """ @@ -594,12 +579,16 @@ class VibrationalEntropy(EntropyManager): vibrational modes and thermodynamic properties. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): + def __init__( + self, run_manager, args, universe, data_logger, level_manager, group_molecules + ): """ Initializes the VibrationalEntropy manager with all required components and defines physical constants used in vibrational entropy calculations. """ - super().__init__(run_manager, args, universe, data_logger, level_manager, group_molecules) + super().__init__( + run_manager, args, universe, data_logger, level_manager, group_molecules + ) self._PLANCK_CONST = 6.62607004081818e-34 def frequency_calculation(self, lambdas, temp): @@ -718,12 +707,16 @@ class ConformationalEntropy(EntropyManager): analysis using statistical mechanics principles. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager, group_molecules): + def __init__( + self, run_manager, args, universe, data_logger, level_manager, group_molecules + ): """ Initializes the ConformationalEntropy manager with all required components and sets the gas constant used in conformational entropy calculations. """ - super().__init__(run_manager, args, universe, data_logger, level_manager, group_molecules) + super().__init__( + run_manager, args, universe, data_logger, level_manager, group_molecules + ) def assign_conformation( self, data_container, dihedral, number_frames, bin_width, start, end, step @@ -755,7 +748,7 @@ def assign_conformation( # get the values of the angle for the dihedral # dihedral angle values have a range from -180 to 180 - for timestep in data_container.trajectory[start:end+1:step]: + for timestep in data_container.trajectory[start : end + 1 : step]: timestep_index = timestep.frame - start value = dihedral.value() # we want postive values in range 0 to 360 to make the peak assignment diff --git a/CodeEntropy/group_molecules.py b/CodeEntropy/group_molecules.py index 42474ae..f8e43fb 100644 --- a/CodeEntropy/group_molecules.py +++ b/CodeEntropy/group_molecules.py @@ -1,8 +1,8 @@ import logging -import MDAnalysis as mda logger = logging.getLogger(__name__) + class GroupMolecules: """ Groups molecules for averaging. @@ -57,7 +57,7 @@ def _by_none(self, universe): def _by_molecules(self, universe): """ - Group molecules by chemical type. + Group molecules by chemical type. Based on number of atoms and atom names. """ diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index ad597c9..e50356f 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -941,10 +941,12 @@ def build_conformational_states( for res_id, residue in enumerate(mol.residues): key = (group_id, res_id) - res_container = entropy_manager._run_manager.new_U_select_atom( - mol, - f"index {residue.atoms.indices[0]}:" - f"{residue.atoms.indices[-1]}", + res_container = ( + entropy_manager._run_manager.new_U_select_atom( + mol, + f"index {residue.atoms.indices[0]}:" + f"{residue.atoms.indices[-1]}", + ) ) heavy_res = entropy_manager._run_manager.new_U_select_atom( res_container, "not name H*" @@ -979,7 +981,6 @@ def build_conformational_states( ) states_res[group_id] += states - logger.debug(f"states_ua {states_ua}") logger.debug(f"states_res {states_res}") diff --git a/CodeEntropy/run.py b/CodeEntropy/run.py index 2c81fae..f90f56d 100644 --- a/CodeEntropy/run.py +++ b/CodeEntropy/run.py @@ -10,8 +10,8 @@ from CodeEntropy.config.data_logger import DataLogger from CodeEntropy.config.logging_config import LoggingConfig from CodeEntropy.entropy import EntropyManager -from CodeEntropy.levels import LevelManager from CodeEntropy.group_molecules import GroupMolecules +from CodeEntropy.levels import LevelManager logger = logging.getLogger(__name__) @@ -152,7 +152,6 @@ def run_entropy_workflow(self): data_logger=self._data_logger, level_manager=level_manager, group_molecules=group_molecules, - ) entropy_manager.execute() From 0f87a049fa6d7be63ba5c2e6beb5a0c3f4e92b76 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 1 Aug 2025 10:45:48 +0100 Subject: [PATCH 20/23] fix timestep slicing issue within `assign_conformation` --- CodeEntropy/entropy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index c6c1e26..a58b9f9 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -748,7 +748,7 @@ def assign_conformation( # get the values of the angle for the dihedral # dihedral angle values have a range from -180 to 180 - for timestep in data_container.trajectory[start : end + 1 : step]: + for timestep in data_container.trajectory[start:end:step]: timestep_index = timestep.frame - start value = dihedral.value() # we want postive values in range 0 to 360 to make the peak assignment From fcf83ef4b3fc0fdea50f23cfed6afb67a7b52d01 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 1 Aug 2025 10:55:46 +0100 Subject: [PATCH 21/23] remove unused `number_molecules` from `build_conformational_states` function --- CodeEntropy/levels.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index e50356f..854efcc 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -899,7 +899,6 @@ def build_conformational_states( self, entropy_manager, reduced_atom, - number_molecules, levels, groups, start, @@ -916,7 +915,6 @@ def build_conformational_states( Parameters: entropy_manager (EntropyManager): Instance of the EntropyManager reduced_atom (Universe): The reduced atom selection. - number_molecules (int): Number of molecules in the system. levels (list): List of entropy levels per molecule. start (int): Start frame index. end (int): End frame index. From 83d827df4ecbe431549ca23d689c8ba2e70e663b Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 1 Aug 2025 15:24:43 +0100 Subject: [PATCH 22/23] fixing and adding additional test cases for the grouping by molecule integration --- CodeEntropy/entropy.py | 10 +- CodeEntropy/levels.py | 5 +- tests/test_CodeEntropy/test_entropy.py | 288 +++++++++--------- .../test_CodeEntropy/test_group_molecules.py | 95 ++++++ tests/test_CodeEntropy/test_levels.py | 135 ++++++++ 5 files changed, 386 insertions(+), 147 deletions(-) create mode 100644 tests/test_CodeEntropy/test_group_molecules.py diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py index a58b9f9..9451bb3 100644 --- a/CodeEntropy/entropy.py +++ b/CodeEntropy/entropy.py @@ -69,7 +69,6 @@ def execute(self): self._handle_water_entropy(start, end, step) reduced_atom, number_molecules, levels, groups = self._initialize_molecules() - number_frames = len(reduced_atom.trajectory) force_matrices, torque_matrices = self._level_manager.build_covariance_matrices( self, @@ -85,7 +84,6 @@ def execute(self): states_ua, states_res = self._level_manager.build_conformational_states( self, reduced_atom, - number_molecules, levels, groups, start, @@ -847,12 +845,16 @@ class OrientationalEntropy(EntropyManager): and orientational degrees of freedom. """ - def __init__(self, run_manager, args, universe, data_logger, level_manager): + def __init__( + self, run_manager, args, universe, data_logger, level_manager, group_molecules + ): """ Initializes the OrientationalEntropy manager with all required components and sets the gas constant used in orientational entropy calculations. """ - super().__init__(run_manager, args, universe, data_logger, level_manager) + super().__init__( + run_manager, args, universe, data_logger, level_manager, group_molecules + ) def orientational_entropy_calculation(self, neighbours_dict): """ diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py index 854efcc..6c58f2d 100644 --- a/CodeEntropy/levels.py +++ b/CodeEntropy/levels.py @@ -978,7 +978,10 @@ def build_conformational_states( ce, ) - states_res[group_id] += states + if states_res[group_id] is None: + states_res[group_id] = states + else: + states_res[group_id] += states logger.debug(f"states_ua {states_ua}") logger.debug(f"states_res {states_res}") diff --git a/tests/test_CodeEntropy/test_entropy.py b/tests/test_CodeEntropy/test_entropy.py index 0043a71..3336ce2 100644 --- a/tests/test_CodeEntropy/test_entropy.py +++ b/tests/test_CodeEntropy/test_entropy.py @@ -48,11 +48,7 @@ def tearDown(self): shutil.rmtree(self.test_dir) def test_execute_full_workflow(self): - """ - Tests that `execute` runs the full entropy workflow for a known system, - triggering all processing branches and logging expected results. - """ - # Load test universe + # Setup universe and args as before tprfile = os.path.join(self.test_data_dir, "md_A4_dna.tpr") trrfile = os.path.join(self.test_data_dir, "md_A4_dna_xf.trr") u = mda.Universe(tprfile, trrfile) @@ -63,136 +59,149 @@ def test_execute_full_workflow(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() + group_molecules = MagicMock() entropy_manager = EntropyManager( - run_manager, args, u, data_logger, level_manager + run_manager, args, u, data_logger, level_manager, group_molecules ) - # Mock internal methods + # Mocks for trajectory and molecules entropy_manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) entropy_manager._get_number_frames = MagicMock(return_value=11) entropy_manager._handle_water_entropy = MagicMock() + + mock_reduced_atom = MagicMock() + mock_reduced_atom.trajectory = [1] * 11 + + mock_groups = {0: [0], 1: [1], 2: [2]} + mock_levels = { + 0: ["united_atom", "polymer", "residue"], + 1: ["united_atom", "polymer", "residue"], + 2: ["united_atom", "polymer", "residue"], + } + entropy_manager._initialize_molecules = MagicMock( - return_value=("reduced_atom", 3, [["united_atom", "polymer", "residue"]]) + return_value=(mock_reduced_atom, 3, mock_levels, mock_groups) ) entropy_manager._level_manager.build_covariance_matrices = MagicMock( - return_value=( - "force_matrices", - "torque_matrices", - "states_ua", - "states_res", - ) + return_value=("force_matrices", "torque_matrices") + ) + entropy_manager._level_manager.build_conformational_states = MagicMock( + return_value=(["state_ua"], ["state_res"]) ) entropy_manager._compute_entropies = MagicMock() entropy_manager._finalize_molecule_results = MagicMock() entropy_manager._data_logger.log_tables = MagicMock() - # Execute - entropy_manager.execute() - - # Assertions - entropy_manager._get_trajectory_bounds.assert_called_once() - entropy_manager._get_number_frames.assert_called_once_with(0, 10, 1) - entropy_manager._handle_water_entropy.assert_called_once_with(0, 10, 1) - entropy_manager._initialize_molecules.assert_called_once() - entropy_manager._level_manager.build_covariance_matrices.assert_called_once_with + # Create mocks for VibrationalEntropy and ConformationalEntropy + ve = MagicMock() + ce = MagicMock() + + with ( + patch("CodeEntropy.entropy.VibrationalEntropy", return_value=ve), + patch("CodeEntropy.entropy.ConformationalEntropy", return_value=ce), + ): + + entropy_manager.execute() + + # Assert the key calls happened with expected arguments ( + entropy_manager._level_manager.build_conformational_states + ).assert_called_once_with( entropy_manager, - "reduced_atom", - 3, - [["united_atom", "polymer", "residue"]], + mock_reduced_atom, + mock_levels, + mock_groups, 0, 10, 1, 11, + args.bin_width, + ce, ) + entropy_manager._compute_entropies.assert_called_once_with( - "reduced_atom", - 3, - [["united_atom", "polymer", "residue"]], + mock_reduced_atom, + mock_levels, + mock_groups, "force_matrices", "torque_matrices", - "states_ua", - "states_res", + ["state_ua"], + ["state_res"], 11, - 0, - 10, - 1, + ve, + ce, ) + entropy_manager._finalize_molecule_results.assert_called_once() entropy_manager._data_logger.log_tables.assert_called_once() def test_water_entropy_sets_selection_string_when_all(self): """ Tests that when `selection_string` is initially 'all' and water entropy is - enabled, the `execute` method sets `selection_string` to 'not water' after + enabled, `_handle_water_entropy` sets `selection_string` to 'not water' after calculating water entropy. """ mock_universe = MagicMock() - mock_universe.select_atoms.return_value.n_atoms = ( - 5 # Simulate water atoms present - ) + mock_universe.select_atoms.return_value.n_atoms = 5 # Simulate water present args = MagicMock(water_entropy=True, selection_string="all") run_manager = MagicMock() level_manager = MagicMock() data_logger = DataLogger() + group_molecules = MagicMock() manager = EntropyManager( - run_manager, args, mock_universe, data_logger, level_manager + run_manager, + args, + mock_universe, + data_logger, + level_manager, + group_molecules, ) - # Mock methods used in execute - manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) - manager._get_number_frames = MagicMock(return_value=11) + # Patch water entropy calculation manager._calculate_water_entropy = MagicMock() - manager._initialize_molecules = MagicMock(return_value=("reduced", 1, [])) - manager._level_manager.build_covariance_matrices = MagicMock( - return_value=(None, None, None, None) - ) - manager._compute_entropies = MagicMock() - manager._finalize_molecule_results = MagicMock() - manager._data_logger.log_tables = MagicMock() - manager.execute() + # Call _handle_water_entropy directly + manager._handle_water_entropy(0, 10, 1) - manager._calculate_water_entropy.assert_called_once() + manager._calculate_water_entropy.assert_called_once_with( + mock_universe, 0, 10, 1 + ) self.assertEqual(args.selection_string, "not water") def test_water_entropy_appends_to_custom_selection_string(self): """ Tests that when `selection_string` is a custom value and water - entropy is enabled, the `execute` method appends ' and not water' - to the existing selection string after calculating water entropy. + entropy is enabled, `_handle_water_entropy` appends ' and not water' + to the existing selection string. """ mock_universe = MagicMock() - mock_universe.select_atoms.return_value.n_atoms = ( - 5 # Simulate water atoms present - ) + mock_universe.select_atoms.return_value.n_atoms = 5 # Simulate water present args = MagicMock(water_entropy=True, selection_string="protein") run_manager = MagicMock() level_manager = MagicMock() data_logger = DataLogger() + group_molecules = MagicMock() manager = EntropyManager( - run_manager, args, mock_universe, data_logger, level_manager + run_manager, + args, + mock_universe, + data_logger, + level_manager, + group_molecules, ) - # Mock methods used in execute - manager._get_trajectory_bounds = MagicMock(return_value=(0, 10, 1)) - manager._get_number_frames = MagicMock(return_value=11) manager._calculate_water_entropy = MagicMock() - manager._initialize_molecules = MagicMock(return_value=("reduced", 1, [])) - manager._level_manager.build_covariance_matrices = MagicMock( - return_value=(None, None, None, None) - ) - manager._compute_entropies = MagicMock() - manager._finalize_molecule_results = MagicMock() - manager._data_logger.log_tables = MagicMock() - manager.execute() + # Call _handle_water_entropy directly + manager._handle_water_entropy(0, 10, 1) - manager._calculate_water_entropy.assert_called_once() + manager._calculate_water_entropy.assert_called_once_with( + mock_universe, 0, 10, 1 + ) self.assertEqual(args.selection_string, "protein and not water") def test_get_trajectory_bounds(self): @@ -206,7 +215,7 @@ def test_get_trajectory_bounds(self): args, _ = parser.parse_known_args() entropy_manager = EntropyManager( - MagicMock(), args, MagicMock(), MagicMock(), MagicMock() + MagicMock(), args, MagicMock(), MagicMock(), MagicMock(), MagicMock() ) self.assertIsInstance(entropy_manager._args.start, int) @@ -236,7 +245,7 @@ def test_get_number_frames(self, mock_args): args = parser.parse_args() entropy_manager = EntropyManager( - MagicMock(), args, MagicMock(), MagicMock(), MagicMock() + MagicMock(), args, MagicMock(), MagicMock(), MagicMock(), MagicMock() ) entropy_manager._get_trajectory_bounds() number_frames = entropy_manager._get_number_frames( @@ -268,7 +277,7 @@ def test_get_number_frames_sliced_trajectory(self, mock_args): args = parser.parse_args() entropy_manager = EntropyManager( - MagicMock(), args, MagicMock(), MagicMock(), MagicMock() + MagicMock(), args, MagicMock(), MagicMock(), MagicMock(), MagicMock() ) entropy_manager._get_trajectory_bounds() number_frames = entropy_manager._get_number_frames( @@ -301,7 +310,7 @@ def test_get_number_frames_sliced_trajectory_step(self, mock_args): args = parser.parse_args() entropy_manager = EntropyManager( - MagicMock(), args, MagicMock(), MagicMock(), MagicMock() + MagicMock(), args, MagicMock(), MagicMock(), MagicMock(), MagicMock() ) entropy_manager._get_trajectory_bounds() number_frames = entropy_manager._get_number_frames( @@ -335,7 +344,9 @@ def test_get_reduced_universe_all(self, mock_args): parser = config_manager.setup_argparse() args = parser.parse_args() - entropy_manager = EntropyManager(MagicMock(), args, u, MagicMock(), MagicMock()) + entropy_manager = EntropyManager( + MagicMock(), args, u, MagicMock(), MagicMock(), MagicMock() + ) entropy_manager._get_reduced_universe() @@ -366,7 +377,9 @@ def test_get_reduced_universe_reduced(self, mock_args): parser = config_manager.setup_argparse() args = parser.parse_args() - entropy_manager = EntropyManager(run_manager, args, u, MagicMock(), MagicMock()) + entropy_manager = EntropyManager( + run_manager, args, u, MagicMock(), MagicMock(), MagicMock() + ) reduced_u = entropy_manager._get_reduced_universe() @@ -402,7 +415,9 @@ def test_get_molecule_container(self, mock_args): parser = config_manager.setup_argparse() args = parser.parse_args() - entropy_manager = EntropyManager(run_manager, args, u, MagicMock(), MagicMock()) + entropy_manager = EntropyManager( + run_manager, args, u, MagicMock(), MagicMock(), MagicMock() + ) # Call the method molecule_id = 0 @@ -434,7 +449,10 @@ def test_process_united_atom_level(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() - manager = EntropyManager(run_manager, args, u, data_logger, level_manager) + group_molecules = MagicMock() + manager = EntropyManager( + run_manager, args, u, data_logger, level_manager, group_molecules + ) # Prepare mock molecule container reduced_atom = manager._get_reduced_universe() @@ -456,7 +474,7 @@ def test_process_united_atom_level(self): # Run the method manager._process_united_atom_entropy( - mol_id=0, + group_id=0, mol_container=mol_container, ve=ve, ce=ce, @@ -500,7 +518,10 @@ def test_process_vibrational_only_levels(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() - manager = EntropyManager(run_manager, args, u, data_logger, level_manager) + group_molecules = MagicMock() + manager = EntropyManager( + run_manager, args, u, data_logger, level_manager, group_molecules + ) # Prepare mock molecule container reduced_atom = manager._get_reduced_universe() @@ -519,8 +540,8 @@ def test_process_vibrational_only_levels(self): # Run the method manager._process_vibrational_entropy( - mol_id=0, - mol_container=mol_container, + group_id=0, + number_frames=10, ve=ve, level="Vibrational", force_matrix=force_matrix, @@ -545,73 +566,50 @@ def test_compute_entropies_polymer_branch(self): Test _compute_entropies triggers _process_vibrational_entropy for 'polymer' level. """ - # Setup the manager args = MagicMock(bin_width=0.1) run_manager = MagicMock() level_manager = MagicMock() data_logger = DataLogger() + group_molecules = MagicMock() manager = EntropyManager( - run_manager, args, MagicMock(), data_logger, level_manager + run_manager, args, MagicMock(), data_logger, level_manager, group_molecules ) - # Setup dummy universe and reduced atom reduced_atom = MagicMock() + number_frames = 5 + groups = {0: [0]} # One molecule only + levels = [["polymer"]] # One level for that molecule - # Number of molecules = 1 for simplicity - number_molecules = 1 - - # Levels includes 'polymer' - levels = [["polymer"]] - - # Provide dummy force and torque matrices with the expected structure force_matrices = {"poly": {0: np.eye(3)}} torque_matrices = {"poly": {0: np.eye(3) * 2}} - - # States dictionaries (could be empty) states_ua = {} states_res = [] - # Setup frames and slicing params - number_frames = 5 - start = 0 - end = 5 - step = 1 - - # Mock the molecule container and its residues mol_mock = MagicMock() - mol_mock.residues = [] # No residues needed for polymer level - - # Patch _get_molecule_container to return our mock molecule + mol_mock.residues = [] manager._get_molecule_container = MagicMock(return_value=mol_mock) - - # Patch _process_vibrational_entropy to monitor calls manager._process_vibrational_entropy = MagicMock() - # Call the method + ve = MagicMock() + ve.vibrational_entropy_calculation.side_effect = [1.11] + + ce = MagicMock() + ce.conformational_entropy_calculation.return_value = 3.33 + manager._compute_entropies( reduced_atom, - number_molecules, levels, + groups, force_matrices, torque_matrices, states_ua, states_res, number_frames, - start, - end, - step, + ve, + ce, ) - # Assert _process_vibrational_entropy called with expected args - manager._process_vibrational_entropy.assert_called_once_with( - 0, # mol_id - mol_mock, - unittest.mock.ANY, # ve instance created internally - "polymer", - force_matrices["poly"][0], - torque_matrices["poly"][0], - True, # highest level (since only one level) - ) + manager._process_vibrational_entropy.assert_called_once() def test_process_conformational_residue_level(self): """ @@ -629,11 +627,10 @@ def test_process_conformational_residue_level(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() - manager = EntropyManager(run_manager, args, u, data_logger, level_manager) - - # Prepare mock molecule container - reduced_atom = manager._get_reduced_universe() - mol_container = manager._get_molecule_container(reduced_atom, 0) + group_molecules = MagicMock() + manager = EntropyManager( + run_manager, args, u, data_logger, level_manager, group_molecules + ) # Create dummy states states = {0: np.ones((10, 3))} @@ -644,8 +641,7 @@ def test_process_conformational_residue_level(self): # Run the method manager._process_conformational_entropy( - mol_id=0, - mol_container=mol_container, + group_id=0, ce=ce, level="residue", states=states, @@ -680,7 +676,7 @@ def test_finalize_molecule_results_aggregates_and_logs_total_entropy(self): ] data_logger.residue_data = [] - manager = EntropyManager(None, args, None, data_logger, None) + manager = EntropyManager(None, args, None, data_logger, None, None) # Patch save method data_logger.save_dataframes_as_json = MagicMock() @@ -723,7 +719,7 @@ def test_finalize_molecule_results_skips_invalid_entries(self, mock_logger): ] data_logger.residue_data = [] - manager = EntropyManager(None, args, None, data_logger, None) + manager = EntropyManager(None, args, None, data_logger, None, None) # Patch save method data_logger.save_dataframes_as_json = MagicMock() @@ -762,7 +758,7 @@ def setUp(self): os.chdir(self.test_dir) self.entropy_manager = EntropyManager( - MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() + MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) def tearDown(self): @@ -790,9 +786,12 @@ def test_vibrational_entropy_init(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() + group_molecules = MagicMock() # Instantiate VibrationalEntropy - ve = VibrationalEntropy(run_manager, args, universe, data_logger, level_manager) + ve = VibrationalEntropy( + run_manager, args, universe, data_logger, level_manager, group_molecules + ) # Basic assertions to check initialization self.assertIsInstance(ve, VibrationalEntropy) @@ -812,7 +811,7 @@ def test_frequency_calculation_0(self): run_manager = RunManager("mock_folder") ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) frequencies = ve.frequency_calculation(lambdas, temp) @@ -833,7 +832,7 @@ def test_frequency_calculation_positive(self): # Instantiate VibrationalEntropy with mocks ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) # Call the method under test @@ -859,7 +858,7 @@ def test_frequency_calculation_negative(self): # Instantiate VibrationalEntropy with mocks ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) # Assert that ValueError is raised due to negative eigenvalue @@ -883,7 +882,7 @@ def test_vibrational_entropy_calculation_force_not_highest(self): # Instantiate VibrationalEntropy with mocks ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) # Patch frequency_calculation to return known frequencies @@ -928,7 +927,7 @@ def test_vibrational_entropy_polymer_force(self): run_manager = RunManager("mock_folder") ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) S_vib = ve.vibrational_entropy_calculation( @@ -958,7 +957,7 @@ def test_vibrational_entropy_polymer_torque(self): run_manager = RunManager("mock_folder") ve = VibrationalEntropy( - run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock() + run_manager, MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) S_vib = ve.vibrational_entropy_calculation( @@ -1171,10 +1170,11 @@ def test_confirmational_entropy_init(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() + group_molecules = MagicMock() # Instantiate ConformationalEntropy ce = ConformationalEntropy( - run_manager, args, universe, data_logger, level_manager + run_manager, args, universe, data_logger, level_manager, group_molecules ) # Basic assertions to check initialization @@ -1212,8 +1212,11 @@ def test_assign_conformation(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() + group_molecules = MagicMock() - ce = ConformationalEntropy(run_manager, args, u, data_logger, level_manager) + ce = ConformationalEntropy( + run_manager, args, u, data_logger, level_manager, group_molecules + ) result = ce.assign_conformation( data_container=data_container, @@ -1272,10 +1275,11 @@ def test_orientational_entropy_init(self): run_manager = RunManager("temp_folder") level_manager = LevelManager() data_logger = DataLogger() + group_molecules = MagicMock() # Instantiate OrientationalEntropy oe = OrientationalEntropy( - run_manager, args, universe, data_logger, level_manager + run_manager, args, universe, data_logger, level_manager, group_molecules ) # Basic assertions to check initialization @@ -1296,7 +1300,7 @@ def test_orientational_entropy_calculation(self): } # Create an instance of OrientationalEntropy with dummy dependencies - oe = OrientationalEntropy(None, None, None, None, None) + oe = OrientationalEntropy(None, None, None, None, None, None) # Run the method result = oe.orientational_entropy_calculation(neighbours_dict) @@ -1317,7 +1321,7 @@ def test_orientational_entropy_water_branch_is_covered(self): """ neighbours_dict = {"H2O": 1} # Matches the condition exactly - oe = OrientationalEntropy(None, None, None, None, None) + oe = OrientationalEntropy(None, None, None, None, None, None) result = oe.orientational_entropy_calculation(neighbours_dict) # Since the logic is skipped, total entropy should be 0.0 diff --git a/tests/test_CodeEntropy/test_group_molecules.py b/tests/test_CodeEntropy/test_group_molecules.py new file mode 100644 index 0000000..c810280 --- /dev/null +++ b/tests/test_CodeEntropy/test_group_molecules.py @@ -0,0 +1,95 @@ +import os +import shutil +import tempfile +import unittest +from unittest.mock import MagicMock + +import numpy as np + +from CodeEntropy.group_molecules import GroupMolecules + + +class TestMain(unittest.TestCase): + """ + Unit tests for the functionality of GroupMolecules class. + """ + + def setUp(self): + """ + Set up a temporary directory as the working directory before each test. + Initialize GroupMolecules instance. + """ + self.test_dir = tempfile.mkdtemp(prefix="CodeEntropy_") + self._orig_dir = os.getcwd() + os.chdir(self.test_dir) + self.group_molecules = GroupMolecules() + + def tearDown(self): + """ + Clean up by removing the temporary directory and restoring the original working + directory. + """ + os.chdir(self._orig_dir) + shutil.rmtree(self.test_dir) + + def test_by_none_returns_individual_groups(self): + """ + Test _by_none returns each molecule in its own group when grouping is 'each'. + """ + mock_universe = MagicMock() + # Simulate universe.atoms.fragments has 3 molecules + mock_universe.atoms.fragments = [MagicMock(), MagicMock(), MagicMock()] + + groups = self.group_molecules._by_none(mock_universe) + expected = {0: [0], 1: [1], 2: [2]} + self.assertEqual(groups, expected) + + def test_by_molecules_groups_by_chemical_type(self): + """ + Test _by_molecules groups molecules with identical atom counts and names + together. + """ + mock_universe = MagicMock() + + # Use numpy arrays for .names to allow numpy-like comparison + fragment0 = MagicMock() + fragment0.names = np.array(["H", "O", "H"]) + fragment1 = MagicMock() + fragment1.names = np.array(["H", "O", "H"]) + fragment2 = MagicMock() + fragment2.names = np.array(["C", "C", "H", "H"]) + + mock_universe.atoms.fragments = [fragment0, fragment1, fragment2] + + groups = self.group_molecules._by_molecules(mock_universe) + + # Expect first two grouped, third separate + self.assertIn(0, groups) + self.assertIn(2, groups) + self.assertCountEqual(groups[0], [0, 1]) + self.assertEqual(groups[2], [2]) + + def test_grouping_molecules_dispatches_correctly(self): + """ + Test grouping_molecules method dispatches to correct grouping strategy. + """ + mock_universe = MagicMock() + mock_universe.atoms.fragments = [MagicMock()] # Just 1 molecule to keep simple + + # When grouping='each', calls _by_none + groups = self.group_molecules.grouping_molecules(mock_universe, "each") + self.assertEqual(groups, {0: [0]}) + + # When grouping='molecules', calls _by_molecules (mock to test call) + self.group_molecules._by_molecules = MagicMock(return_value={"mocked": [42]}) + groups = self.group_molecules.grouping_molecules(mock_universe, "molecules") + self.group_molecules._by_molecules.assert_called_once_with(mock_universe) + self.assertEqual(groups, {"mocked": [42]}) + + # If grouping unknown, should return empty dict + groups = self.group_molecules.grouping_molecules(mock_universe, "unknown") + self.assertEqual(groups, {}) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_CodeEntropy/test_levels.py b/tests/test_CodeEntropy/test_levels.py index 865687e..734c7ef 100644 --- a/tests/test_CodeEntropy/test_levels.py +++ b/tests/test_CodeEntropy/test_levels.py @@ -858,3 +858,138 @@ def test_filter_zero_rows_columns_partial_zero_removal(self): expected = np.array([[1, 2, 3]]) result = level_manager.filter_zero_rows_columns(matrix) np.testing.assert_array_equal(result, expected) + + def test_build_conformational_states_united_atom_accumulates_states(self): + """ + Test that the 'build_conformational_states' method correctly accumulates + united atom level conformational states for multiple molecules within the + same group. + + Specifically, when called with two molecules in the same group, the method + should append the states returned for the second molecule to the list of + states for the first molecule, resulting in a nested list structure. + + Verifies: + - The states_ua dictionary accumulates states as a nested list. + - The compute_dihedral_conformations method is called once per molecule. + """ + level_manager = LevelManager() + entropy_manager = MagicMock() + reduced_atom = MagicMock() + ce = MagicMock() + + # Setup mock residue for molecules + residue = MagicMock() + residue.atoms.indices = [10, 11, 12] + + # Setup two mock molecules with the same residue + mol_0 = MagicMock() + mol_0.residues = [residue] + mol_1 = MagicMock() + mol_1.residues = [residue] + + # entropy_manager returns different molecules by mol_id + entropy_manager._get_molecule_container.side_effect = [mol_0, mol_1] + + # new_U_select_atom returns dummy selections twice per molecule call + dummy_sel_1 = MagicMock() + dummy_sel_2 = MagicMock() + # For mol_0: light then heavy + # For mol_1: light then heavy + entropy_manager._run_manager.new_U_select_atom.side_effect = [ + dummy_sel_1, + dummy_sel_2, + dummy_sel_1, + dummy_sel_2, + ] + + # Mock compute_dihedral_conformations to return different states for each call + state_1 = ["ua_state_1"] + state_2 = ["ua_state_2"] + level_manager.compute_dihedral_conformations = MagicMock( + side_effect=[state_1, state_2] + ) + + groups = {0: [0, 1]} # Group 0 contains molecule 0 and molecule 1 + levels = [["united_atom"], ["united_atom"]] + start, end, step = 0, 10, 1 + number_frames = 10 + bin_width = 0.1 + + states_ua, states_res = level_manager.build_conformational_states( + entropy_manager, + reduced_atom, + levels, + groups, + start, + end, + step, + number_frames, + bin_width, + ce, + ) + + assert states_ua[(0, 0)] == ["ua_state_1", ["ua_state_2"]] + + # Confirm compute_dihedral_conformations was called twice (once per molecule) + assert level_manager.compute_dihedral_conformations.call_count == 2 + + def test_build_conformational_states_residue_level_accumulates_states(self): + """ + Test that the 'build_conformational_states' method correctly accumulates + residue level conformational states for multiple molecules within the + same group. + + When called with multiple molecules assigned to the same group at residue level, + the method should concatenate the returned states into a single flat list. + + Verifies: + - The states_res list contains concatenated residue states from all molecules. + - The states_ua dictionary remains empty for residue level. + - compute_dihedral_conformations is called once per molecule. + """ + level_manager = LevelManager() + entropy_manager = MagicMock() + reduced_atom = MagicMock() + ce = MagicMock() + + # Setup molecule with no residues + mol = MagicMock() + mol.residues = [] + entropy_manager._get_molecule_container.return_value = mol + + # Setup return values for compute_dihedral_conformations + states_1 = ["res_state1"] + states_2 = ["res_state2"] + level_manager.compute_dihedral_conformations = MagicMock( + side_effect=[states_1, states_2] + ) + + # Setup inputs with 2 molecules in same group + groups = {0: [0, 1]} # Both mol 0 and mol 1 are in group 0 + levels = [["res"], ["res"]] + start, end, step = 0, 10, 1 + number_frames = 10 + bin_width = 0.1 + + # Run + states_ua, states_res = level_manager.build_conformational_states( + entropy_manager, + reduced_atom, + levels, + groups, + start, + end, + step, + number_frames, + bin_width, + ce, + ) + + # Confirm accumulation occurred + assert states_ua == {} + assert states_res[0] == ["res_state1", "res_state2"] + assert states_res == [["res_state1", "res_state2"]] + + # Assert both calls to compute_dihedral_conformations happened + assert level_manager.compute_dihedral_conformations.call_count == 2 From fe089bdba23b3c59e2519fda98e40dbfc5967790 Mon Sep 17 00:00:00 2001 From: harryswift01 Date: Fri, 1 Aug 2025 15:26:53 +0100 Subject: [PATCH 23/23] fixing comments within the testing files --- tests/test_CodeEntropy/test_group_molecules.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_CodeEntropy/test_group_molecules.py b/tests/test_CodeEntropy/test_group_molecules.py index c810280..1914e5e 100644 --- a/tests/test_CodeEntropy/test_group_molecules.py +++ b/tests/test_CodeEntropy/test_group_molecules.py @@ -51,7 +51,6 @@ def test_by_molecules_groups_by_chemical_type(self): """ mock_universe = MagicMock() - # Use numpy arrays for .names to allow numpy-like comparison fragment0 = MagicMock() fragment0.names = np.array(["H", "O", "H"]) fragment1 = MagicMock()