diff --git a/docs/guide/cli/cli_yaml.rst b/docs/guide/cli/cli_yaml.rst index b69493686..5a4652f91 100644 --- a/docs/guide/cli/cli_yaml.rst +++ b/docs/guide/cli/cli_yaml.rst @@ -15,10 +15,12 @@ as an example, the settings file which re-specifies the default behaviour would threed: True max3d: 0.95 element_change: True + protocol: + method: RelativeHybridTopologyProtocol The name of the algorithm is given behind the ``method:`` key and the arguments to the algorithm are then optionally given behind the ``settings:`` key. -Both the `network:` and `mapper:` sections are optional. +The ``network:``, ``mapper:``, and ``protocol:`` sections are all optional. This is then provided to the ``openfe plan-rbfe-network`` command as :: @@ -76,3 +78,20 @@ settings file :: method: generate_radial_network settings: central_ligand: '0' + +Customising the Protocol +------------------------- + +The Settings of a Protocol can be customised. The settings variable names map directly between the Python API and +yaml settings files. For example, to customise the production length of +the RFE Protocol, from Python would require a line of code such as:: + + settings.simulation_settings.production_length = '5.4 ns' + +This would be achieved via the yaml file as:: + + protocol: + method: RelativeHybridTopologyProtocol + settings: + simulation_settings: + production_length: 5.4 ns diff --git a/openfe/setup/ligand_network_planning.py b/openfe/setup/ligand_network_planning.py index 99aa2ee87..de48d50c6 100644 --- a/openfe/setup/ligand_network_planning.py +++ b/openfe/setup/ligand_network_planning.py @@ -88,12 +88,18 @@ def generate_radial_network( # handle central_ligand arg possibilities # after this, central_ligand is resolved to a SmallMoleculeComponent if isinstance(central_ligand, int): + central_ligand_index = central_ligand ligands = list(ligands) try: - central_ligand = ligands[central_ligand] + central_ligand = ligands[central_ligand_index] except IndexError: raise ValueError(f"index '{central_ligand}' out of bounds, there are " f"{len(ligands)} ligands") + # you could do a list comprehension like: + # ligands = [l for l in ligands if l != central] + # but this wouldn't properly catch when multiple identical central ligands have been passed + # so instead slice out the central ligand + ligands = ligands[:central_ligand_index] + ligands[central_ligand_index + 1:] elif isinstance(central_ligand, str): ligands = list(ligands) possibles = [l for l in ligands if l.name == central_ligand] diff --git a/openfecli/commands/plan_rbfe_network.py b/openfecli/commands/plan_rbfe_network.py index 2c2d13fdd..495c35ea9 100644 --- a/openfecli/commands/plan_rbfe_network.py +++ b/openfecli/commands/plan_rbfe_network.py @@ -18,6 +18,7 @@ def plan_rbfe_network_main( solvent, protein, cofactors, + protocol=None, ): """Utility method to plan a relative binding free energy network. @@ -37,6 +38,8 @@ def plan_rbfe_network_main( protein component for complex simulations, to which the ligands are bound cofactors : Iterable[SmallMoleculeComponent] any cofactors alongisde the protein, can be empty list + protocol: Optional[Protocol] + fully set up Protocol to use Returns ------- @@ -53,6 +56,7 @@ def plan_rbfe_network_main( mappers=mapper, mapping_scorer=mapping_scorer, ligand_network_planner=ligand_network_planner, + protocol=protocol, ) alchemical_network = network_planner( ligands=small_molecules, solvent=solvent, protein=protein, @@ -145,6 +149,7 @@ def plan_rbfe_network( mapping_scorer = yaml_options.scorer ligand_network_planner = yaml_options.ligand_network_planner solvent = yaml_options.solvent + protocol = yaml_options.protocol write("\t\tSolvent: " + str(solvent)) write("") @@ -169,6 +174,7 @@ def plan_rbfe_network( solvent=solvent, protein=protein, cofactors=cofactors, + protocol=protocol, ) write("\tDone") write("") diff --git a/openfecli/commands/plan_rhfe_network.py b/openfecli/commands/plan_rhfe_network.py index fe5bd3114..e2054baf7 100644 --- a/openfecli/commands/plan_rhfe_network.py +++ b/openfecli/commands/plan_rhfe_network.py @@ -15,7 +15,7 @@ def plan_rhfe_network_main( mapper, mapping_scorer, ligand_network_planner, small_molecules, - solvent, + solvent, protocol=None ): """Utility method to plan a relative hydration free energy network. @@ -31,6 +31,7 @@ def plan_rhfe_network_main( molecules of the system solvent : SolventComponent Solvent component used for solvation + protocol : Optional[Protocol] Returns ------- @@ -46,6 +47,7 @@ def plan_rhfe_network_main( mappers=mapper, mapping_scorer=mapping_scorer, ligand_network_planner=ligand_network_planner, + protocol=protocol, ) alchemical_network = network_planner( ligands=small_molecules, solvent=solvent @@ -142,6 +144,7 @@ def plan_rhfe_network(molecules: List[str], yaml_settings: str, output_dir: str) ligand_network_planner=ligand_network_planner, small_molecules=small_molecules, solvent=solvent, + protocol=yaml_options.protocol, ) write("\tDone") write("") diff --git a/openfecli/parameters/plan_network_options.py b/openfecli/parameters/plan_network_options.py index 3bd4c1d4b..303f73ac4 100644 --- a/openfecli/parameters/plan_network_options.py +++ b/openfecli/parameters/plan_network_options.py @@ -4,7 +4,9 @@ """ import click +import difflib from collections import namedtuple +from gufe.settings import SettingsBaseModel try: # todo; once we're fully v2, we can use ConfigDict not nested class from pydantic.v1 import BaseModel # , ConfigDict @@ -18,7 +20,7 @@ PlanNetworkOptions = namedtuple('PlanNetworkOptions', ['mapper', 'scorer', - 'ligand_network_planner', 'solvent']) + 'ligand_network_planner', 'solvent', 'protocol']) class MapperSelection(BaseModel): @@ -41,6 +43,24 @@ class Config: settings: dict[str, Any] = {} +class SolventSelection(BaseModel): + class Config: + extra = 'allow' + anystr_lower = True + + method: Optional[str] = None + settings: dict[str, Any] = {} + + +class ProtocolSelection(BaseModel): + class Config: + extra = 'allow' + anystr_lower = True + + method: Optional[str] = None + settings: dict[str, Any] = {} + + class CliYaml(BaseModel): # model_config = ConfigDict(extra='allow') class Config: @@ -48,6 +68,8 @@ class Config: mapper: Optional[MapperSelection] = None network: Optional[NetworkSelection] = None + solvent: Optional[SolventSelection] = None + protocol: Optional[ProtocolSelection] = None def parse_yaml_planner_options(contents: str) -> CliYaml: @@ -81,23 +103,81 @@ def parse_yaml_planner_options(contents: str) -> CliYaml: return CliYaml(**raw) -def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOptions: - """Load cli options from yaml file path and resolve these to objects +def nearest_match(a: str, possible: list[str]) -> str: + """figure out what *a* might have been meant from *possible*""" + # todo: this is using a standard library approach, others are possible + return max( + possible, + key=lambda x: difflib.SequenceMatcher(a=a, b=x).ratio() + ) - Parameters - ---------- - path : str - path to the yaml file - context - unused + +def apply_onto(settings: SettingsBaseModel, options: dict) -> None: + """recursively apply things from options onto settings""" + # this is pydantic v1, v2 has different name for this + fields = list(settings.__fields__) + + for k, v in options.items(): + # print(f"doing k='{k}' v='{v}' on {settings.__class__}") + if k not in fields: + guess = nearest_match(k, fields) + raise ValueError(f"Unknown field '{k}', " + f"did you mean '{guess}'?") + + thing = getattr(settings, k) + if isinstance(thing, SettingsBaseModel): + if not isinstance(v, dict): + raise ValueError(f"must set sub-settings '{k}' to dict, " + f"got: '{v}'") + apply_onto(thing, v) + else: + # print(f'-> setting {k} to {v}') + setattr(settings, k, v) + + +def resolve_protocol_choices(options: Optional[ProtocolSelection]): + """Turn Protocol section into a fully formed Protocol Returns ------- - PlanNetworkOptions : namedtuple - a namedtuple with fields 'mapper', 'scorer', 'network_planning_algorithm', - and 'solvent' fields. - these fields each hold appropriate objects ready for use + Optional[Protocol] + + Raises + ------ + ValueError + if an unsupported method name is input """ + if not options: + return None + + # issue #644, make this selection not static + allowed = {'RelativeHybridTopologyProtocol', + # 'AbsoluteSolvationProtocol', + # 'PlainMDProtocol', + } + if options.method.lower() == 'relativehybridtopologyprotocol': + from openfe.protocols import openmm_rfe + protocol = openmm_rfe.RelativeHybridTopologyProtocol + # This wouldn't be reachable from any plan command, so leave out + #elif options.method.lower() == 'absolutesolvationprotocol': + # from openfe.protocols import openmm_afe + # protocol = openmm_afe.AbsoluteSolvationProtocol + #elif options.method.lower() == 'plainmdprotocol': + # from openfe.protocols import openmm_md + # protocol = openmm_md.PlainMDProtocol + else: + raise ValueError(f"Unsupported protocol method '{options.method}'. " + f"Supported methods are {','.join(allowed)}") + + settings = protocol.default_settings() + # work through the fields in yaml input and apply these onto settings + if options.settings: + apply_onto(settings, options.settings) + + return protocol(settings) + + +def load_yaml_planner_options_from_cliyaml(opt: CliYaml) -> PlanNetworkOptions: from gufe import SolventComponent from openfe.setup.ligand_network_planning import ( generate_radial_network, @@ -114,17 +194,8 @@ def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOption ) from functools import partial - if path is not None: - with open(path, 'r') as f: - raw = f.read() - - # convert raw yaml to normalised pydantic model - opt = parse_yaml_planner_options(raw) - else: - opt = None - # convert normalised inputs to objects - if opt and opt.mapper: + if opt.mapper: mapper_choices = { 'lomap': LomapAtomMapper, 'lomapatommapper': LomapAtomMapper, @@ -144,12 +215,10 @@ def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOption # todo: choice of scorer goes here mapping_scorer = default_lomap_score - if opt and opt.network: + if opt.network: network_choices = { 'generate_radial_network': generate_radial_network, - 'radial': generate_radial_network, 'generate_minimal_spanning_network': generate_minimal_spanning_network, - 'mst': generate_minimal_spanning_network, 'generate_minimal_redundant_network': generate_minimal_redundant_network, 'generate_maximal_network': generate_maximal_network, } @@ -157,7 +226,8 @@ def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOption try: func = network_choices[opt.network.method] except KeyError: - raise KeyError(f"Bad network algorithm choice: '{opt.network.method}'") + raise ValueError(f"Bad network algorithm choice: '{opt.network.method}'. " + f"Available options are {', '.join(network_choices.keys())}") ligand_network_planner = partial(func, **opt.network.settings) else: @@ -166,26 +236,64 @@ def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOption # todo: choice of solvent goes here solvent = SolventComponent() + if opt.protocol: + protocol = resolve_protocol_choices(opt.protocol) + else: + protocol = None + return PlanNetworkOptions( - mapper_obj, - mapping_scorer, - ligand_network_planner, - solvent, + mapper=mapper_obj, + scorer=mapping_scorer, + ligand_network_planner=ligand_network_planner, + solvent=solvent, + protocol=protocol, ) +def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOptions: + """Load cli options from yaml file path and resolve these to objects + + Parameters + ---------- + path : str + path to the yaml file + context + unused + + Returns + ------- + PlanNetworkOptions : namedtuple + a namedtuple with fields 'mapper', 'scorer', 'network_planning_algorithm', + and 'solvent' fields. + these fields each hold appropriate objects ready for use + """ + if path is not None: + with open(path, 'r') as f: + raw = f.read() + + # convert raw yaml to normalised pydantic model + opt = parse_yaml_planner_options(raw) + else: + opt = CliYaml() + + return load_yaml_planner_options_from_cliyaml(opt) + + _yaml_help = """\ Path to planning settings yaml file -Currently it can contain sections for customising the -atom mapper and network planning algorithm, -these are addressed using a `mapper:` or `network:` key in the yaml file. -The algorithm to be used for these sections is then specified by the `method:` key. -For choosing mappers, either the LomapAtomMapper or KartografAtomMapper are allowed choices, -while for the network planning algorithm either the generate_minimal_spanning_tree or -generate_minimal_redundant_network options are allowed. -Finally, a `settings:` key can be given to customise the algorithm, -with allowable options corresponding to the keyword arguments of the Python API for these algorithms. +Currently it can contain sections for customising the atom mapper, network +planning algorithm, and protocol. These are addressed using a ``mapper:``, +``network:`` or ``protocol:`` key in the yaml file. The algorithm to be used +for these sections is then specified by the ``method:`` key. Finally, a +``settings:`` key can be given to customise the algorithm, with allowable +options corresponding to the keyword arguments of the Python API for these +algorithms. + +For choosing mappers, either the ``LomapAtomMapper`` or ``KartografAtomMapper`` +are allowed choices. For the network planning algorithm either the +``generate_minimal_spanning_tree``, ``generate_radial_network`` or +``generate_minimal_redundant_network`` options are allowed. For example, this is a valid settings yaml file to specify that the Lomap atom mapper should be used forbidding element changes, @@ -201,6 +309,10 @@ def load_yaml_planner_options(path: Optional[str], context) -> PlanNetworkOption method: generate_minimal_redundant_network settings: mst_num: 3 + +The Settings of a Protocol can also be customised in this settings yaml file. +To do this, the nested variable names from the Python API are directly converted +to the nested yaml format. """ diff --git a/openfecli/tests/parameters/test_plan_network_options.py b/openfecli/tests/parameters/test_plan_network_options.py index ae69b605f..babf4dcbb 100644 --- a/openfecli/tests/parameters/test_plan_network_options.py +++ b/openfecli/tests/parameters/test_plan_network_options.py @@ -2,6 +2,9 @@ # For details, see https://github.com/OpenFreeEnergy/openfe from openfecli.parameters import plan_network_options import pytest +from openff.units import unit + +from openfe.protocols import openmm_rfe @pytest.fixture @@ -26,6 +29,7 @@ def partial_mapper_yaml(): timeout: 120.0 """ + @pytest.fixture def partial_network_yaml(): return """\ @@ -36,6 +40,19 @@ def partial_network_yaml(): """ +@pytest.fixture +def protocol_settings_yaml(): + return """\ +protocol: + method: RelativeHybridTopologyProtocol + settings: + protocol_repeats: 2 + simulation_settings: + production_length: 7.5 ns + equilibration_length: 2200 ps +""" + + def test_loading_full_yaml(full_yaml): d = plan_network_options.parse_yaml_planner_options(full_yaml) @@ -64,3 +81,83 @@ def test_loading_network_yaml(partial_network_yaml): assert d.network assert d.network.method == 'generate_radial_network' assert d.network.settings['scorer'] == 'default_lomap_scorer' + + +def test_parsing_protocol_yaml(protocol_settings_yaml): + d = plan_network_options.parse_yaml_planner_options(protocol_settings_yaml) + + assert d + assert d.protocol.method == 'relativehybridtopologyprotocol' + assert d.protocol.settings['protocol_repeats'] == 2 + assert d.protocol.settings['simulation_settings']['production_length'] == '7.5 ns' + + +def test_resolving_protocol_yaml(protocol_settings_yaml): + cliyaml = plan_network_options.parse_yaml_planner_options(protocol_settings_yaml) + + pno = plan_network_options.load_yaml_planner_options_from_cliyaml(cliyaml) + + prot = pno.protocol + assert isinstance(prot, openmm_rfe.RelativeHybridTopologyProtocol) + assert prot.settings.protocol_repeats == 2 + assert prot.settings.simulation_settings.production_length.m == pytest.approx(7.5) + assert prot.settings.simulation_settings.production_length.u == unit.nanosecond + assert prot.settings.simulation_settings.equilibration_length.m == pytest.approx(2.2) + assert prot.settings.simulation_settings.equilibration_length.u == unit.nanosecond + + +def test_nearest_match(): + # check that misspelt options are given a likely correction + # here production -> production_length + + yaml = """\ +protocol: + method: RelativeHybridTopologyProtocol + settings: + simulation_settings: + production: 5 ns + equilibration_length: 1 ns +""" + cliyaml = plan_network_options.parse_yaml_planner_options(yaml) + + with pytest.raises(ValueError, match="did you mean 'production_length'"): + plan_network_options.load_yaml_planner_options_from_cliyaml(cliyaml) + + +def test_bad_network_option(): + yaml = """\ +network: + method: generate_starmap + settings: + central_ligand: 0 +""" + cliyaml = plan_network_options.parse_yaml_planner_options(yaml) + + with pytest.raises(ValueError, match="Bad network algorithm choice: 'generate_starmap'. Available options are"): + plan_network_options.load_yaml_planner_options_from_cliyaml(cliyaml) + + +def test_bad_protocol_option(): + yaml = """\ +protocol: + method: wizardry +""" + cliyaml = plan_network_options.parse_yaml_planner_options(yaml) + + with pytest.raises(ValueError, match="Unsupported protocol method 'wizardry'. Supported methods are"): + plan_network_options.load_yaml_planner_options_from_cliyaml(cliyaml) + + +def test_bad_protocol_settings_input(): + # input to modifying settings object must be dict + # i.e. can't set `simulation_settings = 4' + yaml = """\ +protocol: + method: RelativeHybridTopologyProtocol + settings: + simulation_settings: 24 +""" + cliyaml = plan_network_options.parse_yaml_planner_options(yaml) + + with pytest.raises(ValueError, match="must set sub-settings 'simulation_settings' to dict"): + plan_network_options.load_yaml_planner_options_from_cliyaml(cliyaml)