123 changes: 72 additions & 51 deletions ax/benchmark/benchmark.py
@@ -54,9 +54,13 @@
from ax.generation_strategy.generation_strategy import GenerationStrategy
from ax.orchestration.orchestrator import Orchestrator
from ax.service.utils.best_point import (
_aggregate_and_cumulate_trace,
_compute_trace_values,
_pivot_data_with_feasibility,
_prepare_data_for_trace,
derelativize_opt_config,
get_trace,
is_row_feasible,
)
from ax.service.utils.best_point_mixin import BestPointMixin
from ax.service.utils.orchestrator_options import OrchestratorOptions, TrialType
@@ -791,64 +795,80 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
that is in terms of steps, with one element added each time a step
completes.

Supports single-objective, multi-objective, and constrained problems.
For multi-objective problems, the trace is in terms of hypervolume.

Args:
experiment: An experiment produced by `benchmark_replication`; it must
have `BenchmarkTrialMetadata` (as produced by `BenchmarkRunner`) for
each trial, and its data must have a "step" column.
"""
optimization_config = none_throws(experiment.optimization_config)
full_df = experiment.lookup_data().full_df

if optimization_config.is_moo_problem:
raise NotImplementedError(
"Cumulative epochs only supported for single objective problems."
)
if len(optimization_config.outcome_constraints) > 0:
raise NotImplementedError(
"Cumulative epochs not supported for problems with outcome constraints."
)
full_df["row_feasible"] = is_row_feasible(
df=full_df,
optimization_config=optimization_config,
# For the sake of this function, we only care about feasible trials. The
# distinction between infeasible and undetermined is not important.
undetermined_value=False,
)

objective_name = optimization_config.objective.metric.name
data = experiment.lookup_data()
full_df = data.full_df
# Pivot to wide format with feasibility
df_wide = _pivot_data_with_feasibility(
df=full_df,
index=["trial_index", "arm_name", MAP_KEY],
optimization_config=optimization_config,
)

# Has timestamps; needs to be merged with full_df because the metadata dfs
# contain data on epochs that didn't actually run due to early stopping, and
# we need to know which ones actually ran
def _get_df(trial: Trial) -> pd.DataFrame:
def _get_timestamps(experiment: Experiment) -> pd.Series:
"""
Get the (virtual) time each epoch finished at.
Get the (virtual) time at which each training progression finished.
"""
metadata = trial.run_metadata["benchmark_metadata"]
backend_simulator = none_throws(metadata.backend_simulator)
# Data for the first metric, which is the only metric
df = next(iter(metadata.dfs.values()))
start_time = backend_simulator.get_sim_trial_by_index(
trial.index
).sim_start_time
df["time"] = df["virtual runtime"] + start_time
return df

with_timestamps = pd.concat(
(
_get_df(trial=assert_is_instance(trial, Trial))
for trial in experiment.trials.values()
),
axis=0,
ignore_index=True,
)[["trial_index", MAP_KEY, "time"]]

df = (
full_df.loc[
full_df["metric_name"] == objective_name,
["trial_index", "arm_name", "mean", MAP_KEY],
]
.merge(with_timestamps, how="left")
.sort_values("time", ignore_index=True)
frames = []
for trial in experiment.trials.values():
trial = assert_is_instance(trial, Trial)
metadata = trial.run_metadata["benchmark_metadata"]
backend_simulator = none_throws(metadata.backend_simulator)
sim_trial = backend_simulator.get_sim_trial_by_index(
trial_index=trial.index
)
start_time = sim_trial.sim_start_time
# timestamps are identical across all metrics, so just use the first one
frame = next(iter(metadata.dfs.values())).copy()
frame["time"] = frame["virtual runtime"] + start_time
frames.append(frame)
df = pd.concat(frames, axis=0, ignore_index=True).set_index(
["trial_index", "arm_name", MAP_KEY]
)
return df["time"]

# Compute timestamps and join with df_wide *before* cumulative computations.
# This is critical because cumulative HV/objective calculations depend on
# the temporal ordering of observations.
timestamps = _get_timestamps(experiment=experiment)

# Merge timestamps and sort by time before cumulative computations
df_wide = df_wide.join(
timestamps, on=["trial_index", "arm_name", MAP_KEY], how="left"
).sort_values(by="time", ascending=True, ignore_index=True)

# Compute per-evaluation (trial_index, MAP_KEY) cumulative values in the
# timestamp order established above
df_wide["value"], maximize = _compute_trace_values(
df_wide=df_wide,
optimization_config=optimization_config,
use_cumulative_best=True,
)
return (
df["mean"].cummin()
if optimization_config.objective.minimize
else df["mean"].cummax()
# Get a value for each (trial_index, arm_name, MAP_KEY) tuple
value_by_arm_pull = df_wide[["trial_index", "arm_name", MAP_KEY, "value"]]

# Aggregate by trial and step, then compute the cumulative best;
# keep_order=True preserves the ordering by timestamp
return _aggregate_and_cumulate_trace(
df=value_by_arm_pull,
by=["trial_index", MAP_KEY],
maximize=maximize,
keep_order=True,
).to_numpy()
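
Reviewer note: the step-based trace boils down to ordering per-step observations by their virtual completion time and taking a running best. The sketch below is a simplified, single-objective, unconstrained analogue of the logic above; it stands in for `_pivot_data_with_feasibility`, `_compute_trace_values`, and `_aggregate_and_cumulate_trace`, which handle feasibility, hypervolume, and aggregation in the real implementation. The column names and values are assumptions for illustration only.

import numpy as np
import pandas as pd

def step_trace_sketch(df: pd.DataFrame, minimize: bool = True) -> np.ndarray:
    # `df` is assumed to have one row per (trial_index, step) with a "time"
    # column (virtual completion time of the step) and a "mean" column
    # (objective value observed at that step).
    ordered = df.sort_values("time", ignore_index=True)
    # Running best-so-far over the time-ordered objective values.
    values = ordered["mean"]
    return (values.cummin() if minimize else values.cummax()).to_numpy()

# Example: three steps from two trials, ordered by virtual completion time.
example = pd.DataFrame(
    {"trial_index": [0, 1, 0], "time": [1.0, 2.0, 3.0], "mean": [5.0, 3.0, 4.0]}
)
print(step_trace_sketch(example))  # [5. 3. 3.]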


@@ -867,14 +887,15 @@ def get_benchmark_result_with_cumulative_steps(
opt_trace = get_opt_trace_by_steps(experiment=experiment)
return replace(
result,
optimization_trace=opt_trace,
cost_trace=np.arange(1, len(opt_trace) + 1, dtype=int),
optimization_trace=opt_trace.tolist(),
cost_trace=np.arange(1, len(opt_trace) + 1, dtype=int).tolist(),
# Empty
oracle_trace=np.full(len(opt_trace), np.nan),
inference_trace=np.full(len(opt_trace), np.nan),
oracle_trace=np.full_like(opt_trace, np.nan).tolist(),
inference_trace=np.full_like(opt_trace, np.nan).tolist(),
is_feasible_trace=None,
score_trace=compute_score_trace(
optimization_trace=opt_trace,
baseline_value=baseline_value,
optimal_value=optimal_value,
),
).tolist(),
)
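
A quick illustration of the bookkeeping above (the array contents are made up): the cost trace simply counts completed steps, and the oracle/inference traces are filled with NaNs of matching length since they are not meaningful for step-level traces.

import numpy as np

opt_trace = np.array([5.0, 3.0, 3.0, 2.0])  # hypothetical step-based trace
cost_trace = np.arange(1, len(opt_trace) + 1, dtype=int)  # one unit of cost per step
placeholder = np.full_like(opt_trace, np.nan)  # unused oracle/inference traces
print(cost_trace.tolist())   # [1, 2, 3, 4]
print(placeholder.tolist())  # [nan, nan, nan, nan]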
52 changes: 45 additions & 7 deletions ax/benchmark/testing/benchmark_stubs.py
@@ -312,15 +312,52 @@ def get_async_benchmark_problem(
n_steps: int = 1,
lower_is_better: bool = False,
report_inference_value_as_trace: bool = False,
num_objectives: int = 1,
num_constraints: int = 0,
) -> BenchmarkProblem:
"""
Create an async benchmark problem, optionally with map (per-step) data for early stopping.

Args:
map_data: Whether to use map metrics (required for early stopping).
step_runtime_fn: Optional function mapping arm parameters to per-step runtime.
n_steps: Number of steps per trial.
lower_is_better: Whether lower values are better (for SOO).
report_inference_value_as_trace: Whether to report the inference value as the trace.
num_objectives: Number of objectives (1 for SOO, >1 for MOO).
num_constraints: Number of outcome constraints to add.

Returns:
A BenchmarkProblem suitable for early-stopping evaluation.
"""
search_space = get_discrete_search_space()
test_function = IdentityTestFunction(n_steps=n_steps)
optimization_config = get_soo_opt_config(
outcome_names=["objective"],
use_map_metric=map_data,
observe_noise_sd=True,
lower_is_better=lower_is_better,
)

# Create outcome names for objectives and constraints
objective_names = [f"objective_{i}" for i in range(num_objectives)]
constraint_names = [f"constraint_{i}" for i in range(num_constraints)]
outcome_names = [*objective_names, *constraint_names]

test_function = IdentityTestFunction(n_steps=n_steps, outcome_names=outcome_names)

if num_objectives == 1:
# Single-objective: first outcome is objective, rest are constraints
optimization_config = get_soo_opt_config(
outcome_names=outcome_names,
lower_is_better=lower_is_better,
observe_noise_sd=True,
use_map_metric=map_data,
)
else:
# Multi-objective: pass all outcomes (objectives + constraints)
# get_moo_opt_config will use the last num_constraints as constraints
optimization_config = get_moo_opt_config(
outcome_names=outcome_names,
ref_point=[1.0] * num_objectives,
num_constraints=num_constraints,
lower_is_better=lower_is_better,
observe_noise_sd=True,
use_map_metric=map_data,
)

return BenchmarkProblem(
name="test",
@@ -330,6 +367,7 @@ def get_async_benchmark_problem(
num_trials=4,
baseline_value=19 if lower_is_better else 0,
optimal_value=0 if lower_is_better else 19,
worst_feasible_value=5.0 if num_constraints > 0 else None,
step_runtime_function=step_runtime_fn,
report_inference_value_as_trace=report_inference_value_as_trace,
)
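
Usage sketch mirroring the new tests: one call produces a two-objective problem with per-step map data, the other a single-objective problem with one outcome constraint (argument values copied from the tests below).

moo_problem = get_async_benchmark_problem(
    map_data=True, n_steps=5, num_objectives=2
)
constrained_problem = get_async_benchmark_problem(
    map_data=True, n_steps=5, num_constraints=1
)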
85 changes: 71 additions & 14 deletions ax/benchmark/tests/test_benchmark.py
@@ -1195,28 +1195,85 @@ def test_get_opt_trace_by_cumulative_epochs(self) -> None:
new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
self.assertEqual(list(new_opt_trace), [0.0, 0.0, 1.0, 1.0, 2.0, 3.0])

method = get_sobol_benchmark_method()
with self.subTest("MOO"):
problem = get_multi_objective_benchmark_problem()

with self.subTest("Multi-objective"):
# Multi-objective problem with step data
problem = get_async_benchmark_problem(
map_data=True,
n_steps=5,
num_objectives=2,
# Ensure we don't have two finishing at the same time, for
# determinism
step_runtime_fn=lambda params: params["x0"] * (1 - 0.01 * params["x0"]),
)
experiment = self.run_optimization_with_orchestrator(
problem=problem, method=method, seed=0
)
with self.assertRaisesRegex(
NotImplementedError, "only supported for single objective"
):
get_opt_trace_by_steps(experiment=experiment)
new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
self.assertListEqual(
new_opt_trace.tolist(),
[
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
4.0,
4.0,
4.0,
4.0,
4.0,
4.0,
4.0,
],
)

with self.subTest("Constrained"):
problem = get_benchmark_problem("constrained_gramacy_observed_noise")
# Constrained problem with step data.
problem = get_async_benchmark_problem(
map_data=True,
n_steps=5,
num_constraints=1,
# Ensure we don't have two finishing at the same time, for
# determinism
step_runtime_fn=lambda params: params["x0"] * (1 - 0.01 * params["x0"]),
)
experiment = self.run_optimization_with_orchestrator(
problem=problem, method=method, seed=0
)
with self.assertRaisesRegex(
NotImplementedError,
"not supported for problems with outcome constraints",
):
get_opt_trace_by_steps(experiment=experiment)
new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
self.assertListEqual(
new_opt_trace.tolist(),
[
0.0,
0.0,
0.0,
0.0,
0.0,
1.0,
1.0,
2.0,
2.0,
2.0,
2.0,
2.0,
2.0,
3.0,
3.0,
3.0,
3.0,
3.0,
3.0,
3.0,
],
)

def test_get_benchmark_result_with_cumulative_steps(self) -> None:
"""See test_get_opt_trace_by_cumulative_epochs for more info."""