diff --git a/ext/Descriptions/mountain_car.md b/ext/Descriptions/mountain_car.md new file mode 100644 index 00000000..138bba72 --- /dev/null +++ b/ext/Descriptions/mountain_car.md @@ -0,0 +1,57 @@ +The **Mountain Car problem** is a classic benchmark in control and reinforcement learning. +It involves an underpowered car that must climb a steep hill to reach a target position. +Because the car's engine is too weak to climb the hill directly, it must first drive away from the goal to gain momentum by oscillating in a valley. +The goal is to reach the target position in **minimum time**. + +### Mathematical formulation + +The problem can be stated as + +```math +\begin{aligned} +\min_{x,v,u,t_f} \quad & J = t_f \\[0.5em] +\text{s.t.} \quad & \dot{x}(t) = v(t), \\[0.5em] +& \dot{v}(t) = a \, u(t) - b \, \cos(c \, x(t)), \\[0.5em] +& x(0) = -0.5, \quad v(0) = 0.0, \\[0.5em] +& x(t_f) = 0.5, \quad v(t_f) \ge 0.0, \\[0.5em] +& -1.2 \le x(t) \le 0.5, \\[0.5em] +& -0.07 \le v(t) \le 0.07, \\[0.5em] +& -1 \le u(t) \le 1, \\[0.5em] +& t_f \ge 0. +\end{aligned} +``` + +### Parameters + +| Parameter | Symbol | Value | +|-----------|--------|-------| +| Power coefficient | $a$ | 0.001 | +| Gravity coefficient | $b$ | 0.0025 | +| Frequency coefficient | $c$ | 3.0 | +| Initial position | $x_0$ | -0.5 | +| Target position | $x_f$ | 0.5 | + +### Qualitative behaviour + +- The car must oscillate back and forth in the valley to build enough kinetic energy to climb the right hill. +- The optimal strategy typically involves a sequence of full-throttle accelerations in alternating directions (**bang-bang control**). +- The minimum time objective makes the problem sensitive to the initial guess for the final time. + +### Characteristics + +- Two-dimensional state space (position and velocity), +- Scalar control (effort/acceleration), +- **Free final time** with minimum time objective, +- State constraints (position and velocity bounds), +- Control constraints (bounds on acceleration). 
+ +### References + +- **Dymos Documentation**. [*The Mountain Car Problem*](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html). + Provides a detailed description and implementation of the Mountain Car problem using the Dymos library. + +- **OpenAI Gym**. [*MountainCar-v0*](https://gym.openai.com/envs/MountainCar-v0/). + A standard reinforcement learning environment for the Mountain Car problem. + +- Moore, A. W. (1990). *Efficient Memory-based Learning for Robot Control*. PhD thesis, University of Cambridge. + Introduces the Mountain Car problem as a benchmark for reinforcement learning and control. diff --git a/ext/JuMPModels/mountain_car.jl b/ext/JuMPModels/mountain_car.jl new file mode 100644 index 00000000..3f379e89 --- /dev/null +++ b/ext/JuMPModels/mountain_car.jl @@ -0,0 +1,114 @@ +""" +$(TYPEDSIGNATURES) + +Constructs and returns a JuMP model for the **Mountain Car Problem**. +The model represents the dynamics of an underpowered car climbing a hill. +The objective is to minimize the final time `tf` to reach the target position. + +# Arguments + +- `::JuMPBackend`: Specifies the backend for building the JuMP model. +- `grid_size::Int=200`: (Keyword) Number of discretisation steps for the time horizon. + +# Returns + +- `model::JuMP.Model`: A JuMP model representing the Mountain Car optimal control problem. 
+ +# Example + +```julia-repl +julia> using OptimalControlProblems +julia> using JuMP + +julia> model = OptimalControlProblems.mountain_car(JuMPBackend(); grid_size=100) +``` + +# References + +- Problem formulation: https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html +""" +function OptimalControlProblems.mountain_car( + ::JuMPBackend, + args...; + grid_size::Int=grid_size_data(:mountain_car), + parameters::Union{Nothing,NamedTuple}=nothing, + kwargs..., +) + + # parameters + params = parameters_data(:mountain_car, parameters) + t0 = params[:t0] + tf_start = params[:tf_start] + tf_min = params[:tf_min] + pos_t0 = params[:pos_t0] + vel_t0 = params[:vel_t0] + pos_tf = params[:pos_tf] + vel_tf_min = params[:vel_tf_min] + pos_min = params[:pos_min] + pos_max = params[:pos_max] + vel_min = params[:vel_min] + vel_max = params[:vel_max] + u_min = params[:u_min] + u_max = params[:u_max] + a = params[:a] + b = params[:b] + c = params[:c] + + # model + model = JuMP.Model(args...; kwargs...) 
+ + # metadata + model[:time_grid] = () -> range(t0, value(model[:tf]), grid_size + 1) + model[:state_components] = ["pos", "vel"] + model[:costate_components] = ["∂pos", "∂vel"] + model[:control_components] = ["u"] + model[:variable_components] = ["tf"] + + # N = grid_size + @expression(model, N, grid_size) + + # variables + @variables( + model, + begin + pos_min ≤ pos[0:N] ≤ pos_max, (start = pos_t0) + vel_min ≤ vel[0:N] ≤ vel_max, (start = vel_t0) + u_min ≤ u[0:N] ≤ u_max, (start = 0.0) + tf ≥ tf_min, (start = tf_start) + end + ) + + # boundary constraints + @constraints( + model, + begin + pos[0] == pos_t0 + vel[0] == vel_t0 + pos[N] == pos_tf + vel[N] ≥ vel_tf_min + end + ) + + # dynamics + @expressions( + model, + begin + Δt, (tf - t0) / N + dpos[i = 0:N], vel[i] + dvel[i = 0:N], a * u[i] - b * cos(c * pos[i]) + end + ) + + @constraints( + model, + begin + ∂pos[i = 1:N], pos[i] == pos[i - 1] + 0.5 * Δt * (dpos[i] + dpos[i - 1]) + ∂vel[i = 1:N], vel[i] == vel[i - 1] + 0.5 * Δt * (dvel[i] + dvel[i - 1]) + end + ) + + # objective + @objective(model, Min, tf) + + return model +end diff --git a/ext/MetaData/mountain_car.jl b/ext/MetaData/mountain_car.jl new file mode 100644 index 00000000..cc96b7b4 --- /dev/null +++ b/ext/MetaData/mountain_car.jl @@ -0,0 +1,21 @@ +mountain_car_meta = OrderedDict( + :grid_size => 200, + :parameters => ( + t0 = 0.0, + tf_start = 50.0, + tf_min = 0.1, + pos_t0 = -0.5, + vel_t0 = 0.0, + pos_tf = 0.5, + vel_tf_min = 0.0, + pos_min = -1.2, + pos_max = 0.5, + vel_min = -0.07, + vel_max = 0.07, + u_min = -1.0, + u_max = 1.0, + a = 0.001, + b = 0.0025, + c = 3.0, + ), +) diff --git a/ext/OptimalControlModels/mountain_car.jl b/ext/OptimalControlModels/mountain_car.jl new file mode 100644 index 00000000..984675bc --- /dev/null +++ b/ext/OptimalControlModels/mountain_car.jl @@ -0,0 +1,96 @@ +""" +$(TYPEDSIGNATURES) + +Constructs an **OptimalControl problem** for the Mountain Car problem. 
+The goal is to drive an underpowered car up a steep hill. +The objective is to minimize the time required to reach the target position. +The problem formulation can be found [here](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html). + +# Arguments + +- `::OptimalControlBackend`: Placeholder type specifying the OptimalControl backend or solver interface. +- `grid_size::Int=200`: (Keyword) Number of discretisation points for the direct transcription grid. + +# Returns + +- `docp`: The direct optimal control problem object representing the Mountain Car problem. + +# Example + +```julia-repl +julia> using OptimalControlProblems + +julia> docp = OptimalControlProblems.mountain_car(OptimalControlBackend(); grid_size=100); +``` +""" +function OptimalControlProblems.mountain_car( + ::OptimalControlBackend, + description::Symbol...; + grid_size::Int=grid_size_data(:mountain_car), + parameters::Union{Nothing,NamedTuple}=nothing, + kwargs..., +) + + # parameters + params = parameters_data(:mountain_car, parameters) + t0 = params[:t0] + tf_start = params[:tf_start] + tf_min = params[:tf_min] + pos_t0 = params[:pos_t0] + vel_t0 = params[:vel_t0] + pos_tf = params[:pos_tf] + vel_tf_min = params[:vel_tf_min] + pos_min = params[:pos_min] + pos_max = params[:pos_max] + vel_min = params[:vel_min] + vel_max = params[:vel_max] + u_min = params[:u_min] + u_max = params[:u_max] + a = params[:a] + b = params[:b] + c = params[:c] + + # model + ocp = @def begin + tf ∈ R, variable + t ∈ [t0, tf], time + x = (pos, vel) ∈ R², state + u ∈ R, control + + # constraints + tf ≥ tf_min + pos_min ≤ pos(t) ≤ pos_max + vel_min ≤ vel(t) ≤ vel_max + u_min ≤ u(t) ≤ u_max + + # initial conditions + pos(t0) == pos_t0 + vel(t0) == vel_t0 + + # final conditions + pos(tf) == pos_tf + vel(tf) ≥ vel_tf_min + + # dynamics + ẋ(t) == [vel(t), a * u(t) - b * cos(c * pos(t))] + + # objective + tf → min + end + + # initial guess + init = (state=[pos_t0, vel_t0], control=0.0, variable=tf_start) + + # 
discretise the optimal control problem + docp = direct_transcription( + ocp, + description...; + lagrange_to_mayer=false, + init=init, + grid_size=grid_size, + disc_method=:trapeze, + kwargs..., + ) + + return docp +end diff --git a/ext/OptimalControlModels_s/mountain_car_s.jl b/ext/OptimalControlModels_s/mountain_car_s.jl new file mode 100644 index 00000000..cd770838 --- /dev/null +++ b/ext/OptimalControlModels_s/mountain_car_s.jl @@ -0,0 +1,97 @@ +""" +$(TYPEDSIGNATURES) + +Constructs an **OptimalControl problem** for the Mountain Car problem (symbolic version). +The goal is to drive an underpowered car up a steep hill. +The objective is to minimize the time required to reach the target position. +The problem formulation can be found [here](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html). + +# Arguments + +- `::OptimalControlBackend`: Placeholder type specifying the OptimalControl backend or solver interface. +- `grid_size::Int=200`: (Keyword) Number of discretisation points for the direct transcription grid. + +# Returns + +- `docp`: The direct optimal control problem object representing the Mountain Car problem. 
+ +# Example + +```julia-repl +julia> using OptimalControlProblems + +julia> docp = OptimalControlProblems.mountain_car_s(OptimalControlBackend(); grid_size=100); +``` +""" +function OptimalControlProblems.mountain_car_s( + ::OptimalControlBackend, + description::Symbol...; + grid_size::Int=grid_size_data(:mountain_car), + parameters::Union{Nothing,NamedTuple}=nothing, + kwargs..., +) + + # parameters + params = parameters_data(:mountain_car, parameters) + t0 = params[:t0] + tf_start = params[:tf_start] + tf_min = params[:tf_min] + pos_t0 = params[:pos_t0] + vel_t0 = params[:vel_t0] + pos_tf = params[:pos_tf] + vel_tf_min = params[:vel_tf_min] + pos_min = params[:pos_min] + pos_max = params[:pos_max] + vel_min = params[:vel_min] + vel_max = params[:vel_max] + u_min = params[:u_min] + u_max = params[:u_max] + a = params[:a] + b = params[:b] + c = params[:c] + + # model + ocp = @def begin + tf ∈ R, variable + t ∈ [t0, tf], time + x = (pos, vel) ∈ R², state + u ∈ R, control + + # constraints + tf ≥ tf_min + pos_min ≤ pos(t) ≤ pos_max + vel_min ≤ vel(t) ≤ vel_max + u_min ≤ u(t) ≤ u_max + + # initial conditions + pos(t0) == pos_t0 + vel(t0) == vel_t0 + + # final conditions + pos(tf) == pos_tf + vel(tf) ≥ vel_tf_min + + # dynamics + ∂(pos)(t) == vel(t) + ∂(vel)(t) == a * u(t) - b * cos(c * pos(t)) + + # objective + tf → min + end + + # initial guess + init = (state=[pos_t0, vel_t0], control=0.0, variable=tf_start) + + # discretise the optimal control problem + docp = direct_transcription( + ocp, + description...; + lagrange_to_mayer=false, + init=init, + grid_size=grid_size, + disc_method=:trapeze, + kwargs..., + ) + + return docp +end diff --git a/src/OptimalControlProblems.jl b/src/OptimalControlProblems.jl index 9729407d..2a27ad5c 100644 --- a/src/OptimalControlProblems.jl +++ b/src/OptimalControlProblems.jl @@ -72,6 +72,17 @@ function make_list_of_problems() :robot, :space_shuttle, :steering, + :beam, + :cart_pendulum, + :chain, + :dielectrophoretic_particle, + 
:double_oscillator, + :electric_vehicle, + :glider, + :jackson, + :robbins, + :rocket, + :vanderpol, ] list_of_problems = setdiff(list_of_problems, problems_to_exclude)