Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions ext/Descriptions/mountain_car.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
The **Mountain Car problem** is a classic benchmark in control and reinforcement learning.
It involves an underpowered car that must climb a steep hill to reach a target position.
Because the car's engine is too weak to climb the hill directly, it must first drive away from the goal to gain momentum by oscillating in a valley.
The goal is to reach the target position in **minimum time**.

### Mathematical formulation

The problem can be stated as

```math
\begin{aligned}
\min_{x,v,u,t_f} \quad & J = t_f \\[0.5em]
\text{s.t.} \quad & \dot{x}(t) = v(t), \\[0.5em]
& \dot{v}(t) = a \, u(t) - b \, \cos(c \, x(t)), \\[0.5em]
& x(0) = -0.5, \quad v(0) = 0.0, \\[0.5em]
& x(t_f) = 0.5, \quad v(t_f) \ge 0.0, \\[0.5em]
& -1.2 \le x(t) \le 0.5, \\[0.5em]
& -0.07 \le v(t) \le 0.07, \\[0.5em]
& -1 \le u(t) \le 1, \\[0.5em]
& t_f \ge 0.
\end{aligned}
```

### Parameters

| Parameter | Symbol | Value |
|-----------|--------|-------|
| Power coefficient | $a$ | 0.001 |
| Gravity coefficient | $b$ | 0.0025 |
| Frequency coefficient | $c$ | 3.0 |
| Initial position | $x_0$ | -0.5 |
| Target position | $x_f$ | 0.5 |

### Qualitative behaviour

- The car must oscillate back and forth in the valley to build enough kinetic energy to climb the right hill.
- The optimal strategy typically involves a sequence of full-throttle accelerations in alternating directions (**bang-bang control**).
- The minimum time objective makes the problem sensitive to the initial guess for the final time.

### Characteristics

- Two-dimensional state space (position and velocity),
- Scalar control (effort/acceleration),
- **Free final time** with minimum time objective,
- State constraints (position and velocity bounds),
- Control constraints (bounds on acceleration).

### References

- **Dymos Documentation**. [*The Mountain Car Problem*](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html).
Provides a detailed description and implementation of the Mountain Car problem using the Dymos library.

- **Gymnasium (formerly OpenAI Gym)**. [*MountainCar-v0*](https://gymnasium.farama.org/environments/classic_control/mountain_car/).
A standard reinforcement learning environment for the Mountain Car problem.

- Moore, A. W. (1990). *Efficient Memory-based Learning for Robot Control*. PhD thesis, University of Cambridge.
Introduces the Mountain Car problem as a benchmark for reinforcement learning and control.
114 changes: 114 additions & 0 deletions ext/JuMPModels/mountain_car.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
$(TYPEDSIGNATURES)

Constructs and returns a JuMP model for the **Mountain Car Problem**.
The model represents the dynamics of an underpowered car climbing a hill,
discretised in time with the trapezoidal integration rule.
The objective is to minimize the final time `tf` to reach the target position.

# Arguments

- `::JuMPBackend`: Specifies the backend for building the JuMP model.
- `args...`: Extra positional arguments forwarded to the `JuMP.Model` constructor.

# Keywords

- `grid_size::Int`: Number of discretisation steps for the time horizon.
  Defaults to the value registered in the problem metadata,
  `grid_size_data(:mountain_car)`.
- `parameters::Union{Nothing,NamedTuple}=nothing`: Optional named tuple of
  problem parameters overriding the registered defaults (merged by
  `parameters_data(:mountain_car, parameters)`).
- `kwargs...`: Extra keyword arguments forwarded to the `JuMP.Model` constructor.

# Returns

- `model::JuMP.Model`: A JuMP model representing the Mountain Car optimal control problem.

# Example

```julia-repl
julia> using OptimalControlProblems
julia> using JuMP

julia> model = OptimalControlProblems.mountain_car(JuMPBackend(); grid_size=100)
```

# References

- Problem formulation: https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html
"""
function OptimalControlProblems.mountain_car(
    ::JuMPBackend,
    args...;
    grid_size::Int=grid_size_data(:mountain_car),
    parameters::Union{Nothing,NamedTuple}=nothing,
    kwargs...,
)

    # problem data: registered defaults, possibly overridden via `parameters`
    params = parameters_data(:mountain_car, parameters)
    t0 = params[:t0]                  # initial time
    tf_start = params[:tf_start]      # initial guess for the free final time
    tf_min = params[:tf_min]          # lower bound on the final time
    pos_t0 = params[:pos_t0]          # initial position
    vel_t0 = params[:vel_t0]          # initial velocity
    pos_tf = params[:pos_tf]          # target (final) position
    vel_tf_min = params[:vel_tf_min]  # minimum allowed final velocity
    pos_min = params[:pos_min]        # position bounds
    pos_max = params[:pos_max]
    vel_min = params[:vel_min]        # velocity bounds
    vel_max = params[:vel_max]
    u_min = params[:u_min]            # control bounds
    u_max = params[:u_max]
    a = params[:a]                    # power coefficient
    b = params[:b]                    # gravity coefficient
    c = params[:c]                    # frequency coefficient

    # model
    model = JuMP.Model(args...; kwargs...)

    # metadata attached to the model; presumably consumed by downstream
    # tooling for labelling/extracting the solution — TODO confirm consumers.
    # The time-grid closure reads `model[:tf]` lazily, so it must be called
    # only after the model has been solved (`value` requires a result).
    model[:time_grid] = () -> range(t0, value(model[:tf]), grid_size + 1)
    model[:state_components] = ["pos", "vel"]
    model[:costate_components] = ["∂pos", "∂vel"]
    model[:control_components] = ["u"]
    model[:variable_components] = ["tf"]

    # N = grid_size (registered as a model expression so macros below can use it)
    @expression(model, N, grid_size)

    # decision variables: states and control sampled at nodes 0..N,
    # plus the free final time tf
    @variables(
        model,
        begin
            pos_min ≤ pos[0:N] ≤ pos_max, (start = pos_t0)
            vel_min ≤ vel[0:N] ≤ vel_max, (start = vel_t0)
            u_min ≤ u[0:N] ≤ u_max, (start = 0.0)
            tf ≥ tf_min, (start = tf_start)
        end
    )

    # boundary constraints: fixed initial state, fixed final position,
    # final velocity bounded below
    @constraints(
        model,
        begin
            pos[0] == pos_t0
            vel[0] == vel_t0
            pos[N] == pos_tf
            vel[N] ≥ vel_tf_min
        end
    )

    # dynamics: ẋ = v, v̇ = a·u − b·cos(c·x), evaluated at every node
    @expressions(
        model,
        begin
            Δt, (tf - t0) / N
            dpos[i = 0:N], vel[i]
            dvel[i = 0:N], a * u[i] - b * cos(c * pos[i])
        end
    )

    # trapezoidal collocation: x_i = x_{i-1} + Δt/2 · (f_i + f_{i-1})
    @constraints(
        model,
        begin
            ∂pos[i = 1:N], pos[i] == pos[i - 1] + 0.5 * Δt * (dpos[i] + dpos[i - 1])
            ∂vel[i = 1:N], vel[i] == vel[i - 1] + 0.5 * Δt * (dvel[i] + dvel[i - 1])
        end
    )

    # objective: minimum time
    @objective(model, Min, tf)

    return model
end
21 changes: 21 additions & 0 deletions ext/MetaData/mountain_car.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Default metadata for the Mountain Car problem: discretisation grid size and
# the physical/boundary parameters used by every backend implementation
# (values match the formulation in ext/Descriptions/mountain_car.md).
mountain_car_meta = OrderedDict(
    :grid_size => 200,
    :parameters => (
        t0 = 0.0,          # initial time
        tf_start = 50.0,   # initial guess for the free final time
        tf_min = 0.1,      # lower bound on the final time
        pos_t0 = -0.5,     # initial position (bottom of the valley)
        vel_t0 = 0.0,      # initial velocity
        pos_tf = 0.5,      # target position (top of the hill)
        vel_tf_min = 0.0,  # minimum allowed final velocity
        pos_min = -1.2,    # position bounds
        pos_max = 0.5,
        vel_min = -0.07,   # velocity bounds
        vel_max = 0.07,
        u_min = -1.0,      # control (throttle) bounds
        u_max = 1.0,
        a = 0.001,         # power coefficient
        b = 0.0025,        # gravity coefficient
        c = 3.0,           # frequency coefficient of the hill profile
    ),
)
96 changes: 96 additions & 0 deletions ext/OptimalControlModels/mountain_car.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
$(TYPEDSIGNATURES)

Constructs an **OptimalControl problem** for the Mountain Car problem.
The goal is to drive an underpowered car up a steep hill.
The objective is to minimize the time required to reach the target position.
The problem formulation can be found [here](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html).

# Arguments

- `::OptimalControlBackend`: Placeholder type specifying the OptimalControl backend or solver interface.
- `description::Symbol...`: Optional description symbols forwarded to `direct_transcription`.

# Keywords

- `grid_size::Int`: Number of discretisation points for the direct transcription
  grid. Defaults to the value registered in the problem metadata,
  `grid_size_data(:mountain_car)`.
- `parameters::Union{Nothing,NamedTuple}=nothing`: Optional named tuple of
  problem parameters overriding the registered defaults (merged by
  `parameters_data(:mountain_car, parameters)`).
- `kwargs...`: Extra keyword arguments forwarded to `direct_transcription`.

# Returns

- `docp`: The direct optimal control problem object representing the Mountain Car problem.

# Example

```julia-repl
julia> using OptimalControlProblems

julia> docp = OptimalControlProblems.mountain_car(OptimalControlBackend(); grid_size=100);
```
"""
function OptimalControlProblems.mountain_car(
    ::OptimalControlBackend,
    description::Symbol...;
    grid_size::Int=grid_size_data(:mountain_car),
    parameters::Union{Nothing,NamedTuple}=nothing,
    kwargs...,
)

    # problem data: registered defaults, possibly overridden via `parameters`
    params = parameters_data(:mountain_car, parameters)
    t0 = params[:t0]                  # initial time
    tf_start = params[:tf_start]      # initial guess for the free final time
    tf_min = params[:tf_min]          # lower bound on the final time
    pos_t0 = params[:pos_t0]          # initial position
    vel_t0 = params[:vel_t0]          # initial velocity
    pos_tf = params[:pos_tf]          # target (final) position
    vel_tf_min = params[:vel_tf_min]  # minimum allowed final velocity
    pos_min = params[:pos_min]        # position bounds
    pos_max = params[:pos_max]
    vel_min = params[:vel_min]        # velocity bounds
    vel_max = params[:vel_max]
    u_min = params[:u_min]            # control bounds
    u_max = params[:u_max]
    a = params[:a]                    # power coefficient
    b = params[:b]                    # gravity coefficient
    c = params[:c]                    # frequency coefficient

    # continuous-time optimal control model (OptimalControl @def DSL):
    # free final time, 2D state (position, velocity), scalar control
    ocp = @def begin
        tf ∈ R, variable
        t ∈ [t0, tf], time
        x = (pos, vel) ∈ R², state
        u ∈ R, control

        # path and variable constraints
        tf ≥ tf_min
        pos_min ≤ pos(t) ≤ pos_max
        vel_min ≤ vel(t) ≤ vel_max
        u_min ≤ u(t) ≤ u_max

        # initial conditions
        pos(t0) == pos_t0
        vel(t0) == vel_t0

        # final conditions
        pos(tf) == pos_tf
        vel(tf) ≥ vel_tf_min

        # dynamics: ẋ = v, v̇ = a·u − b·cos(c·x)
        ẋ(t) == [vel(t), a * u(t) - b * cos(c * pos(t))]

        # objective: minimum time
        tf → min
    end

    # initial guess: constant state/control, tf from metadata
    init = (state=[pos_t0, vel_t0], control=0.0, variable=tf_start)

    # discretise the optimal control problem (trapezoidal scheme, matching
    # the JuMP backend's transcription)
    docp = direct_transcription(
        ocp,
        description...;
        lagrange_to_mayer=false,
        init=init,
        grid_size=grid_size,
        disc_method=:trapeze,
        kwargs...,
    )

    return docp
end
97 changes: 97 additions & 0 deletions ext/OptimalControlModels_s/mountain_car_s.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
$(TYPEDSIGNATURES)

Constructs an **OptimalControl problem** for the Mountain Car problem
(symbolic version: the dynamics are declared component-wise with `∂`).
The goal is to drive an underpowered car up a steep hill.
The objective is to minimize the time required to reach the target position.
The problem formulation can be found [here](https://openmdao.github.io/dymos/examples/mountain_car/mountain_car.html).

# Arguments

- `::OptimalControlBackend`: Placeholder type specifying the OptimalControl backend or solver interface.
- `description::Symbol...`: Optional description symbols forwarded to `direct_transcription`.

# Keywords

- `grid_size::Int`: Number of discretisation points for the direct transcription
  grid. Defaults to the value registered in the problem metadata,
  `grid_size_data(:mountain_car)`.
- `parameters::Union{Nothing,NamedTuple}=nothing`: Optional named tuple of
  problem parameters overriding the registered defaults (merged by
  `parameters_data(:mountain_car, parameters)`).
- `kwargs...`: Extra keyword arguments forwarded to `direct_transcription`.

# Returns

- `docp`: The direct optimal control problem object representing the Mountain Car problem.

# Example

```julia-repl
julia> using OptimalControlProblems

julia> docp = OptimalControlProblems.mountain_car_s(OptimalControlBackend(); grid_size=100);
```
"""
function OptimalControlProblems.mountain_car_s(
    ::OptimalControlBackend,
    description::Symbol...;
    grid_size::Int=grid_size_data(:mountain_car),
    parameters::Union{Nothing,NamedTuple}=nothing,
    kwargs...,
)

    # problem data: registered defaults, possibly overridden via `parameters`
    # (shares the :mountain_car metadata with the non-symbolic variant)
    params = parameters_data(:mountain_car, parameters)
    t0 = params[:t0]                  # initial time
    tf_start = params[:tf_start]      # initial guess for the free final time
    tf_min = params[:tf_min]          # lower bound on the final time
    pos_t0 = params[:pos_t0]          # initial position
    vel_t0 = params[:vel_t0]          # initial velocity
    pos_tf = params[:pos_tf]          # target (final) position
    vel_tf_min = params[:vel_tf_min]  # minimum allowed final velocity
    pos_min = params[:pos_min]        # position bounds
    pos_max = params[:pos_max]
    vel_min = params[:vel_min]        # velocity bounds
    vel_max = params[:vel_max]
    u_min = params[:u_min]            # control bounds
    u_max = params[:u_max]
    a = params[:a]                    # power coefficient
    b = params[:b]                    # gravity coefficient
    c = params[:c]                    # frequency coefficient

    # continuous-time optimal control model (OptimalControl @def DSL):
    # free final time, 2D state (position, velocity), scalar control
    ocp = @def begin
        tf ∈ R, variable
        t ∈ [t0, tf], time
        x = (pos, vel) ∈ R², state
        u ∈ R, control

        # path and variable constraints
        tf ≥ tf_min
        pos_min ≤ pos(t) ≤ pos_max
        vel_min ≤ vel(t) ≤ vel_max
        u_min ≤ u(t) ≤ u_max

        # initial conditions
        pos(t0) == pos_t0
        vel(t0) == vel_t0

        # final conditions
        pos(tf) == pos_tf
        vel(tf) ≥ vel_tf_min

        # dynamics, written per component: ẋ = v, v̇ = a·u − b·cos(c·x)
        ∂(pos)(t) == vel(t)
        ∂(vel)(t) == a * u(t) - b * cos(c * pos(t))

        # objective: minimum time
        tf → min
    end

    # initial guess: constant state/control, tf from metadata
    init = (state=[pos_t0, vel_t0], control=0.0, variable=tf_start)

    # discretise the optimal control problem (trapezoidal scheme, matching
    # the JuMP backend's transcription)
    docp = direct_transcription(
        ocp,
        description...;
        lagrange_to_mayer=false,
        init=init,
        grid_size=grid_size,
        disc_method=:trapeze,
        kwargs...,
    )

    return docp
end
Loading
Loading