Skip to content

Nonmonotone variant of linesearch methods #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/algorithms/drls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ See also: [`DRLS`](@ref).
- `gamma`: stepsize to use, chosen appropriately based on Lf and mf by default.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Themelis, Stella, Patrinos, "Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type linesearch algorithms", Computational Optimization and Applications, vol. 82, no. 2, pp. 395-440 (2022).
Expand All @@ -61,6 +62,7 @@ Base.@kwdef struct DRLSIteration{R,Tx,Tf,Tg,Tmf,TLf,D}
dre_sign::Int = mf === nothing || mf <= 0 ? 1 : -1
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:DRLSIteration}) = Base.IsInfinite()
Expand All @@ -80,6 +82,7 @@ Base.@kwdef mutable struct DRLSState{R,Tx,TH}
f_u::R
g_v::R
H::TH
merit::R = zero(gamma)
tau::R = zero(gamma)
u0::Tx = similar(x)
u1::Tx = similar(x)
Expand Down Expand Up @@ -116,6 +119,8 @@ function Base.iterate(iter::DRLSIteration)
g_v = g_v,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = DRE(state)
return state, state
end

Expand All @@ -141,8 +146,8 @@ update_direction_state!(iter::DRLSIteration, state::DRLSState) =
update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,Tx,Tf}
DRE_curr = DRE(state)
threshold = iter.dre_sign * DRE_curr - iter.c / iter.gamma * norm(state.res)^2
# retrieve merit and set threshold
threshold = iter.dre_sign * state.merit - iter.c / iter.gamma * norm(state.res)^2

set_next_direction!(iter, state)

Expand All @@ -157,13 +162,14 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
state.res .= state.u .- state.v
state.xbar .= state.x .- iter.lambda * state.res
DRE_x = DRE(state)

update_direction_state!(iter, state)

a, b, c = R(0), R(0), R(0)

for k = 1:iter.max_backtracks
if iter.dre_sign * DRE(state) <= threshold
if iter.dre_sign * DRE_x <= threshold
break
end

Expand All @@ -189,7 +195,10 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
state.res .= state.u .- state.v
state.xbar .= state.x .- iter.lambda * state.res
DRE_x = DRE(state)
end
# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * DRE_x

return state, state
end
Expand Down
28 changes: 15 additions & 13 deletions src/algorithms/panoc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ See also: [`PANOC`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Stella, Themelis, Sopasakis, Patrinos, "A simple and efficient algorithm for nonlinear model predictive control", 56th IEEE Conference on Decision and Control (2017).
Expand All @@ -49,6 +50,7 @@ Base.@kwdef struct PANOCIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:PANOCIteration}) = Base.IsInfinite()
Expand All @@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCState{R,Tx,TAx,TH}
g_z::R # value of nonsmooth term (at z)
res::Tx # fixed-point residual at iterate (= x - z)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
x_prev::Tx = similar(x)
res_prev::Tx = similar(x)
Expand Down Expand Up @@ -108,6 +111,8 @@ function Base.iterate(iter::PANOCIteration{R}) where {R}
res = x - z,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = f_model(iter, state) + state.g_z
return state, state
end

Expand Down Expand Up @@ -138,9 +143,9 @@ reset_direction_state!(iter::PANOCIteration, state::PANOCState) =
function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R,Tx,Tf}
f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf)

f_Az_upp = if iter.adaptive == true
if iter.adaptive == true
gamma_prev = state.gamma
state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!(
state.gamma, state.g_z, f_Az, _ = backtrack_stepsize!(
state.gamma,
iter.f,
iter.A,
Expand All @@ -160,13 +165,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
if state.gamma != gamma_prev
reset_direction_state!(iter, state)
end
f_Az_upp
else
f_model(iter, state)
end

# compute FBE
FBE_x = f_Az_upp + state.g_z

# compute direction
set_next_direction!(iter, state)
Expand All @@ -192,17 +192,18 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
copyto!(state.z_curr, state.z)
state.f_Ax = state.f_Ax_d

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol

state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
state.res .= state.x .- state.z
FBE_x_new = f_model(iter, state) + state.g_z
FBE_x = f_model(iter, state) + state.g_z

for k = 1:iter.max_backtracks
if FBE_x_new <= threshold
if FBE_x <= threshold
break
end

Expand Down Expand Up @@ -246,11 +247,12 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
state.res .= state.x .- state.z
FBE_x_new = f_model(iter, state) + state.g_z
FBE_x = f_model(iter, state) + state.g_z
end

update_direction_state!(iter, state)

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
return state, state
end

Expand Down
38 changes: 22 additions & 16 deletions src/algorithms/panocplus.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ See also: [`PANOCplus`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
Expand All @@ -49,6 +50,7 @@ Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite()
Expand All @@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH}
g_z::R # value of nonsmooth term (at z)
res::Tx # fixed-point residual at iterate (= x - z)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
x_prev::Tx = similar(x)
res_prev::Tx = similar(x)
Expand Down Expand Up @@ -121,10 +124,11 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R}
)
else
mul!(state.Az, iter.A, state.z)
f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az)
state.grad_f_Az = grad_f_Az
_, state.grad_f_Az = value_and_gradient(iter.f, state.Az)
end
mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)
# initialize merit
state.merit = f_model(iter, state) + state.g_z
return state, state
end

Expand Down Expand Up @@ -170,12 +174,11 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
state.x_prev .= state.x
state.res_prev .= state.res

# compute FBE
FBE_x = f_model(iter, state) + state.g_z

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol
FBE_x = f_model(iter, state) + state.g_z

tau_backtracks = 0
can_update_direction = true
Expand Down Expand Up @@ -224,8 +227,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
end
mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)

FBE_x_new = f_Az_upp + state.g_z
if FBE_x_new <= threshold || tau_backtracks >= iter.max_backtracks
FBE_x = f_Az_upp + state.g_z
if FBE_x <= threshold || tau_backtracks >= iter.max_backtracks
break
end
state.tau = tau_backtracks >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
Expand All @@ -235,6 +238,9 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where

update_direction_state!(iter, state)

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x

return state, state

end
Expand Down Expand Up @@ -280,13 +286,13 @@ See also: [`PANOCplusIteration`](@ref), [`IterativeAlgorithm`](@ref).
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
"""
PANOCplus(;
maxit = 1_000,
tol = 1e-8,
stop = (iter, state) -> default_stopping_criterion(tol, iter, state),
solution = default_solution,
verbose = false,
freq = 10,
display = default_display,
maxit=1_000,
tol=1e-8,
stop=(iter, state) -> default_stopping_criterion(tol, iter, state),
solution=default_solution,
verbose=false,
freq=10,
display=default_display,
kwargs...,
) = IterativeAlgorithm(
PANOCplusIteration;
Expand Down
28 changes: 15 additions & 13 deletions src/algorithms/zerofpr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ See also: [`ZeroFPR`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Themelis, Stella, Patrinos, "Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms", SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274-2303 (2018).
Expand All @@ -50,6 +51,7 @@ Base.@kwdef struct ZeroFPRIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:ZeroFPRIteration}) = Base.IsInfinite()
Expand All @@ -66,6 +68,7 @@ Base.@kwdef mutable struct ZeroFPRState{R,Tx,TAx,TH}
g_xbar::R # value of nonsmooth term (at xbar)
res::Tx # fixed-point residual at iterate (= x - xbar)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
Axbar::TAx = similar(Ax)
grad_f_Axbar::TAx = similar(Ax)
Expand Down Expand Up @@ -106,6 +109,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}) where {R}
res = x - xbar,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = f_model(iter, state) + state.g_xbar
return state, state
end

Expand Down Expand Up @@ -140,9 +145,9 @@ reset_direction_state!(iter::ZeroFPRIteration, state::ZeroFPRState) =
reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
f_Axbar_upp, f_Axbar = if iter.adaptive == true
if iter.adaptive == true
gamma_prev = state.gamma
state.gamma, state.g_xbar, f_Axbar, f_Axbar_upp = backtrack_stepsize!(
state.gamma, state.g_xbar, _, _ = backtrack_stepsize!(
state.gamma,
iter.f,
iter.A,
Expand All @@ -162,21 +167,15 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
if state.gamma != gamma_prev
reset_direction_state!(iter, state)
end
f_Axbar_upp, f_Axbar
else
mul!(state.Axbar, iter.A, state.xbar)
f_Axbar, grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
state.grad_f_Axbar .= grad_f_Axbar
f_model(iter, state), f_Axbar
_, state.grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
end

# compute FBE
FBE_x = f_Axbar_upp + state.g_xbar

# compute residual at xbar
mul!(state.At_grad_f_Axbar, iter.A', state.grad_f_Axbar)
state.y .= state.xbar .- state.gamma .* state.At_grad_f_Axbar
g_xbarbar = prox!(state.xbarbar, iter.g, state.y, state.gamma)
prox!(state.xbarbar, iter.g, state.y, state.gamma)
state.res_xbar .= state.xbar .- state.xbarbar

if state.is_prev_set == true
Expand All @@ -193,9 +192,11 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
state.tau = R(1)
mul!(state.Ad, iter.A, state.d)

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol
FBE_x = f_model(iter, state) + state.g_xbar

for k = 1:iter.max_backtracks
state.x .= state.xbar_prev .+ state.tau .* state.d
Expand All @@ -215,7 +216,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}

state.tau = k >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
end

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
return state, state
end

Expand Down
Loading
Loading