Skip to content

Nonmonotone variant of linesearch methods #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/algorithms/drls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ See also: [`DRLS`](@ref).
- `gamma`: stepsize to use, chosen appropriately based on Lf and mf by default.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Themelis, Stella, Patrinos, "Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type linesearch algorithms", Computational Optimization and Applications, vol. 82, no. 2, pp. 395-440 (2022).
Expand All @@ -61,6 +62,7 @@ Base.@kwdef struct DRLSIteration{R,Tx,Tf,Tg,Tmf,TLf,D}
dre_sign::Int = mf === nothing || mf <= 0 ? 1 : -1
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:DRLSIteration}) = Base.IsInfinite()
Expand All @@ -80,6 +82,7 @@ Base.@kwdef mutable struct DRLSState{R,Tx,TH}
f_u::R
g_v::R
H::TH
merit::R = zero(gamma)
tau::R = zero(gamma)
u0::Tx = similar(x)
u1::Tx = similar(x)
Expand Down Expand Up @@ -116,6 +119,8 @@ function Base.iterate(iter::DRLSIteration)
g_v = g_v,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = DRE(state)
return state, state
end

Expand All @@ -141,8 +146,8 @@ update_direction_state!(iter::DRLSIteration, state::DRLSState) =
update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,Tx,Tf}
DRE_curr = DRE(state)
threshold = iter.dre_sign * DRE_curr - iter.c / iter.gamma * norm(state.res)^2
# retrieve merit and set threshold
threshold = iter.dre_sign * state.merit - iter.c / iter.gamma * norm(state.res)^2

set_next_direction!(iter, state)

Expand All @@ -157,13 +162,14 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
state.res .= state.u .- state.v
state.xbar .= state.x .- iter.lambda * state.res
DRE_x = DRE(state)

update_direction_state!(iter, state)

a, b, c = R(0), R(0), R(0)

for k = 1:iter.max_backtracks
if iter.dre_sign * DRE(state) <= threshold
if iter.dre_sign * DRE_x <= threshold
break
end

Expand All @@ -189,7 +195,10 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
state.res .= state.u .- state.v
state.xbar .= state.x .- iter.lambda * state.res
DRE_x = DRE(state)
end
# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * DRE_x

return state, state
end
Expand Down
28 changes: 15 additions & 13 deletions src/algorithms/panoc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ See also: [`PANOC`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Stella, Themelis, Sopasakis, Patrinos, "A simple and efficient algorithm for nonlinear model predictive control", 56th IEEE Conference on Decision and Control (2017).
Expand All @@ -49,6 +50,7 @@ Base.@kwdef struct PANOCIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:PANOCIteration}) = Base.IsInfinite()
Expand All @@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCState{R,Tx,TAx,TH}
g_z::R # value of nonsmooth term (at z)
res::Tx # fixed-point residual at iterate (= x - z)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
x_prev::Tx = similar(x)
res_prev::Tx = similar(x)
Expand Down Expand Up @@ -108,6 +111,8 @@ function Base.iterate(iter::PANOCIteration{R}) where {R}
res = x - z,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = f_model(iter, state) + state.g_z
return state, state
end

Expand Down Expand Up @@ -138,9 +143,9 @@ reset_direction_state!(iter::PANOCIteration, state::PANOCState) =
function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R,Tx,Tf}
f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf)

f_Az_upp = if iter.adaptive == true
if iter.adaptive == true
gamma_prev = state.gamma
state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!(
state.gamma, state.g_z, f_Az, _ = backtrack_stepsize!(
state.gamma,
iter.f,
iter.A,
Expand All @@ -160,13 +165,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
if state.gamma != gamma_prev
reset_direction_state!(iter, state)
end
f_Az_upp
else
f_model(iter, state)
end

# compute FBE
FBE_x = f_Az_upp + state.g_z

# compute direction
set_next_direction!(iter, state)
Expand All @@ -192,17 +192,18 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
copyto!(state.z_curr, state.z)
state.f_Ax = state.f_Ax_d

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol

state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
state.res .= state.x .- state.z
FBE_x_new = f_model(iter, state) + state.g_z
FBE_x = f_model(iter, state) + state.g_z

for k = 1:iter.max_backtracks
if FBE_x_new <= threshold
if FBE_x <= threshold
break
end

Expand Down Expand Up @@ -246,11 +247,12 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
state.res .= state.x .- state.z
FBE_x_new = f_model(iter, state) + state.g_z
FBE_x = f_model(iter, state) + state.g_z
end

update_direction_state!(iter, state)

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
return state, state
end

Expand Down
38 changes: 22 additions & 16 deletions src/algorithms/panocplus.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ See also: [`PANOCplus`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
Expand All @@ -49,6 +50,7 @@ Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite()
Expand All @@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH}
g_z::R # value of nonsmooth term (at z)
res::Tx # fixed-point residual at iterate (= x - z)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
x_prev::Tx = similar(x)
res_prev::Tx = similar(x)
Expand Down Expand Up @@ -121,10 +124,11 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R}
)
else
mul!(state.Az, iter.A, state.z)
f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az)
state.grad_f_Az = grad_f_Az
_, state.grad_f_Az = value_and_gradient(iter.f, state.Az)
end
mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)
# initialize merit
state.merit = f_model(iter, state) + state.g_z
return state, state
end

Expand Down Expand Up @@ -170,12 +174,11 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
state.x_prev .= state.x
state.res_prev .= state.res

# compute FBE
FBE_x = f_model(iter, state) + state.g_z

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol
FBE_x = f_model(iter, state) + state.g_z

tau_backtracks = 0
can_update_direction = true
Expand Down Expand Up @@ -224,8 +227,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
end
mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)

FBE_x_new = f_Az_upp + state.g_z
if FBE_x_new <= threshold || tau_backtracks >= iter.max_backtracks
FBE_x = f_Az_upp + state.g_z
if FBE_x <= threshold || tau_backtracks >= iter.max_backtracks
break
end
state.tau = tau_backtracks >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
Expand All @@ -235,6 +238,9 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where

update_direction_state!(iter, state)

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x

return state, state

end
Expand Down Expand Up @@ -280,13 +286,13 @@ See also: [`PANOCplusIteration`](@ref), [`IterativeAlgorithm`](@ref).
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
"""
PANOCplus(;
maxit = 1_000,
tol = 1e-8,
stop = (iter, state) -> default_stopping_criterion(tol, iter, state),
solution = default_solution,
verbose = false,
freq = 10,
display = default_display,
maxit=1_000,
tol=1e-8,
stop=(iter, state) -> default_stopping_criterion(tol, iter, state),
solution=default_solution,
verbose=false,
freq=10,
display=default_display,
kwargs...,
) = IterativeAlgorithm(
PANOCplusIteration;
Expand Down
28 changes: 15 additions & 13 deletions src/algorithms/zerofpr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ See also: [`ZeroFPR`](@ref).
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
- `max_backtracks=20`: maximum number of line-search backtracks.
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone line-search; must satisfy monotonicity ∈ (0, 1], with the default value 1 recovering the monotone scheme.

# References
1. Themelis, Stella, Patrinos, "Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms", SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274-2303 (2018).
Expand All @@ -50,6 +51,7 @@ Base.@kwdef struct ZeroFPRIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
minimum_gamma::R = real(eltype(x0))(1e-7)
max_backtracks::Int = 20
directions::D = LBFGS(5)
monotonicity::R = real(eltype(x0))(1)
end

Base.IteratorSize(::Type{<:ZeroFPRIteration}) = Base.IsInfinite()
Expand All @@ -66,6 +68,7 @@ Base.@kwdef mutable struct ZeroFPRState{R,Tx,TAx,TH}
g_xbar::R # value of nonsmooth term (at xbar)
res::Tx # fixed-point residual at iterate (= x - xbar)
H::TH # variable metric
merit::R = zero(gamma)
tau::R = zero(gamma)
Axbar::TAx = similar(Ax)
grad_f_Axbar::TAx = similar(Ax)
Expand Down Expand Up @@ -106,6 +109,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}) where {R}
res = x - xbar,
H = initialize(iter.directions, x),
)
# initialize merit
state.merit = f_model(iter, state) + state.g_xbar
return state, state
end

Expand Down Expand Up @@ -140,9 +145,9 @@ reset_direction_state!(iter::ZeroFPRIteration, state::ZeroFPRState) =
reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)

function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
f_Axbar_upp, f_Axbar = if iter.adaptive == true
if iter.adaptive == true
gamma_prev = state.gamma
state.gamma, state.g_xbar, f_Axbar, f_Axbar_upp = backtrack_stepsize!(
state.gamma, state.g_xbar, _, _ = backtrack_stepsize!(
state.gamma,
iter.f,
iter.A,
Expand All @@ -162,21 +167,15 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
if state.gamma != gamma_prev
reset_direction_state!(iter, state)
end
f_Axbar_upp, f_Axbar
else
mul!(state.Axbar, iter.A, state.xbar)
f_Axbar, grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
state.grad_f_Axbar .= grad_f_Axbar
f_model(iter, state), f_Axbar
_, state.grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
end

# compute FBE
FBE_x = f_Axbar_upp + state.g_xbar

# compute residual at xbar
mul!(state.At_grad_f_Axbar, iter.A', state.grad_f_Axbar)
state.y .= state.xbar .- state.gamma .* state.At_grad_f_Axbar
g_xbarbar = prox!(state.xbarbar, iter.g, state.y, state.gamma)
prox!(state.xbarbar, iter.g, state.y, state.gamma)
state.res_xbar .= state.xbar .- state.xbarbar

if state.is_prev_set == true
Expand All @@ -193,9 +192,11 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
state.tau = R(1)
mul!(state.Ad, iter.A, state.d)

# retrieve merit and set threshold
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
tol = 10 * eps(R) * (1 + abs(FBE_x))
threshold = FBE_x - sigma * norm(state.res)^2 + tol
tol = 10 * eps(R) * (1 + abs(state.merit))
threshold = state.merit - sigma * norm(state.res)^2 + tol
FBE_x = f_model(iter, state) + state.g_xbar

for k = 1:iter.max_backtracks
state.x .= state.xbar_prev .+ state.tau .* state.d
Expand All @@ -215,7 +216,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}

state.tau = k >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
end

# update merit with averaging rule
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
return state, state
end

Expand Down
Loading
Loading