diff --git a/src/algorithms/drls.jl b/src/algorithms/drls.jl
index f7aed73..2d430fd 100644
--- a/src/algorithms/drls.jl
+++ b/src/algorithms/drls.jl
@@ -43,6 +43,7 @@ See also: [`DRLS`](@ref).
 - `gamma`: stepsize to use, chosen appropriately based on Lf and mf by defaults.
 - `max_backtracks=20`: maximum number of line-search backtracks.
 - `directions=LBFGS(5)`: strategy to use to compute line-search directions.
+- `monotonicity=1`: parameter in `(0, 1]` controlling the averaging scheme of the nonmonotone line search; the default value `1` gives the monotone scheme.
 
 # References
 1. Themelis, Stella, Patrinos, "Douglas-Rachford splitting and ADMM for nonconvex optimization: Accelerated and Newton-type linesearch algorithms", Computational Optimization and Applications, vol. 82, no. 2, pp. 395-440 (2022).
@@ -61,6 +62,7 @@ Base.@kwdef struct DRLSIteration{R,Tx,Tf,Tg,Tmf,TLf,D}
     dre_sign::Int = mf === nothing || mf <= 0 ? 1 : -1
     max_backtracks::Int = 20
     directions::D = LBFGS(5)
+    monotonicity::R = real(eltype(x0))(1)
 end
 
 Base.IteratorSize(::Type{<:DRLSIteration}) = Base.IsInfinite()
@@ -80,6 +82,7 @@ Base.@kwdef mutable struct DRLSState{R,Tx,TH}
     f_u::R
     g_v::R
     H::TH
+    merit::R = zero(gamma)
     tau::R = zero(gamma)
     u0::Tx = similar(x)
     u1::Tx = similar(x)
@@ -116,6 +119,8 @@ function Base.iterate(iter::DRLSIteration)
         g_v = g_v,
         H = initialize(iter.directions, x),
     )
+    # initialize merit
+    state.merit = DRE(state)
     return state, state
 end
@@ -141,8 +146,8 @@ update_direction_state!(iter::DRLSIteration, state::DRLSState) =
     update_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)
 
 function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,Tx,Tf}
-    DRE_curr = DRE(state)
-    threshold = iter.dre_sign * DRE_curr - iter.c / iter.gamma * norm(state.res)^2
+    # retrieve merit and set threshold
+    threshold = iter.dre_sign * state.merit - iter.c / iter.gamma * norm(state.res)^2
 
     set_next_direction!(iter, state)
@@ -157,13 +162,14 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
     state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
     state.res .= state.u .- state.v
     state.xbar .= state.x .- iter.lambda * state.res
+    DRE_x = DRE(state)
 
     update_direction_state!(iter, state)
 
     a, b, c = R(0), R(0), R(0)
 
     for k = 1:iter.max_backtracks
-        if iter.dre_sign * DRE(state) <= threshold
+        if iter.dre_sign * DRE_x <= threshold
             break
         end
@@ -189,7 +195,10 @@ function Base.iterate(iter::DRLSIteration{R,Tx,Tf}, state::DRLSState) where {R,T
         state.g_v = prox!(state.v, iter.g, state.w, iter.gamma)
         state.res .= state.u .- state.v
         state.xbar .= state.x .- iter.lambda * state.res
+        DRE_x = DRE(state)
     end
 
+    # update merit with averaging rule
+    state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * DRE_x
    return state, state
 end
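The key mechanism introduced above is the merit recurrence `merit ← (1 - monotonicity) * merit + monotonicity * DRE_x`. A minimal standalone sketch (the helper below is illustrative, not package code) shows how the line-search reference value evolves: with `monotonicity = 1` the merit always equals the latest envelope value, recovering the previous monotone behavior, while smaller values turn it into a lagged average that relaxes the acceptance test.

```julia
# Illustrative helper, not part of the package: replays the averaging rule
# from the diff on a sequence of envelope values.
function averaged_merit(envelope_values, monotonicity)
    merit = envelope_values[1]  # merit is initialized at the first iterate
    for v in envelope_values[2:end]
        # same recurrence as in the iterate functions in this patch
        merit = (1 - monotonicity) * merit + monotonicity * v
    end
    return merit
end

averaged_merit([10.0, 8.0, 9.0, 7.0], 1.0)  # 7.0: monotone, tracks the last value
averaged_merit([10.0, 8.0, 9.0, 7.0], 0.3)  # ≈ 8.6: nonmonotone, lagged average
```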
diff --git a/src/algorithms/panoc.jl b/src/algorithms/panoc.jl
index c7f2558..7294689 100644
--- a/src/algorithms/panoc.jl
+++ b/src/algorithms/panoc.jl
@@ -32,6 +32,7 @@ See also: [`PANOC`](@ref).
 - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
 - `max_backtracks=20`: maximum number of line-search backtracks.
 - `directions=LBFGS(5)`: strategy to use to compute line-search directions.
+- `monotonicity=1`: parameter in `(0, 1]` controlling the averaging scheme of the nonmonotone line search; the default value `1` gives the monotone scheme.
 
 # References
 1. Stella, Themelis, Sopasakis, Patrinos, "A simple and efficient algorithm for nonlinear model predictive control", 56th IEEE Conference on Decision and Control (2017).
@@ -49,6 +50,7 @@ Base.@kwdef struct PANOCIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
     minimum_gamma::R = real(eltype(x0))(1e-7)
     max_backtracks::Int = 20
     directions::D = LBFGS(5)
+    monotonicity::R = real(eltype(x0))(1)
 end
 
 Base.IteratorSize(::Type{<:PANOCIteration}) = Base.IsInfinite()
@@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCState{R,Tx,TAx,TH}
     g_z::R   # value of nonsmooth term (at z)
     res::Tx  # fixed-point residual at iterate (= x - z)
     H::TH    # variable metric
+    merit::R = zero(gamma)
     tau::R = zero(gamma)
     x_prev::Tx = similar(x)
     res_prev::Tx = similar(x)
@@ -108,6 +111,8 @@ function Base.iterate(iter::PANOCIteration{R}) where {R}
         res = x - z,
         H = initialize(iter.directions, x),
     )
+    # initialize merit
+    state.merit = f_model(iter, state) + state.g_z
     return state, state
 end
@@ -138,9 +143,9 @@ reset_direction_state!(iter::PANOCIteration, state::PANOCState) =
 
 function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R,Tx,Tf}
     f_Az, a, b, c = R(Inf), R(Inf), R(Inf), R(Inf)
-    f_Az_upp = if iter.adaptive == true
+    if iter.adaptive == true
         gamma_prev = state.gamma
-        state.gamma, state.g_z, f_Az, f_Az_upp = backtrack_stepsize!(
+        state.gamma, state.g_z, f_Az, _ = backtrack_stepsize!(
             state.gamma,
             iter.f,
             iter.A,
@@ -160,13 +165,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
         if state.gamma != gamma_prev
             reset_direction_state!(iter, state)
         end
-        f_Az_upp
-    else
-        f_model(iter, state)
     end
 
-    # compute FBE
-    FBE_x = f_Az_upp + state.g_z
-
     # compute direction
     set_next_direction!(iter, state)
@@ -192,17 +192,18 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
     copyto!(state.z_curr, state.z)
     state.f_Ax = state.f_Ax_d
 
+    # retrieve merit and set threshold
     sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
-    tol = 10 * eps(R) * (1 + abs(FBE_x))
-    threshold = FBE_x - sigma * norm(state.res)^2 + tol
+    tol = 10 * eps(R) * (1 + abs(state.merit))
+    threshold = state.merit - sigma * norm(state.res)^2 + tol
 
     state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
     state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
     state.res .= state.x .- state.z
-    FBE_x_new = f_model(iter, state) + state.g_z
+    FBE_x = f_model(iter, state) + state.g_z
 
     for k = 1:iter.max_backtracks
-        if FBE_x_new <= threshold
+        if FBE_x <= threshold
             break
         end
@@ -246,11 +247,12 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R
         state.y .= state.x .- state.gamma .* state.At_grad_f_Ax
         state.g_z = prox!(state.z, iter.g, state.y, state.gamma)
         state.res .= state.x .- state.z
-        FBE_x_new = f_model(iter, state) + state.g_z
+        FBE_x = f_model(iter, state) + state.g_z
     end
 
     update_direction_state!(iter, state)
-
+    # update merit with averaging rule
+    state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
     return state, state
 end
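From the caller's side the change is a single keyword. A usage sketch mirroring the new tests; the problem data `x0`, `f_autodiff`, `A`, `g`, `Lf` are assumed to be set up as in the test suite (e.g. a small lasso problem), not defined here:

```julia
using ProximalAlgorithms

# Monotone line search (default, equivalent to the previous behavior):
solver = ProximalAlgorithms.PANOC(tol = 1e-8)
# Nonmonotone line search, as exercised in the new tests:
solver_nm = ProximalAlgorithms.PANOC(tol = 1e-8, monotonicity = 0.3)
x, it = solver_nm(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
```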
diff --git a/src/algorithms/panocplus.jl b/src/algorithms/panocplus.jl
index d407039..4e68fb0 100644
--- a/src/algorithms/panocplus.jl
+++ b/src/algorithms/panocplus.jl
@@ -32,6 +32,7 @@ See also: [`PANOCplus`](@ref).
 - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
 - `max_backtracks=20`: maximum number of line-search backtracks.
 - `directions=LBFGS(5)`: strategy to use to compute line-search directions.
+- `monotonicity=1`: parameter in `(0, 1]` controlling the averaging scheme of the nonmonotone line search; the default value `1` gives the monotone scheme.
 
 # References
 1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
@@ -49,6 +50,7 @@ Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
     minimum_gamma::R = real(eltype(x0))(1e-7)
     max_backtracks::Int = 20
     directions::D = LBFGS(5)
+    monotonicity::R = real(eltype(x0))(1)
 end
 
 Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite()
@@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH}
     g_z::R   # value of nonsmooth term (at z)
     res::Tx  # fixed-point residual at iterate (= x - z)
     H::TH    # variable metric
+    merit::R = zero(gamma)
     tau::R = zero(gamma)
     x_prev::Tx = similar(x)
     res_prev::Tx = similar(x)
@@ -121,10 +124,11 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R}
         )
     else
         mul!(state.Az, iter.A, state.z)
-        f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az)
-        state.grad_f_Az = grad_f_Az
+        _, state.grad_f_Az = value_and_gradient(iter.f, state.Az)
     end
     mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)
+    # initialize merit
+    state.merit = f_model(iter, state) + state.g_z
     return state, state
 end
@@ -170,12 +174,11 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
     state.x_prev .= state.x
     state.res_prev .= state.res
 
-    # compute FBE
-    FBE_x = f_model(iter, state) + state.g_z
-
+    # retrieve merit and set threshold
     sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
-    tol = 10 * eps(R) * (1 + abs(FBE_x))
-    threshold = FBE_x - sigma * norm(state.res)^2 + tol
+    tol = 10 * eps(R) * (1 + abs(state.merit))
+    threshold = state.merit - sigma * norm(state.res)^2 + tol
+    FBE_x = f_model(iter, state) + state.g_z
 
     tau_backtracks = 0
     can_update_direction = true
@@ -224,8 +227,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
         end
         mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)
 
-        FBE_x_new = f_Az_upp + state.g_z
-        if FBE_x_new <= threshold || tau_backtracks >= iter.max_backtracks
+        FBE_x = f_Az_upp + state.g_z
+        if FBE_x <= threshold || tau_backtracks >= iter.max_backtracks
             break
         end
         state.tau = tau_backtracks >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
@@ -235,6 +238,9 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
 
     update_direction_state!(iter, state)
 
+    # update merit with averaging rule
+    state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
+
     return state, state
 end
@@ -280,13 +286,13 @@ See also: [`PANOCplusIteration`](@ref), [`IterativeAlgorithm`](@ref).
 1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
 """
 PANOCplus(;
-    maxit = 1_000,
-    tol = 1e-8,
-    stop = (iter, state) -> default_stopping_criterion(tol, iter, state),
-    solution = default_solution,
-    verbose = false,
-    freq = 10,
-    display = default_display,
+    maxit=1_000,
+    tol=1e-8,
+    stop=(iter, state) -> default_stopping_criterion(tol, iter, state),
+    solution=default_solution,
+    verbose=false,
+    freq=10,
+    display=default_display,
     kwargs...,
 ) = IterativeAlgorithm(
     PANOCplusIteration;
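For reference, the acceptance test that these line searches share can be written as a small predicate (a sketch with illustrative names, not package API): a candidate is accepted once its forward-backward envelope value falls sufficiently below the averaged merit.

```julia
using LinearAlgebra: norm

# Sketch of the line-search acceptance test used above; all arguments are
# illustrative stand-ins for fields of the iterable and its state.
function accepted(merit, FBE_candidate, res, gamma, beta, alpha)
    sigma = beta * (0.5 / gamma) * (1 - alpha)
    tol = 10 * eps(typeof(merit)) * (1 + abs(merit))  # numerical slack
    return FBE_candidate <= merit - sigma * norm(res)^2 + tol
end
```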
diff --git a/src/algorithms/zerofpr.jl b/src/algorithms/zerofpr.jl
index 8830969..96835a2 100644
--- a/src/algorithms/zerofpr.jl
+++ b/src/algorithms/zerofpr.jl
@@ -33,6 +33,7 @@ See also: [`ZeroFPR`](@ref).
 - `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
 - `max_backtracks=20`: maximum number of line-search backtracks.
 - `directions=LBFGS(5)`: strategy to use to compute line-search directions.
+- `monotonicity=1`: parameter in `(0, 1]` controlling the averaging scheme of the nonmonotone line search; the default value `1` gives the monotone scheme.
 
 # References
 1. Themelis, Stella, Patrinos, "Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms", SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274-2303 (2018).
@@ -50,6 +51,7 @@ Base.@kwdef struct ZeroFPRIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
     minimum_gamma::R = real(eltype(x0))(1e-7)
     max_backtracks::Int = 20
     directions::D = LBFGS(5)
+    monotonicity::R = real(eltype(x0))(1)
 end
 
 Base.IteratorSize(::Type{<:ZeroFPRIteration}) = Base.IsInfinite()
@@ -66,6 +68,7 @@ Base.@kwdef mutable struct ZeroFPRState{R,Tx,TAx,TH}
     g_xbar::R  # value of nonsmooth term (at xbar)
     res::Tx    # fixed-point residual at iterate (= x - xbar)
     H::TH      # variable metric
+    merit::R = zero(gamma)
     tau::R = zero(gamma)
     Axbar::TAx = similar(Ax)
     grad_f_Axbar::TAx = similar(Ax)
@@ -106,6 +109,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}) where {R}
         res = x - xbar,
         H = initialize(iter.directions, x),
     )
+    # initialize merit
+    state.merit = f_model(iter, state) + state.g_xbar
     return state, state
 end
@@ -140,9 +145,9 @@ reset_direction_state!(iter::ZeroFPRIteration, state::ZeroFPRState) =
     reset_direction_state!(acceleration_style(typeof(iter.directions)), iter, state)
 
 function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
-    f_Axbar_upp, f_Axbar = if iter.adaptive == true
+    if iter.adaptive == true
         gamma_prev = state.gamma
-        state.gamma, state.g_xbar, f_Axbar, f_Axbar_upp = backtrack_stepsize!(
+        state.gamma, state.g_xbar, _, _ = backtrack_stepsize!(
             state.gamma,
             iter.f,
             iter.A,
@@ -162,21 +167,15 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
         if state.gamma != gamma_prev
             reset_direction_state!(iter, state)
         end
-        f_Axbar_upp, f_Axbar
     else
         mul!(state.Axbar, iter.A, state.xbar)
-        f_Axbar, grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
-        state.grad_f_Axbar .= grad_f_Axbar
-        f_model(iter, state), f_Axbar
+        _, state.grad_f_Axbar = value_and_gradient(iter.f, state.Axbar)
     end
 
-    # compute FBE
-    FBE_x = f_Axbar_upp + state.g_xbar
-
     # compute residual at xbar
     mul!(state.At_grad_f_Axbar, iter.A', state.grad_f_Axbar)
     state.y .= state.xbar .- state.gamma .* state.At_grad_f_Axbar
-    g_xbarbar = prox!(state.xbarbar, iter.g, state.y, state.gamma)
+    prox!(state.xbarbar, iter.g, state.y, state.gamma)
     state.res_xbar .= state.xbar .- state.xbarbar
 
     if state.is_prev_set == true
@@ -193,9 +192,11 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
     state.tau = R(1)
     mul!(state.Ad, iter.A, state.d)
 
+    # retrieve merit and set threshold
     sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
-    tol = 10 * eps(R) * (1 + abs(FBE_x))
-    threshold = FBE_x - sigma * norm(state.res)^2 + tol
+    tol = 10 * eps(R) * (1 + abs(state.merit))
+    threshold = state.merit - sigma * norm(state.res)^2 + tol
+    FBE_x = f_model(iter, state) + state.g_xbar
 
     for k = 1:iter.max_backtracks
         state.x .= state.xbar_prev .+ state.tau .* state.d
@@ -215,7 +216,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R}
         state.tau = k >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
     end
-
+    # update merit with averaging rule
+    state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x
     return state, state
 end
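Unrolling the recurrence shows that the merit is an exponentially weighted average of past envelope values, `merit_k = (1 - p)^k φ_0 + p Σ_{j=1}^{k} (1 - p)^(k - j) φ_j` with `p = monotonicity`. A quick numerical check of this closed form, on hypothetical values chosen only for illustration:

```julia
p = 0.2
φ = [3.0, 2.5, 2.7, 2.1]  # hypothetical envelope values φ_0, ..., φ_3
# replay the recurrence from the patch ...
merit = foldl((m, v) -> (1 - p) * m + p * v, φ[2:end]; init = φ[1])
# ... and compare against the unrolled closed form
closed = (1 - p)^3 * φ[1] + p * sum((1 - p)^(3 - j) * φ[j+1] for j in 1:3)
merit ≈ closed  # true: both evaluate to 2.708
```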
diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl
index 8ffa2df..155846a 100644
--- a/test/problems/test_lasso_small.jl
+++ b/test/problems/test_lasso_small.jl
@@ -145,6 +145,17 @@ using ProximalAlgorithms:
         @test x0 == x0_backup
     end
 
+    @testset "ZeroFPR (fixed step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.ZeroFPR(tol = TOL, monotonicity = R(0.2))
+        x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= TOL
+        @test it < 20
+        @test x0 == x0_backup
+    end
+
     @testset "ZeroFPR (adaptive step)" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -156,6 +167,17 @@ using ProximalAlgorithms:
         @test x0 == x0_backup
     end
 
+    @testset "ZeroFPR (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL, monotonicity = R(0.2))
+        x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= TOL
+        @test it < 30
+        @test x0 == x0_backup
+    end
+
     @testset "PANOC (fixed step)" begin
         x0 = zeros(T, n)
@@ -169,6 +191,17 @@ using ProximalAlgorithms:
     end
 
+    @testset "PANOC (fixed step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOC(tol = TOL, monotonicity = R(0.3))
+        x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g, Lf = Lf)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= TOL
+        @test it < 20
+        @test x0 == x0_backup
+    end
+
     @testset "PANOC (adaptive step)" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -180,6 +213,17 @@ using ProximalAlgorithms:
         @test x0 == x0_backup
     end
 
+    @testset "PANOC (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL, monotonicity = R(0.3))
+        x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= TOL
+        @test it < 35
+        @test x0 == x0_backup
+    end
+
     @testset "PANOCplus (fixed step)" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -202,6 +246,17 @@ using ProximalAlgorithms:
         @test x0 == x0_backup
     end
 
+    @testset "PANOCplus (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity = R(0.1))
+        x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= TOL
+        @test it < 40
+        @test x0 == x0_backup
+    end
+
     @testset "DouglasRachford" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -230,6 +285,23 @@ using ProximalAlgorithms:
         @test x0 == x0_backup
     end
 
+    @testset "DouglasRachford line search ($acc) (nonmonotone)" for (acc, maxit) in [
+        (LBFGS(5), 25),
+        (Broyden(), 20),
+        (AndersonAcceleration(5), 12),
+        (NesterovExtrapolation(FixedNesterovSequence), 60),
+        (NesterovExtrapolation(SimpleNesterovSequence), 50),
+    ]
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.DRLS(tol = 10 * TOL, directions = acc, monotonicity = R(0.5))
+        z, it = @inferred solver(x0 = x0, f = fA_prox, g = g, Lf = Lf)
+        @test eltype(z) == T
+        @test norm(z - x_star, Inf) <= 10 * TOL
+        @test it < maxit
+        @test x0 == x0_backup
+    end
+
     @testset "AFBA" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl
index b282c57..d2d1675 100644
--- a/test/problems/test_lasso_small_strongly_convex.jl
+++ b/test/problems/test_lasso_small_strongly_convex.jl
@@ -54,8 +54,8 @@ using ProximalAlgorithms
     x0_backup = copy(x0)
 
     @testset "SFISTA" begin
-        solver = ProximalAlgorithms.SFISTA(tol = TOL)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf, mf = mf)
+        solver = ProximalAlgorithms.SFISTA(tol=TOL)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf)
         @test eltype(y) == T
         @test norm(y - x_star) <= TOL
         @test it < 40
@@ -63,8 +63,8 @@ using ProximalAlgorithms
     end
 
     @testset "ForwardBackward" begin
-        solver = ProximalAlgorithms.ForwardBackward(tol = TOL)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
+        solver = ProximalAlgorithms.ForwardBackward(tol=TOL)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 110
@@ -72,8 +72,8 @@ using ProximalAlgorithms
     end
 
     @testset "ForwardBackward (adaptive step)" begin
-        solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g)
+        solver = ProximalAlgorithms.ForwardBackward(tol=TOL, adaptive=true)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 300
@@ -82,11 +82,11 @@ using ProximalAlgorithms
 
     @testset "ForwardBackward (adaptive step, regret)" begin
         solver = ProximalAlgorithms.ForwardBackward(
-            tol = TOL,
-            adaptive = true,
-            increase_gamma = T(1.01),
+            tol=TOL,
+            adaptive=true,
+            increase_gamma=T(1.01),
         )
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 80
@@ -94,8 +94,8 @@ using ProximalAlgorithms
     end
 
     @testset "FastForwardBackward" begin
-        solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf, mf = mf)
+        solver = ProximalAlgorithms.FastForwardBackward(tol=TOL)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 35
@@ -103,8 +103,8 @@ using ProximalAlgorithms
     end
 
     @testset "FastForwardBackward (adaptive step)" begin
-        solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g)
+        solver = ProximalAlgorithms.FastForwardBackward(tol=TOL, adaptive=true)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 100
@@ -113,11 +113,11 @@ using ProximalAlgorithms
 
     @testset "FastForwardBackward (adaptive step, regret)" begin
         solver = ProximalAlgorithms.FastForwardBackward(
-            tol = TOL,
-            adaptive = true,
-            increase_gamma = T(1.01),
+            tol=TOL,
+            adaptive=true,
+            increase_gamma=T(1.01),
         )
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 100
@@ -125,14 +125,14 @@ using ProximalAlgorithms
     end
 
     @testset "FastForwardBackward (custom extrapolation)" begin
-        solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
+        solver = ProximalAlgorithms.FastForwardBackward(tol=TOL)
         y, it = solver(
-            x0 = x0,
-            f = fA_autodiff,
-            g = g,
-            gamma = 1 / Lf,
-            mf = mf,
-            extrapolation_sequence = ProximalAlgorithms.ConstantNesterovSequence(
+            x0=x0,
+            f=fA_autodiff,
+            g=g,
+            gamma=1 / Lf,
+            mf=mf,
+            extrapolation_sequence=ProximalAlgorithms.ConstantNesterovSequence(
                 mf,
                 1 / Lf,
             ),
@@ -144,17 +144,53 @@ using ProximalAlgorithms
     end
 
     @testset "DRLS" begin
-        solver = ProximalAlgorithms.DRLS(tol = TOL)
-        v, it = solver(x0 = x0, f = fA_prox, g = g, mf = mf)
+        solver = ProximalAlgorithms.DRLS(tol=TOL)
+        v, it = solver(x0=x0, f=fA_prox, g=g, mf=mf)
         @test eltype(v) == T
         @test norm(v - x_star, Inf) <= TOL
         @test it < 14
         @test x0 == x0_backup
     end
 
+    @testset "DRLS (nonmonotone)" begin
+        solver = ProximalAlgorithms.DRLS(tol=TOL, monotonicity=T(0.1))
+        v, it = solver(x0=x0, f=fA_prox, g=g, mf=mf)
+        @test eltype(v) == T
+        @test norm(v - x_star, Inf) <= TOL
+        @test it < 14
+        @test x0 == x0_backup
+    end
+
+    @testset "ZeroFPR (fixed step, nonmonotone)" begin
+        solver = ProximalAlgorithms.ZeroFPR(tol=TOL, monotonicity=T(0.2))
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
+        @test eltype(y) == T
+        @test norm(y - x_star, Inf) <= TOL
+        @test it < 30
+        @test x0 == x0_backup
+    end
+
+    @testset "ZeroFPR (adaptive step, nonmonotone)" begin
+        solver = ProximalAlgorithms.ZeroFPR(adaptive=true, tol=TOL, monotonicity=T(0.2))
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
+        @test eltype(y) == T
+        @test norm(y - x_star, Inf) <= TOL
+        @test it < 30
+        @test x0 == x0_backup
+    end
+
     @testset "PANOC" begin
-        solver = ProximalAlgorithms.PANOC(tol = TOL)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
+        solver = ProximalAlgorithms.PANOC(tol=TOL)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
+        @test eltype(y) == T
+        @test norm(y - x_star, Inf) <= TOL
+        @test it < 45
+        @test x0 == x0_backup
+    end
+
+    @testset "PANOC (nonmonotone)" begin
+        solver = ProximalAlgorithms.PANOC(tol=TOL, monotonicity=T(0.2))
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 45
@@ -162,8 +198,17 @@ using ProximalAlgorithms
     end
 
     @testset "PANOCplus" begin
-        solver = ProximalAlgorithms.PANOCplus(tol = TOL)
-        y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
+        solver = ProximalAlgorithms.PANOCplus(tol=TOL)
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
+        @test eltype(y) == T
+        @test norm(y - x_star, Inf) <= TOL
+        @test it < 45
+        @test x0 == x0_backup
+    end
+
+    @testset "PANOCplus (nonmonotone)" begin
+        solver = ProximalAlgorithms.PANOCplus(tol=TOL, monotonicity=T(0.1))
+        y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
         @test eltype(y) == T
         @test norm(y - x_star, Inf) <= TOL
         @test it < 45
diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl
index fadc5f2..0f359f8 100644
--- a/test/problems/test_nonconvex_qp.jl
+++ b/test/problems/test_nonconvex_qp.jl
@@ -35,6 +35,16 @@ using Test
         @test x0 == x0_backup
     end
 
+    @testset "PANOC (nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOC(tol = TOL, monotonicity = T(0.1))
+        x, it = solver(x0 = x0, f = f, g = g)
+        z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
+        @test norm(x - z, Inf) / gamma <= TOL
+        @test x0 == x0_backup
+    end
+
     @testset "PANOCplus" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -45,6 +55,16 @@ using Test
         @test x0 == x0_backup
     end
 
+    @testset "PANOCplus (nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOCplus(tol = TOL, monotonicity = T(0.1))
+        x, it = solver(x0 = x0, f = f, g = g)
+        z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
+        @test norm(x - z, Inf) / gamma <= TOL
+        @test x0 == x0_backup
+    end
+
     @testset "ZeroFPR" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -102,6 +122,16 @@ end
         @test x0 == x0_backup
     end
 
+    @testset "PANOC (nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOC(tol = TOL, monotonicity = T(0.1))
+        x, it = solver(x0 = x0, f = f, g = g)
+        z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
+        @test norm(x - z, Inf) / gamma <= TOL
+        @test x0 == x0_backup
+    end
+
     @testset "PANOCplus" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -112,6 +142,16 @@ end
         @test x0 == x0_backup
     end
 
+    @testset "PANOCplus (nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOCplus(tol = TOL, monotonicity = T(0.1))
+        x, it = solver(x0 = x0, f = f, g = g)
+        z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
+        @test norm(x - z, Inf) / gamma <= TOL
+        @test x0 == x0_backup
+    end
+
     @testset "ZeroFPR" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -122,6 +162,16 @@ end
         @test x0 == x0_backup
     end
 
+    @testset "ZeroFPR (nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.ZeroFPR(tol = TOL, monotonicity = T(0.4))
+        x, it = solver(x0 = x0, f = f, g = g)
+        z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
+        @test norm(x - z, Inf) / gamma <= TOL
+        @test x0 == x0_backup
+    end
+
     @testset "LiLin" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl
index 5103120..19e5de6 100644
--- a/test/problems/test_sparse_logistic_small.jl
+++ b/test/problems/test_sparse_logistic_small.jl
@@ -98,6 +98,17 @@ using LinearAlgebra
         @test x0 == x0_backup
     end
 
+    @testset "ZeroFPR (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.ZeroFPR(adaptive = true, tol = TOL, monotonicity = R(0.5))
+        x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= 1e-4
+        @test it < 30
+        @test x0 == x0_backup
+    end
+
     @testset "PANOC (adaptive step)" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -109,6 +120,17 @@ using LinearAlgebra
         @test x0 == x0_backup
     end
 
+    @testset "PANOC (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOC(adaptive = true, tol = TOL, monotonicity = R(0.5))
+        x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= 1e-4
+        @test it < 50
+        @test x0 == x0_backup
+    end
+
     @testset "PANOCplus (adaptive step)" begin
         x0 = zeros(T, n)
         x0_backup = copy(x0)
@@ -120,4 +142,26 @@ using LinearAlgebra
         @test x0 == x0_backup
     end
 
+    @testset "PANOCplus (adaptive step, nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity = R(0.9))
+        x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= 1e-4
+        @test it < 50
+        @test x0 == x0_backup
+    end
+
+    @testset "PANOCplus (adaptive step, very nonmonotone)" begin
+        x0 = zeros(T, n)
+        x0_backup = copy(x0)
+        solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity = R(0.1))
+        x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
+        @test eltype(x) == T
+        @test norm(x - x_star, Inf) <= 1e-4
+        @test it < 110
+        @test x0 == x0_backup
+    end
+
 end
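All four solvers expose the keyword uniformly, so the effect of the averaging parameter can be compared directly. A hypothetical sweep, assuming `x0`, `fA_prox`, `g`, and `Lf` are defined as in the tests above:

```julia
for p in (1.0, 0.5, 0.1)  # 1.0 = monotone (default); smaller = more nonmonotone
    solver = ProximalAlgorithms.DRLS(tol = 1e-8, monotonicity = p)
    x, it = solver(x0 = x0, f = fA_prox, g = g, Lf = Lf)
    @info "DRLS" monotonicity = p iterations = it
end
```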