Skip to content

Commit b4e9240

Browse files
committed
Use np.ndarray for Hessians, not Hessian class (faster matrix-vector multiplication)
1 parent db5e6c4 commit b4e9240

File tree

8 files changed

+162
-167
lines changed

8 files changed

+162
-167
lines changed

pybobyqa/controller.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import numpy as np
3636
import scipy.linalg as LA
3737

38-
from .hessian import *
3938
from .model import *
4039
from .trust_region import *
4140
from .util import *
@@ -266,21 +265,21 @@ def initialise_random_directions(self, number_of_samples, num_directions, params
266265

267266
def trust_region_step(self):
268267
# Build model for full least squares objectives
269-
gopt, hq = self.model.build_full_model()
270-
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, hq, self.model.sl, self.model.su, self.delta)
271-
return d, gopt, hq, gnew, crvmin
268+
gopt, H = self.model.build_full_model()
269+
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
270+
return d, gopt, H, gnew, crvmin
272271

273272
def geometry_step(self, knew, adelt, number_of_samples, params):
274273
logging.debug("Running geometry-fixing step")
275274
try:
276-
c, g, hess = self.model.lagrange_polynomial(knew) # based at xopt
275+
c, g, H = self.model.lagrange_polynomial(knew) # based at xopt
277276
# Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
278-
xnew = trsbox_geometry(self.model.xopt(), c, g, hess, self.model.sl, self.model.su, adelt)
277+
xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
279278
except LA.LinAlgError:
280279
exit_info = ExitInformation(EXIT_LINALG_ERROR, "Singular matrix encountered in geometry step")
281280
return exit_info # didn't fix geometry - return & quit
282281

283-
gopt, hq = self.model.build_full_model() # save here, to calculate predicted value from geometry step
282+
gopt, H = self.model.build_full_model() # save here, to calculate predicted value from geometry step
284283
fopt = self.model.fopt() # again, evaluate now, before model.change_point()
285284
d = xnew - self.model.xopt()
286285
x = self.model.as_absolute_coordinates(xnew)
@@ -300,8 +299,8 @@ def geometry_step(self, knew, adelt, number_of_samples, params):
300299
# Estimate actual reduction to add to diffs vector
301300
f = np.mean(f_list[:num_samples_run]) # estimate actual objective value
302301

303-
# pred_reduction = - calculate_model_value(gopt, hq, d)
304-
pred_reduction = - model_value(gopt, hq, d)
302+
# pred_reduction = - calculate_model_value(gopt, H, d)
303+
pred_reduction = - model_value(gopt, H, d)
305304
actual_reduction = fopt - f
306305
self.diffs = [abs(pred_reduction - actual_reduction), self.diffs[0], self.diffs[1]]
307306
return None # exit_info = None
@@ -381,9 +380,9 @@ def choose_point_to_replace(self, d, skip_kopt=True):
381380

382381
return knew, exit_info
383382

384-
def done_with_current_rho(self, xnew, gnew, crvmin, hq, current_iter):
383+
def done_with_current_rho(self, xnew, gnew, crvmin, H, current_iter):
385384
# (xnew, gnew, crvmin) come from trust region step
386-
# hq is Hessian of model for the full objective
385+
# H is Hessian of model for the full objective
387386

388387
# Wait at least 3 iterations between reductions of rho
389388
if current_iter <= self.last_successful_iter + 2:
@@ -402,7 +401,7 @@ def done_with_current_rho(self, xnew, gnew, crvmin, hq, current_iter):
402401
if xnew[j] == self.model.su[j]:
403402
bdtest = -gnew[j]
404403
if bdtest < bdtol:
405-
curv = hq.get_element(j, j) # curv = Hessian(j, j)
404+
curv = H[j,j]
406405
bdtest += 0.5 * curv * self.rho
407406
if bdtest < bdtol:
408407
return False
@@ -425,10 +424,10 @@ def reduce_rho(self, current_iter, params):
425424
self.last_successful_iter = current_iter # reset successful iteration check
426425
return
427426

428-
def calculate_ratio(self, current_iter, f_list, d, gopt, hq):
427+
def calculate_ratio(self, current_iter, f_list, d, gopt, H):
429428
exit_info = None
430429
f = np.mean(f_list) # estimate actual objective value
431-
pred_reduction = - model_value(gopt, hq, d)
430+
pred_reduction = - model_value(gopt, H, d)
432431
actual_reduction = self.model.fopt() - f
433432
self.diffs = [abs(actual_reduction - pred_reduction), self.diffs[0], self.diffs[1]]
434433
if min(sqrt(sumsq(d)), self.delta) > self.rho: # if ||d|| >= rho, successful!

pybobyqa/model.py

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import numpy as np
3535
import scipy.linalg as LA
3636

37-
from .hessian import Hessian, to_upper_triangular_vector
37+
from .hessian import to_upper_triangular_vector
3838
from .trust_region import trsbox_geometry
3939
from .util import sumsq, model_value
4040

@@ -74,7 +74,7 @@ def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, prec
7474
# Model information
7575
self.model_const = 0.0 # constant term for model m(s) = c + J*s
7676
self.model_grad = np.zeros((n,)) # Jacobian term for model m(s) = c + J*s
77-
self.model_hess = Hessian(n)
77+
self.model_hess = np.zeros((n,n))
7878

7979
# Saved point (in absolute coordinates) - always check this value before quitting solver
8080
self.xsave = None
@@ -199,7 +199,7 @@ def shift_base(self, xbase_shift):
199199
self.factorisation_current = False
200200

201201
# Update model (always centred on xbase)
202-
Hx = self.model_hess.vec_mul(xbase_shift)
202+
Hx = self.model_hess.dot(xbase_shift)
203203
self.model_const += np.dot(self.model_grad + 0.5*Hx, xbase_shift)
204204
self.model_grad += Hx
205205
return
@@ -209,7 +209,7 @@ def save_point(self, x, f, nsamples, x_in_abs_coords=True):
209209
self.xsave = x.copy() if x_in_abs_coords else self.as_absolute_coordinates(x)
210210
self.fsave = f
211211
self.gradsave = self.model_grad.copy()
212-
self.hesssave = self.model_hess.as_full().copy()
212+
self.hesssave = self.model_hess.copy()
213213
self.nsamples_save = nsamples
214214
return True
215215
else:
@@ -219,7 +219,7 @@ def get_final_results(self):
219219
# Return x and fval for optimal point (either from xsave+fsave or kopt)
220220
if self.fsave is None or self.fopt() <= self.fsave: # optimal has changed since xsave+fsave were last set
221221
g, hess = self.build_full_model() # model based at xopt
222-
return self.xopt(abs_coordinates=True).copy(), self.fopt(), g, hess.as_full(), self.nsamples[self.kopt]
222+
return self.xopt(abs_coordinates=True).copy(), self.fopt(), g, hess, self.nsamples[self.kopt]
223223
else:
224224
return self.xsave, self.fsave, self.gradsave, self.hesssave, self.nsamples_save
225225

@@ -231,7 +231,7 @@ def model_value(self, d, d_based_at_xopt=True, with_const_term=False):
231231
# Model is always centred around xbase
232232
const = self.model_const if with_const_term else 0.0
233233
d_to_use = d + self.xopt() if d_based_at_xopt else d
234-
Hd = self.model_hess.vec_mul(d_to_use)
234+
Hd = self.model_hess.dot(d_to_use)
235235
return const + np.dot(self.model_grad + 0.5 * Hd, d_to_use)
236236

237237
def interpolation_matrix(self):
@@ -281,7 +281,7 @@ def interpolate_model(self, verbose=False, min_chg_hess=True, get_norm_model_chg
281281
# It's good to see which bits are needed for this specifically (here & 1 line below)
282282
for t in range(self.npt()-1):
283283
dx = self.xpt(fval_row_idx[t]) - self.xopt()
284-
rhs[t] = rhs[t] - 0.5 * np.dot(dx, self.model_hess.vec_mul(dx)) # include old Hessian
284+
rhs[t] = rhs[t] - 0.5 * np.dot(dx, self.model_hess.dot(dx)) # include old Hessian
285285

286286
try:
287287
coeffs = self.solve_system(rhs)
@@ -296,7 +296,7 @@ def interpolate_model(self, verbose=False, min_chg_hess=True, get_norm_model_chg
296296
# Old gradient and Hessian (save so can compute changes later)
297297
if verbose or get_norm_model_chg:
298298
old_model_grad = self.model_grad.copy()
299-
old_model_hess = self.model_hess.as_full()
299+
old_model_hess = self.model_hess.copy()
300300
else:
301301
old_model_grad = None
302302
old_model_hess = None
@@ -305,24 +305,21 @@ def interpolate_model(self, verbose=False, min_chg_hess=True, get_norm_model_chg
305305
self.model_const = self.fopt() # true in all cases
306306
if self.npt() == self.n() + 1:
307307
self.model_grad = coeffs.copy()
308-
self.model_hess = Hessian(self.n()) # zeros
308+
self.model_hess = np.zeros((self.n(), self.n()))
309309
elif self.npt() == (self.n() + 1) * (self.n() + 2) // 2:
310310
self.model_grad = coeffs[:self.n()]
311-
self.model_hess = Hessian(self.n(), coeffs[self.n():]) # rest of coeffs are upper triangular part of Hess
311+
self.model_hess = build_symmetric_matrix_from_vector(self.n(), coeffs[self.n():]) # rest of coeffs are upper triangular part of Hess
312312
else:
313313
self.model_grad = coeffs[self.npt()-1:] # last n values
314-
if min_chg_hess:
315-
hess_full = self.model_hess.as_full()
316-
else:
317-
hess_full = np.zeros((self.n(), self.n()))
314+
if not min_chg_hess:
315+
self.model_hess = np.zeros((self.n(), self.n()))
318316
for i in range(self.npt()-1):
319317
dx = self.xpt(fval_row_idx[i]) - self.xopt()
320-
hess_full += coeffs[i] * np.outer(dx, dx)
321-
self.model_hess = Hessian(self.n(), hess_full)
318+
self.model_hess += coeffs[i] * np.outer(dx, dx)
322319

323320
# Base model at xbase, not xopt (note negative signs)
324321
xopt = self.xopt()
325-
Hx = self.model_hess.vec_mul(xopt)
322+
Hx = self.model_hess.dot(xopt)
326323
self.model_const += np.dot(-self.model_grad + 0.5*Hx, xopt)
327324
self.model_grad += -Hx
328325

@@ -331,7 +328,7 @@ def interpolate_model(self, verbose=False, min_chg_hess=True, get_norm_model_chg
331328
norm_chg_hess = 0.0
332329
if verbose or get_norm_model_chg:
333330
norm_chg_grad = LA.norm(self.model_grad - old_model_grad)
334-
norm_chg_hess = LA.norm(self.model_hess.as_full() - old_model_hess, ord='fro')
331+
norm_chg_hess = LA.norm(self.model_hess - old_model_hess, ord='fro')
335332
if verbose:
336333
for k in range(self.npt()):
337334
f_pred = self.model_value(self.xpt(k), d_based_at_xopt=False, with_const_term=True)
@@ -342,7 +339,7 @@ def interpolate_model(self, verbose=False, min_chg_hess=True, get_norm_model_chg
342339

343340
def build_full_model(self):
344341
# Make model centred around xopt
345-
g = self.model_grad + self.model_hess.vec_mul(self.xopt())
342+
g = self.model_grad + self.model_hess.dot(self.xopt())
346343
return g, self.model_hess
347344

348345
def lagrange_polynomial(self, k, factorise_first=True):
@@ -382,21 +379,20 @@ def lagrange_polynomial(self, k, factorise_first=True):
382379
c = 1.0 if k==self.kopt else 0.0 # true in all cases
383380
if self.npt() == self.n() + 1:
384381
g = coeffs.copy()
385-
hess = Hessian(self.n()) # zeros
382+
H = np.zeros((self.n(), self.n()))
386383
elif self.npt() == (self.n() + 1) * (self.n() + 2) // 2:
387384
g = coeffs[:self.n()]
388-
hess = Hessian(self.n(), coeffs[self.n():]) # rest of coeffs are upper triangular part of Hess
385+
H = build_symmetric_matrix_from_vector(self.n(), coeffs[self.n():]) # rest of coeffs are upper triangular part of Hess
389386
else:
390387
g = coeffs[self.npt() - 1:] # last n values
391388
fval_row_idx = np.delete(np.arange(self.npt()), self.kopt) # indices of all rows except kopt
392-
hess_full = np.zeros((self.n(), self.n()))
389+
H = np.zeros((self.n(), self.n()))
393390
for i in range(self.npt() - 1):
394391
dx = self.xpt(fval_row_idx[i]) - self.xopt()
395-
hess_full += coeffs[i] * np.outer(dx, dx)
396-
hess = Hessian(self.n(), hess_full)
392+
H += coeffs[i] * np.outer(dx, dx)
397393

398394
# (c, g, hess) currently based around xopt
399-
return c, g, hess
395+
return c, g, H
400396

401397
def poisedness_constant(self, delta, xbase=None, xbase_in_abs_coords=True):
402398
# Calculate the poisedness constant of the current interpolation set in B(xbase, delta)
@@ -407,14 +403,14 @@ def poisedness_constant(self, delta, xbase=None, xbase_in_abs_coords=True):
407403
elif xbase_in_abs_coords:
408404
xbase = xbase - self.xbase # shift to correct position
409405
for k in range(self.npt()):
410-
c, g, hess = self.lagrange_polynomial(k, factorise_first=True) # based at self.xopt()
406+
c, g, H = self.lagrange_polynomial(k, factorise_first=True) # based at self.xopt()
411407
# Switch base of poly from xopt to xbase, as required by trsbox_geometry
412408
base_chg = self.xopt() - xbase
413-
Hx = hess.vec_mul(base_chg)
409+
Hx = H.dot(base_chg)
414410
c += np.dot(-g + 0.5 * Hx, base_chg)
415411
g += -Hx
416-
xmax = trsbox_geometry(xbase, c, g, hess, self.sl, self.su, delta)
417-
lmax = abs(c + model_value(g, hess, xmax-xbase)) # evaluate Lagrange poly
412+
xmax = trsbox_geometry(xbase, c, g, H, self.sl, self.su, delta)
413+
lmax = abs(c + model_value(g, H, xmax-xbase)) # evaluate Lagrange poly
418414
if overall_max is None or lmax > overall_max:
419415
overall_max = lmax
420416
return overall_max
@@ -456,3 +452,14 @@ def build_interpolation_matrix(Y, approx_delta=1.0):
456452
right_scaling[p:] = approx_delta
457453
return A, left_scaling, right_scaling
458454

455+
456+
def build_symmetric_matrix_from_vector(n, entries):
457+
assert entries.shape == (n*(n+1)//2,), "Entries vector has wrong size, got %g, expect %g (for n=%g)" % (len(entries), n*(n+1)//2, n)
458+
A = np.zeros((n, n))
459+
ih = -1
460+
for j in range(n): # j = 0, ..., n-1
461+
for i in range(j + 1): # i = 0, ..., j
462+
ih += 1
463+
A[i, j] = entries[ih]
464+
A[j, i] = entries[ih]
465+
return A

pybobyqa/solver.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
222222

223223

224224
# Trust region step
225-
d, gopt, hq, gnew, crvmin = control.trust_region_step()
225+
d, gopt, H, gnew, crvmin = control.trust_region_step()
226226
logging.debug("Trust region step is d = " + str(d))
227227
xnew = control.model.xopt() + d
228228
dnorm = min(LA.norm(d), control.delta)
@@ -242,7 +242,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
242242
diagnostic_info.update_iter_type(ITER_SAFETY)
243243
diagnostic_info.update_slow_iter(-1)
244244

245-
if not control.done_with_current_rho(xnew, gnew, crvmin, hq, current_iter):
245+
if not control.done_with_current_rho(xnew, gnew, crvmin, H, current_iter):
246246
distsq = (10.0 * control.rho) ** 2
247247
number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
248248
update_delta = True # we do reduce delta for safety steps
@@ -378,7 +378,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
378378
break # quit
379379

380380
# Estimate f in order to compute 'actual reduction'
381-
ratio, exit_info = control.calculate_ratio(current_iter, f_list[:num_samples_run], d, gopt, hq)
381+
ratio, exit_info = control.calculate_ratio(current_iter, f_list[:num_samples_run], d, gopt, H)
382382
if exit_info is not None:
383383
if exit_info.able_to_do_restart() and params("restarts.use_restarts") and params(
384384
"restarts.use_soft_restarts"):

0 commit comments

Comments
 (0)