# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Francois Gallard
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""Hessian matrix approximations from gradient pairs."""
from __future__ import division, unicode_literals
import logging
from numpy import array, atleast_2d, concatenate, cumsum
from numpy import diag as np_diag
from numpy import dot, eye, inf, sqrt, trace, zeros
from numpy.linalg import LinAlgError, cholesky, inv, multi_dot, norm
from numpy.matlib import repmat
from scipy.optimize import leastsq
LOGGER = logging.getLogger(__name__)
[docs]class HessianApproximation(object):
"""Abstract class for hessian approximations from optimization history."""
def __init__(self, history):
"""Constructor.
:param history: the optimization history
"""
self.history = history
self.x_ref = None
self.fgrad_ref = None
self.f_ref = None
self.b_mat_history = []
self.h_mat_history = []
[docs] def get_x_grad_history(
self,
funcname,
first_iter=0,
last_iter=0,
at_most_niter=-1,
func_index=None,
normalize_design_space=False,
design_space=None,
):
"""Get gradient history and design variables history of gradient evaluations.
:param funcname: function name
:param first_iter: first iteration after which the history is
extracted (Default value = 0)
:param last_iter: last iteration before which the history is
extracted (Default value = 0)
:param at_most_niter: maximum number of iterations to take
(Default value = -1)
:param func_index: Default value = None)
:param normalize_design_space: if True, scale the values
to work in a normalized design space (x between 0 and 1)
:param design_space: the design space used to scale all values
mandatory if normalize_design_space==True
"""
x_grad_hist, x_hist = self.history.get_func_grad_history(funcname, x_hist=True)
if normalize_design_space:
(
x_hist,
x_grad_hist,
) = self._normalize_x_g(x_hist, x_grad_hist, design_space)
assert len(x_grad_hist) == len(x_hist)
n_pairs = len(x_grad_hist)
if n_pairs < 2:
raise ValueError(
"Cannot build approximation for function : {}"
" because its gradient history "
"is too small : {}.".format(funcname, n_pairs)
)
# LOGGER.info("Building Hessian approximation with " +
# str(n_pairs) + " pairs of x and gradients")
x_grad_hist = array(x_grad_hist)
x_hist = array(x_hist)
n_iter, nparam = x_hist.shape
grad_shape = x_grad_hist.shape
n_iter_g = grad_shape[0]
nparam_g = grad_shape[-1]
# Function is a vector, jacobian is a 2D matrix
if len(grad_shape) == 3:
if func_index is None:
raise ValueError(
"Function {} has a vector output "
"then function index of output "
"must be specified.".format(funcname)
)
if func_index < 0 or func_index >= grad_shape[1]:
raise ValueError(
"Function {} has a vector output of size {},"
"function index {} is out of range"
".".format(funcname, grad_shape[1], func_index)
)
x_grad_hist = x_grad_hist[:, func_index, :]
if (n_iter != n_iter_g) or (nparam_g != nparam):
raise ValueError(
"Inconsistent gradient and design" " variables optimization history"
)
if last_iter == 0:
x_hist = x_hist[first_iter:, :]
x_grad_hist = x_grad_hist[first_iter:, :]
else:
x_hist = x_hist[first_iter:last_iter, :]
x_grad_hist = x_grad_hist[first_iter:last_iter, :]
n_iter = x_hist.shape[0]
if 0 < at_most_niter < n_iter:
x_hist = x_hist[n_iter - at_most_niter :, :]
x_grad_hist = x_grad_hist[n_iter - at_most_niter :, :]
n_iter, nparam = x_hist.shape
if n_iter < 2 or nparam == 0:
raise ValueError(
"Insufficient optimization history size, "
"niter={} nparam = {}.".format(n_iter, nparam)
)
self.x_ref = x_hist[-1]
self.fgrad_ref = x_grad_hist[-1]
if last_iter == 0:
self.f_ref = array(self.history.get_func_history(funcname))[-1]
else:
self.f_ref = array(self.history.get_func_history(funcname))[:last_iter][-1]
return x_hist, x_grad_hist, n_iter, nparam
@staticmethod
def _normalize_x_g(x_hist, x_grad_hist, design_space):
"""scale the values to work in a normalized design space (x between 0 and 1)
:param design_space: the design space used to scale all values
"""
if design_space is None:
raise ValueError(
"Design space must be provided when using "
"a normalize_design_space option!"
)
unnormalize_vect = design_space.unnormalize_vect
def normalize_gradient(x_vect):
"""Unnormalize a gradient."""
return unnormalize_vect(x_vect, minus_lb=False, no_check=True)
x_scaled, grad_scaled = [], []
for x_v, g_v in zip(x_hist, x_grad_hist):
x_scaled.append(design_space.normalize_vect(x_v))
grad_scaled.append(normalize_gradient(g_v))
return x_scaled, grad_scaled
[docs] @staticmethod
def get_s_k_y_k(x_hist, x_grad_hist, iteration):
"""Generate the s_k and y_k terms, respectively design variable difference and
gradients difference between iterates.
:param x_hist: design variables history array
:param x_grad_hist: gradients history array
:param iteration: iteration number for which the pair must be generated
"""
n_iter = x_grad_hist.shape[0]
if iteration >= n_iter:
raise ValueError(
"Iteration {} is higher than number of gradients "
"in the database : {}.".format(iteration, n_iter)
)
s_k = atleast_2d(x_hist[iteration + 1] - x_hist[iteration]).T
y_k = atleast_2d(x_grad_hist[iteration + 1] - x_grad_hist[iteration]).T
return s_k, y_k
[docs] @staticmethod
def iterate_s_k_y_k(x_hist, x_grad_hist):
"""Generate the s_k and y_k terms, respectively design variable difference and
gradients difference between iterates.
:param x_hist: design variables history array
:param x_grad_hist: gradients history array
"""
n_iter = x_hist.shape[0]
for k in range(n_iter - 1):
s_k, y_k = HessianApproximation.get_s_k_y_k(x_hist, x_grad_hist, k)
yield s_k, y_k
[docs] def build_approximation(
self,
funcname,
save_diag=False,
first_iter=0,
last_iter=-1,
b_mat0=None,
at_most_niter=-1,
return_x_grad=False,
func_index=None,
save_matrix=False,
scaling=False,
normalize_design_space=False,
design_space=None,
): # pylint: disable=W0221
"""Builds the hessian approximation B.
:param funcname: function name
:param save_diag: if True, returns the list of diagonal approximations
(Default value = False)
:param first_iter: first iteration after which the history is extracted
(Default value = 0)
:param last_iter: last iteration before which the history is extracted
(Default value = -1)
:param b_mat0: initial approximation matrix (Default value = None)
:param at_most_niter: maximum number of iterations to take
(Default value = -1)
:param return_x_grad: if True, also returns the last gradient and x
(Default value = False)
:param func_index: Default value = None)
(Default value = False)
:param scaling: do scaling step
:param normalize_design_space: if True, scale the values
to work in a normalized design space (x between 0 and 1)
:param design_space: the design space used to scale all values
mandatory if normalize_design_space==True
:returns: the B matrix, its diagonal,
and eventually the x and grad history pairs
used to build B, if return_x_grad=True,
otherwise, None and None are returned for args consistency
"""
normalize_ds = normalize_design_space
x_hist, x_grad_hist, _, _ = self.get_x_grad_history(
funcname,
first_iter,
last_iter,
at_most_niter,
func_index,
normalize_ds,
design_space,
)
if b_mat0 is None:
last_n_grad = x_grad_hist.shape[0] - 2
s_k, y_k = self.get_s_k_y_k(x_hist, x_grad_hist, last_n_grad)
alpha = dot(y_k.T, s_k) / dot(y_k.T, y_k)
b_mat = (1.0 / alpha) * eye(len(x_grad_hist[0]))
elif b_mat0.size == 0:
n_x = len(x_hist[0])
b_mat = zeros((n_x, n_x))
else:
b_mat = b_mat0
diag = []
for s_k, y_k in self.iterate_s_k_y_k(x_hist, x_grad_hist):
self.iterate_approximation(b_mat, s_k, y_k, scaling=scaling)
if save_diag:
diag.append(np_diag(b_mat).copy())
if save_matrix:
self.b_mat_history.append(b_mat.copy())
if return_x_grad:
return b_mat, diag, x_hist[-1, :], x_grad_hist[-1, :]
return b_mat, diag, None, None
[docs] @staticmethod
def compute_scaling(hessk, hessk_dsk, dskt_hessk_dsk, dyk, dyt_dsk):
"""Compute scaling.
:param hessk: previous approximation
:param hessk_dsk: product between hessk and dsk
:param dskt_hessk_dsk: product between dsk^t, hessk and dsk
:param dyk: gradients difference between iterates
:param dyt_dsk: product between dyk^t and dsk^t
"""
coeff1 = (len(hessk_dsk) - 1) / (
trace(hessk) - norm(hessk_dsk) ** 2 / dskt_hessk_dsk
)
coeff2 = dyt_dsk / norm(dyk) ** 2
return coeff1, coeff2
[docs] @staticmethod
def iterate_approximation(hessk, dsk, dyk, scaling=False):
"""BFGS iteration from step k to step k+1.
:param hessk: previous approximation
:param dsk: design variable difference between iterates
:param dyk: gradients difference between iterates
:param scaling: do scaling step
:returns: updated approximation
"""
dyt_dsk = dot(dyk.T, dsk)
hessk_dsk = dot(hessk, dsk)
dskt_hessk_dsk = multi_dot((dsk.T, hessk, dsk))
# Build the next approximation:
b_first_term = hessk - multi_dot((hessk, dsk, dsk.T, hessk)) / dskt_hessk_dsk
b_second_term = dot(dyk, dyk.T) / dyt_dsk
if not scaling:
hessk[:, :] = b_first_term + b_second_term
else:
c_1, c_2 = HessianApproximation.compute_scaling(
hessk, hessk_dsk, dskt_hessk_dsk, dyk, dyt_dsk
)
hessk[:, :] = c_1 * b_first_term + c_2 * b_second_term
[docs] def build_inverse_approximation(
self,
funcname,
save_diag=False,
first_iter=0,
last_iter=-1,
h_mat0=None,
at_most_niter=-1,
return_x_grad=False,
func_index=None,
save_matrix=False,
factorize=False,
scaling=False,
angle_tol=1e-5,
step_tol=1e10,
normalize_design_space=False,
design_space=None,
):
"""Builds the inversed hessian approximation H.
:param funcname: function name
:param save_diag: if True, returns the list of diagonal approximations
(Default value = False)
:param first_iter: first iteration after which the history is extracted
(Default value = 0)
:param last_iter: last iteration before which the history is extracted
(Default value = -1)
:param h_mat0: initial inverse approximation matrix
(Default value = None)
:param at_most_niter: maximum number of iterations to take
(Default value = -1)
:param return_x_grad: if True, also returns the last gradient and x
(Default value = False)
:param func_index: Default value = None)
:param normalize_design_space: if True, scale the values
to work in a normalized design space (x between 0 and 1)
:param design_space: the design space used to scale all values
mandatory if normalize_design_space==True
:returns: the H matrix, its diagonal,
and eventually the x and grad history pairs
used to build H, if return_x_grad=True,
otherwise, None and None are returned for args consistency
"""
normalize_ds = normalize_design_space
x_hist, x_grad_hist, _, _ = self.get_x_grad_history(
funcname,
first_iter,
last_iter,
at_most_niter,
func_index,
normalize_ds,
design_space,
)
h_factor = None # to become a matrix G such that H = G*G', optionally
b_factor = None # to become the inverse of the matrix G
if h_mat0 is None:
last_n_grad = x_grad_hist.shape[0] - 2
s_k, y_k = self.get_s_k_y_k(x_hist, x_grad_hist, last_n_grad)
alpha = dot(y_k.T, s_k) / dot(y_k.T, y_k)
n_x = len(x_grad_hist[0])
h_mat = alpha * eye(n_x)
b_mat = 1.0 / alpha * eye(n_x)
if factorize:
h_factor = sqrt(alpha) * eye(n_x)
b_factor = eye(n_x) / sqrt(alpha)
elif len(h_mat0) == 0:
n_x = len(x_hist[0])
h_mat = zeros((n_x, n_x))
b_mat = zeros((n_x, n_x))
if factorize:
h_factor = zeros((n_x, n_x))
b_factor = zeros((n_x, n_x))
else:
h_mat = h_mat0
try:
b_mat = inv(h_mat)
except LinAlgError:
raise LinAlgError("The inversion of h_mat failed")
if factorize or scaling:
try:
h_factor = cholesky(h_mat)
b_factor = cholesky(b_mat).T
except LinAlgError:
raise LinAlgError(
"The Cholesky decomposition of h_factor" " or b_factor failed"
)
diag = []
count = 0
k = 0
for s_k, y_k in self.iterate_s_k_y_k(x_hist, x_grad_hist):
k = k + 1
if dot(s_k.T, y_k) > angle_tol and norm(y_k, inf) < step_tol:
count = count + 1
self.iterate_inverse_approximation(
h_mat,
s_k,
y_k,
h_factor,
b_mat,
b_factor,
factorize=factorize,
scaling=scaling,
)
if save_diag:
diag.append(np_diag(h_mat).copy())
if save_matrix:
self.h_mat_history.append(h_mat.copy())
if return_x_grad:
return h_mat, diag, x_hist[-1, :], x_grad_hist[-1, :], None, None, None
return h_mat, diag, None, None, h_factor, b_mat, b_factor
[docs] @staticmethod
def compute_corrections(x_hist, x_grad_hist):
"""Computes the corrections from the history."""
n_iter = x_hist.shape[0]
x_corr = x_hist[1:n_iter].T - x_hist[: n_iter - 1].T
grad_corr = x_grad_hist[1:n_iter].T - x_grad_hist[: n_iter - 1].T
return x_corr, grad_corr
[docs] @staticmethod
def rebuild_history(x_corr, x_0, grad_corr, g_0):
"""Computes the history from the corrections."""
# Rebuild the argument history:
x_hist = repmat(x_0, x_corr.shape[1], 1) + cumsum(x_corr.T, axis=0)
x_hist = concatenate((atleast_2d(x_0), x_hist), axis=0)
# Rebuild the gradient history
x_grad_hist = repmat(g_0, grad_corr.shape[1], 1) + cumsum(grad_corr.T, axis=0)
x_grad_hist = concatenate((atleast_2d(g_0), x_grad_hist), axis=0)
return x_hist, x_grad_hist
[docs] @staticmethod
def iterate_inverse_approximation(
h_mat,
s_k,
y_k,
h_factor=None,
b_mat=None,
b_factor=None,
factorize=False,
scaling=False,
):
"""Inverse BFGS iteration.
:param h_mat: previous approximation
:param s_k: design variable difference between iterates
:param y_k: gradients difference between iterates
:returns: updated inverse approximation
"""
# Compute the two terms of the non-scaled updated matrix:
yts = dot(y_k.T, s_k)
proj = eye(len(s_k)) - dot(s_k, y_k.T) / yts
h_first_term = multi_dot((proj, h_mat, proj.T))
h_second_term = dot(s_k, s_k.T) / yts
b_s = dot(b_mat, s_k)
st_b_s = dot(s_k.T, b_s)
# Compute the scaling coefficients:
if scaling:
coeff1, coeff2 = HessianApproximation.compute_scaling(
b_mat, b_s, st_b_s, y_k, yts
)
else:
coeff1, coeff2 = 1.0, 1.0
# Update the inverse approximation H and, optionally, the factor G:
h_mat[:, :] = h_first_term / coeff1 + h_second_term / coeff2
if factorize:
sst_b = dot(s_k, b_s.T)
left = proj / sqrt(coeff1) + sst_b / sqrt(coeff2 * yts * st_b_s)
h_factor[:, :] = dot(left, h_factor)
# b_factor[:, :] = dot(eye(len(s_k)) - sstB.T / stBs / sqrt(coeff1)
# + dot(y_k, s_k.T)
# / sqrt(coeff2 * stBs * yts),
# b_factor)
right = sqrt(coeff1) * (eye(len(s_k)) - sst_b / st_b_s)
right += sqrt(coeff2) * dot(s_k, y_k.T) / sqrt(st_b_s * yts)
b_factor[:, :] = dot(b_factor, right)
# Update the Hessian approximation:
b_first_term = b_mat - multi_dot((b_s, b_s.T)) / st_b_s
b_second_term = dot(y_k, y_k.T) / yts
b_mat[:, :] = coeff1 * b_first_term + coeff2 * b_second_term
# b_mat[:, :] = multi_dot((proj.T, b_mat, proj)) \
# + dot(y_k, y_k.T) / yts
[docs]class BFGSApprox(HessianApproximation):
"""Builds a BFGS approximation from optimization history."""
[docs] @staticmethod
def iterate_s_k_y_k(x_hist, x_grad_hist):
"""Generate the s_k and y_k terms, respectively design variable difference and
gradients difference between iterates.
:param x_hist: design variables history array
:param x_grad_hist: gradients history array
"""
n_iter = x_hist.shape[0]
for k in range(n_iter - 1):
s_k, y_k = BFGSApprox.get_s_k_y_k(x_hist, x_grad_hist, k)
# All pairs curvatures shall be positive
# if dot(s_k.T, y_k) > 0.:
if dot(s_k.T, y_k) > 1e-16 * dot(y_k.T, y_k):
yield s_k, y_k
[docs]class SR1Approx(HessianApproximation):
"""Builds a Symmetric Rank One approximation from optimization history."""
EPSILON = 1e-8
[docs] @staticmethod
def iterate_approximation(b_mat, s_k, y_k, scaling=False):
"""SR1 iteration.
:param b_mat: previous approximation
:param s_k: design variable difference between iterates
:param y_k: gradients difference between iterates
:param scaling: do scaling step
:returns: updated approximation
"""
d_mat = y_k - multi_dot((b_mat, s_k))
den = multi_dot((d_mat.T, s_k))
if abs(den) > SR1Approx.EPSILON * norm(s_k) * norm(d_mat):
b_mat[:, :] = b_mat + multi_dot((d_mat, d_mat.T)) / den
else:
LOGGER.debug(
"Denominator of SR1 update is too small, " "update s_kipped %s.", den
)
[docs]class LSTSQApprox(HessianApproximation):
"""Builds a Least squares approximation from optimization history."""
[docs] def build_approximation(
self,
funcname,
save_diag=False,
first_iter=0,
last_iter=-1,
b_mat0=None,
at_most_niter=-1,
return_x_grad=False,
scaling=False,
func_index=-1,
normalize_design_space=False,
design_space=None,
):
"""Builds the hessian approximation.
:param funcname: function name
:param save_diag: if True, returns the list of diagonal approximations
(Default value = False)
:param first_iter: first iteration after which the history is extracted
(Default value = 0)
:param last_iter: last iteration before which the history is extracted
(Default value = -1)
:param b_mat0: initial approximation matrix (Default value = None)
:param at_most_niter: Default value = -1)
:param return_x_grad: Default value = False)
:param func_index: Default value = -1)
:param normalize_design_space: if True, scale the values
to work in a normalized design space (x between 0 and 1)
:param design_space: the design space used to scale all values
mandatory if normalize_design_space==True
:returns: the B matrix, its diagonal,
and eventually the x and grad history pairs
used to build B, if return_x_grad=True,
otherwise, None and None are returned for args consistency
"""
x_hist, x_grad_hist, n_iter, nparam = self.get_x_grad_history(
funcname,
first_iter,
last_iter,
at_most_niter,
func_index=func_index,
normalize_design_space=normalize_design_space,
design_space=design_space,
)
sec_dim = max(nparam, n_iter)
diag = []
assert len(x_grad_hist) == len(x_hist)
def y_to_b(y_vars):
"""Reshapes the approximation from vector to matrix.
:param y_vars: the vector approximation
:param y_vars: returns: the matrix shaped approximation
:returns: the matrix shaped approximation
"""
y_mat = y_vars.reshape((nparam, nparam))
return y_mat + y_mat.T
def func(y_vars):
"""Create the least square function.
:param y_vars: the current approximation vector
:param y_vars: returns: the estimated error vector
:returns: the estimated error vector
"""
b_mat_current = y_to_b(y_vars)
err = zeros((nparam, sec_dim))
for i in range(n_iter):
x_l = x_hist[i] - self.x_ref
err[:, i] = dot(b_mat_current, x_l) - x_grad_hist[i]
err = err.reshape(nparam * sec_dim)
if n_iter < nparam: #
err += y_vars
return err
y_0 = zeros(nparam * nparam)
LOGGER.debug("Start least squares problem..")
y_opt, ier = leastsq(func, x0=y_0) # , cov_x, infodict, mesg, ier
LOGGER.debug("End least squares, msg=%s", str(ier))
b_mat = y_to_b(y_opt)
if return_x_grad:
return b_mat, diag, x_hist[-1, :], x_grad_hist[-1, :]
return b_mat, diag, None, None