Source code for gemseo_mma.opt.core.mma

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""GCMMA-MMA-Python. This file is part of GCMMA-MMA-Python.

GCMMA-MMA-Python is licensed under the terms of GNU General Public License as published
by the Free Software Foundation. For more information and the LICENSE file,
see [here](https://github.com/arjendeetman/GCMMA-MMA-Python).
The original work is written by Krister Svanberg in MATLAB.
This is the python version of the code written Arjen Deetman. version 09-11-2019.

MMA optimizer.

Original work written by Krister Svanberg in Matlab.
This is the python version of the code written by Arjen Deetman.
"""

from __future__ import annotations

import numpy as np
from numpy import ndarray
from numpy.linalg import lstsq
from scipy.linalg import solve  # or use numpy: from numpy.linalg import solve
from scipy.sparse import diags  # or use numpy: from numpy import diag as diags


# Function for the MMA sub problem

[docs]
def solve_mma_local_approximation_problem(
    m: int,
    n: int,
    n_iterations: int,
    xval: ndarray,
    xmin: ndarray,
    xmax: ndarray,
    xold1: ndarray,
    xold2: ndarray,
    f0val: ndarray,
    df0dx: ndarray,
    fval: ndarray,
    dfdx: ndarray,
    low: ndarray,
    upp: ndarray,
    a0: float,
    a: ndarray,
    c: ndarray,
    d: ndarray,
    move: float,
    external_move_limit: float = 10.0,
    internal_limit: float = 0.01,
    asyinit: float = 0.5,
    asyincr: float = 1.2,
    asydecr: float = 0.7,
) -> tuple[
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
]:
    """MMA sub function.

    This function mmasub performs one MMA-iteration, aimed at solving the nonlinear
    programming problem:

    Minimize    f_0(x) + a_0*z + sum( c_i*y_i + 0.5*d_i*(y_i)^2 )
    subject to  f_i(x) - a_i*z - y_i <= 0,  i = 1,...,m
                xmin_j <= x_j <= xmax_j,    j = 1,...,n
                z >= 0,   y_i >= 0,         i = 1,...,m

    Args:
        m: The number of general constraints.
        n: The number of variables x_j.
        n_iterations: The current iteration number
            (=1 the first time mmasub is called).
        xval: The column vector with the current values of the variables x_j.
        xmin: The column vector with the lower bounds for the variables x_j.
        xmax: The column vector with the upper bounds for the variables x_j.
        xold1: The value of xval, one iteration ago (provided that n_iterations>1).
        xold2: The value of xval, two iterations ago (provided that n_iterations>2).
        f0val: The value of the objective function f_0 at xval.
        df0dx: The column vector with the derivatives of the objective function
            f_0 with respect to the variables x_j, calculated at xval.
        fval: The column vector with the values of the constraint functions f_i,
            calculated at xval.
        dfdx: The (m x n)-matrix with the derivatives of the constraint functions
            f_i with respect to the variables x_j, calculated at xval.
        low: The column vector with the lower asymptotes from the previous
            iteration (provided that n_iterations>1).
        upp: The column vector with the upper asymptotes from the previous
            iteration (provided that n_iterations>1).
        a0: The constants a_0 in the term a_0*z.
        a: The column vector with the constants a_i in the terms a_i*z.
        c: The column vector with the constants c_i in the terms c_i*y_i.
        d: The column vector with the constants d_i in the terms 0.5*d_i*(y_i)^2.
        move: The maximum optimization step.
        external_move_limit: The maximum distance of the asymptotes from the current
            design variable value.
        internal_limit: The minimum distance of the asymptotes from the current design
            variable value.
        asyinit: The initial asymptotes distance from the current design variable value.
        asyincr: The incremental factor for successful iterations.
        asydecr: The decremental factor for unsuccessful iterations.

    Returns:
        The Column vector with the optimal values of the variables x_j
        in the current MMA subproblem.

        The Column vector with the optimal values of the variables y_i
        in the current MMA subproblem.

        The Scalar with the optimal value of the variable z
        in the current MMA subproblem.

        The Lagrange multipliers for the m general MMA constraints.

        The Lagrange multipliers for the n constraints alfa_j - x_j <= 0.

        The Lagrange multipliers for the n constraints x_j - beta_j <= 0.

        The Lagrange multipliers for the m constraints -y_i <= 0.

        The Lagrange multiplier for the single constraint -z <= 0.

        The Slack variables for the m general MMA constraints.

        The Column vector with the lower asymptotes, calculated and used
        in the current MMA subproblem.

        The Column vector with the upper asymptotes, calculated and used
        in the current MMA subproblem.
    """
    epsimin = 0.0000001
    raa0 = 0.00001
    albefa = 0.1
    eeen = np.ones((n, 1))
    eeem = np.ones((m, 1))
    zeron = np.zeros((n, 1))
    # Calculation of the asymptotes low and upp
    if n_iterations <= 2:
        low = xval - asyinit * (xmax - xmin)
        upp = xval + asyinit * (xmax - xmin)
    else:
        zzz = (xval - xold1) * (xold1 - xold2)
        factor = eeen.copy()
        factor[np.where(zzz > 0)] = asyincr
        factor[np.where(zzz < 0)] = asydecr
        low = xval - factor * (xold1 - low)
        upp = xval + factor * (upp - xold1)
        lowmin = xval - external_move_limit * (xmax - xmin)
        lowmax = xval - internal_limit * (xmax - xmin)
        uppmin = xval + internal_limit * (xmax - xmin)
        uppmax = xval + external_move_limit * (xmax - xmin)
        low = np.maximum(low, lowmin)
        low = np.minimum(low, lowmax)
        upp = np.minimum(upp, uppmax)
        upp = np.maximum(upp, uppmin)
    # Calculation of the bounds alfa and beta
    zzz1 = low + albefa * (xval - low)
    zzz2 = xval - move * (xmax - xmin)
    zzz = np.maximum(zzz1, zzz2)
    alfa = np.maximum(zzz, xmin)
    zzz1 = upp - albefa * (upp - xval)
    zzz2 = xval + move * (xmax - xmin)
    zzz = np.minimum(zzz1, zzz2)
    beta = np.minimum(zzz, xmax)
    # Calculations of p0, q0, pp, qq and b
    xmami = xmax - xmin
    xmamieps = 0.00001 * eeen
    xmami = np.maximum(xmami, xmamieps)
    xmamiinv = eeen / xmami
    ux1 = upp - xval
    ux2 = ux1 * ux1
    xl1 = xval - low
    xl2 = xl1 * xl1
    uxinv = eeen / ux1
    xlinv = eeen / xl1
    p0 = zeron.copy()
    q0 = zeron.copy()
    p0 = np.maximum(df0dx, 0)
    q0 = np.maximum(-df0dx, 0)
    pq0 = 0.001 * (p0 + q0) + raa0 * xmamiinv
    p0 = p0 + pq0
    q0 = q0 + pq0
    p0 = p0 * ux2
    q0 = q0 * xl2
    pp = np.zeros((m, n))
    qq = np.zeros((m, n))
    pp = np.maximum(dfdx, 0)
    qq = np.maximum(-dfdx, 0)
    ppqq = 0.001 * (pp + qq) + raa0 * np.dot(eeem, xmamiinv.T)
    pp = pp + ppqq
    qq = qq + ppqq
    pp = (diags(ux2.flatten(), 0).dot(pp.T)).T
    qq = (diags(xl2.flatten(), 0).dot(qq.T)).T
    b = np.dot(pp, uxinv) + np.dot(qq, xlinv) - fval
    xmma, ymma, zmma, lam, xsi, eta, mu, zet, s = __subsolv(
        m, n, epsimin, low, upp, alfa, beta, p0, q0, pp, qq, a0, a, b, c, d
    )
    # Return values
    return xmma, ymma, zmma, lam, xsi, eta, mu, zet, s, low, upp



# Function for solving the subproblem (can be used for MMA)
def __subsolv(
    m: int,
    n: int,
    epsimin: float,
    low: ndarray,
    upp: ndarray,
    alfa: ndarray,
    beta: ndarray,
    p0: ndarray,
    q0: ndarray,
    pp: ndarray,
    qq: ndarray,
    a0: ndarray,
    a: ndarray,
    b: ndarray,
    c: ndarray,
    d: ndarray,
) -> tuple[
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
    ndarray,
]:
    """Solve the subproblem by a primal-dual Newton method.

    This function subsolv solves the MMA subproblem:

    minimize SUM[p0j/(uppj-xj) + q0j/(xj-lowj)] + a0*z + SUM[ci*yi + 0.5*di*(yi)^2],

    subject to SUM[pij/(uppj-xj) + qij/(xj-lowj)] - ai*z - yi <= bi,
        alfaj <=  xj <=  betaj,  yi >= 0,  z >= 0.

    Args:
        m: The number of general constraints.
        n: The number of variables x_j.
        epsimin:The convergence tolerance of the local approximation sub problem.
        low: The column vector with the lower asymptotes, calculated and used in the
            current MMA subproblem.
        upp: The column vector with the upper asymptotes, calculated and used in the
            current MMA subproblem.
        alfa: The column vector of local lower bound alfa_j.
        beta: The column vector of local upper bound beta_j.
        p0: The column vector of p0_j.
        q0: The column vector of q0_j.
        pp: The m x n matrix of p_ij.
        qq: The m x n matrix of q_ij.
        a0: The constants a_0 in the term a_0*z.
        a: The column vector with the constants a_i in the terms a_i*z.
        c: The column vector with the constants c_i in the terms c_i*y_i.
        d: The column vector with the constants d_i in the terms 0.5*d_i*(y_i)^2.
        b: The column vector of b_i.

    Returns:
        All the unkowns of the subproblem.
    """
    een = np.ones((n, 1))
    eem = np.ones((m, 1))
    epsi = 1
    x = 0.5 * (alfa + beta)
    y = eem.copy()
    z = np.array([[1.0]])
    lam = eem.copy()
    xsi = een / (x - alfa)
    xsi = np.maximum(xsi, een)
    eta = een / (beta - x)
    eta = np.maximum(eta, een)
    mu = np.maximum(eem, 0.5 * c)
    zet = np.array([[1.0]])
    s = eem.copy()
    itera = 0
    # Start while epsi>epsimin
    while epsi > epsimin:
        epsvecn = epsi * een
        epsvecm = epsi * eem
        ux1 = upp - x
        xl1 = x - low
        ux2 = ux1 * ux1
        xl2 = xl1 * xl1
        uxinv1 = een / ux1
        xlinv1 = een / xl1
        plam = p0 + np.dot(pp.T, lam)
        qlam = q0 + np.dot(qq.T, lam)
        gvec = np.dot(pp, uxinv1) + np.dot(qq, xlinv1)
        dpsidx = plam / ux2 - qlam / xl2
        rex = dpsidx - xsi + eta
        rey = c + d * y - mu - lam
        rez = a0 - zet - np.dot(a.T, lam)
        relam = gvec - a * z - y + s - b
        rexsi = xsi * (x - alfa) - epsvecn
        reeta = eta * (beta - x) - epsvecn
        remu = mu * y - epsvecm
        rezet = zet * z - epsi
        res = lam * s - epsvecm
        residu1 = np.concatenate((rex, rey, rez), axis=0)
        residu2 = np.concatenate((relam, rexsi, reeta, remu, rezet, res), axis=0)
        residu = np.concatenate((residu1, residu2), axis=0)
        residunorm = np.sqrt((np.dot(residu.T, residu)).item())
        residumax = np.max(np.abs(residu))
        ittt = 0
        # Start while (residumax>0.9*epsi) and (ittt<200)
        while (residumax > 0.9 * epsi) and (ittt < 200):
            ittt = ittt + 1
            itera = itera + 1
            ux1 = upp - x
            xl1 = x - low
            ux2 = ux1 * ux1
            xl2 = xl1 * xl1
            ux3 = ux1 * ux2
            xl3 = xl1 * xl2
            uxinv1 = een / ux1
            xlinv1 = een / xl1
            uxinv2 = een / ux2
            xlinv2 = een / xl2
            plam = p0 + np.dot(pp.T, lam)
            qlam = q0 + np.dot(qq.T, lam)
            gvec = np.dot(pp, uxinv1) + np.dot(qq, xlinv1)
            gg = (diags(uxinv2.flatten(), 0).dot(pp.T)).T - (
                diags(xlinv2.flatten(), 0).dot(qq.T)
            ).T
            dpsidx = plam / ux2 - qlam / xl2
            delx = dpsidx - epsvecn / (x - alfa) + epsvecn / (beta - x)
            dely = c + d * y - lam - epsvecm / y
            delz = a0 - np.dot(a.T, lam) - epsi / z
            dellam = gvec - a * z - y - b + epsvecm / lam
            diagx = plam / ux3 + qlam / xl3
            diagx = 2 * diagx + xsi / (x - alfa) + eta / (beta - x)
            diagxinv = een / diagx
            diagy = d + mu / y
            diagyinv = eem / diagy
            diaglam = s / lam
            diaglamyi = diaglam + diagyinv
            # Start if m<n
            if m < n:
                blam = dellam + dely / diagy - np.dot(gg, (delx / diagx))
                bb = np.concatenate((blam, delz), axis=0)
                alam = np.asarray(
                    diags(diaglamyi.flatten(), 0)
                    + (diags(diagxinv.flatten(), 0).dot(gg.T).T).dot(gg.T)
                )
                aar1 = np.concatenate((alam, a), axis=1)
                aar2 = np.concatenate((a, -zet / z), axis=0).T
                aa = np.concatenate((aar1, aar2), axis=0)
                solut = solve(aa, bb)
                dlam = solut[0:m]
                dz = solut[m : m + 1]
                dx = -delx / diagx - np.dot(gg.T, dlam) / diagx
            else:
                diaglamyiinv = eem / diaglamyi
                dellamyi = dellam + dely / diagy
                axx = np.asarray(
                    diags(diagx.flatten(), 0)
                    + (diags(diaglamyiinv.flatten(), 0).dot(gg).T).dot(gg)
                )
                azz = zet / z + np.dot(a.T, (a / diaglamyi))
                axz = np.dot(-gg.T, (a / diaglamyi))
                bx = delx + np.dot(gg.T, (dellamyi / diaglamyi))
                bz = delz - np.dot(a.T, (dellamyi / diaglamyi))
                aar1 = np.concatenate((axx, axz), axis=1)
                aar2 = np.concatenate((axz.T, azz), axis=1)
                aa = np.concatenate((aar1, aar2), axis=0)
                bb = np.concatenate((-bx, -bz), axis=0)
                solut, _i, _j, _k = lstsq(aa, bb, rcond=-1)
                dx = solut[0:n]
                dz = solut[n : n + 1]
                dlam = (
                    np.dot(gg, dx) / diaglamyi
                    - dz * (a / diaglamyi)
                    + dellamyi / diaglamyi
                )
                # End if m<n
            dy = -dely / diagy + dlam / diagy
            dxsi = -xsi + epsvecn / (x - alfa) - (xsi * dx) / (x - alfa)
            deta = -eta + epsvecn / (beta - x) + (eta * dx) / (beta - x)
            dmu = -mu + epsvecm / y - (mu * dy) / y
            dzet = -zet + epsi / z - zet * dz / z
            ds = -s + epsvecm / lam - (s * dlam) / lam
            xx = np.concatenate((y, z, lam, xsi, eta, mu, zet, s), axis=0)
            dxx = np.concatenate((dy, dz, dlam, dxsi, deta, dmu, dzet, ds), axis=0)
            #
            stepxx = -1.01 * dxx / xx
            stmxx = np.max(stepxx)
            stepalfa = -1.01 * dx / (x - alfa)
            stmalfa = np.max(stepalfa)
            stepbeta = 1.01 * dx / (beta - x)
            stmbeta = np.max(stepbeta)
            stmalbe = max(stmalfa, stmbeta)
            stmalbexx = max(stmalbe, stmxx)
            stminv = max(stmalbexx, 1.0)
            steg = 1.0 / stminv
            #
            xold = x.copy()
            yold = y.copy()
            zold = z.copy()
            lamold = lam.copy()
            xsiold = xsi.copy()
            etaold = eta.copy()
            muold = mu.copy()
            zetold = zet.copy()
            sold = s.copy()
            #
            itto = 0
            resinew = 2 * residunorm
            # Start: while (resinew>residunorm) and (itto<50)
            while (resinew > residunorm) and (itto < 50):
                itto = itto + 1
                x = xold + steg * dx
                y = yold + steg * dy
                z = zold + steg * dz
                lam = lamold + steg * dlam
                xsi = xsiold + steg * dxsi
                eta = etaold + steg * deta
                mu = muold + steg * dmu
                zet = zetold + steg * dzet
                s = sold + steg * ds
                ux1 = upp - x
                xl1 = x - low
                ux2 = ux1 * ux1
                xl2 = xl1 * xl1
                uxinv1 = een / ux1
                xlinv1 = een / xl1
                plam = p0 + np.dot(pp.T, lam)
                qlam = q0 + np.dot(qq.T, lam)
                gvec = np.dot(pp, uxinv1) + np.dot(qq, xlinv1)
                dpsidx = plam / ux2 - qlam / xl2
                rex = dpsidx - xsi + eta
                rey = c + d * y - mu - lam
                rez = a0 - zet - np.dot(a.T, lam)
                relam = gvec - np.dot(a, z) - y + s - b
                rexsi = xsi * (x - alfa) - epsvecn
                reeta = eta * (beta - x) - epsvecn
                remu = mu * y - epsvecm
                rezet = np.dot(zet, z) - epsi
                res = lam * s - epsvecm
                residu1 = np.concatenate((rex, rey, rez), axis=0)
                residu2 = np.concatenate(
                    (relam, rexsi, reeta, remu, rezet, res), axis=0
                )
                residu = np.concatenate((residu1, residu2), axis=0)
                resinew = np.sqrt(np.dot(residu.T, residu))
                steg = steg / 2
                # End: while (resinew>residunorm) and (itto<50)
            residunorm = resinew.copy()
            residumax = max(abs(residu))
            steg = 2 * steg
            # End: while (residumax>0.9*epsi) and (ittt<200)
        epsi = 0.1 * epsi
        # End: while epsi>epsimin
    xmma = x.copy()
    ymma = y.copy()
    zmma = z.copy()
    lamma = lam
    xsimma = xsi
    etamma = eta
    mumma = mu
    zetmma = zet
    smma = s
    # Return values
    return xmma, ymma, zmma, lamma, xsimma, etamma, mumma, zetmma, smma


# Function for Karush-Kuhn-Tucker check

[docs]
def compute_kkt_residual_on_local_approximation(
    m: int,
    n: int,
    x: ndarray,
    y: ndarray,
    z: ndarray,
    lam: ndarray,
    xsi: ndarray,
    eta: ndarray,
    mu: ndarray,
    zet: ndarray,
    s: ndarray,
    xmin: ndarray,
    xmax: ndarray,
    df0dx: ndarray,
    fval: ndarray,
    dfdx: ndarray,
    a0: float,
    a: ndarray,
    c: ndarray,
    d: ndarray,
) -> tuple[ndarray, ndarray, ndarray]:
    """KKT residual computation.

    The left hand sides of the KKT conditions for the following nonlinear programming
    problem are calculated.

    Minimize f_0(x) + a_0*z + sum(c_i*y_i + 0.5*d_i*(y_i)^2)
    subject to  f_i(x) - a_i*z - y_i <= 0,   i = 1,...,m
                xmax_j <= x_j <= xmin_j,     j = 1,...,n
                z >= 0,   y_i >= 0,          i = 1,...,m

    Args:
        m: The number of general constraints.
        n: The number of variables x_j.
        x: The current values of the n variables x_j.
        y: The current values of the m variables y_i.
        z: The current value of the single variable z.
        lam: The Lagrange multipliers for the m general constraints.
        xsi: The Lagrange multipliers for the n constraints xmin_j - x_j <= 0.
        eta: The Lagrange multipliers for the n constraints x_j - xmax_j <= 0.
        mu:  The Lagrange multipliers for the m constraints -y_i <= 0.
        zet: The Lagrange multiplier for the single constraint -z <= 0.
        s: The Slack variables for the m general constraints.
        xmin: The Lower bounds for the variables x_j.
        xmax: The Upper bounds for the variables x_j.
        df0dx: The vector with the derivatives of the objective function f_0 with
            respect to the variables x_j, calculated at x.
        fval: The vector with the values of the constraint functions f_i, calculated
        at x.
        dfdx: The (m x n)-matrix with the derivatives of the constraint functions f_i
            with respect to the variables x_j, calculated at x.
            dfdx(i,j) = the derivative of f_i with respect to x_j.
        a0: The constants a_0 in the term a_0*z.
        a: The vector with the constants a_i in the terms a_i*z.
        c: The vector with the constants c_i in the terms c_i*y_i.
        d: The vector with the constants d_i in the terms 0.5*d_i*(y_i)^2.

    Returns:
        The vector residual, its norm and its maximum.
    """
    rex = df0dx + np.dot(dfdx.T, lam) - xsi + eta
    rey = c + d * y - mu - lam
    rez = a0 - zet - np.dot(a.T, lam)
    relam = fval - a * z - y + s
    rexsi = xsi * (x - xmin)
    reeta = eta * (xmax - x)
    remu = mu * y
    rezet = zet * z
    res = lam * s
    residu1 = np.concatenate((rex, rey, rez), axis=0)
    residu2 = np.concatenate((relam, rexsi, reeta, remu, rezet, res), axis=0)
    residu = np.concatenate((residu1, residu2), axis=0)
    residunorm = np.sqrt((np.dot(residu.T, residu)).item())
    residumax = np.max(np.abs(residu))
    return residu, residunorm, residumax