# Source code for gemseo_mlearning.regression.ot_gpr

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""Gaussian process regression model from OpenTURNS."""
from __future__ import annotations

from typing import ClassVar
from typing import Iterable

from gemseo.core.dataset import Dataset
from gemseo.mlearning.core.ml_algo import DataType
from gemseo.mlearning.core.ml_algo import TransformerType
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.data_conversion import (
    concatenate_dict_of_arrays_to_array,
)
from gemseo.utils.python_compatibility import Final
from numpy import array
from numpy import atleast_2d
from numpy import diag
from numpy import ndarray
from openturns import ConstantBasisFactory
from openturns import KrigingAlgorithm
from openturns import Point
from openturns import ResourceMap
from openturns import SquaredExponential


class OTGaussianProcessRegressor(MLRegressionAlgo):
    """Gaussian process regression model from OpenTURNS.

    The model is built with an OpenTURNS ``KrigingAlgorithm``
    using a ``SquaredExponential`` covariance model and a constant basis.
    """

    LIBRARY: Final[str] = "OpenTURNS"
    SHORT_ALGO_NAME: ClassVar[str] = "GPR"

    MAX_SIZE_FOR_LAPACK: ClassVar[int] = 100
    """The maximum size of the learning dataset to use LAPACK as linear algebra library.

    Use HMAT otherwise.
    """

    HMATRIX_ASSEMBLY_EPSILON: ClassVar[float] = 1e-5
    """The epsilon used for the assembly of the H-matrix.

    Used when ``use_hmat`` is ``True``.
    """

    HMATRIX_RECOMPRESSION_EPSILON: ClassVar[float] = 1e-4
    """The epsilon used for the recompression of the H-matrix.

    Used when ``use_hmat`` is ``True``.
    """

    def __init__(
        self,
        data: Dataset,
        transformer: TransformerType | None = None,
        input_names: Iterable[str] | None = None,
        output_names: Iterable[str] | None = None,
        use_hmat: bool | None = None,
    ) -> None:
        """# noqa: D205 D212 D415
        Args:
            use_hmat: Whether to use the HMAT or LAPACK as linear algebra method.
                If ``None``,
                use HMAT when the learning size is greater
                than :attr:`MAX_SIZE_FOR_LAPACK`.
        """
        super().__init__(
            data,
            transformer=transformer,
            input_names=input_names,
            output_names=output_names,
            use_hmat=use_hmat,
        )
        dimension = data.dimension[data.INPUT_GROUP]
        self.__covariance_model = SquaredExponential([0.1] * dimension, [1.0])
        # No covariance parameter is declared active.
        self.__covariance_model.setActiveParameter([])
        self.__basis = ConstantBasisFactory(dimension).build()
        # Go through the property setter so that the OpenTURNS settings
        # are updated consistently with the chosen linear algebra method.
        if use_hmat is None:
            use_hmat = len(data) > self.MAX_SIZE_FOR_LAPACK

        self.use_hmat = use_hmat

    @property
    def use_hmat(self) -> bool:
        """Whether to use the HMAT linear algebra method or LAPACK.

        Setting this property updates the OpenTURNS ``ResourceMap`` entries
        ``"KrigingAlgorithm-LinearAlgebra"`` and, for HMAT,
        ``"HMatrix-AssemblyEpsilon"`` and ``"HMatrix-RecompressionEpsilon"``.
        These entries are set through class-level calls on ``ResourceMap``,
        not stored on this instance.
        """
        return self.__use_hmat

    @use_hmat.setter
    def use_hmat(self, use_hmat: bool) -> None:
        self.__use_hmat = use_hmat
        if use_hmat:
            linear_algebra_method = "HMAT"
            ResourceMap.SetAsScalar(
                "HMatrix-AssemblyEpsilon", self.HMATRIX_ASSEMBLY_EPSILON
            )
            ResourceMap.SetAsScalar(
                "HMatrix-RecompressionEpsilon", self.HMATRIX_RECOMPRESSION_EPSILON
            )
        else:
            linear_algebra_method = "LAPACK"

        ResourceMap.SetAsString("KrigingAlgorithm-LinearAlgebra", linear_algebra_method)

    def _fit(self, input_data: ndarray, output_data: ndarray) -> None:
        """Run the kriging algorithm and store its result in :attr:`algo`.

        Args:
            input_data: The input learning data.
            output_data: The output learning data.
        """
        algo = KrigingAlgorithm(
            input_data, output_data, self.__covariance_model, self.__basis
        )
        algo.run()
        self.algo = algo.getResult()

    def _predict(self, input_data: ndarray) -> ndarray:
        """Return the conditional mean of the fitted model as a 2D array.

        Args:
            input_data: The input data.

        Returns:
            The conditional mean at the input data.
        """
        return atleast_2d(self.algo.getConditionalMean(input_data))

    def predict_std(self, input_data: DataType) -> ndarray:
        """Predict the standard deviation from input data.

        The standard deviation is the square root
        of the diagonal of the conditional covariance matrix
        computed by the fitted model at each input point.
        The input transformer, if any, is applied to the input data;
        no output transformation is applied to the result.

        Args:
            input_data: The input data with shape (n_samples, n_inputs),
                or (n_inputs,) for a single sample.
                A dictionary of arrays indexed by input names is also accepted.

        Returns:
            The standard deviation with shape (n_samples, n_outputs),
            or (n_outputs,) when the input data is one-dimensional.
        """
        if isinstance(input_data, dict):
            input_data = concatenate_dict_of_arrays_to_array(
                input_data, self.input_names
            )

        one_dim = input_data.ndim == 1
        input_data = atleast_2d(input_data)
        inputs = self.learning_set.INPUT_GROUP
        if inputs in self.transformer:
            input_data = self.transformer[inputs].transform(input_data)

        # The diagonal of the conditional covariance matrix is the variance
        # of each output component at the given input point.
        variance = array(
            [
                diag(self.algo.getConditionalCovariance(input_datum)).tolist()
                for input_datum in input_data
            ]
        )
        # Round-off errors can yield tiny negative variances;
        # clip them to zero before taking the square root.
        output_data = variance.clip(min=0) ** 0.5

        if one_dim:
            return output_data[0]

        return output_data

    def _predict_jacobian(self, input_data: ndarray) -> ndarray:
        """Return the transposed metamodel gradient at each input point.

        Args:
            input_data: The input data, iterated point by point.

        Returns:
            The stacked transposed gradients, one per input point.
        """
        gradient = self.algo.getMetaModel().gradient
        return array([array(gradient(Point(data))).T for data in input_data])