Source code for gemseo_mlearning.regression.ot_gpr

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""Gaussian process regression model from OpenTURNS."""
from __future__ import annotations

from typing import ClassVar
from typing import Iterable

from gemseo.core.dataset import Dataset
from gemseo.mlearning.core.ml_algo import DataType
from gemseo.mlearning.core.ml_algo import TransformerType
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.data_conversion import (
    concatenate_dict_of_arrays_to_array,
)
from gemseo.utils.python_compatibility import Final
from numpy import array
from numpy import atleast_2d
from numpy import diag
from numpy import ndarray
from openturns import ConstantBasisFactory
from openturns import KrigingAlgorithm
from openturns import Point
from openturns import ResourceMap
from openturns import SquaredExponential


class OTGaussianProcessRegressor(MLRegressionAlgo):
    """Gaussian process regression model from OpenTURNS.

    The model is built with an OpenTURNS ``KrigingAlgorithm``
    using a squared exponential covariance model and a constant basis.
    """

    LIBRARY: Final[str] = "OpenTURNS"
    SHORT_ALGO_NAME: ClassVar[str] = "GPR"

    MAX_SIZE_FOR_LAPACK: ClassVar[int] = 100
    """The maximum size of the learning dataset to use LAPACK as linear algebra library.

    Use HMAT otherwise.
    """

    HMATRIX_ASSEMBLY_EPSILON: ClassVar[float] = 1e-5
    """The epsilon used for the assembly of the H-matrix.

    Used when ``use_hmat`` is ``True``.
    """

    HMATRIX_RECOMPRESSION_EPSILON: ClassVar[float] = 1e-4
    """The epsilon used for the recompression of the H-matrix.

    Used when ``use_hmat`` is ``True``.
    """

    def __init__(
        self,
        data: Dataset,
        transformer: TransformerType | None = None,
        input_names: Iterable[str] | None = None,
        output_names: Iterable[str] | None = None,
        use_hmat: bool | None = None,
    ) -> None:
        """# noqa: D205 D212 D415
        Args:
            use_hmat: Whether to use the HMAT or LAPACK as linear algebra method.
                If ``None``,
                use HMAT when the learning size is greater
                than :attr:`MAX_SIZE_FOR_LAPACK`.
        """
        super().__init__(
            data,
            transformer=transformer,
            input_names=input_names,
            output_names=output_names,
            use_hmat=use_hmat,
        )
        dimension = data.dimension[data.INPUT_GROUP]
        self.__covariance_model = SquaredExponential([0.1] * dimension, [1.0])
        # NOTE(review): an empty set of active parameters presumably freezes
        # the kernel hyper-parameters during fitting -- confirm with OpenTURNS.
        self.__covariance_model.setActiveParameter([])
        self.__basis = ConstantBasisFactory(dimension).build()
        self.__use_hmat = None
        if use_hmat is None:
            # Pick the linear algebra method from the learning size.
            use_hmat = len(data) > self.MAX_SIZE_FOR_LAPACK

        # Go through the property setter so that the ResourceMap is updated.
        self.use_hmat = use_hmat

    @property
    def use_hmat(self) -> bool:
        """Whether to use the HMAT linear algebra method or LAPACK."""
        return self.__use_hmat

    @use_hmat.setter
    def use_hmat(self, use_hmat: bool) -> None:
        # The OpenTURNS ResourceMap is written here, i.e. when the value is set,
        # not when the model is fitted.
        self.__use_hmat = use_hmat
        if use_hmat:
            linear_algebra_method = "HMAT"
            ResourceMap.SetAsScalar(
                "HMatrix-AssemblyEpsilon", self.HMATRIX_ASSEMBLY_EPSILON
            )
            ResourceMap.SetAsScalar(
                "HMatrix-RecompressionEpsilon", self.HMATRIX_RECOMPRESSION_EPSILON
            )
        else:
            linear_algebra_method = "LAPACK"

        ResourceMap.SetAsString("KrigingAlgorithm-LinearAlgebra", linear_algebra_method)

    def _fit(self, input_data: ndarray, output_data: ndarray) -> None:
        """Run the kriging algorithm and store its result in ``self.algo``.

        Args:
            input_data: The input learning data.
            output_data: The output learning data.
        """
        algo = KrigingAlgorithm(
            input_data, output_data, self.__covariance_model, self.__basis
        )
        algo.run()
        self.algo = algo.getResult()

    def _predict(self, input_data: ndarray) -> ndarray:
        """Return the conditional mean at the input data as an at least 2D array.

        Args:
            input_data: The input data.

        Returns:
            The conditional mean of the fitted model.
        """
        return atleast_2d(self.algo.getConditionalMean(input_data))

    def predict_std(self, input_data: DataType) -> ndarray:
        """Predict the standard deviation from input data.

        The standard deviation is the square root
        of the diagonal of the conditional covariance matrix
        computed at each input sample.
        The input transformer, if any, is applied to the input data;
        no output transformer is applied to the result.

        Args:
            input_data: The input data with shape (n_samples, n_inputs),
                or a single sample as a 1D array,
                or a dictionary of arrays indexed by the input names.

        Returns:
            The standard deviation with shape (n_samples, n_outputs),
            or (n_outputs,) when the input data is a 1D array.
        """
        if isinstance(input_data, dict):
            input_data = concatenate_dict_of_arrays_to_array(
                input_data, self.input_names
            )

        one_dim = input_data.ndim == 1
        input_data = atleast_2d(input_data)
        inputs = self.learning_set.INPUT_GROUP
        if inputs in self.transformer:
            input_data = self.transformer[inputs].transform(input_data)

        variance = array(
            [
                diag(self.algo.getConditionalCovariance(input_datum)).tolist()
                for input_datum in input_data
            ]
        )
        # The diagonal of the covariance is the variance:
        # take its square root to get the standard deviation.
        # Clip first, as round-off errors can yield slightly negative variances
        # whose square root would be NaN.
        output_data = variance.clip(min=0.0) ** 0.5
        if one_dim:
            return output_data[0]

        return output_data

    def _predict_jacobian(self, input_data: ndarray) -> ndarray:
        """Return the transposed gradient of the meta-model at each input sample.

        Args:
            input_data: The input data, iterated sample by sample.

        Returns:
            The stacked transposed gradients, one per input sample.
        """
        gradient = self.algo.getMetaModel().gradient
        return array([array(gradient(Point(data))).T for data in input_data])