# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Francois Gallard, Matthias De Lozzo
# OTHER AUTHORS - MACROSCOPIC CHANGES
r"""The Gaussian process algorithm for regression.
Overview
--------
The Gaussian process regression (GPR) surrogate model
expresses the model output as a weighted sum of kernel functions
centered on the learning input data:
.. math::
y = \mu
+ w_1\kappa(\|x-x_1\|;\epsilon)
+ w_2\kappa(\|x-x_2\|;\epsilon)
+ ...
+ w_N\kappa(\|x-x_N\|;\epsilon)
Details
-------

The GPR model relies on the assumption
that the original model :math:`f` to replace
is an instance of a Gaussian process (GP) with mean :math:`\mu`
and covariance :math:`\sigma^2\kappa(\|x-x'\|;\epsilon)`.

Then, the GP conditioned by the learning set
:math:`(x_i,y_i)_{1\leq i \leq N}`
is entirely defined by its expectation:

.. math::

    \hat{f}(x) = \hat{\mu} + \hat{w}^T k(x)

and its covariance:

.. math::

    \hat{c}(x,x') = \hat{\sigma}^2 - k(x)^T K^{-1} k(x')

where :math:`[\hat{\mu};\hat{w}]=([1_N~K]^T[1_N~K])^{-1}[1_N~K]^TY` with
:math:`K_{ij}=\kappa(\|x_i-x_j\|;\hat{\epsilon})`,
:math:`k_i(x)=\kappa(\|x-x_i\|;\hat{\epsilon})`
and :math:`Y_i=y_i`.

The correlation length vector :math:`\epsilon`
is estimated by numerical non-linear optimization.
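
As an illustration,
here is a small standalone sketch of these formulas
for a one-dimensional model and a Gaussian kernel
(the kernel, the learning set and the query point are illustrative
and do not come from this module):

.. code-block:: python

    import numpy as np

    def kappa(r, eps=0.2):
        # A Gaussian kernel with correlation length eps.
        return np.exp(-((r / eps) ** 2))

    x = np.linspace(0.0, 1.0, 5)[:, np.newaxis]  # learning inputs (N=5)
    y = np.sin(2 * np.pi * x[:, 0])  # learning outputs

    K = kappa(np.abs(x - x.T))  # K_ij = kappa(|x_i - x_j|)
    H = np.hstack([np.ones((5, 1)), K])  # the matrix [1_N K]
    # Least-squares solution of H [mu_hat; w_hat] = Y,
    # playing the role of the normal-equation formula above.
    mu_w = np.linalg.lstsq(H, y, rcond=None)[0]

    x_new = 0.3
    k = kappa(np.abs(x_new - x[:, 0]))  # k_i(x) = kappa(|x - x_i|)
    f_hat = mu_w[0] + k.dot(mu_w[1:])  # the expectation at x_new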

Surrogate model
---------------

The expectation :math:`\hat{f}` is the GPR surrogate model of :math:`f`.

Error measure
-------------

The standard deviation :math:`\hat{s}` is a local error measure
of :math:`\hat{f}`:

.. math::

    \hat{s}(x):=\sqrt{\hat{c}(x,x)}

Interpolation or regression
---------------------------

The GPR surrogate model can be regressive or interpolative
according to the value of the nugget effect :math:`\alpha\geq 0`,
which is a regularization term
applied to the correlation matrix :math:`K`.
When :math:`\alpha = 0`,
the surrogate model interpolates the learning data.
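
For instance,
here is a minimal sketch of both regimes,
assuming ``dataset`` is a learning :class:`.Dataset` built elsewhere:

.. code-block:: python

    # alpha=0: the surrogate interpolates the learning outputs exactly,
    # at the risk of an ill-conditioned correlation matrix.
    interpolator = GaussianProcessRegression(dataset, alpha=0.0)
    interpolator.learn()

    # alpha>0: the diagonal of K is inflated by alpha,
    # trading exact interpolation for numerical robustness.
    regressor = GaussianProcessRegression(dataset, alpha=1e-6)
    regressor.learn()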

Dependence
----------

The GPR model relies on the ``GaussianProcessRegressor`` class
of the `scikit-learn library <https://scikit-learn.org/stable/modules/
generated/sklearn.gaussian_process.GaussianProcessRegressor.html>`_.
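
Usage
-----

A minimal usage sketch,
assuming ``dataset`` is a learning :class:`.Dataset`
with two-dimensional inputs
(the query point below is illustrative):

.. code-block:: python

    from numpy import array

    gpr = GaussianProcessRegression(dataset)
    gpr.learn()  # fit the underlying scikit-learn regressor

    x_new = array([[0.5, 0.5]])
    mean = gpr.predict(x_new)  # the expectation f_hat(x)
    std = gpr.predict_std(x_new)  # the local error measure s_hat(x)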
"""

from __future__ import division, unicode_literals

import logging
from typing import Callable, Iterable, Optional, Union

from numpy import atleast_2d, ndarray
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Kernel, Matern

from gemseo.core.dataset import Dataset
from gemseo.mlearning.core.ml_algo import DataType, TransformerType
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.data_conversion import DataConversion

LOGGER = logging.getLogger(__name__)


class GaussianProcessRegression(MLRegressionAlgo):
    """Gaussian process regression."""

    LIBRARY = "scikit-learn"
    ABBR = "GPR"

def __init__(
self,
data, # type: Dataset
transformer=None, # type: Optional[TransformerType]
input_names=None, # type: Optional[Iterable[str]]
output_names=None, # type: Optional[Iterable[str]]
        kernel=None,  # type: Optional[Kernel]
alpha=1e-10, # type: Union[float,ndarray]
optimizer="fmin_l_bfgs_b", # type: Union[str,Callable]
n_restarts_optimizer=10, # type: int
random_state=None, # type: Optional[int]
): # type: (...) -> None
"""
Args:
kernel: The kernel function. If None, use a ``Matern(2.5)``.
alpha: The nugget effect to regularize the model.
optimizer: The optimization algorithm to find the hyperparameters.
n_restarts_optimizer: The number of restarts of the optimizer.
random_state: The seed used to initialize the centers.
If None, the random number generator is the RandomState instance
used by `numpy.random`.
"""
super(GaussianProcessRegression, self).__init__(
data,
transformer=transformer,
input_names=input_names,
output_names=output_names,
kernel=kernel,
alpha=alpha,
optimizer=optimizer,
n_restarts_optimizer=n_restarts_optimizer,
random_state=random_state,
)
if kernel is None:
raw_input_shape, _ = self._get_raw_shapes()
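            # Default kernel: an anisotropic Matérn 5/2 kernel
            # with a unit correlation length per input dimension,
            # bounded in [0.01, 100] during hyperparameter estimation.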
self.kernel = Matern(
(1.0,) * raw_input_shape, [(0.01, 100)] * raw_input_shape, nu=2.5
)
else:
self.kernel = kernel
        self.algo = GaussianProcessRegressor(
            normalize_y=False,
            kernel=self.kernel,
            copy_X_train=True,
            alpha=alpha,
            optimizer=optimizer,
            n_restarts_optimizer=n_restarts_optimizer,
            random_state=random_state,
        )
self.parameters["kernel"] = self.kernel.__class__.__name__

    def _fit(
        self,
        input_data,  # type: ndarray
        output_data,  # type: ndarray
    ):  # type: (...) -> None
        self.algo.fit(input_data, output_data)

    def _predict(
        self,
        input_data,  # type: ndarray
    ):  # type: (...) -> ndarray
        return self.algo.predict(input_data, return_std=False)

    def predict_std(
        self,
        input_data,  # type: DataType
    ):  # type: (...) -> ndarray
        """Predict the standard deviation from input data.

        Args:
            input_data: The input data with shape (n_samples, n_inputs).

        Returns:
            The standard deviation of the predicted outputs,
            averaged over the input samples.
        """
        if isinstance(input_data, dict):
            input_data = DataConversion.dict_to_array(input_data, self.input_names)

        input_data = atleast_2d(input_data)

        # Apply the transformer fitted on the learning inputs, if any.
        inputs = self.learning_set.INPUT_GROUP
        if inputs in self.transformer:
            input_data = self.transformer[inputs].transform(input_data)

        # scikit-learn returns (mean, standard deviation) when return_std=True.
        _, output_std = self.algo.predict(input_data, return_std=True)
        return sum(output_std) / len(output_std)