Source code for gemseo.disciplines.surrogate

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                         documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Surrogate discipline."""
from __future__ import annotations

import logging
from typing import Any
from typing import Iterable
from typing import Mapping

from numpy import ndarray

from gemseo.core.discipline import MDODiscipline
from gemseo.datasets.dataset import Dataset
from gemseo.mlearning.core.ml_algo import MLAlgoParameterType
from gemseo.mlearning.core.ml_algo import TransformerType
from gemseo.mlearning.quality_measures.error_measure import MLErrorMeasure
from gemseo.mlearning.quality_measures.error_measure_factory import (
    MLErrorMeasureFactory,
)
from gemseo.mlearning.regression.factory import RegressionModelFactory
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.string_tools import MultiLineString
from gemseo.utils.string_tools import pretty_str

LOGGER = logging.getLogger(__name__)


[docs]class SurrogateDiscipline(MDODiscipline): """A discipline wrapping a regression model built from a dataset. Examples: >>> import numpy as np >>> from gemseo.datasets.io_dataset import IODataset >>> from gemseo.disciplines.surrogate import SurrogateDiscipline >>> >>> # Create an input-output dataset. >>> dataset = IODataset() >>> dataset.add_input_variable("x", np.array([[1.], [2.], [3.]])) >>> dataset.add_output_variable("y", np.array([[3.], [5.], [6.]])) >>> >>> # Build a surrogate discipline relying on a linear regression model. >>> surrogate_discipline = SurrogateDiscipline("LinearRegressor", dataset) >>> >>> # Assess its quality with the R2 measure. >>> r2 = surrogate_discipline.get_error_measure("R2Measure") >>> learning_r2 = r2.evaluate_learn() >>> >>> # Execute the surrogate discipline, with default or custom input values. >>> surrogate_discipline.execute() >>> surrogate_discipline.execute({"x": np.array([1.5])}) """ regression_model: MLRegressionAlgo """The regression model called by the surrogate discipline.""" __error_measure_factory: MLErrorMeasureFactory """The factory of error measures.""" def __init__( self, surrogate: str | MLRegressionAlgo, data: Dataset | None = None, transformer: TransformerType = MLRegressionAlgo.DEFAULT_TRANSFORMER, disc_name: str | None = None, default_inputs: dict[str, ndarray] | None = None, input_names: Iterable[str] | None = None, output_names: Iterable[str] | None = None, **parameters: MLAlgoParameterType, ) -> None: """ Args: surrogate: Either the name of a class deriving from :class:`.MLRegressionAlgo` or the instance of an :class:`.MLRegressionAlgo`. data: The learning dataset to train the regression model. If ``None``, the regression model is supposed to be trained. transformer: The strategies to transform the variables. The values are instances of :class:`.Transformer` while the keys are the names of either the variables or the groups of variables, e.g. ``"inputs"`` or ``"outputs"`` in the case of the regression algorithms. If a group is specified, the :class:`.Transformer` will be applied to all the variables of this group. If :attr:`~.MLAlgo.IDENTITY, do not transform the variables. The :attr:`.MLRegressionAlgo.DEFAULT_TRANSFORMER` uses the :class:`.MinMaxScaler` strategy for both input and output variables. disc_name: The name to be given to the surrogate discipline. If ``None``, concatenate :attr:`.SHORT_ALGO_NAME` and ``data.name``. default_inputs: The default values of the inputs. If ``None``, use the center of the learning input space. input_names: The names of the input variables. If ``None``, consider all input variables mentioned in the learning dataset. output_names: The names of the output variables. If ``None``, consider all input variables mentioned in the learning dataset. **parameters: The parameters of the machine learning algorithm. Raises: ValueError: If the learning dataset is missing whilst the regression model is not trained. """ # noqa: D205, D212, D415 self.__error_measure_factory = MLErrorMeasureFactory() if isinstance(surrogate, MLRegressionAlgo): self.regression_model = surrogate name = self.regression_model.learning_set.name elif data is None: raise ValueError("data is required to train the surrogate model.") else: factory = RegressionModelFactory() self.regression_model = factory.create( surrogate, data=data, transformer=transformer, input_names=input_names, output_names=output_names, **parameters, ) name = f"{self.regression_model.SHORT_ALGO_NAME}_{data.name}" disc_name = disc_name or name if not self.regression_model.is_trained: self.regression_model.learn() msg = MultiLineString() msg.add("Build the surrogate discipline: {}", disc_name) msg.indent() msg.add("Dataset size: {}", data.n_samples) msg.add("Surrogate model: {}", self.regression_model.__class__.__name__) LOGGER.info("%s", msg) if not name.startswith(self.regression_model.SHORT_ALGO_NAME): disc_name = f"{self.regression_model.SHORT_ALGO_NAME}_{disc_name}" msg = MultiLineString() msg.add("Use the surrogate discipline: {}", disc_name) msg.indent() super().__init__(disc_name) self._initialize_grammars(input_names, output_names) msg.add("Inputs: {}", pretty_str(self.get_input_data_names())) msg.add("Outputs: {}", pretty_str(self.get_output_data_names())) self._set_default_inputs(default_inputs) self.add_differentiated_inputs() self.add_differentiated_outputs() try: self.regression_model.predict_jacobian(self.default_inputs) self.linearization_mode = self.LinearizationMode.AUTO msg.add("Jacobian: use surrogate model jacobian") except NotImplementedError: self.linearization_mode = self.LinearizationMode.FINITE_DIFFERENCES msg.add("Jacobian: use finite differences") LOGGER.info("%s", msg) @property def _string_representation(self) -> MultiLineString: """The string representation of the object.""" mls = MultiLineString() mls.add("Surrogate discipline: {}", self.name) mls.indent() mls.add("Dataset name: {}", self.regression_model.learning_set.name) mls.add("Dataset size: {}", len(self.regression_model.learning_set)) mls.add("Surrogate model: {}", self.regression_model.__class__.__name__) mls.add("Inputs: {}", pretty_str(self.regression_model.input_names)) mls.add("Outputs: {}", pretty_str(self.regression_model.output_names)) mls.add("Linearization mode: {}", self.linearization_mode) return mls def __repr__(self) -> str: return str(self._string_representation) def _repr_html_(self) -> str: return self._string_representation._repr_html_() def _initialize_grammars( self, input_names: Iterable[str] | None = None, output_names: Iterable[str] | None = None, ) -> None: """Initialize the input and output grammars from the regression model. Args: input_names: The names of the inputs to consider. If ``None``, use all the inputs of the regression model. output_names: The names of the inputs to consider. If ``None``, use all the inputs of the regression model. """ self.input_grammar.update_from_names( input_names or self.regression_model.input_names ) self.output_grammar.update_from_names( output_names or self.regression_model.output_names ) def _set_default_inputs( self, default_inputs: Mapping[str, ndarray] | None = None, ) -> None: """Set the default values of the inputs. Args: default_inputs: The default values of the inputs. If ``None``, use the center of the learning input space. """ if default_inputs is None: self.default_inputs = self.regression_model.input_space_center else: self.default_inputs = default_inputs def _run(self) -> None: for name, value in self.regression_model.predict(self.get_input_data()).items(): self.local_data[name] = value.flatten() def _compute_jacobian( self, inputs: Iterable[str] | None = None, outputs: Iterable[str] | None = None, ) -> None: self._init_jacobian(inputs, outputs, MDODiscipline.InitJacobianType.EMPTY) self.jac = self.regression_model.predict_jacobian(self.get_input_data())
[docs] def get_error_measure( self, measure_name: str, **measure_options: Any, ) -> MLErrorMeasure: """Return an error measure. Args: measure_name: The class name of the error measure. **measure_options: The options of the error measure. Returns: The error measure. """ return self.__error_measure_factory.create( measure_name, algo=self.regression_model, **measure_options )