Source code for gemseo.core.surrogate_disc

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                         documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Surrogate discipline."""
from __future__ import division, unicode_literals

import logging
from typing import Dict, Iterable, Mapping, Optional, Union

from numpy import ndarray

from gemseo.core.dataset import Dataset
from gemseo.core.discipline import MDODiscipline
from gemseo.core.jacobian_assembly import JacobianAssembly
from gemseo.mlearning.core.ml_algo import MLAlgoParameterType, TransformerType
from gemseo.mlearning.regression.factory import RegressionModelFactory
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.string_tools import MultiLineString, pretty_repr

LOGGER = logging.getLogger(__name__)


[docs]class SurrogateDiscipline(MDODiscipline): """A :class:`.MDODiscipline` approximating another one with a surrogate model. This surrogate model is a regression model implemented as a :class:`.MLRegressionAlgo`. This :class:`.MLRegressionAlgo` is built from an input- output :class:`.Dataset` composed of evaluations of the original discipline. """ _ATTR_TO_SERIALIZE = MDODiscipline._ATTR_TO_SERIALIZE + ("regression_model",) def __init__( self, surrogate, # type: Union[str,MLRegressionAlgo] data=None, # type: Optional[Dataset] transformer=MLRegressionAlgo.DEFAULT_TRANSFORMER, # type: Optional[TransformerType] disc_name=None, # type: Optional[str] default_inputs=None, # type: Optional[Dict[str,ndarray]] input_names=None, # type: Optional[Iterable[str]] output_names=None, # type: Optional[Iterable[str]] **parameters # type: MLAlgoParameterType ): # type: (...) -> None """ Args: surrogate: Either the class name or the instance of the :class:`.MLRegressionAlgo`. data: The learning dataset to train the regression model. If None, the regression model is supposed to be trained. transformer: The strategies to transform the variables. The values are instances of :class:`.Transformer` while the keys are the names of either the variables or the groups of variables, e.g. "inputs" or "outputs" in the case of the regression algorithms. If a group is specified, the :class:`.Transformer` will be applied to all the variables of this group. If None, do not transform the variables. The :attr:`.MLRegressionAlgo.DEFAULT_TRANSFORMER` uses the :class:`.MinMaxScaler` strategy for both input and output variables. disc_name: The name to be given to the surrogate discipline. If None, concatenate :attr:`.ABBR` and ``data.name``. default_inputs: The default values of the inputs. If None, use the center of the learning input space. input_names: The names of the input variables. If None, consider all input variables mentioned in the learning dataset. output_names: The names of the output variables. If None, consider all input variables mentioned in the learning dataset. **parameters: The parameters of the machine learning algorithm. Raises: ValueError: If the learning dataset is missing whilst the regression model is not trained. """ if isinstance(surrogate, MLRegressionAlgo): self.regression_model = surrogate name = self.regression_model.learning_set.name elif data is None: raise ValueError("data is required to train the surrogate model.") else: factory = RegressionModelFactory() self.regression_model = factory.create( surrogate, data=data, transformer=transformer, input_names=input_names, output_names=output_names, **parameters ) name = "{}_{}".format(self.regression_model.ABBR, data.name) disc_name = disc_name or name if not self.regression_model.is_trained: self.regression_model.learn() msg = MultiLineString() msg.add("Build the surrogate discipline: {}", disc_name) msg.indent() msg.add("Dataset name: {}", data.name) msg.add("Dataset size: {}", data.length) msg.add("Surrogate model: {}", self.regression_model.__class__.__name__) LOGGER.info("%s", msg) if not name.startswith(self.regression_model.ABBR): disc_name = "{}_{}".format(self.regression_model.ABBR, disc_name) msg = MultiLineString() msg.add("Use the surrogate discipline: {}", disc_name) msg.indent() super(SurrogateDiscipline, self).__init__(disc_name) self._initialize_grammars(input_names, output_names) msg.add("Inputs: {}", pretty_repr(self.get_input_data_names())) msg.add("Outputs: {}", pretty_repr(self.get_output_data_names())) self._set_default_inputs(default_inputs) self.add_differentiated_inputs() self.add_differentiated_outputs() try: self.regression_model.predict_jacobian(self.default_inputs) self.linearization_mode = JacobianAssembly.AUTO_MODE msg.add("Jacobian: use surrogate model jacobian") except NotImplementedError: self.linearization_mode = self.FINITE_DIFFERENCES msg.add("Jacobian: use finite differences") LOGGER.info("%s", msg) def __repr__(self): # type: (...) -> str model = self.regression_model.__class__.__name__ data_name = self.regression_model.learning_set.name length = len(self.regression_model.learning_set) inputs = sorted(self.regression_model.input_names) outputs = sorted(self.regression_model.output_names) arguments = [ "name={}".format(self.name), "algo={}".format(model), "data={}".format(data_name), "size={}".format(length), "inputs=[{}]".format(pretty_repr(inputs)), "outputs=[{}]".format(pretty_repr(outputs)), "jacobian={}".format(self.linearization_mode), ] msg = "SurrogateDiscipline({})".format(", ".join(arguments)) return msg def __str__(self): # type: (...) -> str data_name = self.regression_model.learning_set.name length = len(self.regression_model.learning_set) msg = MultiLineString() msg.add("Surrogate discipline: {}", self.name) msg.indent() msg.add("Dataset name: {}", data_name) msg.add("Dataset size: {}", length) msg.add("Surrogate model: {}", self.regression_model.__class__.__name__) inputs = sorted(self.regression_model.input_names) outputs = sorted(self.regression_model.output_names) msg.add("Inputs: {}", pretty_repr(inputs)) msg.add("Outputs: {}", pretty_repr(outputs)) return str(msg) def _initialize_grammars( self, input_names=None, # type: Optional[Iterable[str]] output_names=None, # type: Optional[Iterable[str]] ): # type: (...) -> None """Initialize the input and output grammars from the regression model. Args: input_names: The names of the inputs to consider. If None, use all the inputs of the regression model. output_names: The names of the inputs to consider. If None, use all the inputs of the regression model. """ self.input_grammar.initialize_from_data_names( input_names or self.regression_model.input_names ) self.output_grammar.initialize_from_data_names( output_names or self.regression_model.output_names ) def _set_default_inputs( self, default_inputs=None, # type: Mapping[str,ndarray] ): # type: (...) -> None """Set the default values of the inputs. Args: default_inputs: The default values of the inputs. If None, use the the center of the learning input space. """ if default_inputs is None: self._default_inputs = self.regression_model.input_space_center else: self._default_inputs = default_inputs def _run(self): # type: (...) -> None input_data = self.get_input_data() output_data = self.regression_model.predict(input_data) output_data = {key: val.flatten() for key, val in output_data.items()} self.local_data.update(output_data) def _compute_jacobian( self, inputs=None, # type: Optional[Iterable[str]], outputs=None, # type: Optional[Iterable[str]] ): # type: (...) -> None input_data = self.get_input_data() self._init_jacobian(inputs, outputs) self.jac = self.regression_model.predict_jacobian(input_data)