Source code for gemseo.core.surrogate_disc

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                         documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""
Surrogate discipline baseclass
******************************
"""
from __future__ import division, unicode_literals

import logging

from gemseo.core.discipline import MDODiscipline
from gemseo.core.jacobian_assembly import JacobianAssembly
from gemseo.mlearning.regression.factory import RegressionModelFactory
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.utils.data_conversion import DataConversion
from gemseo.utils.string_tools import MultiLineString, pretty_repr

LOGGER = logging.getLogger(__name__)


class SurrogateDiscipline(MDODiscipline):
    """Surrogate discipline class.

    A discipline whose outputs are predicted by a regression model,
    either passed as an instance or created from an algorithm name
    and a training dataset.
    """

    def __init__(
        self,
        surrogate,
        data=None,
        transformer=MLRegressionAlgo.DEFAULT_TRANSFORMER,
        disc_name=None,
        default_inputs=None,
        input_names=None,
        output_names=None,
        **parameters
    ):
        """Constructor.

        :param surrogate: either the name of the surrogate model algorithm
            or an already instantiated regression model.
            In the latter case, data, transformer and parameters
            are not used to build the model.
        :type surrogate: str or MLRegressionAlgo
        :param Dataset data: dataset to train the surrogate.
            If None, assumes that the surrogate is trained. Default: None.
        :param dict(Transformer) transformer: transformation strategy for
            data groups. If None, do not scale data.
            Default: MLRegressionAlgo.DEFAULT_TRANSFORMER,
            which is a min/max scaler applied to the inputs
            and a min/max scaler applied to the outputs.
        :param str disc_name: name of the surrogate discipline.
            If None, use surrogate.ABBR + data.name . Default: None
        :param dict default_inputs: default inputs. If None, use the center
            of the learning input space. Default: None.
        :param list(str) input_names: list of input names.
            If None, use all inputs. Default: None.
        :param list(str) output_names: list of output names.
            If None, use all outputs. Default: None.
        :param parameters: surrogate model parameters.
        :raises ValueError: if surrogate is not a regression model instance
            and data is None.
        """
        if isinstance(surrogate, MLRegressionAlgo):
            self.regression_model = surrogate
            name = self.regression_model.learning_set.name
        elif data is None:
            raise ValueError("data is required to train the surrogate model.")
        else:
            factory = RegressionModelFactory()
            self.regression_model = factory.create(
                surrogate,
                data=data,
                transformer=transformer,
                input_names=input_names,
                output_names=output_names,
                **parameters
            )
            name = self.regression_model.ABBR + "_" + data.name
        disc_name = disc_name or name
        if not self.regression_model.is_trained:
            self.regression_model.learn()
            # Describe the dataset through the model's learning set rather
            # than through ``data``: ``data`` is None when an untrained
            # regression model instance is passed without a dataset,
            # which would make ``data.name`` fail after the training.
            learning_set = self.regression_model.learning_set
            msg = MultiLineString()
            msg.add("Build the surrogate discipline: {}", disc_name)
            msg.indent()
            msg.add("Dataset name: {}", learning_set.name)
            msg.add("Dataset size: {}", len(learning_set))
            msg.add("Surrogate model: {}", self.regression_model.__class__.__name__)
            LOGGER.info("%s", msg)
        # NOTE(review): the prefix test looks at ``name`` and not at the final
        # ``disc_name``, so a user-supplied ``disc_name`` is prefixed too when
        # the model was passed as an instance whose dataset name does not
        # start with the abbreviation -- confirm this is intended.
        if not name.startswith(self.regression_model.ABBR):
            disc_name = self.regression_model.ABBR + "_" + disc_name
        msg = MultiLineString()
        msg.add("Use the surrogate discipline: {}", disc_name)
        msg.indent()
        super(SurrogateDiscipline, self).__init__(disc_name)
        self._initialize_grammars(input_names, output_names)
        msg.add("Inputs: {}", pretty_repr(self.get_input_data_names()))
        msg.add("Outputs: {}", pretty_repr(self.get_output_data_names()))
        self._set_default_inputs(default_inputs)
        self.add_differentiated_inputs()
        self.add_differentiated_outputs()
        # Probe the model once at the default inputs: a model that does not
        # implement its Jacobian raises NotImplementedError, in which case
        # the discipline falls back to finite differences.
        try:
            self.regression_model.predict_jacobian(self.default_inputs)
            self.linearization_mode = JacobianAssembly.AUTO_MODE
            msg.add("Jacobian: use surrogate model jacobian")
        except NotImplementedError:
            self.linearization_mode = self.FINITE_DIFFERENCES
            msg.add("Jacobian: use finite differences")
        LOGGER.info("%s", msg)

    def __repr__(self):
        """Return a one-line summary of the discipline.

        The summary gathers the discipline name, the regression model class,
        the learning set name and size, the sorted input and output names
        of the model and the linearization mode.
        """
        model = self.regression_model.__class__.__name__
        data_name = self.regression_model.learning_set.name
        length = len(self.regression_model.learning_set)
        inputs = sorted(self.regression_model.input_names)
        outputs = sorted(self.regression_model.output_names)
        arguments = [
            "name={}".format(self.name),
            "algo={}".format(model),
            "data={}".format(data_name),
            "size={}".format(length),
            "inputs=[{}]".format(pretty_repr(inputs)),
            "outputs=[{}]".format(pretty_repr(outputs)),
            "jacobian={}".format(self.linearization_mode),
        ]
        msg = "SurrogateDiscipline({})".format(", ".join(arguments))
        return msg

    def __str__(self):
        """Return a multi-line description of the discipline.

        The description gathers the discipline name, the learning set name
        and size, the regression model class and the sorted input and output
        names of the model.
        """
        data_name = self.regression_model.learning_set.name
        length = len(self.regression_model.learning_set)
        msg = MultiLineString()
        msg.add("Surrogate discipline: {}", self.name)
        msg.indent()
        msg.add("Dataset name: {}", data_name)
        msg.add("Dataset size: {}", length)
        msg.add("Surrogate model: {}", self.regression_model.__class__.__name__)
        inputs = sorted(self.regression_model.input_names)
        outputs = sorted(self.regression_model.output_names)
        msg.add("Inputs: {}", pretty_repr(inputs))
        msg.add("Outputs: {}", pretty_repr(outputs))
        return str(msg)

    def _initialize_grammars(self, input_names=None, output_names=None):
        """Initializes the inputs and outputs grammars from data.

        The first row of the learning set is used as a sample
        to initialize both grammars.

        :param list(str) input_names: list of input names.
            If None, use all the variables of the input group. Default: None.
        :param list(str) output_names: list of output names.
            If None, use all the variables of the output group. Default: None.
        """
        learning_set = self.regression_model.learning_set
        in_grp = learning_set.INPUT_GROUP
        out_grp = learning_set.OUTPUT_GROUP
        if input_names is None:
            inputs = learning_set.get_data_by_group(in_grp)[0, :]
            input_names = learning_set.get_names(in_grp)
        else:
            # presumably False requests an array instead of a dictionary
            # -- TODO confirm against Dataset.get_data_by_names
            inputs = learning_set.get_data_by_names(input_names, False)[0, :]
        if output_names is None:
            outputs = learning_set.get_data_by_group(out_grp)[0, :]
            output_names = learning_set.get_names(out_grp)
        else:
            outputs = learning_set.get_data_by_names(output_names, False)[0, :]
        inputs = DataConversion.array_to_dict(inputs, input_names, learning_set.sizes)
        outputs = DataConversion.array_to_dict(
            outputs, output_names, learning_set.sizes
        )
        self.input_grammar.initialize_from_base_dict(inputs)
        self.output_grammar.initialize_from_base_dict(outputs)

    def _set_default_inputs(self, default_inputs=None):
        """Set default inputs either from the center of the learning input space
        or from user specification.

        :param dict default_inputs: user default inputs.
            If None, use the learning input space center. Default: None.
        """
        if default_inputs is None:
            self._default_inputs = self.regression_model.input_space_center
        else:
            self._default_inputs = default_inputs

    def _run(self):
        """Predict the outputs from the current inputs with the regression model.

        The predicted values are flattened before being stored in the local data.
        """
        input_data = self.get_input_data()
        output_data = self.regression_model.predict(input_data)
        output_data = {key: val.flatten() for key, val in output_data.items()}
        self.local_data.update(output_data)

    def _compute_jacobian(self, inputs=None, outputs=None):
        """Set the Jacobian to the one predicted by the regression model.

        :param inputs: inputs passed to the Jacobian initialization.
            Default: None.
        :param outputs: outputs passed to the Jacobian initialization.
            Default: None.
        """
        input_data = self.get_input_data()
        self._init_jacobian(inputs, outputs)
        self.jac = self.regression_model.predict_jacobian(input_data)