Source code for gemseo_mlearning.algos.opt.core.surrogate_based
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A class for surrogate-based optimization."""
from __future__ import annotations
import logging
from types import MappingProxyType
from typing import Mapping
from gemseo.algos.doe.doe_factory import DOEFactory
from gemseo.algos.doe.doe_library import DOELibrary
from gemseo.algos.doe.doe_library import DOELibraryOptionType
from gemseo.algos.doe.lib_openturns import OpenTURNS
from gemseo.algos.opt_problem import OptimizationProblem
from gemseo.mlearning.core.ml_algo import MLAlgoParameterType
from gemseo.mlearning.regression.factory import RegressionModelFactory
from gemseo.mlearning.regression.gpr import GaussianProcessRegressor
from gemseo.mlearning.regression.regression import MLRegressionAlgo

from gemseo_mlearning.adaptive.acquisition import MLDataAcquisition
from gemseo_mlearning.adaptive.criteria.optimum.criterion import ExpectedImprovement
from gemseo_mlearning.adaptive.distributions import get_regressor_distribution
from gemseo_mlearning.algos.opt import OptimizationLibraryOptionType

LOGGER = logging.getLogger(__name__)


class SurrogateBasedOptimizer:
"""An optimizer based on surrogate models."""

    __STOP_BECAUSE_ALREADY_KNOWN = "The acquired input data is already known."
__STOP_BECAUSE_MAX_ACQUISITIONS = "All the data acquisitions have been made."

    def __init__(
self,
problem: OptimizationProblem,
acquisition_algorithm: str,
doe_size: int = 0,
doe_algorithm: str = OpenTURNS.OT_LHSO,
doe_options: Mapping[str, DOELibraryOptionType] = MappingProxyType({}),
regression_algorithm: str = GaussianProcessRegressor.__name__,
regression_options: Mapping[str, MLAlgoParameterType] = MappingProxyType({}),
acquisition_options: Mapping[
str, OptimizationLibraryOptionType
] = MappingProxyType({}),
) -> None:
"""
Args:
            problem: The optimization problem.
            acquisition_algorithm: The name of the algorithm to optimize the data
                acquisition criterion.
                N.B. this algorithm must handle integer variables
                if some of the optimization variables are integers.
doe_size: The size of the initial DOE.
Should be ``0`` if the DOE algorithm does not have a ``n_samples`` option.
doe_algorithm: The name of the algorithm for the initial sampling.
doe_options: The options of the algorithm for the initial sampling.
regression_algorithm: The name of the regression algorithm for the
objective function.
regression_options: The options of the regression algorithm for the
objective function.
acquisition_options: The options of the algorithm to optimize
the data acquisition criterion.
""" # noqa: D205, D212, D415
self.__acquisition = None
self.__distribution = None
self.__problem = problem
# Initialize the surrogate model of the objective function
# Store max_iter as it will be overwritten by DOELibrary
max_iter = self.__problem.max_iter
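        # Forward the DOE size as the number of samples,
        # unless the caller already provided it in the DOE options.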
options = dict(doe_options)
if doe_size > 0 and DOELibrary.N_SAMPLES not in options:
options[DOELibrary.N_SAMPLES] = doe_size
DOEFactory().execute(self.__problem, doe_algorithm, **options)
self.__problem.max_iter = max_iter
self.__model = RegressionModelFactory().create(
regression_algorithm,
data=self.__problem.to_dataset(opt_naming=False),
transformer=MLRegressionAlgo.DEFAULT_TRANSFORMER,
**regression_options,
)
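        # Wrap the regressor in a distribution to estimate the prediction
        # uncertainty required by the expected improvement criterion.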
self.__distribution = get_regressor_distribution(self.__model)
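        # Set up the acquisition process, which proposes new learning points
        # by maximizing the expected improvement of the surrogate.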
self.__acquisition = MLDataAcquisition(
ExpectedImprovement.__name__,
self.__problem.design_space,
self.__distribution,
)
self.__acquisition.set_acquisition_algorithm(
acquisition_algorithm, **acquisition_options
)

    def execute(self, number_of_acquisitions: int) -> str:
        """Execute the surrogate-based optimization.

        Args:
            number_of_acquisitions: The number of learning points to be acquired.

        Returns:
            The termination message.
        """
self.__distribution.learn()
message = self.__STOP_BECAUSE_MAX_ACQUISITIONS
for _ in range(number_of_acquisitions):
input_data = self.__acquisition.compute_next_input_data()
if input_data in self.__problem.database:
message = self.__STOP_BECAUSE_ALREADY_KNOWN
break
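            # Evaluate the objective (and any constraints) at the acquired point.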
self.__problem.evaluate_functions(input_data, normalize=False)
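            # Retrain the surrogate on the enriched dataset
            # and update the acquisition problem accordingly.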
self.__distribution.change_learning_set(
self.__problem.to_dataset(opt_naming=False)
)
self.__acquisition.update_problem()
return message
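

# A minimal usage sketch, not part of the original module: it assumes GEMSEO's
# ``Rosenbrock`` benchmark problem is available and that ``NLOPT_COBYLA`` is an
# installed gradient-free optimizer; any ``OptimizationProblem`` and any
# acquisition algorithm handling the design space should work the same way.
if __name__ == "__main__":
    from gemseo.problems.analytical.rosenbrock import Rosenbrock

    rosenbrock = Rosenbrock()  # assumed benchmark problem
    optimizer = SurrogateBasedOptimizer(
        rosenbrock,
        "NLOPT_COBYLA",  # assumed to be an available optimization algorithm
        doe_size=10,
        acquisition_options={"max_iter": 100},
    )
    # Acquire up to 20 learning points and log the termination message.
    LOGGER.info(optimizer.execute(20))
    # All evaluations are stored in the problem database.
    LOGGER.info("Optimum: %s", rosenbrock.get_optimum())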