# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# Contributors:
# INITIAL AUTHORS - API and implementation and/or documentation
# :author: Matthias De Lozzo
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""Acquisition of learning data from a machine learning algorithm and a criterion."""

from __future__ import annotations

import logging
from typing import Any
from typing import ClassVar

from gemseo.algos.database import Database
from gemseo.algos.design_space import DesignSpace
from gemseo.algos.doe.doe_factory import DOEFactory
from gemseo.algos.opt.opt_factory import OptimizersFactory
from gemseo.algos.opt_problem import OptimizationProblem
from gemseo.core.discipline import MDODiscipline
from gemseo.mlearning.core.ml_algo import DataType
from numpy import array

from gemseo_mlearning.adaptive.criterion import MLDataAcquisitionCriterionFactory
from gemseo_mlearning.adaptive.distribution import MLRegressorDistribution

LOGGER = logging.getLogger(__name__)

_CRITERION_FACTORY = MLDataAcquisitionCriterionFactory()


class MLDataAcquisition:
    """Data acquisition for adaptive learning.
    default_algo_name: ClassVar[str] = "NLOPT_COBYLA"
    """The name of the default algorithm to find the point(s).

    Typically a DoE or an optimizer.
    """

    default_opt_options: ClassVar[dict[str, Any]] = {"max_iter": 100}
    """The names and values of the default optimization options."""

    default_doe_options: ClassVar[dict[str, Any]] = {"n_samples": 100}
    """The names and values of the default DoE options."""

def __init__(
self,
criterion: str,
input_space: DesignSpace,
distribution: MLRegressorDistribution,
**options: Any,
) -> None:
"""# noqa: D205 D212 D415
Args:
criterion: The name of a data acquisition criterion
selecting new point(s) to reach a particular goal
(name of a class inheriting from :class:`.MLDataAcquisitionCriterion`).
input_space: The input space on which to look for the new learning point.
distribution: The distribution of the machine learning algorithm.
            **options: The options of the acquisition criterion.

        Raises:
NotImplementedError: When the output dimension is greater than 1.
"""
if distribution.output_dimension > 1:
raise NotImplementedError(
"MLDataAcquisition works only with scalar output."
)
self.__algo_name = self.default_algo_name
self.__algo_options = self.default_opt_options
self.__algo = OptimizersFactory().create(self.__algo_name)
self.__criterion = criterion
self.__input_space = input_space
self.__criterion_options = options.copy()
self.__distribution = distribution
self.__database = Database()
self.__problem = self.__build_optimization_problem()

    def __build_optimization_problem(self) -> OptimizationProblem:
"""Create the optimization problem.
The data acquisition criterion is the objective (either a cost or a performance)
while the input space is the design space.
Approximate the Jacobian with finite differences if missing.
Returns:
The optimization problem.
"""
problem = OptimizationProblem(self.__input_space)
problem.objective = _CRITERION_FACTORY.create(
self.__criterion, self.__distribution, **self.__criterion_options
)
problem.objective.name = self.__criterion
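        # Criteria without an analytic Jacobian are differentiated
        # by finite differences.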
if not problem.objective.has_jac():
problem.differentiation_method = OptimizationProblem.FINITE_DIFFERENCES
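        # Optimizers minimize; when the criterion is a performance measure
        # to be maximized, flip the sign of the objective.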
if problem.objective.MAXIMIZE:
problem.change_objective_sign()
return problem

    def set_acquisition_algorithm(self, algo_name: str, **options: Any) -> None:
"""Set sampling or optimization algorithm.
Args:
algo_name: The name of the algorithm to find the learning point(s).
Typically a DoE or an optimizer.
**options: The values of some algorithm options;
use the default values for the other ones.
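
        Examples:
            A sketch, assuming the ``"OT_OPT_LHS"`` DOE algorithm is available
            in the GEMSEO factory:

            >>> acquisition.set_acquisition_algorithm("OT_OPT_LHS", n_samples=200)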
"""
self.__algo_name = algo_name
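        # Look the algorithm name up in the DOE factory first;
        # otherwise fall back to the optimization factory.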
factory = DOEFactory()
if factory.is_available(algo_name):
self.__algo_options = self.default_doe_options.copy()
else:
factory = OptimizersFactory()
self.__algo_options = self.default_opt_options.copy()
self.__algo_options.update(options)
self.__algo = factory.create(algo_name)

    def update_algo(
        self, discipline: MDODiscipline, n_samples: int = 1
    ) -> tuple[Database, OptimizationProblem]:
"""Update the machine learning algorithm by learning new samples.
This method acquires new learning input-output samples
and trains the machine learning algorithm
with the resulting enriched learning set.
Args:
discipline: The discipline computing the reference output data
from the input data provided by the acquisition process.
            n_samples: The number of samples with which to update
                the machine learning algorithm.

        Returns:
The concatenation of the optimization histories
related to the different points
and the last optimization problem.
"""
        # Save the logging levels once, so that they can be restored
        # after each acquisition iteration.
        root_logger = logging.getLogger()
        saved_level = root_logger.level
        for index in range(n_samples):
            # Mute messages below WARNING while the acquisition criterion
            # is optimized, so that only the acquisition progress is logged.
            root_logger.setLevel(logging.WARNING)
            LOGGER.setLevel(logging.WARNING)
input_data = self.compute_next_input_data(as_dict=True)
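            # Archive the history of the criterion optimization,
            # indexed by the acquisition iteration.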
for inputs, outputs in self.__problem.database.items():
self.__database[array([index + 1] + inputs.unwrap().tolist())] = outputs
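            # Evaluate the discipline to obtain the reference output data
            # at the newly acquired input point.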
discipline.execute(input_data)
learning_cache = self.__distribution.algo.learning_set.export_to_cache()
learning_cache[input_data] = (
{k: discipline.local_data[k] for k in self.__distribution.output_names},
None,
)
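            # Retrain the distribution on the enriched learning set
            # and rebuild the optimization problem around the updated criterion.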
self.__distribution.change_learning_set(learning_cache.export_to_dataset())
self.__problem = self.__build_optimization_problem()
            root_logger.setLevel(saved_level)
            LOGGER.setLevel(saved_level)
            LOGGER.info("Add sample %s out of %s", index + 1, n_samples)
return self.__database, self.__problem