Source code for gemseo_benchmark.benchmarker.benchmarker

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Benoit Pauwels
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""A benchmarker of optimization algorithms on reference problems."""
from __future__ import annotations

import sys
from pathlib import Path
from typing import Iterable

from gemseo import configure_logger
from gemseo.algos.database import Database
from gemseo.algos.opt.opt_factory import OptimizersFactory
from gemseo.core.parallel_execution.callable_parallel_execution import (
    CallableParallelExecution,
)
from gemseo.utils.string_tools import pretty_str

from gemseo_benchmark import join_substrings
from gemseo_benchmark.algorithms.algorithm_configuration import AlgorithmConfiguration
from gemseo_benchmark.algorithms.algorithms_configurations import (
    AlgorithmsConfigurations,
)
from gemseo_benchmark.benchmarker.worker import Worker
from gemseo_benchmark.benchmarker.worker import WorkerOutputs
from gemseo_benchmark.problems.problem import Problem
from gemseo_benchmark.results.performance_history import PerformanceHistory
from gemseo_benchmark.results.results import Results

LOGGER = configure_logger()


[docs]class Benchmarker:
    """A benchmarker of optimization algorithms on reference problems."""

    _HISTORY_CLASS = PerformanceHistory

    def __init__(
        self,
        histories_path: Path,
        results_path: Path | None = None,
        databases_path: Path | None = None,
        pseven_logs_path: Path | None = None,
    ) -> None:
        """
        Args:
            histories_path: The path to the directory where to save the performance
                histories.
            results_path: The path to the file for saving the performance histories
                paths.
                If exists, the file is updated with the new performance histories paths.
            databases_path: The path to the destination directory for the databases.
                If ``None``, the databases will not be saved.
            pseven_logs_path: The path to the destination directory for the pSeven
                log files.
                If ``None``, the pSeven log files will not be saved.
        """  # noqa: D205, D212, D415
        self._databases_path = databases_path
        self.__histories_path = histories_path
        self.__optimizers_factory = OptimizersFactory()
        self.__is_algorithm_available = self.__optimizers_factory.is_available
        self.__pseven_logs_path = pseven_logs_path
        self.__results_path = results_path
        if results_path is not None and results_path.is_file():
            self._results = Results(results_path)
        else:
            self._results = Results()

[docs]    def execute(
        self,
        problems: Iterable[Problem],
        algorithms: AlgorithmsConfigurations,
        overwrite_histories: bool = False,
        number_of_processes: int = 1,
        use_threading: bool = False,
    ) -> Results:
        """Run optimization algorithms on reference problems.

        Args:
            problems: The benchmarking problems.
            algorithms: The algorithms configurations.
            overwrite_histories: Whether to overwrite the existing performance
                histories.
            number_of_processes: The maximum simultaneous number of threads or
                processes used to parallelize the execution.
            use_threading: Whether to use threads instead of processes
                to parallelize the execution.

        Returns:
            The results of the optimization.

        Raises:
            ValueError: If the algorithm is not available.
        """
        # Prepare the inputs of the benchmarking workers
        inputs = list()
        for algorithm_configuration in algorithms:
            algorithm_name = algorithm_configuration.algorithm_name
            if not self.__is_algorithm_available(algorithm_name):
                raise ValueError(f"The algorithm is not available: {algorithm_name}.")

            algorithm_configuration = self.__disable_stopping_criteria(
                algorithm_configuration
            )
            inputs.extend(
                [
                    (
                        (
                            self.__set_pseven_log_file(
                                algorithm_configuration, problem, problem_instance_index
                            ),
                            problem,
                            problem_instance,
                            problem_instance_index,
                        )
                    )
                    for problem in problems
                    for problem_instance_index, problem_instance in enumerate(problem)
                    if not self.__skip_instance(
                        algorithm_configuration,
                        problem,
                        problem_instance_index,
                        overwrite_histories,
                    )
                ]
            )

        if inputs:
            worker = Worker(self.__optimizers_factory, self._HISTORY_CLASS)
            if number_of_processes == 1:
                for worker_inputs in inputs:
                    self.__worker_callback(0, worker(worker_inputs))
            else:
                CallableParallelExecution(
                    [worker],
                    number_of_processes,
                    use_threading,
                ).execute(inputs, self.__worker_callback)

        return self._results

    @staticmethod
    def __disable_stopping_criteria(
        algorithm_configuration: AlgorithmConfiguration,
    ) -> AlgorithmConfiguration:
        """Disable the stopping criteria.

        Args:
            algorithm_configuration: The algorithm configuration.

        Returns:
            A copy of the algorithm configuration with disabled stopping criteria.
        """
        options = {
            "xtol_rel": 0.0,
            "xtol_abs": 0.0,
            "ftol_rel": 0.0,
            "ftol_abs": 0.0,
            "stop_crit_n_x": sys.maxsize,
        }
        options.update(algorithm_configuration.algorithm_options)
        return AlgorithmConfiguration(
            algorithm_configuration.algorithm_name,
            algorithm_configuration.name,
            **options,
        )

    def __skip_instance(
        self,
        algorithm_configuration: AlgorithmConfiguration,
        bench_problem: Problem,
        index: int,
        overwrite_histories: bool,
    ) -> bool:
        """Check whether a problem instance has already been solved.

        Args:
            algorithm_configuration: The algorithm configuration.
            bench_problem: The benchmarking problem.
            index: The index of the instance.
            overwrite_histories: Whether to overwrite existing histories.

        Returns:
            Whether to solve the problem instance.
        """
        instance = index + 1
        problem_name = bench_problem.name

        if not overwrite_histories and self._results.contains(
            algorithm_configuration.name,
            problem_name,
            self.__get_history_path(algorithm_configuration, problem_name, index),
        ):
            LOGGER.info(
                "Skipping instance %s of problem %s for algorithm configuration %s.",
                instance,
                problem_name,
                algorithm_configuration.name,
            )
            return True

        LOGGER.info(
            "Solving instance %s of problem %s with algorithm configuration %s.",
            instance,
            problem_name,
            algorithm_configuration.name,
        )
        return False

    def __set_pseven_log_file(
        self,
        algorithm_configuration: AlgorithmConfiguration,
        problem: Problem,
        index: int,
    ) -> AlgorithmConfiguration:
        """Copy an algorithm configuration by adding the path to the pSeven log file.

        Args:
            algorithm_configuration: The algorithm configuration.
            problem: The benchmarking problem.
            index: The index of the problem instance.

        Returns:
            A copy of the configuration including the path to the pSeven log file.
        """
        if not self.__pseven_logs_path or not self.__is_algorithm_available("PSEVEN"):
            return algorithm_configuration

        from gemseo.algos.opt.lib_pseven import PSevenOpt

        if algorithm_configuration.algorithm_name not in PSevenOpt().descriptions:
            return algorithm_configuration

        return AlgorithmConfiguration(
            algorithm_configuration.algorithm_name,
            algorithm_configuration.name,
            **algorithm_configuration.algorithm_options,
            log_path=pretty_str(
                self.__get_pseven_log_path(algorithm_configuration, problem.name, index)
            ),
        )

    def __worker_callback(self, _: int, outputs: WorkerOutputs) -> None:
        """Save the history and database of a benchmarking worker.

        Args:
            _: The index of the worker.
            outputs: The outputs of the worker.
        """
        problem, problem_instance_index, database, history = outputs
        self._save_history(history, problem_instance_index)
        if self._databases_path is not None:
            self.__save_database(
                database,
                history.algorithm_configuration,
                problem.name,
                problem_instance_index,
            )

        if self.__results_path:
            self._results.to_file(self.__results_path, indent=4)

    def _save_history(self, history: PerformanceHistory, index: int) -> None:
        """Save a performance history into a history file.

        Args:
            history: The performance history.
            index: The index of the problem instance.
        """
        problem_name = history.problem_name
        algorithm_configuration = history.algorithm_configuration
        path = self.__get_history_path(
            algorithm_configuration, problem_name, index, make_parents=True
        )
        history.to_file(path)
        self._results.add_path(algorithm_configuration.name, problem_name, path)

    def __get_history_path(
        self,
        algorithm_configuration: AlgorithmConfiguration,
        problem_name: str,
        index: int,
        make_parents: bool = False,
    ) -> Path:
        """Return a path for a history file.

        Args:
            algorithm_configuration: The algorithm configuration.
            problem_name: The name of the problem.
            index: The index of the problem instance.
            make_parents: Whether to make the parent directories.

        Returns:
            The path for the history file.
        """
        return self._get_path(
            self.__histories_path,
            algorithm_configuration,
            problem_name,
            index,
            "json",
            make_parents=make_parents,
        )

    def __get_pseven_log_path(
        self,
        algorithm_configuration: AlgorithmConfiguration,
        problem_name: str,
        index: int,
    ) -> Path:
        """Return a path for a pSeven log file.

        Args:
            algorithm_configuration: The algorithm configuration.
            problem_name: The name of the problem.
            index: The index of the problem instance.

        Returns:
            The path for the pSeven log file.

        Raises:
            ValueError: If the path to the destination directory for the
                pSeven files is not set.
        """
        if not self.__pseven_logs_path:
            raise ValueError("The directory for the pSeven files is not set.")

        return self._get_path(
            self.__pseven_logs_path,
            algorithm_configuration,
            problem_name,
            index,
            "txt",
            make_parents=True,
        )

    @staticmethod
    def _get_path(
        root_dir: Path,
        algorithm_configuration: AlgorithmConfiguration,
        problem_name: str,
        index: int,
        extension: str = "json",
        make_parents: bool = False,
    ) -> Path:
        """Return a path in the file tree dedicated to a specific optimization run.

        Args:
            root_dir: The path to the root directory.
            algorithm_configuration: The algorithm configuration.
            problem_name: The name of the problem.
            index: The index of the problem instance.
            extension: The extension of the path.
                If ``None``, the extension is for a JSON file.
            make_parents: Whether to make the parent directories of the path.

        Returns:
            The path for the file.
        """
        configuration_name = join_substrings(algorithm_configuration.name)
        path = (
            root_dir.resolve()
            / configuration_name
            / join_substrings(problem_name)
            / f"{configuration_name}.{index + 1}.{extension}"
        )
        if make_parents:
            path.parent.mkdir(parents=True, exist_ok=True)

        return path

    def __save_database(
        self,
        database: Database,
        algorithm_configuration: AlgorithmConfiguration,
        problem_name: str,
        index: int,
    ) -> None:
        """Save the database of a problem.

        Args:
            database: The database.
            algorithm_configuration: The algorithm configuration.
            problem_name: The name of the problem.
            index: The index of the problem instance.
        """
        database.to_hdf(
            self._get_path(
                self._databases_path,
                algorithm_configuration,
                problem_name,
                index,
                "h5",
                make_parents=True,
            )
        )