# Source code for gemseo.algos.doe.lib_custom

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Damien Guenot
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
#        :author: Francois Gallard
"""Design of experiments from custom data."""

from __future__ import annotations

import logging
from collections.abc import Iterable
from collections.abc import Mapping
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING
from typing import ClassVar
from typing import Final
from typing import Optional
from typing import TextIO
from typing import Union

from numpy import apply_along_axis
from numpy import ndarray
from numpy import vstack
from pandas import read_csv

from gemseo.algos.doe.doe_library import DOEAlgorithmDescription
from gemseo.algos.doe.doe_library import DOELibrary
from gemseo.typing import RealArray

if TYPE_CHECKING:
    from gemseo.algos.design_space import DesignSpace
    from gemseo.core.parallel_execution.callable_parallel_execution import CallbackType

# The type of an option value,
# used to annotate the extra keyword options and the processed options.
OptionType = Optional[Union[str, int, float, bool, list[str], Path, TextIO, RealArray]]

# The logger of this module.
LOGGER = logging.getLogger(__name__)


class CustomDOE(DOELibrary):
    """A design of experiments from samples provided as a file or in memory.

    The samples are provided either as a file read by :meth:`read_file`
    or directly through the ``samples`` option.

    In-memory samples can be a 2D numpy array, a mapping,
    or any other iterable whose items are converted one by one
    with ``design_space.dict_to_array`` and stacked vertically.

    The file is parsed without any header row (``header=None``):
    use the ``skiprows`` option to skip leading lines such as a header.
    """

    COMMENTS_KEYWORD: Final[str] = "comments"
    """The name given to the string indicating a comment line."""

    DELIMITER_KEYWORD: Final[str] = "delimiter"
    """The name given to the string separating two fields."""

    DOE_FILE: Final[str] = "doe_file"
    """The name given to the DOE file."""

    SAMPLES: Final[str] = "samples"
    """The name given to the samples."""

    SKIPROWS_KEYWORD: Final[str] = "skiprows"
    """The name given to the number of skipped rows in the DOE file."""

    # The name of the library this DOE belongs to.
    LIBRARY_NAME: ClassVar[str] = "GEMSEO"

    # NOTE(review): presumably tells the base class that the samples are not
    # expressed in the unit hypercube; confirm against ``DOELibrary``.
    _USE_UNIT_HYPERCUBE: ClassVar[bool] = False

    def __init__(self) -> None:
        """Register the description of the algorithm under the class name.

        The algorithm name, the internal algorithm name and the library name
        of the description are all set to the name of the instantiated class.
        """
        super().__init__()
        name = self.__class__.__name__
        self.algo_name = name

        # The description does not depend on the class name:
        # looking it up in a dictionary keyed by "CustomDOE"
        # would raise a KeyError for any subclass.
        description = (
            "This samples are provided "
            "either as a file in text or csv format "
            "or as a sequence of sequences of numbers."
        )
        self.descriptions[name] = DOEAlgorithmDescription(
            algorithm_name=name,
            description=description,
            internal_algorithm_name=name,
            library_name=name,
        )

    def _get_options(
        self,
        doe_file: str | Path | None = None,
        samples: RealArray
        | dict[str, RealArray]
        | list[dict[str, RealArray]]
        | None = None,
        delimiter: str | None = ",",
        comments: str | Sequence[str] | None = "#",
        skiprows: int = 0,
        max_time: float = 0,
        eval_jac: bool = False,
        n_processes: int = 1,
        wait_time_between_samples: float = 0.0,
        callbacks: Iterable[CallbackType] = (),
        **kwargs: OptionType,
    ) -> dict[str, OptionType]:
        """Collect the options and forward them to ``_process_options``.

        Args:
            doe_file: The path to the file containing the input samples.
                It is converted to a string when it is not ``None``.
                If ``None``, use ``samples``.
            samples: The input samples.
                They must be at least a 2D-array,
                a dictionary of 2D-arrays
                or a list of dictionaries of 1D-arrays.
                If ``None``, use ``doe_file``.
            delimiter: The character used to separate values in ``doe_file``.
            comments: The characters or list of characters
                used to indicate the start of a comment in ``doe_file``.
                ``None`` implies no comments.
            skiprows: The number of first lines of ``doe_file`` to skip.
            max_time: The maximum runtime in seconds,
                disabled if 0.
            eval_jac: Whether to evaluate the jacobian.
            n_processes: The maximum simultaneous number of processes
                used to parallelize the execution.
            wait_time_between_samples: The waiting time between two samples.
            callbacks: The functions to be evaluated
                after each call to :meth:`.OptimizationProblem.evaluate_functions`;
                to be called as ``callback(index, (output, jacobian))``.
            **kwargs: The additional arguments.

        Returns:
            The processed options.
        """
        # Only an actual path is stringified; ``None`` is forwarded untouched.
        file_path = None if doe_file is None else str(doe_file)
        return self._process_options(
            max_time=max_time,
            doe_file=file_path,
            samples=samples,
            delimiter=delimiter,
            comments=comments,
            skiprows=skiprows,
            eval_jac=eval_jac,
            n_processes=n_processes,
            wait_time_between_samples=wait_time_between_samples,
            callbacks=callbacks,
            **kwargs,
        )

    # TODO: API: remove the unused ``dimension`` argument of ``read_file``.
    @staticmethod
    def read_file(
        doe_file: str | Path | TextIO,
        delimiter: str | None = ",",
        comments: str | Sequence[str] | None = "#",
        skiprows: int = 0,
        dimension: int = 0,
    ) -> RealArray:
        """Read a file containing several samples (one per line) and return them.

        The file is read with :func:`pandas.read_csv` without any header row.

        Args:
            doe_file: Either the file, the filename, or the generator to read.
            delimiter: The character used to separate values.
                If ``None``, use whitespace.
            comments: The characters or list of characters
                used to indicate the start of a comment.
                ``None`` implies no comments.
                NOTE(review): the value is forwarded unchanged to pandas,
                whose documentation describes ``comment`` as a single character;
                confirm that a sequence is actually supported.
            skiprows: Skip the first ``skiprows`` lines.
            dimension: The dimension of the variables space if known.
                This argument is not used.

        Returns:
            The samples, one row per line read from the file.

        Raises:
            Exception: Any exception raised while reading the file;
                it is logged and then re-raised unchanged.
        """
        # pandas falls back to a comma when no separator is given,
        # so whitespace splitting must be requested explicitly.
        separator = r"\s+" if delimiter is None else delimiter
        try:
            return read_csv(
                doe_file,
                delimiter=separator,
                skiprows=skiprows,
                header=None,
                comment=comments,
            ).to_numpy()
        except Exception:
            LOGGER.exception("Failed to load the DOE file %s", doe_file)
            raise


    def _generate_samples(
        self, design_space: DesignSpace, **options: OptionType
    ) -> RealArray:
        """Return the custom samples after checking and transforming them.

        The samples come either from the ``samples`` option
        or from the file given by the ``doe_file`` option.

        Args:
            design_space: The design space
                used to convert mappings to arrays,
                to check the dimension of the samples
                and to transform each sample.
            **options: The options of the algorithm.

        Returns:
            The samples transformed row by row
            with ``design_space.transform_vect``.

        Raises:
            ValueError: If no ``doe_file`` and no ``samples`` are given.
                If both ``doe_file`` and ``samples`` are given.
                If the samples have less than two dimensions.
                If the dimension of ``samples`` is different from the
                one of the problem.
        """
        error_message = (
            "The algorithm CustomDOE requires "
            "either 'doe_file' or 'samples' as option."
        )
        samples = options.get(self.SAMPLES)
        doe_file = options.get(self.DOE_FILE)
        dimension = design_space.dimension

        # Exactly one of the two sources must be provided.
        if (samples is None) == (doe_file is None):
            raise ValueError(error_message)

        if samples is None:
            samples = self.read_file(
                doe_file,
                comments=options[self.COMMENTS_KEYWORD],
                delimiter=options[self.DELIMITER_KEYWORD],
                skiprows=options[self.SKIPROWS_KEYWORD],
                dimension=dimension,
            )

        if isinstance(samples, Mapping):
            samples = design_space.dict_to_array(samples)
        elif not isinstance(samples, ndarray):
            # Any other iterable is converted item by item and stacked row-wise.
            samples = vstack([design_space.dict_to_array(sample) for sample in samples])

        # Without this guard, the column check below would raise an IndexError
        # for samples having less than two dimensions.
        n_axes = len(samples.shape)
        if n_axes < 2:
            msg = (
                "The samples must be at least a 2D array; "
                f"got an array with {n_axes} dimension(s)."
            )
            raise ValueError(msg)

        if samples.shape[1] != dimension:
            msg = (
                f"Dimension mismatch between the variables space ({dimension}) "
                f"and the samples ({samples.shape[1]})."
            )
            raise ValueError(msg)

        return apply_along_axis(design_space.transform_vect, axis=1, arr=samples)