Source code for gemseo.algos.doe.lib_custom

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Damien Guenot
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
#        :author: Francois Gallard
"""Design of experiments from custom data."""

from __future__ import annotations

import logging
from collections.abc import Iterable
from collections.abc import Mapping
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING
from typing import ClassVar
from typing import Final
from typing import Optional
from typing import TextIO
from typing import Union

from numpy import apply_along_axis
from numpy import ndarray
from numpy import vstack
from pandas import read_csv

from gemseo.algos.doe.doe_library import DOEAlgorithmDescription
from gemseo.algos.doe.doe_library import DOELibrary
from gemseo.typing import RealArray

if TYPE_CHECKING:
    from gemseo.algos.design_space import DesignSpace
    from gemseo.core.parallel_execution.callable_parallel_execution import CallbackType

# Union of the value types accepted for an option of this DOE algorithm.
# This alias is a plain assignment evaluated at runtime, so
# ``from __future__ import annotations`` does not apply to it;
# presumably ``Optional``/``Union`` are used instead of ``X | Y`` to support
# Python versions where the latter is not available at runtime -- TODO confirm.
OptionType = Optional[Union[str, int, float, bool, list[str], Path, TextIO, RealArray]]

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)


class CustomDOE(DOELibrary):
    """A design of experiments from samples provided as a file or an array.

    The samples are provided
    either as a file in text or csv format
    or as a sequence of sequences of numbers,
    e.g. a 2D numpy array.

    The file is parsed without any header row:
    every line that is neither skipped (see the ``skiprows`` option)
    nor a comment (see the ``comments`` option)
    is read as a sample.
    """

    COMMENTS_KEYWORD: Final[str] = "comments"
    """The name given to the string indicating a comment line."""

    DELIMITER_KEYWORD: Final[str] = "delimiter"
    """The name given to the string separating two fields."""

    DOE_FILE: Final[str] = "doe_file"
    """The name given to the DOE file."""

    SAMPLES: Final[str] = "samples"
    """The name given to the samples."""

    SKIPROWS_KEYWORD: Final[str] = "skiprows"
    """The name given to the number of skipped rows in the DOE file."""

    LIBRARY_NAME: ClassVar[str] = "GEMSEO"

    _USE_UNIT_HYPERCUBE: ClassVar[bool] = False

    def __init__(self) -> None:  # noqa:D107
        super().__init__()
        name = self.__class__.__name__
        self.algo_name = name
        # The description is used directly rather than looked up by class name,
        # so that instantiating a subclass does not fail with a KeyError.
        description = (
            "The samples are provided "
            "either as a file in text or csv format "
            "or as a sequence of sequences of numbers."
        )
        self.descriptions[name] = DOEAlgorithmDescription(
            algorithm_name=name,
            description=description,
            internal_algorithm_name=name,
            library_name=name,
        )

    def _get_options(
        self,
        doe_file: str | Path | None = None,
        samples: RealArray
        | dict[str, RealArray]
        | list[dict[str, RealArray]]
        | None = None,
        delimiter: str | None = ",",
        comments: str | Sequence[str] | None = "#",
        skiprows: int = 0,
        max_time: float = 0,
        eval_jac: bool = False,
        n_processes: int = 1,
        wait_time_between_samples: float = 0.0,
        callbacks: Iterable[CallbackType] = (),
        **kwargs: OptionType,
    ) -> dict[str, OptionType]:
        """Set the options.

        Args:
            doe_file: The path to the file containing the input samples.
                If ``None``, use ``samples``.
            samples: The input samples.
                They must be at least a 2D-array,
                a dictionary of 2D-arrays
                or a list of dictionaries of 1D-arrays.
                If ``None``, use ``doe_file``.
            delimiter: The character used to separate values.
                If ``None``, use whitespace.
            comments: The characters or list of characters
                used to indicate the start of a comment.
                ``None`` implies no comments.
            skiprows: The number of first lines to skip.
            eval_jac: Whether to evaluate the jacobian.
            n_processes: The maximum simultaneous number of processes
                used to parallelize the execution.
            wait_time_between_samples: The waiting time between two samples.
            max_time: The maximum runtime in seconds,
                disabled if 0.
            callbacks: The functions to be evaluated
                after each call to :meth:`.OptimizationProblem.evaluate_functions`;
                to be called as ``callback(index, (output, jacobian))``.
            **kwargs: The additional arguments.

        Returns:
            The processed options.
        """
        return self._process_options(
            max_time=max_time,
            # The path is forwarded as a string, whatever its original type.
            doe_file=str(doe_file) if doe_file is not None else None,
            samples=samples,
            delimiter=delimiter,
            comments=comments,
            skiprows=skiprows,
            eval_jac=eval_jac,
            n_processes=n_processes,
            wait_time_between_samples=wait_time_between_samples,
            callbacks=callbacks,
            **kwargs,
        )

    # TODO: API: remove dimension
    @staticmethod
    def read_file(
        doe_file: str | Path | TextIO,
        delimiter: str | None = ",",
        comments: str | Sequence[str] | None = "#",
        skiprows: int = 0,
        dimension: int = 0,
    ) -> RealArray:
        """Read a file containing several samples (one per line) and return them.

        The file is read with :func:`pandas.read_csv` without header row.

        Args:
            doe_file: Either the file, the filename, or the generator to read.
            delimiter: The character used to separate values.
                If ``None``, use whitespace.
            comments: The character used to indicate the start of a comment.
                ``None`` implies no comments.
                It is forwarded as is to the ``comment`` argument
                of :func:`pandas.read_csv`,
                which is documented as accepting a single character.
            skiprows: Skip the first ``skiprows`` lines.
            dimension: The dimension of the variables space if known.
                This argument is not used; it is kept for backward compatibility.

        Returns:
            The samples.

        Raises:
            Exception: Any exception raised while reading the file;
                it is logged and then re-raised unchanged.
        """
        # pandas interprets ``delimiter=None`` as "use the default separator",
        # i.e. a comma; map ``None`` explicitly to "any run of whitespace"
        # to honour the documented behaviour.
        separator = r"\s+" if delimiter is None else delimiter
        try:
            samples = read_csv(
                doe_file,
                delimiter=separator,
                skiprows=skiprows,
                header=None,
                comment=comments,
            ).to_numpy()
        except Exception:
            LOGGER.exception("Failed to load the DOE file %s", doe_file)
            raise

        return samples

    def _generate_samples(
        self, design_space: DesignSpace, **options: OptionType
    ) -> RealArray:
        """
        Raises:
            ValueError: If no ``doe_file`` and no ``samples`` are given.
                If both ``doe_file`` and ``samples`` are given.
                If the samples are not at least two-dimensional.
                If the dimension of ``samples`` is different from
                the one of the problem.
        """  # noqa: D205, D212, D415
        error_message = (
            "The algorithm CustomDOE requires "
            "either 'doe_file' or 'samples' as option."
        )
        samples = options.get(self.SAMPLES)
        doe_file = options.get(self.DOE_FILE)
        dimension = design_space.dimension

        # Exactly one of the two sources of samples must be provided.
        if (samples is None) == (doe_file is None):
            raise ValueError(error_message)

        if samples is None:
            samples = self.read_file(
                doe_file,
                comments=options[self.COMMENTS_KEYWORD],
                delimiter=options[self.DELIMITER_KEYWORD],
                skiprows=options[self.SKIPROWS_KEYWORD],
                dimension=dimension,
            )

        # Convert the dictionary-based formats to a single array.
        if isinstance(samples, Mapping):
            samples = design_space.dict_to_array(samples)
        elif not isinstance(samples, ndarray):
            samples = vstack([design_space.dict_to_array(sample) for sample in samples])

        # Fail with an explicit message rather than an IndexError on shape[1].
        n_axes = len(samples.shape)
        if n_axes < 2:
            msg = f"The samples must be at least a 2D array; got a {n_axes}D array."
            raise ValueError(msg)

        if samples.shape[1] != dimension:
            msg = (
                f"Dimension mismatch between the variables space ({dimension}) "
                f"and the samples ({samples.shape[1]})."
            )
            raise ValueError(msg)

        # Transform the samples one row at a time.
        return apply_along_axis(design_space.transform_vect, axis=1, arr=samples)