# Source code for gemseo.algos.parameter_space

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES

"""Variable space defining both deterministic and uncertain variables.

Overview
--------

The :class:`.ParameterSpace` class describes a set of parameters of interest
which can be either deterministic or uncertain.
This class inherits from :class:`.DesignSpace`.

Capabilities
------------

The :meth:`.DesignSpace.add_variable` aims to add deterministic variables from:

- a variable name,
- an optional variable size (default: 1),
- an optional variable type (default: float),
- an optional lower bound (default: - infinity),
- an optional upper bound (default: + infinity),
- an optional current value (default: None).

The :meth:`.add_random_variable` aims to add uncertain
variables (a.k.a. random variables) from:

- a variable name,
- a distribution name
  (see :meth:`~gemseo.uncertainty.api.get_available_distributions`),
- an optional variable size,
- optional distribution parameters (:code:`parameters` set as
  a tuple of positional arguments for :class:`.OTDistribution`
  or a dictionary of keyword arguments for :class:`.SPDistribution`,
  or keyword arguments for standard probability distribution such
  as :class:`.OTNormalDistribution` and :class:`.SPNormalDistribution`).

The :class:`.ParameterSpace` also provides the following methods:

- :meth:`.compute_samples`: returns several samples
  of the uncertain variables,
- :meth:`.evaluate_cdf`: evaluates the cumulative distribution function
  (or its inverse) for the different uncertain variables
  and their different components,
- :meth:`.get_range`: returns the numerical range
  of the different uncertain parameters,
- :meth:`.get_support`: returns the mathematical support
  of the different uncertain variables,
- :meth:`.is_uncertain`: checks if a parameter is uncertain,
- :meth:`.is_deterministic`: checks if a parameter is deterministic.
"""
from __future__ import division, unicode_literals

import collections
import logging
import sys
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Union

if TYPE_CHECKING:
    from gemseo.core.dataset import Dataset

from numpy import array, ndarray

from gemseo.algos.design_space import DesignSpace, DesignVariable
from gemseo.uncertainty.distributions.composed import ComposedDistribution
from gemseo.uncertainty.distributions.factory import (
    DistributionFactory,
    DistributionParametersType,
)
from gemseo.utils.data_conversion import DataConversion
from gemseo.utils.py23_compat import Path

# Lightweight description of a random variable: the name of its distribution
# class, its dimension and the parameters passed to that distribution.
_RANDOM_VARIABLE_FIELDS = ["distribution", "size", "parameters"]

# Defaults apply to the right-most fields, i.e. ``size=1`` and ``parameters={}``.
# NOTE: the default ``parameters`` mapping is one single dict object shared by
# every instance relying on the default; treat it as read-only.
_RANDOM_VARIABLE_DEFAULTS = (1, {})

if sys.version_info >= (3, 7, 0):
    # ``namedtuple`` accepts the ``defaults`` keyword from Python 3.7 on.
    RandomVariable = collections.namedtuple(
        "RandomVariable",
        _RANDOM_VARIABLE_FIELDS,
        defaults=_RANDOM_VARIABLE_DEFAULTS,
    )
else:
    # Older interpreters: attach the defaults to the generated ``__new__``.
    RandomVariable = collections.namedtuple("RandomVariable", _RANDOM_VARIABLE_FIELDS)
    RandomVariable.__new__.__defaults__ = _RANDOM_VARIABLE_DEFAULTS

# Module-level logger.
LOGGER = logging.getLogger(__name__)


class ParameterSpace(DesignSpace):
    """Parameter space.

    A parameter space is a :class:`.DesignSpace`
    whose variables can be either deterministic or uncertain (random).

    Attributes:
        uncertain_variables (List(str)): The names of the uncertain variables.
        distributions (Dict(str,Distribution)): The marginal probability distributions
            of the uncertain variables.
        distribution (ComposedDistribution): The joint probability distribution
            of the uncertain variables;
            ``None`` as long as there is no uncertain variable.
    """

    # Column names and title used by the tabular views.
    _INITIAL_DISTRIBUTION = "Initial distribution"
    _TRANSFORMATION = "Transformation"
    _SUPPORT = "Support"
    _MEAN = "Mean"
    _STANDARD_DEVIATION = "Standard deviation"
    _RANGE = "Range"
    _BLANK = ""
    _PARAMETER_SPACE = "Parameter space"

    def __init__(
        self,
        hdf_file=None,  # type: Optional[Union[str,Path]]
        copula=ComposedDistribution._INDEPENDENT_COPULA,  # type: str
        name=None,  # type: Optional[str]
    ):  # type: (...) -> None
        """
        Args:
            hdf_file: Forwarded as is to the constructor of :class:`.DesignSpace`.
            copula: A name of copula defining the dependency between random variables.
            name: Forwarded as is to the constructor of :class:`.DesignSpace`.

        Raises:
            ValueError: If ``copula`` is not one of
                ``ComposedDistribution.AVAILABLE_COPULA_MODELS``.
        """
        LOGGER.debug("*** Create a new parameter space ***")
        super(ParameterSpace, self).__init__(hdf_file=hdf_file, name=name)
        self.uncertain_variables = []
        self.distributions = {}
        self.distribution = None
        if copula not in ComposedDistribution.AVAILABLE_COPULA_MODELS:
            raise ValueError("{} is not a copula name.".format(copula))
        self._copula = copula
        # To be defined as:
        # self.__distributions_definitions["u"] = ("SPNormalDistribution", {"mu": 1.})
        # where the first component of the tuple is a distribution name
        # and the second one a mapping of the distribution parameter.
        self.__distributions_definitions = {}

    def is_uncertain(
        self,
        variable,  # type: str
    ):  # type: (...) -> bool
        """Check if a variable is uncertain.

        Args:
            variable: The name of the variable.

        Returns:
            True if the variable is uncertain.
        """
        return variable in self.uncertain_variables

    def is_deterministic(
        self,
        variable,  # type: str
    ):  # type: (...) -> bool
        """Check if a variable is deterministic.

        Args:
            variable: The name of the variable.

        Returns:
            True if the variable belongs to the space and is not uncertain.
        """
        return (
            variable in self.variables_names
            and variable not in self.uncertain_variables
        )

    def __update_parameter_space(
        self,
        variable,  # type: str
    ):  # type: (...) -> None
        """Update the parameter space with a random variable.

        If the variable is not yet in the space, it is added as a float variable
        bounded by the mathematical bounds of its distribution
        and whose current value is the mean of the distribution.
        Otherwise, its bounds and current value are overwritten accordingly.

        Args:
            variable: The name of the random variable.
        """
        marginal = self.distributions[variable]
        lower_bound = marginal.math_lower_bound
        upper_bound = marginal.math_upper_bound
        mean = marginal.mean
        if variable in self.variables_names:
            self.set_lower_bound(variable, lower_bound)
            self.set_upper_bound(variable, upper_bound)
            self.set_current_variable(variable, mean)
        else:
            self.add_variable(
                variable, marginal.dimension, "float", lower_bound, upper_bound, mean
            )

    def add_random_variable(
        self,
        name,  # type: str
        distribution,  # type: str
        size=1,  # type: int
        **parameters  # type: DistributionParametersType
    ):  # type: (...) -> None
        """Add a random variable from a probability distribution.

        If a random variable with the same name already exists,
        its distribution is replaced.

        Args:
            name: The name of the random variable.
            distribution: The name of a class
                implementing a probability distribution,
                e.g. 'OTUniformDistribution' or 'SPDistribution'.
            size: The dimension of the random variable.
            **parameters: The parameters of the distribution.
        """
        marginal = DistributionFactory().create(
            distribution, variable=name, dimension=size, **parameters
        )
        LOGGER.debug("Add the random variable: %s.", name)
        # Register the definition only once the distribution has been created,
        # so that a failing creation does not leave a stale definition behind.
        self.__distributions_definitions[name] = (distribution, parameters)
        self.distributions[name] = marginal
        # Do not register the name twice when a random variable is redefined,
        # otherwise its marginal would appear twice in the joint distribution.
        if name not in self.uncertain_variables:
            self.uncertain_variables.append(name)
        self._build_composed_distribution()
        self.__update_parameter_space(name)

    def _build_composed_distribution(self):  # type: (...) -> None
        """Build the composed distribution from the marginal ones.

        The joint distribution is reset to ``None``
        when there is no uncertain variable.
        """
        marginals = [self.distributions[name] for name in self.uncertain_variables]
        if not marginals:
            self.distribution = None
            return
        # The class of the joint distribution is given by the first marginal.
        composed_distribution_class = marginals[0]._COMPOSED_DISTRIBUTION
        self.distribution = composed_distribution_class(marginals, self._copula)

    def get_range(
        self,
        variable,  # type: str
    ):  # type: (...) -> List[ndarray]
        """Return the numerical range of a random variable.

        Args:
            variable: The name of the random variable.

        Returns:
            The range of the components of the random variable.
        """
        return self.distributions[variable].range

    def get_support(
        self,
        variable,  # type: str
    ):  # type: (...) -> List[ndarray]
        """Return the mathematical support of a random variable.

        Args:
            variable: The name of the random variable.

        Returns:
            The support of the components of the random variable.
        """
        return self.distributions[variable].support

    def remove_variable(
        self,
        name,  # type: str
    ):  # type: (...) -> None
        """Remove a variable from the probability space.

        Args:
            name: The name of the variable.
        """
        if name in self.uncertain_variables:
            del self.distributions[name]
            self.uncertain_variables.remove(name)
            self.__distributions_definitions.pop(name, None)
            # Rebuild the joint distribution from the remaining marginals;
            # it is reset to None when no uncertain variable remains.
            self._build_composed_distribution()
        super(ParameterSpace, self).remove_variable(name)

    def compute_samples(
        self,
        n_samples=1,  # type: int
        as_dict=False,  # type: bool
    ):  # type: (...) -> Union[List[Dict[str,ndarray]],ndarray]
        """Sample the random variables and return the realizations.

        Args:
            n_samples: A number of samples.
            as_dict: The type of the returned object.
                If True, return one dictionary per sample.
                Otherwise, return an array.

        Returns:
            The realizations of the random variables,
            either stored in an array
            or in a list of dictionaries, one per sample,
            whose keys are the names of the random variables
            and the values are the realizations.
        """
        sample = self.distribution.compute_samples(n_samples)
        if not as_dict:
            return sample
        return [
            DataConversion.array_to_dict(
                data_array, self.uncertain_variables, self.variables_sizes
            )
            for data_array in sample
        ]

    def evaluate_cdf(
        self,
        value,  # type: Dict[str,ndarray]
        inverse=False,  # type:bool
    ):  # type: (...) -> Dict[str,ndarray]
        """Evaluate the cumulative density function (or its inverse) of each marginal.

        Args:
            value: The values of the uncertain variables
                passed as a dictionary whose keys are the names of the variables.
            inverse: The type of function to evaluate.
                If True, compute the inverse cumulative density function.
                Otherwise, compute the cumulative density function.

        Returns:
            A dictionary where the keys are the names of the random variables
            and the values are the evaluations.

        Raises:
            TypeError: If ``inverse`` is True and ``value`` is not a dictionary
                or the value of an uncertain variable is not a NumPy array.
            ValueError: If ``inverse`` is True and the value of an uncertain variable
                has a wrong size or components outside [0, 1].
        """
        if inverse:
            # The inverse CDF is only defined for probabilities, i.e. in [0, 1].
            self.__check_dict_of_array(value)

        values = {}
        for name in self.uncertain_variables:
            marginal = self.distributions[name]
            if inverse:
                evaluate = marginal.compute_inverse_cdf
            else:
                evaluate = marginal.compute_cdf
            values[name] = array(evaluate(value[name]))
        return values

    def __check_dict_of_array(
        self,
        obj,  # type: Any
    ):  # type: (...) -> None
        """Check if the object is a dictionary whose values are numpy arrays.

        Only the items related to uncertain variables are checked;
        the other ones are reported with a debug message.

        Args:
            obj: The object to test.

        Raises:
            TypeError: If the object is not a dictionary
                or if the value of an uncertain variable is not a NumPy array.
            ValueError: If the value of an uncertain variable
                has not the size of this variable
                or has components outside [0, 1].
        """
        error_msg = (
            "obj must be a dictionary whose keys are the variables "
            "names and values are arrays "
            "whose dimensions are the variables ones and components are in [0, 1]."
        )
        if not isinstance(obj, dict):
            raise TypeError(error_msg)

        for variable, value in obj.items():
            if variable not in self.uncertain_variables:
                LOGGER.debug(
                    "%s is not defined in the probability space; "
                    "available variables are [%s]; "
                    "use uniform distribution for %s.",
                    variable,
                    ", ".join(self.uncertain_variables),
                    variable,
                )
                continue

            if not isinstance(value, ndarray):
                raise TypeError(error_msg)

            components = value.flatten()
            if len(components) != self.variables_sizes[variable]:
                raise ValueError(error_msg)

            if (components > 1.0).any() or (components < 0.0).any():
                raise ValueError(error_msg)

    def __str__(self):  # type: (...) -> str
        """Return the pretty table of the space with the distributions column."""
        table = super(ParameterSpace, self).get_pretty_table()
        distribution = []
        for variable in self.variables_names:
            # One row per component of the variable.
            if variable in self.uncertain_variables:
                dist = self.distributions[variable]
                distribution.extend([str(dist)] * dist.dimension)
            else:
                distribution.extend([self._BLANK] * self.variables_sizes[variable])

        table.add_column(self._INITIAL_DISTRIBUTION, distribution)
        table.title = self._PARAMETER_SPACE
        return str(table)

    def get_tabular_view(
        self,
        decimals=2,  # type: int
    ):  # type: (...) -> str
        """Return a tabular view of the parameter space.

        This view contains statistical information.

        Args:
            decimals: The number of decimals to print.

        Returns:
            The tabular view.
        """
        table = super(ParameterSpace, self).get_pretty_table()
        distribution = []
        transformation = []
        support = []
        mean = []
        std = []
        rnge = []
        for variable in self.variables_names:
            if variable in self.uncertain_variables:
                dist = self.distributions[variable]
                dist_mean = dist.mean
                dist_std = dist.standard_deviation
                dist_range = dist.range
                dist_support = dist.support
                # One row per component of the random variable.
                for dim in range(dist.dimension):
                    distribution.append(str(dist))
                    transformation.append(dist.transformation)
                    mean.append(round(dist_mean[dim], decimals))
                    std.append(round(dist_std[dim], decimals))
                    rnge.append(dist_range[dim])
                    support.append(dist_support[dim])
            else:
                # Deterministic variables have no statistics: fill with blanks.
                blanks = [self._BLANK] * self.variables_sizes[variable]
                for column in (distribution, transformation, mean, std, support, rnge):
                    column.extend(blanks)

        table.add_column(self._INITIAL_DISTRIBUTION, distribution)
        table.add_column(self._TRANSFORMATION, transformation)
        table.add_column(self._SUPPORT, support)
        table.add_column(self._MEAN, mean)
        table.add_column(self._STANDARD_DEVIATION, std)
        table.add_column(self._RANGE, rnge)
        table.title = self._PARAMETER_SPACE
        return str(table)

    def unnormalize_vect(
        self,
        x_vect,  # type:ndarray
        minus_lb=True,  # type:bool
        no_check=False,  # type: bool
        use_dist=False,  # type:bool
    ):  # type: (...) ->ndarray
        """Unnormalize a normalized vector of the parameter space.

        If `use_dist` is True,
        use the inverse cumulative probability distributions of the random variables
        to unscale the components of the random variables.
        Otherwise,
        use the approach defined in :meth:`.DesignSpace.unnormalize_vect`
        with `minus_lb` and `no_check`.

        For the components of the deterministic variables,
        use the approach defined in :meth:`.DesignSpace.unnormalize_vect`
        with `minus_lb` and `no_check`.

        Args:
            x_vect: The values of the design variables.
            minus_lb: If True, remove the lower bounds at normalization.
            no_check: If True, do not check that the values are in [0,1].
            use_dist: If True, unnormalize the components of the random variables
                with their inverse cumulative probability distributions.

        Returns:
            The unnormalized vector.
        """
        x_u_geom = super(ParameterSpace, self).unnormalize_vect(
            x_vect, minus_lb=minus_lb, no_check=no_check
        )
        if not use_dist:
            return x_u_geom

        data_names = self.variables_names
        data_sizes = self.variables_sizes
        dict_sample = DataConversion.array_to_dict(x_vect, data_names, data_sizes)
        x_u = self.evaluate_cdf(dict_sample, inverse=True)
        x_u_geom = DataConversion.array_to_dict(x_u_geom, data_names, data_sizes)
        # The variables without distribution use the geometric unnormalization.
        for name in data_names:
            if name not in x_u:
                x_u[name] = x_u_geom[name]

        return DataConversion.dict_to_array(x_u, data_names)

    def transform_vect(
        self, vector  # type: ndarray
    ):  # type:(...) -> ndarray
        """Normalize a vector with :meth:`.normalize_vect` and ``use_dist=True``.

        Args:
            vector: The vector to transform.

        Returns:
            The transformed vector.
        """
        return self.normalize_vect(vector, use_dist=True)

    def untransform_vect(
        self, vector  # type: ndarray
    ):  # type:(...) -> ndarray
        """Unnormalize a vector with :meth:`.unnormalize_vect` and ``use_dist=True``.

        Args:
            vector: The vector to untransform.

        Returns:
            The untransformed vector.
        """
        return self.unnormalize_vect(vector, use_dist=True)

    def normalize_vect(
        self,
        x_vect,  # type:ndarray
        minus_lb=True,  # type: bool
        use_dist=False,  # type: bool
    ):  # type: (...) ->ndarray
        """Normalize a vector of the parameter space.

        If `use_dist` is True,
        use the cumulative probability distributions of the random variables
        to scale the components of the random variables between 0 and 1.
        Otherwise,
        use the approach defined in :meth:`.DesignSpace.normalize_vect`
        with `minus_lb`.

        For the components of the deterministic variables,
        use the approach defined in :meth:`.DesignSpace.normalize_vect`
        with `minus_lb`.

        Args:
            x_vect: The values of the design variables.
            minus_lb: If True, remove the lower bounds at normalization.
            use_dist: If True, normalize the components of the random variables
                with their cumulative probability distributions.

        Returns:
            The normalized vector.
        """
        x_n_geom = super(ParameterSpace, self).normalize_vect(
            x_vect, minus_lb=minus_lb
        )
        if not use_dist:
            return x_n_geom

        data_names = self.variables_names
        data_sizes = self.variables_sizes
        dict_sample = DataConversion.array_to_dict(x_vect, data_names, data_sizes)
        x_n = self.evaluate_cdf(dict_sample, inverse=False)
        x_n_geom = DataConversion.array_to_dict(x_n_geom, data_names, data_sizes)
        # The variables without distribution use the geometric normalization.
        for name in data_names:
            if name not in x_n:
                x_n[name] = x_n_geom[name]

        return DataConversion.dict_to_array(x_n, data_names)

    @property
    def deterministic_variables(self):  # type: (...) -> List[str]
        """The deterministic variables.

        These are the names of the variables of the space that are not uncertain,
        in the order of ``variables_names``.
        """
        return [
            variable
            for variable in self.variables_names
            if variable not in self.uncertain_variables
        ]

    def extract_uncertain_space(
        self,
        as_design_space=False,  # type: bool
    ):  # type: (...) -> Union[DesignSpace,ParameterSpace]
        """Define a new :class:`.DesignSpace` from the uncertain variables only.

        Args:
            as_design_space: If False,
                return a :class:`.ParameterSpace`
                containing the original uncertain variables as is;
                otherwise,
                return a :class:`.DesignSpace`
                where the original uncertain variables are made deterministic.
                In that case,
                the bounds of a deterministic variable correspond
                to the limits of the support of the original probability distribution
                and the current value correspond to its mean.

        Returns:
            A :class:`.ParameterSpace` defined by the uncertain variables only,
            or its conversion into a :class:`.DesignSpace`
            if ``as_design_space`` is True.
        """
        # Work on a copy so that the current space is left untouched.
        uncertain_space = deepcopy(self).filter(self.uncertain_variables)
        if as_design_space:
            return uncertain_space.to_design_space()

        return uncertain_space

    def extract_deterministic_space(self):  # type: (...) -> DesignSpace
        """Define a new :class:`.DesignSpace` from the deterministic variables only.

        Returns:
            A :class:`.DesignSpace` defined by the deterministic variables only.
        """
        deterministic_space = DesignSpace()
        for name in self.deterministic_variables:
            deterministic_space.add_variable(
                name, self.get_size(name), self.get_type(name)
            )
            # Copy the current value only when there is one.
            value = self._current_x.get(name)
            if value is not None:
                deterministic_space.set_current_variable(name, value)

            deterministic_space.set_lower_bound(name, self.get_lower_bound(name))
            deterministic_space.set_upper_bound(name, self.get_upper_bound(name))

        return deterministic_space

    @staticmethod
    def init_from_dataset(
        dataset,  # type: Dataset
        groups=None,  # type: Optional[Iterable[str]]
        uncertain=None,  # type: Optional[Mapping[str,bool]]
        copula=ComposedDistribution._INDEPENDENT_COPULA,  # type: str
    ):  # type: (...) -> ParameterSpace
        """Initialize the parameter space from a dataset.

        The bounds of a variable are the minimum and maximum values
        found in the dataset.
        An uncertain variable ``name`` of size ``n`` is split into
        ``n`` scalar uniform random variables
        named ``name_0``, ..., ``name_{n-1}``.

        Args:
            dataset: The dataset used for the initialization.
            groups: The groups of the dataset to be considered.
                If None, consider all the groups.
            uncertain: Whether the variables should be uncertain or not.
                The variables missing from this mapping are deterministic.
                If None, all the variables are deterministic.
            copula: A name of copula defining the dependency between random variables.

        Returns:
            The parameter space built from the dataset.
        """
        parameter_space = ParameterSpace(copula=copula)
        if uncertain is None:
            uncertain = {}

        if groups is None:
            groups = dataset.groups

        for group in groups:
            for name in dataset.get_names(group):
                data = dataset.get_data_by_names(name)[name]
                l_b = data.min(0)
                u_b = data.max(0)
                size = len(l_b)
                if uncertain.get(name, False):
                    for idx in range(size):
                        parameter_space.add_random_variable(
                            "{}_{}".format(name, idx),
                            "OTUniformDistribution",
                            1,
                            minimum=float(l_b[idx]),
                            maximum=float(u_b[idx]),
                        )
                else:
                    # The current value is the center of the bounding box.
                    value = (l_b + u_b) / 2
                    parameter_space.add_variable(name, size, "float", l_b, u_b, value)

        return parameter_space

    def to_design_space(self):  # type: (...) -> DesignSpace
        """Convert the parameter space into a :class:`.DesignSpace`.

        The original deterministic variables are kept as is
        while the original uncertain variables are made deterministic.
        In that case,
        the bounds of a deterministic variable correspond
        to the limits of the support of the original probability distribution
        and the current value correspond to its mean.

        Returns:
            A :class:`.DesignSpace` where all original variables are made deterministic.
        """
        design_space = self.extract_deterministic_space()
        for name in self.uncertain_variables:
            design_space.add_variable(
                name,
                size=self.get_size(name),
                var_type=self.get_type(name),
                l_b=self.get_lower_bound(name),
                u_b=self.get_upper_bound(name),
                value=self.get_current_x([name]),
            )

        return design_space

    def __getitem__(
        self,
        name,  # type: str
    ):  # type: (...) -> Union[DesignVariable, RandomVariable]
        """Return the description of a variable.

        Args:
            name: The name of the variable.

        Returns:
            A :class:`RandomVariable` if the variable is uncertain,
            otherwise a :class:`.DesignVariable`.

        Raises:
            KeyError: If the variable is not in the space.
        """
        if name not in self.variables_names:
            raise KeyError("Variable '{}' is not known.".format(name))

        if self.is_uncertain(name):
            distribution, parameters = self.__distributions_definitions[name]
            return RandomVariable(
                distribution=distribution,
                size=self.get_size(name),
                parameters=parameters,
            )

        # A deterministic variable may have no current value.
        try:
            value = self.get_current_x([name])
        except KeyError:
            value = None

        return DesignVariable(
            size=self.get_size(name),
            var_type=self.get_type(name),
            l_b=self.get_lower_bound(name),
            u_b=self.get_upper_bound(name),
            value=value,
        )

    def __setitem__(
        self,
        name,  # type: str
        item,  # type: Union[DesignVariable, RandomVariable]
    ):  # type: (...) -> None
        """Add a variable from its description.

        Args:
            name: The name of the variable.
            item: The description of the variable,
                either a :class:`RandomVariable` to add a random variable
                or a :class:`.DesignVariable` to add a deterministic one.
        """
        if isinstance(item, RandomVariable):
            self.add_random_variable(
                name, item.distribution, size=item.size, **item.parameters
            )
        else:
            self.add_variable(
                name,
                size=item.size,
                var_type=item.var_type,
                l_b=item.l_b,
                u_b=item.u_b,
                value=item.value,
            )