Source code for gemseo.uncertainty.sensitivity.sobol.analysis

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES

r"""Class for the estimation of Sobol' indices.

Let us consider the model :math:`Y=f(X_1,\ldots,X_d)`
where:

- :math:`X_1,\ldots,X_d` are independent random variables,
- :math:`E\left[f(X_1,\ldots,X_d)^2\right]<\infty`.

Then, the following decomposition is unique:

.. math::

   Y=f_0 + \sum_{i=1}^df_i(X_i) + \sum_{i,j=1\atop i\neq j}^d f_{i,j}(X_i,X_j)
   + \sum_{i,j,k=1\atop i\neq j\neq k}^d f_{i,j,k}(X_i,X_j,X_k) + \ldots +
   f_{1,\ldots,d}(X_1,\ldots,X_d)

where:

- :math:`f_0=E[Y]`,
- :math:`f_i(X_i)=E[Y|X_i]-f_0`,
- :math:`f_{i,j}(X_i,X_j)=E[Y|X_i,X_j]-f_i(X_i)-f_j(X_j)-f_0`
- and so on.

Then, the shift to variance leads to:

.. math::

   V[Y]=\sum_{i=1}^dV\left[f_i(X_i)\right] +
   \sum_{i,j=1\atop j\neq i}^d V\left[f_{i,j}(X_i,X_j)\right] + \ldots +
   V\left[f_{1,\ldots,d}(X_1,\ldots,X_d)\right]

and the Sobol' indices are obtained by dividing by the variance and sum up to 1:

.. math::

   1=\sum_{i=1}^dS_i + \sum_{i,j=1\atop j\neq i}^d S_{i,j} +
   \sum_{i,j,k=1\atop i\neq j\neq k}^d S_{i,j,k} + \ldots + S_{1,\ldots,d}

A Sobol' index represents the share of output variance explained
by a parameter or a group of parameters. For the parameter :math:`X_i`,

- :math:`S_i` is the first-order Sobol' index
  measuring the individual effect of :math:`X_i`,
- :math:`S_{i,j}` is the second-order Sobol' index
  measuring the joint effect between :math:`X_i` and :math:`X_j`,
- :math:`S_{i,j,k}` is the third-order Sobol' index
  measuring the joint effect between :math:`X_i`, :math:`X_j` and :math:`X_k`,
- and so on.

In practice, we only consider the first-order Sobol' index:

.. math::

   S_i=\frac{V[E[Y|X_i]]}{V[Y]}

and the total-order Sobol' index:

.. math::

   S_i^T=\sum_{u\subset\{1,\ldots,d\}\atop u \ni i}S_u

The latter represents the sum of the individual effect of :math:`X_i` and
the joint effects between :math:`X_i` and any parameter or group of parameters.

This methodology relies on the :class:`.SobolAnalysis` class. Precisely,
:attr:`.SobolAnalysis.indices` contains
both :attr:`.SobolAnalysis.first_order_indices` and
:attr:`.SobolAnalysis.total_order_indices`
while :attr:`.SobolAnalysis.main_indices` represents total-order Sobol'
indices.
Lastly, the :meth:`.SobolAnalysis.plot` method represents
the estimations of both first-order and total-order Sobol' indices along with
their 95% confidence interval.

The user can select the algorithm to estimate the Sobol' indices.
The computation relies on
`OpenTURNS capabilities <http://www.openturns.org/>`_.
"""

from __future__ import division, unicode_literals

import logging
from typing import Dict, Iterable, Mapping, Optional, Sequence, Tuple, Union

import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
from numpy import array
from openturns import (
    JansenSensitivityAlgorithm,
    MartinezSensitivityAlgorithm,
    MauntzKucherenkoSensitivityAlgorithm,
    SaltelliSensitivityAlgorithm,
    Sample,
)
from past.utils import old_div

from gemseo.algos.doe.doe_lib import DOELibraryOptionType
from gemseo.algos.doe.lib_openturns import OpenTURNS
from gemseo.algos.parameter_space import ParameterSpace
from gemseo.core.discipline import MDODiscipline
from gemseo.uncertainty.sensitivity.analysis import IndicesType, SensitivityAnalysis
from gemseo.utils.data_conversion import DataConversion
from gemseo.utils.py23_compat import Path

LOGGER = logging.getLogger(__name__)


[docs]class SobolAnalysis(SensitivityAnalysis):
    """Sensitivity analysis based on the Sobol' indices.

    Examples:
        >>> from numpy import pi
        >>> from gemseo.api import create_discipline, create_parameter_space
        >>> from gemseo.uncertainty.sensitivity.sobol.analysis import SobolAnalysis
        >>>
        >>> expressions = {"y": "sin(x1)+7*sin(x2)**2+0.1*x3**4*sin(x1)"}
        >>> discipline = create_discipline(
        ...     "AnalyticDiscipline", expressions_dict=expressions
        ... )
        >>>
        >>> parameter_space = create_parameter_space()
        >>> parameter_space.add_random_variable(
        ...     "x1", "OTUniformDistribution", minimum=-pi, maximum=pi
        ... )
        >>> parameter_space.add_random_variable(
        ...     "x2", "OTUniformDistribution", minimum=-pi, maximum=pi
        ... )
        >>> parameter_space.add_random_variable(
        ...     "x3", "OTUniformDistribution", minimum=-pi, maximum=pi
        ... )
        >>>
        >>> analysis = SobolAnalysis(discipline, parameter_space, n_samples=10000)
        >>> indices = analysis.compute_indices()
    """

    _ALGOS = {
        "Saltelli": SaltelliSensitivityAlgorithm,
        "Jansen": JansenSensitivityAlgorithm,
        "MauntzKucherenko": MauntzKucherenkoSensitivityAlgorithm,
        "Martinez": MartinezSensitivityAlgorithm,
    }
    _FIRST = "first"
    _TOTAL = "total"
    _FIRST_METHOD = "Sobol({})".format(_FIRST)
    _TOTAL_METHOD = "Sobol({})".format(_TOTAL)

    AVAILABLE_ALGOS = sorted(_ALGOS.keys())
    DEFAULT_DRIVER = OpenTURNS.OT_SOBOL_INDICES

    def __init__(
        self,
        discipline,  # type: MDODiscipline
        parameter_space,  # type: ParameterSpace
        n_samples,  # type: int
        algo=None,  # type:Optional[str]
        algo_options=None,  # type: Optional[Mapping[str,DOELibraryOptionType]]
    ):  # type: (...) -> None  # noqa: D107,D205,D212,D415
        self.__sobol = None
        super(SobolAnalysis, self).__init__(discipline, parameter_space, n_samples)
        self.main_method = self._FIRST

    @SensitivityAnalysis.main_method.setter
    def main_method(
        self,
        name,  # type: str
    ):  # type:(...) -> None # noqa: D102
        if name == self._FIRST:
            self._main_method = self._FIRST_METHOD
            LOGGER.info("Use first order indices as main indices.")
        elif name == self._TOTAL:
            self._main_method = self._TOTAL_METHOD
            LOGGER.info("Use total order indices as main indices.")
        else:
            raise NotImplementedError(
                "{} is a bad method name. "
                "Available ones are {}.".format(name, [self._FIRST, self._TOTAL])
            )

[docs]    def compute_indices(
        self,
        outputs=None,  # type: Optional[Sequence[str]]
        algo="Saltelli",  # type: str
    ):  # type: (...) -> Dict[str,IndicesType]
        # noqa:D205,D212,D415
        """
        Args:
            algo: The name of the algorithm to estimate the Sobol' indices
        """
        try:
            algo = self._ALGOS[algo]
        except Exception:
            raise TypeError(
                "{} is not an available algorithm "
                "to compute Sobol indices.".format(algo)
            )
        output_names = outputs or self.default_output
        if not isinstance(output_names, list):
            output_names = [output_names]
        inputs = Sample(self.dataset.get_data_by_group(self.dataset.INPUT_GROUP))
        outputs = self.dataset.get_data_by_names(output_names, True)
        dim = self.dataset.dimension[self.dataset.INPUT_GROUP]
        n_samples = int(old_div(len(self.dataset), (dim + 2)))
        self.__sobol = {}
        for output_name, value in outputs.items():
            self.__sobol[output_name] = []
            for index in range(value.shape[1]):
                sub_outputs = Sample(value[:, index][:, None])
                self.__sobol[output_name].append(algo(inputs, sub_outputs, n_samples))
        return self.indices

    def __get_indices(
        self,
        first_order=True,  # type: bool
    ):  # type: (...) -> IndicesType
        """Get the indices, either first-order or total order.

        Args:
            first_order: The type of indices.
                If True, return first-order Sobol' indices.
                Otherwise, return total-order Sobol' indices.

        Returns:
            The Sobol' indices, either first-order or total order.
        """
        if first_order:
            method = "getFirstOrderIndices"
        else:
            method = "getTotalOrderIndices"
        array_to_dict = DataConversion.array_to_dict
        inputs_names = self.dataset.get_names(self.dataset.INPUT_GROUP)
        sizes = self.dataset.sizes
        indices = {}
        for name in self.__sobol:
            indices[name] = [
                array_to_dict(array(getattr(sobol, method)()), inputs_names, sizes)
                for sobol in self.__sobol[name]
            ]
        return indices

    @property
    def first_order_indices(self):  # type: (...) -> IndicesType
        """dict: The first-order Sobol' indices.

        With the following structure:

        .. code-block:: python

            {
                "output_name": [
                    {
                        "input_name": data_array,
                    }
                ]
            }
        """
        return self.__get_indices()

    @property
    def total_order_indices(self):  # type: (...) -> IndicesType
        """dict: The total-order Sobol' indices.

        With the following structure:

        .. code-block:: python

            {
                "output_name": [
                    {
                        "input_name": data_array,
                    }
                ]
            }
        """
        return self.__get_indices(False)

[docs]    def get_intervals(
        self,
        first_order=True,  # type: bool
    ):  # type: (...) -> IndicesType
        """Get the confidence interval for Sobol' indices.

        Args:
            first_order: The type of indices.
                If True, returns the intervals for the first-order indices.
                Otherwise, for the total-order indices.

        Returns:
            dict: The confidence intervals for the Sobol' indices.

            With the following structure:

            .. code-block:: python

                {
                    "output_name": [
                        {
                            "input_name": data_array,
                        }
                    ]
                }
        """
        array_to_dict = DataConversion.array_to_dict
        inputs_names = self.dataset.get_names(self.dataset.INPUT_GROUP)
        sizes = self.dataset.sizes
        intervals = {}
        for output_name in self.__sobol:
            intervals[output_name] = []
            for sobol in self.__sobol[output_name]:
                if first_order:
                    interval = sobol.getFirstOrderIndicesInterval()
                else:
                    interval = sobol.getTotalOrderIndicesInterval()
                lower_bound = array(interval.getLowerBound())
                upper_bound = array(interval.getUpperBound())
                lower_bound = array_to_dict(lower_bound, inputs_names, sizes)
                upper_bound = array_to_dict(upper_bound, inputs_names, sizes)
                intervals[output_name].append(
                    {
                        name: array([lower_bound[name][0], upper_bound[name][0]])
                        for name in lower_bound
                    }
                )
        return intervals

    @property
    def indices(
        self,
    ):  # type: (...) -> Dict[str, IndicesType] # noqa: D102
        return {"first": self.first_order_indices, "total": self.total_order_indices}

    @property
    def main_indices(self):  # type: (...) -> IndicesType # noqa: D102
        if self.main_method == self._TOTAL_METHOD:
            return self.total_order_indices
        else:
            return self.first_order_indices

[docs]    def plot(
        self,
        output,  # type: Union[str,Tuple[str,int]]
        inputs=None,  # type: Optional[Iterable[str]]
        title=None,  # type: Optional[str]
        save=True,  # type: bool
        show=False,  # type: bool
        file_path=None,  # type: Optional[Union[str,Path]]
        directory_path=None,  # type: Optional[Union[str,Path]]
        file_name=None,  # type: Optional[str]
        file_format=None,  # type: Optional[str]
        sort=True,  # type:bool
        sort_by_total=True,  # type:bool
    ):  # noqa: D417
        r"""Plot the first- and total-order Sobol' indices.

        For :math:`i\in\{1,\ldots,d\}`, plot :math:`S_i^{1}` and :math:`S_T^{1}`
        with their confidence intervals,

        Args:
            sort: The sorting option.
                If True, sort variables before display.
            sort_by_total: The type of sorting.
                If True, sort variables according to total-order Sobol' indices.
                Otherwise, use first-order Sobol' indices.
        """
        if not isinstance(output, tuple):
            output = (output, 0)
        fig, ax = plt.subplots()
        if sort_by_total:
            indices = self.total_order_indices
        else:
            indices = self.first_order_indices
        intervals = self.get_intervals()
        indices = indices[output[0]][output[1]]
        intervals = intervals[output[0]][output[1]]
        first_order_indices = self.first_order_indices[output[0]][output[1]]
        total_order_indices = self.total_order_indices[output[0]][output[1]]
        if sort:
            names = [
                name
                for name, _ in sorted(
                    list(indices.items()), key=lambda item: item[1], reverse=True
                )
            ]
        else:
            names = list(indices.keys())
        names = self._filter_names(names, inputs)
        errorbar_options = {"marker": "o", "linestyle": "", "markersize": 7}
        trans1 = Affine2D().translate(-0.01, 0.0) + ax.transData
        trans2 = Affine2D().translate(+0.01, 0.0) + ax.transData
        values = [first_order_indices[name] for name in names]
        yerr = array(
            [
                [
                    first_order_indices[name][0] - intervals[name][0],
                    intervals[name][1] - first_order_indices[name][0],
                ]
                for name in names
            ]
        ).T
        ax.errorbar(
            names,
            values,
            yerr,
            label="First order",
            transform=trans2,
            **errorbar_options
        )
        intervals = self.get_intervals(False)
        intervals = intervals[output[0]][output[1]]
        values = [total_order_indices[name] for name in names]
        yerr = array(
            [
                [
                    total_order_indices[name][0] - intervals[name][0],
                    intervals[name][1] - total_order_indices[name][0],
                ]
                for name in names
            ]
        ).T
        ax.errorbar(
            names,
            values,
            yerr,
            label="Total order",
            transform=trans1,
            **errorbar_options
        )
        ax.legend(
            loc="lower left",
            bbox_to_anchor=(0.0, 1.01),
            ncol=2,
            borderaxespad=0,
            frameon=False,
        )
        output = "{}({})".format(output[0], output[1])
        ax.set_title(title or "Sobol indices for the output {}".format(output))
        self._save_show_plot(
            fig,
            save=save,
            show=show,
            file_path=file_path,
            file_name=file_name,
            file_format=file_format,
            directory_path=directory_path,
        )