Source code for gemseo.uncertainty.statistics.tolerance_interval.empirical

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Clément Laboulfie
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Computation of tolerance intervals empirically."""

from __future__ import annotations

from typing import TYPE_CHECKING

from numpy import argmax
from numpy import argmin
from numpy import linspace
from numpy import sort
from scipy.stats import binom

from gemseo.uncertainty.statistics.tolerance_interval.distribution import (
    _BaseToleranceInterval,
)

if TYPE_CHECKING:
    from gemseo.typing import RealArray



[docs]
class EmpiricalToleranceInterval(_BaseToleranceInterval):
    """Computation of empirical tolerance intervals."""

    __sorted_samples: RealArray
    """The samples sorted in ascending order."""

    def __init__(self, data: RealArray) -> None:
        """
        Args:
            data: The samples from which the tolerance interval is estimated,
            shaped as ``(n_samples,)``.
        """  # noqa: D205 D212 D415
        self._size = len(data)
        self.__sorted_samples = sort(data)

    def _compute_lower_bound(
        self,
        coverage: float,
        alpha: float,
        size: int,
    ) -> float:
        # Meeker et al., p. 88, 5.3.2
        indices = linspace(0, size, num=size + 1)
        return self.__sorted_samples[
            max(
                argmin((binom.cdf(size - indices, size, coverage) - (1 - alpha)) > 0)
                - 1,
                0,
            )
        ]

    def _compute_upper_bound(
        self,
        coverage: float,
        alpha: float,
        size: int,
    ) -> float:
        # Meeker et al., p. 88, 5.3.2
        indices = linspace(1, size + 1, num=size + 1)
        return self.__sorted_samples[
            min(
                argmax((binom.cdf(indices - 1, size, coverage) - (1 - alpha)) > 0), size
            )
        ]

    def _compute_bounds(
        self,
        coverage: float,
        alpha: float,
        size: int,
    ) -> tuple[float, float]:
        # Meeker et al., p. 87, 5.3.1
        nu = size - binom.ppf(1 - alpha, size, coverage)
        if nu % 2:
            # nu is odd
            nu_1 = nu / 2 - 1 / 2
            nu_2 = nu_1 + 1
        else:
            # nu is even
            nu_1 = nu_2 = nu / 2

        lower = max(int(nu_1), 0)
        upper = min(int(size - nu_2 + 1), size)

        return self.__sorted_samples[lower], self.__sorted_samples[upper]