Source code for gemseo.uncertainty.distributions.scipy.distribution_fitter

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Matthias De Lozzo
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Fitting a probability distribution to data using the SciPy library."""

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any
from typing import ClassVar

import scipy.stats as scipy_stats
from scipy.stats import goodness_of_fit
from scipy.stats import rv_continuous
from strenum import StrEnum

from gemseo.uncertainty.distributions.base_distribution_fitter import (
    BaseDistributionFitter,
)
from gemseo.uncertainty.distributions.scipy.distribution import SPDistribution

if TYPE_CHECKING:
    from scipy.stats._fit import GoodnessOfFitResult

    from gemseo.typing import StrKeyMapping


class SPDistributionFitter(BaseDistributionFitter[SPDistribution]):
    """Fit a probability distribution to data using the SciPy library."""

    # One member per direct subclass of ``scipy.stats.rv_continuous``.
    # The member name is the class name truncated at its first "_gen",
    # e.g. "norm_gen" -> "norm"; it is later looked up in ``scipy.stats``.
    DistributionName: ClassVar[StrEnum] = StrEnum(
        "DistributionName",
        [rv.__name__.rsplit("_gen")[0] for rv in rv_continuous.__subclasses__()],
    )

    class FittingCriterion(StrEnum):
        """The names of the available fitting criteria."""

        ANDERSON_DARLING = "AndersonDarling"
        CRAMER_VON_MISES = "CramerVonMises"
        FILLIBEN = "Filliben"
        KOLMOGOROV_SMIRNOV = "KolmogorovSmirnov"

    # Criterion presumably used when the caller does not specify one
    # (the consumer of this attribute is not visible in this file).
    default_fitting_criterion: ClassVar[FittingCriterion] = (
        FittingCriterion.ANDERSON_DARLING
    )

    # Map each criterion to the ``statistic`` keyword understood by
    # ``scipy.stats.goodness_of_fit``.
    _CRITERIA_TO_WRAPPED_OBJECTS: ClassVar[dict[FittingCriterion, str]] = {
        FittingCriterion.ANDERSON_DARLING: "ad",
        FittingCriterion.CRAMER_VON_MISES: "cvm",
        FittingCriterion.FILLIBEN: "filliben",
        FittingCriterion.KOLMOGOROV_SMIRNOV: "ks",
    }

    # Alias of ``FittingCriterion``.
    SignificanceTest: ClassVar[FittingCriterion] = FittingCriterion

    def fit(
        self,
        distribution: DistributionName,
    ) -> SPDistribution:
        """Fit a SciPy distribution to the samples.

        Args:
            distribution: The name of the distribution,
                which must be an attribute of ``scipy.stats``.

        Returns:
            The distribution built from the distribution name
            and the parameters returned by the ``fit`` method
            of the SciPy distribution.
        """
        # The f-string turns an enumeration member into its plain string value.
        scipy_distribution = getattr(scipy_stats, f"{distribution}")
        parameters = scipy_distribution.fit(self._samples)
        return SPDistribution(distribution, parameters)

    def _compute_measure(
        self,
        distribution: SPDistribution | DistributionName,
        criterion: FittingCriterion,
        level: float,
    ) -> Any:
        """Compute a goodness-of-fit measure with SciPy.

        Args:
            distribution: Either a distribution or the name of a distribution.
            criterion: The name of the fitting criterion.
            level: Not used by this method.

        Returns:
            The result of ``scipy.stats.goodness_of_fit``.
        """
        if isinstance(distribution, SPDistribution):
            # Only the name of the wrapped SciPy distribution is kept;
            # its fitted parameters are not forwarded to ``goodness_of_fit``.
            distribution = distribution.distribution.dist.name

        return goodness_of_fit(
            getattr(scipy_stats, distribution),
            self._samples,
            statistic=self._CRITERIA_TO_WRAPPED_OBJECTS[criterion],
            # Fixed seed, presumably to make the result reproducible.
            random_state=0,
        )

    @staticmethod
    def _format_significance_test_goodness_of_fit(
        result: GoodnessOfFitResult, level: float
    ) -> tuple[bool, StrKeyMapping]:
        """Format the result of a goodness-of-fit significance test.

        Args:
            result: The result returned by ``scipy.stats.goodness_of_fit``.
            level: The level against which the p-value is compared.

        Returns:
            Whether the p-value is greater than or equal to the level,
            then a dictionary with the p-value, the statistic and the level.
        """
        return result.pvalue >= level, {
            "p-value": result.pvalue,
            "statistics": result.statistic,
            "level": level,
        }