gemseo.uncertainty.statistics.ot_parametric_statistics module#

OpenTURNS-based parametric estimation of statistics from a dataset.

class OTParametricStatistics(dataset, distributions, variable_names=(), fitting_criterion=None, level=0.05, selection_criterion=SelectionCriterion.BEST, name='')[source]#

Bases: BaseParametricStatistics[OTDistribution, BIC, DistributionName, FittingCriterion, SignificanceTest]

A toolbox to compute statistics using OpenTURNS probability distribution-fitting.

Examples

>>> from gemseo import (
...     create_discipline,
...     create_parameter_space,
...     sample_disciplines,
... )
>>> from gemseo.uncertainty.statistics.ot_parametric_statistics import (
...     OTParametricStatistics,
... )
>>>
>>> discipline = create_discipline(
...     "AnalyticDiscipline", {"y1": "x1+2*x2", "y2": "x1-3*x2"}
... )
>>> parameter_space = create_parameter_space()
>>> parameter_space.add_random_variable(
...     "x1", "OTUniformDistribution", minimum=-1, maximum=1
... )
>>> parameter_space.add_random_variable(
...     "x2", "OTNormalDistribution", mu=0.5, sigma=2
... )
>>>
>>> dataset = sample_disciplines(
...     [discipline],
...     parameter_space,
...     ["y1"],
...     algo_name="OT_MONTE_CARLO",
...     n_samples=100,
... )
>>>
>>> statistics = OTParametricStatistics(
...     dataset, ["Normal", "Uniform", "Triangular"]
... )
>>> fitting_matrix = statistics.get_fitting_matrix()
>>> mean = statistics.compute_mean()
Parameters:
  • dataset (Dataset) -- A dataset.

  • distributions (Sequence[_DistributionNameT]) -- The names of the probability distributions.

  • variable_names (Iterable[str]) --

    The names of the variables for which to compute statistics. If empty, consider all the variables of the dataset.

    By default it is set to ().

  • fitting_criterion (_FittingCriterionT | None) -- The name of the fitting criterion to measure the goodness-of-fit of the probability distributions. If None, use the default one. Use get_criteria() to get the available criteria.

  • level (float) --

    A test level, i.e. the risk of committing a Type 1 error (an incorrect rejection of a true null hypothesis), for criteria based on hypothesis tests.

    By default it is set to 0.05.

  • selection_criterion (SelectionCriterion) --

    The name of the criterion used to select one distribution among the candidate distributions.

    By default it is set to "best".

  • name (str) --

    A name for the toolbox computing statistics. If empty, concatenate the name of the dataset and the name of the class.

    By default it is set to "".

class DistributionName(*values)#

Bases: StrEnum

Arcsine = 'Arcsine'#
Beta = 'Beta'#
Burr = 'Burr'#
Chi = 'Chi'#
ChiSquare = 'ChiSquare'#
Dirichlet = 'Dirichlet'#
Exponential = 'Exponential'#
FisherSnedecor = 'FisherSnedecor'#
Frechet = 'Frechet'#
Gamma = 'Gamma'#
GeneralizedPareto = 'GeneralizedPareto'#
Gumbel = 'Gumbel'#
Histogram = 'Histogram'#
InverseNormal = 'InverseNormal'#
Laplace = 'Laplace'#
LogNormal = 'LogNormal'#
LogUniform = 'LogUniform'#
Logistic = 'Logistic'#
MeixnerDistribution = 'MeixnerDistribution'#
Normal = 'Normal'#
Pareto = 'Pareto'#
Rayleigh = 'Rayleigh'#
Rice = 'Rice'#
Student = 'Student'#
Trapezoidal = 'Trapezoidal'#
Triangular = 'Triangular'#
TruncatedNormal = 'TruncatedNormal'#
Uniform = 'Uniform'#
VonMises = 'VonMises'#
WeibullMax = 'WeibullMax'#
WeibullMin = 'WeibullMin'#
class FittingCriterion(*values)#

Bases: StrEnum

BIC = 'BIC'#
ChiSquared = 'ChiSquared'#
Kolmogorov = 'Kolmogorov'#
class SignificanceTest(*values)#

Bases: StrEnum

ChiSquared = 'ChiSquared'#
Kolmogorov = 'Kolmogorov'#