gemseo.uncertainty.statistics.sp_parametric_statistics module

SciPy-based parametric estimation of statistics from a dataset.

class SPParametricStatistics(dataset, distributions, variable_names=(), fitting_criterion=None, level=0.05, selection_criterion=SelectionCriterion.BEST, name='')[source]#

Bases: BaseParametricStatistics[SPDistribution, AndersonDarling, DistributionName, FittingCriterion, FittingCriterion]

A toolbox to compute statistics using SciPy probability distribution-fitting.

Examples

>>> from gemseo import (
...     create_discipline,
...     create_parameter_space,
...     sample_disciplines,
... )
>>> from gemseo.uncertainty.statistics.sp_parametric_statistics import (
...     SPParametricStatistics,
... )
>>>
>>> discipline = create_discipline(
...     "AnalyticDiscipline", {"y1": "x1+2*x2", "y2": "x1-3*x2"}
... )
>>>
>>> parameter_space = create_parameter_space()
>>> parameter_space.add_random_variable(
...     "x1", "SPUniformDistribution", minimum=-1, maximum=1
... )
>>> parameter_space.add_random_variable(
...     "x2", "SPNormalDistribution", mu=0.5, sigma=2
... )
>>>
>>> dataset = sample_disciplines(
...     [discipline],
...     parameter_space,
...     ["y1"],
...     algo_name="OT_MONTE_CARLO",
...     n_samples=100,
... )
>>>
>>> statistics = SPParametricStatistics(dataset, ["norm", "uniform", "triang"])
>>> fitting_matrix = statistics.get_fitting_matrix()
>>> mean = statistics.compute_mean()

Parameters:
  • dataset (Dataset) -- A dataset.

  • distributions (Sequence[_DistributionNameT]) -- The names of the probability distributions.

  • variable_names (Iterable[str]) --

    The names of the variables for which to compute statistics. If empty, consider all the variables of the dataset.

    By default it is set to ().

  • fitting_criterion (_FittingCriterionT | None) -- The name of the fitting criterion to measure the goodness-of-fit of the probability distributions. If None, use the default one. Use get_criteria() to get the available criteria.

  • level (float) --

    A test level, i.e. the risk of committing a Type 1 error, that is an incorrect rejection of a true null hypothesis, for criteria based on hypothesis tests.

    By default it is set to 0.05.

  • selection_criterion (SelectionCriterion) --

    The name of the criterion to select a distribution among distributions.

    By default it is set to "best".

  • name (str) --

    A name for the toolbox computing statistics. If empty, concatenate the name of the dataset and the name of the class.

    By default it is set to "".

class DistributionName(*values)#

Bases: StrEnum

alpha = 'alpha'#
anglit = 'anglit'#
arcsine = 'arcsine'#
argus = 'argus'#
beta = 'beta'#
betaprime = 'betaprime'#
bradford = 'bradford'#
burr = 'burr'#
burr12 = 'burr12'#
cauchy = 'cauchy'#
chi = 'chi'#
chi2 = 'chi2'#
cosine = 'cosine'#
crystalball = 'crystalball'#
dgamma = 'dgamma'#
dpareto_lognorm = 'dpareto_lognorm'#
dweibull = 'dweibull'#
expon = 'expon'#
exponnorm = 'exponnorm'#
exponpow = 'exponpow'#
exponweib = 'exponweib'#
f = 'f'#
fatiguelife = 'fatiguelife'#
foldcauchy = 'foldcauchy'#
foldnorm = 'foldnorm'#
gamma = 'gamma'#
gausshyper = 'gausshyper'#
genexpon = 'genexpon'#
genextreme = 'genextreme'#
gengamma = 'gengamma'#
genhalflogistic = 'genhalflogistic'#
genhyperbolic = 'genhyperbolic'#
geninvgauss = 'geninvgauss'#
genlogistic = 'genlogistic'#
gennorm = 'gennorm'#
genpareto = 'genpareto'#
gibrat = 'gibrat'#
gompertz = 'gompertz'#
gumbel_l = 'gumbel_l'#
gumbel_r = 'gumbel_r'#
halfcauchy = 'halfcauchy'#
halfgennorm = 'halfgennorm'#
halflogistic = 'halflogistic'#
halfnorm = 'halfnorm'#
hypsecant = 'hypsecant'#
invgamma = 'invgamma'#
invgauss = 'invgauss'#
invweibull = 'invweibull'#
irwinhall = 'irwinhall'#
jf_skew_t = 'jf_skew_t'#
johnsonsb = 'johnsonsb'#
johnsonsu = 'johnsonsu'#
kappa3 = 'kappa3'#
kappa4 = 'kappa4'#
ksone = 'ksone'#
kstwo = 'kstwo'#
kstwobign = 'kstwobign'#
landau = 'landau'#
laplace = 'laplace'#
laplace_asymmetric = 'laplace_asymmetric'#
levy = 'levy'#
levy_l = 'levy_l'#
levy_stable = 'levy_stable'#
loggamma = 'loggamma'#
logistic = 'logistic'#
loglaplace = 'loglaplace'#
lognorm = 'lognorm'#
lomax = 'lomax'#
maxwell = 'maxwell'#
mielke = 'mielke'#
moyal = 'moyal'#
nakagami = 'nakagami'#
ncf = 'ncf'#
nct = 'nct'#
ncx2 = 'ncx2'#
norm = 'norm'#
norminvgauss = 'norminvgauss'#
pareto = 'pareto'#
pearson3 = 'pearson3'#
powerlaw = 'powerlaw'#
powerlognorm = 'powerlognorm'#
powernorm = 'powernorm'#
rayleigh = 'rayleigh'#
rdist = 'rdist'#
recipinvgauss = 'recipinvgauss'#
reciprocal = 'reciprocal'#
rel_breitwigner = 'rel_breitwigner'#
rice = 'rice'#
rv_histogram = 'rv_histogram'#
semicircular = 'semicircular'#
skewcauchy = 'skewcauchy'#
skewnorm = 'skewnorm'#
studentized_range = 'studentized_range'#
t = 't'#
trapezoid = 'trapezoid'#
triang = 'triang'#
truncexpon = 'truncexpon'#
truncnorm = 'truncnorm'#
truncpareto = 'truncpareto'#
truncweibull_min = 'truncweibull_min'#
tukeylambda = 'tukeylambda'#
uniform = 'uniform'#
vonmises = 'vonmises'#
weibull_max = 'weibull_max'#
weibull_min = 'weibull_min'#
wrapcauchy = 'wrapcauchy'#

class FittingCriterion(*values)#

Bases: StrEnum

ANDERSON_DARLING = 'AndersonDarling'#
CRAMER_VON_MISES = 'CramerVonMises'#
FILLIBEN = 'Filliben'#
KOLMOGOROV_SMIRNOV = 'KolmogorovSmirnov'#
SignificanceTest#

alias of FittingCriterion