# Correlation analysis

import pprint

from matplotlib import pyplot as plt
from numpy import pi

from gemseo.algos.parameter_space import ParameterSpace
from gemseo.api import create_discipline
from gemseo.uncertainty.sensitivity.correlation.analysis import CorrelationAnalysis


In this example, we consider a function from $$[-\pi,\pi]^3$$ to $$\mathbb{R}^2$$:

$(y_1,y_2)=\left(f(x_1,x_2,x_3),f(x_2,x_1,x_3)\right)$

where $$f(a,b,c)=\sin(a)+7\sin(b)^2+0.1*c^4\sin(a)$$ is the Ishigami function:

# Symbolic expressions of the two outputs: "y2" is "y1" with x1 and x2 swapped.
expressions = dict(
    y1="sin(x1)+7*sin(x2)**2+0.1*x3**4*sin(x1)",
    y2="sin(x2)+7*sin(x1)**2+0.1*x3**4*sin(x2)",
)
# Wrap the expressions into an analytic discipline named "Ishigami2".
discipline = create_discipline(
    "AnalyticDiscipline",
    name="Ishigami2",
    expressions_dict=expressions,
)


Then, we consider the case where the deterministic variables $$x_1$$, $$x_2$$ and $$x_3$$ are replaced with the uncertain variables $$X_1$$, $$X_2$$ and $$X_3$$. The latter are independent and identically distributed according to a uniform distribution between $$-\pi$$ and $$\pi$$:

# Uncertain space: each of x1, x2 and x3 is declared as a random variable
# following a uniform distribution between -pi and pi.
space = ParameterSpace()
for variable in ["x1", "x2", "x3"]:
    space.add_random_variable(
        variable, "OTUniformDistribution", minimum=-pi, maximum=pi
    )


From that, we would like to carry out a sensitivity analysis with the random outputs $$Y_1=f(X_1,X_2,X_3)$$ and $$Y_2=f(X_2,X_1,X_3)$$. For that, we can compute the correlation coefficients from a CorrelationAnalysis:

# Set up the correlation analysis of the discipline over the uncertain space;
# the third argument (1000) is presumably the number of samples -- confirm.
correlation = CorrelationAnalysis(discipline, space, 1000)
# Compute all the correlation measures; the returned dictionary is shown below.
correlation.compute_indices()


Out:

{'pearson': {'y1': [{'x1': array([0.42024386]), 'x2': array([0.04679518]), 'x3': array([0.00886217])}], 'y2': [{'x1': array([0.01510994]), 'x2': array([0.41388005]), 'x3': array([-0.02055015])}]}, 'spearman': {'y1': [{'x1': array([0.41617398]), 'x2': array([0.03525258]), 'x3': array([0.00116835])}], 'y2': [{'x1': array([0.00271857]), 'x2': array([0.4047482]), 'x3': array([0.00252372])}]}, 'pcc': {'y1': [{'x1': array([0.41925706]), 'x2': array([0.0312552]), 'x3': array([0.01615963])}], 'y2': [{'x1': array([-0.00359038]), 'x2': array([0.41410056]), 'x3': array([-0.02918736])}]}, 'prcc': {'y1': [{'x1': array([0.41530944]), 'x2': array([0.01751477]), 'x3': array([0.0079852])}], 'y2': [{'x1': array([-0.01756057]), 'x2': array([0.40506592]), 'x3': array([-0.00392587])}]}, 'src': {'y1': [{'x1': array([0.17575275]), 'x2': array([0.00080584]), 'x3': array([0.0002149])}], 'y2': [{'x1': array([1.06963174e-05]), 'x2': array([0.17173179]), 'x3': array([0.00070628])}]}, 'srrc': {'y1': [{'x1': array([0.17267986]), 'x2': array([0.00025422]), 'x3': array([5.27275335e-05])}], 'y2': [{'x1': array([0.00025847]), 'x2': array([0.16446521]), 'x3': array([1.28895734e-05])}]}, 'ssrrc': {'y1': [{'x1': array([0.41922876]), 'x2': array([0.02838738]), 'x3': array([0.01465943])}], 'y2': [{'x1': array([-0.00327052]), 'x2': array([0.41440534]), 'x3': array([-0.02657598])}]}}


The resulting indices are the Pearson correlation coefficients, the Spearman correlation coefficients, the Partial Correlation Coefficients (PCC), the Partial Rank Correlation Coefficients (PRCC), the Standard Regression Coefficients (SRC), the Standard Rank Regression Coefficients (SRRC) and the Signed Standard Rank Regression Coefficients (SSRRC):

# Pretty-print the indices, nested as method name -> output name -> input name.
pprint.pprint(correlation.indices)


Out:

{'pcc': {'y1': [{'x1': array([0.41925706]),
'x2': array([0.0312552]),
'x3': array([0.01615963])}],
'y2': [{'x1': array([-0.00359038]),
'x2': array([0.41410056]),
'x3': array([-0.02918736])}]},
'pearson': {'y1': [{'x1': array([0.42024386]),
'x2': array([0.04679518]),
'x3': array([0.00886217])}],
'y2': [{'x1': array([0.01510994]),
'x2': array([0.41388005]),
'x3': array([-0.02055015])}]},
'prcc': {'y1': [{'x1': array([0.41530944]),
'x2': array([0.01751477]),
'x3': array([0.0079852])}],
'y2': [{'x1': array([-0.01756057]),
'x2': array([0.40506592]),
'x3': array([-0.00392587])}]},
'spearman': {'y1': [{'x1': array([0.41617398]),
'x2': array([0.03525258]),
'x3': array([0.00116835])}],
'y2': [{'x1': array([0.00271857]),
'x2': array([0.4047482]),
'x3': array([0.00252372])}]},
'src': {'y1': [{'x1': array([0.17575275]),
'x2': array([0.00080584]),
'x3': array([0.0002149])}],
'y2': [{'x1': array([1.06963174e-05]),
'x2': array([0.17173179]),
'x3': array([0.00070628])}]},
'srrc': {'y1': [{'x1': array([0.17267986]),
'x2': array([0.00025422]),
'x3': array([5.27275335e-05])}],
'y2': [{'x1': array([0.00025847]),
'x2': array([0.16446521]),
'x3': array([1.28895734e-05])}]},
'ssrrc': {'y1': [{'x1': array([0.41922876]),
'x2': array([0.02838738]),
'x3': array([0.01465943])}],
'y2': [{'x1': array([-0.00327052]),
'x2': array([0.41440534]),
'x3': array([-0.02657598])}]}}


The main indices correspond to the Spearman correlation indices (this main method can be changed with CorrelationAnalysis.main_method):

# Pretty-print the indices of the main method only (output name -> input name).
pprint.pprint(correlation.main_indices)


Out:

{'y1': [{'x1': array([0.41617398]),
'x2': array([0.03525258]),
'x3': array([0.00116835])}],
'y2': [{'x1': array([0.00271857]),
'x2': array([0.4047482]),
'x3': array([0.00252372])}]}


We can also sort the input parameters by decreasing order of influence and observe that this ranking is not the same for both outputs:

# Rank the inputs by decreasing influence, one output at a time.
for output_name in ("y1", "y2"):
    print(correlation.sort_parameters(output_name))


Out:

['x1', 'x2', 'x3']
['x2', 'x1', 'x3']


Lastly, we can use the method CorrelationAnalysis.plot() to visualize the different correlation coefficients:

# Draw one figure per output without saving it or showing it right away.
for output_name in ("y1", "y2"):
    correlation.plot(output_name, save=False, show=False)
# Workaround for HTML rendering, instead of show=True
plt.show()


Out:

/home/docs/checkouts/readthedocs.org/user_builds/gemseo/conda/3.2.2/lib/python3.8/site-packages/gemseo/post/dataset/dataset_plot.py:383: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
sub_figure.tight_layout()


Total running time of the script: ( 0 minutes 2.815 seconds)

Gallery generated by Sphinx-Gallery