# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - API and implementation and/or documentation
# :author: Francois Gallard
# :author: Damien Guenot
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""
Correlations in the optimization database
*****************************************
"""
from __future__ import absolute_import, division, unicode_literals
from os.path import basename, dirname, join, splitext
import matplotlib.gridspec as gridspec
import numpy as np
import pylab
from future import standard_library
from matplotlib import ticker
from numpy import atleast_2d
from gemseo.post.opt_post_processor import OptPostProcessor
standard_library.install_aliases()
from gemseo import LOGGER
class Correlations(OptPostProcessor):
    """
    The **Correlations** post processing
    builds scatter plots of correlated variables among design
    variables, outputs functions and constraints.

    The plot method considers all variable correlations
    greater than 95%. Another level value, a sublist of variable names
    or both can be passed as options. The numbers of sub-plots per figure
    in the x- and y- directions can also be modified in option.

    It is possible either to save the plot, to show the plot or both.
    """

    def _run(self, **options):
        """Plots the correlations found in the optimization database

        When no ``func_names`` option is given, all the functions of the
        optimization problem are considered.

        :param options: options for the post processing,
            see associated JSON file
        """
        # Pop func_names so that it is not passed twice to _plot
        # (once positionally, once through **options).
        func_names = options.pop("func_names", None)
        if func_names is None:
            functions = self.opt_problem.get_all_functions()
            func_names = [func.name for func in functions]
        self._plot(func_names, **options)

    def _plot(
        self,
        func_names=None,
        coeff_limit=0.95,
        n_plots_x=5,
        n_plots_y=5,
        save=False,
        show=False,
        file_path=None,
        extension="pdf",
    ):
        """
        Plots the correlations graph

        One scatter plot is made per pair of variables whose absolute
        correlation coefficient is strictly between ``coeff_limit``
        and ``1 - 1e-9``. The plots are spread over as many figures as
        needed, each figure being saved and/or shown then closed.

        If 16 correlations or fewer are detected, a 4 x 4 grid is used
        whatever the values of ``n_plots_x`` and ``n_plots_y``.

        :param func_names: the func_names on which correlations is computed
        :type func_names: list(str)
        :param coeff_limit: if the correlation between the variables
            is lower than coeff_limit, the plot is not made
        :type coeff_limit: float
        :param n_plots_x: number of horizontal plots
        :type n_plots_x: int
        :param n_plots_y: number of vertical plots
        :type n_plots_y: int
        :param save: if True, exports the figures to files
        :type save: bool
        :param show: if True, displays the plot windows
        :type show: bool
        :param file_path: the base paths of the files to export
        :type file_path: str
        :param extension: file extension
        :type extension: str
        :returns: always 0
        :rtype: int
        """
        values_array, variables_names, _ = self.database.get_history_array(
            func_names, None, True, 0.0
        )
        corr_coeffs_array = self.__compute_correlations(values_array)
        abs_coeffs = np.abs(corr_coeffs_array)
        # The upper bound discards the diagonal (a variable against itself)
        # as well as numerically perfect correlations.
        i_corr, j_corr = np.where(
            (abs_coeffs > coeff_limit) & (abs_coeffs < (1.0 - 1e-9))
        )
        LOGGER.info("Detected %s correlations > %s", i_corr.size, coeff_limit)

        if i_corr.size <= 16:
            n_plots_x = 4
            n_plots_y = 4
        n_plots_per_fig = n_plots_x * n_plots_y

        spec = gridspec.GridSpec(n_plots_y, n_plots_x, wspace=0.3, hspace=0.75)
        spec.update(top=0.95, bottom=0.06, left=0.08, right=0.95)

        if file_path is not None:
            root = splitext(file_path)[0]
            root_dir = dirname(root)
            base_n = basename(root)
        else:
            root_dir = "."
            base_n = ""

        fig = None
        fig_indx = 0
        for plot_index, (i, j) in enumerate(zip(i_corr, j_corr)):
            plot_index_loc = plot_index % n_plots_per_fig
            if plot_index_loc == 0:
                # The current figure is full (or none exists yet):
                # flush the previous one and start a new one.
                if fig is not None:
                    fig_indx += 1
                    self.__save_and_close_figure(
                        fig, fig_indx, root_dir, base_n, save, show, extension
                    )
                fig = pylab.plt.figure()
                mng = pylab.plt.get_current_fig_manager()
                mng.resize(1200, 900)

            self.__create_sub_correlation_plot(
                i,
                j,
                corr_coeffs_array[i, j],
                fig,
                spec,
                plot_index_loc,
                n_plots_y,
                n_plots_x,
                values_array,
                variables_names,
            )

        # Flush the last, possibly partially filled, figure.
        if fig is not None:
            self.__save_and_close_figure(
                fig, fig_indx + 1, root_dir, base_n, save, show, extension
            )
        # Constant return value kept so that callers see no change.
        return 0

    def __save_and_close_figure(
        self, fig, fig_indx, root_dir, base_n, save, show, extension
    ):
        """Saves and/or shows a figure, then closes it

        The file path is ``<root_dir>/<base_n>correlations_<fig_indx>``.

        :param fig: the figure to process
        :param fig_indx: the 1-based index of the figure, used in the name
        :type fig_indx: int
        :param root_dir: the directory of the exported file
        :type root_dir: str
        :param base_n: the prefix of the exported file name
        :type base_n: str
        :param save: if True, exports the figure to a file
        :type save: bool
        :param show: if True, displays the plot window
        :type show: bool
        :param extension: file extension
        :type extension: str
        """
        base_loc = base_n + "correlations_" + str(fig_indx)
        fpath = join(root_dir, base_loc)
        self._save_and_show(
            fig, file_path=fpath, save=save, show=show, extension=extension
        )
        pylab.plt.close(fig)

    def __create_sub_correlation_plot(
        self,
        i_ind,
        j_ind,
        corr_coeff,
        fig,
        spec,
        plot_index,
        n_plot_v,
        n_plot_h,
        values_array,
        variables_names,
    ):
        """Creates a correlation plot

        Adds to the figure a scatter plot of the column ``j_ind`` of
        ``values_array`` against its column ``i_ind``, and stores the
        names of the two variables and their correlation coefficient in
        ``self.out_data_dict`` under the key ``(i_ind, j_ind)``.

        :param i_ind: index of the variable plotted on the x axis
        :type i_ind: int
        :param j_ind: index of the variable plotted on the y axis
        :type j_ind: int
        :param corr_coeff: correlation coefficient of the two variables
        :type corr_coeff: float
        :param fig: the figure receiving the sub-plot
        :param spec: the grid specification of the figure
        :param plot_index: index of the sub-plot in the figure,
            counted row by row
        :type plot_index: int
        :param n_plot_v: number of vertical plots (rows of the grid);
            not needed to locate the cell, kept for signature stability
        :type n_plot_v: int
        :param n_plot_h: number of horizontal plots (columns of the grid)
        :type n_plot_h: int
        :param values_array: the history, one column per variable
        :param variables_names: the names of the columns of values_array
        :type variables_names: list(str)
        """
        # Cells are filled row by row, so both the row and the column
        # derive from the number of columns of the grid.
        row = plot_index // n_plot_h
        col = plot_index % n_plot_h
        ax1 = fig.add_subplot(spec[row, col])

        x_plt = values_array[:, i_ind]
        y_plt = values_array[:, j_ind]
        ax1.scatter(x_plt, y_plt, c="b", s=30)
        self.out_data_dict[(i_ind, j_ind)] = (
            variables_names[i_ind],
            variables_names[j_ind],
            corr_coeff,
        )
        ax1.set_xlabel(variables_names[i_ind], fontsize=9)
        # Update the tick spacing: a step slightly below a quarter of the
        # axis range, so that a tick also falls next to the upper limit.
        start, stop = ax1.get_ylim()
        ax1.yaxis.set_ticks(np.arange(start, stop, 0.24999999 * (stop - start)))
        start, stop = ax1.get_xlim()
        ax1.xaxis.set_ticks(np.arange(start, stop, 0.24999999 * (stop - start)))
        ax1.set_ylabel(variables_names[j_ind], fontsize=10)
        ax1.tick_params(labelsize=10)
        ax1.set_title("R=%5f" % corr_coeff, fontsize=12)
        ax1.grid()

    @classmethod
    def __compute_correlations(cls, values_array):
        """Compute correlations

        :param values_array: the history, one column per variable
        :returns: the lower triangle (diagonal included) of the matrix of
            correlation coefficients, the other entries being zero
        :rtype: numpy.ndarray
        """
        ccoeff = np.corrcoef(values_array.astype(float), rowvar=False)
        # Keep the lower triangle only so that each pair is reported once;
        # atleast_2d handles the scalar returned for a single variable.
        return np.tril(atleast_2d(ccoeff))