Source code for

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry,
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - API and implementation and/or documentation
#        :author: Francois Gallard
#        :author: Damien Guenot
"""
Correlations in the optimization database
"""

from __future__ import absolute_import, division, unicode_literals

from os.path import basename, dirname, join, splitext

import matplotlib.gridspec as gridspec
import numpy as np
import pylab
from future import standard_library
from matplotlib import ticker
from numpy import atleast_2d

from import OptPostProcessor


from gemseo import LOGGER

class Correlations(OptPostProcessor):
    """
    The **Correlations** post processing
    builds scatter plots of correlated variables
    among design variables, output functions and constraints.

    By default, the plot method considers all pairs of variables whose
    absolute correlation coefficient is greater than 95%.
    Another threshold, a sublist of function names or both
    can be passed as options.
    The numbers of plots per figure along x and y can also be passed
    as options.
    It is possible either to save the plot, to show the plot or both.
    """

    def _run(self, **options):
        """Builds the correlation plots for all the functions of the problem

        :param options: options for the post processing,
            see associated JSON file; they are forwarded to ``_plot``
        """
        functions = self.opt_problem.get_all_functions()
        # NOTE(review): the element expression of this comprehension was lost
        # in the extracted source; ``func.name`` is a reconstruction - confirm.
        func_names = [func.name for func in functions]
        self._plot(func_names, **options)

    def _plot(
        self,
        func_names=None,
        coeff_limit=0.95,
        n_plots_x=5,
        n_plots_y=5,
        save=False,
        show=False,
        file_path=None,
        extension="pdf",
    ):
        """
        Plots the correlations graph

        One scatter plot is made per pair of variables whose absolute
        correlation coefficient is strictly greater than ``coeff_limit`` and
        strictly lower than ``1 - 1e-9`` (which discards the correlation of a
        variable with itself). The plots are spread over as many figures as
        needed, each holding up to ``n_plots_x * n_plots_y`` plots; the
        figures are exported as ``<base>correlations_<k>`` with k starting
        at 1.

        :param func_names: the func_names on which correlations is computed
        :type func_names: list(str)
        :param coeff_limit: if the absolute correlation between the variables
            is lower than coeff_limit, the plot is not made
        :type coeff_limit: float
        :param n_plots_x: number of horizontal plots; replaced by 4
            when at most 16 correlations are detected
        :type n_plots_x: int
        :param n_plots_y: number of vertical plots; replaced by 4
            when at most 16 correlations are detected
        :type n_plots_y: int
        :param save: if True, exports plot to a file
        :type save: bool
        :param show: if True, displays the plot windows
        :type show: bool
        :param file_path: the base paths of the files to export
        :type file_path: str
        :param extension: file extension
        :type extension: str
        :returns: always 0
        :rtype: int
        """
        n_slide = 0
        values_array, variables_names, _ = self.database.get_history_array(
            func_names, None, True, 0.0
        )
        corr_coeffs_array = self.__compute_correlations(values_array)
        abs_coeffs = np.abs(corr_coeffs_array)
        # The upper bound removes the unit diagonal; the zeroed triangle of
        # the matrix is removed by the lower bound.
        i_corr, j_corr = np.where(
            (abs_coeffs > coeff_limit) & (abs_coeffs < (1.0 - 1e-9))
        )
        # NOTE(review): the callee of this logging call was lost in the
        # extracted source; ``LOGGER.info`` is a reconstruction - confirm.
        LOGGER.info("Detected %s correlations > %s", i_corr.size, coeff_limit)

        # A single 4x4 figure is enough for up to 16 plots.
        if i_corr.size <= 16:
            n_plots_x = 4
            n_plots_y = 4
        n_plots_per_fig = n_plots_x * n_plots_y

        spec = gridspec.GridSpec(n_plots_y, n_plots_x, wspace=0.3, hspace=0.75)
        spec.update(top=0.95, bottom=0.06, left=0.08, right=0.95)

        if file_path is not None:
            root = splitext(file_path)[0]
            root_dir = dirname(root)
            base_n = basename(root)
        else:
            root_dir = "."
            base_n = ""

        fig = None
        fig_indx = 0
        for plot_index, (i, j) in enumerate(zip(i_corr, j_corr)):
            plot_index_loc = plot_index % n_plots_per_fig
            if plot_index_loc == 0:
                # The current figure is full (or none exists yet):
                # export the previous one and start a new one.
                if fig is not None:
                    fig_indx += 1
                    self.__export_figure(
                        fig, fig_indx, root_dir, base_n, save, show, extension
                    )
                fig = pylab.plt.figure()
                mng = pylab.plt.get_current_fig_manager()
                mng.resize(1200, 900)
                # NOTE(review): the locator is created but never attached to
                # an axis, so this statement has no visible effect.
                ticker.MaxNLocator(nbins=3)

            self.__create_sub_correlation_plot(
                i,
                j,
                corr_coeffs_array[i, j],
                fig,
                spec,
                plot_index_loc,
                n_plots_y,
                n_plots_x,
                values_array,
                variables_names,
            )

        # Export the last, possibly partially filled, figure.
        if fig is not None:
            self.__export_figure(
                fig, fig_indx + 1, root_dir, base_n, save, show, extension
            )
        return n_slide

    def __export_figure(
        self, fig, fig_indx, root_dir, base_n, save, show, extension
    ):
        """Saves and/or shows a figure, then closes it

        :param fig: the figure to export
        :param fig_indx: the 1-based index of the figure, used in the file name
        :param root_dir: the directory of the exported file
        :param base_n: the prefix of the exported file name
        :param save: if True, exports the figure to a file
        :param show: if True, displays the figure
        :param extension: file extension
        """
        base_loc = base_n + "correlations_" + str(fig_indx)
        fpath = join(root_dir, base_loc)
        self._save_and_show(
            fig, file_path=fpath, save=save, show=show, extension=extension
        )
        pylab.plt.close(fig)

    def __create_sub_correlation_plot(
        self,
        i_ind,
        j_ind,
        corr_coeff,
        fig,
        spec,
        plot_index,
        n_plot_v,
        n_plot_h,
        values_array,
        variables_names,
    ):
        """Creates a correlation plot

        Draws the scatter plot of variable ``j_ind`` versus variable ``i_ind``
        in the cell number ``plot_index`` of the grid and stores the names of
        the two variables and their correlation coefficient
        in ``self.out_data_dict[(i_ind, j_ind)]``.

        :param i_ind: column index, in values_array, of the x-axis variable
        :param j_ind: column index, in values_array, of the y-axis variable
        :param corr_coeff: the correlation coefficient of the two variables
        :param fig: the figure in which the plot is added
        :param spec: the grid specification of the figure
        :param plot_index: the index of the plot in the figure,
            counted row by row
        :param n_plot_v: number of vertical plots (rows of the grid)
        :param n_plot_h: number of horizontal plots (columns of the grid)
        :param values_array: the history array, one column per variable
        :param variables_names: the names of the columns of values_array
        """
        # Cells are filled row by row, so both the row and the column are
        # derived from the number of columns; dividing by the number of rows
        # is only correct for square grids.
        row = plot_index // n_plot_h
        col = plot_index % n_plot_h
        ax1 = fig.add_subplot(spec[row, col])

        x_plt = values_array[:, i_ind]
        y_plt = values_array[:, j_ind]
        ax1.scatter(x_plt, y_plt, c="b", s=30)
        self.out_data_dict[(i_ind, j_ind)] = (
            variables_names[i_ind],
            variables_names[j_ind],
            corr_coeff,
        )

        ax1.set_xlabel(variables_names[i_ind], fontsize=9)
        # Update the ticks spacing: the step is slightly below a quarter of
        # the axis range so that np.arange, which excludes its upper bound,
        # still yields a tick next to the upper limit.
        start, stop = ax1.get_ylim()
        ax1.yaxis.set_ticks(np.arange(start, stop, 0.24999999 * (stop - start)))
        start, stop = ax1.get_xlim()
        ax1.xaxis.set_ticks(np.arange(start, stop, 0.24999999 * (stop - start)))
        ax1.set_ylabel(variables_names[j_ind], fontsize=10)
        ax1.tick_params(labelsize=10)
        ax1.set_title("R=%5f" % corr_coeff, fontsize=12)
        ax1.grid()

    @classmethod
    def __compute_correlations(cls, values_array):
        """Compute correlations

        :param values_array: the history array, one column per variable
        :returns: the matrix of correlation coefficients between the columns
            of values_array, with the entries above the diagonal set to zero
        """
        ccoeff = np.corrcoef(values_array.astype(float), rowvar=False)
        # Keep the lower triangle (diagonal included) so that each pair of
        # variables appears only once; atleast_2d handles the scalar returned
        # by corrcoef for a single variable.
        return np.tril(atleast_2d(ccoeff))