Source code for gemseo.mlearning.qual_measure.r2_measure

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                         documentation
#        :author: Syver Doving Agdestein
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""The R2 to measure the quality of a regression algorithm.

The :mod:`~gemseo.mlearning.qual_measure.r2_measure` module
implements the concept of R2 measures for machine learning algorithms.

This concept is implemented through the :class:`.R2Measure` class
and overrides the :meth:`!MLErrorMeasure._compute_measure` method.

The R2 is defined by

.. math::

    R^2(\\hat{y}) = 1 - \\frac{\\sum_i (\\hat{y}_i - y_i)^2}
                              {\\sum_i (y_i-\\bar{y})^2},

where
:math:`\\hat{y}` are the predictions,
:math:`y` are the data points and
:math:`\\bar{y}` is the mean of :math:`y`.
"""
from __future__ import division, unicode_literals

from copy import deepcopy
from typing import List, NoReturn, Optional, Union

from numpy import atleast_2d
from numpy import delete as npdelete
from numpy import mean, ndarray, repeat
from sklearn.metrics import mean_squared_error, r2_score

from gemseo.mlearning.qual_measure.error_measure import MLErrorMeasure
from gemseo.mlearning.regression.regression import MLRegressionAlgo


class R2Measure(MLErrorMeasure):
    """The R2 measure for machine learning.

    The R2 (coefficient of determination) equals one minus the ratio between
    the sum of squared prediction errors and the sum of squared deviations of
    the reference outputs from their mean.
    """

    # A higher R2 means a better fit, hence the flag is off.
    SMALLER_IS_BETTER = False

    def __init__(
        self,
        algo,  # type: MLRegressionAlgo
    ):  # type: (...) -> None
        """
        Args:
            algo: A machine learning algorithm for regression.
        """
        super(R2Measure, self).__init__(algo)

    def _compute_measure(
        self,
        outputs,  # type: ndarray
        predictions,  # type: ndarray
        multioutput=True,  # type: bool
    ):  # type: (...) -> Union[float,ndarray]
        """Compute the R2 between reference outputs and predictions.

        Args:
            outputs: The reference output data.
            predictions: The predicted output data.
            multioutput: If True, return one R2 value per output component
                (``"raw_values"``); otherwise, return their uniform average
                (``"uniform_average"``).

        Returns:
            The value returned by :func:`sklearn.metrics.r2_score`.
        """
        if multioutput:
            aggregation = "raw_values"
        else:
            aggregation = "uniform_average"
        return r2_score(outputs, predictions, multioutput=aggregation)

    def evaluate_kfolds(
        self,
        n_folds=5,  # type: int
        samples=None,  # type: Optional[List[int]]
        multioutput=True,  # type: bool
        randomize=False,  # type:bool
    ):  # type: (...) -> Union[float,ndarray]
        """Evaluate the R2 by k-folds cross-validation.

        For each fold, a copy of the algorithm is trained on the other samples
        and used to predict the outputs of the fold. The squared errors are
        accumulated over the folds and compared with the squared deviations of
        all the learning outputs from their mean.

        Args:
            n_folds: The number of folds.
            samples: The indices of the learning samples,
                forwarded to ``_compute_folds``.
            multioutput: If True, return one R2 value per output component;
                otherwise, return their uniform average.
            randomize: Forwarded to ``_compute_folds``
                (presumably shuffles the samples before splitting -- to be
                confirmed against the parent class).

        Returns:
            The cross-validated R2, ``1 - num / den``.
        """
        folds, samples = self._compute_folds(samples, n_folds, randomize)

        dataset = self.algo.learning_set
        input_group = dataset.INPUT_GROUP
        output_group = dataset.OUTPUT_GROUP
        inputs = dataset.get_data_by_group(input_group)
        outputs = dataset.get_data_by_group(output_group)

        if multioutput:
            aggregation = "raw_values"
        else:
            aggregation = "uniform_average"

        # Train a copy so that the original algorithm keeps its fitted state.
        model = deepcopy(self.algo)

        # Denominator: squared deviations from the output mean.
        # The constant "mean model" is tiled to the shape of the outputs, and
        # the mean squared error is scaled by the number of rows to get a sum.
        mean_model = repeat(
            atleast_2d(mean(outputs, axis=0)), outputs.shape[0], axis=0
        )
        total_squares = mean_squared_error(
            outputs, mean_model, multioutput=aggregation
        ) * len(mean_model)

        # Numerator: squared prediction errors accumulated over the folds.
        residual_squares = 0
        for fold_index in range(n_folds):
            test = folds[fold_index]
            # NOTE(review): ``test`` indexes ``inputs``/``outputs`` as sample
            # indices, whereas ``numpy.delete`` interprets it as positions
            # within ``samples``; both agree only if ``samples[i] == i``.
            # Confirm against ``_compute_folds`` (custom or shuffled samples).
            model.learn(samples=npdelete(samples, test))
            fold_error = mean_squared_error(
                outputs[test], model.predict(inputs[test]), multioutput=aggregation
            )
            # Scale the mean by the fold size to get a sum of squares.
            residual_squares += fold_error * len(test)

        return 1 - residual_squares / total_squares

    def evaluate_bootstrap(
        self,
        n_replicates=100,  # type: int
        samples=None,  # type: Optional[List[int]]
        multioutput=True,  # type: bool
    ):  # type: (...) -> NoReturn
        """Evaluate the R2 by bootstrap; not available for this measure.

        Args:
            n_replicates: Unused.
            samples: Unused.
            multioutput: Unused.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError