# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Syver Doving Agdestein
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""The R2 to measure the quality of a regression algorithm.
The :mod:`~gemseo.mlearning.qual_measure.r2_measure` module
implements the concept of R2 measures for machine learning algorithms.
This concept is implemented through the :class:`.R2Measure` class
and overloads the :meth:`!MLErrorMeasure._compute_measure` method.
The R2 is defined by
.. math::
R_2(\\hat{y}) = 1 - \\frac{\\sum_i (\\hat{y}_i - y_i)^2}
{\\sum_i (y_i-\\bar{y})^2},
where
:math:`\\hat{y}` are the predictions,
:math:`y` are the data points and
:math:`\\bar{y}` is the mean of :math:`y`.
"""
from __future__ import division, unicode_literals

from copy import deepcopy
from typing import List, NoReturn, Optional, Union

from numpy import atleast_2d
from numpy import delete as npdelete
from numpy import mean, ndarray, repeat
from sklearn.metrics import mean_squared_error, r2_score

from gemseo.mlearning.qual_measure.error_measure import MLErrorMeasure
from gemseo.mlearning.regression.regression import MLRegressionAlgo


class R2Measure(MLErrorMeasure):
    """The R2 measure for machine learning."""

    SMALLER_IS_BETTER = False

    def __init__(
self,
algo, # type: MLRegressionAlgo
): # type: (...) -> None
"""
Args:
algo: A machine learning algorithm for regression.
"""
        super(R2Measure, self).__init__(algo)

    def _compute_measure(
self,
outputs, # type: ndarray
predictions, # type: ndarray
multioutput=True, # type: bool
): # type: (...) -> Union[float,ndarray]
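        # ``r2_score`` accepts multioutput="raw_values" (one R2 value per
        # output component) or "uniform_average" (a single averaged scalar);
        # the boolean flag is mapped to one of these modes.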
multioutput = "raw_values" if multioutput else "uniform_average"
return r2_score(outputs, predictions, multioutput=multioutput)

    def evaluate_kfolds(
self,
n_folds=5, # type: int
samples=None, # type: Optional[List[int]]
multioutput=True, # type: bool
randomize=False, # type:bool
): # type: (...) -> Union[float,ndarray]
folds, samples = self._compute_folds(samples, n_folds, randomize)
in_grp = self.algo.learning_set.INPUT_GROUP
out_grp = self.algo.learning_set.OUTPUT_GROUP
inputs = self.algo.learning_set.get_data_by_group(in_grp)
outputs = self.algo.learning_set.get_data_by_group(out_grp)
multiout = "raw_values" if multioutput else "uniform_average"
algo = deepcopy(self.algo)
num = 0
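        # Denominator of the R2: the total sum of squares of the outputs,
        # i.e. the mean squared deviation from the output mean multiplied by
        # the number of learning samples, computed once on the full dataset.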
ymean = mean(outputs, axis=0)
ymean = atleast_2d(ymean)
ymean = repeat(ymean, outputs.shape[0], axis=0)
den = mean_squared_error(outputs, ymean, multioutput=multiout) * len(ymean)
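        # Numerator: accumulate, fold by fold, the sum of squared errors of a
        # model trained on the remaining samples and evaluated on the fold.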
for n_fold in range(n_folds):
fold = folds[n_fold]
train = npdelete(samples, fold)
algo.learn(samples=train)
expected = outputs[fold]
predicted = algo.predict(inputs[fold])
tmp = mean_squared_error(expected, predicted, multioutput=multiout)
num += tmp * len(fold)
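        # Pooled cross-validation R2: one minus the ratio of the accumulated
        # squared errors to the total sum of squares.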
quality = 1 - num / den
return quality

    def evaluate_bootstrap(
self,
n_replicates=100, # type: int
samples=None, # type: Optional[List[int]]
multioutput=True, # type: bool
): # type: (...) -> NoReturn
raise NotImplementedError
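

# Minimal illustrative sketch: using only the numpy and scikit-learn functions
# already imported above, it checks that the pooled formula from
# ``evaluate_kfolds`` (one minus the ratio of the residual sum of squares to
# the total sum of squares) reproduces ``sklearn.metrics.r2_score`` on a
# single split. The small arrays below are arbitrary example data.
if __name__ == "__main__":
    from numpy import array

    y_true = array([[1.0, 2.0], [2.0, 0.0], [3.0, 1.0], [4.0, 3.0]])
    y_pred = array([[1.1, 1.8], [1.9, 0.3], [3.2, 0.9], [3.8, 3.1]])

    # Residual sum of squares per output (numerator of 1 - R2).
    num = mean_squared_error(y_true, y_pred, multioutput="raw_values") * len(y_true)
    # Total sum of squares per output (denominator), built as in evaluate_kfolds.
    y_mean = repeat(atleast_2d(mean(y_true, axis=0)), y_true.shape[0], axis=0)
    den = mean_squared_error(y_true, y_mean, multioutput="raw_values") * len(y_true)

    print("1 - num / den:", 1 - num / den)
    print("r2_score     :", r2_score(y_true, y_pred, multioutput="raw_values"))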