# Source code for gemseo.mlearning.qual_measure.error_measure
# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Syver Doving Agdestein
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""
Error measure
=============
The :mod:`~gemseo.mlearning.qual_measure.error_measure` module implements
the concept of error measures for machine learning algorithms.
This concept is implemented through the :class:`.MLErrorMeasure` class
and implements the different evaluation methods.
The error measure class is adapted for supervised machine learning algorithms,
as it measures the error of a predicted value to some reference value.
"""
from __future__ import absolute_import, division, unicode_literals
import numpy.random as npr
from future import standard_library
from numpy import arange, array_split
from numpy import delete as npdelete
from numpy import vstack
from gemseo.mlearning.qual_measure.quality_measure import MLQualityMeasure
standard_library.install_aliases()
class MLErrorMeasure(MLQualityMeasure):
    """ Error measure for machine learning.

    Measures the discrepancy between reference outputs and the outputs
    predicted by a supervised machine learning algorithm, using either the
    learning set, a separate test set, k-folds cross-validation or
    bootstrapping. Subclasses define the actual error metric by implementing
    :meth:`._compute_measure`.
    """

    def evaluate_learn(self, multioutput=True):
        """Evaluate quality measure using the learning dataset.

        :param bool multioutput: if True, return the error measure for each
            output component. Otherwise, average these errors. Default: True.
        :return: quality measure value.
        """
        self._train_if_necessary()
        inputs, outputs = self._get_data(self.algo.learning_set)
        predictions = self.algo.predict(inputs)
        return self._compute_measure(outputs, predictions, multioutput)

    def evaluate_test(self, test_data, multioutput=True):
        """Evaluate quality measure using a test dataset.

        :param Dataset test_data: test data.
        :param bool multioutput: if True, return the error measure for each
            output component. Otherwise, average these errors. Default: True.
        :return: quality measure value.
        """
        self._train_if_necessary()
        inputs, outputs = self._get_data(test_data)
        predictions = self.algo.predict(inputs)
        return self._compute_measure(outputs, predictions, multioutput)

    def evaluate_kfolds(self, n_folds=5, multioutput=True):
        """Evaluate quality measure using the k-folds technique.

        Note that the underlying algorithm is re-trained on sample subsets;
        after the call it remains trained on the last fold's complement.

        :param int n_folds: number of folds. Default: 5.
        :param bool multioutput: if True, return the error measure for each
            output component. Otherwise, average these errors. Default: True.
        :return: quality measure value.
        """
        n_samples = self.algo.learning_set.n_samples
        inds = arange(n_samples)
        folds = array_split(inds, n_folds)
        inputs, outputs = self._get_data(self.algo.learning_set)
        expected = []
        predicted = []
        for fold in folds:
            # Train on every sample outside the current fold,
            # then predict on the held-out fold.
            train = npdelete(inds, fold)
            self.algo.learn(samples=train)
            predicted.append(self.algo.predict(inputs[fold]))
            expected.append(outputs[fold])
        return self._compute_measure(
            vstack(expected), vstack(predicted), multioutput
        )

    def evaluate_bootstrap(self, n_replicates=100, multioutput=True):
        """Evaluate quality measure using the bootstrap technique.

        Note that the underlying algorithm is re-trained on sample subsets;
        after the call it remains trained on the last bootstrap replicate.

        :param int n_replicates: number of bootstrap replicates. Default: 100.
        :param bool multioutput: if True, return the error measure for each
            output component. Otherwise, average these errors. Default: True.
        :return: quality measure value.
        """
        n_samples = self.algo.learning_set.n_samples
        inds = arange(n_samples)
        inputs, outputs = self._get_data(self.algo.learning_set)
        expected = []
        predicted = []
        for _ in range(n_replicates):
            # Draw n_samples indices with replacement (bootstrap sample);
            # evaluate on the out-of-bag samples.
            train = npr.choice(n_samples, n_samples)
            test = npdelete(inds, train)
            self.algo.learn(samples=train)
            predicted.append(self.algo.predict(inputs[test]))
            expected.append(outputs[test])
        return self._compute_measure(
            vstack(expected), vstack(predicted), multioutput
        )

    def _train_if_necessary(self):
        """Train the underlying algorithm on its learning set if untrained."""
        if not self.algo.is_trained:
            self.algo.learn()

    def _get_data(self, dataset):
        """Extract the input and output arrays of a dataset.

        :param Dataset dataset: dataset to extract data from.
        :return: input data array and output data array.
        """
        inputs = dataset.get_data_by_group(dataset.INPUT_GROUP)
        outputs = dataset.get_data_by_group(dataset.OUTPUT_GROUP)
        return inputs, outputs

    def _compute_measure(self, outputs, predictions, multioutput=True):
        """Compute error measure. To be implemented by subclasses.

        :param ndarray outputs: reference outputs.
        :param ndarray predictions: predicted outputs.
        :param bool multioutput: if True, return the error measure for each
            output component. Otherwise, average these errors. Default: True.
        :return: measure value.
        """
        raise NotImplementedError