Source code for gemseo.mlearning.data_formatters.regression_data_formatters
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Data formatters for regression algorithms."""
from __future__ import annotations
from collections.abc import Mapping
from functools import wraps
from typing import TYPE_CHECKING
from numpy import eye
if TYPE_CHECKING:
from typing import Any
from typing import Callable
from gemseo.mlearning.core.algos.ml_algo import DataType
from gemseo.mlearning.regression.algos.base_regressor import BaseRegressor
from gemseo.typing import RealArray
from gemseo.mlearning.data_formatters.supervised_data_formatters import (
SupervisedDataFormatters,
)
from gemseo.utils.data_conversion import concatenate_dict_of_arrays_to_array
from gemseo.utils.data_conversion import split_array_to_dict_of_arrays
[docs]
class RegressionDataFormatters(SupervisedDataFormatters):
    """Data formatters for regression algorithms."""

    @classmethod
    def format_dict_jacobian(
        cls,
        func: Callable[[BaseRegressor, RealArray, Any, ...], RealArray],
    ) -> Callable[[BaseRegressor, DataType, Any, ...], DataType]:
        """Make an array-based function callable with a dictionary of NumPy arrays.

        Args:
            func: The function to be called;
                it takes a NumPy array in input and returns a NumPy array.

        Returns:
            The wrapped ``func`` function, callable with
            either a NumPy data array
            or a dictionary of NumPy data arrays indexed by variable names.
            The return value will have the same type as the input data.
        """

        @wraps(func)
        def wrapper(
            algo: BaseRegressor, input_data: DataType, *args: Any, **kwargs: Any
        ) -> DataType:
            """Evaluate ``func`` with either array or dictionary-based data.

            Firstly,
            the pre-processing stage converts the input data to a NumPy data array,
            if these data are expressed as a dictionary of NumPy data arrays.

            Then,
            the processing evaluates the function ``func``
            from this NumPy input data array.

            Lastly,
            the post-processing splits the Jacobian returned by ``func``
            into a dictionary indexed by output and input names
            if the input data were passed as a dictionary of NumPy data arrays.

            Args:
                algo: The regression algorithm.
                input_data: The input data.
                *args: The positional arguments of the function ``func``.
                **kwargs: The keyword arguments of the function ``func``.

            Returns:
                The output data with the same type as the input one.
            """
            as_dict = isinstance(input_data, Mapping)
            if as_dict:
                input_data = concatenate_dict_of_arrays_to_array(
                    input_data, algo.input_names
                )

            jacobians = func(algo, input_data, *args, **kwargs)
            if not as_dict:
                return jacobians

            # The same splitting call is used whatever the number of samples,
            # so no distinction is made between 1D and 2D input data.
            return split_array_to_dict_of_arrays(
                jacobians,
                algo.learning_set.variable_names_to_n_components,
                algo.output_names,
                algo.input_names,
            )

        return wrapper

    @classmethod
    def transform_jacobian(
        cls,
        func: Callable[[BaseRegressor, RealArray, Any, ...], RealArray],
    ) -> Callable[[BaseRegressor, RealArray, Any, ...], RealArray]:
        """Apply transformation to inputs and inverse transformation to outputs.

        Args:
            func: The function of interest to be called.

        Returns:
            A function evaluating the function ``func``,
            after transforming its input data
            and/or before transforming its output data.
        """

        @wraps(func)
        def wrapper(
            algo: BaseRegressor, input_data: RealArray, *args: Any, **kwargs: Any
        ) -> RealArray:
            """Evaluate ``func`` after or before data transformation.

            Firstly,
            the pre-processing stage transforms the input data if required.

            Then,
            the processing evaluates the function ``func``
            and right-multiplies its result
            by the Jacobian of the input transformer
            (or by the identity matrix when there is no input transformer).

            Lastly,
            the post-processing stage left-multiplies the result
            by the inverse Jacobian of the output transformer if required.

            Args:
                algo: The regression algorithm.
                input_data: The input data;
                    it must expose at least two dimensions
                    when no input transformer is set
                    because ``input_data.shape[1]`` is read.
                *args: The positional arguments of the function.
                **kwargs: The keyword arguments of the function.

            Returns:
                Either the raw output data of ``func``
                or a transformed version according to the requirements.

            Raises:
                NotImplementedError: When the transformer is applied to a variable
                    rather than to a group of variables.
            """
            if (
                algo._input_variables_to_transform
                or algo._output_variables_to_transform
            ):
                # TODO: implement this case
                msg = (
                    "The Jacobian of regression models cannot be computed "
                    "when the transformed quantities are variables; "
                    "please transform the whole group 'inputs' or 'outputs' "
                    "or do not use data transformation."
                )
                raise NotImplementedError(msg)

            transformers = algo.transformer
            inputs = algo.learning_set.INPUT_GROUP
            if inputs in transformers:
                input_transformer = transformers[inputs]
                # The Jacobian of the transformer is evaluated
                # at the original input data, i.e. before transforming them.
                jac = input_transformer.compute_jacobian(input_data)
                input_data = input_transformer.transform(input_data)
            else:
                jac = eye(input_data.shape[1])

            jac = func(algo, input_data, *args, **kwargs) @ jac

            outputs = algo.learning_set.OUTPUT_GROUP
            if outputs in transformers:
                # The raw prediction is only required
                # to evaluate the inverse Jacobian of the output transformer,
                # so it is not computed when there is no such transformer.
                output_data = algo.predict_raw(input_data)
                jac = transformers[outputs].compute_jacobian_inverse(output_data) @ jac

            return jac

        return wrapper