# Source code for gemseo.mlearning.data_formatters.regression_data_formatters

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""Data formatters for regression algorithms."""

from __future__ import annotations

from collections.abc import Mapping
from functools import wraps
from typing import TYPE_CHECKING

from numpy import eye

if TYPE_CHECKING:
    from typing import Any
    from typing import Callable

    from gemseo.mlearning.core.ml_algo import DataType
    from gemseo.mlearning.regression.regression import BaseMLRegressionAlgo
    from gemseo.typing import RealArray

from gemseo.mlearning.data_formatters.supervised_data_formatters import (
    SupervisedDataFormatters,
)
from gemseo.utils.data_conversion import concatenate_dict_of_arrays_to_array
from gemseo.utils.data_conversion import split_array_to_dict_of_arrays


class RegressionDataFormatters(SupervisedDataFormatters):
    """Data formatters for regression algorithms."""

    @classmethod
    def format_dict_jacobian(
        cls,
        func: Callable[[BaseMLRegressionAlgo, RealArray, Any, ...], RealArray],
    ) -> Callable[[BaseMLRegressionAlgo, DataType, Any, ...], DataType]:
        """Make an array-based function callable with a dictionary of NumPy arrays.

        Args:
            func: The function to be called;
                it takes a NumPy array in input and returns a NumPy array.

        Returns:
            The wrapped ``func`` function, callable with
            either a NumPy data array
            or a dictionary of numpy data arrays indexed by variables names.
            The return value will have the same type as the input data.
        """

        @wraps(func)
        def wrapper(
            algo: BaseMLRegressionAlgo,
            input_data: DataType,
            *args: Any,
            **kwargs: Any,
        ) -> DataType:
            """Evaluate ``func`` with either array or dictionary-based data.

            Firstly,
            the pre-processing stage converts the input data to a NumPy data array,
            if these data are expressed as a dictionary of NumPy data arrays.

            Then,
            the processing evaluates the function ``func``
            from this NumPy input data array.

            Lastly,
            the post-processing transforms the output data
            to a dictionary of output NumPy data array
            if the input data were passed as a dictionary of NumPy data arrays.

            Args:
                algo: The regression algorithm.
                input_data: The input data.
                *args: The positional arguments of the function ``func``.
                **kwargs: The keyword arguments of the function ``func``.

            Returns:
                The output data with the same type as the input one.
            """
            as_dict = isinstance(input_data, Mapping)
            if as_dict:
                input_data = concatenate_dict_of_arrays_to_array(
                    input_data, algo.input_names
                )

            jacobians = func(algo, input_data, *args, **kwargs)
            if not as_dict:
                # Array in, array out: nothing to post-process.
                return jacobians

            # The splitting call is the same whatever the number of samples,
            # so no single-sample / multi-sample distinction is needed here.
            return split_array_to_dict_of_arrays(
                jacobians,
                algo.learning_set.variable_names_to_n_components,
                algo.output_names,
                algo.input_names,
            )

        return wrapper

    @classmethod
    def transform_jacobian(
        cls,
        func: Callable[[BaseMLRegressionAlgo, RealArray, Any, ...], RealArray],
    ) -> Callable[[BaseMLRegressionAlgo, RealArray, Any, ...], RealArray]:
        """Apply transformation to inputs and inverse transformation to outputs.

        Args:
            func: The function of interest to be called.

        Returns:
            A function evaluating the function ``func``,
            after transforming its input data
            and/or before transforming its output data.
        """

        @wraps(func)
        def wrapper(
            algo: BaseMLRegressionAlgo,
            input_data: RealArray,
            *args: Any,
            **kwargs: Any,
        ) -> RealArray:
            """Evaluate ``func`` after or before data transformation.

            Firstly,
            the pre-processing stage transforms the input data if required.

            Then,
            the processing evaluates the function ``func``.

            Lastly,
            the post-processing stage transforms the output data if required.

            Args:
                algo: The regression algorithm.
                input_data: The input data.
                *args: The positional arguments of the function.
                **kwargs: The keyword arguments of the function.

            Returns:
                Either the raw output data of ``func``
                or a transformed version according to the requirements.

            Raises:
                NotImplementedError: When the transformer is applied to a variable
                    rather than to a group of variables.
            """
            if (
                algo._input_variables_to_transform
                or algo._output_variables_to_transform
            ):
                # TODO: implement this case
                msg = (
                    "The Jacobian of regression models cannot be computed "
                    "when the transformed quantities are variables; "
                    "please transform the whole group 'inputs' or 'outputs' "
                    "or do not use data transformation."
                )
                raise NotImplementedError(msg)

            transformer = algo.transformer

            # Chain rule, input side: Jacobian of the input transformation,
            # evaluated at the untransformed input data.
            inputs = algo.learning_set.INPUT_GROUP
            if inputs in transformer:
                input_transformer = transformer[inputs]
                jac = input_transformer.compute_jacobian(input_data)
                input_data = input_transformer.transform(input_data)
            else:
                # No input transformation: use the identity;
                # this indexing requires ``input_data`` to be at least 2D.
                jac = eye(input_data.shape[1])

            jac = func(algo, input_data, *args, **kwargs) @ jac

            # Chain rule, output side: the raw prediction is only needed
            # to evaluate the Jacobian of the inverse output transformation,
            # so it is not computed when the outputs are not transformed.
            outputs = algo.learning_set.OUTPUT_GROUP
            if outputs in transformer:
                output_data = algo.predict_raw(input_data)
                jac = transformer[outputs].compute_jacobian_inverse(output_data) @ jac

            return jac

        return wrapper