# Source code for gemseo.mlearning.api

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - initial API and implementation and/or initial
#                           documentation
#        :author: Matthias De Lozzo
#        :author: Syver Doving Agdestein
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Machine learning API.

The machine learning API provides methods for creating new and loading existing machine
learning models. It also provides methods for listing available models and options.
"""
from __future__ import annotations

import logging
from pathlib import Path
from typing import Mapping

from gemseo.api import _get_schema
from gemseo.core.dataset import Dataset
from gemseo.mlearning.classification.classification import MLClassificationAlgo
from gemseo.mlearning.cluster.cluster import MLClusteringAlgo
from gemseo.mlearning.core.ml_algo import MLAlgo
from gemseo.mlearning.core.ml_algo import TransformerType
from gemseo.mlearning.core.supervised import MLSupervisedAlgo
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.mlearning.transform.scaler.min_max_scaler import MinMaxScaler

# Module-level logger; create_regression_model uses it to emit a warning.
LOGGER = logging.getLogger(__name__)

# pylint: disable=import-outside-toplevel


def get_mlearning_models() -> list[str]:
    """List the machine learning algorithms known to the factory.

    Returns:
        The names of the available machine learning algorithms.

    See also
    --------
    create_mlearning_model
    get_mlearning_options
    import_mlearning_model
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    return MLAlgoFactory().models
def get_regression_models() -> list[str]:
    """List the regression models known to the factory.

    Returns:
        The names of the available regression models.

    See also
    --------
    create_regression_model
    get_regression_options
    import_regression_model
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    return RegressionModelFactory().models
def get_classification_models() -> list[str]:
    """List the classification models known to the factory.

    Returns:
        The names of the available classification models.

    See also
    --------
    create_classification_model
    get_classification_options
    import_classification_model
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    return ClassificationModelFactory().models
def get_clustering_models() -> list[str]:
    """List the clustering models known to the factory.

    Returns:
        The names of the available clustering models.

    See also
    --------
    create_clustering_model
    get_clustering_options
    import_clustering_model
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    return ClusteringModelFactory().models
def create_mlearning_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType] | None = None,
    **parameters,
) -> MLAlgo:
    """Build a machine learning algorithm from a learning dataset.

    Args:
        name: The name of the machine learning algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            The keys are names of either variables or groups of variables
            and the values are instances of :class:`.Transformer`.
            If None, do not transform the variables.
        parameters: The parameters of the machine learning algorithm.

    Returns:
        A machine learning model.

    See also
    --------
    get_mlearning_models
    get_mlearning_options
    import_mlearning_model
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    return MLAlgoFactory().create(
        name, data=data, transformer=transformer, **parameters
    )
# Transformer mapping that associates the input group with a MinMaxScaler instance.
# NOTE(review): not referenced by any function visible in this module; it may be
# used by external callers -- confirm before removing.
minmax_inputs = {Dataset.INPUT_GROUP: MinMaxScaler()}
def create_regression_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType]
    | None = MLRegressionAlgo.DEFAULT_TRANSFORMER,  # noqa: B950
    **parameters,
) -> MLRegressionAlgo:
    """Create a regression model from a learning dataset.

    Args:
        name: The name of the regression algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            Values are instances of :class:`.Transformer`
            while keys are names of either variables or groups of variables.
            If None, do not transform the variables.
            The passed mapping is never modified in place:
            when an entry has to be discarded, a filtered copy is used instead.
        parameters: The parameters of the regression model.

    Returns:
        A regression model.

    See also
    --------
    get_regression_models
    get_regression_options
    import_regression_model
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    factory = RegressionModelFactory()
    if (
        name == "PCERegressor"
        and isinstance(transformer, Mapping)
        and Dataset.INPUT_GROUP in transformer
    ):
        LOGGER.warning(
            "Remove input data transformation because "
            "PCERegressor does not support transformers."
        )
        # Work on a filtered copy rather than deleting the key in place:
        # the mapping may be the caller's own object or the shared class-level
        # default, and neither must be altered as a side effect of this call.
        transformer = {
            key: value
            for key, value in transformer.items()
            if key != Dataset.INPUT_GROUP
        }

    return factory.create(name, data=data, transformer=transformer, **parameters)
def create_classification_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType]
    | None = MLSupervisedAlgo.DEFAULT_TRANSFORMER,  # noqa: B950
    **parameters,
) -> MLClassificationAlgo:
    """Build a classification model from a learning dataset.

    Args:
        name: The name of the classification algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            The keys are names of either variables or groups of variables
            and the values are instances of :class:`.Transformer`.
            If None, do not transform the variables.
        parameters: The parameters of the classification model.

    Returns:
        A classification model.

    See also
    --------
    get_classification_models
    get_classification_options
    import_classification_model
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    return ClassificationModelFactory().create(
        name, data=data, transformer=transformer, **parameters
    )
def create_clustering_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType] | None = None,
    **parameters,
) -> MLClusteringAlgo:
    """Build a clustering model from a learning dataset.

    Args:
        name: The name of the clustering algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            The keys are names of either variables or groups of variables
            and the values are instances of :class:`.Transformer`.
            If None, do not transform the variables.
        parameters: The parameters of the clustering model.

    Returns:
        A clustering model.

    See also
    --------
    get_clustering_models
    get_clustering_options
    import_clustering_model
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    return ClusteringModelFactory().create(
        name, data=data, transformer=transformer, **parameters
    )
def import_mlearning_model(
    directory: str | Path,
) -> MLAlgo:
    """Load a machine learning algorithm from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A machine learning model.

    See also
    --------
    create_mlearning_model
    get_mlearning_models
    get_mlearning_options
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    return MLAlgoFactory().load(directory)
def import_regression_model(
    directory: str | Path,
) -> MLRegressionAlgo:
    """Load a regression model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A regression model.

    See also
    --------
    create_regression_model
    get_regression_models
    get_regression_options
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    return RegressionModelFactory().load(directory)
def import_classification_model(
    directory: str | Path,
) -> MLClassificationAlgo:
    """Load a classification model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A classification model.

    See also
    --------
    create_classification_model
    get_classification_models
    get_classification_options
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    return ClassificationModelFactory().load(directory)
def import_clustering_model(
    directory: str | Path,
) -> MLClusteringAlgo:
    """Load a clustering model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A clustering model.

    See also
    --------
    create_clustering_model
    get_clustering_models
    get_clustering_options
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    return ClusteringModelFactory().load(directory)
def get_mlearning_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a machine learning algorithm.

    Args:
        model_name: The name of the machine learning algorithm.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the machine learning algorithm.

    See also
    --------
    create_mlearning_model
    get_mlearning_models
    import_mlearning_model
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    options_grammar = MLAlgoFactory().factory.get_options_grammar(model_name)
    return _get_schema(options_grammar, output_json, pretty_print)
def get_regression_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a regression model.

    Args:
        model_name: The name of the regression model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the regression model.

    See also
    --------
    create_regression_model
    get_regression_models
    import_regression_model
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    options_grammar = RegressionModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)
def get_classification_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a classification model.

    Args:
        model_name: The name of the classification model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the classification model.

    See also
    --------
    create_classification_model
    get_classification_models
    import_classification_model
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    options_grammar = ClassificationModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)
def get_clustering_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a clustering model.

    Args:
        model_name: The name of the clustering model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the clustering model.

    See also
    --------
    create_clustering_model
    get_clustering_models
    import_clustering_model
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    options_grammar = ClusteringModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)