# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Matthias De Lozzo
# :author: Syver Doving Agdestein
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""Machine learning API.
The machine learning API provides methods for creating new and loading existing machine
learning models. It also provides methods for listing available models and options.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Mapping
from gemseo.api import _get_schema
from gemseo.core.dataset import Dataset
from gemseo.mlearning.classification.classification import MLClassificationAlgo
from gemseo.mlearning.cluster.cluster import MLClusteringAlgo
from gemseo.mlearning.core.ml_algo import MLAlgo
from gemseo.mlearning.core.ml_algo import TransformerType
from gemseo.mlearning.core.supervised import MLSupervisedAlgo
from gemseo.mlearning.regression.regression import MLRegressionAlgo
from gemseo.mlearning.transform.scaler.min_max_scaler import MinMaxScaler
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
# The functions of this module import their factory at function scope,
# hence the following pylint pragma.
# pylint: disable=import-outside-toplevel
def get_mlearning_models() -> list[str]:
    """List the machine learning algorithms known to the generic factory.

    Returns:
        The available machine learning algorithms.

    See also
    --------
    create_mlearning_model
    get_mlearning_options
    import_mlearning_model
    """
    # Imported lazily, like every factory used in this module.
    from gemseo.mlearning.core.factory import MLAlgoFactory

    return MLAlgoFactory().models
def get_regression_models() -> list[str]:
    """List the regression models known to the regression factory.

    Returns:
        The available regression models.

    See also
    --------
    create_regression_model
    get_regression_options
    import_regression_model
    """
    # Imported lazily, like every factory used in this module.
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    return RegressionModelFactory().models
def get_classification_models() -> list[str]:
    """List the classification models known to the classification factory.

    Returns:
        The available classification models.

    See also
    --------
    create_classification_model
    get_classification_options
    import_classification_model
    """
    # Imported lazily, like every factory used in this module.
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    return ClassificationModelFactory().models
def get_clustering_models() -> list[str]:
    """List the clustering models known to the clustering factory.

    Returns:
        The available clustering models.

    See also
    --------
    create_clustering_model
    get_clustering_options
    import_clustering_model
    """
    # Imported lazily, like every factory used in this module.
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    return ClusteringModelFactory().models
def create_mlearning_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType] | None = None,
    **parameters,
) -> MLAlgo:
    """Build a machine learning algorithm from a learning dataset.

    Args:
        name: The name of the machine learning algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            Values are instances of :class:`.Transformer`
            while keys are names of either variables or groups of variables.
            If None, do not transform the variables.
        parameters: The parameters of the machine learning algorithm,
            forwarded as keyword arguments to the factory.

    Returns:
        A machine learning model.

    See also
    --------
    get_mlearning_models
    get_mlearning_options
    import_mlearning_model
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    algo_factory = MLAlgoFactory()
    algo = algo_factory.create(
        name, data=data, transformer=transformer, **parameters
    )
    return algo
# Transformer mapping associating the dataset input group with a MinMaxScaler.
# NOTE(review): no function visible in this module reads this variable;
# it may be kept for external users -- confirm before removing.
minmax_inputs = {Dataset.INPUT_GROUP: MinMaxScaler()}
def create_regression_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType]
    | None = MLRegressionAlgo.DEFAULT_TRANSFORMER,  # noqa: B950
    **parameters,
) -> MLRegressionAlgo:
    """Create a regression model from a learning dataset.

    When ``name`` is ``"PCERegressor"`` and ``transformer`` contains an entry
    for the input group, this entry is discarded and a warning is logged.
    The mapping passed by the caller is never modified.

    Args:
        name: The name of the regression algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            Values are instances of :class:`.Transformer`
            while keys are names of either variables or groups of variables.
            If None, do not transform the variables.
        parameters: The parameters of the regression model,
            forwarded as keyword arguments to the factory.

    Returns:
        A regression model.

    See also
    --------
    get_regression_models
    get_regression_options
    import_regression_model
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    # Accept any mapping (not only ``dict``), in line with the annotated type,
    # so that read-only mappings are handled too.
    if (
        name == "PCERegressor"
        and isinstance(transformer, Mapping)
        and Dataset.INPUT_GROUP in transformer
    ):
        LOGGER.warning(
            "Remove input data transformation because "
            "PCERegressor does not support transformers."
        )
        # Build a filtered copy rather than deleting the key in place:
        # the mapping may belong to the caller or be the shared default,
        # and neither must be altered as a side effect of this call.
        transformer = {
            key: value
            for key, value in transformer.items()
            if key != Dataset.INPUT_GROUP
        }

    return RegressionModelFactory().create(
        name, data=data, transformer=transformer, **parameters
    )
def create_classification_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType]
    | None = MLSupervisedAlgo.DEFAULT_TRANSFORMER,  # noqa: B950
    **parameters,
) -> MLClassificationAlgo:
    """Build a classification model from a learning dataset.

    Args:
        name: The name of the classification algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            Values are instances of :class:`.Transformer`
            while keys are names of either variables or groups of variables.
            If None, do not transform the variables.
        parameters: The parameters of the classification model,
            forwarded as keyword arguments to the factory.

    Returns:
        A classification model.

    See also
    --------
    get_classification_models
    get_classification_options
    import_classification_model
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    classifier_factory = ClassificationModelFactory()
    classifier = classifier_factory.create(
        name, data=data, transformer=transformer, **parameters
    )
    return classifier
def create_clustering_model(
    name: str,
    data: Dataset,
    transformer: Mapping[str, TransformerType] | None = None,
    **parameters,
) -> MLClusteringAlgo:
    """Build a clustering model from a learning dataset.

    Args:
        name: The name of the clustering algorithm.
        data: The learning dataset.
        transformer: The strategies to transform the variables.
            Values are instances of :class:`.Transformer`
            while keys are names of either variables or groups of variables.
            If None, do not transform the variables.
        parameters: The parameters of the clustering model,
            forwarded as keyword arguments to the factory.

    Returns:
        A clustering model.

    See also
    --------
    get_clustering_models
    get_clustering_options
    import_clustering_model
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    clustering_factory = ClusteringModelFactory()
    clusterer = clustering_factory.create(
        name, data=data, transformer=transformer, **parameters
    )
    return clusterer
def import_mlearning_model(directory: str | Path) -> MLAlgo:
    """Load a machine learning algorithm from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A machine learning model.

    See also
    --------
    create_mlearning_model
    get_mlearning_models
    get_mlearning_options
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    # The loading itself is delegated to the factory.
    return MLAlgoFactory().load(directory)
def import_regression_model(directory: str | Path) -> MLRegressionAlgo:
    """Load a regression model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A regression model.

    See also
    --------
    create_regression_model
    get_regression_models
    get_regression_options
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    # The loading itself is delegated to the factory.
    return RegressionModelFactory().load(directory)
def import_classification_model(directory: str | Path) -> MLClassificationAlgo:
    """Load a classification model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A classification model.

    See also
    --------
    create_classification_model
    get_classification_models
    get_classification_options
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    # The loading itself is delegated to the factory.
    return ClassificationModelFactory().load(directory)
def import_clustering_model(directory: str | Path) -> MLClusteringAlgo:
    """Load a clustering model from a directory.

    Args:
        directory: The path to the directory.

    Returns:
        A clustering model.

    See also
    --------
    create_clustering_model
    get_clustering_models
    get_clustering_options
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    # The loading itself is delegated to the factory.
    return ClusteringModelFactory().load(directory)
def get_mlearning_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a machine learning algorithm.

    Args:
        model_name: The name of the machine learning algorithm.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the machine learning algorithm.

    See also
    --------
    create_mlearning_model
    get_mlearning_models
    import_mlearning_model
    """
    from gemseo.mlearning.core.factory import MLAlgoFactory

    # The options grammar is provided by the factory wrapped by MLAlgoFactory.
    options_grammar = MLAlgoFactory().factory.get_options_grammar(model_name)
    return _get_schema(options_grammar, output_json, pretty_print)
def get_regression_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a regression model.

    Args:
        model_name: The name of the regression model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the regression model.

    See also
    --------
    create_regression_model
    get_regression_models
    import_regression_model
    """
    from gemseo.mlearning.regression.factory import RegressionModelFactory

    # The options grammar is provided by the factory wrapped by
    # RegressionModelFactory.
    options_grammar = RegressionModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)
def get_classification_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a classification model.

    Args:
        model_name: The name of the classification model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the classification model.

    See also
    --------
    create_classification_model
    get_classification_models
    import_classification_model
    """
    from gemseo.mlearning.classification.factory import ClassificationModelFactory

    # The options grammar is provided by the factory wrapped by
    # ClassificationModelFactory.
    options_grammar = ClassificationModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)
def get_clustering_options(
    model_name: str,
    output_json: bool = False,
    pretty_print: bool = True,
) -> dict[str, str] | str:
    """Return the options schema of a clustering model.

    Args:
        model_name: The name of the clustering model.
        output_json: Whether to apply JSON format for the schema.
        pretty_print: Whether to print the schema in a pretty table.

    Returns:
        The options schema of the clustering model.

    See also
    --------
    create_clustering_model
    get_clustering_models
    import_clustering_model
    """
    from gemseo.mlearning.cluster.factory import ClusteringModelFactory

    # The options grammar is provided by the factory wrapped by
    # ClusteringModelFactory.
    options_grammar = ClusteringModelFactory().factory.get_options_grammar(
        model_name
    )
    return _get_schema(options_grammar, output_json, pretty_print)