
# K-means

Load the Iris dataset and group its samples into clusters with the K-means algorithm.


## Import



In [None]:
from __future__ import annotations

from gemseo import configure_logger
from gemseo import create_benchmark_dataset
from gemseo.datasets.dataset import Dataset
from gemseo.mlearning import create_clustering_model
from gemseo.post.dataset.scatter_plot_matrix import ScatterMatrix
from numpy import array

# Configure GEMSEO's logger, relying on the function's default arguments.
configure_logger()

## Create dataset
We import the Iris benchmark dataset through the API.



In [None]:
# Load the Iris benchmark dataset through GEMSEO's high-level function.
iris = create_benchmark_dataset("IrisDataset")

# Extract the inputs (the variables of the parameter group) as a new dataset:
# first the raw values as a NumPy array, then the matching variable names.
data = iris.get_view(group_names=iris.PARAMETER_GROUP).to_numpy()
variables = iris.get_variable_names(iris.PARAMETER_GROUP)
# A bare ``variables`` expression in the middle of a cell is evaluated and
# discarded (only the last expression of a cell is echoed by default),
# so the names are printed explicitly.
print(variables)

# Build the dataset from the values and the variable names only.
dataset = Dataset.from_array(data, variables)

## Create clustering model
We know that there are three classes of Iris plants.
We will thus try to identify three clusters.



In [None]:
# Request as many clusters as there are known classes of Iris plants (three).
model = create_clustering_model(
    "KMeans",
    data=dataset,
    n_clusters=3,
)
# Train the clustering model on the dataset it was created with.
model.learn()
# Final expression of the cell: the notebook echoes the model.
model

## Predict output
Once the model is built, we can use it for prediction.



In [None]:
# One new sample: each variable name maps to a one-element NumPy array
# holding the value of that variable.
input_value = {
    name: array([value])
    for name, value in (
        ("sepal_length", 4.5),
        ("sepal_width", 3.0),
        ("petal_length", 1.0),
        ("petal_width", 0.2),
    )
}
# Ask the clustering model for its prediction on this sample.
output_value = model.predict(input_value)
# Final expression of the cell: the notebook echoes the prediction.
output_value

## Plot clusters
Add the cluster labels to the dataset and show them on a scatter plot matrix.



In [None]:
# Store the model's labels in the dataset as a single-column variable named
# "km_specy"; the third argument is presumably the group name -- confirm
# against the GEMSEO Dataset API.
dataset.add_variable(
    "km_specy",
    model.labels.reshape((-1, 1)),
    "labels",
)
# Draw the scatter plot matrix using "km_specy" as the classifier;
# the figure is shown on screen and not saved to disk.
ScatterMatrix(
    dataset,
    kde=True,
    classifier="km_specy",
).execute(save=False, show=True)