# Source code for gemseo.problems.dataset.iris
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Contributors:
# INITIAL AUTHORS - initial API and implementation and/or initial
# documentation
# :author: Matthias De Lozzo
# OTHER AUTHORS - MACROSCOPIC CHANGES
"""
Iris dataset
============
This is one of the best-known :class:`.Dataset` examples
to be found in the machine learning literature.
It was introduced by the statistician Ronald Fisher
in his 1936 paper "The use of multiple measurements in taxonomic problems",
Annals of Eugenics. 7 (2): 179–188.
It contains 150 instances of iris plants:

- 50 Iris Setosa,
- 50 Iris Versicolor,
- 50 Iris Virginica.

Each instance is characterized by:

- its sepal length in cm,
- its sepal width in cm,
- its petal length in cm,
- its petal width in cm.

This :class:`.Dataset` can be used for either clustering
or classification purposes.
`More information about the Iris dataset
<https://en.wikipedia.org/wiki/Iris_flower_data_set>`_
"""
from __future__ import annotations
from pathlib import Path
from gemseo.core.dataset import Dataset
class IrisDataset(Dataset):
    """Dataset pre-populated with the content of the bundled ``iris.data`` file.

    Five variables of size 1 are declared: the four flower measurements
    (``sepal_length``, ``sepal_width``, ``petal_length``, ``petal_width``)
    and the ``specy`` column.
    """

    def __init__(
        self,
        name: str = "Iris",
        by_group: bool = True,
        as_io: bool = False,
    ) -> None:
        """Load ``iris.data`` and assign its variables to groups.

        Args:
            name: Forwarded as first argument to the parent constructor.
            by_group: Forwarded as second argument to the parent constructor
                (presumably selects the by-group storage mode; confirm
                against :class:`.Dataset`).
            as_io: If true, the four measurements are put in the ``"inputs"``
                group and ``specy`` in the ``"outputs"`` group. Otherwise,
                only ``specy`` is given an explicit group, ``"labels"``.
        """
        super().__init__(name, by_group)

        measurements = [
            "sepal_length",
            "sepal_width",
            "petal_length",
            "petal_width",
        ]
        target = "specy"
        names = [*measurements, target]

        # Every variable is a scalar.
        dimensions = dict.fromkeys(names, 1)

        if as_io:
            grouping = dict.fromkeys(measurements, "inputs")
            grouping[target] = "outputs"
        else:
            grouping = {target: "labels"}

        # The data file sits next to this module.
        data_file = Path(__file__).parent / "iris.data"
        # NOTE(review): the two trailing positional arguments look like the
        # field delimiter and a "has header row" flag; confirm against
        # Dataset.set_from_file.
        self.set_from_file(data_file, names, dimensions, grouping, ",", False)