1 #ifndef GRAIL_DATASET_H
2 #define GRAIL_DATASET_H
4 #include "../DT/DTConsiderationType.hh"
5 #include "../DT/DecisionNode.h"
6 #include "DatasetSample.hh"
10 #include <initializer_list>
17 class ISimulatedGameAction;
/// @brief Creates a new dataset.
/// @param considerationTypes Brace-enclosed list of consideration types.
///        NOTE(review): presumably one entry per dataset column, in column order - confirm in Dataset.cpp.
41 Dataset(std::initializer_list<DTConsiderationType> considerationTypes);
/// @brief Creates a new dataset from a vector of consideration types.
/// @param considerationTypes Consideration types, passed by value.
///        NOTE(review): presumably one entry per dataset column, in column order - confirm in Dataset.cpp.
47 Dataset(std::vector<DTConsiderationType> considerationTypes);
/// @brief Constructs a new sample and inserts it into the dataset.
/// @param decision Action associated with the sample; ownership is transferred to the callee.
/// @param data     Brace-enclosed list of float values used to construct the sample.
///        NOTE(review): presumably one value per consideration column - confirm in Dataset.cpp.
50 void AddSample(std::unique_ptr<ISimulatedGameAction> decision, std::initializer_list<float> data);
/// @brief Overload of AddSample taking the sample values as a vector.
/// @param decision Action associated with the sample; ownership is transferred to the callee.
/// @param data     Float values for the sample, passed by value.
///        NOTE(review): presumably behaves like the initializer_list overload - confirm in Dataset.cpp.
53 void AddSample(std::unique_ptr<ISimulatedGameAction> decision, std::vector<float> data);
/// @brief Overload of AddSample taking an already constructed sample.
/// @param sample Sample to add; ownership is transferred to the callee.
///        NOTE(review): presumably appended to Samples - confirm in Dataset.cpp.
56 void AddSample(std::unique_ptr<DatasetSample> sample);
/// Data stored in the dataset: a vector of uniquely owned DatasetSample objects.
68 std::vector<std::unique_ptr<DatasetSample>>
Samples;
/// @brief Compatibility check between this dataset and another one; does not modify either.
/// @param other Dataset to compare against.
/// @return Boolean result of the check.
///         NOTE(review): presumably true when both datasets share the same consideration types - confirm in Dataset.cpp.
92 bool CheckDataCompatibility(
const Dataset& other)
const;
/// @brief Computes an entropy value for this dataset without modifying it.
/// @return Entropy as a double.
///         NOTE(review): presumably the entropy of the decisions attached to Samples,
///         as needed by the C4.5 algorithm - confirm in Dataset.cpp.
94 double CalculateDecisionEntropy()
const;
/// @brief Produces a new, uniquely owned Dataset using a caller-supplied predicate.
/// @param filter Predicate invoked with a mutable reference to a vector of float values.
/// @return Newly created dataset owned by the caller.
///         NOTE(review): presumably holds the samples for which filter returned true - confirm in Dataset.cpp.
96 std::unique_ptr<Dataset> SplitByFilter(std::function<
bool(std::vector<float>&)> filter)
const;
/// @brief Splits this dataset into several uniquely owned datasets based on a column and a threshold.
/// @param column     Index of the column to split on.
/// @param splitValue Value used for the split.
/// @return Vector of datasets owned by the caller.
///         NOTE(review): presumably two parts (values below / above splitValue) - confirm in Dataset.cpp.
97 std::vector<std::unique_ptr<Dataset>> SplitContinuous(
size_t column,
float splitValue)
const;
/// @brief Splits this dataset into several uniquely owned datasets based on a column.
/// @param column Index of the column to split on.
///        NOTE(review): declared as int here while SplitContinuous takes size_t - confirm whether intentional.
/// @return Vector of (dataset, float) pairs owned by the caller.
///         NOTE(review): the float is presumably the nominal value shared by that subset - confirm in Dataset.cpp.
98 std::vector<std::pair<std::unique_ptr<Dataset>,
float>> SplitNominal(
int column)
const;
/// Consideration types held by this dataset.
/// NOTE(review): presumably the vector returned by GetConsiderationTypes(), one entry per column - confirm in Dataset.cpp.
100 std::vector<DTConsiderationType> considerationTypes;
This class encapsulates the C4.5 Algorithm used to generate a decision tree (see Grail....
Definition: C45Algorithm.h:22
Definition: Dataset.hh:20
size_t DecisionVariablesCount() const
Gets the number of measures (also known as considerations / decisions / columns in the dataset).
Definition: Dataset.cpp:23
void AddSample(std::unique_ptr< ISimulatedGameAction > decision, std::initializer_list< float > data)
Constructs and inserts a new sample into the dataset. The sample is constructed using the @decision and va...
Definition: Dataset.cpp:36
Dataset(std::initializer_list< DTConsiderationType > considerationTypes)
Creates a new dataset.
Definition: Dataset.cpp:11
bool CopyFromOther(Dataset &sourceDataset)
Adds samples from another dataset to the dataset this function was called on. It performs a basic che...
Definition: Dataset.cpp:91
const DTConsiderationType GetConsiderationType(int columnIndex) const
Returns the type of the i-th consideration; i = columnIndex.
Definition: Dataset.cpp:32
std::vector< std::unique_ptr< DatasetSample > > Samples
Data stored in Dataset.
Definition: Dataset.hh:68
bool MoveFromOther(Dataset &sourceDataset)
Moves samples from another dataset to the dataset this function was called on. It performs a basic ch...
Definition: Dataset.cpp:81
const std::vector< DTConsiderationType > & GetConsiderationTypes() const
Gets the types of the respective considerations, in order of appearance. The types are either NUMERIC or N...
Definition: Dataset.cpp:28
double ValidateBinary(DecisionNode< ISimulatedGameAction > &decisionTreeNodeRoot)
Tests a decision tree (represented by the root node) against a dataset. Returns the accuracy of decis...
Definition: Dataset.cpp:122
Class for internal usage. Decision tree node base type.
Definition: DecisionNode.h:21