The traineR package seeks to unify the different ways of creating predictive models and their different predictive formats. It includes methods such as K-Nearest Neighbors, Decision Trees, ADA Boosting, Extreme Gradient Boosting, Random Forest, Neural Networks, Deep Learning, Support Vector Machines, Bayesian, Linear Discriminant Analysis, Quadratic Discriminant Analysis and Logistic Regression.
The main idea of the package is that all predictions can be executed using a standard syntax, and that all predictive methods can be used in the same way by default; for example, all packages use classification in their default invocation, and all methods use a formula to determine the predictor variables (independent variables) and the response variable.
For the following examples we will use the Puromycin dataset:
conc | rate | state |
---|---|---|
0.02 | 76 | treated |
0.02 | 47 | treated |
0.06 | 97 | treated |
0.06 | 107 | treated |
0.11 | 123 | treated |
0.11 | 139 | treated |
0.22 | 159 | treated |
0.22 | 152 | treated |
0.56 | 191 | treated |
0.56 | 201 | treated |
n <- seq_len(nrow(Puromycin))
.sample <- sample(n, length(n) * 0.7)
data.train <- Puromycin[.sample,]
data.test <- Puromycin[-.sample,]
Modeling:
model <- train.glm(state~., data.train)
model
#>
#> Call: glm(formula = state ~ ., family = binomial, data = data.train)
#>
#> Coefficients:
#> (Intercept) conc rate
#> 2.79486 2.22258 -0.02771
#>
#> Degrees of Freedom: 15 Total (i.e. Null); 13 Residual
#> Null Deviance: 22.18
#> Residual Deviance: 19.8 AIC: 25.8
Prediction as probability:
Note: the result is always a matrix.
prediction <- predict(model, data.test , type = "prob")
prediction
#> treated untreated
#> [1,] 0.4402031 0.5597969
#> [2,] 0.5091897 0.4908103
#> [3,] 0.6926111 0.3073889
#> [4,] 0.7778385 0.2221615
#> [5,] 0.2723493 0.7276507
#> [6,] 0.4197604 0.5802396
#> [7,] 0.5379817 0.4620183
Prediction as classification:
Note: the result is always a factor.
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] untreated treated treated treated untreated untreated treated
#> Levels: treated untreated
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real treated untreated
#> treated 3 1
#> untreated 1 2
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real treated untreated
#> treated 3 1
#> untreated 1 2
#>
#> Overall Accuracy: 0.7143
#> Overall Error: 0.2857
#>
#> Category Accuracy:
#>
#> treated untreated
#> 0.750000 0.666667
Modeling:
model <- train.ada(state~., data.train, iter = 200)
model
#> Call:
#> ada(state ~ ., data = data.train, iter = 200)
#>
#> Loss: exponential Method: discrete Iteration: 200
#>
#> Final Confusion Matrix for Data:
#> Final Prediction
#> True value treated untreated
#> treated 4 4
#> untreated 5 3
#>
#> Train Error: 0.562
#>
#> Out-Of-Bag Error: 0.5 iteration= 6
#>
#> Additional Estimates of number of iterations:
#>
#> train.err1 train.kap1
#> 1 1
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> treated untreated
#> [1,] 0.5 0.5
#> [2,] 0.5 0.5
#> [3,] 0.5 0.5
#> [4,] 0.5 0.5
#> [5,] 0.5 0.5
#> [6,] 0.5 0.5
#> [7,] 0.5 0.5
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] treated untreated untreated untreated untreated untreated treated
#> Levels: treated untreated
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real treated untreated
#> treated 1 3
#> untreated 1 2
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real treated untreated
#> treated 1 3
#> untreated 1 2
#>
#> Overall Accuracy: 0.4286
#> Overall Error: 0.5714
#>
#> Category Accuracy:
#>
#> treated untreated
#> 0.250000 0.666667
For the following examples we will use the iris dataset:
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
4.6 | 3.4 | 1.4 | 0.3 | setosa |
5.0 | 3.4 | 1.5 | 0.2 | setosa |
4.4 | 2.9 | 1.4 | 0.2 | setosa |
4.9 | 3.1 | 1.5 | 0.1 | setosa |
data("iris")
n <- seq_len(nrow(iris))
.sample <- sample(n, length(n) * 0.75)
data.train <- iris[.sample,]
data.test <- iris[-.sample,]
Modeling:
model <- train.rpart(Species~., data.train)
model
#> n= 112
#>
#> node), split, n, loss, yval, (yprob)
#> * denotes terminal node
#>
#> 1) root 112 72 versicolor (0.29464286 0.35714286 0.34821429)
#> 2) Petal.Length< 2.6 33 0 setosa (1.00000000 0.00000000 0.00000000) *
#> 3) Petal.Length>=2.6 79 39 versicolor (0.00000000 0.50632911 0.49367089)
#> 6) Petal.Length< 4.85 40 2 versicolor (0.00000000 0.95000000 0.05000000) *
#> 7) Petal.Length>=4.85 39 2 virginica (0.00000000 0.05128205 0.94871795) *
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1 0.00000000 0.0000000
#> 6 1 0.00000000 0.0000000
#> 8 1 0.00000000 0.0000000
#> 9 1 0.00000000 0.0000000
#> 11 1 0.00000000 0.0000000
#> 12 1 0.00000000 0.0000000
#> 16 1 0.00000000 0.0000000
#> 17 1 0.00000000 0.0000000
#> 18 1 0.00000000 0.0000000
#> 20 1 0.00000000 0.0000000
#> 21 1 0.00000000 0.0000000
#> 27 1 0.00000000 0.0000000
#> 31 1 0.00000000 0.0000000
#> 34 1 0.00000000 0.0000000
#> 48 1 0.00000000 0.0000000
#> 49 1 0.00000000 0.0000000
#> 50 1 0.00000000 0.0000000
#> 53 0 0.05128205 0.9487179
#> 74 0 0.95000000 0.0500000
#> 76 0 0.95000000 0.0500000
#> 83 0 0.95000000 0.0500000
#> 84 0 0.05128205 0.9487179
#> 89 0 0.95000000 0.0500000
#> 91 0 0.95000000 0.0500000
#> 92 0 0.95000000 0.0500000
#> 99 0 0.95000000 0.0500000
#> 100 0 0.95000000 0.0500000
#> 108 0 0.05128205 0.9487179
#> 110 0 0.05128205 0.9487179
#> 112 0 0.05128205 0.9487179
#> 114 0 0.05128205 0.9487179
#> 115 0 0.05128205 0.9487179
#> 122 0 0.05128205 0.9487179
#> 126 0 0.05128205 0.9487179
#> 127 0 0.95000000 0.0500000
#> 131 0 0.05128205 0.9487179
#> 133 0 0.05128205 0.9487179
#> 146 0 0.05128205 0.9487179
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa virginica
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica versicolor virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 8 2
#> virginica 0 1 10
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 8 2
#> virginica 0 1 10
#>
#> Overall Accuracy: 0.9211
#> Overall Error: 0.0789
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.800000 0.909091
The model still supports the functions of the original package.
library(rpart.plot)
prp(model, extra = 104, branch.type = 2,
box.col = c("pink", "palegreen3", "cyan")[model$frame$yval])
Modeling:
model <- train.bayes(Species~., data.train)
model
#>
#> Naive Bayes Classifier for Discrete Predictors
#>
#> Call:
#> naiveBayes.default(x = X, y = Y, laplace = laplace)
#>
#> A-priori probabilities:
#> Y
#> setosa versicolor virginica
#> 0.2946429 0.3571429 0.3482143
#>
#> Conditional probabilities:
#> Sepal.Length
#> Y [,1] [,2]
#> setosa 4.960606 0.3472566
#> versicolor 5.935000 0.5201331
#> virginica 6.602564 0.6339048
#>
#> Sepal.Width
#> Y [,1] [,2]
#> setosa 3.363636 0.3586591
#> versicolor 2.757500 0.3373255
#> virginica 2.994872 0.3316218
#>
#> Petal.Length
#> Y [,1] [,2]
#> setosa 1.448485 0.1954501
#> versicolor 4.245000 0.4408325
#> virginica 5.569231 0.5587363
#>
#> Petal.Width
#> Y [,1] [,2]
#> setosa 0.2393939 0.1143990
#> versicolor 1.3275000 0.2087662
#> virginica 2.0179487 0.2827234
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> [1,] 1.000000e+00 1.037271e-18 4.164344e-25
#> [2,] 1.000000e+00 4.976443e-14 4.931910e-20
#> [3,] 1.000000e+00 1.327965e-17 6.824485e-24
#> [4,] 1.000000e+00 9.552476e-18 1.801530e-24
#> [5,] 1.000000e+00 1.664774e-17 2.071879e-23
#> [6,] 1.000000e+00 3.566408e-17 1.528434e-23
#> [7,] 1.000000e+00 4.597282e-16 1.857046e-21
#> [8,] 1.000000e+00 1.020594e-16 1.564602e-22
#> [9,] 1.000000e+00 3.506289e-17 2.015997e-23
#> [10,] 1.000000e+00 2.749912e-17 2.472554e-23
#> [11,] 1.000000e+00 2.929050e-15 1.714519e-21
#> [12,] 1.000000e+00 1.953700e-14 5.993291e-21
#> [13,] 1.000000e+00 1.703106e-16 3.995771e-23
#> [14,] 1.000000e+00 3.658024e-19 1.400540e-24
#> [15,] 1.000000e+00 2.860773e-18 9.893992e-25
#> [16,] 1.000000e+00 9.709688e-18 1.098373e-23
#> [17,] 1.000000e+00 5.340503e-18 2.536019e-24
#> [18,] 8.164302e-100 4.062157e-01 5.937843e-01
#> [19,] 5.420440e-78 9.971313e-01 2.868727e-03
#> [20,] 1.823501e-76 9.841226e-01 1.587739e-02
#> [21,] 2.684709e-51 9.999568e-01 4.319162e-05
#> [22,] 2.716361e-108 5.704499e-01 4.295501e-01
#> [23,] 2.094370e-59 9.997114e-01 2.886117e-04
#> [24,] 4.451216e-66 9.998223e-01 1.776635e-04
#> [25,] 4.038503e-81 9.860836e-01 1.391637e-02
#> [26,] 1.228099e-24 9.999994e-01 5.858081e-07
#> [27,] 4.029146e-60 9.997843e-01 2.156942e-04
#> [28,] 2.302422e-183 5.493627e-07 9.999995e-01
#> [29,] 6.898987e-215 5.417678e-12 1.000000e+00
#> [30,] 5.391006e-134 3.532819e-03 9.964672e-01
#> [31,] 5.870843e-124 2.523277e-02 9.747672e-01
#> [32,] 8.347183e-154 5.382292e-06 9.999946e-01
#> [33,] 3.167287e-119 2.612004e-02 9.738800e-01
#> [34,] 1.960754e-166 4.427747e-06 9.999956e-01
#> [35,] 1.050467e-106 2.448969e-01 7.551031e-01
#> [36,] 4.554596e-179 5.921153e-07 9.999994e-01
#> [37,] 1.489642e-165 3.094398e-06 9.999969e-01
#> [38,] 1.564894e-155 2.123876e-06 9.999979e-01
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa virginica
#> [19] versicolor versicolor versicolor versicolor versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.lda(Species~., data.train)
model
#> Call:
#> lda(Species ~ ., data = data.train)
#>
#> Prior probabilities of groups:
#> setosa versicolor virginica
#> 0.2946429 0.3571429 0.3482143
#>
#> Group means:
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> setosa 4.960606 3.363636 1.448485 0.2393939
#> versicolor 5.935000 2.757500 4.245000 1.3275000
#> virginica 6.602564 2.994872 5.569231 2.0179487
#>
#> Coefficients of linear discriminants:
#> LD1 LD2
#> Sepal.Length 1.037875 -0.03581897
#> Sepal.Width 1.510931 2.31416896
#> Petal.Length -2.436464 -0.73575183
#> Petal.Width -2.336288 2.36218130
#>
#> Proportion of trace:
#> LD1 LD2
#> 0.9905 0.0095
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1.000000e+00 4.739598e-19 2.569383e-37
#> 6 1.000000e+00 3.862882e-21 4.656143e-39
#> 8 1.000000e+00 1.110455e-19 5.350897e-38
#> 9 1.000000e+00 1.083528e-14 1.094014e-31
#> 11 1.000000e+00 1.904849e-23 7.641272e-43
#> 12 1.000000e+00 7.706275e-18 1.896661e-35
#> 16 1.000000e+00 6.564434e-28 1.013051e-47
#> 17 1.000000e+00 3.541858e-25 1.507026e-44
#> 18 1.000000e+00 5.810645e-21 1.938031e-39
#> 20 1.000000e+00 4.697332e-22 1.474857e-40
#> 21 1.000000e+00 2.514745e-19 1.315848e-37
#> 27 1.000000e+00 5.550436e-17 6.331925e-34
#> 31 1.000000e+00 9.741766e-16 5.875936e-33
#> 34 1.000000e+00 2.246415e-28 5.892962e-49
#> 48 1.000000e+00 1.262186e-17 2.348814e-35
#> 49 1.000000e+00 4.963892e-23 2.962846e-42
#> 50 1.000000e+00 5.453540e-20 1.535722e-38
#> 53 2.833577e-21 9.948969e-01 5.103106e-03
#> 74 3.778069e-22 9.982769e-01 1.723067e-03
#> 76 2.499367e-17 9.998912e-01 1.087988e-04
#> 83 5.068650e-16 9.999904e-01 9.552258e-06
#> 84 9.789468e-32 8.826578e-02 9.117342e-01
#> 89 1.291692e-17 9.998529e-01 1.471097e-04
#> 91 5.106349e-23 9.972350e-01 2.765024e-03
#> 92 1.983343e-21 9.957941e-01 4.205925e-03
#> 99 2.096295e-10 1.000000e+00 4.727057e-08
#> 100 1.336322e-18 9.998203e-01 1.797111e-04
#> 108 1.486870e-41 1.321463e-04 9.998679e-01
#> 110 4.887218e-43 1.105866e-06 9.999989e-01
#> 112 3.872454e-36 2.704717e-03 9.972953e-01
#> 114 3.763508e-39 3.540781e-04 9.996459e-01
#> 115 7.614355e-43 5.316390e-06 9.999947e-01
#> 122 7.082438e-36 1.344119e-03 9.986559e-01
#> 126 1.552903e-35 2.665842e-03 9.973342e-01
#> 127 2.119160e-28 2.569341e-01 7.430659e-01
#> 131 2.113696e-40 2.422832e-04 9.997577e-01
#> 133 1.782964e-43 9.058368e-06 9.999909e-01
#> 146 6.559679e-36 4.794374e-04 9.995206e-01
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.qda(Species~., data.train)
model
#> Call:
#> qda(Species ~ ., data = data.train)
#>
#> Prior probabilities of groups:
#> setosa versicolor virginica
#> 0.2946429 0.3571429 0.3482143
#>
#> Group means:
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> setosa 4.960606 3.363636 1.448485 0.2393939
#> versicolor 5.935000 2.757500 4.245000 1.3275000
#> virginica 6.602564 2.994872 5.569231 2.0179487
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1.000000e+00 7.283826e-24 1.780589e-32
#> 6 1.000000e+00 9.541669e-29 1.647842e-36
#> 8 1.000000e+00 3.398729e-25 3.867038e-34
#> 9 1.000000e+00 5.286804e-18 1.509027e-26
#> 11 1.000000e+00 5.754499e-31 3.430820e-40
#> 12 1.000000e+00 4.448181e-23 3.734149e-31
#> 16 1.000000e+00 1.081634e-40 5.074032e-47
#> 17 1.000000e+00 4.040875e-35 1.144422e-41
#> 18 1.000000e+00 1.555337e-27 6.678148e-36
#> 20 1.000000e+00 7.520922e-30 2.479910e-37
#> 21 1.000000e+00 8.323528e-24 5.437390e-34
#> 27 1.000000e+00 3.842787e-22 3.854860e-30
#> 31 1.000000e+00 1.912271e-19 1.394634e-28
#> 34 1.000000e+00 2.987053e-40 1.044789e-47
#> 48 1.000000e+00 3.564506e-22 1.891543e-30
#> 49 1.000000e+00 1.595204e-30 2.120023e-39
#> 50 1.000000e+00 2.971919e-25 1.425256e-34
#> 53 2.231102e-80 9.979786e-01 2.021360e-03
#> 74 3.039329e-66 8.025436e-01 1.974564e-01
#> 76 2.941336e-61 9.999327e-01 6.733450e-05
#> 83 3.641037e-43 9.999694e-01 3.063005e-05
#> 84 3.341500e-89 3.888487e-02 9.611151e-01
#> 89 8.407552e-48 9.994276e-01 5.724317e-04
#> 91 1.532033e-56 8.846841e-01 1.153159e-01
#> 92 1.127414e-65 9.936651e-01 6.334876e-03
#> 99 6.052430e-20 9.999563e-01 4.371974e-05
#> 100 1.957728e-49 9.995182e-01 4.817904e-04
#> 108 5.756386e-152 1.555212e-07 9.999998e-01
#> 110 3.528563e-157 5.618974e-07 9.999994e-01
#> 112 2.720638e-106 1.959787e-03 9.980402e-01
#> 114 2.735318e-98 1.256290e-05 9.999874e-01
#> 115 3.356330e-117 1.080482e-11 1.000000e+00
#> 122 3.696258e-93 1.371080e-04 9.998629e-01
#> 126 9.673489e-133 3.546191e-04 9.996454e-01
#> 127 3.232341e-83 1.068858e-01 8.931142e-01
#> 131 4.268497e-146 2.470100e-05 9.999753e-01
#> 133 1.079507e-127 1.124143e-06 9.999989e-01
#> 146 9.279185e-116 1.239866e-08 1.000000e+00
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.randomForest(Species~., data.train)
model
#>
#> Call:
#> randomForest(formula = Species ~ ., data = data.train, importance = TRUE)
#> Type of random forest: classification
#> Number of trees: 500
#> No. of variables tried at each split: 2
#>
#> OOB estimate of error rate: 5.36%
#> Confusion matrix:
#> setosa versicolor virginica class.error
#> setosa 33 0 0 0.00000000
#> versicolor 0 37 3 0.07500000
#> virginica 0 3 36 0.07692308
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1.000 0.000 0.000
#> 6 0.998 0.002 0.000
#> 8 1.000 0.000 0.000
#> 9 0.994 0.004 0.002
#> 11 0.996 0.004 0.000
#> 12 1.000 0.000 0.000
#> 16 0.984 0.016 0.000
#> 17 0.998 0.002 0.000
#> 18 1.000 0.000 0.000
#> 20 1.000 0.000 0.000
#> 21 0.974 0.026 0.000
#> 27 1.000 0.000 0.000
#> 31 1.000 0.000 0.000
#> 34 0.988 0.012 0.000
#> 48 1.000 0.000 0.000
#> 49 1.000 0.000 0.000
#> 50 1.000 0.000 0.000
#> 53 0.000 0.802 0.198
#> 74 0.000 0.992 0.008
#> 76 0.000 0.998 0.002
#> 83 0.000 1.000 0.000
#> 84 0.000 0.128 0.872
#> 89 0.000 1.000 0.000
#> 91 0.000 0.994 0.006
#> 92 0.000 0.994 0.006
#> 99 0.006 0.974 0.020
#> 100 0.000 1.000 0.000
#> 108 0.000 0.000 1.000
#> 110 0.000 0.000 1.000
#> 112 0.000 0.000 1.000
#> 114 0.000 0.072 0.928
#> 115 0.000 0.010 0.990
#> 122 0.000 0.158 0.842
#> 126 0.000 0.032 0.968
#> 127 0.000 0.264 0.736
#> 131 0.000 0.000 1.000
#> 133 0.000 0.000 1.000
#> 146 0.000 0.014 0.986
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
The model still supports the functions of the original package.
library(randomForest)
varImpPlot(model)
Modeling:
model <- train.knn(Species~., data.train)
model
#>
#> Call:
#> kknn::train.kknn(formula = Species ~ ., data = data.train)
#>
#> Type of response variable: nominal
#> Minimal misclassification: 0.05357143
#> Best kernel: optimal
#> Best k: 8
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> [1,] 1 0.00000000 0.0000000
#> [2,] 1 0.00000000 0.0000000
#> [3,] 1 0.00000000 0.0000000
#> [4,] 1 0.00000000 0.0000000
#> [5,] 1 0.00000000 0.0000000
#> [6,] 1 0.00000000 0.0000000
#> [7,] 1 0.00000000 0.0000000
#> [8,] 1 0.00000000 0.0000000
#> [9,] 1 0.00000000 0.0000000
#> [10,] 1 0.00000000 0.0000000
#> [11,] 1 0.00000000 0.0000000
#> [12,] 1 0.00000000 0.0000000
#> [13,] 1 0.00000000 0.0000000
#> [14,] 1 0.00000000 0.0000000
#> [15,] 1 0.00000000 0.0000000
#> [16,] 1 0.00000000 0.0000000
#> [17,] 1 0.00000000 0.0000000
#> [18,] 0 0.96293700 0.0370630
#> [19,] 0 0.96293700 0.0370630
#> [20,] 0 1.00000000 0.0000000
#> [21,] 0 1.00000000 0.0000000
#> [22,] 0 0.11321177 0.8867882
#> [23,] 0 1.00000000 0.0000000
#> [24,] 0 1.00000000 0.0000000
#> [25,] 0 0.89876334 0.1012367
#> [26,] 0 1.00000000 0.0000000
#> [27,] 0 1.00000000 0.0000000
#> [28,] 0 0.01197511 0.9880249
#> [29,] 0 0.00000000 1.0000000
#> [30,] 0 0.03706300 0.9629370
#> [31,] 0 0.09389501 0.9061050
#> [32,] 0 0.00000000 1.0000000
#> [33,] 0 0.11321177 0.8867882
#> [34,] 0 0.00000000 1.0000000
#> [35,] 0 0.10123666 0.8987633
#> [36,] 0 0.00000000 1.0000000
#> [37,] 0 0.00000000 1.0000000
#> [38,] 0 0.00000000 1.0000000
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.nnet(Species~., data.train, size = 20)
#> # weights: 163
#> initial value 215.206309
#> iter 10 value 37.709699
#> iter 20 value 1.982372
#> iter 30 value 0.090012
#> final value 0.000047
#> converged
model
#> a 4-20-3 network with 163 weights
#> inputs: Sepal.Length Sepal.Width Petal.Length Petal.Width
#> output(s): Species
#> options were - softmax modelling
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1.000000e+00 9.445818e-12 3.043049e-27
#> 6 1.000000e+00 8.733221e-12 3.944353e-27
#> 8 1.000000e+00 9.469569e-12 3.656317e-27
#> 9 1.000000e+00 1.163167e-11 3.840243e-27
#> 11 1.000000e+00 8.818073e-12 3.498297e-27
#> 12 1.000000e+00 9.628105e-12 3.948991e-27
#> 16 1.000000e+00 8.259265e-12 3.610398e-27
#> 17 1.000000e+00 8.232772e-12 3.078802e-27
#> 18 1.000000e+00 8.921348e-12 3.352714e-27
#> 20 1.000000e+00 8.600326e-12 3.593398e-27
#> 21 1.000000e+00 1.026156e-11 4.211786e-27
#> 27 1.000000e+00 9.555198e-12 4.005126e-27
#> 31 1.000000e+00 1.136378e-11 4.333767e-27
#> 34 1.000000e+00 8.194106e-12 3.254558e-27
#> 48 1.000000e+00 9.762806e-12 3.481879e-27
#> 49 1.000000e+00 8.798001e-12 3.514541e-27
#> 50 1.000000e+00 9.543288e-12 3.356273e-27
#> 53 1.416136e-13 1.000000e+00 9.689602e-24
#> 74 9.630305e-15 1.000000e+00 1.913711e-22
#> 76 1.431091e-11 1.000000e+00 7.671147e-27
#> 83 1.793395e-11 1.000000e+00 4.856907e-27
#> 84 1.177774e-16 9.066895e-16 1.000000e+00
#> 89 2.808740e-11 1.000000e+00 1.963732e-26
#> 91 6.507654e-15 1.000000e+00 4.363636e-22
#> 92 1.327521e-13 1.000000e+00 3.125519e-23
#> 99 1.066792e-06 9.999989e-01 3.041239e-25
#> 100 3.302866e-12 1.000000e+00 1.333266e-25
#> 108 3.884555e-17 1.394405e-15 1.000000e+00
#> 110 7.566610e-16 4.101344e-16 1.000000e+00
#> 112 9.912112e-17 9.549205e-16 1.000000e+00
#> 114 1.161894e-16 8.961849e-16 1.000000e+00
#> 115 3.822818e-16 5.471029e-16 1.000000e+00
#> 122 3.771786e-16 5.545864e-16 1.000000e+00
#> 126 1.231448e-16 8.720092e-16 1.000000e+00
#> 127 3.127655e-07 8.695622e-01 1.304375e-01
#> 131 3.864067e-17 1.397868e-15 1.000000e+00
#> 133 1.204361e-16 8.776195e-16 1.000000e+00
#> 146 3.916816e-16 5.509783e-16 1.000000e+00
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica versicolor virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 1 10
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 1 10
#>
#> Overall Accuracy: 0.9474
#> Overall Error: 0.0526
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 0.909091
Modeling:
model <- train.neuralnet(Species~., data.train, hidden = c(5, 7, 6),
                         linear.output = FALSE, threshold = 0.01, stepmax = 1e+06)
summary(model)
#> Length Class Mode
#> call 7 -none- call
#> response 336 -none- logical
#> covariate 448 -none- numeric
#> model.list 2 -none- list
#> err.fct 1 -none- function
#> act.fct 1 -none- function
#> linear.output 1 -none- logical
#> data 5 data.frame list
#> exclude 0 -none- NULL
#> net.result 1 -none- list
#> weights 1 -none- list
#> generalized.weights 1 -none- list
#> startweights 1 -none- list
#> result.matrix 139 -none- numeric
#> prmdt 4 -none- list
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 1.000000e+00 1.285066e-03 1.044123e-08
#> 6 1.000000e+00 1.280897e-03 1.047568e-08
#> 8 1.000000e+00 1.283393e-03 1.044414e-08
#> 9 1.000000e+00 1.259949e-03 1.057419e-08
#> 11 1.000000e+00 1.305484e-03 1.032710e-08
#> 12 1.000000e+00 1.269736e-03 1.052630e-08
#> 16 1.000000e+00 1.327859e-03 1.025500e-08
#> 17 1.000000e+00 1.319906e-03 1.028125e-08
#> 18 1.000000e+00 1.292498e-03 1.040400e-08
#> 20 1.000000e+00 1.292348e-03 1.042200e-08
#> 21 1.000000e+00 1.273429e-03 1.047926e-08
#> 27 1.000000e+00 1.262960e-03 1.056551e-08
#> 31 1.000000e+00 1.260675e-03 1.056026e-08
#> 34 1.000000e+00 1.343136e-03 1.016778e-08
#> 48 1.000000e+00 1.273830e-03 1.050513e-08
#> 49 1.000000e+00 1.303432e-03 1.034201e-08
#> 50 1.000000e+00 1.287452e-03 1.041874e-08
#> 53 1.053184e-15 1.000000e+00 1.149350e-14
#> 74 2.867574e-16 1.000000e+00 8.623473e-14
#> 76 5.302027e-16 1.000000e+00 3.503389e-17
#> 83 3.343950e-16 1.000000e+00 1.504865e-17
#> 84 9.771561e-14 2.412753e-08 1.000000e+00
#> 89 1.233481e-15 1.000000e+00 1.764094e-16
#> 91 6.158734e-17 1.000000e+00 8.076216e-12
#> 92 9.890973e-16 1.000000e+00 9.534546e-14
#> 99 1.269321e-11 1.000000e+00 3.961338e-16
#> 100 2.617239e-16 1.000000e+00 2.072035e-16
#> 108 1.589220e-12 6.675033e-12 1.000000e+00
#> 110 1.066502e-12 3.522721e-12 1.000000e+00
#> 112 5.912521e-13 6.202702e-11 1.000000e+00
#> 114 5.524834e-13 1.984961e-11 1.000000e+00
#> 115 6.565169e-13 9.465667e-12 1.000000e+00
#> 122 1.657231e-13 3.141155e-10 1.000000e+00
#> 126 1.151267e-12 4.571738e-11 1.000000e+00
#> 127 3.648794e-15 2.741522e-03 9.984767e-01
#> 131 1.794269e-12 1.417045e-11 1.000000e+00
#> 133 1.001981e-12 3.924622e-12 1.000000e+00
#> 146 4.791125e-13 7.277817e-11 1.000000e+00
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.svm(Species~., data.train)
model
#>
#> Call:
#> svm(formula = Species ~ ., data = data.train, probability = TRUE)
#>
#>
#> Parameters:
#> SVM-Type: C-classification
#> SVM-Kernel: radial
#> cost: 1
#>
#> Number of Support Vectors: 47
Prediction as probability:
prediction <- predict(model, data.test , type = "prob")
prediction
#> setosa versicolor virginica
#> 3 0.964592066 0.020095251 0.015312683
#> 6 0.956900077 0.025918059 0.017181864
#> 8 0.970170861 0.016654699 0.013174440
#> 9 0.945849964 0.034958295 0.019191741
#> 11 0.969049746 0.018322508 0.012627746
#> 12 0.966375575 0.018852671 0.014771754
#> 16 0.843698151 0.077904894 0.078396955
#> 17 0.959405448 0.023353019 0.017241533
#> 18 0.970884316 0.016663763 0.012451921
#> 20 0.965069285 0.019659062 0.015271654
#> 21 0.962770438 0.021768064 0.015461499
#> 27 0.965357749 0.020753195 0.013889056
#> 31 0.960712206 0.024142089 0.015145705
#> 34 0.926014849 0.038738392 0.035246759
#> 48 0.961070169 0.022388915 0.016540916
#> 49 0.970029788 0.017588700 0.012381512
#> 50 0.969264755 0.017111370 0.013623876
#> 53 0.017096835 0.842362459 0.140540706
#> 74 0.011384256 0.971311980 0.017303764
#> 76 0.013650108 0.970553435 0.015796457
#> 83 0.012225898 0.983049595 0.004724507
#> 84 0.008263044 0.246755599 0.744981357
#> 89 0.019588941 0.973146267 0.007264792
#> 91 0.009288681 0.961932636 0.028778683
#> 92 0.010776093 0.961758871 0.027465036
#> 99 0.040261189 0.942976450 0.016762360
#> 100 0.011433152 0.978806250 0.009760598
#> 108 0.009985126 0.018913501 0.971101372
#> 110 0.012039636 0.009990783 0.977969581
#> 112 0.006779663 0.017390748 0.975829589
#> 114 0.006068054 0.018393432 0.975538514
#> 115 0.008593846 0.001989262 0.989416893
#> 122 0.007354556 0.031761835 0.960883609
#> 126 0.009238988 0.034775140 0.955985873
#> 127 0.008504042 0.177187327 0.814308631
#> 131 0.010813199 0.022919403 0.966267399
#> 133 0.006573046 0.001310111 0.992116843
#> 146 0.007796006 0.004851255 0.987352740
Prediction as classification:
prediction <- predict(model, data.test , type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000
Modeling:
model <- train.xgboost(Species~., data.train, nrounds = 79, maximize = FALSE, verbose = 0)
model
#> ##### xgb.Booster
#> raw: 141 Kb
#> call:
#> xgb.train(params = params, data = train_aux, nrounds = nrounds,
#> watchlist = watchlist, obj = obj, feval = feval, verbose = verbose,
#> print_every_n = print_every_n, early_stopping_rounds = early_stopping_rounds,
#> maximize = maximize, save_period = save_period, save_name = save_name,
#> xgb_model = xgb_model, callbacks = callbacks, eval_metric = "mlogloss")
#> params (as set within xgb.train):
#> booster = "gbtree", objective = "multi:softprob", eta = "0.3", gamma = "0", max_depth = "6", min_child_weight = "1", subsample = "1", colsample_bytree = "1", num_class = "3", eval_metric = "mlogloss", validate_parameters = "TRUE"
#> xgb.attributes:
#> niter
#> callbacks:
#> cb.evaluation.log()
#> # of features: 4
#> niter: 79
#> nfeatures : 4
#> evaluation_log:
#> iter train_mlogloss
#> 1 0.745818
#> 2 0.532089
#> ---
#> 78 0.016986
#> 79 0.016926
Prediction as probability:
prediction <- predict(model, data.test, type = "prob")
prediction
#> setosa versicolor virginica
#> [1,] 0.9937896132 0.004962827 0.001247580
#> [2,] 0.9893139005 0.009444213 0.001241961
#> [3,] 0.9937896132 0.004962827 0.001247580
#> [4,] 0.9917478561 0.006653214 0.001598984
#> [5,] 0.9893139005 0.009444213 0.001241961
#> [6,] 0.9937896132 0.004962827 0.001247580
#> [7,] 0.9784784317 0.020293176 0.001228359
#> [8,] 0.9893139005 0.009444213 0.001241961
#> [9,] 0.9937896132 0.004962827 0.001247580
#> [10,] 0.9937896132 0.004962827 0.001247580
#> [11,] 0.9893139005 0.009444213 0.001241961
#> [12,] 0.9937896132 0.004962827 0.001247580
#> [13,] 0.9937896132 0.004962827 0.001247580
#> [14,] 0.9832919240 0.015473676 0.001234402
#> [15,] 0.9937896132 0.004962827 0.001247580
#> [16,] 0.9893139005 0.009444213 0.001241961
#> [17,] 0.9937896132 0.004962827 0.001247580
#> [18,] 0.0015188885 0.984066725 0.014414346
#> [19,] 0.0020552438 0.990204930 0.007739806
#> [20,] 0.0007248118 0.998103499 0.001171651
#> [21,] 0.0031411399 0.993551850 0.003306998
#> [22,] 0.0030600282 0.006188738 0.990751207
#> [23,] 0.0031792743 0.995616198 0.001204490
#> [24,] 0.0044371835 0.990891397 0.004671475
#> [25,] 0.0023348408 0.993504763 0.004160363
#> [26,] 0.0157054812 0.968777895 0.015516602
#> [27,] 0.0028050132 0.994599223 0.002595817
#> [28,] 0.0008807033 0.004374124 0.994745135
#> [29,] 0.0009060728 0.004415777 0.994678140
#> [30,] 0.0003370693 0.001674097 0.997988820
#> [31,] 0.0042399820 0.015624903 0.980135143
#> [32,] 0.0034619896 0.004091620 0.992446363
#> [33,] 0.0085503524 0.125932753 0.865516901
#> [34,] 0.0009060728 0.004415777 0.994678140
#> [35,] 0.0029157929 0.176868439 0.820215821
#> [36,] 0.0004626763 0.002297942 0.997239351
#> [37,] 0.0003370693 0.001674097 0.997988820
#> [38,] 0.0009689934 0.004722423 0.994308650
Prediction as classification:
prediction <- predict(model, data.test, type = "class")
prediction
#> [1] setosa setosa setosa setosa setosa setosa
#> [7] setosa setosa setosa setosa setosa setosa
#> [13] setosa setosa setosa setosa setosa versicolor
#> [19] versicolor versicolor versicolor virginica versicolor versicolor
#> [25] versicolor versicolor versicolor virginica virginica virginica
#> [31] virginica virginica virginica virginica virginica virginica
#> [37] virginica virginica
#> Levels: setosa versicolor virginica
Confusion Matrix:
mc <- confusion.matrix(data.test, prediction)
mc
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
Some Rates:
general.indexes(mc = mc)
#>
#> Confusion Matrix:
#> prediction
#> real setosa versicolor virginica
#> setosa 17 0 0
#> versicolor 0 9 1
#> virginica 0 0 11
#>
#> Overall Accuracy: 0.9737
#> Overall Error: 0.0263
#>
#> Category Accuracy:
#>
#> setosa versicolor virginica
#> 1.000000 0.900000 1.000000