Title: | Out of Bag Learning Curve |
---|---|
Description: | Provides functions to calculate the out-of-bag learning curve for random forests for any measure that is available in the 'mlr' package. Supported random forest packages are 'randomForest' and 'ranger' and trained models of these packages with the train function of 'mlr'. The main function is OOBCurve() that calculates the out-of-bag curve depending on the number of trees. With the OOBCurvePars() function out-of-bag curves can also be calculated for 'mtry', 'sample.fraction' and 'min.node.size' for the 'ranger' package. |
Authors: | Philipp Probst [aut, cre] |
Maintainer: | Philipp Probst <[email protected]> |
License: | GPL-3 |
Version: | 0.3 |
Built: | 2025-02-18 05:58:13 UTC |
Source: | https://github.com/philipppro/oobcurve |
With the help of this function, the out-of-bag learning curve for random forests can be created for any measure that is available in the mlr package.
OOBCurve(mod, measures = list(auc), task, data)
OOBCurve(mod, measures = list(auc), task, data)
mod |
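An object of class randomForest or ranger (fitted with keep.inbag = TRUE, as in the examples below), or a random forest model of one of these packages trained with the train function of mlr. |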
measures |
List of performance measure(s) of mlr to evaluate. Default is auc only. See the mlr tutorial for a list of available measures for the corresponding task. |
task |
Learning task created by the function makeClassifTask or makeRegrTask of mlr. |
data |
Original data that was used for training the random forest. |
Returns a data frame with one column for each requested measure.
See also OOBCurvePars for out-of-bag curves of other parameters.
```r
library(mlr)
library(ranger)

# Classification
data = getTaskData(sonar.task)
sonar.task = makeClassifTask(data = data, target = "Class")
lrn = makeLearner("classif.ranger", keep.inbag = TRUE, par.vals = list(num.trees = 100))
mod = train(lrn, sonar.task)

# Alternatively use ranger directly
# mod = ranger(Class ~., data = data, num.trees = 100, keep.inbag = TRUE)
# Alternatively use randomForest
# mod = randomForest(Class ~., data = data, ntree = 100, keep.inbag = TRUE)

# Application of the main function
results = OOBCurve(mod, measures = list(mmce, auc, brier), task = sonar.task, data = data)
# Plot the generated results
plot(results$mmce, type = "l", ylab = "oob-mmce", xlab = "ntrees")
plot(results$auc, type = "l", ylab = "oob-auc", xlab = "ntrees")
plot(results$brier, type = "l", ylab = "oob-brier-score", xlab = "ntrees")

# Regression
data = getTaskData(bh.task)
bh.task = makeRegrTask(data = data, target = "medv")
lrn = makeLearner("regr.ranger", keep.inbag = TRUE, par.vals = list(num.trees = 100))
mod = train(lrn, bh.task)

# Application of the main function
results = OOBCurve(mod, measures = list(mse, mae, rsq), task = bh.task, data = data)
# Plot the generated results
plot(results$mse, type = "l", ylab = "oob-mse", xlab = "ntrees")
plot(results$mae, type = "l", ylab = "oob-mae", xlab = "ntrees")
plot(results$rsq, type = "l", ylab = "oob-rsq", xlab = "ntrees")
```
```r
library(mlr)
library(ranger)

# Classification
data = getTaskData(sonar.task)
sonar.task = makeClassifTask(data = data, target = "Class")
lrn = makeLearner("classif.ranger", keep.inbag = TRUE, par.vals = list(num.trees = 100))
mod = train(lrn, sonar.task)

# Alternatively use ranger directly
# mod = ranger(Class ~., data = data, num.trees = 100, keep.inbag = TRUE)
# Alternatively use randomForest
# mod = randomForest(Class ~., data = data, ntree = 100, keep.inbag = TRUE)

# Application of the main function
results = OOBCurve(mod, measures = list(mmce, auc, brier), task = sonar.task, data = data)
# Plot the generated results
plot(results$mmce, type = "l", ylab = "oob-mmce", xlab = "ntrees")
plot(results$auc, type = "l", ylab = "oob-auc", xlab = "ntrees")
plot(results$brier, type = "l", ylab = "oob-brier-score", xlab = "ntrees")

# Regression
data = getTaskData(bh.task)
bh.task = makeRegrTask(data = data, target = "medv")
lrn = makeLearner("regr.ranger", keep.inbag = TRUE, par.vals = list(num.trees = 100))
mod = train(lrn, bh.task)

# Application of the main function
results = OOBCurve(mod, measures = list(mse, mae, rsq), task = bh.task, data = data)
# Plot the generated results
plot(results$mse, type = "l", ylab = "oob-mse", xlab = "ntrees")
plot(results$mae, type = "l", ylab = "oob-mae", xlab = "ntrees")
plot(results$rsq, type = "l", ylab = "oob-rsq", xlab = "ntrees")
```
With the help of this function, the out-of-bag curves for parameters like mtry, sample.fraction and min.node.size of random forests can be created for any measure that is available in the mlr package.
OOBCurvePars(lrn, task, pars = c("mtry"), nr.grid = 10, par.vals = NULL, measures = list(auc))
OOBCurvePars(lrn, task, pars = c("mtry"), nr.grid = 10, par.vals = NULL, measures = list(auc))
lrn |
The learner created with makeLearner of mlr; the examples below use a 'ranger' learner such as "classif.ranger". |
task |
Learning task created by the function makeClassifTask or makeRegrTask of mlr. |
pars |
One of the hyperparameters "mtry", "sample.fraction" or "min.node.size". |
nr.grid |
Number of equally spaced points in the hyperparameter space that should be evaluated. |
par.vals |
Optional vector of hyperparameter points that should be evaluated. If set, nr.grid is not used anymore. Default is NULL. |
measures |
List of performance measure(s) of mlr to evaluate. Default is auc only, as shown in the usage above; choose a measure that fits the task type (e.g. mmce for classification, mse for regression). See the mlr tutorial for a list of available measures for the corresponding task. |
Returns a list with parameter values and a list of performances.
See also OOBCurve for out-of-bag curves dependent on the number of trees.
```r
## Not run:
library(mlr)
task = sonar.task

lrn = makeLearner("classif.ranger", predict.type = "prob", num.trees = 1000)
results = OOBCurvePars(lrn, task, measures = list(auc))
plot(results$par.vals, results$performances$auc, type = "l", xlab = "mtry", ylab = "auc")

lrn = makeLearner("classif.ranger", predict.type = "prob", num.trees = 1000, replace = FALSE)
results = OOBCurvePars(lrn, task, pars = "sample.fraction", measures = list(mmce))
plot(results$par.vals, results$performances$mmce, type = "l", xlab = "sample.fract.", ylab = "mmce")

results = OOBCurvePars(lrn, task, pars = "min.node.size", measures = list(mmce))
plot(results$par.vals, results$performances$mmce, type = "l", xlab = "min.node.size", ylab = "mmce")
## End(Not run)
```
```r
## Not run:
library(mlr)
task = sonar.task

lrn = makeLearner("classif.ranger", predict.type = "prob", num.trees = 1000)
results = OOBCurvePars(lrn, task, measures = list(auc))
plot(results$par.vals, results$performances$auc, type = "l", xlab = "mtry", ylab = "auc")

lrn = makeLearner("classif.ranger", predict.type = "prob", num.trees = 1000, replace = FALSE)
results = OOBCurvePars(lrn, task, pars = "sample.fraction", measures = list(mmce))
plot(results$par.vals, results$performances$mmce, type = "l", xlab = "sample.fract.", ylab = "mmce")

results = OOBCurvePars(lrn, task, pars = "min.node.size", measures = list(mmce))
plot(results$par.vals, results$performances$mmce, type = "l", xlab = "min.node.size", ylab = "mmce")
## End(Not run)
```