# devtools::install_github("MrDomani/autofeat")
library(autofeat)
library(mlr)
#> Loading required package: ParamHelpers
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
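# Prepare the data: four numeric features from DALEX's titanic_imputed,
# with a random 70/30 train/test split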
data("titanic_imputed", package = "DALEX")
i <- sample(1:nrow(titanic_imputed), size = round(0.7 * nrow(titanic_imputed)))
X_train <- data.matrix(titanic_imputed[i, c("age", "fare", "sibsp", "parch")])
y_train <- factor(titanic_imputed$survived[i])
X_test <- data.matrix(titanic_imputed[-i, c("age", "fare", "sibsp", "parch")])
y_test <- factor(titanic_imputed$survived[-i])
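# Extract new features with SAFE (5 iterations); progress is printed per iteration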
X_SAFEd <- SAFE(X_train, y_train, X_test, y_test, n_iter = 5)
#>
#> 1th iteration out of 5 complete
#> 2th iteration out of 5 complete
#> 3th iteration out of 5 complete
#> 4th iteration out of 5 complete
#> 5th iteration out of 5 complete
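# SAFE returns the transformed feature matrices; the transformed test set
# is stored in the $X_valid element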
X_train_SAFEd <- X_SAFEd$X_train
X_test_SAFEd <- X_SAFEd$X_valid
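# Build mlr classification tasks for the original and SAFE-transformed
# train and test sets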
task1 <- makeClassifTask("original_train", data.frame(X_train, target = y_train), "target")
task2 <- makeClassifTask("SAFE_train", data.frame(X_train_SAFEd, target = y_train), "target")
task1_test <- makeClassifTask("original_test", data.frame(X_test, target = y_test), "target")
task2_test <- makeClassifTask("SAFE_test", data.frame(X_test_SAFEd, target = y_test), "target")
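# Random forest learner (ranger) with probability predictions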
lrn <- makeLearner("classif.ranger", predict.type = "prob")
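# 10-fold cross-validation on the original training features,
# measuring AUC and precision (PPV)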
crossval(lrn, task1, measures = list(auc, ppv))$aggr
#> Resampling: cross-validation
#> Measures: auc ppv
#> [Resample] iter 1: 0.6858032 0.7313433
#> [Resample] iter 2: 0.7536364 0.7894737
#> [Resample] iter 3: 0.6273585 0.7037037
#> [Resample] iter 4: 0.6268222 0.7642276
#> [Resample] iter 5: 0.6339423 0.7111111
#> [Resample] iter 6: 0.6531120 0.7600000
#> [Resample] iter 7: 0.7471429 0.7680000
#> [Resample] iter 8: 0.6293333 0.7218045
#> [Resample] iter 9: 0.6657185 0.7841727
#> [Resample] iter 10: 0.6715335 0.7111111
#>
#> Aggregated Result: auc.test.mean=0.6694403,ppv.test.mean=0.7444948
#>
#> auc.test.mean ppv.test.mean
#> 0.6694403 0.7444948
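# The same cross-validation on the SAFE-transformed training features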
crossval(lrn, task2, measures = list(auc, ppv))$aggr
#> Resampling: cross-validation
#> Measures: auc ppv
#> [Resample] iter 1: 0.6813131 0.7899160
#> [Resample] iter 2: 0.7561275 0.7661290
#> [Resample] iter 3: 0.6760244 0.7500000
#> [Resample] iter 4: 0.6295782 0.6984127
#> [Resample] iter 5: 0.6667585 0.6818182
#> [Resample] iter 6: 0.6019656 0.7500000
#> [Resample] iter 7: 0.6210672 0.7521368
#> [Resample] iter 8: 0.7386783 0.7600000
#> [Resample] iter 9: 0.6503889 0.6953125
#> [Resample] iter 10: 0.5878899 0.7230769
#>
#> Aggregated Result: auc.test.mean=0.6609792,ppv.test.mean=0.7366802
#>
#> auc.test.mean ppv.test.mean
#> 0.6609792 0.7366802
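# Train on the full training tasks and compare AUC in-sample
# and on the held-out test tasks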
ranger1 <- train(lrn, task1)
ranger2 <- train(lrn, task2)
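# In-sample (training) AUC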
performance(predict(ranger1, task1), auc)
#> auc
#> 0.8685697
performance(predict(ranger2, task2), auc)
#> auc
#> 0.9096177
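# AUC on the held-out test tasks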
performance(predict(ranger1, task1_test), auc)
#> auc
#> 0.686658
performance(predict(ranger2, task2_test), auc)
#> auc
#> 0.6770653