
Base learner to encode one single class of a categorical feature
Source: R/RcppExports.R
BaselearnerCategoricalBinary.Rd
This class creates a one-column one-hot encoded data matrix with ones where
x == class_name and zeros otherwise.
Format
S4 object.
Arguments
- data_source
CategoricalDataRaw
The raw data object. Must be an object generated by CategoricalDataRaw.
- class_name
(character(1))
The class for which a binary vector is created as data representation.
- blearner_type
(character(1))
Type of the base learner (if not specified, blearner_type = "binary" is used). The unique id of the base learner is defined by appending class_name and blearner_type to the feature name: paste0(data_source$getIdentifier(), "_", class_name, "_", blearner_type).
Usage
BaselearnerCategoricalBinary$new(data_source, class_name)
BaselearnerCategoricalBinary$new(data_source, class_name, blearner_type)
Methods
$summarizeFactory(): () -> ()
$transformData(newdata): list(InMemoryData) -> matrix()
$getMeta(): () -> list()
Inherited methods from Baselearner
$getData(): () -> matrix()
$getDF(): () -> integer()
$getPenalty(): () -> numeric()
$getPenaltyMat(): () -> matrix()
$getFeatureName(): () -> character()
$getModelName(): () -> character()
$getBaselearnerId(): () -> character()
Examples
# Sample data:
x = sample(c("one","two"), 20, TRUE)
y = c(one = 0.8, two = -1.2)[x] + rnorm(20, 0, 0.2)
dat = data.frame(x, y)
# S4 API:
ds = CategoricalDataRaw$new(x, "cat")
bl = BaselearnerCategoricalBinary$new(ds, "one")
bl$getData()
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,] 0 0 1 0 1 0 0 0 1 0 0 0 1 1
#> [,15] [,16] [,17] [,18] [,19] [,20]
#> [1,] 1 0 1 1 0 1
bl$summarizeFactory()
#> Categorical base learner of feature cat and category one
bl$transformData(list(ds))
#> $design
#> 20 x 1 sparse Matrix of class "dgCMatrix"
#>
#> [1,] .
#> [2,] .
#> [3,] 1
#> [4,] .
#> [5,] 1
#> [6,] .
#> [7,] .
#> [8,] .
#> [9,] 1
#> [10,] .
#> [11,] .
#> [12,] .
#> [13,] 1
#> [14,] 1
#> [15,] 1
#> [16,] .
#> [17,] 1
#> [18,] 1
#> [19,] .
#> [20,] 1
#>
bl$getBaselearnerId()
#> [1] "cat_one_binary"
# R6 API:
cboost = Compboost$new(dat, "y")
cboost$addBaselearner("x", "binary", BaselearnerCategoricalBinary)
cboost$train(500, 0)
#> Train 500 iterations in 0 Seconds.
#> Final risk based on the train set: 0.026
#>
table(cboost$getSelectedBaselearner())
#>
#> x_one_binary x_two_binary
#> 251 249
plotPEUni(cboost, "x", individual = FALSE)