Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: [R-package] require lgb.Dataset, remove support for passing 'colnames' and 'categorical_feature' for lgb.train() and lgb.cv() #6714

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
63 changes: 2 additions & 61 deletions R-package/R/lgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
Expand All @@ -36,8 +34,6 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
Expand Down Expand Up @@ -69,20 +65,12 @@ CVBooster <- R6::R6Class(
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv
#' @export
lgb.cv <- function(params = list()
, data
, nrounds = 100L
, nfold = 3L
, label = NULL
, weight = NULL
, obj = NULL
, eval = NULL
, verbose = 1L
Expand All @@ -92,8 +80,6 @@ lgb.cv <- function(params = list()
, stratified = TRUE
, folds = NULL
, init_model = NULL
, colnames = NULL
, categorical_feature = NULL
, early_stopping_rounds = NULL
, callbacks = list()
, reset_data = FALSE
Expand All @@ -104,33 +90,8 @@ lgb.cv <- function(params = list()
if (nrounds <= 0L) {
stop("nrounds should be greater than zero")
}

# If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
}
data <- lgb.Dataset(data = data, label = label)
}

# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
stop("lgb.cv: data must be an lgb.Dataset instance")
}

# set some parameters, resolving the way they were passed in with other parameters
Expand Down Expand Up @@ -214,37 +175,17 @@ lgb.cv <- function(params = list()
data$construct()

# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)

if (!is.null(weight)) {
data$set_field(field_name = "weight", data = weight)
}

# Update parameters with parsed parameters
data$update_params(params = params)

# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor = predictor)

# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames = colnames)
}

# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature = categorical_feature)
}

if (!is.null(folds)) {

# Check for list of folds or for single value
Expand Down
36 changes: 1 addition & 35 deletions R-package/R/lgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
Expand Down Expand Up @@ -42,12 +40,6 @@
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export
lgb.train <- function(params = list(),
data,
Expand All @@ -59,8 +51,6 @@ lgb.train <- function(params = list(),
record = TRUE,
eval_freq = 1L,
init_model = NULL,
colnames = NULL,
categorical_feature = NULL,
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
Expand All @@ -83,16 +73,6 @@ lgb.train <- function(params = list(),
}
}

# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}

# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
Expand Down Expand Up @@ -171,21 +151,12 @@ lgb.train <- function(params = list(),

# Construct datasets, if needed
data$update_params(params = params)
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
data$construct()

# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)

# Update parameters with parsed parameters
Expand All @@ -194,11 +165,6 @@ lgb.train <- function(params = list(),
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor)

# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames)
}

valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
Expand Down
16 changes: 0 additions & 16 deletions R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,19 +258,3 @@
return(a == b)
}
}

# Emit a deprecation warning for a Dataset-related keyword argument.
#
# ref: https://github.com/microsoft/LightGBM/issues/6435
#
# Arguments:
#   argname          - name of the deprecated argument, e.g. "categorical_feature"
#   calling_function - name of the function the argument was passed to,
#                      without parentheses, e.g. "lgb.cv"
#
# The formals are ordered (argname, calling_function) because this function is
# invoked positionally in that order, e.g.
#   .emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
# With the previous (calling_function, argname) ordering, such calls produced
# a garbled message ("Argument 'lgb.cv' to categorical_feature() is deprecated").
# Calls that name both arguments behave the same under either ordering.
#
# Returns NULL invisibly; called only for the side effect of raising a warning.
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
    msg <- sprintf(
        paste0(
            "Argument '%s' to %s() is deprecated and will be removed in a future release. "
            , "Set '%s' with lgb.Dataset() instead. "
            , "See https://github.com/microsoft/LightGBM/issues/6435."
        )
        , argname
        , calling_function
        , argname
    )
    warning(msg)
    return(invisible(NULL))
}
20 changes: 0 additions & 20 deletions R-package/man/lgb.cv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions R-package/man/lgb.train.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_basic.R
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
}
})

test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset", {
bad_values <- list(
4L
, "hello"
Expand All @@ -454,7 +454,7 @@ test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset
, 10L
, nfold = 5L
)
}, regexp = "'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'", fixed = TRUE)
}, regexp = "lgb.cv: data must be an lgb.Dataset instance", fixed = TRUE)
}
})

Expand Down
Loading