Execute one step using the rquery.rquery_db_executor SQL supplier. Note: it is not good practice to use SQL nodes in pipelines intended for data.table, because class information is lost and data must be transferred to and from the database. This implementation is only here for completeness.
# S3 method for relop_sql
ex_data_table_step(
optree,
...,
tables = list(),
source_usage = NULL,
source_limit = NULL,
env = parent.frame()
)
optree: relop operations tree.
...: not used; forces later arguments to bind by name.
tables: named list mapping table names used in nodes to data.tables and data.frames.
source_usage: list mapping source table names to vectors of columns used.
source_limit: if not NULL, limit all table sources to no more than this many rows (used for debugging).
env: environment to work in.
# WARNING: this example temporarily points the rquery.rquery_db_executor
# option at an in-memory RSQLite connection, then restores the previous value.
if (requireNamespace("DBI", quietly = TRUE) &&
    requireNamespace("RSQLite", quietly = TRUE)) {
  # example database connection
  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
  # install the connection as the SQL executor, remembering the old setting
  old_o <- options(list("rquery.rquery_db_executor" = list(db = my_db)))

  # finally= guarantees the option is restored and the connection closed even
  # if a step below fails; invisible() keeps the pipeline result from being
  # auto-printed at top level.
  invisible(tryCatch(
    {
      # example data
      d <- data.frame(v1 = c(1, 2, NA, 3),
                      v2 = c(NA, "b", NA, "c"),
                      v3 = c(NA, NA, 7, 8),
                      stringsAsFactors = FALSE)

      # example xform
      vars <- column_names(d)
      # build a NA/NULLs per-row counting expression.
      # names are "quoted" by wrapping them with as.name().
      # constants can be quoted by an additional list wrapping.
      expr <- lapply(vars,
                     function(vi) {
                       list("+ (CASE WHEN (",
                            as.name(vi),
                            "IS NULL ) THEN 1.0 ELSE 0.0 END)")
                     })
      expr <- unlist(expr, recursive = FALSE)
      expr <- c(list(0.0), expr)

      # instantiate the operator node
      op_tree <- local_td(d) %.>%
        sql_node(., "num_missing" %:=% list(expr))
      cat(format(op_tree))

      # run the pipeline on d through the configured SQL executor
      d %.>% op_tree
    },
    finally = {
      # undo the global side effects regardless of success or failure
      options(old_o)
      DBI::dbDisconnect(my_db)
    }
  ))
}
#> mk_td("d", c(
#> "v1",
#> "v2",
#> "v3")) %.>%
#> sql_node(.,
#> num_missing %:=% 0 + (CASE WHEN ( v1 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v2 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v3 IS NULL ) THEN 1.0 ELSE 0.0 END),
#> *=TRUE)