# spliffelse.R
#
# This module exports analogues of
# ifelse/if_else/fifelse and
# case_when/fcase . It solves three
# problems:
#
# 1) ifelse( cond(x), f1(x), f2(x) )
# and the others waste time
# evaluating f1 and f2 for elements
# of x that they don't need to.
#
# 2) this can make them crash, because
# f1 and f2 will probably get called
# on elements they weren't designed
# to handle. For example, they might
# call assertions to ensure their
# arguments are in range, and
# therefore throw an error.
#
# 3) to get round these, experts advise
# foregoing ifelse and the others.
# Instead, use this scheme:
# result <- rep( length( cond(x) ) )
# result[ cond ] <- f1( x[ cond ] )
# result[ ! cond ] <- f2( x[ ! cond ] )
# That is, create a vector to hold
# the result, then call f1 only on
# the elements of x for which cond
# holds, and f2 only on the elements
# for which it doesn't. The advantage
# is that this avoids needless calls,
# as well as the risk of them crashing.
# The disadvantage is that it's more
# to type so more error-prone,
# and lower-level than it should
# be. What we obviously need is an
# abstraction that looks like
# a conditional expression, to
# make intent clear, but that works
# as above.
#
# Exports:
# spliffelse( cond, expr1, vars=NULL
# , expr2, vars=NULL
# )
# Split-workflow analogue to
# ifelse/if_else/fifelse .
#
# spliffcase( cond1, expr1, vars=NULL
# , ...
# , condN, exprN, vars=NULL
# , default=expr, vars=NULL
# )
# Split-workflow analogue to
# case_when/fcase . The
# argument syntax is as for
# fcase , i.e. commas separate
# conditions from their branches,
# and the default is signalled
# by the name 'default'.
#
# In these, the conditions should
# evaluate to logical vectors. The
# expressions expr1 etc. should
# evaluate to vectors of the same
# length as the condition(s), and
# should be type-compatible in that
# all elements returned can be
# combined into a single vector.
#
# Both constructs work by evaluating
# a condition to a logical vector,
# then getting indices of the elements
# for which it's true. They then
# evaluate (say) f1(x) on _only_
# those elements of x . They do
# this by carrying out the evaluation
# in a new environment wherein
# the name 'x' is rebound to x[ cond ] .
# That is, to the elements of x
# for which cond is true.
#
# In real-life, the expressions
# probably will contain more
# than one variable. So
# uses are more likely to
# look like
# spliffelse( cond(x,y,z), f1(u,x,y,z,t), f2(v,x,s,t) )
#
# This brings a problem.
# For the rebinding to give
# the right results, we
# have to know which names
# denote vectors to be subsetted
# and rebound, and which are
# other kinds of entity such as
# constants like pi and TRUE ,
# control parameters, and so
# on.
#
# Saying it another way,
# I designed spliffelse and
# spliffcase to help me
# write efficient R code
# that used vectorised functions
# rather than mapping scalar
# functions over vectors.
# So I have a lot of functions
# most of whose arguments
# are the same length as one
# another, each i'th element
# being part of the i'th
# observation. Basically,
# columns of a data frame.
#
# But these functions may
# take other arguments
# which affect each
# observation in the same
# way. An example would
# be the locale to operate
# in, the currency to
# use, or the number of
# digits to print results
# to. These are not part
# of the observations, and
# should not be subsetted
# and rebound.
#
# To solve this, spliffelse
# and spliffcase assume
# that all variables
# mentioned in an expression
# should be subsetted and
# rebound, as should those
# mentioned in its condition.
#
# ....vars
library( pryr )
library( glue )
library( assertthat )
#1) spliffelse
#=============
spliffelse <- function( cond_vec, then_expr, else_expr )
{
assert_that( is.logical( cond_vec ) )
#
# Ensure the condition is Boolean.
len <- length( cond_vec )
#
# Get the length of the condition vector.
result_vec <- rep( NA, len )
#
# Create a result vector the same
# length, as yet undefined.
then_env <- new.env()
then_vars <- all.vars( substitute( then_expr ) )
for ( v in then_vars ) {
assign( v, eval(as.symbol(v))[ cond_vec ], then_env )
}
else_env <- new.env()
else_vars <- all.vars( substitute( else_expr ) )
for ( v in else_vars ) {
assign( v, eval(as.symbol(v))[ ! cond_vec ], else_env )
}
result_vec[ cond_vec ] <- eval( substitute( then_expr ), envir=then_env )
result_vec[ ! cond_vec ] <- eval( substitute( else_expr ), envir=else_env )
result_vec
}
#2) spliffcase
#=============
spliffcase <- function( ... )
{
uneval_args <- named_dots( ... )
#
# A list of all the arguments
# as eexpressions.
ns <- names( uneval_args )
#
# Their names. These will be either
# "vars", "default", or the expression
# as a string.
ns[ ns != "vars" & ns != "default" ] <- "N"
#
# Replace the names of any arguments
# that are not "vars" or "default" by
# "N". I can then check to see that
# the sequence of arguments follows
# the right pattern.
ns2 <- c()
ua2 <- list()
# Loop round the arguments removing
# subsequences that are OK and assigning
# them to ua2 and their names to
# ns2 . Both will end up with
# lengths that are a multiple of
# 3, with each condition being
# followed by an expression and
# then an indication of its vars.
while ( TRUE ) {
if ( length( ns ) == 0 ) {
break
#
# No argument left, so they're
# all OK.
}
else if ( length( ns ) >= 2 && all( head(ns,2) == c("default","vars") ) ) {
ns2 <- c( ns2, c("cond","branch","vars") )
ns <- tail( ns, -2 )
ua2 <- list2( !!! ua2, TRUE, !!! head( uneval_args, 2 ) )
uneval_args <- tail( uneval_args, -2 )
#
# We've found a "default" and
# its "vars" . Prepend a TRUE as
# the condition.
}
else if ( length( ns ) >= 1 && all( head(ns,1) == c("default") ) ) {
ns2 <- c( ns2, c("cond","branch","novars") )
ns <- tail( ns, -1 )
ua2 <- list2( !!! ua2, TRUE, !!! head( uneval_args, 1 ), "novars" )
uneval_args <- tail( uneval_args, -1 )
#
# A "default" without a "vars".
# Prepend a TRUE, and append a dummy
# element to signal the missing "vars".
}
else if ( length( ns ) >= 3 && all( head(ns,3) == c("N","N","vars") ) ) {
ns2 <- c( ns2, c("cond","branch","vars") )
ns <- tail(ns,-3)
ua2 <- list2( !!! ua2, !!! head( uneval_args, 3 ) )
uneval_args <- tail( uneval_args, -3 )
#
# Two ordinary arguments and a "vars".
# Assume the first two to be a condition
# and its expression.
}
else if ( length( ns ) >= 2 && all( head(ns,2) == c("N","N") ) ) {
ns2 <- c( ns2, c("cond","branch","novars") )
ns <- tail(ns,-2)
ua2 <- list2( !!! ua2, !!! head( uneval_args, 2 ), "novars" )
uneval_args <- tail( uneval_args, -2 )
#
# Two ordinary arguments without a
# "vars". Assume them to be a condition
# and its expression. Insert a dummy
# element to replace the missing "vars".
}
else
stop( glue( "Error: illegal combination of arguments {ns2} {ns}" ) )
}
num_branches <- length( ua2 ) / 3
#
# ua2 now contains a triple for
# each condition. So the number of
# case branches is its size over 3.
# Now loop round the branches.
for ( i in ( 1 : num_branches ) * 3 - 2 ) {
uneval_cond <- ua2[[ i ]]
uneval_br <- ua2[[ i + 1 ]]
#
# Get the condition and its
# expression. Don't evaluate
# them yet.
cond <- eval( uneval_cond )
#
# Now evaluate the condition.
assert_that( is.logical( cond ) )
#
# It had better be Boolean.
if ( i == 1 ) {
len <- length( cond )
#
# If this is the first branch,
# the number of elements in
# the condition vector will
# determine the size of the
# result vector.
result_vec <- rep( NA, len )
#
# Make the result vector. All
# undefined so far.
dones <- rep( FALSE, len )
#
# This is a "gate" to indicate,
# on each pass, which elements
# of the result have been
# decided on.
}
t_indices <- which( ifelse( dones, FALSE, cond ) )
#
# The indices of elements in
# the result to assign to. As
# the condition, except that
# if an element has already been
# assigned, we don't assign it
# again. I.e. earlier conditions
# take precedence over later
# ones.
dones <- dones | cond
#
# Update the done-gate to indicate
# which result elements are
# _now_ complete.
what_vars <- ua2[[ i + 2 ]]
#
# This will be either "novars"
# or the expression
# extra( ) .
if ( what_vars == "novars" ) {
branch_vars <- append( all.vars( uneval_cond )
, all.vars( uneval_br )
)
#
# If it's "novars", the caller
# left it to us to work them out.
# Take all the variables in the
# condition and its expression.
}
else {
assert_that( is.call( what_vars ) )
extra_vars <- tail( what_vars, -1 )
branch_vars <- append( all.vars( uneval_cond )
, all.vars( uneval_br )
) %>% append( as.character( extra_vars ) )
#
# Otherwise, take all those variables,
# but add those the caller specifies.
# This is a fudge. The might want
# to remove some too.
}
# Now loop over the variables. Bind
# each, in a new environment, to its vector
# subsetted by the condition. But only do
# this for atomic non-null variables.
branch_env <- new.env()
for ( var in branch_vars ) {
val <- eval( as.symbol( var ) )
if ( ! is.null( val ) && is.atomic( val ) ) {
assign( var, val[ t_indices ], branch_env )
}
}
result_vec[ t_indices ] <- eval( uneval_br, envir=branch_env )
#
# Evaluate the expression in this
# new environment, putting the result
# elements into the corresponding
# slots in the result vector.
}
result_vec
#
# Finally, return the result vector.
}
#4) Try them
#===========
# This shows that if you have a
# conditional
# ifelse( x %% 2 == 0, f1(x), f2(x) )
# then f1 gets called on odd
# elements of x , and f2 gets called
# on even elements, even though
# the form of the conditional makes
# it look as though they don't.
# If f1 isn't designed for odds,
# or f2 for evens, this could cause
# crashes. The same happens for
# if_else and fifelse .
#
try_spliffelse_1 <- function()
{
library( tidyverse )
library( data.table )
library( glue )
hate_odds <- function( x )
{
if ( any( x %% 2 == 1 ) ) stop( glue( "I hate odd numbers ({ x[ x%% 2 == 1 ] }). " ) )
x
}
hate_evens <- function( x )
{
if ( any( x %% 2 == 0 ) ) stop( glue( "I hate even numbers ({ x[ x%% 2 == 0 ] }). " ) )
x
}
x <- 1:100
ifelse( x %% 2 == 0, hate_odds( x ) / 2, hate_evens( x ) + 3 )
if_else( x %% 2 == 0, hate_odds( x ) / 2, hate_evens( x ) + 3 )
fifelse( x %% 2 == 0, hate_odds( x ) / 2, hate_evens( x ) + 3 )
spliffelse( x %% 2 == 0, hate_odds( x ) / 2, hate_evens( x ) + 3 )
}
# This shows a vectorised case expression
# where only sliffcase gives the obvious result,
# a vector made by combining the branch
# expressions elementwise. fcase crashes,
# and case_when gives some weird stack
# of glue results.
#
try_spliffelse_2 <- function()
{
library( rlang )
x <- 1:10
case_when( x %% 2 == 0 ~
glue( "Even ({x})" )
, x %% 3 == 0 ~
glue( "Divisible by 3 ({x})" )
, TRUE ~
"Otherwise"
) ->xxxx
fcase( x %% 2 == 0 ,
glue( "Even ({x})" )
, x %% 3 == 0 ,
glue( "Divisible by 3 ({x})" )
, default =
"Otherwise"
)
spliffcase( x %% 2 == 0 ,
glue( "Even ({x})" )
, x %% 3 == 0 ,
glue( "Divisible by 3 ({x})" )
, default =
"Otherwise"
)
}
# This is an analogue to try_spliffelse_1 .
# It shows that case_when and fcase
# will both call a function that can't
# process odd numbers on elements
# that are odd, even when its condition
# should rule that out.
#
try_spliffelse_3 <- function()
{
hate_odds <- function( x )
{
if ( any( x %% 2 == 1 ) ) stop( glue( "I hate odd numbers ({ x[ x%% 2 == 1 ] }). " ) )
x
}
case_when( x %% 2 == 0 ~
hate_odds( x )
, x %% 3 == 0 ~
glue( "Divisible by 3 ({x})" )
, TRUE ~
"Otherwise"
)
fcase( x %% 2 == 0 ,
hate_odds( x )
, x %% 3 == 0 ,
glue( "Divisible by 3 ({x})" )
, default =
"Otherwise"
)
spliffcase( x %% 2 == 0 ,
hate_odds( x )
, x %% 3 == 0 ,
glue( "Divisible by 3 ({x})" )
, default =
"Otherwise"
)
}
# This demonstrates spliffcase's automatic
# extraction of variables to subset and rebind.
#
try_spliffelse_4 <- function()
{
x <- 1:10
y <- x * 10
z <- x * 100
spliffcase( x %% 2 == 0 ,
y + z
, x %% 3 == 0 ,
( y + z ) * 10
, default =
( y + z ) * 100
)
}
# This demonstrates that spliffcase's automatic
# extraction of variables ignores lists (non-
# atomic values).
#
try_spliffelse_5 <- function()
{
x <- 1:10
y <- x * 10
z <- x * 100
t <- x * 1000
a <- list('100','200','300','400','500')
spliffcase( x %% 2 == 0 ,
y + z
, x %% 3 == 0 ,
( y + z ) * 10
, default =
( y + z ) * as.numeric( a[[ 2 ]] )
)
}
# This demonstrates that spliffcase's automatic
# extraction of variables sometimes has to
# be augmented. The spliffcases below don't
# know that they need variable t , because
# it's inside a string in glue . So
# we supply the first spliffcase with t
# explicitly. The second spliffcase, lacking
# this help, crashes.
#
try_spliffelse_6 <- function()
{
x <- 1:10
y <- x * 10
z <- x * 100
t <- x * 1000
a <- list('100','200','300','400','500')
spliffcase( x %% 2 == 0 ,
y + z + as.integer(glue("{t}")), vars=also("t")
, x %% 3 == 0 ,
( y + z ) * 10
, default =
( y + z ) * as.numeric( a[[ 2 ]] )
)
spliffcase( x %% 2 == 0 ,
y + z + as.integer(glue("{t}"))
, x %% 3 == 0 ,
( y + z ) * 10
, default =
( y + z ) * as.numeric( a[[ 2 ]] )
)
}
# This example is based on the post
# below by Martin Maechler:
#
# Martin Maechler, Aug 08, 2016; 10:36am
# "Re: ifelse() woes ... can we agree on a ifelse2() ?"
# https://r.789695.n4.nabble.com/ifelse-woes-can-we-agree-on-a-ifelse2-td4723584.html#a4723614
#
# I mean the fact that many many usages of ifelse() are of the
# form
# ifelse(logiFn(x), f1(x), f2(x))
#
# {with f1() or f2() often even being constant}
#
# and e.g., in the case where logiFn(x) gives few TRUEs and f1(.)
# is expensive and f2(.) very cheap (say "constant" NA), it is
# much more efficient to use
#
# ans <- x
# Y <- logiFn(x)
# ans[ Y] <- f1(x[ Y])
# ans[!Y] <- f2(x[!Y])
#
# as the expensive function is only called on a small subset of
# the full x.
#
# It implements his example,
# using a stupidly slow
# squaring function as f1 , and
# the constant function 1 as f2 .
# logiFn is implemented using
# runif , and gives few TRUEs.
# I benchmark this for ifelse ,
# if_else , fifelse, spliffelse ,
# and Maechler's low-level code.
# The mean benchmark times, in
# microseconds, were:
# 218.6270
# 246.1460
# 451.3929
# 197.0651
# 180.1431
#
try_spliffelse_7 <- function()
{
library( tidyverse )
library( data.table )
library( microbenchmark )
library( ggplot2 )
# Doesn't depend on x other than
# to get the right number of
# runif results. Returns TRUE
# 10% of the time.
#
logiFn <- function( x )
{
set.seed( 1 )
runif( length( x ), 0, 1 ) > 0.9
}
x <- 1:10
#
# We'll try his example on this.
# Squares x, not efficiently.
#
f1 <- function( x )
{
as.numeric(
eval(
parse( text =
capture.output(
dput(
x ^ 24.69136
)
)
)
)
) ^ ( 1 / 12.3456789 )
}
# Returns the constant 1.
#
f2 <- function( x )
{
1
}
# Checks that all elements of
# the list values are equal.
# Used in benchmarking below.
#
check_equal <- function( values )
{
all( map_lgl( values[ -1 ]
, function( x ) {
identical( values[[ 1 ]], x )
}
)
)
}
res <-
microbenchmark( ifelse( logiFn(x), f1(x), f2(x) )
, if_else( logiFn(x), f1(x), f2(x) )
, fifelse( logiFn(x), f1(x), f2(x) )
, spliffelse( logiFn(x), f1(x), f2(x) )
, { ans <- x
Y <- logiFn(x)
ans[ Y] <- f1(x[ Y])
ans[!Y] <- f2(x[!Y])
ans
}
, check = check_equal
)
ggplot2::autoplot( res )
print( res )
}
# This runs similar code to the above,
# but with my reporting of odd and
# even numbers added. It confirms that
# ifelse , if_else and fifelse all
# call the functions on elements they
# shouldn't call them on. spliffelse
# doesn't, and neither, of course, does
# Maechler's low-level code.
#
try_spliffelse_8 <- function()
{
logiFn_2 <- function( x )
{
set.seed( 1 )
x %% 2 == 0
}
x <- 1:10
f1_2 <- function( x )
{
if ( any ( x %% 2 != 0 ) )
cat( glue( "Got odds { x[ x%%2 != 0 ] }" ), "\n" )
as.numeric(
eval(
parse( text =
capture.output(
dput(
x ^ 24.69136
)
)
)
)
) ^ ( 1 / 12.3456789 )
}
f2_2 <- function( x )
{
if ( any( x %% 2 == 0 ) )
cat( glue( "Got evens { x[ x%%2 == 0 ] }" ), "\n" )
1
}
cat( "Doing ifelse.\n" )
ans_ifelse <- ifelse( logiFn_2(x), f1_2(x), f2_2(x) )
cat( "Doing if_else.\n" )
ans_if_else <- if_else( logiFn_2(x), f1_2(x), f2_2(x) )
cat( "Doing fifelse.\n" )
ans_fifelse <- fifelse( logiFn_2(x), f1_2(x), f2_2(x) )
cat( "Doing spliffelse.\n" )
ans_spliffelse <- spliffelse( logiFn_2(x), f1_2(x), f2_2(x) )
cat( "Doing Maechler.\n" )
ans_Maechler <- { ans <- x
Y <- logiFn_2(x)
ans[ Y] <- f1_2(x[ Y])
ans[!Y] <- f2_2(x[!Y])
ans
}
answers <- list( ans_ifelse = ans_ifelse
, ans_if_else = ans_if_else
, ans_fifelse = ans_fifelse
, ans_spliffelse = ans_spliffelse
, ans_Maechler = ans_Maechler
)
check_equal <- function( values )
{
all( map_lgl( values[ -1 ]
, function( x ) {
identical( values[[ 1 ]], x )
}
)
)
}
cat( "Checking equality of \n" )
print( answers )
check_equal( answers )
}