Skip to content

select + semi_join generates a warning when not immutable #472

@dakvid

Description

@dakvid

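Something is not quite right with how `select()` followed by `semi_join()` is translated when the lazy table is created with `lazy_dt(immutable = FALSE)`: evaluating the result issues a warning that the column dropped by `select()` does not exist to remove. In the generated call shown below, the in-place removal step `:=`("c", NULL) appears twice, so the second evaluation apparently finds that column 'c' has already been removed.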

library(dplyr, warn.conflicts = FALSE)      # 1.1.4
library(data.table, warn.conflicts = FALSE) # 1.15.4
library(dtplyr, warn.conflicts = FALSE)     # 1.3.1

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  semi_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT53`[, .(a, b)][unique(`_DT53`[, .(a, b)][`_DT54`, which = TRUE, 
#     nomatch = NULL, on = .(b)])]
# 
#       a     b
#   <int> <int>
# 1     2     2

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  semi_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT55`[, `:=`("c", NULL)][unique(`_DT55`[, `:=`("c", NULL)][`_DT56`, 
#     which = TRUE, nomatch = NULL, on = .(b)])]
# 
#       a     b
#   <int> <int>
# 1     2     2
# 
# # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Warning message:
# In `[.data.table`(`_DT55`, , `:=`("c", NULL)) :
#   Column 'c' does not exist to remove

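Compare with inner_join, where the generated call for `immutable = FALSE` contains the `:=`("c", NULL) step only once and no warning is issued: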

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  inner_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT59`[, .(a, b)][`_DT60`, on = .(b), nomatch = NULL, allow.cartesian = TRUE]
# 
#       a     b
#   <int> <int>
# 1     2     2

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  inner_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT61`[, `:=`("c", NULL)][`_DT62`, on = .(b), nomatch = NULL, 
#     allow.cartesian = TRUE]
# 
#       a     b
#   <int> <int>
# 1     2     2

Or left_join, where the `:=`("c", NULL) step likewise appears only once and no warning is issued:

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  left_join(y, by = "b")
# Source: local data table [3 x 2]
# Call:   setcolorder(`_DT64`[`_DT63`[, .(a, b)], on = .(b), allow.cartesian = TRUE], 
#     2:1)
# 
#       a     b
#   <int> <int>
# 1     1     1
# 2     2     2
# 3     3     3

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  left_join(y, by = "b")
# Source: local data table [3 x 2]
# Call:   setcolorder(`_DT66`[`_DT65`[, `:=`("c", NULL)], on = .(b), allow.cartesian = TRUE], 
#     2:1)
# 
#       a     b
#   <int> <int>
# 1     1     1
# 2     2     2
# 3     3     3

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions