Skip to content

Conversation

@paleolimbot
Copy link
Member

This needs extensive testing, but it is very cool! The overhead here is very low (we can execute `x + 1` just as fast as native R data frames!).

library(sedonadb)

# Build a one-column data frame with one million rows (x = 1, 2, ..., 1e6).
df <- data.frame(x = 1:1e6)

# Row-level expression: the printed result below shows y as float64,
# starting at 2.0.
df |> sedonadb:::sd_transmute(y = x + 1)
#> ┌─────────┐
#> │    y    │
#> │ float64 │
#> ╞═════════╡
#> │     2.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │     3.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │     4.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │     5.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │     6.0 │
#> ├╌╌╌╌╌╌╌╌╌┤
#> │     7.0 │
#> └─────────┘
#> Preview of up to 6 row(s)
# Aggregate expression: per the preview below, the same total (int64) is
# shown on each of the six displayed rows.
df |> sedonadb:::sd_transmute(y = sum(x))
#> ┌──────────────┐
#> │       y      │
#> │     int64    │
#> ╞══════════════╡
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500000500000 │
#> └──────────────┘
#> Preview of up to 6 row(s)
# Aggregate combined with a row-level column: the output below is consistent
# with sum(x + 1) being computed once and then added to each row's x.
df |> sedonadb:::sd_transmute(y = sum(x + 1) + x)
#> ┌────────────────┐
#> │        y       │
#> │     float64    │
#> ╞════════════════╡
#> │ 500001500001.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500002.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500003.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500004.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500005.0 │
#> ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
#> │ 500001500006.0 │
#> └────────────────┘
#> Preview of up to 6 row(s)

# Time a one-column transform of df through four code paths. check = FALSE
# turns off bench::mark()'s requirement that all expressions return equal
# results.
bench::mark(
  # NOTE(review): this expression evaluates sum(x + 1), while the other three
  # evaluate x + 1 -- confirm whether the aggregate is intended here.
  sd = df |> 
    sedonadb:::sd_transmute(y = sum(x + 1)) |> 
    sd_collect(),
  arrow = df |> 
    arrow::as_arrow_table() |> 
    dplyr::transmute(y = x + 1) |> 
    dplyr::collect(),
  # NOTE(review): identical to the `arrow` expression above; nothing here
  # routes through duckdb (e.g. via arrow::to_duckdb()), so this presumably
  # times the arrow path a second time -- confirm.
  duckdb = df |> 
    arrow::as_arrow_table() |> 
    dplyr::transmute(y = x + 1) |> 
    dplyr::collect(),
  dplyr = df |> dplyr::transmute(y = x + 1),
  check = FALSE
)
#> # A tibble: 4 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 sd           1.25ms   2.77ms      334.     7.7MB     84.5
#> 2 arrow        9.09ms    9.8ms      100.   38.01MB     17.1
#> 3 duckdb       9.19ms   9.64ms      103.  145.85KB     16.4
#> 4 dplyr        1.27ms   1.37ms      697.    8.89MB    161.

Created on 2026-01-08 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant