From 81eee3558ec4ec623d42a5f75f66409a322ab722 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 21 Dec 2023 15:40:50 -0500
Subject: [PATCH] squash commits
---
docs/src/dplyr.md | 10 +++---
docs/src/index.md | 16 +++++----
src/macros.jl | 24 +++++++++++--
src/parsing.jl | 27 ++++++++++----
test/dataframes.jl | 12 +++----
test/grouping.jl | 4 +--
test/multicol.jl | 90 ++++++++++++++++++++++++++++++++++++++++++++++
test/runtests.jl | 3 +-
8 files changed, 156 insertions(+), 30 deletions(-)
create mode 100644 test/multicol.jl
diff --git a/docs/src/dplyr.md b/docs/src/dplyr.md
index c083520e..b42d6d0d 100644
--- a/docs/src/dplyr.md
+++ b/docs/src/dplyr.md
@@ -136,22 +136,22 @@ Similarly, to select the first column, use the syntax `$1`.
@select msleep $1
```
-To select all the columns *except* a specific column, use the `Not` function for inverse selection. We also need to wrap `Not` in the `$` sign, because it is not a symbol.
+To select all the columns *except* a specific column, use the `Not` function for inverse selection.
```@repl 1
-@select msleep $(Not(:name))
+@select msleep Not(:name)
```
To select a range of columns by name, use the `Between` operator:
```@repl 1
-@select msleep $(Between(:name, :order))
+@select msleep Between(:name, :order)
```
-To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/):
+To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/) in conjunction with `Cols`.
```@repl 1
-@select msleep $(r"^sl")
+@select msleep Cols(r"^sl")
```
Regular expressions are powerful, but can be difficult for new users to understand. Here are some quick tips.
diff --git a/docs/src/index.md b/docs/src/index.md
index 062aaee2..b3bafb84 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -49,12 +49,6 @@ but exported by DataFramesMeta for convenience.
# Provided macros
-!!! note
-
- Newer versions of DataFrames.jl support the operators `Between`, `All`, `Cols`,
- and `Not` when selecting and transforming columns. DataFramesMeta does not currently
- support this syntax.
-
## `@select` and `@select!`
Column selections and transformations. Only newly created columns are kept.
@@ -79,6 +73,16 @@ gd = groupby(df, :x);
@select!(gd, :y = 2 .* :y .* first(:y))
```
+To select or de-select multiple columns, use `Not`, `Between`, `All`, and `Cols`.
+These multi-column selectors are all re-exported from DataFrames.jl.
+
+```julia
+@select df Not(:x)
+@select df Between(:x, :y)
+@select df All()
+@select df Cols(r"x") # Regular expressions.
+```
+
## `@transform` and `@transform!`
Add additional columns based on keyword-like arguments. Operates on both a
diff --git a/src/macros.jl b/src/macros.jl
index 71e21e8d..055cb4a2 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1786,7 +1786,7 @@ end
function select_helper(x, args...)
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags, allow_multicol = true) for ex in exprs)
quote
$select($x, $(t...); $(kw...))
end
@@ -1851,6 +1851,18 @@ transformations by row, `@select` allows `@byrow` at the
beginning of a block of selections (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
+To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
+
+* `@select df Not(:a)` keeps all columns except for `:a`
+* `@select df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
+* `@select df All()` keeps all columns
+* `@select df Cols(...)` can be used to combine many different selectors, as well as use
+  regular expressions. For example `Cols(r"^a")` selects all columns that start with `"a"`.
+
+Expressions inside `Not(...)`, `Between(...)` etc. are untouched by DataFramesMeta's
+parsing. To refer to a variable `x` which represents a column inside `Not`, write `Not(x)`,
+rather than `Not($(DOLLAR)x)`.
+
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
@@ -1869,7 +1881,7 @@ When inputs are given in "block" format, the last lines may be written
```
@select gd begin
:a
- @select copycols = false
+ @kwarg copycols = false
end
```
@@ -2024,6 +2036,14 @@ transformations by row, `@select!` allows `@byrow` at the
beginning of a block of select!ations (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
+To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
+
+* `@select! df Not(:a)` keeps all columns except for `:a`
+* `@select! df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
+* `@select! df All()` keeps all columns
+* `@select! df Cols(...)` can be used to combine many different selectors, as well as use
+  regular expressions. For example `Cols(r"^a")` selects all columns that start with `"a"`.
+
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
diff --git a/src/parsing.jl b/src/parsing.jl
index 4f1b9e00..e2119e8e 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -18,17 +18,29 @@ a `QuoteNode` or an expression beginning with
If input is not a valid column identifier,
returns `nothing`.
"""
-get_column_expr(x) = nothing
-function get_column_expr(e::Expr)
+get_column_expr(x; allow_multicol::Bool = false) = nothing
+function get_column_expr(e::Expr; allow_multicol::Bool = false)
e.head == :$ && return e.args[1]
onearg(e, :AsTable) && return :($AsTable($(e.args[2])))
if onearg(e, :cols)
Base.depwarn("cols is deprecated use $DOLLAR to escape column names instead", :cols)
return e.args[2]
end
+    if e.head === :call  # multi-column selectors appear as plain calls, e.g. `Not(:a)`
+        e1 = e.args[1]  # the name being called
+        if e1 === :All || e1 === :Not || e1 === :Between || e1 === :Cols
+            if allow_multicol
+                return e  # hand the selector call back unchanged
+            else
+                s = "Multi-column references outside of @select, @rselect, @select!" *
+                    " and @rselect! must be wrapped in AsTable"
+                throw(ArgumentError(s))
+            end
+        end
+    end
return nothing
end
-get_column_expr(x::QuoteNode) = x
+get_column_expr(x::QuoteNode; allow_multicol::Bool = false) = x
get_column_expr_rename(x) = nothing
function get_column_expr_rename(e::Expr)
@@ -314,10 +326,12 @@ end
function fun_to_vec(ex::Expr;
gensym_names::Bool=false,
outer_flags::NamedTuple=deepcopy(DEFAULT_FLAGS),
- no_dest::Bool=false)
+ no_dest::Bool=false,
+ allow_multicol::Bool=false)
# classify the type of expression
# :x # handled via dispatch
# $:x # handled as though above
+    # All(), Between(...), Cols(...), Not(...) # require allow_multicol (only true in select)
# f(:x) # requires no_dest, for `@with` and `@subset` in future
# :y = :x # Simple pair
# :y = $:x # Extract and return simple pair (no function)
@@ -342,7 +356,7 @@ function fun_to_vec(ex::Expr;
# :x
# handled below via dispatch on ::QuoteNode
- ex_col = get_column_expr(ex)
+ ex_col = get_column_expr(ex; allow_multicol = allow_multicol)
if ex_col !== nothing
return ex_col
end
@@ -404,7 +418,8 @@ end
fun_to_vec(ex::QuoteNode;
no_dest::Bool=false,
gensym_names::Bool=false,
- outer_flags::Union{NamedTuple, Nothing}=nothing) = ex
+ outer_flags::Union{NamedTuple, Nothing}=nothing,
+ allow_multicol::Bool = false) = ex
"""
diff --git a/test/dataframes.jl b/test/dataframes.jl
index 5393b25e..05cffa6e 100644
--- a/test/dataframes.jl
+++ b/test/dataframes.jl
@@ -275,10 +275,10 @@ s = [:i, :g]
# not part of DataFramesMeta.
@test_throws LoadError @eval @transform(df, [:i, :g])
@test_throws LoadError @eval @transform(df, All())
- @test_throws LoadError @eval @transform(df, Between(:i, :t)).Between == df.i
- @test_throws LoadError @eval @transform(df, Not(:i)).Not == df.i
+ @test_throws LoadError @eval @transform(df, Between(:i, :t))
+ @test_throws LoadError @eval @transform(df, Not(:i))
@test_throws LoadError @eval @transform(df, Not([:i, :g]))
- @test_throws MethodError @eval @transform(df, :n = sum(Between(:i, :t)))
+ @test_throws LoadError @eval @transform(df, :n = sum(Between(:i, :t)))
end
@testset "@select" begin
@@ -546,11 +546,7 @@ cr = "c"
@testset "limits of @select" begin
## Test for not-implemented or strange behavior
@test_throws LoadError @eval @select(df, [:i, :g])
- @test_throws LoadError @eval @select(df, All())
- @test_throws LoadError @eval @select(df, Between(:i, :t)).Between == df.i
- @test_throws LoadError @eval @select(df, Not(:i)).Not == df.i
- @test_throws LoadError @eval @select(df, Not([:i, :g]))
- @test_throws MethodError @eval @select(df, :n = sum(Between(:i, :t)))
+ @test_throws LoadError @eval @select(df, :n = sum(Between(:i, :t)))
end
@testset "with" begin
diff --git a/test/grouping.jl b/test/grouping.jl
index 35ea8df2..0db08b6e 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -148,7 +148,7 @@ gd = groupby(df, :g)
newvar = :n
@testset "Limits of @combine" begin
- @test_throws MethodError @eval @combine(gd, :n = sum(Between(:i, :t)))
+ @test_throws LoadError @eval @combine(gd, :n = sum(Between(:i, :t)))
@test_throws ArgumentError @eval @combine(gd, :n = mean(:i) + mean(cols(1)))
end
@@ -287,7 +287,7 @@ gd = groupby(df, :g)
newvar = :n
@testset "limits of @by" begin
- @test_throws MethodError @eval @by(df, :g, :n = sum(Between(:i, :t)))
+ @test_throws LoadError @eval @by(df, :g, :n = sum(Between(:i, :t)))
@test_throws ArgumentError @eval @by(df, :g, :n = mean(:i) + mean(cols(1)))
end
diff --git a/test/multicol.jl b/test/multicol.jl
new file mode 100644
index 00000000..2e44607c
--- /dev/null
+++ b/test/multicol.jl
@@ -0,0 +1,90 @@
+module TestMultiCol
+
+using Test
+using DataFrames
+using DataFramesMeta
+
+df = DataFrame(A = 1, AA = 2, B = 3)
+
+@testset "select_multi" begin
+ df = DataFrame(A = 1, AA = 2, B = 3)
+
+ t = @select df Not(:A)
+ @test t == DataFrame(AA = 2, B = 3)
+
+ t = @select df All()
+ @test t == DataFrame(A = 1, AA = 2, B = 3)
+
+ t = @select df Cols(r"A")
+ @test t == DataFrame(A = 1, AA = 2)
+
+ t = @select df Between(:AA, :B)
+ @test t == DataFrame(AA = 2, B = 3)
+end
+
+@testset "othermacros_multi" begin
+ df = DataFrame(A = 1, AA = 2, B = 3)
+
+ @test_throws LoadError @eval @with df Not(:A)
+
+ @test_throws LoadError @eval @with df All()
+
+ @test_throws LoadError @eval @with df Cols(r"A")
+
+ @test_throws LoadError @eval @with df Between(:AA, :B)
+
+ @test_throws LoadError @eval @with(df, begin
+ 1
+ Not(:A)
+ end)
+
+ @test_throws LoadError @eval @with df begin
+ 1
+ All()
+ end
+
+ @test_throws LoadError @eval @with df begin
+ 1
+ Cols(r"A")
+ end
+
+ @test_throws LoadError @eval @with df begin
+ 1
+ Between(:AA, :B)
+ end
+end
+
+@testset "select_multi_rhs" begin
+    df = DataFrame(A = 1, AA = 2, B = 3)
+    # A multi-column selector on the right-hand side of `:y = ...` must fail at macro expansion.
+    @test_throws LoadError @eval @select df :y = Not(:A)
+
+    @test_throws LoadError @eval @select df :y = All()
+
+    @test_throws LoadError @eval @select df :y = Cols(r"A")
+
+    @test_throws LoadError @eval @select df :y = Between(:AA, :B)
+    # Same check when the selector is the last expression of a `begin ... end` block.
+    @test_throws LoadError @eval @select(df, :y = begin
+        1
+        Not(:A)
+    end)
+
+    @test_throws LoadError @eval @select df :y = begin
+        1
+        All()
+    end
+
+    @test_throws LoadError @eval @select df :y = begin
+        1
+        Cols(r"A")
+    end
+
+    @test_throws LoadError @eval @select df :y = begin
+        1
+        Between(:AA, :B)
+    end
+end
+
+
+end # module
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cb43e93..4c12faf9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,8 @@ my_tests = ["dataframes.jl",
"byrow.jl",
"astable.jl",
"astable_flag.jl",
- "passmissing.jl"]
+ "passmissing.jl",
+ "multicol.jl"]
println("Running tests:")