Skip to content

Commit

Permalink
squash commits (#372)
Browse files Browse the repository at this point in the history
  • Loading branch information
pdeffebach authored Dec 22, 2023
1 parent 2621e98 commit 8ebefa1
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 30 deletions.
10 changes: 5 additions & 5 deletions docs/src/dplyr.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,22 +136,22 @@ Similarly, to select the first column, use the syntax `$1`.
@select msleep $1
```

To select all the columns *except* a specific column, use the `Not` function for inverse selection. We also need to wrap `Not` in the `$` sign, because it is not a symbol.
To select all the columns *except* a specific column, use the `Not` function for inverse selection.

```@repl 1
@select msleep $(Not(:name))
@select msleep Not(:name)
```

To select a range of columns by name, use the `Between` operator:

```@repl 1
@select msleep $(Between(:name, :order))
@select msleep Between(:name, :order)
```

To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/):
To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/) in conjunction with `Cols`.

```@repl 1
@select msleep $(r"^sl")
@select msleep Cols(r"^sl")
```

Regular expressions are powerful, but can be difficult for new users to understand. Here are some quick tips.
Expand Down
16 changes: 10 additions & 6 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,6 @@ but exported by DataFramesMeta for convenience.

# Provided macros

!!! note

Newer versions of DataFrames.jl support the operators `Between`, `All`, `Cols`,
and `Not` when selecting and transforming columns. DataFramesMeta does not currently
support this syntax.

## `@select` and `@select!`

Column selections and transformations. Only newly created columns are kept.
Expand All @@ -79,6 +73,16 @@ gd = groupby(df, :x);
@select!(gd, :y = 2 .* :y .* first(:y))
```

To select or de-select multiple columns, use `Not`, `Between`, `All`, and `Cols`.
These multi-column selectors are all re-exported from DataFrames.jl.

```julia
@select df Not(:x)
@select df Between(:x, :y)
@select df All()
@select df Cols(r"x") # Regular expressions.
```

## `@transform` and `@transform!`

Add additional columns based on keyword-like arguments. Operates on both a
Expand Down
24 changes: 22 additions & 2 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,7 @@ end
function select_helper(x, args...)
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)

t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags, allow_multicol = true) for ex in exprs)
quote
$select($x, $(t...); $(kw...))
end
Expand Down Expand Up @@ -1851,6 +1851,18 @@ transformations by row, `@select` allows `@byrow` at the
beginning of a block of selections (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
* `@select df Not(:a)` keeps all columns except for `:a`
* `@select df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
* `@select df All()` keeps all columns
* `@select df Cols(...)` can be used to combine many different selectors, as well as use
regular expressions. For example `Cols(r"^a")` selects all columns that start with `"a"`.
Expressions inside `Not(...)`, `Between(...)` etc. are untouched by DataFramesMeta's
parsing. To refer to a variable `x` which represents a column inside `Not`, write `Not(x)`,
rather than `Not($(DOLLAR)x)`.
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
Expand All @@ -1869,7 +1881,7 @@ When inputs are given in "block" format, the last lines may be written
```
@select gd begin
:a
@select copycols = false
@kwarg copycols = false
end
```
Expand Down Expand Up @@ -2024,6 +2036,14 @@ transformations by row, `@select!` allows `@byrow` at the
beginning of a block of selections (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
* `@select! df Not(:a)` keeps all columns except for `:a`
* `@select! df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
* `@select! df All()` keeps all columns
* `@select! df Cols(...)` can be used to combine many different selectors, as well as use
regular expressions. For example `Cols(r"^a")` selects all columns that start with `"a"`.
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
Expand Down
27 changes: 21 additions & 6 deletions src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,29 @@ a `QuoteNode` or an expression beginning with
If input is not a valid column identifier,
returns `nothing`.
"""
get_column_expr(x) = nothing
function get_column_expr(e::Expr)
get_column_expr(x; allow_multicol::Bool = false) = nothing
# `Expr` method: return the column-identifier expression encoded by `e`, or
# `nothing` when `e` is not a column identifier.
#
#   * `$x`          -> the escaped expression `x`
#   * `AsTable(x)`  -> a call to `AsTable` on `x`
#   * `cols(x)`     -> `x`, after emitting a deprecation warning
#   * `All(...)`, `Not(...)`, `Between(...)`, `Cols(...)`
#                   -> `e` itself, unmodified, when `allow_multicol` is `true`;
#                      otherwise an `ArgumentError` is thrown.
function get_column_expr(e::Expr; allow_multicol::Bool = false)
    e.head === :$ && return e.args[1]
    onearg(e, :AsTable) && return :($AsTable($(e.args[2])))
    if onearg(e, :cols)
        Base.depwarn("cols is deprecated use $DOLLAR to escape column names instead", :cols)
        return e.args[2]
    end
    if e.head === :call
        callee = e.args[1]
        # Symbols are compared by identity throughout (`===`).
        if callee === :All || callee === :Not || callee === :Between || callee === :Cols
            # The selector call is handed back as-is; nothing inside it is parsed.
            allow_multicol && return e
            msg = "Multi-column references outside of @select, @rselect, @select!" *
                  " and @rselect! must be wrapped in AsTable"
            throw(ArgumentError(msg))
        end
    end
    return nothing
end
get_column_expr(x::QuoteNode) = x
get_column_expr(x::QuoteNode; allow_multicol::Bool = false) = x

get_column_expr_rename(x) = nothing
function get_column_expr_rename(e::Expr)
Expand Down Expand Up @@ -314,10 +326,12 @@ end
function fun_to_vec(ex::Expr;
gensym_names::Bool=false,
outer_flags::NamedTuple=deepcopy(DEFAULT_FLAGS),
no_dest::Bool=false)
no_dest::Bool=false,
allow_multicol::Bool=false)
# classify the type of expression
# :x # handled via dispatch
# $:x # handled as though above
# All(), Between(...), Cols(...), Not(...), requires allow_multicol (only true in select)
# f(:x) # requires no_dest, for `@with` and `@subset` in future
# :y = :x # Simple pair
# :y = $:x # Extract and return simple pair (no function)
Expand All @@ -342,7 +356,7 @@ function fun_to_vec(ex::Expr;
# :x
# handled below via dispatch on ::QuoteNode

ex_col = get_column_expr(ex)
ex_col = get_column_expr(ex; allow_multicol = allow_multicol)
if ex_col !== nothing
return ex_col
end
Expand Down Expand Up @@ -404,7 +418,8 @@ end
fun_to_vec(ex::QuoteNode;
no_dest::Bool=false,
gensym_names::Bool=false,
outer_flags::Union{NamedTuple, Nothing}=nothing) = ex
outer_flags::Union{NamedTuple, Nothing}=nothing,
allow_multicol::Bool = false) = ex


"""
Expand Down
12 changes: 4 additions & 8 deletions test/dataframes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -275,10 +275,10 @@ s = [:i, :g]
# not part of DataFramesMeta.
@test_throws LoadError @eval @transform(df, [:i, :g])
@test_throws LoadError @eval @transform(df, All())
@test_throws LoadError @eval @transform(df, Between(:i, :t)).Between == df.i
@test_throws LoadError @eval @transform(df, Not(:i)).Not == df.i
@test_throws LoadError @eval @transform(df, Between(:i, :t))
@test_throws LoadError @eval @transform(df, Not(:i))
@test_throws LoadError @eval @transform(df, Not([:i, :g]))
@test_throws MethodError @eval @transform(df, :n = sum(Between(:i, :t)))
@test_throws LoadError @eval @transform(df, :n = sum(Between(:i, :t)))
end

@testset "@select" begin
Expand Down Expand Up @@ -546,11 +546,7 @@ cr = "c"
@testset "limits of @select" begin
## Test for not-implemented or strange behavior
@test_throws LoadError @eval @select(df, [:i, :g])
@test_throws LoadError @eval @select(df, All())
@test_throws LoadError @eval @select(df, Between(:i, :t)).Between == df.i
@test_throws LoadError @eval @select(df, Not(:i)).Not == df.i
@test_throws LoadError @eval @select(df, Not([:i, :g]))
@test_throws MethodError @eval @select(df, :n = sum(Between(:i, :t)))
@test_throws LoadError @eval @select(df, :n = sum(Between(:i, :t)))
end

@testset "with" begin
Expand Down
4 changes: 2 additions & 2 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ gd = groupby(df, :g)
newvar = :n

@testset "Limits of @combine" begin
@test_throws MethodError @eval @combine(gd, :n = sum(Between(:i, :t)))
@test_throws LoadError @eval @combine(gd, :n = sum(Between(:i, :t)))
@test_throws ArgumentError @eval @combine(gd, :n = mean(:i) + mean(cols(1)))
end

Expand Down Expand Up @@ -287,7 +287,7 @@ gd = groupby(df, :g)
newvar = :n

@testset "limits of @by" begin
@test_throws MethodError @eval @by(df, :g, :n = sum(Between(:i, :t)))
@test_throws LoadError @eval @by(df, :g, :n = sum(Between(:i, :t)))
@test_throws ArgumentError @eval @by(df, :g, :n = mean(:i) + mean(cols(1)))
end

Expand Down
90 changes: 90 additions & 0 deletions test/multicol.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Tests for the multi-column selectors `Not`, `All`, `Cols` and `Between`.
#
# The selectors are accepted as stand-alone arguments of `@select`; the
# remaining testsets check that they are rejected by `@with` and when they
# appear on the right-hand side of a `:y = ...` assignment in `@select`.
module TestMultiCol

using Test
using DataFrames
using DataFramesMeta

# Module-level `df`: expressions passed to `@eval` below are evaluated at
# module scope, not inside the local scope of the enclosing `@testset`.
df = DataFrame(A = 1, AA = 2, B = 3)

@testset "select_multi" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    t = @select df Not(:A)
    @test t == DataFrame(AA = 2, B = 3)

    t = @select df All()
    @test t == DataFrame(A = 1, AA = 2, B = 3)

    # `r"A"` matches every column name containing "A".
    t = @select df Cols(r"A")
    @test t == DataFrame(A = 1, AA = 2)

    t = @select df Between(:AA, :B)
    @test t == DataFrame(AA = 2, B = 3)
end

# Each failing macro call is wrapped in `@eval` so that the error is raised
# while the test runs and can be caught by `@test_throws`.
@testset "othermacros_multi" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    @test_throws LoadError @eval @with df Not(:A)

    @test_throws LoadError @eval @with df All()

    @test_throws LoadError @eval @with df Cols(r"A")

    @test_throws LoadError @eval @with df Between(:AA, :B)

    @test_throws LoadError @eval @with(df, begin
        1
        Not(:A)
    end)

    @test_throws LoadError @eval @with df begin
        1
        All()
    end

    @test_throws LoadError @eval @with df begin
        1
        Cols(r"A")
    end

    @test_throws LoadError @eval @with df begin
        1
        Between(:AA, :B)
    end
end

# Selectors used as the right-hand side of an assignment inside `@select`.
# This testset has its own name so that its results are reported separately
# from the `@with` tests above.
@testset "select_rhs_multi" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    @test_throws LoadError @eval @select df :y = Not(:A)

    @test_throws LoadError @eval @select df :y = All()

    @test_throws LoadError @eval @select df :y = Cols(r"A")

    @test_throws LoadError @eval @select df :y = Between(:AA, :B)

    @test_throws LoadError @eval @select(df, :y = begin
        1
        Not(:A)
    end)

    @test_throws LoadError @eval @select df :y = begin
        1
        All()
    end

    @test_throws LoadError @eval @select df :y = begin
        1
        Cols(r"A")
    end

    @test_throws LoadError @eval @select df :y = begin
        1
        Between(:AA, :B)
    end
end

end # module
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ my_tests = ["dataframes.jl",
"byrow.jl",
"astable.jl",
"astable_flag.jl",
"passmissing.jl"]
"passmissing.jl",
"multicol.jl"]

println("Running tests:")

Expand Down

0 comments on commit 8ebefa1

Please sign in to comment.