Skip to content

Commit

Permalink
Sync with DataFrames.jl 0.19. Remove support from AbstractDict
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Jul 15, 2019
2 parents 8626384 + a323b68 commit aec4be0
Show file tree
Hide file tree
Showing 17 changed files with 124 additions and 246 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.7
- 1.0
- nightly
os:
Expand Down
21 changes: 21 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name = "DataFramesMeta"
uuid = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
version = "0.5.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[extras]
Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Lazy", "Random", "Statistics", "Test"]

[compat]
julia = "1"
DataFrames = ">= 0.19"
Tables = ">= 0.2.3"
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![Travis](https://travis-ci.org/JuliaStats/DataFramesMeta.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/DataFramesMeta.jl)
[![AppVeyor](https://ci.appveyor.com/api/projects/status/github/juliastats/dataframesmeta.jl?branch=master&svg=true)](https://ci.appveyor.com/project/tshort/dataframesmeta-jl/branch/master)

Metaprogramming tools for DataFrames.jl and `AbstractDict` objects.
Metaprogramming tools for DataFrames.jl objects.
These macros improve performance and provide more convenient syntax.

# Features
Expand Down Expand Up @@ -42,15 +42,6 @@ colref = :x
@with(df, :y + cols(colref)) # Equivalent to df[!, :y] + df[!, colref]
```

This works for `AbstractDict` types, too:

```julia
y = 3
d = Dict(:s => 3, :y => 44, :d => 5)

@with(d, :s + :y + y)
```

`@with` is the fundamental macro used by the other metaprogramming
utilities.

Expand All @@ -73,7 +64,7 @@ Select row subsets.

## `@select`

Column selections and transformations. Also works with `AbstractDict` types.
Column selections and transformations.

```julia
@select(df, :x, :y, :z)
Expand All @@ -88,8 +79,6 @@ Add additional columns based on keyword arguments.
@transform(df, newCol = cos.(:x), anotherCol = :x.^2 + 3*:x .+ 4)
```

`@transform` works for `AbstractDict` types, too.

## `@byrow!`

Act on a DataFrame row-by-row. Includes support for control flow and `begin end` blocks. Since the "environment" induced by `@byrow! df` is implicitly a single row of `df`, one uses regular operators and comparisons instead of their elementwise counterparts as in `@with`.
Expand Down Expand Up @@ -190,6 +179,18 @@ macro.
Again, this is experimental. Based on feedback, we may decide to only
use `@linq` or only support the set of linq-like macros.

Alternatively, you can use the Lazy.jl `@>` macro like this:

```julia
x_thread = @> begin
df
@transform(y = 10 * :x)
@where(:a .> 2)
@by(:b, meanX = mean(:x), meanY = mean(:y))
@orderby(:meanX)
@select(:meanX, :meanY, var = :b)
end
```
## Operations on GroupedDataFrames

The following operations are now included:
Expand Down
3 changes: 0 additions & 3 deletions REQUIRE

This file was deleted.

1 change: 0 additions & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
environment:
matrix:
- julia_version: 0.7
- julia_version: 1.0
- julia_version: nightly

Expand Down
53 changes: 15 additions & 38 deletions src/DataFramesMeta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ function with_helper(d, body)
function $funname($(values(membernames)...))
$body
end
$funname($((:($d[$key]) for key in keys(membernames))...))
$funname($((:($d[!, $key]) for key in keys(membernames))...))
end
end
end
Expand All @@ -83,11 +83,11 @@ end
"""
@with(d, expr)
`@with` allows DataFrame columns or AbstractDict keys to be referenced as symbols.
`@with` allows DataFrame columns to be referenced as symbols.
### Arguments
* `d` : an AbstractDataFrame or AbstractDict type
* `d` : an AbstractDataFrame type
* `expr` : the expression to evaluate in `d`
### Details
Expand All @@ -110,7 +110,7 @@ becomes
```julia
tempfun(a, b) = a .+ b .+ 1
tempfun(d[:a], d[:b])
tempfun(d[!, :a], d[!, :b])
```
All of the other DataFramesMeta macros are based on `@with`.
Expand All @@ -126,11 +126,6 @@ julia> using DataFramesMeta
julia> y = 3;
julia> d = Dict(:s => 3, :y => 44, :d => 5);
julia> @with(d, :s + :y + y)
50
julia> df = DataFrame(x = 1:3, y = [2, 1, 2]);
julia> x = [2, 1, 0];
Expand Down Expand Up @@ -163,7 +158,7 @@ julia> @with(df, df[:x .> 1, ^(:y)]) # The ^ means leave the :y alone
julia> colref = :x;
julia> @with(df, :y + cols(colref)) # Equivalent to df[:y] + df[colref]
julia> @with(df, :y + cols(colref)) # Equivalent to df[!, :y] + df[!, colref]
3
3
5
Expand Down Expand Up @@ -289,7 +284,7 @@ end
##
##############################################################################

select(d::AbstractDataFrame, arg) = d[arg]
select(d::AbstractDataFrame, arg) = d[!, arg]


##############################################################################
Expand Down Expand Up @@ -378,10 +373,10 @@ end
##
##############################################################################

function transform(d::Union{AbstractDataFrame, AbstractDict}; kwargs...)
function transform(d::AbstractDataFrame; kwargs...)
result = copy(d)
for (k, v) in kwargs
result[k] = isa(v, Function) ? v(d) : v
result[!, k] = isa(v, Function) ? v(d) : v
end
return result
end
Expand All @@ -400,7 +395,7 @@ function transform(g::GroupedDataFrame; kwargs...)
t = _transform!(Tables.allocatecolumn(typeof(first), size(result, 1)),
first, 1, g, v, starts, ends)
end
result[k] = t
result[!, k] = t
end
return result
end
Expand Down Expand Up @@ -488,29 +483,18 @@ Add additional columns or keys based on keyword arguments.
### Arguments
* `d` : an AbstractDict type, AbstractDataFrame, or GroupedDataFrame
* `d` : an `AbstractDataFrame` or `GroupedDataFrame`
* `i...` : keyword arguments defining new columns
For AbstractDict types, `@transform` only works with keys that are symbols.
### Returns
* `::AbstractDataFrame`, `::AbstractDict`, or `::GroupedDataFrame`
* `::AbstractDataFrame` or `::GroupedDataFrame`
### Examples
```jldoctest
julia> using DataFramesMeta, DataFrames
julia> d = Dict(:s => 3, :y => 44, :d => 5);
julia> @transform(d, x = :y + :d)
Dict{Symbol,Int64} with 4 entries:
:d => 5
:s => 3
:y => 44
:x => 49
julia> df = DataFrame(A = 1:3, B = [2, 1, 2]);
julia> @transform(df, a = 2 * :A, x = :A .+ :B)
Expand Down Expand Up @@ -691,10 +675,10 @@ end
##############################################################################


function select(d::Union{AbstractDataFrame, AbstractDict}; kwargs...)
function select(d::AbstractDataFrame; kwargs...)
result = typeof(d)()
for (k, v) in kwargs
result[k] = v
result[!, k] = v
end
return result
end
Expand Down Expand Up @@ -734,26 +718,19 @@ Select and transform columns.
### Arguments
* `d` : an AbstractDataFrame or AbstractDict
* `d` : an AbstractDataFrame
* `e` : keyword arguments specifying new columns in terms of existing columns
or symbols to specify existing columns
### Returns
* `::AbstractDataFrame` or `::AbstractDict`
* `::AbstractDataFrame`
### Examples
```jldoctest
julia> using DataFrames, DataFramesMeta
julia> d = Dict(:s => 3, :y => 44, :d => 5);
julia> @select(d, x = :y + :d, :s)
Dict{Symbol,Int64} with 2 entries:
:s => 3
:x => 49
julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = randn(8))
8×3 DataFrames.DataFrame
│ Row │ a │ b │ c │
Expand Down
4 changes: 3 additions & 1 deletion src/byrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ byrow_find_newcols(x, newcol_decl) = (x, Any[])
function byrow_helper(df, body)
e_body, e_newcols = byrow_find_newcols(body, Any[])
quote
_N = length($df[1])
_N = length($df[!, 1])
_DF = @transform($df, $(e_newcols...))
$(with_helper(:_DF, :(for row = 1:_N
$(byrow_replace(e_body))
Expand All @@ -71,6 +71,8 @@ use `byrow!` for data transformations. `_N` is introduced to represent the
length of the dataframe, `_DF` represents the `dataframe` including added columns,
and `row` represents the index of the current row.
Also note that the returned data frame does not share columns with `d`.
### Arguments
* `d` : an `AbstractDataFrame`
Expand Down
1 change: 0 additions & 1 deletion test/REQUIRE

This file was deleted.

21 changes: 9 additions & 12 deletions test/chaining.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,16 @@ x_as = @as _x_ begin
@select(_x_, var = :b, :meanX, :meanY)
end

# Uncomment and add to README.md when it starts working:
# @> is broken in 0.7 Lazy
#x_thread = @> begin
# df
# @where(:a .> 2)
# @transform(y = 10 * :x)
# @by(:b, meanX = mean(:x), meanY = mean(:y))
# @orderby(:b, -:meanX)
# @select(var = :b, :meanX, :meanY)
#end
x_thread = @> begin
df
@where(:a .> 2)
@transform(y = 10 * :x)
@by(:b, meanX = mean(:x), meanY = mean(:y))
@orderby(:b, -:meanX)
@select(var = :b, :meanX, :meanY)
end

@test x == x_as
#@test x == x_thread

@test x == x_thread

end # module
8 changes: 4 additions & 4 deletions test/data.table.timings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ function dt_timings(D)
@time @by(D, [:id1, :id2], sv =sum(:v1));
@time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3));
@time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3));
@time aggregate(D[[4,7:9;]], :id4, mean);
@time aggregate(D[[4,7:9;]], :id4, mean);
@time aggregate(D[[6,7:9;]], :id6, sum);
@time aggregate(D[[6,7:9;]], :id6, sum);
@time aggregate(D[!, [4;7:9]], :id4, mean);
@time aggregate(D[!, [4;7:9]], :id4, mean);
@time aggregate(D[!, [6;7:9]], :id6, sum);
@time aggregate(D[!, [6;7:9]], :id6, sum);
return
end

Expand Down
36 changes: 18 additions & 18 deletions test/dataframes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ df = DataFrame(A = 1:3, B = [2, 1, 2])

x = [2, 1, 0]

@test @with(df, :A .+ 1) == df[:A] .+ 1
@test @with(df, :A .+ :B) == df[:A] .+ df[:B]
@test @with(df, :A .+ x) == df[:A] .+ x
@test @with(df, :A .+ 1) == df.A .+ 1
@test @with(df, :A .+ :B) == df.A .+ df.B
@test @with(df, :A .+ x) == df.A .+ x

x = @with df begin
res = 0.0
Expand All @@ -21,30 +21,30 @@ x = @with df begin
res
end
idx = :A
@test @with(df, cols(idx) .+ :B) == df[:A] .+ df[:B]
@test @with(df, cols(idx) .+ :B) == df.A .+ df.B
idx2 = :B
@test @with(df, cols(idx) .+ cols(idx2)) == df[:A] .+ df[:B]
@test @with(df, cols(idx) .+ cols(idx2)) == df.A .+ df.B

@test x == sum(df[:A] .* df[:B])
@test @with(df, df[:A .> 1, ^([:B, :A])]) == df[df[:A] .> 1, [:B, :A]]
@test @with(df, DataFrame(a = :A * 2, b = :A .+ :B)) == DataFrame(a = df[:A] * 2, b = df[:A] .+ df[:B])
@test x == sum(df.A .* df.B)
@test @with(df, df[:A .> 1, ^([:B, :A])]) == df[df.A .> 1, [:B, :A]]
@test @with(df, DataFrame(a = :A * 2, b = :A .+ :B)) == DataFrame(a = df.A * 2, b = df.A .+ df.B)

@test DataFramesMeta.where(df, 1) == df[1, :]

@test @where(df, :A .> 1) == df[df[:A] .> 1,:]
@test @where(df, :B .> 1) == df[df[:B] .> 1,:]
@test @where(df, :A .> x) == df[df[:A] .> x,:]
@test @where(df, :B .> x) == df[df[:B] .> x,:]
@test @where(df, :A .> :B) == df[df[:A] .> df[:B],:]
@test @where(df, :A .> 1, :B .> 1) == df[map(&, df[:A] .> 1, df[:B] .> 1),:]
@test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df[:A] .> 1, df[:A] .< 4, df[:B] .> 1),:]
@test @where(df, :A .> 1) == df[df.A .> 1,:]
@test @where(df, :B .> 1) == df[df.B .> 1,:]
@test @where(df, :A .> x) == df[df.A .> x,:]
@test @where(df, :B .> x) == df[df.B .> x,:]
@test @where(df, :A .> :B) == df[df.A .> df.B,:]
@test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:]
@test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]

@test DataFramesMeta.select(df, :A) == df[:A]
@test DataFramesMeta.select(df, :A) == df.A

@test DataFramesMeta.orderby(df, df[[1, 3, 2], :]) == df[[1, 3, 2], :]

@test @byrow!(df, if :A > :B; :A = 0 end) == DataFrame(A = [1, 0, 0], B = [2, 1, 2])
@test df == DataFrame(A = [1, 0, 0], B = [2, 1, 2])
@test df == DataFrame(A = [1, 2, 3], B = [2, 1, 2])

df = DataFrame(A = 1:3, B = [2, 1, 2]) # Restore df
y = 0
Expand All @@ -61,7 +61,7 @@ df2 = @byrow! df begin
end
end

@test df2[:colX] == [pi, 1.0, 3pi]
@test df2.colX == [pi, 1.0, 3pi]
@test df2[2, :colY] == 2

end # module
Loading

0 comments on commit aec4be0

Please sign in to comment.