From 1623436ff4d6d59fe6bd5517b388bdec5bc71093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 13 Jul 2019 21:29:47 +0200 Subject: [PATCH 1/6] sync with DataFrames.jl 0.19 --- Project.toml | 20 ++++++++++++ README.md | 27 ++++++++-------- REQUIRE | 3 -- src/DataFramesMeta.jl | 53 +++++++++---------------------- src/byrow.jl | 4 ++- test/REQUIRE | 1 - test/chaining.jl | 21 ++++++------ test/data.table.timings.jl | 8 ++--- test/dataframes.jl | 36 ++++++++++----------- test/dict.jl | 41 ------------------------ test/grouping.jl | 50 ++++++++++++++--------------- test/linqmacro.jl | 6 ++-- test/runtests.jl | 3 +- test/speedtests.jl | 65 -------------------------------------- test/timings.jl | 21 +++--------- 15 files changed, 117 insertions(+), 242 deletions(-) create mode 100644 Project.toml delete mode 100644 REQUIRE delete mode 100644 test/REQUIRE delete mode 100644 test/dict.jl delete mode 100644 test/speedtests.jl diff --git a/Project.toml b/Project.toml new file mode 100644 index 00000000..63ee4cbf --- /dev/null +++ b/Project.toml @@ -0,0 +1,20 @@ +name = "DataFramesMeta" +uuid = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" +version = "0.5.0" + +[deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" + +[extras] +Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Lazy", "Random", "Test"] + +[compat] +julia = "0.7, 1" +DataFrames = ">= 0.19" +Tables = ">= 0.2.3" diff --git a/README.md b/README.md index 9d56655b..46b6bb2e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Travis](https://travis-ci.org/JuliaStats/DataFramesMeta.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/DataFramesMeta.jl) 
[![AppVeyor](https://ci.appveyor.com/api/projects/status/github/juliastats/dataframesmeta.jl?branch=master&svg=true)](https://ci.appveyor.com/project/tshort/dataframesmeta-jl/branch/master) -Metaprogramming tools for DataFrames.jl and `AbstractDict` objects. +Metaprogramming tools for DataFrames.jl objects. These macros improve performance and provide more convenient syntax. # Features @@ -42,15 +42,6 @@ colref = :x @with(df, :y + cols(colref)) # Equivalent to df[:y] + df[colref] ``` -This works for `AbstractDict` types, too: - -```julia -y = 3 -d = Dict(:s => 3, :y => 44, :d => 5) - -@with(d, :s + :y + y) -``` - `@with` is the fundamental macro used by the other metaprogramming utilities. @@ -73,7 +64,7 @@ Select row subsets. ## `@select` -Column selections and transformations. Also works with `AbstractDict` types. +Column selections and transformations. ```julia @select(df, :x, :y, :z) @@ -88,8 +79,6 @@ Add additional columns based on keyword arguments. @transform(df, newCol = cos.(:x), anotherCol = :x.^2 + 3*:x .+ 4) ``` -`@transform` works for `AbstractDict` types, too. - ## `@byrow!` Act on a DataFrame row-by-row. Includes support for control flow and `begin end` blocks. Since the "environment" induced by `@byrow! df` is implicitly a single row of `df`, one uses regular operators and comparisons instead of their elementwise counterparts as in `@with`. @@ -190,6 +179,18 @@ macro. Again, this is experimental. Based on feedback, we may decide to only use `@linq` or only support the set of linq-like macros. 
+Alternatively, you can use the Lazy.jl `@>` macro like this: + +```julia +x_thread = @> begin + df + @transform(y = 10 * :x) + @where(:a .> 2) + @by(:b, meanX = mean(:x), meanY = mean(:y)) + @orderby(:meanX) + @select(:meanX, :meanY, var = :b) +end +``` ## Operations on GroupedDataFrames The following operations are now included: diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 9c3d802c..00000000 --- a/REQUIRE +++ /dev/null @@ -1,3 +0,0 @@ -julia 0.7 -DataFrames 0.17.1 -Tables 0.1.15 diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 36c3b737..a356dd54 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -70,7 +70,7 @@ function with_helper(d, body) function $funname($(values(membernames)...)) $body end - $funname($((:($d[$key]) for key in keys(membernames))...)) + $funname($((:($d[!, $key]) for key in keys(membernames))...)) end end end @@ -83,11 +83,11 @@ end """ @with(d, expr) -`@with` allows DataFrame columns or AbstractDict keys to be referenced as symbols. +`@with` allows DataFrame columns to be referenced as symbols. ### Arguments -* `d` : an AbstractDataFrame or AbstractDict type +* `d` : an AbstractDataFrame type * `expr` : the expression to evaluate in `d` ### Details @@ -110,7 +110,7 @@ becomes ```julia tempfun(a, b) = a .+ b .+ 1 -tempfun(d[:a], d[:b]) +tempfun(d[!, :a], d[!, :b]) ``` All of the other DataFramesMeta macros are based on `@with`.
@@ -126,11 +126,6 @@ julia> using DataFramesMeta julia> y = 3; -julia> d = Dict(:s => 3, :y => 44, :d => 5); - -julia> @with(d, :s + :y + y) -50 - julia> df = DataFrame(x = 1:3, y = [2, 1, 2]); julia> x = [2, 1, 0]; @@ -163,7 +158,7 @@ julia> @with(df, df[:x .> 1, ^(:y)]) # The ^ means leave the :y alone julia> colref = :x; -julia> @with(df, :y + cols(colref)) # Equivalent to df[:y] + df[colref] +julia> @with(df, :y + cols(colref)) # Equivalent to df[!, :y] + df[!, colref] 3 3 5 @@ -289,7 +284,7 @@ end ## ############################################################################## -select(d::AbstractDataFrame, arg) = d[arg] +select(d::AbstractDataFrame, arg) = d[!, arg] ############################################################################## @@ -378,10 +373,10 @@ end ## ############################################################################## -function transform(d::Union{AbstractDataFrame, AbstractDict}; kwargs...) +function transform(d::AbstractDataFrame; kwargs...) result = copy(d) for (k, v) in kwargs - result[k] = isa(v, Function) ? v(d) : v + result[!, k] = isa(v, Function) ? v(d) : v end return result end @@ -400,7 +395,7 @@ function transform(g::GroupedDataFrame; kwargs...) t = _transform!(Tables.allocatecolumn(typeof(first), size(result, 1)), first, 1, g, v, starts, ends) end - result[k] = t + result[!, k] = t end return result end @@ -488,29 +483,18 @@ Add additional columns or keys based on keyword arguments. ### Arguments -* `d` : an AbstractDict type, AbstractDataFrame, or GroupedDataFrame +* `d` : an `AbstractDataFrame`, or `GroupedDataFrame` * `i...` : keyword arguments defining new columns or keys -For AbstractDict types, `@transform` only works with keys that are symbols. 
- ### Returns -* `::AbstractDataFrame`, `::AbstractDict`, or `::GroupedDataFrame` +* `::AbstractDataFrame` or `::GroupedDataFrame` ### Examples ```jldoctest julia> using DataFramesMeta, DataFrames -julia> d = Dict(:s => 3, :y => 44, :d => 5); - -julia> @transform(d, x = :y + :d) -Dict{Symbol,Int64} with 4 entries: - :d => 5 - :s => 3 - :y => 44 - :x => 49 - julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); julia> @transform(df, a = 2 * :A, x = :A .+ :B) @@ -691,10 +675,10 @@ end ############################################################################## -function select(d::Union{AbstractDataFrame, AbstractDict}; kwargs...) +function select(d::AbstractDataFrame; kwargs...) result = typeof(d)() for (k, v) in kwargs - result[k] = v + result[!, k] = v end return result end @@ -734,26 +718,19 @@ Select and transform columns. ### Arguments -* `d` : an AbstractDataFrame or AbstractDict +* `d` : an AbstractDataFrame * `e` : keyword arguments specifying new columns in terms of existing columns or symbols to specify existing columns ### Returns -* `::AbstractDataFrame` or `::AbstractDict` +* `::AbstractDataFrame` ### Examples ```jldoctest julia> using DataFrames, DataFramesMeta -julia> d = Dict(:s => 3, :y => 44, :d => 5); - -julia> @select(d, x = :y + :d, :s) -Dict{Symbol,Int64} with 2 entries: - :s => 3 - :x => 49 - julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), c = randn(8)) 8×3 DataFrames.DataFrame │ Row │ a │ b │ c │ diff --git a/src/byrow.jl b/src/byrow.jl index ae9e4777..02ac7d60 100644 --- a/src/byrow.jl +++ b/src/byrow.jl @@ -44,7 +44,7 @@ byrow_find_newcols(x, newcol_decl) = (x, Any[]) function byrow_helper(df, body) e_body, e_newcols = byrow_find_newcols(body, Any[]) quote - _N = length($df[1]) + _N = length($df[!, 1]) _DF = @transform($df, $(e_newcols...)) $(with_helper(:_DF, :(for row = 1:_N $(byrow_replace(e_body)) @@ -71,6 +71,8 @@ use `byrow!` for data transformations. 
`_N` is introduced to represent the length of the dataframe, `_D` represents the `dataframe` including added columns, and `row` represents the index of the current row. +Also note that the returned data frame does not share columns with `d`. + ### Arguments * `d` : an `AbstractDataFrame` diff --git a/test/REQUIRE b/test/REQUIRE deleted file mode 100644 index e2749de5..00000000 --- a/test/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -Lazy diff --git a/test/chaining.jl b/test/chaining.jl index 862bc039..16bd3512 100644 --- a/test/chaining.jl +++ b/test/chaining.jl @@ -28,19 +28,16 @@ x_as = @as _x_ begin @select(_x_, var = :b, :meanX, :meanY) end -# Uncomment and add to README.md when it starts working: -# @> is broken in 0.7 Lazy -#x_thread = @> begin -# df -# @where(:a .> 2) -# @transform(y = 10 * :x) -# @by(:b, meanX = mean(:x), meanY = mean(:y)) -# @orderby(:b, -:meanX) -# @select(var = :b, :meanX, :meanY) -#end +x_thread = @> begin + df + @where(:a .> 2) + @transform(y = 10 * :x) + @by(:b, meanX = mean(:x), meanY = mean(:y)) + @orderby(:b, -:meanX) + @select(var = :b, :meanX, :meanY) +end @test x == x_as -#@test x == x_thread - +@test x == x_thread end # module diff --git a/test/data.table.timings.jl b/test/data.table.timings.jl index b9798763..427452d6 100644 --- a/test/data.table.timings.jl +++ b/test/data.table.timings.jl @@ -57,10 +57,10 @@ function dt_timings(D) @time @by(D, [:id1, :id2], sv =sum(:v1)); @time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3)); @time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3)); - @time aggregate(D[[4,7:9;]], :id4, mean); - @time aggregate(D[[4,7:9;]], :id4, mean); - @time aggregate(D[[6,7:9;]], :id6, sum); - @time aggregate(D[[6,7:9;]], :id6, sum); + @time aggregate(D[!, [4;7:9]], :id4, mean); + @time aggregate(D[!, [4;7:9]], :id4, mean); + @time aggregate(D[!, [6;7:9]], :id6, sum); + @time aggregate(D[!, [6;7:9]], :id6, sum); return end diff --git a/test/dataframes.jl b/test/dataframes.jl index 64430995..a2d1d548 100644 --- 
a/test/dataframes.jl +++ b/test/dataframes.jl @@ -9,9 +9,9 @@ df = DataFrame(A = 1:3, B = [2, 1, 2]) x = [2, 1, 0] -@test @with(df, :A .+ 1) == df[:A] .+ 1 -@test @with(df, :A .+ :B) == df[:A] .+ df[:B] -@test @with(df, :A .+ x) == df[:A] .+ x +@test @with(df, :A .+ 1) == df.A .+ 1 +@test @with(df, :A .+ :B) == df.A .+ df.B +@test @with(df, :A .+ x) == df.A .+ x x = @with df begin res = 0.0 @@ -21,30 +21,30 @@ x = @with df begin res end idx = :A -@test @with(df, cols(idx) .+ :B) == df[:A] .+ df[:B] +@test @with(df, cols(idx) .+ :B) == df.A .+ df.B idx2 = :B -@test @with(df, cols(idx) .+ cols(idx2)) == df[:A] .+ df[:B] +@test @with(df, cols(idx) .+ cols(idx2)) == df.A .+ df.B -@test x == sum(df[:A] .* df[:B]) -@test @with(df, df[:A .> 1, ^([:B, :A])]) == df[df[:A] .> 1, [:B, :A]] -@test @with(df, DataFrame(a = :A * 2, b = :A .+ :B)) == DataFrame(a = df[:A] * 2, b = df[:A] .+ df[:B]) +@test x == sum(df.A .* df.B) +@test @with(df, df[:A .> 1, ^([:B, :A])]) == df[df.A .> 1, [:B, :A]] +@test @with(df, DataFrame(a = :A * 2, b = :A .+ :B)) == DataFrame(a = df.A * 2, b = df.A .+ df.B) @test DataFramesMeta.where(df, 1) == df[1, :] -@test @where(df, :A .> 1) == df[df[:A] .> 1,:] -@test @where(df, :B .> 1) == df[df[:B] .> 1,:] -@test @where(df, :A .> x) == df[df[:A] .> x,:] -@test @where(df, :B .> x) == df[df[:B] .> x,:] -@test @where(df, :A .> :B) == df[df[:A] .> df[:B],:] -@test @where(df, :A .> 1, :B .> 1) == df[map(&, df[:A] .> 1, df[:B] .> 1),:] -@test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df[:A] .> 1, df[:A] .< 4, df[:B] .> 1),:] +@test @where(df, :A .> 1) == df[df.A .> 1,:] +@test @where(df, :B .> 1) == df[df.B .> 1,:] +@test @where(df, :A .> x) == df[df.A .> x,:] +@test @where(df, :B .> x) == df[df.B .> x,:] +@test @where(df, :A .> :B) == df[df.A .> df.B,:] +@test @where(df, :A .> 1, :B .> 1) == df[map(&, df.A .> 1, df.B .> 1),:] +@test @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:] -@test DataFramesMeta.select(df, 
:A) == df[:A] +@test DataFramesMeta.select(df, :A) == df.A @test DataFramesMeta.orderby(df, df[[1, 3, 2], :]) == df[[1, 3, 2], :] @test @byrow!(df, if :A > :B; :A = 0 end) == DataFrame(A = [1, 0, 0], B = [2, 1, 2]) -@test df == DataFrame(A = [1, 0, 0], B = [2, 1, 2]) +@test df == DataFrame(A = [1, 2, 3], B = [2, 1, 2]) df = DataFrame(A = 1:3, B = [2, 1, 2]) # Restore df y = 0 @@ -61,7 +61,7 @@ df2 = @byrow! df begin end end -@test df2[:colX] == [pi, 1.0, 3pi] +@test df2.colX == [pi, 1.0, 3pi] @test df2[2, :colY] == 2 end # module diff --git a/test/dict.jl b/test/dict.jl deleted file mode 100644 index dd1e62ca..00000000 --- a/test/dict.jl +++ /dev/null @@ -1,41 +0,0 @@ -module TestDicts - -using Test -using DataFramesMeta - -y = 3 -d = Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b)) -@test @with(d, :s + :y) == d[:s] + d[:y] -@test @with(d, :s + y) == d[:s] + y -@test @with(d, d) == d -@test @with(d, :s + d[^(:y)]) == d[:s] + d[:y] -@test @with(d, :e.head) == d[:e].head -@test @with(Dict(:s => 3, :y => 44, :d => 5, :e => :(a + b)), :e.head) == d[:e].head - -x = @with d begin - z = y + :y - 1 - :s + z -end -@test x == y + d[:y] - 1 + d[:s] - -fun = d -> @with d begin - z = y + :y - 1 - :s + z -end -@test fun(d) == y + d[:y] - 1 + d[:s] - -d2 = @transform(d, z = :y + :s) -@test d2[:z] == 47 - -d2 = @select(d, :y, z = :y + :s, :e) -@test d2[:z] == 47 - -@test DataFramesMeta.with_helper(:df, :(f.(1))) == :(f.(1)) -@test DataFramesMeta.with_helper(:df, :(f.(b + c))) == :(f.(b + c)) -@test DataFramesMeta.with_helper(:df, Expr(:., :a, QuoteNode(:b))) == Expr(:., :a, QuoteNode(:b)) -@test DataFramesMeta.select_helper(:df, 1).args[2].args[2].args[2].args[3] == 1 -@test DataFramesMeta.select_helper(:df, QuoteNode(:a)).args[2].args[2].args[2].args[2].args[2].args[2].args[3].args[1] == :a -@test DataFramesMeta.expandargs(QuoteNode(:a)) == Expr(:kw, :a, QuoteNode(:a)) -@test DataFramesMeta.byrow_find_newcols(:(;), Any[]) == (Any[], Any[]) - -end # module diff --git 
a/test/grouping.jl b/test/grouping.jl index 2114068d..1ee4e6c0 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -11,52 +11,52 @@ using Tables d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 3, 1, 1, 2]) g = groupby(d, :x, sort=true) -@test @where(d, :x .== 3) == DataFramesMeta.where(d, x -> x[:x] .== 3) -@test DataFrame(@where(g, length(:x) > 5)) == DataFrame(DataFramesMeta.where(g, x -> length(x[:x]) > 5)) -@test DataFrame(@where(g, length(:x) > 5))[:n][1:3] == [5, 6, 7] +@test @where(d, :x .== 3) == DataFramesMeta.where(d, x -> x.x .== 3) +@test DataFrame(@where(g, length(:x) > 5)) == DataFrame(DataFramesMeta.where(g, x -> length(x.x) > 5)) +@test DataFrame(@where(g, length(:x) > 5))[!, :n][1:3] == [5, 6, 7] -@test DataFrame(DataFramesMeta.orderby(g, x -> mean(x[:n]))) == DataFrame(@orderby(g, mean(:n))) +@test DataFrame(DataFramesMeta.orderby(g, x -> mean(x.n))) == DataFrame(@orderby(g, mean(:n))) @test (@transform(g, y = :n .- median(:n)))[1,:y] == -5.0 d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 3, 1, 1, 2]) g = groupby(d, :x, sort=true) -@test @based_on(g, nsum = sum(:n))[:nsum] == [99, 84, 27] +@test @based_on(g, nsum = sum(:n)).nsum == [99, 84, 27] # Transform tests -d = DataFrame(a = [1,1,1,2,2,3,3,1], - b = Any[1,2,3,missing,missing,6.0,5.0,4], +d = DataFrame(a = [1,1,1,2,2,3,3,1], + b = Any[1,2,3,missing,missing,6.0,5.0,4], c = CategoricalArray([1,2,3,1,2,3,1,2])) g = groupby(d, :a) -## Scalar output +## Scalar output # Type promotion Int -> Float -t = @transform(g, t = :b[1])[:t] -@test isequal(t, [1.0, 1.0, 1.0, 1.0, missing, missing, 6.0, 6.0]) && +t = @transform(g, t = :b[1]).t +@test isequal(t, [1.0, 1.0, 1.0, 1.0, missing, missing, 6.0, 6.0]) && t isa Vector{Union{Float64, Missing}} # Type promotion Number -> Any -t = @transform(g, t = isequal(:b[1], 1) ? :b[1] : "a")[:t] -@test isequal(t, [1, 1, 1, 1, "a", "a", "a", "a"]) && +t = @transform(g, t = isequal(:b[1], 1) ? 
:b[1] : "a").t +@test isequal(t, [1, 1, 1, 1, "a", "a", "a", "a"]) && t isa Vector{Any} -## Vector output +## Vector output # Normal use -t = @transform(g, t = :b .- mean(:b))[:t] -@test isequal(t, [-1.5, -0.5, 0.5, 1.5, missing, missing, 0.5, -0.5]) && +t = @transform(g, t = :b .- mean(:b)).t +@test isequal(t, [-1.5, -0.5, 0.5, 1.5, missing, missing, 0.5, -0.5]) && t isa Vector{Union{Float64, Missing}} # Type promotion -t = @transform(g, t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b)))[:t] -@test isequal(t, [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0]) && +t = @transform(g, t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b))).t +@test isequal(t, [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0]) && t isa Vector{Float64} # Vectors whose eltypes promote to any -t = @transform(g, t = isequal(:b[1], 1) ? :b : fill("a", length(:b)))[:t] -@test isequal(t, [1, 2, 3, 4, "a", "a", "a", "a"]) && +t = @transform(g, t = isequal(:b[1], 1) ? :b : fill("a", length(:b))).t +@test isequal(t, [1, 2, 3, 4, "a", "a", "a", "a"]) && t isa Vector{Any} -# Categorical Array +# Categorical Array # Scalar -t = @transform(g, t = :c[1])[:t] -@test isequal(t, [1, 1, 1, 1, 1, 1, 3, 3]) && +t = @transform(g, t = :c[1]).t +@test isequal(t, [1, 1, 1, 1, 1, 1, 3, 3]) && t isa CategoricalVector{Int} -# Vector -t = @transform(g, t = :c)[:t] -@test isequal(t, [1, 2, 3, 2, 1, 2, 3, 1]) && +# Vector +t = @transform(g, t = :c).t +@test isequal(t, [1, 2, 3, 2, 1, 2, 3, 1]) && t isa CategoricalVector{Int} end # module diff --git a/test/linqmacro.jl b/test/linqmacro.jl index 339c2d28..585cb452 100644 --- a/test/linqmacro.jl +++ b/test/linqmacro.jl @@ -40,7 +40,7 @@ xlinq2 = @linq df |> orderby(-mean(:x)) |> based_on(meanX = mean(:x), meanY = mean(:y)) -@test xlinq2[[:meanX, :meanY]] == xlinq[[:meanX, :meanY]] +@test xlinq2[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]] xlinq3 = @linq df |> where(:a .> 2, :b .!= "c") |> @@ -49,8 +49,8 @@ xlinq3 = @linq df |> orderby(-mean(:x)) |> 
based_on(meanX = mean(:x), meanY = mean(:y)) -@test xlinq3[[:meanX, :meanY]] == xlinq[[:meanX, :meanY]] +@test xlinq3[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]] -@test (@linq df |> with(:a)) == df[:a] +@test (@linq df |> with(:a)) == df.a end diff --git a/test/runtests.jl b/test/runtests.jl index 962dc54b..b4551391 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,7 @@ fatalerrors = false anyerrors = false -my_tests = ["dict.jl", - "dataframes.jl", +my_tests = ["dataframes.jl", "grouping.jl", "chaining.jl", "linqmacro.jl"] diff --git a/test/speedtests.jl b/test/speedtests.jl deleted file mode 100644 index 6f59b2c9..00000000 --- a/test/speedtests.jl +++ /dev/null @@ -1,65 +0,0 @@ -module SpeedTests - -using Test, Random -using DataFrames -using DataFramesMeta -using Devectorize - -Random.seed!(1) -const n = 5_000_000 -a = rand(n) -b = rand(n) -df = DataFrame(a = da, b = db) -df2 = DataFrame(Any[a, b]) -names!(df2, [:a, :b]) - -function dot1(a::Vector, b::Vector) - x = 0.0 - for i in 1:length(a) - x += a[i] * b[i] - end - return x -end - -function dot3(df::DataFrame) - da, db = df[:a], df[:b] - T = eltype(da) - x = 0.0 - for i in 1:length(da) - x += da[i]::T * db[i]::T - end - return x -end - -function dot4(df::DataFrame) - da, db = df[:a], df[:b] - return dot2(da, db) -end - -function dot8(a::Vector, b::Vector) - x = 0.0 - for i in 1:length(a) - if !(isnan(a[i]) || isnan(b[i])) - x += a[i] * b[i] - end - end - return x -end - -function dot9(df::DataFrame) - @with df begin - x = 0.0 - for i in 1:length(:a) - x += values(:a)[i] * values(:b)[i] - end - x - end -end - -t1 = @elapsed dot1(a, b) -t3 = @elapsed dot3(df) -t4 = @elapsed dot4(df) -t8 = @elapsed dot8(a, b) -t9 = @elapsed dot9(df) - -end diff --git a/test/timings.jl b/test/timings.jl index 5a52f55b..fe2290aa 100644 --- a/test/timings.jl +++ b/test/timings.jl @@ -7,22 +7,22 @@ using DataFramesMeta Random.seed!(1) const n = 5_000_000 -a = rand(n) -b = rand(n) +da = rand(n) +db = rand(n) df = 
DataFrame(a = da, b = db) function dot3(df::DataFrame) - da, db = df[:a], df[:b] + da, db = df.a, df.b T = eltype(da) x = 0.0 for i in 1:length(da) - x += df[:a][i] * df[:a][i] + x += df.a[i] * df.a[i] end return x end function dot4(df::DataFrame) - da, db = df[:a], df[:b] + da, db = df.a, df.b T = eltype(da) x = 0.0 for i in 1:length(da) @@ -31,18 +31,7 @@ function dot4(df::DataFrame) return x end -function dot5(df::DataFrame) - @with df begin - x = 0.0 - for i in 1:length(:a) - x += (:a).data[i] * (:b).data[i] - end - x - end -end - @show t3 = @elapsed dot3(df) @show t4 = @elapsed dot4(df) -@show t5 = @elapsed dot5(df) end # module From 219173ea06fd73727e92b8370b7fdfc8a9d1fd81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 15 Jul 2019 20:17:31 +0200 Subject: [PATCH 2/6] drop Julia 0.7 support --- .travis.yml | 1 - appveyor.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f03b0c76..c5c2130a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: julia julia: - - 0.7 - 1.0 - nightly os: diff --git a/appveyor.yml b/appveyor.yml index 2a628342..756f881d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,5 @@ environment: matrix: - - julia_version: 0.7 - julia_version: 1.0 - julia_version: nightly From b30816058b174eaaff58c6be54c52abf71d702f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 15 Jul 2019 20:30:16 +0200 Subject: [PATCH 3/6] improve error reporting in tests --- test/runtests.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b4551391..9bd436a9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,11 +12,14 @@ for my_test in my_tests try include(my_test) println("\t\033[1m\033[32mPASSED\033[0m: $(my_test)") - catch + catch e global anyerrors = true println("\t\033[1m\033[31mFAILED\033[0m: $(my_test)") if fatalerrors - rethrow() + rethrow(e) + elseif !quiet + 
showerror(stdout, e, backtrace()) + println() end end end From 4adf8c454651a6e579d93a22a3fc038183817ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 15 Jul 2019 20:37:17 +0200 Subject: [PATCH 4/6] one more fix --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 9bd436a9..572e993d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ fatalerrors = false anyerrors = false +quiet = false my_tests = ["dataframes.jl", "grouping.jl", From ec2c735a9e8d30a22456cbc69caab8a7af19abfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 15 Jul 2019 20:51:05 +0200 Subject: [PATCH 5/6] add Statistics to deps --- Project.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 63ee4cbf..1b400fa3 100644 --- a/Project.toml +++ b/Project.toml @@ -9,12 +9,13 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [extras] Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Lazy", "Random", "Test"] +test = ["Lazy", "Random", "Statistics", Test"] [compat] -julia = "0.7, 1" +julia = "1" DataFrames = ">= 0.19" Tables = ">= 0.2.3" From a323b686d99293ffd2052c58c415dcb856ef028b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 15 Jul 2019 20:58:19 +0200 Subject: [PATCH 6/6] fix typo --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1b400fa3..fafc8474 100644 --- a/Project.toml +++ b/Project.toml @@ -13,7 +13,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Lazy", "Random", "Statistics", Test"] +test = ["Lazy", "Random", "Statistics", "Test"] [compat] julia = "1"