diff --git a/NEWS.md b/NEWS.md index 8febdb2b..3ad78175 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # TidierData.jl updates +## v0.14.5 - 2024-01-23 +- Adds `@relocate()` + ## v0.14.4 - 2023-12-30 - Adds `@unnest_wider()` - Adds `@unnest_longer()` diff --git a/Project.toml b/Project.toml index 2bc6ccb4..bddd09d7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierData" uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80" authors = ["Karandeep Singh"] -version = "0.14.4" +version = "0.14.5" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/README.md b/README.md index 66b021f1..a2273e6a 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ TidierData.jl currently supports the following top-level macros: - `@slice()`, `@slice_sample()`, `@slice_min()`, `@slice_max()`, `@slice_head()`, and `@slice_tail()` - `@group_by()` and `@ungroup()` - `@arrange()` +- `@relocate()` - `@pull()` - `@count()` and `@tally()` - `@left_join()`, `@right_join()`, `@inner_join()`, `@full_join()`, `@anti_join()`, and `@semi_join()` diff --git a/docs/src/index.md b/docs/src/index.md index 7425e442..7fa847b3 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -98,6 +98,7 @@ TidierData.jl currently supports the following top-level macros: - `@slice()`, `@slice_sample()`, `@slice_min()`, `@slice_max()`, `@slice_head()`, and `@slice_tail()` - `@group_by()` and `@ungroup()` - `@arrange()` + - `@relocate()` - `@pull()` - `@count()` and `@tally()` - `@left_join()`, `@right_join()`, `@inner_join()`, `@full_join()`, `@anti_join()`, and `@semi_join()` diff --git a/src/TidierData.jl b/src/TidierData.jl index c56d6a4f..439b3a2e 100644 --- a/src/TidierData.jl +++ b/src/TidierData.jl @@ -21,7 +21,7 @@ export TidierData_set, across, desc, n, row_number, everything, starts_with, end @group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @anti_join, @semi_join, @pivot_wider, @pivot_longer, @bind_rows, 
@bind_cols, @clean_names, @count, @tally, @drop_missing, @glimpse, @separate, @unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max, @slice_head, @slice_tail, @rename_with, @separate_rows, - @unnest_longer, @unnest_wider, @nest + @unnest_longer, @unnest_wider, @nest, @relocate # Package global variables const code = Ref{Bool}(false) # output DataFrames.jl code? @@ -53,6 +53,7 @@ include("summary.jl") include("is_type.jl") include("missings.jl") include("nests.jl") +include("relocate.jl") # Function to set global variables """ diff --git a/src/docstrings.jl b/src/docstrings.jl index a904b81a..78ed027c 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -3316,3 +3316,67 @@ julia> @chain df begin 15 │ e 15 45 30 ``` """ + +const docstring_relocate = +""" + @relocate(df, columns, before = nothing, after = nothing) + +Rearranges the columns of a data frame. This function allows for moving specified columns to a new position within the data frame, either before or after a given target column. The `columns`, `before`, and `after` arguments all accept tidy selection functions. Only one of `before` or `after` should be specified. If neither is specified, the selected columns will be moved to the beginning of the data frame. + +# Arguments +- `df`: The data frame. +- `columns`: Column or columns to be moved. +- `before`: (Optional) Column or columns before which the specified columns will be moved. If not provided or `nothing`, this argument is ignored. +- `after`: (Optional) Column or columns after which the specified columns will be moved. If not provided or `nothing`, this argument is ignored. 
"""
    relocate(df, columns; before=nothing, after=nothing)

Reorder the columns of `df` in place (via `select!`) and return the result.

# Arguments
- `df`: The data frame whose columns are reordered. It is mutated.
- `columns`: A column selector, or a collection of selectors, naming the columns to move.
- `before`: (Optional) Selector(s) for the target columns. The moved columns are placed at the
  position of the first target column, followed by all remaining target columns.
- `after`: (Optional) Selector(s) for the target columns. All target columns are gathered at the
  position of the first target column, followed by the moved columns.

If neither `before` nor `after` selects any column, the moved columns are placed first.

# Throws
- `ArgumentError` if both `before` and `after` select at least one column. Previously this case
  silently dropped columns from `df`.
"""
function relocate(df, columns; before=nothing, after=nothing)
    # Resolve a selector (or a collection of selectors) to the matching column names of `df`.
    # A lone Symbol, string, or Expr is wrapped so that it reaches `Cols` as one selector
    # instead of being splatted; anything else is splatted exactly as before.
    function resolve(selector)
        isnothing(selector) && return Symbol[]
        selectors = selector isa Union{Symbol, AbstractString, Expr} ? (selector,) : selector
        return Symbol[Symbol(name) for name in names(df, Cols(selectors...))]
    end

    column_symbols = resolve(columns)
    before_col_symbols = resolve(before)
    after_col_symbols = resolve(after)

    # Only one anchor can be honoured. With both, the columns of the second target set
    # would never be placed and `select!` would remove them from the data frame.
    if !isempty(before_col_symbols) && !isempty(after_col_symbols)
        throw(ArgumentError("Only one of `before` or `after` may be specified."))
    end

    df_column_names = Symbol.(names(df))
    place_before = !isempty(before_col_symbols)
    target_symbols = place_before ? before_col_symbols : after_col_symbols

    # No usable target: move the selected columns to the leftmost position.
    if isempty(target_symbols)
        return select!(df, vcat(column_symbols, setdiff(df_column_names, column_symbols)))
    end

    # The moved columns and the target columns are emitted together, as one run, at the
    # position of the first target column. Targets that are also being moved appear once.
    anchor = first(target_symbols)
    remaining_targets = setdiff(target_symbols, column_symbols)
    inserted_run = place_before ? vcat(column_symbols, remaining_targets) :
                                  vcat(remaining_targets, column_symbols)

    new_order = Symbol[]
    for col in df_column_names
        if col == anchor
            append!(new_order, inserted_run)
        elseif col ∉ column_symbols && col ∉ target_symbols
            push!(new_order, col)
        end
    end
    return select!(df, new_order)
end

"""
$docstring_relocate
"""
macro relocate(df, args...)
    # `:nothing` doubles as the "not supplied" marker, so that writing `before = nothing`
    # or `after = nothing` explicitly is ignored as well.
    before_col_expr = :nothing
    after_col_expr = :nothing
    col_exprs = Any[]

    # Split the arguments into column selections and the `before`/`after` keywords.
    # Keywords are recognised in any position, and an empty argument list is accepted
    # (indexing `args[end]` used to throw a BoundsError during macro expansion).
    for arg in args
        if arg isa Expr && arg.head == :(=)
            keyword = arg.args[1]
            if keyword == :before
                before_col_expr = arg.args[2]
            elseif keyword == :after
                after_col_expr = arg.args[2]
            else
                error("Invalid keyword argument: only 'before' or 'after' are accepted.")
            end
        else
            push!(col_exprs, arg)
        end
    end

    # Run every selection through the package's interpolation and tidy-expression parsers.
    # Only the first element of `parse_interpolation`'s result is used, as before.
    tidy_col_exprs = [parse_tidy(parse_interpolation(col_expr)[1]) for col_expr in col_exprs]
    tidy_before_col_exprs = before_col_expr == :nothing ? [] :
        [parse_tidy(parse_interpolation(before_col_expr)[1])]
    tidy_after_col_exprs = after_col_expr == :nothing ? [] :
        [parse_tidy(parse_interpolation(after_col_expr)[1])]

    relocation_expr =
        quote
            # Evaluate the user's data frame expression exactly once.
            local df_input = $(esc(df))
            if df_input isa GroupedDataFrame
                # Work on a copy, reorder the copy's columns, then regroup by the
                # original grouping columns.
                local df_copy = transform(df_input, ungroup = false)
                relocate(df_copy, [$(tidy_col_exprs...)], before=[$(tidy_before_col_exprs...)], after=[$(tidy_after_col_exprs...)])
                local grouped_df = groupby(parent(df_copy), groupcols(df_input))
                grouped_df
            else
                local df_copy = copy(df_input)
                relocate(df_copy, [$(tidy_col_exprs...)], before=[$(tidy_before_col_exprs...)], after=[$(tidy_after_col_exprs...)])
                df_copy
            end
        end

    return relocation_expr
end