Skip to content

Commit

Permalink
Merge pull request #76 from drizk1/tidied-fill_missing
Browse files Browse the repository at this point in the history
  • Loading branch information
Karandeep Singh committed Dec 22, 2023
2 parents de86edf + 1bb4921 commit baa0594
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 38 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# TidierData.jl updates

## v0.14.3 - 2023-12-22
- Adds support for interpolation and tidy selection in `@fill_missing()`
- Fixes tidy selection in `@separate_rows()`

## v0.14.2 - 2023-12-21
- `@slice()` now supports interpolation and user-defined functions
- Adds `where()`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierData"
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
authors = ["Karandeep Singh"]
version = "0.14.2"
version = "0.14.3"

[deps]
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
Expand Down
40 changes: 21 additions & 19 deletions src/missings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,27 @@ function fill_missing(df::DataFrame, method::String)
return fill_missing(df, Symbol.(names(df)), method)
end

function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String)
function fill_missing(df::DataFrame, columns, method::String)
new_df = copy(df)

for col in cols
cols_expr = columns isa Expr ? (columns,) : columns
column_symbols = names(df, Cols(cols_expr...))
for col_sym in column_symbols
if method == "down"
last_observation = new_df[1, col]
last_observation = new_df[1, col_sym]
for i in 1:nrow(new_df)
if ismissing(new_df[i, col])
new_df[i, col] = last_observation
if ismissing(new_df[i, col_sym])
new_df[i, col_sym] = last_observation
else
last_observation = new_df[i, col]
last_observation = new_df[i, col_sym]
end
end
elseif method == "up"
next_observation = new_df[end, col]
next_observation = new_df[end, col_sym]
for i in nrow(new_df):-1:1
if ismissing(new_df[i, col])
new_df[i, col] = next_observation
if ismissing(new_df[i, col_sym])
new_df[i, col_sym] = next_observation
else
next_observation = new_df[i, col]
next_observation = new_df[i, col_sym]
end
end
else
Expand All @@ -70,12 +71,14 @@ function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String)
return new_df
end

function fill_missing(gdf::GroupedDataFrame, cols::Vector{Symbol}, method::String)
function fill_missing(gdf::GroupedDataFrame, columns, method::String)
group_cols = groupcols(gdf)
results = []
cols_expr = columns isa Expr ? (columns,) : columns
column_symbols = names(gdf, Cols(cols_expr...))
for group in gdf
# call the DataFrame version of fill_missing on the SubDataFrame
processed_group = fill_missing(DataFrame(group), cols, method)
processed_group = fill_missing(DataFrame(group), column_symbols, method)
push!(results, processed_group)
end
combined_df = vcat(results...)
Expand All @@ -100,13 +103,12 @@ macro fill_missing(df, args...)
end
end

cols = args[1:(length(args)-1)]
method = args[length(args)]
interpolated_exprs = parse_interpolation.(args[1:(length(args)-1)])
tidy_exprs = [i[1] for i in interpolated_exprs]
tidy_exprs = parse_tidy.(tidy_exprs)

# Requires Julia 1.9
# cols..., method = args

cols_quoted = QuoteNode.(cols)
method = esc(last(args))
cols_quoted = tidy_exprs

return quote
if $(esc(df)) isa GroupedDataFrame
Expand Down
22 changes: 4 additions & 18 deletions src/separate_unite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,26 +146,12 @@ end
function separate_rows(df::Union{DataFrame, GroupedDataFrame}, columns, delimiter::Union{Regex, String})
is_grouped = df isa GroupedDataFrame
grouping_columns = is_grouped ? groupcols(df) : Symbol[]

# Ungroup if necessary
temp_df = copy(is_grouped ? parent(df) : df)
# temp_df = copy(df)

# Convert all references to column symbols
column_symbols = []
for col in columns
if col isa Integer
push!(column_symbols, Symbol(names(temp_df)[col]))
elseif col isa AbstractRange
append!(column_symbols, Symbol.(names(temp_df)[collect(col)]))
elseif typeof(col) <: Between
# Get the column indices for the Between range
col_indices = DataFrames.index(temp_df)[col]
append!(column_symbols, Symbol.(names(temp_df)[col_indices]))
else
push!(column_symbols, Symbol(col))
end
end

cols_expr = columns isa Expr ? (columns,) : columns
column_symbols = names(df, Cols(cols_expr...))
column_symbols = Symbol.(column_symbols)

# Initialize an array to hold expanded data for each column
expanded_data = Dict{Symbol, Vector{Any}}()
Expand Down

2 comments on commit baa0594

@kdpsingh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/97618

Tip: Release Notes

Did you know you can add release notes too? Just add Markdown-formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.14.3 -m "<description of version>" baa0594df1f0e1628a9d911ceed88e12b927b978
git push origin v0.14.3

Please sign in to comment.