Merge pull request #76 from drizk1/tidied-fill_missing

TidierOrg · Dec 22, 2023 · baa0594 · baa0594 · kdpsingh · Dec 22, 2023
2 parents de86edf + 1bb4921
commit baa0594
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 38 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # TidierData.jl updates
 
+## v0.14.3 - 2023-12-22
+- Adds support for interpolation and tidy selection in `@fill_missing()`
+- Fixes tidy selection in `@separate_rows()`
+
 ## v0.14.2 - 2023-12-21
 - `@slice()` now supports interpolation and user-defined functions
 - Adds `where()`

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierData"
 uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 authors = ["Karandeep Singh"]
-version = "0.14.2"
+version = "0.14.3"
 
 [deps]
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

diff --git a/src/missings.jl b/src/missings.jl
@@ -40,26 +40,27 @@ function fill_missing(df::DataFrame, method::String)
   return fill_missing(df, Symbol.(names(df)), method)
 end
 
-function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String)
+function fill_missing(df::DataFrame, columns, method::String)
   new_df = copy(df)
-
-  for col in cols
+  cols_expr = columns isa Expr ? (columns,) : columns
+  column_symbols = names(df, Cols(cols_expr...)) 
+  for col_sym in column_symbols
       if method == "down"
-          last_observation = new_df[1, col]
+          last_observation = new_df[1, col_sym]
           for i in 1:nrow(new_df)
-              if ismissing(new_df[i, col])
-                  new_df[i, col] = last_observation
+              if ismissing(new_df[i, col_sym])
+                  new_df[i, col_sym] = last_observation
               else
-                  last_observation = new_df[i, col]
+                  last_observation = new_df[i, col_sym]
               end
           end
       elseif method == "up"
-          next_observation = new_df[end, col]
+          next_observation = new_df[end, col_sym]
           for i in nrow(new_df):-1:1
-              if ismissing(new_df[i, col])
-                  new_df[i, col] = next_observation
+              if ismissing(new_df[i, col_sym])
+                  new_df[i, col_sym] = next_observation
               else
-                  next_observation = new_df[i, col]
+                  next_observation = new_df[i, col_sym]
               end
           end
       else
@@ -70,12 +71,14 @@ function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String)
   return new_df
 end
 
-function fill_missing(gdf::GroupedDataFrame, cols::Vector{Symbol}, method::String)
+function fill_missing(gdf::GroupedDataFrame, columns, method::String)
   group_cols = groupcols(gdf)
   results = []
+  cols_expr = columns isa Expr ? (columns,) : columns
+  column_symbols = names(gdf, Cols(cols_expr...)) 
   for group in gdf
       # call the DataFrame version of fill_missing on the SubDataFrame
-      processed_group = fill_missing(DataFrame(group), cols, method)
+      processed_group = fill_missing(DataFrame(group), column_symbols, method)
       push!(results, processed_group)
   end
   combined_df = vcat(results...)
@@ -100,13 +103,12 @@ macro fill_missing(df, args...)
       end
   end
 
-  cols = args[1:(length(args)-1)]
-  method = args[length(args)]
+  interpolated_exprs = parse_interpolation.(args[1:(length(args)-1)])
+  tidy_exprs = [i[1] for i in interpolated_exprs]
+  tidy_exprs = parse_tidy.(tidy_exprs)
 
-  # Requires Julia 1.9
-  # cols..., method = args
-
-  cols_quoted = QuoteNode.(cols)
+  method = esc(last(args))
+  cols_quoted = tidy_exprs
 
   return quote
       if $(esc(df)) isa GroupedDataFrame

diff --git a/src/separate_unite.jl b/src/separate_unite.jl
@@ -146,26 +146,12 @@ end
 function separate_rows(df::Union{DataFrame, GroupedDataFrame}, columns, delimiter::Union{Regex, String})
   is_grouped = df isa GroupedDataFrame
   grouping_columns = is_grouped ? groupcols(df) : Symbol[]
-
   # Ungroup if necessary
   temp_df = copy(is_grouped ? parent(df) : df)
-   # temp_df = copy(df)
-
-  # Convert all references to column symbols
-  column_symbols = []
-  for col in columns
-      if col isa Integer
-          push!(column_symbols, Symbol(names(temp_df)[col]))
-      elseif col isa AbstractRange
-          append!(column_symbols, Symbol.(names(temp_df)[collect(col)]))
-      elseif typeof(col) <: Between
-          # Get the column indices for the Between range
-          col_indices = DataFrames.index(temp_df)[col]
-          append!(column_symbols, Symbol.(names(temp_df)[col_indices]))
-      else
-          push!(column_symbols, Symbol(col))
-      end
-  end
+
+  cols_expr = columns isa Expr ? (columns,) : columns
+  column_symbols = names(df, Cols(cols_expr...)) 
+  column_symbols = Symbol.(column_symbols) 
 
   # Initialize an array to hold expanded data for each column
   expanded_data = Dict{Symbol, Vector{Any}}()