From 3214aed7db163ffc68f9064789092922edc8338e Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Wed, 11 Sep 2024 11:36:42 -0600 Subject: [PATCH] adding a prune data package method --- NAMESPACE | 1 + ...adata.R => modify_frictionless_metadata.R} | 43 ++++++++++++++++++- man/expand_frictionless_metadata.Rd | 8 +++- man/prune_datapackage.Rd | 19 ++++++++ vignettes/metadata.Rmd | 11 +++++ 5 files changed, 79 insertions(+), 3 deletions(-) rename R/{expand_frictionless_metadata.R => modify_frictionless_metadata.R} (70%) create mode 100644 man/prune_datapackage.Rd diff --git a/NAMESPACE b/NAMESPACE index 821e2ce..ea8bf06 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(obfuscate_gps) export(obfuscate_lat) export(obfuscate_lon) export(othertext_lookup) +export(prune_datapackage) export(read_excel_all_sheets) export(read_googlesheets) export(remove_deletions) diff --git a/R/expand_frictionless_metadata.R b/R/modify_frictionless_metadata.R similarity index 70% rename from R/expand_frictionless_metadata.R rename to R/modify_frictionless_metadata.R index d19eda7..7d54c20 100644 --- a/R/expand_frictionless_metadata.R +++ b/R/modify_frictionless_metadata.R @@ -8,6 +8,8 @@ #' @param resource_name Character. Item within the datapackage to be updated #' @param resource_path Character. Path to csv file #' @param data_package_path Character. Path to datapackage.json file +#' @param prune_datapackage Logical. Should properties not in the structural metadata +#' be removed? 
#' #' @return Updates the datapackage, returns nothing #' @export @@ -46,7 +48,8 @@ expand_frictionless_metadata <- function(structural_metadata, resource_name, resource_path, - data_package_path ){ + data_package_path, + prune_datapackage = TRUE){ data_package <- frictionless::read_package(data_package_path) @@ -82,6 +85,10 @@ expand_frictionless_metadata <- function(structural_metadata, my_data_schema$fields[[idx]] <- x } + if(prune_datapackage){ + my_data_schema <- prune_datapackage(my_data_schema,structural_metadata) + } + # update the datapackage.json data_package <- data_package|> frictionless::remove_resource(resource_name) |> @@ -95,3 +102,37 @@ expand_frictionless_metadata <- function(structural_metadata, invisible() } + + +#' Prune data package +#' +#' method to remove properties from the metadata for a dataset in a datapackage +#' +#' @param my_data_schema list. schema object from frictionless +#' @param structural_metadata dataframe. structural metadata for a dataset +#' +#' @return pruned data_schema - +#' @export +#' +prune_datapackage <- function(my_data_schema, structural_metadata){ + + # get property names + property_names <- names(structural_metadata) + + # add minimal property values + property_names_complete <- append(c("name","type"),property_names) |> + unique() + + # create storage object + my_data_schema_pruned <- my_data_schema + + # map over fields and remove metadata items not in property names complete + my_data_schema_pruned$fields <- purrr::map(my_data_schema$fields, function(schema_item){ + + properties_to_drop <- names(schema_item) %in% property_names_complete + out <- schema_item[properties_to_drop] + return(out) + }) + + return(my_data_schema_pruned) +} diff --git a/man/expand_frictionless_metadata.Rd b/man/expand_frictionless_metadata.Rd index d3d4253..c207064 100644 --- a/man/expand_frictionless_metadata.Rd +++ b/man/expand_frictionless_metadata.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit 
documentation in R/expand_frictionless_metadata.R +% Please edit documentation in R/modify_frictionless_metadata.R \name{expand_frictionless_metadata} \alias{expand_frictionless_metadata} \title{Expand Frictionless Metadata with structural metadata} @@ -8,7 +8,8 @@ expand_frictionless_metadata( structural_metadata, resource_name, resource_path, - data_package_path + data_package_path, + prune_datapackage = TRUE ) } \arguments{ @@ -20,6 +21,9 @@ expand_frictionless_metadata( \item{resource_path}{Character. Path to csv file} \item{data_package_path}{Character. Path to datapackage.json file} + +\item{prune_datapackage}{Logical. Should properties not in the structural metadata +be removed?} } \value{ Updates the datapackage, returns nothing diff --git a/man/prune_datapackage.Rd b/man/prune_datapackage.Rd new file mode 100644 index 0000000..17dab6e --- /dev/null +++ b/man/prune_datapackage.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/modify_frictionless_metadata.R +\name{prune_datapackage} +\alias{prune_datapackage} +\title{Prune data package} +\usage{ +prune_datapackage(my_data_schema, structural_metadata) +} +\arguments{ +\item{my_data_schema}{list. schema object from frictionless} + +\item{structural_metadata}{dataframe. 
structural metadata for a dataset} +} +\value{ +pruned data_schema - +} +\description{ +method to remove properties from the metadata for a dataset in a datapackage +} diff --git a/vignettes/metadata.Rmd b/vignettes/metadata.Rmd index e1dc9c9..1c7874b 100644 --- a/vignettes/metadata.Rmd +++ b/vignettes/metadata.Rmd @@ -215,6 +215,17 @@ expand_frictionless_metadata(structural_metadata = structural_metadata, resource_path = "data_examples/my_data.csv", data_package_path = "data_examples/datapackage.json") +## remove an element from the structural metadata and datapackage + +# dropping the comments field (element 5 of the structural metadata) +structural_metadata <- structural_metadata[-5] + +expand_frictionless_metadata(structural_metadata = structural_metadata, + resource_name = "my_data", # name of the file with no extension + resource_path = "data_examples/my_data.csv", + data_package_path = "data_examples/datapackage.json", + prune_datapackage = TRUE) # this is the default + # there are methods for embargoing or restricting deposits in {deposits}