Skip to content

Commit 1bb2d4d

Browse files
author
Keith Goldfeld
committed
Adding genDataDensity and addDataDensity
1 parent 1bdef31 commit 1bb2d4d

6 files changed

+163
-4
lines changed

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export(addCondition)
66
export(addCorData)
77
export(addCorFlex)
88
export(addCorGen)
9+
export(addDataDensity)
910
export(addMarkov)
1011
export(addMultiFac)
1112
export(addPeriods)
@@ -34,6 +35,7 @@ export(genCorGen)
3435
export(genCorMat)
3536
export(genCorOrdCat)
3637
export(genData)
38+
export(genDataDensity)
3739
export(genDummy)
3840
export(genFactor)
3941
export(genFormula)

R/add_data.R

+55-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#' Add columns to existing data set
22
#'
3-
#' @param dtDefs name of definitions for added columns
4-
#' @param dtOld name of data table that is to be updated
3+
#' @param dtDefs Name of definitions for added columns
4+
#' @param dtOld Name of data table that is to be updated
55
#' @param envir Environment the data definitions are evaluated in.
66
#' Defaults to [base::parent.frame].
77
#' @return an updated data.table that contains the added simulated data
@@ -523,3 +523,56 @@ addSynthetic <- function(dtOld, dtFrom,
523523
dS[]
524524

525525
}
526+
527+
#' @title Add data from a density defined by a vector of integers
528+
#' @description Data are generated from a density defined by a vector of integers.
529+
#' @param dtOld Name of data table that is to be updated.
530+
#' @param dataDist Vector that defines the desired density.
531+
#' @param varname Name of the variable to be added.
532+
#' @param uselimits Indicator to use minimum and maximum of input data vector as
533+
#' limits for sampling. Defaults to FALSE, in which case a smoothed density that
534+
#' extends beyond the limits is used.
535+
#' @param id A string specifying the field that serves as the record id. The
536+
#' default field is "id".
537+
#' @return A data table with the generated data.
538+
#' @examples
539+
#' def <- defData(varname = "x1", formula = 5, dist = "poisson")
540+
#'
541+
#' data_dist <- c(1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 7, 7, 8, 9, 10, 10)
542+
#'
543+
#' dd <- genData(500, def)
544+
#' dd <- addDataDensity(dd, data_dist, varname = "x2")
545+
#' dd <- addDataDensity(dd, data_dist, varname = "x3", uselimits = TRUE)
546+
#' @export
547+
#' @concept generate_data
548+
#'
549+
#'
550+
addDataDensity <- function(dtOld, dataDist, varname, uselimits = FALSE) {
  # Adds a column named `varname` to `dtOld`, filled with values sampled from
  # a kernel density estimate of `dataDist`. The column is added with `:=`,
  # so `dtOld` is updated by reference; the updated table is also returned.

  # Argument checks rely on the package's assert helpers (defined elsewhere).
  assertNotMissing(
    dtOld = missing(dtOld),
    dataDist = missing(dataDist),
    varname = missing(varname)
  )
  assertClass(dtOld = dtOld, class = "data.table")
  assertNotInDataTable(varname, dtOld)

  # The density is defined by whole numbers, so round the input first.
  dataDist <- round(dataDist, 0)

  # Evaluate the kernel density on a grid of 10000 points. With `uselimits`
  # the grid is confined to the range of the supplied `dataDist`. The limits
  # must be taken from the argument itself, not from an object called
  # `data_dist` that may or may not exist in the calling environment.
  if (uselimits) {
    density_est <- stats::density(
      dataDist,
      n = 10000,
      from = min(dataDist),
      to = max(dataDist)
    )
  } else {
    density_est <- stats::density(dataDist, n = 10000)
  }

  x <- density_est$x
  y <- density_est$y

  # Normalize the density values to create a probability distribution
  probabilities <- y / sum(y)

  # Sample from the x values according to the probabilities
  .x <- sample(x, size = nrow(dtOld), replace = TRUE, prob = probabilities)

  dtOld[, (varname) := .x]
  dtOld[]
}

R/generate_data.R

+31
Original file line numberDiff line numberDiff line change
@@ -1175,3 +1175,34 @@ genSynthetic <- function(dtFrom, n = nrow(dtFrom),
11751175

11761176
}
11771177

1178+
#' @title Generate data from a density defined by a vector of integers
1179+
#' @description Data are generated from a density defined by a vector of integers.
1180+
#' @param n Number of samples to draw from the density.
1181+
#' @param dataDist Vector that defines the desired density
1182+
#' @param varname Name of the variable to be generated.
1183+
#' @param uselimits Indicator to use minimum and maximum of input data vector as
1184+
#' limits for sampling. Defaults to FALSE, in which case a smoothed density that
1185+
#' extends beyond the limits is used.
1186+
#' @param id A string specifying the field that serves as the record id. The
1187+
#' default field is "id".
1188+
#' @return A data table with the generated data
1189+
#' @examples
1190+
#' data_dist <- c(1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 7, 7, 8, 9, 10, 10)
1191+
#'
1192+
#' genDataDensity(500, data_dist, varname = "x1", id = "id")
1193+
#' genDataDensity(500, data_dist, varname = "x1", uselimits = TRUE, id = "id")
1194+
#' @export
1195+
#' @concept generate_data
1196+
1197+
genDataDensity <- function(n, dataDist, varname, uselimits = FALSE, id = "id") {
  # Builds a new table via genData(n, id = id) and hands it to
  # addDataDensity() to attach a variable sampled from the density of
  # `dataDist`.

  # Stop early when a required argument was not supplied.
  assertNotMissing(
    n = missing(n),
    dataDist = missing(dataDist),
    varname = missing(varname)
  )

  # Work with whole-number values of the defining vector.
  roundedDist <- round(dataDist, 0)

  dtBase <- genData(n, id = id)
  dtNew <- addDataDensity(dtBase, roundedDist, varname, uselimits)
  dtNew[]
}
1207+
1208+

man/addColumns.Rd

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/addDataDensity.Rd

+38
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/genDataDensity.Rd

+35
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)