diff --git a/.Rbuildignore b/.Rbuildignore index 31be86b..d3b1a37 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,4 @@ ^graphs/.*$ ^doc$ ^Meta$ +^causaleffect\.Rproj$ diff --git a/DESCRIPTION b/DESCRIPTION index 6d4a26b..85b90bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,15 @@ URL: https://github.com/santikka/causaleffect/ Description: Functions for identification and transportation of causal effects. Provides a conditional causal effect identification algorithm (IDC) by Shpitser, I. and Pearl, J. (2006) , an algorithm for transportability from multiple domains with limited experiments by Bareinboim, E. and Pearl, J. (2014) , and a selection bias recovery algorithm by Bareinboim, E. and Tian, J. (2015) . All of the previously mentioned algorithms are based on a causal effect identification algorithm by Tian , J. (2002) . License: GPL (>= 2) Imports: igraph -Suggests: R.rsp, XML +Suggests: + R.rsp, + testthat (>= 3.0.0), + XML VignetteBuilder: R.rsp NeedsCompilation: no Author: Santtu Tikka [aut, cre] () Maintainer: Santtu Tikka +Config/testthat/edition: 3 +RoxygenNote: 7.3.2 +Roxygen: list(markdown = TRUE) +Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 543ee85..2911ffe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ export(aux.effect) export(generalize) export(causal.effect) +export(simplify) +export(join) export(get.expression) export(meta.transport) export(parse.graphml) @@ -28,4 +30,4 @@ importFrom(igraph, `%->%`) importFrom(igraph, V) importFrom(igraph, E) importFrom(stats, setNames) -importFrom(utils, combn) \ No newline at end of file +importFrom(utils, combn) diff --git a/R/causal.effect.R b/R/causal.effect.R index f58d8e9..a2dedc9 100644 --- a/R/causal.effect.R +++ b/R/causal.effect.R @@ -1,11 +1,43 @@ +# Causal Effect +# +# This function computes the causal effect of a set of variables x on another +# set of variables y given an optional set of variables z, using a given causal +# graph G. +# +# +# y: +# x: +# z: +# G: +# expr: +# simp: +# steps: +# primes: +# prune: +# stop_on_nonid: +# +# Returns: +# +# +# Causaleffect dependencies: probability, deconstruct, get.expression, pid, +# id(?),idc, observed.graph, unobserved.graph, parse.expression, set.primes(?) + + + + causal.effect <- function(y, x, z = NULL, G, expr = TRUE, simp = FALSE, steps = FALSE, primes = FALSE, prune = FALSE, stop_on_nonid = TRUE) { + # if there aren't any attributes in the graph, then we need to create them. if (length(igraph::edge.attributes(G)) == 0) { G <- igraph::set.edge.attribute(G, "description", 1:length(igraph::E(G)), NA) } + # G.obs is the version of G that has no unobserved variables or bidirected edges representing unobserved confounders G.obs <- observed.graph(G) if (!igraph::is.dag(G.obs)) stop("Graph 'G' is not a DAG") + # This is the topological sort of a graph's directed edges topo <- igraph::topological.sort(G.obs) + # This labels the vertices of the topological sort to be the same as the names of the original graph. topo <- igraph::get.vertex.attribute(G, "name")[topo] + # sanity checks to make sure that x, y and z are in the topological sort of the graph if (length(setdiff(y, topo)) > 0) stop("Set 'y' contains variables not present in the graph.") if (length(setdiff(x, topo)) > 0) stop("Set 'x' contains variables not present in the graph.") if (length(z) > 0 && !identical(z, "")) { diff --git a/R/insert.R b/R/insert.R index b509801..cfa2bd6 100644 --- a/R/insert.R +++ b/R/insert.R @@ -1,24 +1,128 @@ +#' Insert +#' +#' \code{Insert} will insert a missing variable into a joint distribution \eqn{P(J|D)} +#' using d-separation criteria in a given graph \code{G}. +#' It is called when there are variables without corresponding terms in the expression +#' +#' @param J character vector. The set of variables representing the joint distribution. +#' @param D character vector. The set of variables representing the conditioning set of the joint distribution. +#' @param M character vector. The variable to be inserted. +#' @param cond character vector. The set of conditioning variables. +#' @param S character vector. The current summation variable. +#' @param O character vector. The set of observed variables. +#' @param G.unobs igraph object created with \code{igraph::unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +#' @param G igraph object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges. +#' @param G.obs igraph object created with \code{igraph::observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +#' @param topo igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G. +#' +#' @dependencies This function depends on several functions from the causaleffect package, including: \link{powerset} and \link{wrap.dSep}. +#' +#' @return \code{Insert} returns a list with: +#' \code{J.new}{character vector. An updated set of joint distribution variables.} +#' \code{D.new}{character vector. An updated set of conditioning variables.} +#' \code{M}{character vector. The inserted variable.} +#' \code{ds[[i]]}{character vector. The subset from the power set used in the insertion.} +#' However, if no conditions were met, \code{insert} will return the original \code{J} and \code{D}. +#' +#' @references Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +#' +#' @author Haley Hummel, +#' Psychology PhD student at Oregon State University +#' +#' @examples +#' \dontrun{ +#' # defining graph information for G_1 using igraph +#' G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +#' G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +#' +#' # defining observed nodes of graph G_1 using igraph +#' G_1.obs <- observed.graph(G_1) +#' +#' # defining unobserved nodes of graph G_1 using igraph +#' G_1.unobs <- unobserved.graph(G_1) +#' +#' # defining topological sort of graph G_1 using igraph +#' topo_1 <- igraph::topological.sort(G_1.obs) +#' topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] +#' +#' # we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +#' # G_1.obs) with break points (the browser() function). I added print statements +#' # after step #5 in simplify(): +#' # Step 6 - Inside nested while loop before join operation +#' # P$children[[k]]$var: y (this represents vari in simplify()) +#' # P$children[[k]]$cond: z x (this represents cond in simplify()) +#' # P$sumset[j]: z (this reprensents S in simplify()) +#' +#' J_1 <- character() +#' D_1 <- character() +#' M_1 <- "x" +#' cond_1 <- c("z", "x") +#' S_1 <- "z" +#' O_1 <- c("z", "y") +#' +#' # we can obtain the following from the graph information: +#' # G.unobs = G_1.unobs +#' # G = G_1 +#' # G.obs = G_1.obs +#' # topo = topo_1 +#' +#' # we expect the output from this (representing J, D) to be: +#' # [1] character(0) +#' # [2] character(0) +#' +#' insert(J_1, D_1, M_1, cond_1, S_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1) +#' } +#' +#' @seealso \code{\link{join}}, \code{\link{simplify}}, \code{\link{wrap.dSep}}, \code{\link{powerset}} +#' +#' @keywords models manip math utilities +#' @keywords graphs methods multivariate distribution probability +#' @concept probabilistic expressions +#' @concept graph theory +#' @concept joint distribution +#' @concept causal inference +#' @concept d-separation + insert <- function(J, D, M, cond, S, O, G.unobs, G, G.obs, topo) { + # Identify which elements of M are in D mis.ind <- which(M %in% D) + # If no elements of M are in D, return original J and D if (length(mis.ind) == 0) return(list(J, D)) + + # Identify the missing variable to be inserted mis <- M[mis.ind] M <- mis[length(mis)] + # If M is in cond, return original J and D if (M %in% cond) return(list(J, D)) + + # Find the first element of J in the topological ordering 'topo'. + # Set V.prev to this element + # V.pi is set of vertices that are the ancestors of (precede) V.prev. J.min <- min(which(J %in% topo)) V.prev <- J[J.min] ind <- which(topo == V.prev) + # Get all vertices before V.prev in topological order V.pi <- topo[0:(ind-1)] + + # Compute the power set of V.pi excluding M. + # n is the numbere of subsets in the power set. ds <- powerset(setdiff(V.pi, M)) n <- length(ds) + # Create the candidate set add for (i in 1:n) { add <- union(ds[[i]], M) + # Compute the set A a.set <- union(setdiff(add, D), setdiff(D, add)) - if (wrap.dSep(G.unobs, J, a.set, setdiff(D, a.set)) && + + # Check the d-separation criteria + if (wrap.dSep(G.unobs, J, a.set, setdiff(D, a.set)) && wrap.dSep(G.unobs, M, S, setdiff(ds[[i]], S))) { + # Update J.new and D.new J.new <- union(J, M) D.new <- ds[[i]] return(list(J.new, D.new, M, ds[[i]])) } } + # If no conditions were met, return original J and D return(list(J, D)) -} \ No newline at end of file +} diff --git a/R/join.R b/R/join.R index 8ba5729..86d291f 100644 --- a/R/join.R +++ b/R/join.R @@ -1,33 +1,148 @@ +#' Join +#' +#' \code{Join} and \code{insert} are essentially two variations of the underlying procedure of determining whether the terms of the atomic expression actually represent a joint distribution. \code{Join} is called when we are processing terms that already exist in the expression. +#' Attempts to combine two terms: the joint term \code{P(J|D)} obtained from \code{simplify()} and the +#' term \code{P(V|C) := P(Vk|Ck)} of the current iteration step. The goal is to +#' determine if these terms can be combined based on the d-separation criteria in the graph \code{G}. +#' +#' +#' +#' @param J character vector. Joint set \code{P(J|D)}; already processed and included in joint distribution +#' from previous \code{\link{simplify}} iteration. Initially, may be empty for the starting point of +#' the joint distribution. \code{vari} is added to expand it if d-separation conditions are met. +#' @param D character vector. Term \code{P(V|C) := P(Vk|Ck)}; set of variables that condition the joint distribution. +#' \code{Join} checks and updates \code{D} as necessary to maintain the validity of the joint distribution +#' when combined with \code{vari}. +#' @param vari character scalar. Current variable being considered for inclusion in the joint distribution. +#' @param cond character vector. Set of variables that condition the current variable \code{vari}. \code{Join} uses \code{cond} +#' to evaluate conditional independence and determine if \code{vari} can be added to \code{J}. +#' @param S likely a character vector. Not used directly in \code{join}. Current summation variable. +#' @param M character vector. Missing variables (variables not contained within the expression). +#' @param O character vector. Observed variables (variables contained within the expression). +#' @param G.unobs igraph object created with \code{igraph::unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +#' @param G igraph object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges. +#' @param G.obs igraph object created with \code{igraph::observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +#' @param topo igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G. +#' +#' @dependencies This function depends on several functions from the causaleffect package, including: \link{powerset}, \link{wrap.dSep}, and \link{insert}. +#' +#' @return \code{Join} returns the joint result, or the original result if none of the conditions for joining were met. +#' +#' @references Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +#' +#' @author Haley Hummel, +#' Psychology PhD student at Oregon State University +#' +#' @seealso \code{\link{causal.effect}}, \code{\link{parse.expression}}, \code{\link{get.expression}}, \code{\link{probability}} +#' +#' +#' @examples +#' \dontrun{ +#' +#' # defining graph information for G_1 using igraph +#' G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +#' G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +#' +#' # defining observed nodes of graph G_1 using igraph +#' G_1.obs <- observed.graph(G_1) +#' +#' # defining unobserved nodes of graph G_1 using igraph +#' G_1.unobs <- unobserved.graph(G_1) +#' +#' # defining topological sort of graph G_1 using igraph +#' topo_1 <- igraph::topological.sort(G_1.obs) +#' topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] +#' +#' # we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +#' # G_1.obs) with break points (the browser() function). I added print statements +#' # after step #5 in simplify(): +#' # Step 6 - Inside nested while loop before join operation +#' # P$children[[k]]$var: y (this represents vari in simplify()) +#' # P$children[[k]]$cond: z x (this represents cond in simplify()) +#' # P$sumset[j]: z (this reprensents S in simplify()) +#' +#' J_1 <- character() +#' D_1 <- character() +#' vari_1 <- "y" +#' cond_1 <- c("z", "x") +#' S_1 <- "z" +#' M_1 <- "x" +#' O_1 <- c("z", "y") +#' +#' # we can obtain the following from the graph information: +#' # G.unobs = G_1.unobs +#' # G = G_1 +#' # G.obs = G_1.obs +#' # topo = topo_1 +#' +#' # we expect the output from this to be: +#' # [1] "y" +#' # [2] "z" "x" +#' +#' join(J_1, D_1, vari_1, cond_1, S_1, M_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1) +#' } +#' +#' @seealso \code{\link{simplify}}, \code{\link{wrap.dSep}}, \code{\link{insert}} +#' +#' @keywords models manip math utilities +#' @concept probabilistic expressions +#' @concept graph theory +#' @concept causal inference + + join <- function(J, D, vari, cond, S, M, O, G.unobs, G, G.obs, topo) { +# initialize J and D as empty character vectors J.new <- character() D.new <- character() +# check if J is empty. If it is, set J.new (the joint subset) to vari and +# D.new (the conditioning subset) to cond, and then return these values. +# This represents the simplest case where the first variable forms the joint distribution alone. if (length(J) == 0) { J.new <- vari D.new <- cond return(list(J.new, D.new)) } +# Set up necessary variables for iteration. Calculate ancestors: find the +# of the first element of J in the topological order (the topo vector). +# V.prev is set to this element. Compute V.pi (aka: "G") as the set of +# vertices in topo that precede the first element of V.prev (aka: "J"). J.min <- min(which(J %in% topo)) V.prev <- J[J.min] ind <- which(topo == V.prev) V.pi <- topo[0:(ind-1)] +# The power set "ds" of V.pi, excluding elements in vari, is computed. ds <- powerset(setdiff(V.pi, vari)) n <- length(ds) +# Iterate over the power set, forming candidate sets add, a.set, and b.set, +# sets used to characterize the changes needed in the conditioning sets to +# enable the combination of two probabilistic terms while preserving the +# required conditional independencies +# A represents necessary changes to the conditioning set D to combine the joint +# distribution term P(J|D) with the current term P(vari|cond) +# B represents necessary changes to the conditioning set cond to combine the +# joint term P(J|D) with the current P(vari|cond) for (i in 1:n) { add <- union(ds[[i]], vari) - a.set <- union(setdiff(add, D), setdiff(D, add)) + a.set <- union(setdiff(add, D), setdiff(D, add)) b.set <- union(setdiff(ds[[i]], cond), setdiff(cond, ds[[i]])) - if (wrap.dSep(G.unobs, J, a.set, setdiff(D, a.set)) && +# Check if they meet conditional independence (d-separation) conditions using the wrap.dSep function. + if (wrap.dSep(G.unobs, J, a.set, setdiff(D, a.set)) && wrap.dSep(G.unobs, vari, b.set, setdiff(cond, b.set))) { +# If conditions are satisfied, update J.new and D.new and return J.new <- union(J, vari) D.new <- ds[[i]] return(list(J.new, D.new)) } } +# If any element of M is in D, attempt to insert a missing variable from M into J +# and D using the insert and join functions. if (any(M %in% D)) { joint <- insert(J, D, M, cond, S, O, G.unobs, G, G.obs, topo) +# If the joint operation results in a larger J, return the joint result if (length(joint[[1]]) > length(J)) { return(joint) } } +# If no updates were made, return the original J and D return(list(J, D)) } diff --git a/R/parse.expression.R b/R/parse.expression.R index 0f26c7a..a938512 100644 --- a/R/parse.expression.R +++ b/R/parse.expression.R @@ -1,12 +1,111 @@ +#' Parse.expression +#' +#' The \code{`parse.expression`} function takes a probabilistic expression and processes it based on the topological order, unobserved and observed graphs, and the underlying graph structure to simplify or modify the expression. +#' +#' @param P probability object. The identified probabilistic expression taken from the output of \code{`causal.effect`}. Typically includes components such as numerator (`num`), denominator (`den`), product (`product`), summation set (`sumset`), and a fraction indicator (`fraction`). +#' @param topo igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G. +#' @param G.unobs object created with \link{unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +#' @param G object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges. +#' @param G.obs object created with \link{observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +#' +#' @dependencies This function depends on several functions from the causaleffect package, including: \link{simplify} and \link{probability}. +#' +#' @return A parsed probability object, potentially with adjusted summation sets and children, or `NULL` if the expression can be fully simplified away. This output can be used as the `P` for \link{simplify}. +#' +#' @details +#' The function recursively processes the input probability object (`P`) by applying rules based on the topological order and the graph structures. The function handles fractions, products, and summation sets, simplifying the expression where possible. +#' +#' If the expression involves a fraction, the function attempts to cancel out terms and simplify both the numerator and the denominator. It also handles product terms by recursively parsing the children of the product and adjusting the summation sets accordingly. +#' +#' The function ultimately returns a simplified expression or `NULL` if the expression reduces entirely. +#' +#' #' @references Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +#' +#' @author Haley Hummel, +#' Psychology PhD student at Oregon State University +#' +#' @examples +#' \dontrun{ +#' +#'# defining graph information for G_1 using igraph +#' G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +#' G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +#' +#' # defining observed nodes of graph G_1 using igraph +#' G_1.obs <- observed.graph(G_1) +#' +#' # defining unobserved nodes of graph G_1 using igraph +#' G_1.unobs <- unobserved.graph(G_1) +#' +#' # defining topological sort of graph G_1 using igraph +#' topo_1 <- igraph::topological.sort(G_1.obs) +#' topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] +#' +#' # run causal.effect. simp = TRUE vs. simp = FALSE matters — as a simplification +#' # procedure is applied to the resulting probability object if simp = TRUE. +#' # d-separation and the rules of do-calculus are applied repeatedly to simplify +#' # the expression. The procedure is NOT applied if simp = FALSE. +#' # For this example, the outputs for simp = TRUE vs. simp = FALSE are the same. +#' +#' causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE) +#' +#' # causal.effect generates a probability structure, which can then be applied to be the +#' # input of the function parse.expression. +#' # the initial probabilistic expression should be: ∑z P(y|z,x)P(z) +#' # the simplified expression should look like: ∑z P(y|z,x)P(z) +#' # The expr = FALSE is key to NOT printing a string (e.g. in the above 2 lines) to generate a longer output. +#' P_1 <- probability( +#' sumset = c("z"), +#' product = TRUE, +#' fraction = FALSE, +#' sum = FALSE, +#' children = list( +#' probability(var = "y", cond = c("z", "x")), +#' probability(var = "z", cond = character(0)) +#' ), +#' den = list(), +#' num = list(), +#' domain = 0, +#' weight = 0 +#' ) +#' +#' # now must define expected output from parse.expression +#' expected_output_1 <- probability( +#' sumset = "z", +#' product = TRUE, +#' fraction = FALSE, +#' sum = FALSE, +#' children = list( +#' probability(var = "y", cond = c("z", "x")), +#' probability(var = "z", cond = character(0)) +#' ), +#' den = list(), +#' num = list(), +#' domain = 0, +#' weight = 0 +#' ) +#' +#' parse.expression(P_1, topo_1, G_1.unobs, G_1, G_1.obs), expected_output_1) +#' +#' } +#' +#' @export + + parse.expression <- function(P, topo, G.unobs, G, G.obs) { + # Check if the expression is a fraction if (P$fraction) { + # If so, attempt to cancel out common terms in the fraction P <- cancel.out(P) + # If it's still a fraction after cancellation, recursively parse the denominator if (P$fraction) { P$den <- parse.expression(P$den, topo, G.unobs, G, G.obs) + # If the denominator becomes empty, update the summation set and children if (length(P$den) == 0) { sum_p <- P$sumset P <- P$num P$sumset <- union(sum_p, P$sumset) %ts% topo + # If the numerator is a product, update its children if (P$product) { if (length(P$children) == 1) { sum_p <- P$sumset @@ -16,6 +115,7 @@ parse.expression <- function(P, topo, G.unobs, G, G.obs) { } return(P) } + # Adjust the summation set if there are nodes that are independent of the denominator if (length(P$sumset) > 0 && length(P$den) > 0) { nodep <- setdiff(P$sumset, dependencies(P$den)) if (length(nodep) > 0) { @@ -23,19 +123,24 @@ parse.expression <- function(P, topo, G.unobs, G, G.obs) { P$sumset <- setdiff(P$sumset, nodep) %ts% topo } } + # Recursively parse the numerator and attempt to cancel out terms again P$num <- parse.expression(P$num, topo, G.unobs, G, G.obs) P <- cancel.out(P) } return(P) } + # Initialize flag to determine if terms should be simplified simplify_terms <- TRUE + # If the expression is a product of other expressions, then identify non-atomic children (products, sums, or fractions) if (P$product) { non_atomic <- sapply(P$children, FUN = function(x) (x$product || length(x$sumset) > 0 || x$fraction || x$sum)) + # If there are non-atomic children, parse them recursively if (sum(non_atomic) > 0) { parse_children <- P$children[non_atomic] P$children <- P$children[!non_atomic] for (i in 1:length(parse_children)) { P.parse <- parse.expression(parse_children[[i]], topo, G.unobs, G, G.obs) + # If the parsed child has a collapse flag, merge its children into the current expression if (!is.null(P.parse$collapse)) { P$children <- c(P$children, P.parse$children) } else { @@ -43,27 +148,34 @@ parse.expression <- function(P, topo, G.unobs, G, G.obs) { } } } + # If there are still non-atomic children after parsing, do NOT simplify terms if (length(P$children) > 0) { non_atomic <- sapply(P$children, FUN = function(x) (x$product || length(x$sumset) > 0 || x$fraction || x$sum)) if (sum(non_atomic) > 0) simplify_terms <- FALSE } else return(NULL) } + # If there are no variables left in the summation set, return the expression if (length(P$sumset) == 0) return(P) + # If the expression is not a product and the summation set matches the variables, return NULL if (!P$product) { if (identical(P$sumset, P$var)) return(NULL) else return(P) } + # If simplification is possible, order children and summation set, and simplify the expression if (simplify_terms) { ord.children <- order(unlist(lapply(P$children, FUN = function(x) which(topo == x$var))), decreasing = TRUE) ord.sum <- order(sapply(P$sumset, FUN = function(x) which(topo == x)), decreasing = TRUE) P$children <- P$children[ord.children] P$sumset <- P$sumset[ord.sum] P <- simplify(P, topo, G.unobs, G, G.obs) + # If all children have been simplified away, return NULL if (length(P$children) == 0) return(NULL) } + # Initialize a new probability object to hold any children that can be removed from summation P.parse <- probability(product = TRUE, children = list()) remove <- c() j <- 0 + # Iterate over children to identify those independent of the summation set if (length(P$sumset) > 0) { for (i in 1:length(P$children)) { dep <- dependencies(P$children[[i]]) @@ -73,10 +185,12 @@ parse.expression <- function(P, topo, G.unobs, G, G.obs) { } } } else return(P) + # If any children can be removed, add them to the new probability object if (j > 0) { P.parse$children <- P$children[remove] P.parse$collapse <- TRUE P$children <- P$children[-remove] + # If only one child remains, update the summation set and recursively parse the child if (length(P$sumset) > 0) { if (length(P$children) == 1) { sum_p <- P$sumset @@ -85,9 +199,9 @@ parse.expression <- function(P, topo, G.unobs, G, G.obs) { P <- parse.expression(P, topo, G.unobs, G, G.obs) } } + # Add the remaining child to the new probability object and return it if (length(P$children) > 0) P.parse$children[[j + 1]] <- P return(P.parse) } return(P) } - diff --git a/R/powerset.R b/R/powerset.R index f0279a1..b8c9042 100644 --- a/R/powerset.R +++ b/R/powerset.R @@ -1,7 +1,35 @@ +#' Powerset +#' +#' Generates the power set of a given set. The power set is the set of all possible subsets of the original set, including the empty set and the set itself. +#' +#' @param set A vector representing the original set for which the power set will be generated. The set can contain any type of elements (e.g., numeric, character, or logical). +#' +#' @details The function computes all possible combinations of the elements of the input set. This includes the empty subset, individual elements, and all larger subsets up to and including the full set. The number of subsets in the power set of a set of size \code{n} is \code{2^n}. +#' +#' @return A list of vectors, where each vector is a subset of the original input set. The list contains \code{2^n} subsets, where \code{n} is the length of the input set. If the input set is empty, the function returns a list containing only the empty set. +#' +#' @examples +#' +#' +#' @seealso \code{\link{join}} for using powerset with conditional independence in probabilistic graphical models. +#' +#' @keywords set theory combinatorics +#' @concept power set +#' @concept subsets + + powerset <- function(set) { n <- length(set) +# If the input set n is empty, return a list containing only the empty set if (n == 0) return(list(c())) +# Generate a representation of all possible combinations of elements being +# included or excluded from the subsets: all binary numbers from 0 to 2^n - 1. +# Then, convert them to logical vectors of length n. Each logical vector +# indicates which elements of the input set are included in a particular subset. indices <- sapply(0:(2^n-1), function(p) as.logical(intToBits(p)[1:n]), simplify = FALSE) +# Use these logical vectors to create subsets of the input set. Elements that +# correspond to TRUE values in the vector are extracted. Each logical +# vector corresponds to one possible subset. subsets <- lapply(indices, function(i) set[i]) return(subsets) -} \ No newline at end of file +} diff --git a/R/simplify.R b/R/simplify.R index a967d64..c4bdb5c 100644 --- a/R/simplify.R +++ b/R/simplify.R @@ -1,18 +1,124 @@ +#' Simplify +#' +#' This function algebraically simplifies probabilistic expressions given by the ID algorithm from \link{causal.effect}. It always attempts to perform maximal simplification, meaning that as many variables of the set are removed as possible. If the simplification in terms of the entire set cannot be completed, the intermediate result with as many variables simplified as possible should be returned. +#' +#' Run \link{causal.effect} with the graph information first, then use the output of \link{causal.effect} as the \code{P} in \link{parse.expression}. Use the output from \link{parse.expression} as the \code{P} in \code{simplify}. +#' +#' For further information, see Tikka & Karvanen (2017) "Simplifying Probabilistic Expressions in Causal Inference" Algorithm 1. +#' +#' @param P probability object created with \link{probability()}. The probabilistic expression that will be simplified. +#' @param topo igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G. +#' @param G.unobs object created with \link{unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders. +#' @param G object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges. +#' @param G.obs object created with \link{observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes). +#' +#' @dependencies This function depends on several functions from the causaleffect package, including: \link{irrelevant}, \link{wrap.dSep}, \link{dSep}, \link{join}, \link{ancestors}, \link{factorize}, \link{parents}, \link{children}, and \link{powerset}. +#' +#' @return \code{simplify()} will return the simplified atomic expression in a list structure. For example (from example below): +#' \preformatted{ +#' $var: character(0) +#' +#' $cond: character(0) +#' +#' $sumset: [1] "z" +#' +#' $do: character(0) +#' +#' $product: [1] TRUE +#' +#' $fraction: [1] FALSE +#' +#' $sum: [1] FALSE +#' +#' $children: list() +#' +#' $den: list() +#' +#' $num: list() +#' +#' $domain: [1] 0 +#' +#' $weight: [1] 0 +#' +#' $attr(,"class"): [1] "probability"} +#' +#' +#' This long list structure can be converted into a string formatted in LaTeX syntax by the \code{get.expression} function. For example: +#' +#' +#' \preformatted{string_expression <- simplify(P, topo, G.unobs, G, G.obs) +#' get.expression(string_expression) +#' +#' The resulting string should look like (from example below): "\\sum_{w}P(y|w,x)P(w)"} +#' +#' @references Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +#' +#' @author Haley Hummel, +#' Psychology PhD student at Oregon State University +#' +#' @seealso \code{\link{causal.effect}}, \code{\link{parse.expression}}, \code{\link{get.expression}}, \code{\link{probability}} +#' +#' +#' @examples +#' \dontrun{ +#' # defining graph information for G_1 using igraph +#' G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +#' G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +#' +#' # defining observed nodes of graph G_1 using igraph +#' G_1.obs <- observed.graph(G_1) +#' +#' # defining unobserved nodes of graph G_1 using igraph +#' G_1.unobs <- unobserved.graph(G_1) +#' +#' # defining topological sort of graph G_1 using igraph +#' topo_1 <- igraph::topological.sort(G_1.obs) +#' topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] +#' +#' # run causal.effect. simp = TRUE vs. simp = FALSE matters — as a simplification +#' # procedure is applied to the resulting probability object if simp = TRUE. +#' # d-separation and the rules of do-calculus are applied repeatedly to simplify +#' # the expression. The procedure is NOT applied if simp = FALSE. +#' causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE) +#' +#' # causal.effect generates a probability structure, which can then be applied to be the +#' # input of the function parse.expression. +#' parse.expression(causal_effect_output, topo_1, G_1.unobs, G_1, G_1.obs) +#' +#' # parse.expression generates a list structure, which can then be applied to be the +#' # input of the simplify function. +#' # call simplify function, which will print out the simplified list structure +#' simplify(parse_expression_output, topo_1, G_1.unobs, G_1, G_1.obs) +#' } +#' +#' @keywords models manip math utilities +#' @concept probabilistic expressions +#' @concept graph theory +#' @concept causal inference + + simplify <- function(P, topo, G.unobs, G, G.obs) { +# initialize j to 0 j <- 0 +# WHILE loop runs until all elements in P['sumset'] are processed while (j < length(P$sumset)) { +# make a copy of original expression P (P.orig) used to go back to original +# expression if simplification does not work P.orig <- P irl.len <- 0 irrel <- NULL terms <- list() vars <- sapply(P$children, "[[", "var") +# initialize all other variables j <- j + 1 i <- which(vars == P$sumset[j]) k <- 1 R.var <- character() + R.var <- character() R.cond <- list() J <- character() D <- character() +# remove irrelevant terms from expression to reduce complexity if (i > 1) { irrel <- irrelevant(P$children[1:(i-1)], P$sumset[j], P$sumset, G.unobs) irl.len <- length(irrel) @@ -23,8 +129,11 @@ simplify <- function(P, topo, G.unobs, G, G.obs) { vars <- vars[-irrel] } } +# topological sorting - separate variables into Missing (M) and Observed (O) M <- topo[!(topo %in% vars)] O <- topo[(topo %in% vars)] +# perform join operation (join components of expression to simplify). If successful, +# update the components accordingly. If fails, break loop & reset expression. while (k <= i) { joint <- join(J, D, P$children[[k]]$var, P$children[[k]]$cond, P$sumset[j], M, O, G.unobs, G, G.obs, topo) if (length(joint[[1]]) <= length(J)) { @@ -44,6 +153,8 @@ simplify <- function(P, topo, G.unobs, G, G.obs) { } } } +# if simplification possible, factorize expression using intermediate sets and +# update sumset.Check for further elimination of redundant terms using cancel(). if (k == i + 1) { P <- factorize(J, D, P, topo, i) S <- P$sumset[j] @@ -54,10 +165,11 @@ simplify <- function(P, topo, G.unobs, G, G.obs) { else { P <- P.cancel j <- 0 - } + } } else j <- 0 if (irl.len > 0) P$children <- c(terms, P$children) } else P <- P.orig } +# return simplified expression, or the original if simplification was not possible. return(P) -} \ No newline at end of file +} diff --git a/man/insert.Rd b/man/insert.Rd new file mode 100644 index 0000000..4454c03 --- /dev/null +++ b/man/insert.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/insert.R +\name{insert} +\alias{insert} +\title{Insert} +\usage{ +insert(J, D, M, cond, S, O, G.unobs, G, G.obs, topo) +} +\arguments{ +\item{J}{character vector. The set of variables representing the joint distribution.} + +\item{D}{character vector. The set of variables representing the conditioning set of the joint distribution.} + +\item{M}{character vector. The variable to be inserted.} + +\item{cond}{character vector. The set of conditioning variables.} + +\item{S}{character vector. The current summation variable.} + +\item{O}{character vector. The set of observed variables.} + +\item{G.unobs}{igraph object created with \code{igraph::unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders.} + +\item{G}{igraph object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges.} + +\item{G.obs}{igraph object created with \code{igraph::observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes).} + +\item{topo}{igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G.} +} +\value{ +\code{Insert} returns a list with: +\code{J.new}{character vector. An updated set of joint distribution variables.} +\code{D.new}{character vector. An updated set of conditioning variables.} +\code{M}{character vector. The inserted variable.} +\code{ds[[i]]}{character vector. The subset from the power set used in the insertion.} +However, if no conditions were met, \code{insert} will return the original \code{J} and \code{D}. +} +\description{ +\code{Insert} will insert a missing variable into a joint distribution \eqn{P(J|D)} +using d-separation criteria in a given graph \code{G}. +It is called when there are variables without corresponding terms in the expression +} +\examples{ +\dontrun{ +# defining graph information for G_1 using igraph +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") + +# defining observed nodes of graph G_1 using igraph +G_1.obs <- observed.graph(G_1) + +# defining unobserved nodes of graph G_1 using igraph +G_1.unobs <- unobserved.graph(G_1) + +# defining topological sort of graph G_1 using igraph +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: z x (this represents cond in simplify()) +# P$sumset[j]: z (this reprensents S in simplify()) + +J_1 <- character() +D_1 <- character() +M_1 <- "x" +cond_1 <- c("z", "x") +S_1 <- "z" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_1.unobs +# G = G_1 +# G.obs = G_1.obs +# topo = topo_1 + +# we expect the output from this (representing J, D) to be: +# [1] character(0) +# [2] character(0) + +insert(J_1, D_1, M_1, cond_1, S_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1) +} + +} +\references{ +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +} +\seealso{ +\code{\link{join}}, \code{\link{simplify}}, \code{\link{wrap.dSep}}, \code{\link{powerset}} +} +\author{ +Haley Hummel, +Psychology PhD student at Oregon State University +} +\concept{causal inference} +\concept{d-separation} +\concept{graph theory} +\concept{joint distribution} +\concept{probabilistic expressions} +\keyword{distribution} +\keyword{graphs} +\keyword{manip} +\keyword{math} +\keyword{methods} +\keyword{models} +\keyword{multivariate} +\keyword{probability} +\keyword{utilities} diff --git a/man/join.Rd b/man/join.Rd new file mode 100644 index 0000000..16aff8f --- /dev/null +++ b/man/join.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join.R +\name{join} +\alias{join} +\title{Join} +\usage{ +join(J, D, vari, cond, S, M, O, G.unobs, G, G.obs, topo) +} +\arguments{ +\item{J}{character vector. Joint set \code{P(J|D)}; already processed and included in joint distribution +from previous \code{\link{simplify}} iteration. Initially, may be empty for the starting point of +the joint distribution. \code{vari} is added to expand it if d-separation conditions are met.} + +\item{D}{character vector. Term \code{P(V|C) := P(Vk|Ck)}; set of variables that condition the joint distribution. +\code{Join} checks and updates \code{D} as necessary to maintain the validity of the joint distribution +when combined with \code{vari}.} + +\item{vari}{character scalar. Current variable being considered for inclusion in the joint distribution.} + +\item{cond}{character vector. Set of variables that condition the current variable \code{vari}. \code{Join} uses \code{cond} +to evaluate conditional independence and determine if \code{vari} can be added to \code{J}.} + +\item{S}{likely a character vector. Not used directly in \code{join}. Current summation variable.} + +\item{M}{character vector. Missing variables (variables not contained within the expression).} + +\item{O}{character vector. Observed variables (variables contained within the expression).} + +\item{G.unobs}{igraph object created with \code{igraph::unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders.} + +\item{G}{igraph object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges.} + +\item{G.obs}{igraph object created with \code{igraph::observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes).} + +\item{topo}{igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G.} +} +\value{ +\code{Join} returns the joint result, or the original result if none of the conditions for joining were met. +} +\description{ +\code{Join} and \code{insert} are essentially two variations of the underlying procedure of determining whether the terms of the atomic expression actually represent a joint distribution. \code{Join} is called when we are processing terms that already exist in the expression. +Attempts to combine two terms: the joint term \code{P(J|D)} obtained from \code{simplify()} and the +term \code{P(V|C) := P(Vk|Ck)} of the current iteration step. The goal is to +determine if these terms can be combined based on the d-separation criteria in the graph \code{G}. +} +\examples{ +\dontrun{ + +# defining graph information for G_1 using igraph +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") + +# defining observed nodes of graph G_1 using igraph +G_1.obs <- observed.graph(G_1) + +# defining unobserved nodes of graph G_1 using igraph +G_1.unobs <- unobserved.graph(G_1) + +# defining topological sort of graph G_1 using igraph +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: z x (this represents cond in simplify()) + # P$sumset[j]: z (this reprensents S in simplify()) + +J_1 <- character() +D_1 <- character() +vari_1 <- "y" +cond_1 <- c("z", "x") +S_1 <- "z" +M_1 <- "x" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_1.unobs +# G = G_1 +# G.obs = G_1.obs +# topo = topo_1 + +# we expect the output from this to be: +# [1] "y" +# [2] "z" "x" + +join(J_1, D_1, vari_1, cond_1, S_1, M_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1) +} + +} +\references{ +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +} +\seealso{ +\code{\link{causal.effect}}, \code{\link{parse.expression}}, \code{\link{get.expression}}, \code{\link{probability}} + +\code{\link{simplify}}, \code{\link{wrap.dSep}}, \code{\link{insert}} +} +\author{ +Haley Hummel, +Psychology PhD student at Oregon State University +} +\concept{causal inference} +\concept{graph theory} +\concept{probabilistic expressions} +\keyword{manip} +\keyword{math} +\keyword{models} +\keyword{utilities} diff --git a/man/parse.expression.Rd b/man/parse.expression.Rd new file mode 100644 index 0000000..760402f --- /dev/null +++ b/man/parse.expression.Rd @@ -0,0 +1,104 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse.expression.R +\name{parse.expression} +\alias{parse.expression} +\title{Parse.expression} +\usage{ +parse.expression(P, topo, G.unobs, G, G.obs) +} +\arguments{ +\item{P}{probability object. The identified probabilistic expression taken from the output of \code{`causal.effect`}. Typically includes components such as numerator (\code{num}), denominator (\code{den}), product (\code{product}), summation set (\code{sumset}), and a fraction indicator (\code{fraction}).} + +\item{topo}{igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G.} + +\item{G.unobs}{object created with \link{unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders.} + +\item{G}{object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges.} + +\item{G.obs}{object created with \link{observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes).} +} +\value{ +A parsed probability object, potentially with adjusted summation sets and children, or \code{NULL} if the expression can be fully simplified away. This output can be used as the \code{P} for \link{simplify}. +} +\description{ +The \code{`parse.expression`} function takes a probabilistic expression and processes it based on the topological order, unobserved and observed graphs, and the underlying graph structure to simplify or modify the expression. +} +\details{ +The function recursively processes the input probability object (\code{P}) by applying rules based on the topological order and the graph structures. The function handles fractions, products, and summation sets, simplifying the expression where possible. + +If the expression involves a fraction, the function attempts to cancel out terms and simplify both the numerator and the denominator. It also handles product terms by recursively parsing the children of the product and adjusting the summation sets accordingly. + +The function ultimately returns a simplified expression or \code{NULL} if the expression reduces entirely. + +#' @references Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +} +\examples{ +\dontrun{ + +# defining graph information for G_1 using igraph +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") + +# defining observed nodes of graph G_1 using igraph +G_1.obs <- observed.graph(G_1) + +# defining unobserved nodes of graph G_1 using igraph +G_1.unobs <- unobserved.graph(G_1) + +# defining topological sort of graph G_1 using igraph +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +# run causal.effect. simp = TRUE vs. simp = FALSE matters — as a simplification +# procedure is applied to the resulting probability object if simp = TRUE. +# d-separation and the rules of do-calculus are applied repeatedly to simplify +# the expression. The procedure is NOT applied if simp = FALSE. +# For this example, the outputs for simp = TRUE vs. simp = FALSE are the same. + +causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE) + +# causal.effect generates a probability structure, which can then be applied to be the +# input of the function parse.expression. +# the initial probabilistic expression should be: ∑z P(y|z,x)P(z) +# the simplified expression should look like: ∑z P(y|z,x)P(z) +# The expr = FALSE is key to NOT printing a string (e.g. in the above 2 lines) to generate a longer output. +P_1 <- probability( + sumset = c("z"), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# now must define expected output from parse.expression +expected_output_1 <- probability( + sumset = "z", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +parse.expression(P_1, topo_1, G_1.unobs, G_1, G_1.obs), expected_output_1) + +} + +} +\author{ +Haley Hummel, +Psychology PhD student at Oregon State University +} diff --git a/man/powerset.Rd b/man/powerset.Rd new file mode 100644 index 0000000..d58b1f9 --- /dev/null +++ b/man/powerset.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/powerset.R +\name{powerset} +\alias{powerset} +\title{Powerset} +\usage{ +powerset(set) +} +\arguments{ +\item{set}{A vector representing the original set for which the power set will be generated. The set can contain any type of elements (e.g., numeric, character, or logical).} +} +\value{ +A list of vectors, where each vector is a subset of the original input set. The list contains \code{2^n} subsets, where \code{n} is the length of the input set. If the input set is empty, the function returns a list containing only the empty set. +} +\description{ +Generates the power set of a given set. The power set is the set of all possible subsets of the original set, including the empty set and the set itself. +} +\details{ +The function computes all possible combinations of the elements of the input set. This includes the empty subset, individual elements, and all larger subsets up to and including the full set. The number of subsets in the power set of a set of size \code{n} is \code{2^n}. +} +\seealso{ +\code{\link{join}} for using powerset with conditional independence in probabilistic graphical models. +} +\concept{power set} +\concept{subsets} +\keyword{combinatorics} +\keyword{set} +\keyword{theory} diff --git a/man/simplify.Rd b/man/simplify.Rd new file mode 100644 index 0000000..2841e82 --- /dev/null +++ b/man/simplify.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simplify.R +\name{simplify} +\alias{simplify} +\title{Simplify} +\usage{ +simplify(P, topo, G.unobs, G, G.obs) +} +\arguments{ +\item{P}{probability object created with \link{probability()}. The probabilistic expression that will be simplified.} + +\item{topo}{igraph list object created with \code{igraph::topological.sort} and \code{igraph::get.vertex.attribute}. The topological ordering of the vertices in graph G.} + +\item{G.unobs}{object created with \link{unobserved.graph(G)}. Separate graph that turns bidirected edges into explicit nodes for unobserved confounders.} + +\item{G}{object created with \code{igraph::graph.formula()}. Main graph G. Includes bidirected edges.} + +\item{G.obs}{object created with \link{observed.graph(G)}. Separate graph that does not contain bidirected edges (only contains the directed edges with observed nodes).} +} +\value{ +\code{simplify()} will return the simplified atomic expression in a list structure. For example (from example below): +\preformatted{ + $var: character(0) + + $cond: character(0) + + $sumset: [1] "z" + + $do: character(0) + + $product: [1] TRUE + + $fraction: [1] FALSE + + $sum: [1] FALSE + + $children: list() + + $den: list() + + $num: list() + + $domain: [1] 0 + + $weight: [1] 0 + + $attr(,"class"): [1] "probability"} + +This long list structure can be converted into a string formatted in LaTeX syntax by the \code{get.expression} function. For example: + +\preformatted{string_expression <- simplify(P, topo, G.unobs, G, G.obs) +get.expression(string_expression) + +The resulting string should look like (from example below): "\\sum_{w}P(y|w,x)P(w)"} +} +\description{ +This function algebraically simplifies probabilistic expressions given by the ID algorithm from \link{causal.effect}. It always attempts to perform maximal simplification, meaning that as many variables of the set are removed as possible. If the simplification in terms of the entire set cannot be completed, the intermediate result with as many variables simplified as possible should be returned. +} +\details{ +Run \link{causal.effect} with the graph information first, then use the output of \link{causal.effect} as the \code{P} in \link{parse.expression}. Use the output from \link{parse.expression} as the \code{P} in \code{simplify}. + +For further information, see Tikka & Karvanen (2017) "Simplifying Probabilistic Expressions in Causal Inference" Algorithm 1. +} +\examples{ +\dontrun{ +# defining graph information for G_1 using igraph +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") + +# defining observed nodes of graph G_1 using igraph +G_1.obs <- observed.graph(G_1) + +# defining unobserved nodes of graph G_1 using igraph +G_1.unobs <- unobserved.graph(G_1) + +# defining topological sort of graph G_1 using igraph +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +# run causal.effect. simp = TRUE vs. simp = FALSE matters — as a simplification +# procedure is applied to the resulting probability object if simp = TRUE. +# d-separation and the rules of do-calculus are applied repeatedly to simplify +# the expression. The procedure is NOT applied if simp = FALSE. +causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE) + +# causal.effect generates a probability structure, which can then be applied to be the +# input of the function parse.expression. +parse.expression(causal_effect_output, topo_1, G_1.unobs, G_1, G_1.obs) + +# parse.expression generates a list structure, which can then be applied to be the +# input of the simplify function. +# call simplify function, which will print out the simplified list structure +simplify(parse_expression_output, topo_1, G_1.unobs, G_1, G_1.obs) +} + +} +\references{ +Tikka, S., & Karvanen, J. (2017). Simplifying probabilistic expressions in causal inference. Journal of Machine Learning Research, 18(36), 1-30. +} +\seealso{ +\code{\link{causal.effect}}, \code{\link{parse.expression}}, \code{\link{get.expression}}, \code{\link{probability}} +} +\author{ +Haley Hummel, +Psychology PhD student at Oregon State University +} +\concept{causal inference} +\concept{graph theory} +\concept{probabilistic expressions} +\keyword{manip} +\keyword{math} +\keyword{models} +\keyword{utilities} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..0beb742 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(causaleffect) + +test_check("causaleffect") diff --git a/tests/testthat/all_3_test_cases.R b/tests/testthat/all_3_test_cases.R new file mode 100644 index 0000000..78cfa21 --- /dev/null +++ b/tests/testthat/all_3_test_cases.R @@ -0,0 +1,2046 @@ +library(testthat) +library(igraph) +library(causaleffect) + +causal_effect_files <- list.files("~/Projects/causaleffect/R", pattern = "\\.R$", full.names = TRUE) +lapply(causal_effect_files, source) + +#------------------------------------------------------------------- +# test case #1 from pp. 6-7 of causaleffect on CRAN - includes unobserved confounders. +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) causal.effect with simp = TRUE, +# (4) parse.expression (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests), +# (5) simplify (same for causal.effect simp = TRUE vs. FALSE), +# (6) join (same for causal.effect simp = TRUE vs. FALSE) +# (7) insert (same for causal.effect simp = TRUE vs. FALSE) + +# causal.effect with simp = TRUE and simp = FALSE yield the same expression, so +# there are only 7 unit tests compared to 9 unit tests for test cases #2 and #3 + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +G_1.obs <- observed.graph(G_1) +G_1.unobs <- unobserved.graph(G_1) +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +print(topo_1) + +plot(G_1) +# ^^ plotting this gives us a bidirected edge, which represents a latent confounder we can see in unobserved.graph +plot(observed.graph(G_1.obs)) +plot(unobserved.graph(G_1.unobs)) +# ^^ unobserved.graph plots observed graph, plus unobserved node(s) + + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #1 +# currently PASSES + +test_that("topo works on graph with unobserved confounders G_1", { + expect_equal(topo_1, c("z", "x", "y")) +}) + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #1 when simp = FALSE +# expression should NOT be simplified. +# currently PASSES + +test_that("causal.effect works on graph with unobserved confounders G_1", { + expect_equal(causal.effect("y", "x", G = G_1, simp = FALSE), + "\\sum_{z}P(y|z,x)P(z)") + +}) + +#------------------------------------------------------------------- +# (3) testing that causal.effect works with test case #1 when simp = TRUE +# expression should be the same, since it cannot be simplified. +# currently PASSES + +test_that("causal.effect works on graph with unobserved confounders G_1", { + expect_equal(causal.effect("y", "x", G = G_1, simp = TRUE), + "\\sum_{z}P(y|z,x)P(z)") +}) + +#------------------------------------------------------------------- +# (4) testing that parse.expression works with test case #1 +# causal.effect with simp = TRUE and simp = FALSE (they are the same) +# currently PASSES + +# define P_1 for parse.expression(). P needs to be a probability object. +# the initial probabilistic expression should be: ∑z P(y|z,x)P(z) +# the simplified expression should look like: ∑z P(y|z,x)P(z) + +# I used the output from causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE). +# The expr = FALSE is key to NOT printing a string! +P_1 <- probability( + sumset = c("z"), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + + +# now must define expected output from parse.expression +expected_output_1 <- probability( + sumset = "z", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + + +# now running testthat +test_that("parse.expression works on graph with unobserved confounders G_1", { + expect_equal(parse.expression(P_1, topo_1, G_1.unobs, G_1, G_1.obs), + expected_output_1) + +}) + +#------------------------------------------------------------------- +# (5) testing that simplify works with test case #1 +# currently PASSES + +# we can use the same P_1 and expected_output_1 as we used for parse.expression, as the expression +# passes through parse.expression unchanged. + +test_that("simplify works on graph with unobserved confounders G_1", { + expect_equal(simplify(P_1, topo_1, G_1.unobs, G_1, G_1.obs), + expected_output_1) +}) + +#------------------------------------------------------------------- +# (6) testing that join works with test case #1 +# currently PASSES + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: z x (this represents cond in simplify()) +# P$sumset[j]: z (this reprensents S in simplify()) + +J_1 <- character() +D_1 <- character() +vari_1 <- "y" +cond_1 <- c("z", "x") +S_1 <- "z" +M_1 <- "x" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_1.unobs +# G = G_1 +# G.obs = G_1.obs +# topo = topo_1 + +# we expect the output from this to be: +# [1] "y" +# [2] "z" "x" + +join_output_1 <- list( + c("y"), + c("z", "x") +) + +test_that("join works on graph with unobserved confounders G_1", { + expect_equal(join(J_1, D_1, vari_1, cond_1, S_1, M_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1), + join_output_1) +}) + +#------------------------------------------------------------------- +# (7) testing that insert works with test case #1 +# currently PASSES + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$cond: z x (this represents cond in simplify()) +# P$sumset[j]: z (this represents S in simplify()) + +J_1 <- character() +D_1 <- character() +M_1 <- "x" +cond_1 <- c("z", "x") +S_1 <- "z" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_1.unobs +# G = G_1 +# G.obs = G_1.obs +# topo = topo_1 + +# we expect the output from this (representing J, D) to be: +# [1] character(0) +# [2] character(0) + +insert_output_1 <- list(character(0), character(0)) + +test_that("insert works on graph with unobserved confounders G_1", { + expect_equal(insert(J_1, D_1, M_1, cond_1, S_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1), + insert_output_1) +}) + + +#------------------------------------------------------------------- +# test case #2 from pp. 6-7 of causaleffect on CRAN - pruning. +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) parse.expression from causal.effect simp = FALSE, +# (4) simplify from causal.effect simp = FALSE, +# (5) causal.effect with simp = TRUE, +# (6) parse.expression from causal.effect simp = TRUE, +# (7) simplify from causal.effect simp = TRUE +# (8) DOES NOT PASS YET - join (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) +# (9) DOES NOT PASS YET - insert (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package + +G_2 <- graph.formula(x -+ z_4, z_4 -+ y, z_1 -+ x, z_2 -+ z_1, + z_3 -+ z_2, z_3 -+ x, z_5 -+ z_1, z_5 -+ z_4, x -+ z_2, z_2 -+ x, + z_3 -+ z_2, z_2 -+ z_3, z_2 -+ y, y -+ z_2, + z_4 -+ y, y -+ z_4, z_5 -+ z_4, z_4 -+ z_5, simplify = FALSE) +G_2 <- set.edge.attribute(graph = G_2, "description", 9:18, "U") +G_2.obs <- observed.graph(G_2) +G_2.unobs <- unobserved.graph(G_2) +topo_2 <- igraph::topo_sort(G_2.obs) +topo_2 <- igraph::get.vertex.attribute(G_2, "name")[topo_2] + +print(topo_2) + +plot(G_2) +# ^^ plotting this gives us a bidirected edge, which represents a latent confounder we can see in unobserved.graph +plot(observed.graph(G_2.obs)) +plot(unobserved.graph(G_2.unobs)) +# ^^ unobserved.graph plots observed graph, plus unobserved node(s) + + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #2 +# currently PASSES + +test_that("topo works on graph with pruning G_2", { + expect_equal(topo_2, c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y")) +}) + + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #2 when simp = FALSE +# expression should NOT be simplified. +# currently PASSES + +test_that("causal.effect works on graph with pruning G_2", { + expect_equal(causal.effect("y", "x", G = G_2, primes = TRUE, prune = TRUE, simp = FALSE), + "\\frac{\\sum_{z_3,z_5,z_2,z_4}P(y|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)}{\\sum_{z_3,z_5,z_2,z_4,y^{\\prime}}P(y^{\\prime}|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)}") + +}) + +#------------------------------------------------------------------- +# (3) testing that parse.expression works with test case #2 +# causal.effect with simp = FALSE +# currently PASSES + +# Trying to do set.primes before parse.expression +vars <- c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y") +counter <- setNames(rep(0, length(vars)), vars) +counter["y"] <- 2 +set.primes(vars, FALSE, counter) + + +# define P_2 for parse.expression() using the output from +# causal.effect("y", "x", G = G_2, expr = FALSE, primes = TRUE, prune = TRUE, simp = TRUE). +# expr = FALSE and simp = TRUE +# the initial probabilistic expression should be: +# \\frac{\\sum_{z_3,z_5,z_2,z_4}P(y|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)} +# {\\sum_{z_3,z_5,z_2,z_4,y^{\\prime}}P(y^{\\prime}|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)} + +P_2_pe1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# must define expected output object to match output from parse.expression: +# Provided R structure (simplified) +expected_output_2_pe1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("parse.expression works on graph with pruning G_2", { + expect_equal(parse.expression(P_2_pe1, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_pe1) + +}) + +#------------------------------------------------------------------- +# (4) testing that simplify works with test case #2 +# causal.effect with simp = FALSE +# currently PASSES + +# the simplified expression should look like: +# \\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)} +P_2_s1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now must define the expected output object for simplify() +expected_output_2_s1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("simplify works on graph with pruning G_2", { + expect_equal(simplify(P_2_s1, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_s1) +}) + +#------------------------------------------------------------------- +# (5) testing that causal.effect works with test case #2 when simp = TRUE +# expression should be simplified. +# currently PASSES + +test_that("causal.effect works on graph with pruning G_2", { + expect_equal(causal.effect("y", "x", G = G_2, primes = TRUE, prune = TRUE, simp = TRUE), + "\\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)}") +}) + +#------------------------------------------------------------------- +# (6) testing that parse.expression works with test case #2 +# causal.effect with simp = TRUE +# currently PASSES + + +# Trying to do set.primes before parse.expression +vars <- c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y") +counter <- setNames(rep(0, length(vars)), vars) +counter["y"] <- 2 +set.primes(vars, FALSE, counter) + + +# define P_2 for parse.expression() using the output from +# causal.effect("y", "x", G = G_2, expr = FALSE, primes = TRUE, prune = TRUE, simp = TRUE). +# expr = FALSE and simp = TRUE +# the initial probabilistic expression should be: +# \\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)} + +P_2_pe2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# must define expected output object to match output from parse.expression: +expected_output_2_pe2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("parse.expression works on graph with pruning G_2", { + expect_equal(parse.expression(P_2_pe2, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_pe2) + +}) + + +#------------------------------------------------------------------- +# (7) testing that simplify works with test case #2 +# causal.effect with simp = TRUE +# currently PASSES + +# the simplified expression should look like: +#"\frac{\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\sum_{z_2}P(x|z_1,z_2)P(z_2)}" +P_2_s2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now must define the expected output object for simplify() +expected_output_2_s2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("simplify works on graph with pruning G_2", { + expect_equal(simplify(P_2_s2, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_s2) +}) + +#------------------------------------------------------------------- +# (8) testing that join works with test case #2 +# produces identical results with simp = TRUE vs. simp = FALSE +# (no need for duplicate unit tests) + +# we can obtain the following from running simplify(P_2_s1 (or s2), topo_2, G_2.unobs, G_2, G_2.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: w x (this represents cond in simplify()) +# P$sumset[j]: w (this reprensents S in simplify()) + +simplify(P_2_s2, topo_2, G_2.unobs, G_2, G_2.obs) + +J_2 <- character(0) +D_2 <- character(0) +vari_2 <- "y" +cond_2 <- c("w", "x") +S_2 <- "w" +M_2 <- c("x", "z") +O_2 <- c("w", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_2.unobs +# G = G_2 +# G.obs = G_2.obs +# topo = topo_2 + +# we expect the output from this to be: + + +join_output_2_s2 <- + + test_that("join works on graph with pruning G_2 with simp = TRUE", { + expect_equal(join(J_2_s2, D_2_s2, vari_2_s2, cond_2_s2, S_2_s2, M_2_s2, O_2_s2, G_2.unobs, G_2, G_2.obs, topo_2), + join_output_2_s2) + }) + +#------------------------------------------------------------------- +# (9) testing that insert works with test case #2 +# produces identical results with simp = TRUE vs. simp = FALSE +# (no need for duplicate unit tests) + +# we can obtain the following from running simplify(P_2_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: w x (this represents cond in simplify()) +# P$sumset[j]: w (this reprensents S in simplify()) + +J_2 <- character(0) +D_2 <- character(0) +M_2 <- c("x", "z") +cond_2 <- c("w", "x") +S_2 <- "w" +O_2 <- c("w", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_2.unobs +# G = G_2 +# G.obs = G_2.obs +# topo = topo_2 + +# we expect the output from this (representing J, D) to be: + +insert_output_3 <- list(character(0), character(0)) + +test_that("insert works on simple observed graph G_3 with simp = FALSE", { + expect_equal(insert(J_3, D_3, M_3, cond_3, S_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + insert_output_3) +}) + + +#------------------------------------------------------------------- +# test case #3 from pp. 6-7 of causaleffect on CRAN - only observed variables +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) parse.expression from causal.effect simp = FALSE, +# (4) simplify from causal.effect simp = FALSE, +# (5) causal.effect with simp = TRUE, +# (6) parse.expression from causal.effect simp = TRUE, +# (7) simplify from causal.effect simp = TRUE, +# (8) join (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) +# (9) insert (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package + +G_3 <- graph.formula(x -+ y, w -+ x, w -+ z, z -+ y) +G_3.obs <- observed.graph(G_3) +G_3.unobs <- unobserved.graph(G_3) +topo_3 <- igraph::topological.sort(G_3.obs) +topo_3 <- igraph::get.vertex.attribute(G_3, "name")[topo_3] + +plot(G_3) +plot(G_3.obs) +plot(G_3.unobs) + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #3. +# currently PASSES + +test_that("topo works on simple observed graph G_3", { + expect_equal(topo_3, c("w", "x", "z", "y")) +}) + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #3 when simp = FALSE +# expression should NOT be simplified. +# currently PASSES + +test_that("causal.effect works on simple observed graph G_3", { + expect_equal(causal.effect("y", "x", G = G_3, simp = FALSE), + "\\sum_{w,z}P(y|w,x,z)P(z|w)P(w)") + +}) + +#------------------------------------------------------------------- +# (3) testing that parse.expression works with test case #3 +# causal.effect simp = FALSE +# currently PASSES + +# define P_3_pe1 for parse.expression() using the output from causal.effect with +# expr = FALSE and simp = FALSE +# P needs to be a probability object. +# the initial probabilistic expression should be: ∑w,z P(y∣w,x,z)P(z∣w)P(w). +# the simplified expression should look like: ∑w P(y∣w,x)P(w) +P_3_pe1 <- probability( + sumset = c("w", "z"), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x", "z")), + probability(var = "z", cond = c("w")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# must define expected output object to match output from parse.expression: ∑w P(y|w,x)P(w) +expected_output_3_pe1 <- probability( + sumset = "w", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# now running testthat +test_that("parse.expression works on simple observed graph G_3", { + expect_equal(parse.expression(P_3_pe1, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_pe1) + +}) + +#------------------------------------------------------------------- +# (4) testing that simplify works with test case #3 +# causal.effect with simp = FALSE +# currently PASSES + +# define P_3_s1 for simplify() using the output of parse.expression. +# P needs to be a list object. +# the simplified expression should look like: ∑w P(y∣w,x)P(w) +P_3_s1 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(P_3_s1, "class") <- "probability" + +# must define expected output object to match output from simplify: ∑w P(y|w,x)P(w) +expected_output_3_s1 <- probability( + sumset = "w", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(expected_output_3_s1, "class") <- "probability" + +#now running testthat +test_that("simplify works on simple observed graph G_3", { + expect_equal(simplify(P_3_s1, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_s1) +}) + +#------------------------------------------------------------------- +# (5) testing that causal.effect works with test case #3 when simp = TRUE +# expression should be simplified. +# currently PASSES + +test_that("causal.effect works on simple observed graph G_3", { + expect_equal(causal.effect("y", "x", G = G_3, simp = TRUE), + "\\sum_{w}P(y|w,x)P(w)") +}) + +#------------------------------------------------------------------- +# (6) testing that parse.expression works with test case #3 +# causal.effect simp = TRUE +# currently PASSES + +# define P_3_pe2 for parse.expression() using the output from causal.effect with +# expr = FALSE and simp = TRUE +# P needs to be a probability object. +# the initial probabilistic expression should be: ∑w P(y|w,x)P(w) +# the simplified expression should look like: P(y∣w,x)P(w) +P_3_pe2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# must define expected output object to match output from parse.expression: P(y∣w,x)P(w) +expected_output_3_pe2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# now running testthat +test_that("parse.expression works on simple observed graph G_3", { + expect_equal(parse.expression(P_3_pe2, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_pe2) + +}) + +#------------------------------------------------------------------- +# (7) testing that simplify works with test case #3 +# causal.effect with simp = TRUE +# currently PASSES + +# define P_3_s2 for simplify() using the output of parse.expression. +# P needs to be a list object. +# the simplified expression should look like: P(y∣w,x)P(w) +P_3_s2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(P_3_s2, "class") <- "probability" + +# must define expected output object to match output from simplify: P(y|w,x)P(w) +expected_output_3_s2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(expected_output_3_s2, "class") <- "probability" + +# now running testthat +test_that("simplify works on simple observed graph G_3", { + expect_equal(simplify(P_3_s2, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_s2) +}) + +#------------------------------------------------------------------- +# (8) testing that join works with test case #3 +# produces identical results with simp = TRUE vs. simp = FALSE +# (no need for duplicate unit tests) +# currently PASSES + +# we can obtain the following from running simplify(P_3_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: w x (this represents cond in simplify()) +# P$sumset[j]: w (this reprensents S in simplify()) + +J_3 <- character(0) +D_3 <- character(0) +vari_3 <- "y" +cond_3 <- c("w", "x") +S_3 <- "w" +M_3 <- c("x", "z") +O_3 <- c("w", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_3.unobs +# G = G_3 +# G.obs = G_3.obs +# topo = topo_3 + +# we expect the output from this to be: +# [1] "y" +# [2] "w" "x" + +join_output_3 <- list( + c("y"), + c("w", "x") +) + +test_that("join works on simple observed graph G_3 with simp = FALSE", { + expect_equal(join(J_3, D_3, vari_3, cond_3, S_3, M_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + join_output_3) +}) + +#------------------------------------------------------------------- +# (9) testing that insert works with test case #3 +# produces identical results with simp = TRUE vs. simp = FALSE +# (no need for duplicate unit tests) +# currently PASSES + +# we can obtain the following from running simplify(P_3_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): +# Step 6 - Inside nested while loop before join operation +# P$children[[k]]$var: y (this represents vari in simplify()) +# P$children[[k]]$cond: w x (this represents cond in simplify()) +# P$sumset[j]: w (this reprensents S in simplify()) + +J_3 <- character(0) +D_3 <- character(0) +M_3 <- c("x", "z") +cond_3 <- c("w", "x") +S_3 <- "w" +O_3 <- c("w", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_3.unobs +# G = G_3 +# G.obs = G_3.obs +# topo = topo_3 + +# we expect the output from this (representing J, D) to be: +# [1] character(0) +# [2] character(0) + +insert_output_3 <- list(character(0), character(0)) + +test_that("insert works on simple observed graph G_3 with simp = FALSE", { + expect_equal(insert(J_3, D_3, M_3, cond_3, S_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + insert_output_3) +}) diff --git a/tests/testthat/simplify_with_breakpoints.R b/tests/testthat/simplify_with_breakpoints.R new file mode 100644 index 0000000..725c361 --- /dev/null +++ b/tests/testthat/simplify_with_breakpoints.R @@ -0,0 +1,122 @@ +# This file was used to create `join` and `insert` unit tests for Vignettes #1, #2, and #3. + # for example, I ran this code, then ran: `simplify(P_3_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs)` with break points + # (the browser() function). I added print statements after step #5 in simplify() + # Example output: + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: w x (this represents cond in simplify()) + # P$sumset[j]: w (this reprensents S in simplify()) + +simplify <- function(P, topo, G.unobs, G, G.obs) { + step <- 0 # Initialize a step counter + + step <- step + 1 + cat("Step", step, "- Initialize j\n") + j <- 0 + browser() # Breakpoint 1: At the start of the function + + while (j < length(P$sumset)) { + step <- step + 1 + cat("Step", step, "- Start of while loop\n") + browser() # Breakpoint 2: At the start of the while loop + + P.orig <- P + irl.len <- 0 + irrel <- NULL + terms <- list() + vars <- sapply(P$children, "[[", "var") + + j <- j + 1 + i <- which(vars == P$sumset[j]) + k <- 1 + R.var <- character() + R.cond <- list() + J <- character() + D <- character() + + step <- step + 1 + cat("Step", step, "- After initialization of variables\n") + browser() # Breakpoint 3: After initialization of variables + + if (i > 1) { + irrel <- irrelevant(P$children[1:(i-1)], P$sumset[j], P$sumset, G.unobs) + irl.len <- length(irrel) + if (irl.len > 0) { + i <- i - irl.len + terms <- P$children[irrel] + P$children[irrel] <- NULL + vars <- vars[-irrel] + } + } + + step <- step + 1 + cat("Step", step, "- After removing irrelevant terms\n") + browser() # Breakpoint 4: After removing irrelevant terms + + M <- topo[!(topo %in% vars)] + O <- topo[(topo %in% vars)] + + step <- step + 1 + cat("Step", step, "- After topological sorting\n") + cat("M:", M, "\n") + cat("O:", O, "\n") + browser() # Breakpoint 5: After topological sorting + + while (k <= i) { + step <- step + 1 + cat("Step", step, "- Inside nested while loop before join operation\n") + cat("P$children[[k]]$var:", P$children[[k]]$var, "\n") + cat("P$children[[k]]$cond:", P$children[[k]]$cond, "\n") + cat("P$sumset[j]:", P$sumset[j], "\n") + browser() # Breakpoint 6: Before join operation + + joint <- join(J, D, P$children[[k]]$var, P$children[[k]]$cond, P$sumset[j], M, O, G.unobs, G, G.obs, topo) + + cat("Step", step, "- Inside nested while loop after join operation\n") + browser() # Breakpoint 7: Inside the nested while loop after join operation + + if (length(joint[[1]]) <= length(J)) { + J <- character() + D <- character() + k <- 1 + break + } else { + J <- joint[[1]] + D <- joint[[2]] + if (length(joint) > 2) { + R.var <- union(R.var, joint[[3]]) + R.cond <- c(R.cond, list(joint[[4]])) + M <- setdiff(M, R.var) + } else { + k <- k + 1 + } + } + step <- step + 1 + cat("Step", step, "- End of nested while loop iteration\n") + browser() # Breakpoint 8: End of nested while loop iteration + } + + if (k == i + 1) { + P <- factorize(J, D, P, topo, i) + S <- P$sumset[j] + P$sumset <- P$sumset[-j] + if (length(R.var) > 0) { + P.cancel <- cancel(P, R.var, R.cond, S) + if (identical(P.cancel, P)) P <- P.orig + else { + P <- P.cancel + j <- 0 + } + } else j <- 0 + if (irl.len > 0) P$children <- c(terms, P$children) + } else P <- P.orig + + step <- step + 1 + cat("Step", step, "- After potential simplification\n") + browser() # Breakpoint 9: After potential simplification + } + + step <- step + 1 + cat("Step", step, "- Return statement\n") + return(P) +} diff --git a/tests/testthat/test_case_1.R b/tests/testthat/test_case_1.R new file mode 100644 index 0000000..2ec5a02 --- /dev/null +++ b/tests/testthat/test_case_1.R @@ -0,0 +1,206 @@ +library(testthat) +library(igraph) +library(causaleffect) + +causal_effect_files <- list.files("~/Projects/causaleffect/R", pattern = "\\.R$", full.names = TRUE) +lapply(causal_effect_files, source) + +#------------------------------------------------------------------- +# test case #1 from pp. 6-7 of causaleffect on CRAN - includes unobserved confounders. +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) causal.effect with simp = TRUE, +# (4) parse.expression (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests), +# (5) simplify (same for causal.effect simp = TRUE vs. FALSE), +# (6) join (same for causal.effect simp = TRUE vs. FALSE) +# (7) insert (same for causal.effect simp = TRUE vs. FALSE) + +# causal.effect with simp = TRUE and simp = FALSE yield the same expression, so +# there are only 7 unit tests compared to 9 unit tests for test cases #2 and #3 + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package +G_1 <- graph.formula(x -+ y, z -+ x, z -+ y , x -+ z, z -+ x, simplify = FALSE) +G_1 <- set.edge.attribute(graph = G_1, name = "description", index = c(4,5), value = "U") +G_1.obs <- observed.graph(G_1) +G_1.unobs <- unobserved.graph(G_1) +topo_1 <- igraph::topological.sort(G_1.obs) +topo_1 <- igraph::get.vertex.attribute(G_1, "name")[topo_1] + +print(topo_1) + +plot(G_1) +# ^^ plotting this gives us a bidirected edge, which represents a latent confounder we can see in unobserved.graph +plot(observed.graph(G_1.obs)) +plot(unobserved.graph(G_1.unobs)) +# ^^ unobserved.graph plots observed graph, plus unobserved node(s) + + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #1 + # currently PASSES + +test_that("topo works on graph with unobserved confounders G_1", { + expect_equal(topo_1, c("z", "x", "y")) +}) + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #1 when simp = FALSE + # expression should NOT be simplified. + # currently PASSES + +test_that("causal.effect works on graph with unobserved confounders G_1", { + expect_equal(causal.effect("y", "x", G = G_1, simp = FALSE), + "\\sum_{z}P(y|z,x)P(z)") + +}) + +#------------------------------------------------------------------- +# (3) testing that causal.effect works with test case #1 when simp = TRUE + # expression should be the same, since it cannot be simplified. + # currently PASSES + +test_that("causal.effect works on graph with unobserved confounders G_1", { + expect_equal(causal.effect("y", "x", G = G_1, simp = TRUE), + "\\sum_{z}P(y|z,x)P(z)") +}) + +#------------------------------------------------------------------- +# (4) testing that parse.expression works with test case #1 + # causal.effect with simp = TRUE and simp = FALSE (they are the same) + # currently PASSES + +# define P_1 for parse.expression(). P needs to be a probability object. +# the initial probabilistic expression should be: ∑z P(y|z,x)P(z) +# the simplified expression should look like: ∑z P(y|z,x)P(z) + +# I used the output from causal.effect("y", "x", G = G_1, expr = FALSE, simp = TRUE). +# The expr = FALSE is key to NOT printing a string! +P_1 <- probability( + sumset = c("z"), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + + +# now must define expected output from parse.expression +expected_output_1 <- probability( + sumset = "z", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("z", "x")), + probability(var = "z", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + + +# now running testthat +test_that("parse.expression works on graph with unobserved confounders G_1", { + expect_equal(parse.expression(P_1, topo_1, G_1.unobs, G_1, G_1.obs), + expected_output_1) + +}) + +#------------------------------------------------------------------- +# (5) testing that simplify works with test case #1 + # currently PASSES + +# we can use the same P_1 and expected_output_1 as we used for parse.expression, as the expression +# passes through parse.expression unchanged. + +test_that("simplify works on graph with unobserved confounders G_1", { + expect_equal(simplify(P_1, topo_1, G_1.unobs, G_1, G_1.obs), + expected_output_1) +}) + +#------------------------------------------------------------------- +# (6) testing that join works with test case #1 + # currently PASSES + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: z x (this represents cond in simplify()) + # P$sumset[j]: z (this reprensents S in simplify()) + +J_1 <- character() +D_1 <- character() +vari_1 <- "y" +cond_1 <- c("z", "x") +S_1 <- "z" +M_1 <- "x" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: + # G.unobs = G_1.unobs + # G = G_1 + # G.obs = G_1.obs + # topo = topo_1 + +# we expect the output from this to be: +# [1] "y" +# [2] "z" "x" + +join_output_1 <- list( + c("y"), + c("z", "x") + ) + +test_that("join works on graph with unobserved confounders G_1", { + expect_equal(join(J_1, D_1, vari_1, cond_1, S_1, M_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1), + join_output_1) +}) + +#------------------------------------------------------------------- +# (7) testing that insert works with test case #1 + # currently PASSES + +# we can obtain the following from running simplify(P_1, topo_1, G_1.unobs, G_1, +# G_1.obs) with break points (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$cond: z x (this represents cond in simplify()) + # P$sumset[j]: z (this represents S in simplify()) + +J_1 <- character() +D_1 <- character() +M_1 <- "x" +cond_1 <- c("z", "x") +S_1 <- "z" +O_1 <- c("z", "y") + +# we can obtain the following from the graph information: +# G.unobs = G_1.unobs +# G = G_1 +# G.obs = G_1.obs +# topo = topo_1 + +# we expect the output from this (representing J, D) to be: +# [1] character(0) +# [2] character(0) + +insert_output_1 <- list(character(0), character(0)) + +test_that("insert works on graph with unobserved confounders G_1", { + expect_equal(insert(J_1, D_1, M_1, cond_1, S_1, O_1, G_1.unobs, G_1, G_1.obs, topo_1), + insert_output_1) +}) diff --git a/tests/testthat/test_case_2.R b/tests/testthat/test_case_2.R new file mode 100644 index 0000000..e37bb73 --- /dev/null +++ b/tests/testthat/test_case_2.R @@ -0,0 +1,1342 @@ +library(testthat) +library(igraph) +library(causaleffect) + +causal_effect_files <- list.files("~/Projects/causaleffect/R", pattern = "\\.R$", full.names = TRUE) +lapply(causal_effect_files, source) + +#------------------------------------------------------------------- +# test case #2 from pp. 6-7 of causaleffect on CRAN - pruning. +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) parse.expression from causal.effect simp = FALSE, +# (4) simplify from causal.effect simp = FALSE, +# (5) causal.effect with simp = TRUE, +# (6) parse.expression from causal.effect simp = TRUE, +# (7) simplify from causal.effect simp = TRUE +# (8) DOES NOT PASS YET - join (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) +# (9) DOES NOT PASS YET - insert (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package + +G_2 <- graph.formula(x -+ z_4, z_4 -+ y, z_1 -+ x, z_2 -+ z_1, + z_3 -+ z_2, z_3 -+ x, z_5 -+ z_1, z_5 -+ z_4, x -+ z_2, z_2 -+ x, + z_3 -+ z_2, z_2 -+ z_3, z_2 -+ y, y -+ z_2, + z_4 -+ y, y -+ z_4, z_5 -+ z_4, z_4 -+ z_5, simplify = FALSE) +G_2 <- set.edge.attribute(graph = G_2, "description", 9:18, "U") +G_2.obs <- observed.graph(G_2) +G_2.unobs <- unobserved.graph(G_2) +topo_2 <- igraph::topo_sort(G_2.obs) +topo_2 <- igraph::get.vertex.attribute(G_2, "name")[topo_2] + +print(topo_2) + +plot(G_2) +# ^^ plotting this gives us a bidirected edge, which represents a latent confounder we can see in unobserved.graph +plot(observed.graph(G_2.obs)) +plot(unobserved.graph(G_2.unobs)) +# ^^ unobserved.graph plots observed graph, plus unobserved node(s) + + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #2 + # currently PASSES + +test_that("topo works on graph with pruning G_2", { + expect_equal(topo_2, c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y")) +}) + + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #2 when simp = FALSE + # expression should NOT be simplified. + # currently PASSES + +test_that("causal.effect works on graph with pruning G_2", { + expect_equal(causal.effect("y", "x", G = G_2, primes = TRUE, prune = TRUE, simp = FALSE), + "\\frac{\\sum_{z_3,z_5,z_2,z_4}P(y|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)}{\\sum_{z_3,z_5,z_2,z_4,y^{\\prime}}P(y^{\\prime}|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)}") + +}) + +#------------------------------------------------------------------- +# (3) testing that parse.expression works with test case #2 + # causal.effect with simp = FALSE + # currently PASSES + +# Trying to do set.primes before parse.expression +vars <- c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y") +counter <- setNames(rep(0, length(vars)), vars) +counter["y"] <- 2 +set.primes(vars, FALSE, counter) + + +# define P_2 for parse.expression() using the output from + # causal.effect("y", "x", G = G_2, expr = FALSE, primes = TRUE, prune = TRUE, simp = TRUE). + # expr = FALSE and simp = TRUE + # the initial probabilistic expression should be: + # \\frac{\\sum_{z_3,z_5,z_2,z_4}P(y|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)} + # {\\sum_{z_3,z_5,z_2,z_4,y^{\\prime}}P(y^{\\prime}|z_3,z_5,z_2,z_1,x,z_4)P(z_4|z_3,z_5,z_2,z_1,x)P(x|z_3,z_5,z_2,z_1)P(z_2|z_3,z_5)P(z_5|z_3)P(z_3)} + +P_2_pe1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# must define expected output object to match output from parse.expression: +# Provided R structure (simplified) +expected_output_2_pe1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("parse.expression works on graph with pruning G_2", { + expect_equal(parse.expression(P_2_pe1, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_pe1) + +}) + +#------------------------------------------------------------------- +# (4) testing that simplify works with test case #2 + # causal.effect with simp = FALSE + # currently PASSES + +# the simplified expression should look like: + # \\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)} +P_2_s1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now must define the expected output object for simplify() +expected_output_2_s1 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("simplify works on graph with pruning G_2", { + expect_equal(simplify(P_2_s1, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_s1) +}) + +#------------------------------------------------------------------- +# (5) testing that causal.effect works with test case #2 when simp = TRUE + # expression should be simplified. + # currently PASSES + +test_that("causal.effect works on graph with pruning G_2", { + expect_equal(causal.effect("y", "x", G = G_2, primes = TRUE, prune = TRUE, simp = TRUE), + "\\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)}") +}) + +#------------------------------------------------------------------- +# (6) testing that parse.expression works with test case #2 + # causal.effect with simp = TRUE + # currently PASSES + + +# Trying to do set.primes before parse.expression +vars <- c("z_3", "z_5", "z_2", "z_1", "x", "z_4", "y") +counter <- setNames(rep(0, length(vars)), vars) +counter["y"] <- 2 +set.primes(vars, FALSE, counter) + + +# define P_2 for parse.expression() using the output from + # causal.effect("y", "x", G = G_2, expr = FALSE, primes = TRUE, prune = TRUE, simp = TRUE). + # expr = FALSE and simp = TRUE + # the initial probabilistic expression should be: + # \\frac{\\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\\sum_{z_2}P(x|z_1,z_2)P(z_2)} + +P_2_pe2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# must define expected output object to match output from parse.expression: +expected_output_2_pe2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("parse.expression works on graph with pruning G_2", { + expect_equal(parse.expression(P_2_pe2, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_pe2) + +}) + + +#------------------------------------------------------------------- +# (7) testing that simplify works with test case #2 + # causal.effect with simp = TRUE + # currently PASSES + +# the simplified expression should look like: + #"\frac{\sum_{z_2,z_5}P(y|x,z_1,z_2,z_5)P(x|z_1,z_2,z_5)P(z_2|z_5)P(z_5)}{\sum_{z_2}P(x|z_1,z_2)P(z_2)}" +P_2_s2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = c("z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now must define the expected output object for simplify() +expected_output_2_s2 <- list( + var = character(0), + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = TRUE, + sum = FALSE, + children = list(), + den = list( + var = character(0), + cond = character(0), + sumset = c("z_2"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "x", + cond = c("z_1", "z_2"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + num = list( + var = character(0), + cond = character(0), + sumset = c("z_2", "z_5"), + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + list( + var = "y", + cond = c("x", "z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "x", + cond = c("z_1", "z_2", "z_5"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_2", + cond = "z_5", + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + list( + var = "z_5", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0, + class = "probability" + ), + domain = 0, + weight = 0, + class = "probability", + algorithm = "pid", + query = list(y = "y", x = "x", z = NULL) +) + + +# now running testthat +test_that("simplify works on graph with pruning G_2", { + expect_equal(simplify(P_2_s2, topo_2, G_2.unobs, G_2, G_2.obs), + expected_output_2_s2) +}) + +#------------------------------------------------------------------- +# (8) testing that join works with test case #2 + # produces identical results with simp = TRUE vs. simp = FALSE + # (no need for duplicate unit tests) + +# we can obtain the following from running simplify(P_2_s1 (or s2), topo_2, G_2.unobs, G_2, G_2.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: w x (this represents cond in simplify()) + # P$sumset[j]: w (this reprensents S in simplify()) + +simplify(P_2_s2, topo_2, G_2.unobs, G_2, G_2.obs) + +J_2 <- character(0) +D_2 <- character(0) +vari_2 <- "y" +cond_2 <- c("w", "x") +S_2 <- "w" +M_2 <- c("x", "z") +O_2 <- c("w", "y") + +# we can obtain the following from the graph information: + # G.unobs = G_2.unobs + # G = G_2 + # G.obs = G_2.obs + # topo = topo_2 + +# we expect the output from this to be: + + +join_output_2_s2 <- + +test_that("join works on graph with pruning G_2 with simp = TRUE", { + expect_equal(join(J_2_s2, D_2_s2, vari_2_s2, cond_2_s2, S_2_s2, M_2_s2, O_2_s2, G_2.unobs, G_2, G_2.obs, topo_2), + join_output_2_s2) +}) + +#------------------------------------------------------------------- +# (9) testing that insert works with test case #2 + # produces identical results with simp = TRUE vs. simp = FALSE + # (no need for duplicate unit tests) + +# we can obtain the following from running simplify(P_2_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: w x (this represents cond in simplify()) + # P$sumset[j]: w (this reprensents S in simplify()) + +J_2 <- character(0) +D_2 <- character(0) +M_2 <- c("x", "z") +cond_2 <- c("w", "x") +S_2 <- "w" +O_2 <- c("w", "y") + +# we can obtain the following from the graph information: + # G.unobs = G_2.unobs + # G = G_2 + # G.obs = G_2.obs + # topo = topo_2 + +# we expect the output from this (representing J, D) to be: + +insert_output_3 <- list(character(0), character(0)) + +test_that("insert works on simple observed graph G_3 with simp = FALSE", { + expect_equal(insert(J_3, D_3, M_3, cond_3, S_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + insert_output_3) +}) diff --git a/tests/testthat/test_case_3.R b/tests/testthat/test_case_3.R new file mode 100644 index 0000000..1e45dba --- /dev/null +++ b/tests/testthat/test_case_3.R @@ -0,0 +1,508 @@ +library(testthat) +library(igraph) +library(causaleffect) + +causal_effect_files <- list.files("~/Projects/causaleffect/R", pattern = "\\.R$", full.names = TRUE) +lapply(causal_effect_files, source) + +#------------------------------------------------------------------- +# test case #3 from pp. 6-7 of causaleffect on CRAN - only observed variables +#------------------------------------------------------------------- +# unit tests for functions: +# (1) topo, +# (2) causal.effect with simp = FALSE, +# (3) parse.expression from causal.effect simp = FALSE, +# (4) simplify from causal.effect simp = FALSE, +# (5) causal.effect with simp = TRUE, +# (6) parse.expression from causal.effect simp = TRUE, +# (7) simplify from causal.effect simp = TRUE, +# (8) join (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) +# (9) insert (same for causal.effect simp = TRUE vs. FALSE; no need for duplicate unit tests) + +#------------------------------------------------------------------- +# defining graphs, nodes, and topological ordering using igraph package + +G_3 <- graph.formula(x -+ y, w -+ x, w -+ z, z -+ y) +G_3.obs <- observed.graph(G_3) +G_3.unobs <- unobserved.graph(G_3) +topo_3 <- igraph::topological.sort(G_3.obs) +topo_3 <- igraph::get.vertex.attribute(G_3, "name")[topo_3] + +plot(G_3) +plot(G_3.obs) +plot(G_3.unobs) + +#------------------------------------------------------------------- +# (1) testing that topo works with test case #3. + # currently PASSES + +test_that("topo works on simple observed graph G_3", { + expect_equal(topo_3, c("w", "x", "z", "y")) +}) + +#------------------------------------------------------------------- +# (2) testing that causal.effect works with test case #3 when simp = FALSE + # expression should NOT be simplified. + # currently PASSES + +test_that("causal.effect works on simple observed graph G_3", { + expect_equal(causal.effect("y", "x", G = G_3, simp = FALSE), + "\\sum_{w,z}P(y|w,x,z)P(z|w)P(w)") + +}) + +#------------------------------------------------------------------- +# (3) testing that parse.expression works with test case #3 + # causal.effect simp = FALSE + # currently PASSES + +# define P_3_pe1 for parse.expression() using the output from causal.effect with + # expr = FALSE and simp = FALSE + # P needs to be a probability object. + # the initial probabilistic expression should be: ∑w,z P(y∣w,x,z)P(z∣w)P(w). + # the simplified expression should look like: ∑w P(y∣w,x)P(w) +P_3_pe1 <- probability( + sumset = c("w", "z"), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x", "z")), + probability(var = "z", cond = c("w")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# must define expected output object to match output from parse.expression: ∑w P(y|w,x)P(w) +expected_output_3_pe1 <- probability( + sumset = "w", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# now running testthat +test_that("parse.expression works on simple observed graph G_3", { + expect_equal(parse.expression(P_3_pe1, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_pe1) + +}) + +#------------------------------------------------------------------- +# (4) testing that simplify works with test case #3 + # causal.effect with simp = FALSE + # currently PASSES + +# define P_3_s1 for simplify() using the output of parse.expression. + # P needs to be a list object. + # the simplified expression should look like: ∑w P(y∣w,x)P(w) +P_3_s1 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(P_3_s1, "class") <- "probability" + +# must define expected output object to match output from simplify: ∑w P(y|w,x)P(w) +expected_output_3_s1 <- probability( + sumset = "w", + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + probability(var = "y", cond = c("w", "x")), + probability(var = "w", cond = character(0)) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(expected_output_3_s1, "class") <- "probability" + +#now running testthat +test_that("simplify works on simple observed graph G_3", { + expect_equal(simplify(P_3_s1, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_s1) +}) + +#------------------------------------------------------------------- +# (5) testing that causal.effect works with test case #3 when simp = TRUE + # expression should be simplified. + # currently PASSES + +test_that("causal.effect works on simple observed graph G_3", { + expect_equal(causal.effect("y", "x", G = G_3, simp = TRUE), + "\\sum_{w}P(y|w,x)P(w)") +}) + +#------------------------------------------------------------------- +# (6) testing that parse.expression works with test case #3 + # causal.effect simp = TRUE + # currently PASSES + +# define P_3_pe2 for parse.expression() using the output from causal.effect with + # expr = FALSE and simp = TRUE + # P needs to be a probability object. + # the initial probabilistic expression should be: ∑w P(y|w,x)P(w) + # the simplified expression should look like: P(y∣w,x)P(w) +P_3_pe2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# must define expected output object to match output from parse.expression: P(y∣w,x)P(w) +expected_output_3_pe2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) + +# now running testthat +test_that("parse.expression works on simple observed graph G_3", { + expect_equal(parse.expression(P_3_pe2, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_pe2) + +}) + +#------------------------------------------------------------------- +# (7) testing that simplify works with test case #3 + # causal.effect with simp = TRUE + # currently PASSES + +# define P_3_s2 for simplify() using the output of parse.expression. + # P needs to be a list object. + # the simplified expression should look like: P(y∣w,x)P(w) +P_3_s2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(P_3_s2, "class") <- "probability" + +# must define expected output object to match output from simplify: P(y|w,x)P(w) +expected_output_3_s2 <- list( + var = character(0), + cond = character(0), + sumset = "w", + do = character(0), + product = TRUE, + fraction = FALSE, + sum = FALSE, + children = list( + structure( + list( + var = "y", + cond = c("w", "x"), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ), + structure( + list( + var = "w", + cond = character(0), + sumset = character(0), + do = character(0), + product = FALSE, + fraction = FALSE, + sum = FALSE, + children = list(), + den = list(), + num = list(), + domain = 0, + weight = 0 + ), + class = "probability" + ) + ), + den = list(), + num = list(), + domain = 0, + weight = 0 +) +attr(expected_output_3_s2, "class") <- "probability" + +# now running testthat +test_that("simplify works on simple observed graph G_3", { + expect_equal(simplify(P_3_s2, topo_3, G_3.unobs, G_3, G_3.obs), + expected_output_3_s2) +}) + +#------------------------------------------------------------------- +# (8) testing that join works with test case #3 + # produces identical results with simp = TRUE vs. simp = FALSE + # (no need for duplicate unit tests) + # currently PASSES + +# we can obtain the following from running simplify(P_3_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: w x (this represents cond in simplify()) + # P$sumset[j]: w (this reprensents S in simplify()) + +J_3 <- character(0) +D_3 <- character(0) +vari_3 <- "y" +cond_3 <- c("w", "x") +S_3 <- "w" +M_3 <- c("x", "z") +O_3 <- c("w", "y") + +# we can obtain the following from the graph information: + # G.unobs = G_3.unobs + # G = G_3 + # G.obs = G_3.obs + # topo = topo_3 + +# we expect the output from this to be: + # [1] "y" + # [2] "w" "x" + +join_output_3 <- list( + c("y"), + c("w", "x") +) + +test_that("join works on simple observed graph G_3 with simp = FALSE", { + expect_equal(join(J_3, D_3, vari_3, cond_3, S_3, M_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + join_output_3) +}) + +#------------------------------------------------------------------- +# (9) testing that insert works with test case #3 + # produces identical results with simp = TRUE vs. simp = FALSE + # (no need for duplicate unit tests) + # currently PASSES + +# we can obtain the following from running simplify(P_3_s1 (or s2), topo_3, G_3.unobs, G_3, G_3.obs) with break points +# (the browser() function). I added print statements +# after step #5 in simplify(): + # Step 6 - Inside nested while loop before join operation + # P$children[[k]]$var: y (this represents vari in simplify()) + # P$children[[k]]$cond: w x (this represents cond in simplify()) + # P$sumset[j]: w (this reprensents S in simplify()) + +J_3 <- character(0) +D_3 <- character(0) +M_3 <- c("x", "z") +cond_3 <- c("w", "x") +S_3 <- "w" +O_3 <- c("w", "y") + +# we can obtain the following from the graph information: + # G.unobs = G_3.unobs + # G = G_3 + # G.obs = G_3.obs + # topo = topo_3 + +# we expect the output from this (representing J, D) to be: + # [1] character(0) + # [2] character(0) + +insert_output_3 <- list(character(0), character(0)) + +test_that("insert works on simple observed graph G_3 with simp = FALSE", { + expect_equal(insert(J_3, D_3, M_3, cond_3, S_3, O_3, G_3.unobs, G_3, G_3.obs, topo_3), + insert_output_3) +})