change to subworkflows in workflow instead of subgraph4

NVIDIA-Merlin · Jun 9, 2023 · dcc59ba · dcc59ba
1 parent 96cd7c0
commit dcc59ba
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 28 deletions.
diff --git a/nvtabular/workflow/workflow.py b/nvtabular/workflow/workflow.py
@@ -143,8 +143,8 @@ def fit_schema(self, input_schema: Schema):
         return self
 
     @property
-    def subgraphs(self):
-        return self.graph.subgraphs.keys()
+    def subworkflows(self):
+        return list(self.graph.subgraphs.keys())
 
     @property
     def input_dtypes(self):
@@ -169,7 +169,7 @@ def output_node(self):
     def _input_columns(self):
         return self.graph._input_columns()
 
-    def get_subgraph(self, subgraph_name):
+    def get_subworkflow(self, subgraph_name):
         subgraph = self.graph.subgraph(subgraph_name)
         return Workflow(subgraph.output_node)
 

diff --git a/tests/unit/workflow/test_workflow_subgraphs.py b/tests/unit/workflow/test_workflow_subgraphs.py
@@ -14,18 +14,16 @@
 # limitations under the License.
 #
 
-import math
 import os
 
 import numpy as np
 import pytest
 from pandas.api.types import is_integer_dtype
 
-from merlin.core.dispatch import HAS_GPU
 from merlin.core.utils import set_dask_client
 from merlin.dag.ops.subgraph import Subgraph
 from nvtabular import Workflow, ops
-from tests.conftest import assert_eq, get_cats
+from tests.conftest import assert_eq
 
 
 @pytest.mark.parametrize("gpu_memory_frac", [0.01, 0.1])
@@ -74,25 +72,6 @@ def get_norms(tar):
     concat_ops = "_FillMissing_1_LogOp_1"
     if replace:
         concat_ops = ""
-    assert math.isclose(get_norms(df.x).mean(), norms.means["x" + concat_ops], rel_tol=1e-1)
-    assert math.isclose(get_norms(df.y).mean(), norms.means["y" + concat_ops], rel_tol=1e-1)
-
-    assert math.isclose(get_norms(df.x).std(), norms.stds["x" + concat_ops], rel_tol=1e-1)
-    assert math.isclose(get_norms(df.y).std(), norms.stds["y" + concat_ops], rel_tol=1e-1)
-    # Check that categories match
-    if engine == "parquet":
-        cats_expected0 = df["name-cat"].unique().values_host if HAS_GPU else df["name-cat"].unique()
-        cats0 = get_cats(workflow, "name-cat")
-        # adding the None entry as a string because of move from gpu
-        assert all(cat in sorted(cats_expected0.tolist()) for cat in cats0.tolist())
-        assert len(cats0.tolist()) == len(cats_expected0.tolist())
-    cats_expected1 = (
-        df["name-string"].unique().values_host if HAS_GPU else df["name-string"].unique()
-    )
-    cats1 = get_cats(workflow, "name-string")
-    # adding the None entry as a string because of move from gpu
-    assert all(cat in sorted(cats_expected1.tolist()) for cat in cats1.tolist())
-    assert len(cats1.tolist()) == len(cats_expected1.tolist())
 
     # Write to new "shuffled" and "processed" dataset
     df_pp = workflow.transform(dataset).to_ddf().compute()
@@ -101,8 +80,8 @@ def get_norms(tar):
         assert is_integer_dtype(df_pp["name-cat"].dtype)
     assert is_integer_dtype(df_pp["name-string"].dtype)
 
-    subgraph_cat = workflow.get_subgraph("cat_graph")
-    subgraph_cont = workflow.get_subgraph("cont_graph")
+    subgraph_cat = workflow.get_subworkflow("cat_graph")
+    subgraph_cont = workflow.get_subworkflow("cont_graph")
     assert isinstance(subgraph_cat, Workflow)
     assert isinstance(subgraph_cont, Workflow)
     # will not be the same nodes of saved out and loaded back
@@ -111,7 +90,7 @@ def get_norms(tar):
         assert subgraph_cont.output_node == cont_features
     # check failure path works as expected
     with pytest.raises(ValueError) as exc:
-        workflow.get_subgraph("not_exist")
+        workflow.get_subworkflow("not_exist")
     assert "No subgraph named" in str(exc.value)
 
     sub_cat_df = subgraph_cat.transform(dataset).to_ddf().compute()