
Commit a1f169d
Merge branch 'main' into embedding-op-fresh
rnyak committed Jun 6, 2023
2 parents f557980 + 69fbd57 · commit a1f169d
Showing 6 changed files with 31 additions and 10 deletions.
23 changes: 22 additions & 1 deletion examples/03-Running-on-multiple-GPUs-or-on-CPU.ipynb
@@ -27,6 +27,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "77464844",
"metadata": {},
@@ -53,6 +54,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1c5598ae",
"metadata": {},
@@ -92,6 +94,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "63ac0cf2",
"metadata": {},
@@ -100,6 +103,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4def0005",
"metadata": {},
@@ -123,6 +127,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d7c3f9ea",
"metadata": {},
@@ -148,6 +153,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "728c3009",
"metadata": {},
@@ -176,11 +182,15 @@
"\n",
"# Deploy a Single-Machine Multi-GPU Cluster\n",
"protocol = \"tcp\" # \"tcp\" or \"ucx\"\n",
"\n",
"if numba.cuda.is_available():\n",
" NUM_GPUS = list(range(len(numba.cuda.gpus)))\n",
"else:\n",
" NUM_GPUS = []\n",
"visible_devices = \",\".join([str(n) for n in NUM_GPUS]) # Delect devices to place workers\n",
"try:\n",
" visible_devices = os.environ[\"CUDA_VISIBLE_DEVICES\"]\n",
"except KeyError:\n",
" visible_devices = \",\".join([str(n) for n in NUM_GPUS]) # Delect devices to place workers\n",
"device_limit_frac = 0.7 # Spill GPU-Worker memory to host at this limit.\n",
"device_pool_frac = 0.8\n",
"part_mem_frac = 0.15\n",
@@ -206,6 +216,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d14dc098",
"metadata": {},
@@ -242,6 +253,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0576affe",
"metadata": {},
@@ -589,6 +601,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "94ef0024",
"metadata": {},
@@ -599,6 +612,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "768fc24e",
"metadata": {},
@@ -622,6 +636,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "61785127",
"metadata": {},
@@ -678,6 +693,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "01ea40bb",
"metadata": {},
@@ -686,6 +702,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "987f3274",
"metadata": {},
@@ -714,6 +731,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b06c962e",
"metadata": {},
@@ -745,6 +763,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d28ae761",
"metadata": {},
@@ -755,6 +774,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4e07864d",
"metadata": {},
@@ -763,6 +783,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "8f971a22",
"metadata": {},
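The notebook edits above fall into two groups: every markdown cell gains an empty "attachments": {} entry (most likely a by-product of re-saving the notebook with a newer Jupyter), and the cluster-setup cell now honors an existing CUDA_VISIBLE_DEVICES environment variable instead of always overwriting it with every GPU that numba detects. A minimal standalone sketch of that device-selection logic (same variable names as the notebook; assumes numba is installed):

# Sketch of the device-selection logic added in the notebook cell above.
import os

import numba.cuda

# Enumerate the GPUs numba can see; an empty list means CPU-only.
if numba.cuda.is_available():
    NUM_GPUS = list(range(len(numba.cuda.gpus)))
else:
    NUM_GPUS = []

# Prefer a CUDA_VISIBLE_DEVICES value the user already exported;
# otherwise fall back to every detected device.
try:
    visible_devices = os.environ["CUDA_VISIBLE_DEVICES"]
except KeyError:
    visible_devices = ",".join(str(n) for n in NUM_GPUS)

print(f"Dask workers will be placed on devices: {visible_devices!r}")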
3 changes: 1 addition & 2 deletions nvtabular/__init__.py
@@ -21,8 +21,7 @@
from merlin.core import dispatch, utils # noqa
from merlin.dag import ColumnSelector
from merlin.schema import ColumnSchema, Schema
from nvtabular import workflow # noqa
from nvtabular import _version
from nvtabular import _version, ops, workflow # noqa

# suppress some warnings with cudf warning about column ordering with dlpack
# and numba warning about deprecated environment variables
2 changes: 1 addition & 1 deletion nvtabular/ops/categorify.py
@@ -1025,7 +1025,7 @@ def _top_level_groupby(df, options: FitOptions = None, spill=True):
del df_gb

# Extract null groups into gb_null
isnull = gb.isnull().any(1)
isnull = gb.isnull().any(axis=1)
gb_null = gb[~isnull]
gb = gb[isnull]
if not len(gb_null):
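The one-line change in categorify.py replaces the positional axis argument (any(1)) with the keyword form (any(axis=1)); passing axis positionally to DataFrame.any is deprecated in recent pandas releases, so the keyword form keeps this row-wise null check working. A small illustrative snippet of the same pattern (the DataFrame here is made up):

# Illustration only: row-wise null detection with the keyword axis argument.
import pandas as pd

gb = pd.DataFrame({"a": [1, None, 3], "b": [4, 5, None]})

# True for rows that contain at least one null. The positional form
# gb.isnull().any(1) is deprecated in newer pandas.
isnull = gb.isnull().any(axis=1)

print(gb[isnull])   # rows with at least one null
print(gb[~isnull])  # fully populated rows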
7 changes: 4 additions & 3 deletions tests/conftest.py
@@ -52,10 +52,10 @@ def assert_eq(a, b, *args, **kwargs):

import pytest
from asvdb import ASVDb, BenchmarkInfo, utils
from dask.distributed import Client, LocalCluster
from numba import cuda

import nvtabular
from merlin.core.utils import Distributed
from merlin.dag.node import iter_nodes

REPO_ROOT = Path(__file__).parent.parent
@@ -97,8 +97,9 @@ def assert_eq(a, b, *args, **kwargs):

@pytest.fixture(scope="module")
def client():
cluster = LocalCluster(n_workers=2)
client = Client(cluster)
distributed = Distributed(n_workers=2)
cluster = distributed.cluster
client = distributed.client
yield client
client.close()
cluster.close()
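In tests/conftest.py the client fixture stops constructing a dask.distributed.LocalCluster by hand and instead asks merlin.core.utils.Distributed for one; as used here, that helper owns the cluster and exposes it through .cluster and .client. A hedged sketch of how a test would consume the updated fixture (the Distributed behavior is assumed from the diff, not from its documentation):

# Sketch: the module-scoped fixture above hands each test a Dask client
# backed by a Merlin-managed cluster.
def test_runs_on_the_shared_cluster(client):
    # `client` is a dask.distributed.Client, so ordinary Dask calls work.
    assert client.status == "running"
    futures = client.map(lambda x: x * 2, range(4))
    assert client.gather(futures) == [0, 2, 4, 6]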
2 changes: 1 addition & 1 deletion tests/unit/framework_utils/test_tf_layers.py
@@ -318,4 +318,4 @@ def test_multihot_empty_rows():
)

y_hat = model(x).numpy()
np.testing.assert_allclose(y_hat, multi_hot_embedding_rows, rtol=1e-06)
np.testing.assert_allclose(y_hat, multi_hot_embedding_rows, rtol=1e-05)
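The single change in test_tf_layers.py relaxes the comparison tolerance from rtol=1e-06 to rtol=1e-05. np.testing.assert_allclose passes when |actual - desired| <= atol + rtol * |desired| elementwise (atol defaults to 0), so the larger rtol simply allows slightly more relative drift in the embedding output. A tiny illustration with made-up numbers:

# Illustration: a ~5e-6 relative error fails at rtol=1e-06 but passes at rtol=1e-05.
import numpy as np

desired = np.array([1.0, 2.0, 3.0])
actual = desired * (1 + 5e-6)

# np.testing.assert_allclose(actual, desired, rtol=1e-06)  # would raise AssertionError
np.testing.assert_allclose(actual, desired, rtol=1e-05)     # passes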
4 changes: 2 additions & 2 deletions tests/unit/test_tf4rec.py
@@ -14,7 +14,7 @@
NUM_ROWS = 10000


def test_tf4rec():
def test_tf4rec(tmpdir):
inputs = {
"user_session": np.random.randint(1, 10000, NUM_ROWS),
"product_id": np.random.randint(1, 51996, NUM_ROWS),
@@ -29,7 +29,7 @@ def test_tf4rec():

cat_feats = (
["user_session", "product_id", "category_id"]
>> nvt.ops.Categorify()
>> nvt.ops.Categorify(out_path=str(tmpdir))
>> nvt.ops.LambdaOp(lambda col: col + 1)
)

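Finally, test_tf4rec.py now takes pytest's tmpdir fixture and passes it to Categorify(out_path=...). Categorify persists its category mappings as parquet files under out_path, so pointing it at a temporary directory keeps test runs from leaving artifacts in the repository checkout or colliding with one another. A minimal sketch of the same pattern in isolation (synthetic data, hypothetical column name):

# Sketch: sending Categorify's category files to a pytest temp directory.
import numpy as np
import pandas as pd

import nvtabular as nvt


def test_categorify_uses_tmpdir(tmpdir):
    df = pd.DataFrame({"item_id": np.random.randint(1, 100, 1000)})
    dataset = nvt.Dataset(df)

    # The unique-category parquet files land under tmpdir, not the repo root.
    cats = ["item_id"] >> nvt.ops.Categorify(out_path=str(tmpdir))
    workflow = nvt.Workflow(cats)
    out = workflow.fit_transform(dataset).to_ddf().compute()

    assert out["item_id"].min() >= 0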
