Commit ba72d57

Author: pytorchbot
Committed: 2025-09-20 nightly release (2a68dde)
Parent: 930ac32

File tree: 4 files changed (+100, -7 lines)


torchrec/ir/tests/test_serializer.py

Lines changed: 59 additions & 5 deletions
@@ -297,7 +297,7 @@ def test_serialize_deserialize_ebc(self) -> None:
         self.assertEqual(len(deserialized_out), len(eager_out))
         for deserialized, original in zip(deserialized_out, eager_out):
             self.assertEqual(deserialized.shape, original.shape)
-            self.assertTrue(torch.allclose(deserialized, original))
+            torch.testing.assert_close(deserialized, original)

     def test_serialize_deserialize_ebc_with_vbe_kjt(self) -> None:
         model = self.generate_model_for_vbe_kjt()
@@ -374,14 +374,14 @@ def test_serialize_deserialize_ebc_with_vbe_kjt(self) -> None:
         self.assertEqual(len(deserialized_out), len(eager_out))
         for deserialized, original in zip(deserialized_out, eager_out):
             self.assertEqual(deserialized.shape, original.shape)
-            self.assertTrue(torch.allclose(deserialized, original))
+            torch.testing.assert_close(deserialized, original)

         deserialized_out_2 = deserialized_model(kjt_2)

         self.assertEqual(len(deserialized_out_2), len(eager_out_2))
         for deserialized, original in zip(deserialized_out_2, eager_out_2):
             self.assertEqual(deserialized.shape, original.shape)
-            self.assertTrue(torch.allclose(deserialized, original))
+            torch.testing.assert_close(deserialized, original)

     def test_dynamic_shape_ebc_disabled_in_oss_compatibility(self) -> None:
         model = self.generate_model()
@@ -428,7 +428,61 @@ def test_dynamic_shape_ebc_disabled_in_oss_compatibility(self) -> None:

         for i, tensor in enumerate(deserialized_out):
             self.assertEqual(eager_out[i].shape, tensor.shape)
-            assert torch.allclose(eager_out[i], tensor)
+            torch.testing.assert_close(eager_out[i], tensor)
+
+    def test_variable_batch_size_ebc_disabled_in_oss_compatibility(self) -> None:
+        model = self.generate_model()
+        feature1 = KeyedJaggedTensor.from_offsets_sync(
+            keys=["f1", "f2", "f3"],
+            values=torch.tensor([0, 1, 2, 3, 2, 3]),
+            offsets=torch.tensor([0, 2, 2, 3, 4, 5, 6]),  # batch size = 2
+        )
+
+        feature2 = KeyedJaggedTensor.from_offsets_sync(
+            keys=["f1", "f2", "f3"],
+            values=torch.tensor([0, 1, 2, 3, 2, 3, 4, 5, 6]),
+            offsets=torch.tensor([0, 2, 2, 3, 4, 5, 7, 8, 8, 9]),  # batch size = 3
+        )
+        eager_out1 = model(feature1)
+        eager_out2 = model(feature2)
+
+        # Serialize EBC with sample input (feature1, batch size = 2)
+        collection = mark_dynamic_kjt(feature1, variable_batch=True)
+        model, sparse_fqns = encapsulate_ir_modules(model, JsonSerializer)
+        ep = torch.export.export(
+            model,
+            (feature1,),
+            {},
+            dynamic_shapes=collection.dynamic_shapes(model, (feature1,)),
+            strict=False,
+            # Allows KJT to not be unflattened and run a forward on unflattened EP
+            preserve_module_call_signature=tuple(sparse_fqns),
+        )
+
+        # Run forward on ExportedProgram
+        ep_output1 = ep.module()(feature1)
+        ep_output2 = ep.module()(feature2)
+
+        # Check that the ExportedProgram outputs match the eager outputs in shape
+        for eager_out, ep_out in [(eager_out1, ep_output1), (eager_out2, ep_output2)]:
+            for a, b in zip(eager_out, ep_out):
+                self.assertEqual(a.shape, b.shape)
+
+        # Deserialize EBC
+        unflatten_ep = torch.export.unflatten(ep)
+        deserialized_model = decapsulate_ir_modules(unflatten_ep, JsonSerializer)
+        deserialized_model.load_state_dict(model.state_dict())
+
+        # Run forward on deserialized model
+        deserialized_out1 = deserialized_model(feature1)
+        deserialized_out2 = deserialized_model(feature2)
+
+        for e, d in [(eager_out1, deserialized_out1), (eager_out2, deserialized_out2)]:
+            for a, b in zip(e, d):
+                self.assertEqual(a.shape, b.shape)
+                torch.testing.assert_close(a, b)

     def test_ir_emb_lookup_device(self) -> None:
         model = self.generate_model()
@@ -573,7 +627,7 @@ def forward(self, features: KeyedJaggedTensor) -> List[torch.Tensor]:
         deserialized_out = deserialized_model(id_list_features)
         self.assertEqual(len(deserialized_out), len(eager_out))
         for x, y in zip(deserialized_out, eager_out):
-            self.assertTrue(torch.allclose(x, y))
+            torch.testing.assert_close(x, y)

     def test_regroup_as_dict_module(self) -> None:
         class Model(nn.Module):
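
A note on the recurring swap in this file: torch.testing.assert_close raises an AssertionError that reports the number of mismatched elements and the largest absolute/relative difference, and it also checks dtype and device by default, whereas self.assertTrue(torch.allclose(...)) fails with only "False is not true". A minimal standalone sketch of the difference (not part of this commit):

    import torch

    a = torch.tensor([1.0, 2.0, 3.0])
    b = a + 1e-7  # within default float32 tolerances

    # Old style: on failure, the test log only says "False is not true".
    assert torch.allclose(a, b)

    # New style: on failure, assert_close reports which elements mismatch
    # and by how much; it also checks dtype and device by default.
    torch.testing.assert_close(a, b)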

torchrec/ir/utils.py

Lines changed: 25 additions & 1 deletion
@@ -195,6 +195,7 @@ def mark_dynamic_kjt(
     kjt: KeyedJaggedTensor,
     shapes_collection: Optional[ShapesCollection] = None,
     variable_length: bool = False,
+    variable_batch: bool = False,
     vlen: Optional[DIM] = None,
     llen: Optional[DIM] = None,
 ) -> ShapesCollection:
@@ -211,10 +212,18 @@ def mark_dynamic_kjt(
         it will use the default name "vlen" for values, and "llen", "lofs" if variable length.
         A passed-in dynamic dim is useful if the dynamic dim is already used in other places.

+    Variable batch size means the batch size is dynamic across training iterations,
+    while within one iteration/batch the batch size is the same for all features, so
+    the invariant len(lengths) == len(keys) * batch_size still holds.
+
+    In the variable length scenario, the batch size can differ per feature within one
+    iteration/batch, so the invariant len(lengths) == len(keys) * batch_size does not hold.
+
     Args:
         kjt (KeyedJaggedTensor): The KJT to make dynamic.
         shapes_collection (Optional[ShapesCollection]): The collection to update.
-        variable_length (bool): Whether the KJT is variable length.
+        variable_length (bool): Whether the KJT is variable length, i.e.,
+            len(lengths) != len(keys) * batch_size.
+        variable_batch (bool): Whether the KJT has a variable batch size, with
+            len(lengths) == len(keys) * batch_size. Only takes effect when
+            variable_length is False.
         vlen (Optional[DIM]): The dynamic length for the values. If it's None, it will use the default name "vlen".
         llen (Optional[DIM]): The dynamic length for the lengths, it's only used when variable_length is true. If it's None, it will use the default name "llen".
         batch_size (Optional[DIM]): The dynamic length for the batch_size, it's only used when variable_length and mark_batch_size are both true.
@@ -245,6 +254,21 @@ def _has_dim(t: Optional[torch.Tensor]) -> bool:
             shapes_collection[kjt._lengths] = (llen,)
         if _has_dim(kjt._offsets):
             shapes_collection[kjt._offsets] = (llen + 1,)
+    elif variable_batch:
+        # Variable batch size means the batch size is dynamic across training
+        # iterations; within one iteration/batch the batch size is the same
+        # for all features.
+        #
+        # This is fundamentally different from variable length, where the
+        # batch size differs per feature within one iteration/batch.
+        #
+        # It is the user's responsibility to pass variable_batch=True only
+        # when variable_length is False; otherwise the dynamic shapes passed
+        # to torch.export will behave unexpectedly.
+        batch_size = _get_dim("batch_size")
+        if _has_dim(kjt._lengths):
+            shapes_collection[kjt._lengths] = (batch_size * len(kjt.keys()),)
+        if _has_dim(kjt._offsets):
+            shapes_collection[kjt._offsets] = (batch_size * len(kjt.keys()) + 1,)
     return shapes_collection
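To make the invariant concrete: with a fixed set of keys, lengths holds one entry per (key, sample) pair, which is exactly the shape constraint registered above. A quick standalone sketch (not part of this commit), using the same style of KJT as the new test:

    import torch
    from torchrec.sparse.jagged_tensor import KeyedJaggedTensor

    # 3 keys with batch size 2 -> lengths has 3 * 2 = 6 entries, and
    # offsets has one extra leading zero, hence 6 + 1 = 7 entries.
    kjt = KeyedJaggedTensor.from_offsets_sync(
        keys=["f1", "f2", "f3"],
        values=torch.tensor([0, 1, 2, 3, 2, 3]),
        offsets=torch.tensor([0, 2, 2, 3, 4, 5, 6]),
    )
    assert len(kjt.lengths()) == len(kjt.keys()) * 2  # 6 == 3 * 2
    assert len(kjt.offsets()) == len(kjt.keys()) * 2 + 1  # 7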

torchrec/modules/tests/test_itep_embedding_modules.py

Lines changed: 15 additions & 0 deletions
@@ -190,6 +190,11 @@ def generate_expected_address_lookup_buffer(

         return torch.tensor(address_lookup, dtype=torch.int64)

+    # pyre-ignore[56]: Pyre was not able to infer the type of argument
+    @unittest.skipIf(
+        torch.cuda.device_count() <= 1,
+        "Not enough GPUs, this test requires at least two GPUs",
+    )
     def test_init_itep_module(self) -> None:
         itep_module = GenericITEPModule(
             table_name_to_unpruned_hash_sizes=self._table_name_to_unpruned_hash_sizes,
@@ -222,6 +227,11 @@ def test_init_itep_module(self) -> None:
             equal_nan=True,
         )

+    # pyre-ignore[56]: Pyre was not able to infer the type of argument
+    @unittest.skipIf(
+        torch.cuda.device_count() <= 1,
+        "Not enough GPUs, this test requires at least two GPUs",
+    )
     def test_init_itep_module_without_pruned_table(self) -> None:
         itep_module = GenericITEPModule(
             table_name_to_unpruned_hash_sizes={},
@@ -353,6 +363,11 @@ def test_eval_forward(
         # Check that reset_weight_momentum is not called
         self.assertEqual(mock_reset_weight_momentum.call_count, 0)

+    # pyre-ignore[56]: Pyre was not able to infer the type of argument
+    @unittest.skipIf(
+        torch.cuda.device_count() <= 1,
+        "Not enough GPUs, this test requires at least two GPUs",
+    )
     def test_iter_increment_per_forward(self) -> None:
         """Test that the iteration counter increments correctly with each forward pass."""
         itep_module = GenericITEPModule(
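
Since the same guard is repeated on three tests, one possible refactor (a sketch, not part of this commit) is to bind the skip decorator once at module level; unittest.skipIf returns a reusable decorator:

    import unittest

    import torch

    # Evaluated once at import time; reusable on any multi-GPU test.
    # The name _requires_multi_gpu is hypothetical.
    _requires_multi_gpu = unittest.skipIf(
        torch.cuda.device_count() <= 1,
        "Not enough GPUs, this test requires at least two GPUs",
    )

    class ExampleTest(unittest.TestCase):
        @_requires_multi_gpu
        def test_needs_two_gpus(self) -> None:
            self.assertGreater(torch.cuda.device_count(), 1)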

torchrec/sparse/jagged_tensor.py

Lines changed: 1 addition & 1 deletion
@@ -1066,7 +1066,7 @@ def _jt_flatten_spec(t: JaggedTensor, spec: TreeSpec) -> List[Optional[torch.Tensor]]:
 def _assert_tensor_has_no_elements_or_has_integers(
     tensor: Optional[torch.Tensor], tensor_name: str
 ) -> None:
-    if is_torchdynamo_compiling() or tensor is None:
+    if torch.compiler.is_compiling() or tensor is None:
         # Skipping the check tensor.numel() == 0 to not guard on pt2 symbolic shapes.
         # TODO(ivankobzarev): Use guard_size_oblivious to pass tensor.numel() == 0 once it is torch scriptable.
         return
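
Context for the one-line swap above: torch.compiler.is_compiling() is the public replacement for the internal is_torchdynamo_compiling() helper; it returns True while torch.compile (Dynamo) or torch.export is tracing, so eager-only validation can be skipped without guarding on symbolic shapes. A minimal sketch of the pattern, with a hypothetical helper name:

    import torch

    def _check_nonempty(tensor: torch.Tensor, name: str) -> None:
        # Eager-only sanity check; skipped under torch.compile/torch.export
        # so the numel() call does not introduce guards on symbolic shapes.
        if torch.compiler.is_compiling():
            return
        if tensor.numel() == 0:
            raise ValueError(f"{name} must be non-empty")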
