Commit 11da67d

Fix TF trainer bug when first input is None (#21630)
* Fix TF trainer bug when the first flattened input is None
* Test the fix by adapting 2 unit tests
* Same fix for jax & torch
1 parent bc3d38c commit 11da67d

File tree

4 files changed (+34, -21 lines)
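Why this matters: each backend trainer infers the batch size for loss tracking from the first entry of `tree.flatten(x)`. With optional inputs, that entry can be `None`, and `.shape[0]` then raises `AttributeError: 'NoneType' object has no attribute 'shape'`. A minimal sketch of the failure and of the fix's selection logic, using a plain Python list to mimic a `tree.flatten(x)` result (illustrative only, not the trainer code itself):

```python
import numpy as np

# Mimics tree.flatten(x) when an optional input was passed as None
# and happens to come first in the flattened structure.
flat_inputs = [None, np.ones((2, 2))]

# Old logic: assumes the first flattened input is a real tensor.
try:
    batch_size = flat_inputs[0].shape[0]
except AttributeError as err:
    print(f"old logic fails: {err}")

# Fixed logic: take the first non-None entry instead.
batch_size = next(i for i in flat_inputs if i is not None).shape[0]
print(batch_size)  # 2
```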

keras/src/backend/jax/trainer.py

Lines changed: 4 additions & 1 deletion
@@ -105,7 +105,10 @@ def _update_metrics_variables(
             ]
         ) as scope:
             self._loss_tracker.update_state(
-                unscaled_loss, sample_weight=tree.flatten(x)[0].shape[0]
+                unscaled_loss,
+                sample_weight=next(
+                    i for i in tree.flatten(x) if i is not None
+                ).shape[0],
             )
             logs = self.compute_metrics(x, y, y_pred, sample_weight)

keras/src/backend/tensorflow/trainer.py

Lines changed: 6 additions & 2 deletions
@@ -68,7 +68,9 @@ def train_step(self, data):
         )
         self._loss_tracker.update_state(
             loss_module.unscale_loss_for_distribution(loss),
-            sample_weight=tf.shape(tree.flatten(x)[0])[0],
+            sample_weight=tf.shape(
+                next(i for i in tree.flatten(x) if i is not None)
+            )[0],
         )
         if self.optimizer is not None:
             loss = self.optimizer.scale_loss(loss)
@@ -96,7 +98,9 @@ def test_step(self, data):
         )
         self._loss_tracker.update_state(
             loss_module.unscale_loss_for_distribution(loss),
-            sample_weight=tf.shape(tree.flatten(x)[0])[0],
+            sample_weight=tf.shape(
+                next(i for i in tree.flatten(x) if i is not None)
+            )[0],
         )
         return self.compute_metrics(x, y, y_pred, sample_weight=sample_weight)
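A side note on the TensorFlow variant: it reads the batch size with `tf.shape(...)[0]` instead of `.shape[0]` because, inside a `tf.function`-compiled step, the static batch dimension is often unknown. A small illustration of that distinction (my own sketch, not part of this commit):

```python
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=[None, 2])])
def dynamic_batch_size(t):
    # Static shape: t.shape[0] is None at trace time (unknown batch dim).
    # Dynamic shape: tf.shape(t)[0] is resolved when the step runs.
    return tf.shape(t)[0]

print(dynamic_batch_size(tf.ones((3, 2))))  # tf.Tensor(3, shape=(), dtype=int32)
```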

keras/src/backend/torch/trainer.py

Lines changed: 8 additions & 2 deletions
@@ -54,7 +54,10 @@ def train_step(self, data):
             x=x, y=y, y_pred=y_pred, sample_weight=sample_weight, training=True
         )
         self._loss_tracker.update_state(
-            loss, sample_weight=tree.flatten(x)[0].shape[0]
+            loss,
+            sample_weight=next(
+                i for i in tree.flatten(x) if i is not None
+            ).shape[0],
         )
         if self.optimizer is not None:
             loss = self.optimizer.scale_loss(loss)
@@ -90,7 +93,10 @@ def test_step(self, data):
             x=x, y=y, y_pred=y_pred, sample_weight=sample_weight, training=False
         )
         self._loss_tracker.update_state(
-            loss, sample_weight=tree.flatten(x)[0].shape[0]
+            loss,
+            sample_weight=next(
+                i for i in tree.flatten(x) if i is not None
+            ).shape[0],
         )
         return self.compute_metrics(x, y, y_pred, sample_weight=sample_weight)
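The same `next(i for i in tree.flatten(x) if i is not None)` expression now appears in the jax, tensorflow, and torch trainers. One caveat: `next(...)` with no default raises `StopIteration` when every flattened input is `None`, so the trainers implicitly assume at least one real tensor is present. A hypothetical shared helper (not part of this commit) that would make both the pattern and the assumption explicit:

```python
def first_non_none(flat_inputs):
    """Return the first non-None entry of a flattened input structure.

    Hypothetical sketch: fails with a clearer error than the bare
    StopIteration that next() raises when every input is None.
    """
    for entry in flat_inputs:
        if entry is not None:
            return entry
    raise ValueError("Expected at least one non-None input.")
```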

keras/src/models/model_test.py

Lines changed: 16 additions & 16 deletions
@@ -163,14 +163,14 @@ def __init__(self):
             super().__init__()
             self.dense = layers.Dense(2)
 
-        def call(self, a, b=None):
-            x = a if b is None else a + b
-            return self.dense(x)
-
-    x1 = Input((2,), name="x1")
-    x2 = Input((2,), name="x2", optional=True)
-    y = OptionalInputLayer()(x1, x2)
-    model = Model({"x1": x1, "x2": x2}, y)
+        def call(self, x, o=None):
+            z = x if o is None else x + o
+            return self.dense(z)
+
+    x = Input((2,), name="x")
+    o = Input((2,), name="o", optional=True)
+    y = OptionalInputLayer()(x, o)
+    model = Model({"x": x, "o": o}, y)
     return model
@@ -1241,27 +1241,27 @@ def test_functional_deeply_nested_outputs_struct_losses(self):
     )
     def test_functional_optional_inputs(self, is_optional_none):
         model = _get_model_optional_inputs()
-        x1 = np.ones((2, 2))
-        x2 = None if is_optional_none else np.ones((2, 2))
+        x = np.ones((2, 2))
+        o = None if is_optional_none else np.ones((2, 2))
         y_true = np.ones((2, 2))
 
         model.compile(loss="mse", optimizer="adam")
-        model.fit(x={"x1": x1, "x2": x2}, y=y_true)
-        model.evaluate(x={"x1": x1, "x2": x2}, y=y_true)
-        model.predict(x={"x1": x1, "x2": x2})
+        model.fit(x={"x": x, "o": o}, y=y_true)
+        model.evaluate(x={"x": x, "o": o}, y=y_true)
+        model.predict(x={"x": x, "o": o})
 
     @parameterized.named_parameters(
         ("optional_none", True), ("optional_tensor", False)
     )
     def test_functional_optional_inputs_generator(self, is_optional_none):
         model = _get_model_optional_inputs()
-        x1 = np.ones((2, 2))
-        x2 = None if is_optional_none else np.ones((2, 2))
+        x = np.ones((2, 2))
+        o = None if is_optional_none else np.ones((2, 2))
         y_true = np.ones((2, 2))
 
         def data_generator(with_y=True):
             for _ in range(4):
-                yield ({"x1": x1, "x2": x2},) + ((y_true,) if with_y else ())
+                yield ({"x": x, "o": o},) + ((y_true,) if with_y else ())
 
         model.compile(loss="mse", optimizer="adam")
         model.fit(data_generator())
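The test renames look cosmetic but are presumably what makes the regression reproducible: dict inputs are flattened in sorted key order, so with the old names the non-optional `x1` always came first and `tree.flatten(x)[0]` never hit the `None`. With the new names, the optional input `o` sorts before `x`. A quick illustration of that ordering assumption using plain `sorted`:

```python
# Old names: the non-optional input flattens first, hiding the bug.
print(sorted(["x1", "x2"]))  # ['x1', 'x2']

# New names: the optional input "o" flattens first, exposing the bug.
print(sorted(["x", "o"]))  # ['o', 'x']
```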
