Remove unused TPU testing for backbones (#1266)

mattdangerw · mattdangerw · commit ba4525649171 · 2023-11-07T14:32:34.000-08:00
TPU testing would be great, but we should not do it quite like this.

- These are not run in any form of CI right now. Do they run at all?
  I don't think anyone has tried these for many months.
- They basically replicate the simplest tests from the main fixture
  inside a TPU strategy scope. We would be much better off handling
  this inside some common test setup, rather than replicating test
  code.
- They would not work across multiple backends.
- They do not test presets, tasks, backprop, or much of the
  functionality that actually matters for our pretrained model offering.

Let's remove them for now while I consolidate our preset testing code.
We can bring them back later when we have a plan to run them in a more
sustainable way for our test suite.
diff --git a/keras_nlp/conftest.py b/keras_nlp/conftest.py
@@ -12,30 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-
 import pytest
 import tensorflow as tf
 
 from keras_nlp.backend import config as backend_config
 from keras_nlp.backend import keras
 
 
-@pytest.fixture(scope="session")
-def tpu_strategy():
-    tpu_name = os.getenv("KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS")
-    resolver = tf.distribute.cluster_resolver.TPUClusterResolver.connect(
-        tpu=tpu_name,
-    )
-    return tf.distribute.TPUStrategy(resolver)
-
-
-@pytest.fixture(scope="class")
-def tpu_test_class(request, tpu_strategy):
-    # set a class attribute on the invoking test context
-    request.cls.tpu_strategy = tpu_strategy
-
-
 def pytest_addoption(parser):
     parser.addoption(
         "--run_large",
@@ -49,18 +32,6 @@ def pytest_addoption(parser):
         default=False,
         help="run extra_large tests",
     )
-    parser.addoption(
-        "--run_tpu",
-        action="store_true",
-        default=False,
-        help="run tpu tests",
-    )
-    parser.addoption(
-        "--mixed_precision",
-        action="store_true",
-        default=False,
-        help="run with mixed precision",
-    )
     parser.addoption(
         "--docstring_module",
         action="store",
@@ -70,18 +41,13 @@ def pytest_addoption(parser):
 
 
 def pytest_configure(config):
-    if config.getoption("--mixed_precision"):
-        keras.mixed_precision.set_global_policy("mixed_float16")
-    config.addinivalue_line(
-        "markers", "large: mark test as being slow or requiring a network"
-    )
     config.addinivalue_line(
         "markers",
-        "extra_large: mark test as being too large to run continuously",
+        "large: mark test as being slow or requiring a network",
     )
     config.addinivalue_line(
         "markers",
-        "tpu: mark test as tpu test",
+        "extra_large: mark test as being too large to run continuously",
     )
     config.addinivalue_line(
         "markers",
@@ -93,7 +59,6 @@ def pytest_collection_modifyitems(config, items):
     run_extra_large_tests = config.getoption("--run_extra_large")
     # Run large tests for --run_extra_large or --run_large.
     run_large_tests = config.getoption("--run_large") or run_extra_large_tests
-    run_tpu = config.getoption("--run_tpu")
 
     # Messages to annotate skipped tests with.
     skip_large = pytest.mark.skipif(
@@ -104,10 +69,6 @@ def pytest_collection_modifyitems(config, items):
         not run_extra_large_tests,
         reason="need --run_extra_large option to run",
     )
-    skip_tpu = pytest.mark.skipif(
-        not run_tpu,
-        reason="need --run_tpu option to run",
-    )
     skip_tf_only = pytest.mark.skipif(
         not backend_config.backend() == "tensorflow",
         reason="tests only run on tf backend",
@@ -117,8 +78,6 @@ def pytest_collection_modifyitems(config, items):
             item.add_marker(skip_large)
         if "extra_large" in item.keywords:
             item.add_marker(skip_extra_large)
-        if "tpu" in item.keywords:
-            item.add_marker(skip_tpu)
         if "tf_only" in item.keywords:
             item.add_marker(skip_tf_only)
 
diff --git a/keras_nlp/models/albert/albert_backbone_test.py b/keras_nlp/models/albert/albert_backbone_test.py
@@ -101,34 +101,3 @@ def test_saved_model(self):
         self.assertAllClose(
             model_output["pooled_output"], restored_output["pooled_output"]
         )
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class AlbertBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = AlbertBackbone(
-                vocabulary_size=10,
-                num_layers=2,
-                num_heads=2,
-                num_groups=1,
-                num_inner_repetitions=1,
-                embedding_dim=16,
-                hidden_dim=2,
-                intermediate_dim=2,
-                max_sequence_length=4,
-            )
-
-        self.input_batch = {
-            "token_ids": np.ones((8, 128), dtype="int32"),
-            "segment_ids": np.ones((8, 128), dtype="int32"),
-            "padding_mask": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/bart/bart_backbone_test.py b/keras_nlp/models/bart/bart_backbone_test.py
@@ -91,31 +91,3 @@ def test_saved_model(self):
             model_output["decoder_sequence_output"],
             restored_output["decoder_sequence_output"],
         )
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class BartBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = BartBackbone(
-                vocabulary_size=1000,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=64,
-                intermediate_dim=128,
-                max_sequence_length=128,
-            )
-        self.input_batch = {
-            "encoder_token_ids": np.ones((8, 128), dtype="int32"),
-            "encoder_padding_mask": np.ones((8, 128), dtype="int32"),
-            "decoder_token_ids": np.ones((8, 128), dtype="int32"),
-            "decoder_padding_mask": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/bert/bert_backbone_test.py b/keras_nlp/models/bert/bert_backbone_test.py
@@ -85,30 +85,3 @@ def test_saved_model(self):
         # Check that output matches.
         restored_output = restored_model(self.input_batch)
         self.assertAllClose(model_output, restored_output)
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class BertBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = BertBackbone(
-                vocabulary_size=1000,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=64,
-                intermediate_dim=128,
-                max_sequence_length=128,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((8, 128), dtype="int32"),
-            "segment_ids": np.ones((8, 128), dtype="int32"),
-            "padding_mask": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_backbone_test.py b/keras_nlp/models/deberta_v3/deberta_v3_backbone_test.py
@@ -90,30 +90,3 @@ def test_saved_model(self):
         # Check that output matches.
         restored_output = restored_model(self.input_batch)
         self.assertAllClose(model_output, restored_output)
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class DebertaV3BackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = DebertaV3Backbone(
-                vocabulary_size=10,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=2,
-                intermediate_dim=4,
-                max_sequence_length=5,
-                bucket_size=2,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((2, 5), dtype="int32"),
-            "padding_mask": np.ones((2, 5), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/distil_bert/distil_bert_backbone_test.py b/keras_nlp/models/distil_bert/distil_bert_backbone_test.py
@@ -83,29 +83,3 @@ def test_saved_model(self):
         # Check that output matches.
         restored_output = restored_model(self.input_batch)
         self.assertAllClose(model_output, restored_output)
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class DistilBertTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = DistilBertBackbone(
-                vocabulary_size=1000,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=64,
-                intermediate_dim=128,
-                max_sequence_length=128,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((8, 128), dtype="int32"),
-            "padding_mask": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/f_net/f_net_backbone_test.py b/keras_nlp/models/f_net/f_net_backbone_test.py
@@ -81,29 +81,3 @@ def test_saved_model(self):
         self.assertAllClose(
             model_output["pooled_output"], restored_output["pooled_output"]
         )
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class FNetBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = FNetBackbone(
-                vocabulary_size=100,
-                num_layers=2,
-                hidden_dim=16,
-                intermediate_dim=32,
-                max_sequence_length=128,
-                num_segments=4,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((8, 128), dtype="int32"),
-            "segment_ids": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/gpt2/gpt2_backbone_test.py b/keras_nlp/models/gpt2/gpt2_backbone_test.py
@@ -101,29 +101,3 @@ def test_create_layout_map(self):
         # bridge elsewhere and must disable. See
         # https://github.com/keras-team/keras-nlp/issues/1001
         tf.config.experimental.disable_mlir_bridge()
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class GPT2BackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.model = GPT2Backbone(
-                vocabulary_size=10,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=2,
-                intermediate_dim=4,
-                max_sequence_length=5,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((2, 5), dtype="int32"),
-            "padding_mask": np.ones((2, 5), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.model.compile()
-        self.model.predict(self.input_dataset)
diff --git a/keras_nlp/models/gpt_neo_x/gpt_neo_x_backbone_test.py b/keras_nlp/models/gpt_neo_x/gpt_neo_x_backbone_test.py
@@ -83,29 +83,3 @@ def test_saved_model(self):
         # Check that output matches.
         restored_output = restored_model(self.input_batch)
         self.assertAllClose(model_output, restored_output)
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class GPTNeoXBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            GPTNeoXBackbone(
-                vocabulary_size=10,
-                num_layers=4,
-                num_heads=4,
-                hidden_dim=64,
-                intermediate_dim=64,
-                max_sequence_length=10,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((2, 5), dtype="int32"),
-            "padding_mask": np.ones((2, 5), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.model.compile()
-        self.model.predict(self.input_dataset)
diff --git a/keras_nlp/models/opt/opt_backbone_test.py b/keras_nlp/models/opt/opt_backbone_test.py
@@ -101,29 +101,3 @@ def test_create_layout_map(self):
         # bridge elsewhere and must disable. See
         # https://github.com/keras-team/keras-nlp/issues/1001
         tf.config.experimental.disable_mlir_bridge()
-
-
-@pytest.mark.tpu
-@pytest.mark.usefixtures("tpu_test_class")
-class OPTBackboneTPUTest(TestCase):
-    def setUp(self):
-        with self.tpu_strategy.scope():
-            self.backbone = OPTBackbone(
-                vocabulary_size=1000,
-                num_layers=2,
-                num_heads=2,
-                hidden_dim=32,
-                intermediate_dim=128,
-                max_sequence_length=128,
-            )
-        self.input_batch = {
-            "token_ids": np.ones((8, 128), dtype="int32"),
-            "padding_mask": np.ones((8, 128), dtype="int32"),
-        }
-        self.input_dataset = tf.data.Dataset.from_tensor_slices(
-            self.input_batch
-        ).batch(2)
-
-    def test_predict(self):
-        self.backbone.compile()
-        self.backbone.predict(self.input_dataset)
diff --git a/keras_nlp/models/roberta/roberta_backbone_test.py b/keras_nlp/models/roberta/roberta_backbone_test.py
diff --git a/keras_nlp/models/t5/t5_backbone_test.py b/keras_nlp/models/t5/t5_backbone_test.py
diff --git a/keras_nlp/models/whisper/whisper_backbone_test.py b/keras_nlp/models/whisper/whisper_backbone_test.py
diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_backbone_test.py b/keras_nlp/models/xlm_roberta/xlm_roberta_backbone_test.py
diff --git a/keras_nlp/models/xlnet/xlnet_backbone_test.py b/keras_nlp/models/xlnet/xlnet_backbone_test.py