Skip to content

Commit e9ba855

Browse files
committed
Revert "Add serialization support for SentencepieceResources."
This reverts commit 52f9004. The correct symbols are not exported from TF, so it would break Windows builds.
1 parent a14c2d5 commit e9ba855

File tree

3 files changed

+1
-59
lines changed

3 files changed

+1
-59
lines changed

tensorflow_text/core/kernels/BUILD

-1
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,6 @@ tf_text_kernel_library(
363363
# absl/strings dep
364364
# absl/types:span dep
365365
"@com_google_sentencepiece//src:sentencepiece_cc_proto",
366-
"@com_google_sentencepiece//src:sentencepiece_model_cc_proto",
367366
"@com_google_sentencepiece//src:sentencepiece_processor",
368367
] + tf_deps(deps = [
369368
# tf:framework tensorflow dep,

tensorflow_text/core/kernels/sentencepiece_kernels.cc

+1-22
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "absl/strings/string_view.h"
2121
#include "absl/synchronization/mutex.h"
2222
#include "absl/types/span.h"
23-
#include "src/sentencepiece_model.pb.h"
2423
#include "src/sentencepiece.pb.h"
2524
#include "src/sentencepiece_processor.h"
2625
#include "tensorflow/core/framework/bounds_check.h"
@@ -33,7 +32,6 @@
3332
#include "tensorflow/core/framework/tensor_types.h"
3433
#include "tensorflow/core/framework/types.h"
3534
#include "tensorflow/core/framework/types.pb.h"
36-
#include "tensorflow/core/graph/graph_def_builder.h"
3735
#include "tensorflow/core/lib/core/errors.h"
3836
#include "tensorflow/core/lib/core/refcount.h"
3937
#include "tensorflow/core/lib/core/status.h"
@@ -56,7 +54,7 @@ struct SentencepieceResource : public ResourceBase {
5654
bool add_bos = false;
5755
bool add_eos = false;
5856
bool reverse = false;
59-
mutable absl::Mutex mu;
57+
absl::Mutex mu;
6058

6159
string DebugString() const override { return "Sentencepiece Resource"; }
6260

@@ -66,25 +64,6 @@ struct SentencepieceResource : public ResourceBase {
6664
return (add_bos == this->add_bos) && (add_eos == this->add_eos) &&
6765
(reverse == this->reverse);
6866
}
69-
70-
Status AsGraphDef(GraphDefBuilder* builder, Node** out) const override {
71-
absl::ReaderMutexLock l(&mu);
72-
// We set use_node_name_sharing with a unique node name so that the resource
73-
// can outlive the kernel. This means that the lifetime of the re-created
74-
// resource will be tied to the lifetime of the resource manager it is
75-
// created in.
76-
static std::atomic<int64> counter(0);
77-
std::string unique_node_name = strings::StrCat(
78-
"SentencepieceResourceFromGraphDef", "/", counter.fetch_add(1));
79-
std::string model = processor.model_proto().SerializeAsString();
80-
*out = ops::SourceOp(
81-
"SentencepieceOp",
82-
builder->opts()
83-
.WithName(unique_node_name)
84-
.WithAttr("model", model)
85-
.WithAttr("use_node_name_sharing", true));
86-
return Status::OK();
87-
}
8867
};
8968

9069
// According to .../tensorflow/core/util/work_sharder.cc, this values determines

tensorflow_text/python/ops/sentencepiece_tokenizer_test.py

-36
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from tensorflow.python.framework import test_util
3131
from tensorflow.python.lib.io import file_io
3232
from tensorflow.python.module import module
33-
from tensorflow.python.ops import gen_experimental_dataset_ops
3433
from tensorflow.python.ops.ragged import ragged_factory_ops
3534
from tensorflow.python.ops.ragged import ragged_gather_ops
3635
from tensorflow.python.platform import gfile
@@ -500,40 +499,5 @@ def testInvalidModel(self):
500499
result.eval()
501500

502501

503-
# Test that datasets depending on a sentencepiece tokenizer resources can be
504-
# serialized without external references.
505-
# This test is separate from `SentencepieceTokenizerOpTest` below because
506-
# context._reset_context() must be called from outside the context created by
507-
# `@test_util.run_all_in_graph_and_eager_modes`.
508-
class DatasetSerializationTest(test_util.TensorFlowTestCase):
509-
510-
def testSerialization(self):
511-
with context.eager_mode():
512-
sentencepiece_model_file = (
513-
'tensorflow_text/python/ops/test_data/'
514-
'test_oss_model.model')
515-
model = gfile.GFile(sentencepiece_model_file, 'rb').read()
516-
sp = SentencepieceTokenizer(model)
517-
strings = ['hello', 'world']
518-
dataset = dataset_ops.Dataset.from_tensor_slices(strings)
519-
# Ensure we can map the tokenizer across the dataset.
520-
dataset = dataset.map(sp.tokenize)
521-
graph = dataset._as_serialized_graph()
522-
element_spec = dataset.element_spec
523-
dataset_graph_string = graph.numpy()
524-
expected = sp.tokenize(strings)
525-
526-
# Reset the eager context to make sure that the serialized dataset graph
527-
# is self-contained.
528-
context._reset_context()
529-
530-
with context.eager_mode():
531-
restored = dataset_ops.from_variant(
532-
gen_experimental_dataset_ops.dataset_from_graph(dataset_graph_string),
533-
element_spec)
534-
for i, result in enumerate(restored):
535-
self.assertAllEqual(result, expected[i])
536-
537-
538502
if __name__ == '__main__':
539503
test.main()

0 commit comments

Comments (0)