Skip to content

Commit e9ba855

Browse files
committed
Revert "Add serialization support for SentencepieceResources."
This reverts commit 52f9004. The correct symbols are not exported from TF, so it would break Windows builds.
1 parent a14c2d5 commit e9ba855

File tree

3 files changed

+1
-59
lines changed

3 files changed

+1
-59
lines changed

tensorflow_text/core/kernels/BUILD

-1
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,6 @@ tf_text_kernel_library(
363363
# absl/strings dep
364364
# absl/types:span dep
365365
"@com_google_sentencepiece//src:sentencepiece_cc_proto",
366-
"@com_google_sentencepiece//src:sentencepiece_model_cc_proto",
367366
"@com_google_sentencepiece//src:sentencepiece_processor",
368367
] + tf_deps(deps = [
369368
# tf:framework tensorflow dep,

tensorflow_text/core/kernels/sentencepiece_kernels.cc

+1-22
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "absl/strings/string_view.h"
2121
#include "absl/synchronization/mutex.h"
2222
#include "absl/types/span.h"
23-
#include "src/sentencepiece_model.pb.h"
2423
#include "src/sentencepiece.pb.h"
2524
#include "src/sentencepiece_processor.h"
2625
#include "tensorflow/core/framework/bounds_check.h"
@@ -33,7 +32,6 @@
3332
#include "tensorflow/core/framework/tensor_types.h"
3433
#include "tensorflow/core/framework/types.h"
3534
#include "tensorflow/core/framework/types.pb.h"
36-
#include "tensorflow/core/graph/graph_def_builder.h"
3735
#include "tensorflow/core/lib/core/errors.h"
3836
#include "tensorflow/core/lib/core/refcount.h"
3937
#include "tensorflow/core/lib/core/status.h"
@@ -56,7 +54,7 @@ struct SentencepieceResource : public ResourceBase {
5654
bool add_bos = false;
5755
bool add_eos = false;
5856
bool reverse = false;
59-
mutable absl::Mutex mu;
57+
absl::Mutex mu;
6058

6159
string DebugString() const override { return "Sentencepiece Resource"; }
6260

@@ -66,25 +64,6 @@ struct SentencepieceResource : public ResourceBase {
6664
return (add_bos == this->add_bos) && (add_eos == this->add_eos) &&
6765
(reverse == this->reverse);
6866
}
69-
70-
Status AsGraphDef(GraphDefBuilder* builder, Node** out) const override {
71-
absl::ReaderMutexLock l(&mu);
72-
// We set use_node_name_sharing with a unique node name so that the resource
73-
// can outlive the kernel. This means that the lifetime of the re-created
74-
// resource will be tied to the lifetime of the resource manager it is
75-
// created in.
76-
static std::atomic<int64> counter(0);
77-
std::string unique_node_name = strings::StrCat(
78-
"SentencepieceResourceFromGraphDef", "/", counter.fetch_add(1));
79-
std::string model = processor.model_proto().SerializeAsString();
80-
*out = ops::SourceOp(
81-
"SentencepieceOp",
82-
builder->opts()
83-
.WithName(unique_node_name)
84-
.WithAttr("model", model)
85-
.WithAttr("use_node_name_sharing", true));
86-
return Status::OK();
87-
}
8867
};
8968

9069
// According to .../tensorflow/core/util/work_sharder.cc, this values determines

tensorflow_text/python/ops/sentencepiece_tokenizer_test.py

-36
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from tensorflow.python.framework import test_util
3131
from tensorflow.python.lib.io import file_io
3232
from tensorflow.python.module import module
33-
from tensorflow.python.ops import gen_experimental_dataset_ops
3433
from tensorflow.python.ops.ragged import ragged_factory_ops
3534
from tensorflow.python.ops.ragged import ragged_gather_ops
3635
from tensorflow.python.platform import gfile
@@ -500,40 +499,5 @@ def testInvalidModel(self):
500499
result.eval()
501500

502501

503-
# Test that datasets depending on a sentencepiece tokenizer resources can be
504-
# serialized without external references.
505-
# This test is separate from `SentencepieceTokenizerOpTest` below because
506-
# context._reset_context() must be called from outside the context created by
507-
# `@test_util.run_all_in_graph_and_eager_modes`.
508-
class DatasetSerializationTest(test_util.TensorFlowTestCase):
509-
510-
def testSerialization(self):
511-
with context.eager_mode():
512-
sentencepiece_model_file = (
513-
'tensorflow_text/python/ops/test_data/'
514-
'test_oss_model.model')
515-
model = gfile.GFile(sentencepiece_model_file, 'rb').read()
516-
sp = SentencepieceTokenizer(model)
517-
strings = ['hello', 'world']
518-
dataset = dataset_ops.Dataset.from_tensor_slices(strings)
519-
# Ensure we can map the tokenizer across the dataset.
520-
dataset = dataset.map(sp.tokenize)
521-
graph = dataset._as_serialized_graph()
522-
element_spec = dataset.element_spec
523-
dataset_graph_string = graph.numpy()
524-
expected = sp.tokenize(strings)
525-
526-
# Reset the eager context to make sure that the serialized dataset graph
527-
# is self-contained.
528-
context._reset_context()
529-
530-
with context.eager_mode():
531-
restored = dataset_ops.from_variant(
532-
gen_experimental_dataset_ops.dataset_from_graph(dataset_graph_string),
533-
element_spec)
534-
for i, result in enumerate(restored):
535-
self.assertAllEqual(result, expected[i])
536-
537-
538502
if __name__ == '__main__':
539503
test.main()

0 commit comments

Comments (0)