Skip to content

Commit cda7246

Browse files
committed
Fix a compilation problem caused by casting char to int. Also move the fast wordpiece tokenizer (fwp) test files alongside the other test files, and update the tests so that they read them properly.
PiperOrigin-RevId: 407200636
1 parent f6388a4 commit cda7246

9 files changed

+162
-184
lines changed

tensorflow_text/BUILD

+6-6
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ py_library(
111111

112112
# public_names_test
113113

114+
# This is required for other external users to build tests using these models.
115+
exports_files(
116+
glob(["python/ops/test_data/**"]),
117+
)
118+
114119
##########################
115120
# Individual tf.text ops #
116121
##########################
@@ -224,7 +229,7 @@ py_test(
224229
size = "small",
225230
srcs = ["python/ops/fast_wordpiece_tokenizer_test.py"],
226231
data = [
227-
"//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_testdata",
232+
":python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
228233
],
229234
python_version = "PY3",
230235
srcs_version = "PY3",
@@ -748,11 +753,6 @@ py_tf_text_library(
748753
],
749754
)
750755

751-
# This is required for other external users to build tests using this model.
752-
exports_files(
753-
glob(["python/ops/test_data/test_oss_model.*"]),
754-
)
755-
756756
py_test(
757757
name = "sentencepiece_tokenizer_test",
758758
size = "large",

tensorflow_text/core/kernels/BUILD

+3-7
Original file line numberDiff line numberDiff line change
@@ -198,15 +198,12 @@ tf_cc_library(
198198
],
199199
)
200200

201-
filegroup(
202-
name = "fast_wordpiece_tokenizer_testdata",
203-
srcs = glob(["testdata/**"]),
204-
)
205-
206201
cc_test(
207202
name = "fast_wordpiece_tokenizer_test",
208203
srcs = ["fast_wordpiece_tokenizer_test.cc"],
209-
data = [":fast_wordpiece_tokenizer_testdata"],
204+
data = [
205+
"//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
206+
],
210207
deps = [
211208
":fast_wordpiece_tokenizer",
212209
":fast_wordpiece_tokenizer_model_builder",
@@ -240,7 +237,6 @@ tf_cc_library(
240237
"@com_google_absl//absl/status",
241238
"@com_google_absl//absl/status:statusor",
242239
"@com_google_absl//absl/strings",
243-
"//third_party/icu/data:icu_normalization_data",
244240
"@icu//:nfkc",
245241
# lite/kernels/shim:status_macros tensorflow dep,
246242
],

tensorflow_text/core/kernels/darts_clone_trie_wrapper.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_WRAPPER_H_
2929

3030
#include <stdint.h>
31+
#include <string.h>
3132

3233
#include "absl/status/statusor.h"
3334

@@ -81,7 +82,7 @@ class DartsCloneTrieWrapper {
8182
// Traverses one step from 'cursor' following 'ch'. If successful (i.e., there
8283
// exists such an edge), moves 'cursor' to the new node and returns true.
8384
// Otherwise, does nothing (i.e., 'cursor' is not changed) and returns false.
84-
bool TryTraverseOneStep(TraversalCursor& cursor, char ch) const {
85+
bool TryTraverseOneStep(TraversalCursor& cursor, unsigned char ch) const {
8586
const uint32_t next_node_id = cursor.node_id ^ offset(cursor.unit) ^ ch;
8687
const uint32_t next_node_unit = trie_array_[next_node_id];
8788
if (label(next_node_unit) != ch) {
@@ -124,9 +125,10 @@ class DartsCloneTrieWrapper {
124125
uint32_t cur_id = cursor.node_id;
125126
uint32_t cur_unit = cursor.unit;
126127
for (; size > 0; --size, ++ptr) {
127-
cur_id ^= offset(cur_unit) ^ *ptr;
128+
const unsigned char ch = static_cast<const unsigned char>(*ptr);
129+
cur_id ^= offset(cur_unit) ^ ch;
128130
cur_unit = trie_array_[cur_id];
129-
if (label(cur_unit) != *ptr) {
131+
if (label(cur_unit) != ch) {
130132
return false;
131133
}
132134
}

tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc

+2-3
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,13 @@ namespace {
2727
using ::testing::ElementsAre;
2828

2929
constexpr char kTestConfigPath[] =
30-
"/google3/third_party/tensorflow_text/core/kernels/testdata/"
30+
"third_party/tensorflow_text/python/ops/test_data/"
3131
"fast_wordpiece_tokenizer_model.fb";
3232

3333
TEST(FastWordpieceTokenizerTest, LoadAndTokenize) {
3434
std::string config_flatbuffer;
3535
auto status = tensorflow::ReadFileToString(
36-
tensorflow::Env::Default(),
37-
absl::GetFlag(FLAGS_test_srcdir) + kTestConfigPath, &config_flatbuffer);
36+
tensorflow::Env::Default(), kTestConfigPath, &config_flatbuffer);
3837
ASSERT_TRUE(status.ok());
3938

4039
// The config_flatbuffer used here is built from the following config:

tensorflow_text/core/pybinds/BUILD

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ py_test(
5454
name = "pywrap_fast_wordpiece_tokenizer_model_builder_test",
5555
srcs = ["pywrap_fast_wordpiece_tokenizer_model_builder_test.py"],
5656
data = [
57-
"//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_testdata",
57+
"//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
5858
],
5959
python_version = "PY3",
6060
deps = [

tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,12 @@
1919
from __future__ import division
2020
from __future__ import print_function
2121

22-
import os
23-
from absl import flags
24-
2522
from tensorflow.python.framework import test_util
2623
from tensorflow.python.platform import gfile
2724
from tensorflow.python.platform import test
2825
from tensorflow_text.core.pybinds import pywrap_fast_wordpiece_tokenizer_model_builder
2926

30-
FLAGS = flags.FLAGS
31-
32-
EXPECTED_MODEL_BUFFER_PATH = "google3/third_party/tensorflow_text/core/kernels/testdata/fast_wordpiece_tokenizer_model.fb"
27+
EXPECTED_MODEL_BUFFER_PATH = "third_party/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb"
3328

3429

3530
class PywrapFastWordpieceBuilderTest(test_util.TensorFlowTestCase):
@@ -42,9 +37,7 @@ def test_build(self):
4237
max_bytes_per_token = 100
4338
suffix_indicator = "##"
4439
unk_token = "<unk>"
45-
expected_model_buffer = gfile.GFile(
46-
os.path.join(FLAGS.test_srcdir, EXPECTED_MODEL_BUFFER_PATH),
47-
"rb").read()
40+
expected_model_buffer = gfile.GFile(EXPECTED_MODEL_BUFFER_PATH, "rb").read()
4841
self.assertEqual(
4942
pywrap_fast_wordpiece_tokenizer_model_builder
5043
.build_fast_wordpiece_model(

0 commit comments

Comments
 (0)