Fix a compilation problem caused by casting char to int. Also move the fast wordpiece (fwp) test files alongside the other test files, and update the tests so that they read the files correctly from the new location.

broken · broken · commit cda72469fbbc · 2021-11-04T14:58:16.000-07:00
PiperOrigin-RevId: 407200636
diff --git a/tensorflow_text/BUILD b/tensorflow_text/BUILD
@@ -111,6 +111,11 @@ py_library(
 
 # public_names_test
 
+# This is required for other external users to build tests using these models.
+exports_files(
+    glob(["python/ops/test_data/**"]),
+)
+
 ##########################
 # Individual tf.text ops #
 ##########################
@@ -224,7 +229,7 @@ py_test(
     size = "small",
     srcs = ["python/ops/fast_wordpiece_tokenizer_test.py"],
     data = [
-        "//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_testdata",
+        ":python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
     ],
     python_version = "PY3",
     srcs_version = "PY3",
@@ -748,11 +753,6 @@ py_tf_text_library(
     ],
 )
 
-# This is required for other external users to build tests using this model.
-exports_files(
-    glob(["python/ops/test_data/test_oss_model.*"]),
-)
-
 py_test(
     name = "sentencepiece_tokenizer_test",
     size = "large",
diff --git a/tensorflow_text/core/kernels/BUILD b/tensorflow_text/core/kernels/BUILD
@@ -198,15 +198,12 @@ tf_cc_library(
     ],
 )
 
-filegroup(
-    name = "fast_wordpiece_tokenizer_testdata",
-    srcs = glob(["testdata/**"]),
-)
-
 cc_test(
     name = "fast_wordpiece_tokenizer_test",
     srcs = ["fast_wordpiece_tokenizer_test.cc"],
-    data = [":fast_wordpiece_tokenizer_testdata"],
+    data = [
+        "//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
+    ],
     deps = [
         ":fast_wordpiece_tokenizer",
         ":fast_wordpiece_tokenizer_model_builder",
@@ -240,7 +237,6 @@ tf_cc_library(
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
-        "//third_party/icu/data:icu_normalization_data",
         "@icu//:nfkc",
         # lite/kernels/shim:status_macros tensorflow dep,
     ],
diff --git a/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h b/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h
@@ -28,6 +28,7 @@
 #define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_WRAPPER_H_
 
 #include <stdint.h>
+#include <string.h>
 
 #include "absl/status/statusor.h"
 
@@ -81,7 +82,7 @@ class DartsCloneTrieWrapper {
   // Traverses one step from 'cursor' following 'ch'. If successful (i.e., there
   // exists such an edge), moves 'cursor' to the new node and returns true.
   // Otherwise, does nothing (i.e., 'cursor' is not changed) and returns false.
-  bool TryTraverseOneStep(TraversalCursor& cursor, char ch) const {
+  bool TryTraverseOneStep(TraversalCursor& cursor, unsigned char ch) const {
     const uint32_t next_node_id = cursor.node_id ^ offset(cursor.unit) ^ ch;
     const uint32_t next_node_unit = trie_array_[next_node_id];
     if (label(next_node_unit) != ch) {
@@ -124,9 +125,10 @@ class DartsCloneTrieWrapper {
     uint32_t cur_id = cursor.node_id;
     uint32_t cur_unit = cursor.unit;
     for (; size > 0; --size, ++ptr) {
-      cur_id ^= offset(cur_unit) ^ *ptr;
+      const unsigned char ch = static_cast<const unsigned char>(*ptr);
+      cur_id ^= offset(cur_unit) ^ ch;
       cur_unit = trie_array_[cur_id];
-      if (label(cur_unit) != *ptr) {
+      if (label(cur_unit) != ch) {
         return false;
       }
     }
diff --git a/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc b/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc
@@ -27,14 +27,13 @@ namespace {
 using ::testing::ElementsAre;
 
 constexpr char kTestConfigPath[] =
-    "/google3/third_party/tensorflow_text/core/kernels/testdata/"
+    "third_party/tensorflow_text/python/ops/test_data/"
     "fast_wordpiece_tokenizer_model.fb";
 
 TEST(FastWordpieceTokenizerTest, LoadAndTokenize) {
   std::string config_flatbuffer;
   auto status = tensorflow::ReadFileToString(
-      tensorflow::Env::Default(),
-      absl::GetFlag(FLAGS_test_srcdir) + kTestConfigPath, &config_flatbuffer);
+      tensorflow::Env::Default(), kTestConfigPath, &config_flatbuffer);
   ASSERT_TRUE(status.ok());
 
   // The config_flatbuffer used here is built from the following config:
diff --git a/tensorflow_text/core/pybinds/BUILD b/tensorflow_text/core/pybinds/BUILD
@@ -54,7 +54,7 @@ py_test(
     name = "pywrap_fast_wordpiece_tokenizer_model_builder_test",
     srcs = ["pywrap_fast_wordpiece_tokenizer_model_builder_test.py"],
     data = [
-        "//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_testdata",
+        "//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb",
     ],
     python_version = "PY3",
     deps = [
diff --git a/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py b/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py
@@ -19,17 +19,12 @@
 from __future__ import division
 from __future__ import print_function
 
-import os
-from absl import flags
-
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow_text.core.pybinds import pywrap_fast_wordpiece_tokenizer_model_builder
 
-FLAGS = flags.FLAGS
-
-EXPECTED_MODEL_BUFFER_PATH = "google3/third_party/tensorflow_text/core/kernels/testdata/fast_wordpiece_tokenizer_model.fb"
+EXPECTED_MODEL_BUFFER_PATH = "third_party/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb"
 
 
 class PywrapFastWordpieceBuilderTest(test_util.TensorFlowTestCase):
@@ -42,9 +37,7 @@ def test_build(self):
     max_bytes_per_token = 100
     suffix_indicator = "##"
     unk_token = "<unk>"
-    expected_model_buffer = gfile.GFile(
-        os.path.join(FLAGS.test_srcdir, EXPECTED_MODEL_BUFFER_PATH),
-        "rb").read()
+    expected_model_buffer = gfile.GFile(EXPECTED_MODEL_BUFFER_PATH, "rb").read()
     self.assertEqual(
         pywrap_fast_wordpiece_tokenizer_model_builder
         .build_fast_wordpiece_model(
diff --git a/tensorflow_text/python/ops/fast_wordpiece_tokenizer_test.py b/tensorflow_text/python/ops/fast_wordpiece_tokenizer_test.py
diff --git a/tensorflow_text/python/ops/test_data/fast_wordpiece_README.google.txt b/tensorflow_text/python/ops/test_data/fast_wordpiece_README.google.txt
diff --git a/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb b/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb