diff --git a/dbms/src/Functions/FunctionsNull.h b/dbms/src/Functions/FunctionsNull.h index 7e37305d300..046beb380fe 100644 --- a/dbms/src/Functions/FunctionsNull.h +++ b/dbms/src/Functions/FunctionsNull.h @@ -64,7 +64,7 @@ class FunctionCoalesce : public IFunction public: static constexpr auto name = "coalesce"; static FunctionPtr create(const Context & context); - FunctionCoalesce(const Context & context) + explicit FunctionCoalesce(const Context & context) : context(context) {} diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 0d8b06a8bd2..83e5d08307a 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include #include #include #include @@ -4127,9 +4129,11 @@ class FunctionASCII : public IFunction std::string getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() != 1) + if unlikely (arguments.size() != 1) throw Exception( fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -4140,28 +4144,25 @@ class FunctionASCII : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const auto * c0_const = checkAndGetColumn(c0_col); const auto * c0_string = checkAndGetColumn(c0_col); + if unlikely (c0_string == nullptr) + throw Exception( + fmt::format("Illegal argument of function {}", getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - Field res_field; - int val_num = c0_col->size(); + auto val_num = static_cast(c0_col->size()); auto col_res = ColumnInt64::create(); - col_res->reserve(val_num); - if (c0_const == nullptr && c0_string == nullptr) - throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ColumnInt64::Container & data = col_res->getData(); + data.resize(val_num); - for (int i = 0; i < val_num; i++) - { - c0_col->get(i, res_field); - String handled_str = res_field.get(); - Int64 res = handled_str.empty() ? 0 : static_cast(handled_str[0]); - col_res->insert(res); - } + const auto & chars = c0_string->getChars(); + const auto & offsets = c0_string->getOffsets(); + + for (ssize_t i = 0; i < val_num; i++) + data[i] = chars[offsets[i - 1]]; block.getByPosition(result).column = std::move(col_res); } - -private: }; class FunctionLength : public IFunction @@ -4178,9 +4179,11 @@ class FunctionLength : public IFunction std::string getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() != 1) + if unlikely (arguments.size() != 1) throw Exception( fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -4191,22 +4194,21 @@ class FunctionLength : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const auto * c0_const = checkAndGetColumn(c0_col); const auto * c0_string = checkAndGetColumn(c0_col); + if unlikely (c0_string == nullptr) + throw Exception( + fmt::format("Illegal argument of function {}", getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - Field res_field; - int val_num = c0_col->size(); + auto val_num = static_cast(c0_col->size()); auto col_res = ColumnInt64::create(); - col_res->reserve(val_num); - if (c0_const == nullptr && c0_string == nullptr) - throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ColumnInt64::Container & data = col_res->getData(); + data.resize(val_num); - for (int i = 0; i < val_num; i++) - { - c0_col->get(i, res_field); - String handled_str = res_field.get(); - col_res->insert(static_cast(handled_str.size())); - } + const auto & offsets = c0_string->getOffsets(); + + for (ssize_t i = 0; i < val_num; i++) + data[i] = offsets[i] - offsets[i - 1] - 1; block.getByPosition(result).column = std::move(col_res); } diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index 3b63e6ae328..659238caa1c 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -187,7 +187,7 @@ void concat(const std::vector> & array_sources, Si size_t sources_num = array_sources.size(); std::vector is_const(sources_num); - auto checkAndGetSizeToReserve = [](auto source, IArraySource * array_source) { + auto check_and_get_size_to_reserve = [](auto source, IArraySource * array_source) { if (source == nullptr) throw Exception("Concat function expected " + demangle(typeid(Source).name()) + " or " + demangle(typeid(ConstSource).name()) + " but got " @@ -199,17 +199,18 @@ void concat(const std::vector> & array_sources, Si size_t size_to_reserve = 0; for (auto i : ext::range(0, sources_num)) { - auto & source = array_sources[i]; + const auto & source = array_sources[i]; is_const[i] = source->isConst(); if (is_const[i]) - size_to_reserve += checkAndGetSizeToReserve(typeid_cast *>(source.get()), source.get()); + size_to_reserve + += check_and_get_size_to_reserve(typeid_cast *>(source.get()), source.get()); else - size_to_reserve += checkAndGetSizeToReserve(typeid_cast(source.get()), source.get()); + size_to_reserve += check_and_get_size_to_reserve(typeid_cast(source.get()), source.get()); } sink.reserve(size_to_reserve); - auto writeNext = [&sink](auto source) { + auto write_next = [&sink](auto source) { writeSlice(source->getWhole(), sink); source->next(); }; @@ -218,11 +219,11 @@ void concat(const std::vector> & array_sources, Si { for (auto i : ext::range(0, sources_num)) { - auto & source = array_sources[i]; + const auto & source = array_sources[i]; if (is_const[i]) - writeNext(static_cast *>(source.get())); + write_next(static_cast *>(source.get())); else - writeNext(static_cast(source.get())); + write_next(static_cast(source.get())); } sink.next(); } @@ -383,11 +384,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len size_t left = static_cast(length) - slice.size; if (is_left) { - StringSource::Slice padSlice = padding.getWhole(); - while (left > padSlice.size && padSlice.size != 0) + StringSource::Slice pad_slice = padding.getWhole(); + while (left > pad_slice.size && pad_slice.size != 0) { - writeSlice(padSlice, sink); - left -= padSlice.size; + writeSlice(pad_slice, sink); + left -= pad_slice.size; } writeSlice(padding.getSliceFromLeft(0, left), sink); @@ -396,11 +397,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len else { writeSlice(slice, sink); - StringSource::Slice padSlice = padding.getWhole(); - while (left > padSlice.size && padSlice.size != 0) + StringSource::Slice pad_slice = padding.getWhole(); + while (left > pad_slice.size && pad_slice.size != 0) { - writeSlice(padSlice, sink); - left -= padSlice.size; + writeSlice(pad_slice, sink); + left -= pad_slice.size; } writeSlice(padding.getSliceFromLeft(0, left), sink); diff --git a/dbms/src/Functions/tests/bench_collation.cpp b/dbms/src/Functions/tests/bench_collation.cpp index 0859ccfa561..25247c7897d 100644 --- a/dbms/src/Functions/tests/bench_collation.cpp +++ b/dbms/src/Functions/tests/bench_collation.cpp @@ -17,7 +17,12 @@ #include #include +<<<<<<<< HEAD:dbms/src/Functions/tests/bench_collation.cpp /// this is a hack, include the cpp file so we can test MatchImpl directly +======== +/// this is a hack, include the cpp file so we can test functions directly +#include // NOLINT +>>>>>>>> b30c1f5090 (Improve the performance of `length` and `ascii` functions (#9345)):dbms/src/Functions/tests/bench_function_string.cpp #include // NOLINT namespace DB @@ -146,5 +151,65 @@ BENCH_LIKE_COLLATOR(ASCII_BIN); BENCH_LIKE_COLLATOR(BINARY); BENCH_LIKE_COLLATOR(LATIN1_BIN); +class LengthBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data1{toVec("col", std::vector(data_num, ""))}; + ColumnsWithTypeAndName data2{toVec("col", std::vector(data_num, "aaaaaaaaaa"))}; + ColumnsWithTypeAndName data3{toVec("col", std::vector(data_num, "啊aaaaaaaa"))}; + + void SetUp(const benchmark::State &) override {} +}; + +BENCHMARK_DEFINE_F(LengthBench, bench) +(benchmark::State & state) +try +{ + FunctionLength function_length; + std::vector blocks{Block(data1), Block(data2), Block(data3)}; + for (auto & block : blocks) + block.insert({nullptr, std::make_shared>(), "res"}); + ColumnNumbers arguments{0}; + for (auto _ : state) + { + for (auto & block : blocks) + function_length.executeImpl(block, arguments, 1); + } +} +CATCH +BENCHMARK_REGISTER_F(LengthBench, bench)->Iterations(10); + +class ASCIIBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data1{toVec("col", std::vector(data_num, ""))}; + ColumnsWithTypeAndName data2{toVec("col", std::vector(data_num, "aaaaaaaaaa"))}; + ColumnsWithTypeAndName data3{toVec("col", std::vector(data_num, "啊aaaaaaaa"))}; + + void SetUp(const benchmark::State &) override {} +}; + +BENCHMARK_DEFINE_F(ASCIIBench, bench) +(benchmark::State & state) +try +{ + FunctionASCII function_ascii; + std::vector blocks{Block(data1), Block(data2), Block(data3)}; + for (auto & block : blocks) + block.insert({nullptr, std::make_shared>(), "res"}); + ColumnNumbers arguments{0}; + for (auto _ : state) + { + for (auto & block : blocks) + function_ascii.executeImpl(block, arguments, 1); + } +} +CATCH +BENCHMARK_REGISTER_F(ASCIIBench, bench)->Iterations(10); + } // namespace tests } // namespace DB diff --git a/dbms/src/Functions/tests/bench_function_string.cpp b/dbms/src/Functions/tests/bench_function_string.cpp new file mode 100644 index 00000000000..25247c7897d --- /dev/null +++ b/dbms/src/Functions/tests/bench_function_string.cpp @@ -0,0 +1,215 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +<<<<<<<< HEAD:dbms/src/Functions/tests/bench_collation.cpp +/// this is a hack, include the cpp file so we can test MatchImpl directly +======== +/// this is a hack, include the cpp file so we can test functions directly +#include // NOLINT +>>>>>>>> b30c1f5090 (Improve the performance of `length` and `ascii` functions (#9345)):dbms/src/Functions/tests/bench_function_string.cpp +#include // NOLINT + +namespace DB +{ +namespace tests +{ + +class CollationBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + using ColUInt8Type = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data{toVec("col0", std::vector(1000000, "aaaaaaaaaaaaa")), + toVec("col1", std::vector(1000000, "aaaaaaaaaaaaa")), + toVec("result", std::vector{})}; + + ColumnsWithTypeAndName like_data{toVec("col0", std::vector(1000000, "qwdgefwabchfue")), + createConstColumn(1000000, "%abc%"), + createConstColumn(1000000, static_cast('\\')), + toVec("result", std::vector{})}; +}; + +class CollationLessBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +class CollationEqBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +class CollationLikeBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +#define BENCH_LESS_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationLessBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionLess fl; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fl.setCollator(collator); \ + Block block(data); \ + ColumnNumbers arguments{0, 1}; \ + for (auto _ : state) \ + { \ + fl.executeImpl(block, arguments, 2); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationLessBench, collator)->Iterations(10); + + +#define BENCH_EQ_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationEqBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionEquals fe; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fe.setCollator(collator); \ + Block block(data); \ + ColumnNumbers arguments{0, 1}; \ + for (auto _ : state) \ + { \ + fe.executeImpl(block, arguments, 2); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationEqBench, collator)->Iterations(10); + + +#define BENCH_LIKE_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationLikeBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionLike3Args fl; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fl.setCollator(collator); \ + Block block(like_data); \ + ColumnNumbers arguments{0, 1, 2}; \ + for (auto _ : state) \ + { \ + fl.executeImpl(block, arguments, 3); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationLikeBench, collator)->Iterations(10); + + +BENCH_LESS_COLLATOR(UTF8MB4_BIN); +BENCH_LESS_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_LESS_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_LESS_COLLATOR(UTF8_BIN); +BENCH_LESS_COLLATOR(UTF8_GENERAL_CI); +BENCH_LESS_COLLATOR(UTF8_UNICODE_CI); +BENCH_LESS_COLLATOR(ASCII_BIN); +BENCH_LESS_COLLATOR(BINARY); +BENCH_LESS_COLLATOR(LATIN1_BIN); + +BENCH_EQ_COLLATOR(UTF8MB4_BIN); +BENCH_EQ_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_EQ_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_EQ_COLLATOR(UTF8_BIN); +BENCH_EQ_COLLATOR(UTF8_GENERAL_CI); +BENCH_EQ_COLLATOR(UTF8_UNICODE_CI); +BENCH_EQ_COLLATOR(ASCII_BIN); +BENCH_EQ_COLLATOR(BINARY); +BENCH_EQ_COLLATOR(LATIN1_BIN); + +BENCH_LIKE_COLLATOR(UTF8MB4_BIN); +BENCH_LIKE_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_LIKE_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_LIKE_COLLATOR(UTF8_BIN); +BENCH_LIKE_COLLATOR(UTF8_GENERAL_CI); +BENCH_LIKE_COLLATOR(UTF8_UNICODE_CI); +BENCH_LIKE_COLLATOR(ASCII_BIN); +BENCH_LIKE_COLLATOR(BINARY); +BENCH_LIKE_COLLATOR(LATIN1_BIN); + +class LengthBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data1{toVec("col", std::vector(data_num, ""))}; + ColumnsWithTypeAndName data2{toVec("col", std::vector(data_num, "aaaaaaaaaa"))}; + ColumnsWithTypeAndName data3{toVec("col", std::vector(data_num, "啊aaaaaaaa"))}; + + void SetUp(const benchmark::State &) override {} +}; + +BENCHMARK_DEFINE_F(LengthBench, bench) +(benchmark::State & state) +try +{ + FunctionLength function_length; + std::vector blocks{Block(data1), Block(data2), Block(data3)}; + for (auto & block : blocks) + block.insert({nullptr, std::make_shared>(), "res"}); + ColumnNumbers arguments{0}; + for (auto _ : state) + { + for (auto & block : blocks) + function_length.executeImpl(block, arguments, 1); + } +} +CATCH +BENCHMARK_REGISTER_F(LengthBench, bench)->Iterations(10); + +class ASCIIBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data1{toVec("col", std::vector(data_num, ""))}; + ColumnsWithTypeAndName data2{toVec("col", std::vector(data_num, "aaaaaaaaaa"))}; + ColumnsWithTypeAndName data3{toVec("col", std::vector(data_num, "啊aaaaaaaa"))}; + + void SetUp(const benchmark::State &) override {} +}; + +BENCHMARK_DEFINE_F(ASCIIBench, bench) +(benchmark::State & state) +try +{ + FunctionASCII function_ascii; + std::vector blocks{Block(data1), Block(data2), Block(data3)}; + for (auto & block : blocks) + block.insert({nullptr, std::make_shared>(), "res"}); + ColumnNumbers arguments{0}; + for (auto _ : state) + { + for (auto & block : blocks) + function_ascii.executeImpl(block, arguments, 1); + } +} +CATCH +BENCHMARK_REGISTER_F(ASCIIBench, bench)->Iterations(10); + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_strings_ascii.cpp b/dbms/src/Functions/tests/gtest_strings_ascii.cpp index 90caecadbf7..5123b605299 100644 --- a/dbms/src/Functions/tests/gtest_strings_ascii.cpp +++ b/dbms/src/Functions/tests/gtest_strings_ascii.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #pragma GCC diagnostic push @@ -39,116 +38,49 @@ class StringASCII : public DB::tests::FunctionTest { }; -// test string and string TEST_F(StringASCII, strAndStrTest) { - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - - std::vector strs{"hello", "HELLO", "23333", "#%@#^", ""}; - - for (int i = 0; i < 2; i++) { - MutableColumnPtr csp; - csp = ColumnString::create(); - - for (const auto & str : strs) - { - csp->insert(Field(str.c_str(), str.size())); - } - - Block test_block; - ColumnWithTypeAndName ctn = ColumnWithTypeAndName(std::move(csp), std::make_shared(), "test_ascii"); - ColumnsWithTypeAndName ctns{ctn}; - test_block.insert(ctn); - ColumnNumbers cns{0}; - - // test ascii - auto bp = factory.tryGet("ascii", context); - ASSERT_TRUE(bp != nullptr); - ASSERT_FALSE(bp->isVariadic()); - - auto func = bp->build(ctns); - test_block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(test_block, cns, 1); - const IColumn * res = test_block.getByPosition(1).column.get(); - const ColumnInt64 * res_string = checkAndGetColumn(res); - - Field res_field; - std::vector results{104, 72, 50, 35, 0}; - for (size_t t = 0; t < results.size(); t++) - { - res_string->get(t, res_field); - Int64 res_val = res_field.get(); - EXPECT_EQ(results[t], res_val); - } + // test const + ASSERT_COLUMN_EQ(createConstColumn(0, 0), executeFunction("ascii", createConstColumn(0, ""))); + ASSERT_COLUMN_EQ( + createConstColumn(1, 38), + executeFunction("ascii", createConstColumn(1, "&ad"))); + ASSERT_COLUMN_EQ( + createConstColumn(5, 38), + executeFunction("ascii", createConstColumn(5, "&ad"))); } -} -// test NULL -TEST_F(StringASCII, nullTest) -{ - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - - std::vector strs{"a", "b", "c", "d", "e", "f"}; - std::vector results{0, 98, 0, 100, 101, 0}; - std::vector null_map{1, 0, 1, 0, 0, 1}; - auto input_str_col = ColumnString::create(); - for (const auto & str : strs) { - Field field(str.c_str(), str.size()); - input_str_col->insert(field); + // test vec + ASSERT_COLUMN_EQ(createColumn({}), executeFunction("ascii", createColumn({}))); + ASSERT_COLUMN_EQ( + createColumn({230, 104, 72, 50, 35, 0}), + executeFunction("ascii", createColumn({"我a", "hello", "HELLO", "23333", "#%@#^", ""}))); } - auto input_null_map = ColumnUInt8::create(strs.size(), 0); - ColumnUInt8::Container & input_vec_null_map = input_null_map->getData(); - for (size_t i = 0; i < strs.size(); i++) { - input_vec_null_map[i] = null_map[i]; + // test nullable const + ASSERT_COLUMN_EQ( + createConstColumn(0, {}), + executeFunction("ascii", createConstColumn>(0, "aaa"))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {97}), + executeFunction("ascii", createConstColumn>(1, "aaa"))); + ASSERT_COLUMN_EQ( + createConstColumn(3, {97}), + executeFunction("ascii", createConstColumn>(3, "aaa"))); } - auto input_null_col = ColumnNullable::create(std::move(input_str_col), std::move(input_null_map)); - DataTypePtr string_type = std::make_shared(); - DataTypePtr nullable_string_type = makeNullable(string_type); - - auto col1 = ColumnWithTypeAndName(std::move(input_null_col), nullable_string_type, "ascii"); - ColumnsWithTypeAndName ctns{col1}; - - Block test_block; - test_block.insert(col1); - ColumnNumbers cns{0}; - - auto bp = factory.tryGet("ascii", context); - ASSERT_TRUE(bp != nullptr); - ASSERT_FALSE(bp->isVariadic()); - auto func = bp->build(ctns); - test_block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(test_block, cns, 1); - auto res_col = test_block.getByPosition(1).column; - - ColumnPtr result_null_map_column = static_cast(*res_col).getNullMapColumnPtr(); - MutableColumnPtr mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); - NullMap & result_null_map = static_cast(*mutable_result_null_map_column).getData(); - const IColumn * res = test_block.getByPosition(1).column.get(); - const ColumnNullable * res_nullable_string = checkAndGetColumn(res); - const IColumn & res_string = res_nullable_string->getNestedColumn(); - - Field res_field; - - for (size_t i = 0; i < null_map.size(); i++) { - EXPECT_EQ(result_null_map[i], null_map[i]); - if (result_null_map[i] == 0) - { - res_string.get(i, res_field); - Int64 res_val = res_field.get(); - EXPECT_EQ(results[i], res_val); - } + // test nullable vec + std::vector null_map{0, 1, 0, 1, 0, 0, 1}; + ASSERT_COLUMN_EQ( + createNullableColumn({0, 0, 97, 0, 233, 233, 0}, null_map), + executeFunction( + "ascii", + createNullableColumn({"", "a", "abcd", "嗯", "饼干", "馒头", "???"}, null_map))); } } - } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_strings_length.cpp b/dbms/src/Functions/tests/gtest_strings_length.cpp index e5937c03700..b650638e88a 100644 --- a/dbms/src/Functions/tests/gtest_strings_length.cpp +++ b/dbms/src/Functions/tests/gtest_strings_length.cpp @@ -39,117 +39,53 @@ class StringLength : public DB::tests::FunctionTest { }; -// test string and string -TEST_F(StringLength, strAndStrTest) +TEST_F(StringLength, length) { - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - - std::vector strs{"hi~", "23333", "pingcap", "你好", "233哈哈", ""}; - std::vector results{3, 5, 7, 6, 9, 0}; - - for (int i = 0; i < 2; i++) { - MutableColumnPtr csp; - csp = ColumnString::create(); - - for (const auto & str : strs) - { - csp->insert(Field(str.c_str(), str.size())); - } - - Block test_block; - ColumnWithTypeAndName ctn = ColumnWithTypeAndName(std::move(csp), std::make_shared(), "test_ascii"); - ColumnsWithTypeAndName ctns{ctn}; - test_block.insert(ctn); - ColumnNumbers cns{0}; - - // test length - auto bp = factory.tryGet("length", context); - ASSERT_TRUE(bp != nullptr); - ASSERT_FALSE(bp->isVariadic()); - - auto func = bp->build(ctns); - test_block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(test_block, cns, 1); - const IColumn * res = test_block.getByPosition(1).column.get(); - const ColumnInt64 * res_string = checkAndGetColumn(res); - - Field res_field; - - for (size_t t = 0; t < results.size(); t++) - { - res_string->get(t, res_field); - Int64 res_val = res_field.get(); - EXPECT_EQ(results[t], res_val); - } + // test const + ASSERT_COLUMN_EQ(createConstColumn(0, 0), executeFunction("length", createConstColumn(0, ""))); + ASSERT_COLUMN_EQ( + createConstColumn(1, 3), + executeFunction("length", createConstColumn(1, "aaa"))); + ASSERT_COLUMN_EQ( + createConstColumn(3, 3), + executeFunction("length", createConstColumn(3, "aaa"))); } -} -// test NULL -TEST_F(StringLength, nullTest) -{ - const Context context = TiFlashTestEnv::getContext(); - - auto & factory = FunctionFactory::instance(); - - std::vector strs{"a", "abcd", "嗯", "饼干", "馒头", "???"}; - std::vector results{0, 4, 0, 6, 6, 0}; - std::vector null_map{1, 0, 1, 0, 0, 1}; - auto input_str_col = ColumnString::create(); - for (const auto & str : strs) { - Field field(str.c_str(), str.size()); - input_str_col->insert(field); + // test vec + ASSERT_COLUMN_EQ(createColumn({}), executeFunction("length", createColumn({}))); + + ASSERT_COLUMN_EQ( + createColumn({0, 3, 5, 7, 6, 9, 0, 9, 16, 0}), + executeFunction( + "length", + createColumn( + {"", "hi~", "23333", "pingcap", "你好", "233哈哈", "", "asdの的", "ヽ( ̄▽ ̄)و", ""}))); } - auto input_null_map = ColumnUInt8::create(strs.size(), 0); - ColumnUInt8::Container & input_vec_null_map = input_null_map->getData(); - for (size_t i = 0; i < strs.size(); i++) { - input_vec_null_map[i] = null_map[i]; + // test nullable const + ASSERT_COLUMN_EQ( + createConstColumn(0, {}), + executeFunction("length", createConstColumn>(0, "aaa"))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {3}), + executeFunction("length", createConstColumn>(1, "aaa"))); + ASSERT_COLUMN_EQ( + createConstColumn(3, {3}), + executeFunction("length", createConstColumn>(3, "aaa"))); } - auto input_null_col = ColumnNullable::create(std::move(input_str_col), std::move(input_null_map)); - DataTypePtr string_type = std::make_shared(); - DataTypePtr nullable_string_type = makeNullable(string_type); - - auto col1 = ColumnWithTypeAndName(std::move(input_null_col), nullable_string_type, "length"); - ColumnsWithTypeAndName ctns{col1}; - - Block test_block; - test_block.insert(col1); - ColumnNumbers cns{0}; - - auto bp = factory.tryGet("length", context); - ASSERT_TRUE(bp != nullptr); - ASSERT_FALSE(bp->isVariadic()); - auto func = bp->build(ctns); - test_block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(test_block, cns, 1); - auto res_col = test_block.getByPosition(1).column; - - ColumnPtr result_null_map_column = static_cast(*res_col).getNullMapColumnPtr(); - MutableColumnPtr mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); - NullMap & result_null_map = static_cast(*mutable_result_null_map_column).getData(); - const IColumn * res = test_block.getByPosition(1).column.get(); - const ColumnNullable * res_nullable_string = checkAndGetColumn(res); - const IColumn & res_string = res_nullable_string->getNestedColumn(); - - Field res_field; - - for (size_t i = 0; i < null_map.size(); i++) { - EXPECT_EQ(result_null_map[i], null_map[i]); - if (result_null_map[i] == 0) - { - res_string.get(i, res_field); - Int64 res_val = res_field.get(); - EXPECT_EQ(results[i], res_val); - } + // test nullable vec + std::vector null_map{1, 0, 1, 0, 0, 1}; + ASSERT_COLUMN_EQ( + createNullableColumn({0, 4, 0, 6, 6, 0}, null_map), + executeFunction( + "length", + createNullableColumn({"a", "abcd", "嗯", "饼干", "馒头", "???"}, null_map))); } } - } // namespace tests } // namespace DB diff --git a/dbms/src/Functions/tests/gtest_strings_position.cpp b/dbms/src/Functions/tests/gtest_strings_position.cpp index 130dc102b2e..8bab5557f99 100644 --- a/dbms/src/Functions/tests/gtest_strings_position.cpp +++ b/dbms/src/Functions/tests/gtest_strings_position.cpp @@ -92,7 +92,7 @@ TEST_F(StringPosition, strAndStrTest) bp->build(ctns)->execute(test_block, cns, 2); const IColumn * res = test_block.getByPosition(2).column.get(); - const ColumnInt64 * res_string = checkAndGetColumn(res); + const auto * res_string = checkAndGetColumn(res); Field res_field; @@ -159,7 +159,7 @@ TEST_F(StringPosition, utf8StrAndStrTest) bp->build(ctns)->execute(test_block, cns, 2); const IColumn * res = test_block.getByPosition(2).column.get(); - const ColumnInt64 * res_string = checkAndGetColumn(res); + const auto * res_string = checkAndGetColumn(res); Field res_field; @@ -233,7 +233,7 @@ TEST_F(StringPosition, nullTest) MutableColumnPtr mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); NullMap & result_null_map = static_cast(*mutable_result_null_map_column).getData(); const IColumn * res = test_block.getByPosition(2).column.get(); - const ColumnNullable * res_nullable_string = checkAndGetColumn(res); + const auto * res_nullable_string = checkAndGetColumn(res); const IColumn & res_string = res_nullable_string->getNestedColumn(); Field res_field; diff --git a/libs/libcommon/include/common/StringRef.h b/libs/libcommon/include/common/StringRef.h index 86ed15d34fb..d420d702ac0 100644 --- a/libs/libcommon/include/common/StringRef.h +++ b/libs/libcommon/include/common/StringRef.h @@ -23,7 +23,6 @@ #include #include #include -#include // for std::logic_error #include #include @@ -172,8 +171,8 @@ inline size_t hashLessThan8(const char * data, size_t size) { if (size > 8) { - UInt64 a = unalignedLoad(data); - UInt64 b = unalignedLoad(data + size - 8); + auto a = unalignedLoad(data); + auto b = unalignedLoad(data + size - 8); return hashLen16(a, rotateByAtLeast1(b + size, size)) ^ b; } @@ -200,13 +199,13 @@ struct CRC32Hash do { - UInt64 word = unalignedLoad(pos); + auto word = unalignedLoad(pos); res = _mm_crc32_u64(res, word); pos += 8; } while (pos + 8 < end); - UInt64 word = unalignedLoad(end - 8); /// I'm not sure if this is normal. + auto word = unalignedLoad(end - 8); /// I'm not sure if this is normal. res = _mm_crc32_u64(res, word); return res;