Skip to content

Commit

Permalink
fix strcmp (#3868)
Browse files Browse the repository at this point in the history
  • Loading branch information
windtalker authored Jan 14, 2022
1 parent 3cb3e18 commit 01793a4
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 19 deletions.
4 changes: 2 additions & 2 deletions dbms/src/Core/AccurateComparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ struct CmpOp
template <typename A, typename B>
struct ReversedCmpOp
{
using SymmetricOp = CmpOp<A, B>;
static Int8 apply(A a, B b) { return -SymmetricOp::apply(b, a); }
using SymmetricOp = CmpOp<B, A>;
static Int8 apply(A a, B b) { return SymmetricOp::apply(b, a); }
};


Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsComparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,7 @@ class FunctionStrcmp : public FunctionComparison<CmpOp, NameStrcmp>
{
const IColumn * col_left_untyped = block.getByPosition(arguments[0]).column.get();
const IColumn * col_right_untyped = block.getByPosition(arguments[1]).column.get();

bool success = executeString<ColumnInt8>(block, result, col_left_untyped, col_right_untyped);
if (!success)
{
Expand Down
71 changes: 64 additions & 7 deletions dbms/src/Functions/tests/gtest_strings_cmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,70 @@ class Strcmp : public DB::tests::FunctionTest
TEST_F(Strcmp, Strcmp)
try
{
ASSERT_COLUMN_EQ(createColumn<Int8>({-1, 1, 0, 0}), executeFunction("strcmp", {createColumn<String>({"a", "b", "a", ""}), createColumn<String>({"b", "a", "a", ""})}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, -1, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"1", "123", "123", "123", std::nullopt}), createColumn<Nullable<String>>({"123", "1", "45", std::nullopt, "123"})}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, 0, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"", "123", "", "", std::nullopt}), createColumn<Nullable<String>>({"123", "", "", std::nullopt, ""})}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createConstColumn<Nullable<String>>(1, "b")}));
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "a"), createConstColumn<Nullable<String>>(1, "b")}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({"b"})}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({std::nullopt}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({std::nullopt})}));
// without collation
{
// column with column
ASSERT_COLUMN_EQ(createColumn<Int8>({-1, 1, 0, 0}), executeFunction("strcmp", {createColumn<String>({"a", "b", "a", ""}), createColumn<String>({"b", "a", "a", ""})}));

// column with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, -1, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"1", "123", "123", "123", std::nullopt}), createColumn<Nullable<String>>({"123", "1", "45", std::nullopt, "123"})}));

// nullable with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, 0, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"", "123", "", "", std::nullopt}), createColumn<Nullable<String>>({"123", "", "", std::nullopt, ""})}));

// column with constant
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createConstColumn<Nullable<String>>(1, "b")}));

// constant with column
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({1, 0, -1, std::nullopt}), executeFunction("strcmp", {createConstColumn<Nullable<String>>(4, "b"), createColumn<Nullable<String>>({"a", "b", "c", std::nullopt})}));

// constant with constant
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "a"), createConstColumn<Nullable<String>>(1, "b")}));

// constant with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({"b"})}));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({std::nullopt}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({std::nullopt})}));
}

// with collation
{
// column with column
ASSERT_COLUMN_EQ(createColumn<Int8>({-1, 1, 0, 0}), executeFunction("strcmp", {createColumn<String>({"a", "b", "a", ""}), createColumn<String>({"b", "a", "a", ""})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Int8>({-1, 1, 0, 0}), executeFunction("strcmp", {createColumn<String>({"a", "b", "a", ""}), createColumn<String>({"b", "a", "a", ""})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY)));

ASSERT_COLUMN_EQ(createColumn<Int8>({-1, 1, 0, 0}), executeFunction("strcmp", {createColumn<String>({"a", "b", "a", ""}), createColumn<String>({"B", "A", "A", ""})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// column with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, -1, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"1", "123", "123", "123", std::nullopt}), createColumn<Nullable<String>>({"123", "1", "45", std::nullopt, "123"})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// nullable with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1, 1, 0, std::nullopt, std::nullopt}), executeFunction("strcmp", {createColumn<Nullable<String>>({"", "123", "", "", std::nullopt}), createColumn<Nullable<String>>({"123", "", "", std::nullopt, ""})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// column with constant
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createConstColumn<Nullable<String>>(1, "b")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"A"}), createConstColumn<Nullable<String>>(1, "b")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"A"}), createConstColumn<Nullable<String>>(1, "B")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// constant with column
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({1, 0, -1, std::nullopt}), executeFunction("strcmp", {createConstColumn<Nullable<String>>(4, "b"), createColumn<Nullable<String>>({"a", "b", "c", std::nullopt})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({1, 0, -1, std::nullopt}), executeFunction("strcmp", {createConstColumn<Nullable<String>>(4, "B"), createColumn<Nullable<String>>({"a", "b", "c", std::nullopt})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({1, 0, -1, std::nullopt}), executeFunction("strcmp", {createConstColumn<Nullable<String>>(4, "b"), createColumn<Nullable<String>>({"A", "B", "C", std::nullopt})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// constant with constant
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "a"), createConstColumn<Nullable<String>>(1, "b")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "A"), createConstColumn<Nullable<String>>(1, "b")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "a"), createConstColumn<Nullable<String>>(1, "B")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createConstColumn<Nullable<Int8>>(1, -1), executeFunction("strcmp", {createConstColumn<Nullable<String>>(1, "A"), createConstColumn<Nullable<String>>(1, "B")}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

// constant with nullable
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({"b"})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"A"}), createColumn<Nullable<String>>({"b"})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({"B"})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({-1}), executeFunction("strcmp", {createColumn<String>({"A"}), createColumn<Nullable<String>>({"B"})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));

ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({std::nullopt}), executeFunction("strcmp", {createColumn<String>({"a"}), createColumn<Nullable<String>>({std::nullopt})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
ASSERT_COLUMN_EQ(createColumn<Nullable<Int8>>({std::nullopt}), executeFunction("strcmp", {createColumn<String>({"A"}), createColumn<Nullable<String>>({std::nullopt})}, TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI)));
}
}
CATCH
} // namespace tests
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/TestUtils/FunctionTestUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ ::testing::AssertionResult columnEqual(
return columnEqual(expected.column, actual.column);
}

ColumnWithTypeAndName FunctionTest::executeFunction(const String & func_name, const ColumnsWithTypeAndName & columns)
ColumnWithTypeAndName FunctionTest::executeFunction(const String & func_name, const ColumnsWithTypeAndName & columns, const TiDB::TiDBCollatorPtr & collator)
{
auto & factory = FunctionFactory::instance();

Expand All @@ -96,7 +96,7 @@ ColumnWithTypeAndName FunctionTest::executeFunction(const String & func_name, co
auto bp = factory.tryGet(func_name, context);
if (!bp)
throw TiFlashTestException(fmt::format("Function {} not found!", func_name));
auto func = bp->build(columns);
auto func = bp->build(columns, collator);
block.insert({nullptr, func->getReturnType(), "res"});
func->execute(block, cns, columns.size());
return block.getByPosition(columns.size());
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/TestUtils/FunctionTestUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,8 @@ class FunctionTest : public ::testing::Test
dag_context_ptr = std::make_unique<DAGContext>(1024);
context.setDAGContext(dag_context_ptr.get());
}
ColumnWithTypeAndName executeFunction(const String & func_name, const ColumnsWithTypeAndName & columns);

ColumnWithTypeAndName executeFunction(const String & func_name, const ColumnsWithTypeAndName & columns, const TiDB::TiDBCollatorPtr & collator = nullptr);

template <typename... Args>
ColumnWithTypeAndName executeFunction(const String & func_name, const ColumnWithTypeAndName & first_column, const Args &... columns)
Expand Down
47 changes: 47 additions & 0 deletions tests/fullstack-test/expr/strcmp.test
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,51 @@ mysql> set @@tidb_enforce_mpp = 1; select strcmp(a, b) from test.cmp;
| NULL |
+--------------+

mysql> drop table if exists test.t;
mysql> create table test.t (a varchar(30));
mysql> insert into test.t values ('a'), ('b'), ('c'), (NULL);
mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> set @@tidb_enforce_mpp = 1; select strcmp('b', a) from test.t;
+----------------+
| strcmp('b', a) |
+----------------+
| 1 |
| 0 |
| -1 |
| NULL |
+----------------+


mysql> set @@tidb_enforce_mpp = 1; select strcmp(a, 'b') from test.t;
+----------------+
| strcmp(a, 'b') |
+----------------+
| -1 |
| 0 |
| 1 |
| NULL |
+----------------+

mysql> set @@tidb_enforce_mpp = 1; select strcmp(a, NULL) from test.t;
+-----------------+
| strcmp(a, NULL) |
+-----------------+
| NULL |
| NULL |
| NULL |
| NULL |
+-----------------+

mysql> set @@tidb_enforce_mpp = 1; select strcmp(NULL, a) from test.t;
+-----------------+
| strcmp(NULL, a) |
+-----------------+
| NULL |
| NULL |
| NULL |
| NULL |
+-----------------+

115 changes: 108 additions & 7 deletions tests/tidb-ci/new_collation_fullstack/strcmp.test
Original file line number Diff line number Diff line change
@@ -1,16 +1,117 @@
mysql> drop table if exists test.t;
mysql> create table test.t(a varchar(30) charset utf8mb4 collate utf8mb4_general_ci, b varchar(30) charset utf8mb4 collate utf8mb4_general_ci);
mysql> alter table test.t set tiflash replica 1;
mysql> insert into test.t values ('abc', 'ABC'),('Abc', 'abc'),('def', 'dEf');
mysql> insert into test.t values ('abc', 'ABC'),('Abc', 'abc'),('def', 'dEf'), ('a', 'A'), ('b', 'B'), ('c', 'C'), (NULL, NULL), ('a', NULL), (NULL, 'a'), ('', NULL), (NULL, ''), ('', '');
func> wait_table test t

>> DBGInvoke __try_flush()

mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ count(*) from test.t where strcmp(a, b) = 0;
+----------+
| count(*) |
+----------+
| 3 |
+----------+
mysql> set tidb_enforce_mpp=1; select a, b, strcmp(a, b) from test.t;
+------+------+--------------+
| a | b | strcmp(a, b) |
+------+------+--------------+
| abc | ABC | 0 |
| Abc | abc | 0 |
| def | dEf | 0 |
| a | A | 0 |
| b | B | 0 |
| c | C | 0 |
| NULL | NULL | NULL |
| a | NULL | NULL |
| NULL | a | NULL |
| | NULL | NULL |
| NULL | | NULL |
| | | 0 |
+------+------+--------------+

mysql> set tidb_enforce_mpp=1; select a, b, strcmp('b', a) from test.t;
+------+------+----------------+
| a | b | strcmp('b', a) |
+------+------+----------------+
| abc | ABC | 1 |
| Abc | abc | 1 |
| def | dEf | -1 |
| a | A | 1 |
| b | B | 0 |
| c | C | -1 |
| NULL | NULL | NULL |
| a | NULL | 1 |
| NULL | a | NULL |
| | NULL | 1 |
| NULL | | NULL |
| | | 1 |
+------+------+----------------+

mysql> set tidb_enforce_mpp=1; select a, b, strcmp(a, 'b') from test.t;
+------+------+----------------+
| a | b | strcmp(a, 'b') |
+------+------+----------------+
| abc | ABC | -1 |
| Abc | abc | -1 |
| def | dEf | 1 |
| a | A | -1 |
| b | B | 0 |
| c | C | 1 |
| NULL | NULL | NULL |
| a | NULL | -1 |
| NULL | a | NULL |
| | NULL | -1 |
| NULL | | NULL |
| | | -1 |
+------+------+----------------+

mysql> set tidb_enforce_mpp=1; select a, b, strcmp('b', b) from test.t;
+------+------+----------------+
| a | b | strcmp('b', b) |
+------+------+----------------+
| abc | ABC | 1 |
| Abc | abc | 1 |
| def | dEf | -1 |
| a | A | 1 |
| b | B | 0 |
| c | C | -1 |
| NULL | NULL | NULL |
| a | NULL | NULL |
| NULL | a | 1 |
| | NULL | NULL |
| NULL | | 1 |
| | | 1 |
+------+------+----------------+

mysql> set tidb_enforce_mpp=1; select a, b, strcmp(b, 'b') from test.t;
+------+------+----------------+
| a | b | strcmp(b, 'b') |
+------+------+----------------+
| abc | ABC | -1 |
| Abc | abc | -1 |
| def | dEf | 1 |
| a | A | -1 |
| b | B | 0 |
| c | C | 1 |
| NULL | NULL | NULL |
| a | NULL | NULL |
| NULL | a | -1 |
| | NULL | NULL |
| NULL | | -1 |
| | | -1 |
+------+------+----------------+

mysql> set tidb_enforce_mpp=1; select a, b, strcmp(NULL, a) from test.t;
+------+------+-----------------+
| a | b | strcmp(NULL, a) |
+------+------+-----------------+
| abc | ABC | NULL |
| Abc | abc | NULL |
| def | dEf | NULL |
| a | A | NULL |
| b | B | NULL |
| c | C | NULL |
| NULL | NULL | NULL |
| a | NULL | NULL |
| NULL | a | NULL |
| | NULL | NULL |
| NULL | | NULL |
| | | NULL |
+------+------+-----------------+

mysql> drop table if exists test.t

0 comments on commit 01793a4

Please sign in to comment.