diff --git a/executor/infoschema_reader.go b/executor/infoschema_reader.go
index ef39c9cc96d35..739ab763e5fc0 100644
--- a/executor/infoschema_reader.go
+++ b/executor/infoschema_reader.go
@@ -629,7 +629,7 @@ func (e *hugeMemTableRetriever) dataForColumnsInTable(ctx context.Context, sctx
 				colLen += (len(col.Elems) - 1)
 			}
 			charMaxLen = colLen
-			charOctLen = colLen
+			charOctLen = calcCharOctLength(colLen, col.Charset)
 		} else if col.Tp == mysql.TypeEnum {
 			// Example: In MySQL enum('a', 'ab', 'cdef') has length 4, because
 			// the longest string in the enum is 'cdef'
@@ -641,10 +641,10 @@ func (e *hugeMemTableRetriever) dataForColumnsInTable(ctx context.Context, sctx
 				}
 			}
 			charMaxLen = colLen
-			charOctLen = colLen
+			charOctLen = calcCharOctLength(colLen, col.Charset)
 		} else if types.IsString(col.Tp) {
 			charMaxLen = colLen
-			charOctLen = colLen
+			charOctLen = calcCharOctLength(colLen, col.Charset)
 		} else if types.IsTypeFractionable(col.Tp) {
 			datetimePrecision = decimal
 		} else if types.IsTypeNumeric(col.Tp) {
@@ -688,6 +688,14 @@ func (e *hugeMemTableRetriever) dataForColumnsInTable(ctx context.Context, sctx
 	}
 }
 
+func calcCharOctLength(lenInChar int, cs string) int {
+	lenInBytes := lenInChar
+	if desc, err := charset.GetCharsetDesc(cs); err == nil {
+		lenInBytes = desc.Maxlen * lenInChar
+	}
+	return lenInBytes
+}
+
 func (e *memtableRetriever) setDataFromPartitions(ctx sessionctx.Context, schemas []*model.DBInfo) error {
 	tableRowsMap, colLengthMap, err := tableStatsCache.get(ctx)
 	if err != nil {
diff --git a/executor/infoschema_reader_test.go b/executor/infoschema_reader_test.go
index 559246b5e3a05..058c1de382324 100644
--- a/executor/infoschema_reader_test.go
+++ b/executor/infoschema_reader_test.go
@@ -232,6 +232,40 @@ func (s *testInfoschemaTableSuite) TestCharacterSetCollations(c *C) {
 		testkit.Rows("utf8mb4_bin utf8mb4"))
 }
 
+// https://github.com/pingcap/tidb/issues/25467.
+func (s *testInfoschemaTableSuite) TestDataTypesMaxLengthAndOctLength(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("drop database if exists test_oct_length;")
+	tk.MustExec("create database test_oct_length;")
+	tk.MustExec("use test_oct_length;")
+
+	testCases := []struct {
+		colTp  string
+		maxLen int
+		octLen int
+	}{
+		{"varchar(255) collate ascii_bin", 255, 255},
+		{"varchar(255) collate utf8mb4_bin", 255, 255 * 4},
+		{"varchar(255) collate utf8_bin", 255, 255 * 3},
+		{"char(10) collate ascii_bin", 10, 10},
+		{"char(10) collate utf8mb4_bin", 10, 10 * 4},
+		{"set('a', 'b', 'cccc') collate ascii_bin", 8, 8},
+		{"set('a', 'b', 'cccc') collate utf8mb4_bin", 8, 8 * 4},
+		{"enum('a', 'b', 'cccc') collate ascii_bin", 4, 4},
+		{"enum('a', 'b', 'cccc') collate utf8mb4_bin", 4, 4 * 4},
+	}
+	for _, tc := range testCases {
+		createSQL := fmt.Sprintf("create table t (a %s);", tc.colTp)
+		tk.MustExec(createSQL)
+		result := tk.MustQuery("select character_maximum_length, character_octet_length " +
+			"from information_schema.columns " +
+			"where table_schema=(select database()) and table_name='t';")
+		expectedRows := testkit.Rows(fmt.Sprintf("%d %d", tc.maxLen, tc.octLen))
+		result.Check(expectedRows)
+		tk.MustExec("drop table t;")
+	}
+}
+
 func (s *testInfoschemaTableSuite) TestDDLJobs(c *C) {
 	tk := testkit.NewTestKit(c, s.store)
 	tk.MustExec("create database if not exists test_ddl_jobs")
diff --git a/infoschema/tables_test.go b/infoschema/tables_test.go
index 3a6c8962a37d4..af6f1559281f5 100644
--- a/infoschema/tables_test.go
+++ b/infoschema/tables_test.go
@@ -241,7 +241,7 @@ func (s *testTableSuite) TestInfoschemaFieldValue(c *C) {
 	tk.MustQuery("select CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE,DATETIME_PRECISION from information_schema.COLUMNS where table_name='timeschema'").
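 		Check(testkit.Rows("<nil> <nil> <nil> <nil> <nil>", "<nil> <nil> <nil> <nil> 3", "<nil> <nil> <nil> <nil> 3", "<nil> <nil> <nil> <nil> 4", "<nil> <nil> <nil> <nil> <nil>"))
 	tk.MustQuery("select CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE,DATETIME_PRECISION from information_schema.COLUMNS where table_name='strschema'").
-		Check(testkit.Rows("3 3 <nil> <nil> <nil>", "3 3 <nil> <nil> <nil>", "255 255 <nil> <nil> <nil>", "255 255 <nil> <nil> <nil>"))
+		Check(testkit.Rows("3 12 <nil> <nil> <nil>", "3 12 <nil> <nil> <nil>", "255 255 <nil> <nil> <nil>", "255 1020 <nil> <nil> <nil>"))
 	tk.MustQuery("select NUMERIC_SCALE from information_schema.COLUMNS where table_name='floatschema'").
 		Check(testkit.Rows("<nil>", "3"))
 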