From 1b34cc234e8283e8d67af35292767180fb6c47d3 Mon Sep 17 00:00:00 2001 From: Mingcong Han Date: Thu, 9 Jan 2020 21:41:12 -0600 Subject: [PATCH] planner: add HashCode method for LogicalPlan & use PlanHash for GroupExpr's fingerPrint (#14224) --- .../transformation_rules_suite_out.json | 1 - planner/core/hashcode.go | 99 ++++++++++++++++ planner/core/plan.go | 4 + planner/memo/expr_iterator_test.go | 112 +++++++++++------- planner/memo/group_expr.go | 16 ++- planner/memo/group_expr_test.go | 18 ++- planner/memo/group_test.go | 54 ++++++++- 7 files changed, 252 insertions(+), 52 deletions(-) create mode 100644 planner/core/hashcode.go diff --git a/planner/cascades/testdata/transformation_rules_suite_out.json b/planner/cascades/testdata/transformation_rules_suite_out.json index aabd085071940..c6eda758be26e 100644 --- a/planner/cascades/testdata/transformation_rules_suite_out.json +++ b/planner/cascades/testdata/transformation_rules_suite_out.json @@ -1065,7 +1065,6 @@ "SQL": "select a from (select floor(a) as a from t) as t2", "Result": [ "Group#0 Schema:[Column#13]", - " Projection_4 input:[Group#1], floor(test.t.a)->Column#13", " Projection_2 input:[Group#1], floor(test.t.a)->Column#13", "Group#1 Schema:[test.t.a]", " TableScan_1 table:t" diff --git a/planner/core/hashcode.go b/planner/core/hashcode.go new file mode 100644 index 0000000000000..966e24ef07c8b --- /dev/null +++ b/planner/core/hashcode.go @@ -0,0 +1,99 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package core + +import ( + "bytes" + "encoding/binary" + "sort" + + "github.com/pingcap/tidb/util/plancodec" +) + +func encodeIntAsUint32(result []byte, value int) []byte { + var buf [4]byte + binary.BigEndian.PutUint32(buf[:], uint32(value)) + return append(result, buf[:]...) +} + +// HashCode implements LogicalPlan interface. +func (p *baseLogicalPlan) HashCode() []byte { + // We use PlanID for the default hash, so if two plans do not have + // the same id, the hash value will never be the same. + result := make([]byte, 0, 4) + result = encodeIntAsUint32(result, p.id) + return result +} + +// HashCode implements LogicalPlan interface. +func (p *LogicalProjection) HashCode() []byte { + // PlanType + SelectOffset + ExprNum + [Exprs] + // Expressions are commonly `Column`s, whose hashcode has the length 9, so + // we pre-alloc 10 bytes for each expr's hashcode. + result := make([]byte, 0, 12+len(p.Exprs)*10) + result = encodeIntAsUint32(result, plancodec.TypeStringToPhysicalID(p.tp)) + result = encodeIntAsUint32(result, p.SelectBlockOffset()) + result = encodeIntAsUint32(result, len(p.Exprs)) + for _, expr := range p.Exprs { + exprHashCode := expr.HashCode(p.ctx.GetSessionVars().StmtCtx) + result = encodeIntAsUint32(result, len(exprHashCode)) + result = append(result, exprHashCode...) + } + return result +} + +// HashCode implements LogicalPlan interface. +func (p *LogicalTableDual) HashCode() []byte { + // PlanType + SelectOffset + RowCount + result := make([]byte, 0, 12) + result = encodeIntAsUint32(result, plancodec.TypeStringToPhysicalID(p.tp)) + result = encodeIntAsUint32(result, p.SelectBlockOffset()) + result = encodeIntAsUint32(result, p.RowCount) + return result +} + +// HashCode implements LogicalPlan interface. 
+func (p *LogicalSelection) HashCode() []byte { + // PlanType + SelectOffset + ConditionNum + [Conditions] + // Conditions are commonly `ScalarFunction`s, whose hashcode usually has a + // length larger than 20, so we pre-alloc 25 bytes for each expr's hashcode. + result := make([]byte, 0, 12+len(p.Conditions)*25) + result = encodeIntAsUint32(result, plancodec.TypeStringToPhysicalID(p.tp)) + result = encodeIntAsUint32(result, p.SelectBlockOffset()) + result = encodeIntAsUint32(result, len(p.Conditions)) + + condHashCodes := make([][]byte, len(p.Conditions)) + for i, expr := range p.Conditions { + condHashCodes[i] = expr.HashCode(p.ctx.GetSessionVars().StmtCtx) + } + // Sort the conditions, so that `a > 1 and a < 100` hashes the same as `a < 100 and a > 1`. + sort.Slice(condHashCodes, func(i, j int) bool { return bytes.Compare(condHashCodes[i], condHashCodes[j]) < 0 }) + + for _, condHashCode := range condHashCodes { + result = encodeIntAsUint32(result, len(condHashCode)) + result = append(result, condHashCode...) + } + return result +} + +// HashCode implements LogicalPlan interface. +func (p *LogicalLimit) HashCode() []byte { + // PlanType + SelectOffset + Offset + Count + result := make([]byte, 24) + binary.BigEndian.PutUint32(result, uint32(plancodec.TypeStringToPhysicalID(p.tp))) + binary.BigEndian.PutUint32(result[4:], uint32(p.SelectBlockOffset())) + binary.BigEndian.PutUint64(result[8:], p.Offset) + binary.BigEndian.PutUint64(result[16:], p.Count) + return result +} diff --git a/planner/core/plan.go b/planner/core/plan.go index 412219e4e2032..a4bf7ba8f4f4e 100644 --- a/planner/core/plan.go +++ b/planner/core/plan.go @@ -80,6 +80,10 @@ func enforceProperty(p *property.PhysicalProperty, tsk task, ctx sessionctx.Cont type LogicalPlan interface { Plan + // HashCode encodes a LogicalPlan so that two LogicalPlans can be quickly compared for equality. + // We use a strict encode method here which ensures there is no conflict. 
+ HashCode() []byte + // PredicatePushDown pushes down the predicates in the where/on/having clauses as deeply as possible. // It will accept a predicate that is an expression slice, and return the expressions that can't be pushed. // Because it might change the root if the having clause exists, we need to return a plan that represents a new root. diff --git a/planner/memo/expr_iterator_test.go b/planner/memo/expr_iterator_test.go index baf7f39fdaec8..a9ede5d43155a 100644 --- a/planner/memo/expr_iterator_test.go +++ b/planner/memo/expr_iterator_test.go @@ -15,6 +15,7 @@ package memo import ( . "github.com/pingcap/check" + "github.com/pingcap/tidb/expression" plannercore "github.com/pingcap/tidb/planner/core" ) @@ -58,17 +59,27 @@ func (s *testMemoSuite) TestNewExprIterFromGroupElem(c *C) { } func (s *testMemoSuite) TestExprIterNext(c *C) { - g0 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0)), s.schema) - g0.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - - g1 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)), s.schema) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g1.Insert(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0))) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g1.Insert(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0))) + g0 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.Zero}}.Init(s.sctx, 0)), s.schema) + g0.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 1}.Init(s.sctx, 0))) + g0.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + 
g0.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 2}.Init(s.sctx, 0))) + g0.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.Null}}.Init(s.sctx, 0))) + + g1 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Null}}.Init(s.sctx, 0)), s.schema) + g1.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 3}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 4}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Zero}}.Init(s.sctx, 0))) expr := NewGroupExpr(plannercore.LogicalJoin{}.Init(s.sctx, 0)) expr.Children = append(expr.Children, g0) @@ -102,26 +113,36 @@ func (s *testMemoSuite) TestExprIterNext(c *C) { } func (s *testMemoSuite) TestExprIterReset(c *C) { - g0 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0)), s.schema) - g0.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g0.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - - sel1 := NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)) - sel2 := NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)) - sel3 := NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)) + g0 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.Zero}}.Init(s.sctx, 0)), s.schema) + g0.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 1}.Init(s.sctx, 0))) + g0.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + g0.Insert(NewGroupExpr( + 
plannercore.LogicalLimit{Count: 2}.Init(s.sctx, 0))) + g0.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.Null}}.Init(s.sctx, 0))) + + sel1 := NewGroupExpr(plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Null}}.Init(s.sctx, 0)) + sel2 := NewGroupExpr(plannercore.LogicalSelection{Conditions: []expression.Expression{expression.One}}.Init(s.sctx, 0)) + sel3 := NewGroupExpr(plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Zero}}.Init(s.sctx, 0)) g1 := NewGroupWithSchema(sel1, s.schema) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr(plannercore.LogicalLimit{Count: 3}.Init(s.sctx, 0))) g1.Insert(sel2) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr(plannercore.LogicalLimit{Count: 4}.Init(s.sctx, 0))) g1.Insert(sel3) - g2 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)), s.schema) - g2.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g2.Insert(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0))) - g2.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g2.Insert(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0))) + g2 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Null}}.Init(s.sctx, 0)), s.schema) + g2.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 3}.Init(s.sctx, 0))) + g2.Insert(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + g2.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 4}.Init(s.sctx, 0))) + g2.Insert(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.Zero}}.Init(s.sctx, 0))) // link join with Group 0 and 1 expr := NewGroupExpr(plannercore.LogicalJoin{}.Init(s.sctx, 0)) @@ -185,25 +206,36 @@ func 
countMatchedIter(group *Group, pattern *Pattern) int { } func (s *testMemoSuite) TestExprIterWithEngineType(c *C) { - g1 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)), s.schema).SetEngineType(EngineTiFlash) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g1.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - g1.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - - g2 := NewGroupWithSchema(NewGroupExpr(plannercore.LogicalSelection{}.Init(s.sctx, 0)), s.schema).SetEngineType(EngineTiKV) - g2.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - g2.Insert(NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0))) - g2.Insert(NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0))) - - flashGather := NewGroupExpr(plannercore.TiKVSingleGather{}.Init(s.sctx, 0)) + g1 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.One}}.Init(s.sctx, 0)), s.schema).SetEngineType(EngineTiFlash) + g1.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 1}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + g1.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 2}.Init(s.sctx, 0))) + + g2 := NewGroupWithSchema(NewGroupExpr( + plannercore.LogicalSelection{Conditions: []expression.Expression{expression.One}}.Init(s.sctx, 0)), s.schema).SetEngineType(EngineTiKV) + g2.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 2}.Init(s.sctx, 0))) + g2.Insert(NewGroupExpr( + plannercore.LogicalProjection{Exprs: []expression.Expression{expression.One}}.Init(s.sctx, 0))) + g2.Insert(NewGroupExpr( + plannercore.LogicalLimit{Count: 3}.Init(s.sctx, 0))) + + flashGather := NewGroupExpr( + plannercore.TiKVSingleGather{}.Init(s.sctx, 0)) flashGather.Children = append(flashGather.Children, g1) g3 := NewGroupWithSchema(flashGather, 
s.schema).SetEngineType(EngineTiDB) - tikvGather := NewGroupExpr(plannercore.TiKVSingleGather{}.Init(s.sctx, 0)) + tikvGather := NewGroupExpr( + plannercore.TiKVSingleGather{}.Init(s.sctx, 0)) tikvGather.Children = append(tikvGather.Children, g2) g3.Insert(tikvGather) - join := NewGroupExpr(plannercore.LogicalJoin{}.Init(s.sctx, 0)) + join := NewGroupExpr( + plannercore.LogicalJoin{}.Init(s.sctx, 0)) join.Children = append(join.Children, g3, g3) g4 := NewGroupWithSchema(join, s.schema).SetEngineType(EngineTiDB) diff --git a/planner/memo/group_expr.go b/planner/memo/group_expr.go index 5b5201dfb503d..3bb48a4622f23 100644 --- a/planner/memo/group_expr.go +++ b/planner/memo/group_expr.go @@ -14,7 +14,7 @@ package memo import ( - "fmt" + "encoding/binary" "reflect" "github.com/pingcap/tidb/expression" @@ -51,11 +51,17 @@ func NewGroupExpr(node plannercore.LogicalPlan) *GroupExpr { // FingerPrint gets the unique fingerprint of the Group expression. func (e *GroupExpr) FingerPrint() string { - if e.selfFingerprint == "" { - e.selfFingerprint = fmt.Sprintf("%v", e.ExprNode.ID()) - for i := range e.Children { - e.selfFingerprint += e.Children[i].FingerPrint() + if len(e.selfFingerprint) == 0 { + planHash := e.ExprNode.HashCode() + buffer := make([]byte, 2, 2+len(e.Children)*8+len(planHash)) + binary.BigEndian.PutUint16(buffer, uint16(len(e.Children))) + for _, child := range e.Children { + var buf [8]byte + binary.BigEndian.PutUint64(buf[:], uint64(reflect.ValueOf(child).Pointer())) + buffer = append(buffer, buf[:]...) } + buffer = append(buffer, planHash...) + e.selfFingerprint = string(buffer) } return e.selfFingerprint } diff --git a/planner/memo/group_expr_test.go b/planner/memo/group_expr_test.go index 0a68e1ee03685..31e8f5fea8665 100644 --- a/planner/memo/group_expr_test.go +++ b/planner/memo/group_expr_test.go @@ -14,7 +14,11 @@ package memo import ( + "encoding/binary" + "reflect" + . 
"github.com/pingcap/check" + "github.com/pingcap/tidb/expression" plannercore "github.com/pingcap/tidb/planner/core" ) @@ -27,9 +31,15 @@ func (s *testMemoSuite) TestNewGroupExpr(c *C) { } func (s *testMemoSuite) TestGroupExprFingerprint(c *C) { - p := &plannercore.LogicalLimit{} + p := &plannercore.LogicalLimit{Count: 3} expr := NewGroupExpr(p) - - // we haven't set the id of the created LogicalLimit, so the result is 0. - c.Assert(expr.FingerPrint(), Equals, "0") + childGroup := NewGroupWithSchema(nil, expression.NewSchema()) + expr.SetChildren(childGroup) + // ChildNum(2 bytes) + ChildPointer(8 bytes) + LogicalLimit HashCode + planHash := p.HashCode() + buffer := make([]byte, 10+len(planHash)) + binary.BigEndian.PutUint16(buffer, 1) + binary.BigEndian.PutUint64(buffer[2:], uint64(reflect.ValueOf(childGroup).Pointer())) + copy(buffer[10:], planHash) + c.Assert(expr.FingerPrint(), Equals, string(buffer)) } diff --git a/planner/memo/group_test.go b/planner/memo/group_test.go index 325e383d6ce21..72a3c9b4b6f56 100644 --- a/planner/memo/group_test.go +++ b/planner/memo/group_test.go @@ -112,6 +112,56 @@ func (s *testMemoSuite) TestGroupExists(c *C) { c.Assert(g.Exists(expr), IsFalse) } +func (s *testMemoSuite) TestGroupFingerPrint(c *C) { + stmt1, err := s.ParseOneStmt("select * from t where a > 1 and a < 100", "", "") + c.Assert(err, IsNil) + p1, _, err := plannercore.BuildLogicalPlan(context.Background(), s.sctx, stmt1, s.is) + c.Assert(err, IsNil) + logic1, ok := p1.(plannercore.LogicalPlan) + c.Assert(ok, IsTrue) + // Plan tree should be: DataSource -> Selection -> Projection + proj, ok := logic1.(*plannercore.LogicalProjection) + c.Assert(ok, IsTrue) + sel, ok := logic1.Children()[0].(*plannercore.LogicalSelection) + c.Assert(ok, IsTrue) + group1 := Convert2Group(logic1) + oldGroupExpr := group1.Equivalents.Front().Value.(*GroupExpr) + + // Insert a GroupExpr with the same ExprNode. 
+ newGroupExpr := NewGroupExpr(proj) + newGroupExpr.SetChildren(oldGroupExpr.Children[0]) + group1.Insert(newGroupExpr) + c.Assert(group1.Equivalents.Len(), Equals, 1) + + // Insert a GroupExpr with different children. + newGroupExpr2 := NewGroupExpr(proj) + newGroup := NewGroupWithSchema(oldGroupExpr, group1.Prop.Schema) + newGroupExpr2.SetChildren(newGroup) + group1.Insert(newGroupExpr2) + c.Assert(group1.Equivalents.Len(), Equals, 2) + + // Insert a GroupExpr with different ExprNode. + limit := plannercore.LogicalLimit{}.Init(proj.SCtx(), 0) + newGroupExpr3 := NewGroupExpr(limit) + newGroupExpr3.SetChildren(oldGroupExpr.Children[0]) + group1.Insert(newGroupExpr3) + c.Assert(group1.Equivalents.Len(), Equals, 3) + + // Insert two LogicalSelections with same conditions but different order. + c.Assert(len(sel.Conditions), Equals, 2) + newSelection := plannercore.LogicalSelection{ + Conditions: make([]expression.Expression, 2)}.Init(sel.SCtx(), sel.SelectBlockOffset()) + newSelection.Conditions[0], newSelection.Conditions[1] = sel.Conditions[1], sel.Conditions[0] + newGroupExpr4 := NewGroupExpr(sel) + newGroupExpr5 := NewGroupExpr(newSelection) + newGroupExpr4.SetChildren(oldGroupExpr.Children[0]) + newGroupExpr5.SetChildren(oldGroupExpr.Children[0]) + group1.Insert(newGroupExpr4) + c.Assert(group1.Equivalents.Len(), Equals, 4) + group1.Insert(newGroupExpr5) + c.Assert(group1.Equivalents.Len(), Equals, 4) +} + func (s *testMemoSuite) TestGroupGetFirstElem(c *C) { expr0 := NewGroupExpr(plannercore.LogicalProjection{}.Init(s.sctx, 0)) expr1 := NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0)) @@ -182,9 +232,9 @@ func (s *testMemoSuite) TestEngineTypeSet(c *C) { } func (s *testMemoSuite) TestFirstElemAfterDelete(c *C) { - oldExpr := NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0)) + oldExpr := NewGroupExpr(plannercore.LogicalLimit{Count: 10}.Init(s.sctx, 0)) g := NewGroupWithSchema(oldExpr, s.schema) - newExpr := 
NewGroupExpr(plannercore.LogicalLimit{}.Init(s.sctx, 0)) + newExpr := NewGroupExpr(plannercore.LogicalLimit{Count: 20}.Init(s.sctx, 0)) g.Insert(newExpr) c.Assert(g.GetFirstElem(OperandLimit), NotNil) c.Assert(g.GetFirstElem(OperandLimit).Value, Equals, oldExpr)