Skip to content

Commit

Permalink
Remove GC's panic(), use Warn log instead: GC-PANIC (#20910)
Browse files (browse the repository at this point in the history)
  • Loading branch information
LeftHandCold authored Dec 25, 2024
1 parent 336242f commit f48a2aa
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 25 deletions.
2 changes: 1 addition & 1 deletion pkg/vm/engine/tae/db/checkpoint/replay.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -170,12 +170,12 @@ func (c *CkpReplayer) ReadCkpFiles() (err error) {
entry, _, closeCB, err := replayEntries(file.name, compacted)
if err != nil {
logutil.Errorf("replay compacted checkpoint file %s failed: %v", file.name, err.Error())
return err
}
if len(entry) != 1 {
for _, e := range entry {
logutil.Infof("compacted checkpoint entry: %v", e.String())
}
panic("invalid compacted checkpoint file")
}
r.tryAddNewCompactedCheckpointEntry(entry[0])
closeCB()
Expand Down
30 changes: 24 additions & 6 deletions pkg/vm/engine/tae/db/gc/v3/checkpoint.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -665,8 +665,12 @@ func (c *checkpointCleaner) getEntriesToMerge(ts *types.TS) (
start = *gcWaterMark
}
if !ts.GE(&start) {
panic(fmt.Sprintf("getEntriesToMerge end < start. "+
"end: %v, start: %v", ts.ToString(), start.ToString()))
logutil.Warn("GC-PANIC-MERGE-CKP",
zap.String("task", c.TaskNameLocked()),
zap.String("start", start.ToString()),
zap.String("end", ts.ToString()),
)
return
}
compacted := c.checkpointCli.GetCompacted()
ickps := c.checkpointCli.ICKPRange(&start, ts, c.config.maxMergeCheckpointCount)
Expand Down Expand Up @@ -764,7 +768,12 @@ func (c *checkpointCleaner) mergeCheckpointFilesLocked(

checkpointMaxEnd = toMergeEntries[len(toMergeEntries)-1].GetEnd()
if checkpointMaxEnd.GT(checkpointLowWaterMark) {
panic(fmt.Sprintf("checkpointMaxEnd %s < checkpointLowWaterMark %s", checkpointMaxEnd.ToString(), checkpointLowWaterMark.ToString()))
logutil.Warn("GC-PANIC-MERGE-FILES",
zap.String("task", c.TaskNameLocked()),
zap.String("checkpointMaxEnd", checkpointMaxEnd.ToString()),
zap.String("checkpointLowWaterMark", checkpointLowWaterMark.ToString()),
)
return
}

if toMergeEntries, err = c.filterCheckpoints(
Expand All @@ -780,7 +789,11 @@ func (c *checkpointCleaner) mergeCheckpointFilesLocked(
// get the scanned window, it should not be nil
window := c.GetScannedWindowLocked()
if checkpointMaxEnd.GT(&window.tsRange.end) {
panic(fmt.Sprintf("checkpointMaxEnd %s < window end %s", checkpointMaxEnd.ToString(), window.tsRange.end.ToString()))
logutil.Warn("GC-PANIC-MERGE-FILES",
zap.String("checkpointMaxEnd", checkpointMaxEnd.ToString()),
zap.String("window-end", window.tsRange.end.ToString()),
)
return
}

sourcer := window.MakeFilesReader(c.ctx, c.fs.Service)
Expand Down Expand Up @@ -815,7 +828,9 @@ func (c *checkpointCleaner) mergeCheckpointFilesLocked(
pitrs,
gcFileCount)
if newCheckpoint == nil {
panic("MergeCheckpoint new checkpoint is nil")
logutil.Warn("GC-PANIC-NEW-CHECKPOINT-EMPTY",
zap.String("task", c.TaskNameLocked()))
return
}
newFiles := tmpNewFiles
for _, stats := range c.GetScannedWindowLocked().files {
Expand Down Expand Up @@ -1038,7 +1053,10 @@ func (c *checkpointCleaner) tryGCAgainstGCKPLocked(
}
scanMark := c.GetScanWaterMark().GetEnd()
if scanMark.IsEmpty() {
panic("scanMark is empty")
logutil.Warn("GC-PANIC-SCANMARK-EMPTY",
zap.String("task", c.TaskNameLocked()),
zap.String("mergeMark", mergeMark.ToString()))
return nil
}
if waterMark.GT(&scanMark) {
waterMark = scanMark
Expand Down
8 changes: 6 additions & 2 deletions pkg/vm/engine/tae/db/gc/v3/exec_v1.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,8 +16,9 @@ package gc

import (
"context"
"fmt"
"github.com/matrixorigin/matrixone/pkg/common/malloc"
"github.com/matrixorigin/matrixone/pkg/logutil"
"go.uber.org/zap"
"unsafe"

"github.com/matrixorigin/matrixone/pkg/container/types"
Expand Down Expand Up @@ -319,7 +320,10 @@ func MakeSnapshotAndPitrFineFilter(
continue
}
if dropTS.IsEmpty() {
panic(fmt.Sprintf("dropTS is empty, name: %s, createTS: %s", name, createTS.ToString()))
logutil.Warn("GC-PANIC-TS-EMPTY",
zap.String("name", name),
zap.String("createTS", createTS.ToString()))
continue
}
if !logtail.ObjectIsSnapshotRefers(
&stats, pitr, &createTS, &dropTS, snapshots,
Expand Down
71 changes: 55 additions & 16 deletions pkg/vm/engine/tae/logtail/snapshot.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -384,8 +384,9 @@ func (sm *SnapshotMeta) updateTableInfo(

for _, info := range orderedInfos {
if info.stats.BlkCnt() != 1 {
panic(fmt.Sprintf("mo_table object %v blk cnt %v",
info.stats.ObjectName(), info.stats.BlkCnt()))
logutil.Warn("GC-PANIC-UPDATE-TABLE-P1",
zap.String("object", info.stats.ObjectName().String()),
zap.Uint32("blkCnt", info.stats.BlkCnt()))
}
if !info.deleteAt.IsEmpty() {
sm.aobjDelTsMap[info.deleteAt] = struct{}{}
Expand Down Expand Up @@ -435,7 +436,9 @@ func (sm *SnapshotMeta) updateTableInfo(
}
if name == catalog2.MO_PITR {
if sm.pitr.tid > 0 && sm.pitr.tid != tid {
panic(fmt.Sprintf("pitr table %v is not unique", tid))
logutil.Warn("GC-PANIC-UPDATE-TABLE-P2",
zap.Uint64("tid", tid),
zap.Uint64("old-tid", sm.pitr.tid))
}
sm.pitr.tid = tid
}
Expand All @@ -445,8 +448,12 @@ func (sm *SnapshotMeta) updateTableInfo(
table := sm.tables[account][tid]
if table != nil {
if table.createAt.GT(&createAt) {
panic(fmt.Sprintf("table %v %v create at %v is greater than %v",
tid, tuple.ErrString(nil), table.createAt.ToString(), createAt.ToString()))
logutil.Warn("GC-PANIC-UPDATE-TABLE-P3",
zap.Uint64("tid", tid),
zap.String("name", tuple.ErrString(nil)),
zap.String("old-create-at", table.createAt.ToString()),
zap.String("new-create-at", createAt.ToString()))
table.createAt = createAt
}
if table.pk == pk {
sm.tablePKIndex[pk] = append(sm.tablePKIndex[pk], table)
Expand All @@ -473,8 +480,9 @@ func (sm *SnapshotMeta) updateTableInfo(
deleteRows := make([]tombstone, 0)
for _, info := range tTombstones {
if info.stats.BlkCnt() != 1 {
panic(fmt.Sprintf("mo_table tombstone %v blk cnt %v",
info.stats.ObjectName(), info.stats.BlkCnt()))
logutil.Warn("GC-PANIC-UPDATE-TABLE-P4",
zap.String("object", info.stats.ObjectName().String()),
zap.Uint32("blk-cnt", info.stats.BlkCnt()))
}
objectBat, _, err := blockio.LoadOneBlock(
ctx,
Expand Down Expand Up @@ -516,13 +524,21 @@ func (sm *SnapshotMeta) updateTableInfo(
continue
}
if len(sm.tablePKIndex[pk]) == 0 {
logutil.Warnf("[UpdateTableInfoWarn] delete table %v not found @ rowid %v, commit %v, start is %v, end is %v",
del.pk.ErrString(nil), del.rowid.String(), del.ts.ToString(), startts.ToString(), endts.ToString())
logutil.Warn("GC-PANIC-UPDATE-TABLE-P5",
zap.String("pk", del.pk.ErrString(nil)),
zap.String("rowid", del.rowid.String()),
zap.String("commit", del.ts.ToString()),
zap.String("start", startts.ToString()),
zap.String("end", endts.ToString()))

continue
}
table := sm.tablePKIndex[pk][0]
if !table.deleteAt.IsEmpty() && table.deleteAt.GT(&del.ts) {
panic(fmt.Sprintf("table %v delete at %v is greater than %v", table.tid, table.deleteAt, del.ts))
logutil.Warn("GC-PANIC-UPDATE-TABLE-P6",
zap.Uint64("tid", table.tid),
zap.String("old-delete-at", table.deleteAt.ToString()),
zap.String("new-delete-at", del.ts.ToString()))
}
table.deleteAt = del.ts
sm.tablePKIndex[pk] = sm.tablePKIndex[pk][1:]
Expand All @@ -546,7 +562,7 @@ func (sm *SnapshotMeta) updateTableInfo(

for pk, tables := range sm.tablePKIndex {
if len(tables) > 1 {
logutil.Warn("UpdateSnapTable-Error",
logutil.Warn("GC-PANIC-UPDATE-TABLE-P7",
zap.String("table", pk),
zap.Int("len", len(tables)),
)
Expand Down Expand Up @@ -663,7 +679,7 @@ func (sm *SnapshotMeta) Update(
if deleteTS.IsEmpty() {
// Compatible with the cluster restored by backup
logutil.Warn(
"GC-SnapshotMeta-Update-Collector-Skip",
"GC-PANIC-UPDATE-SNAPSHOT-META",
zap.Uint64("table-id", tid),
zap.String("object-name", stats.ObjectName().String()),
zap.String("create-at", createTS.ToString()),
Expand Down Expand Up @@ -919,7 +935,14 @@ func (sm *SnapshotMeta) GetPITR(
level := bat.Vecs[0].GetStringAt(r)
if level == PitrLevelCluster {
if !pitr.cluster.IsEmpty() {
panic("cluster duplicate pitr ")
logutil.Warn("GC-PANIC-DUP-PIRT-P1",
zap.String("level", "cluster"),
zap.String("old", pitr.cluster.ToString()),
zap.String("new", pitrTs.ToString()),
)
if pitr.cluster.LT(&pitrTs) {
continue
}
}
pitr.cluster = pitrTs

Expand All @@ -934,14 +957,30 @@ func (sm *SnapshotMeta) GetPITR(
id := uint64(account)
p := pitr.database[id]
if !p.IsEmpty() {
panic("db duplicate pitr ")
logutil.Warn("GC-PANIC-DUP-PIRT-P2",
zap.String("level", "database"),
zap.Uint64("id", id),
zap.String("old", p.ToString()),
zap.String("new", pitrTs.ToString()),
)
if p.LT(&pitrTs) {
continue
}
}
pitr.database[id] = pitrTs
} else if level == PitrLevelTable {
id := uint64(account)
p := pitr.tables[id]
if !p.IsEmpty() {
panic("table duplicate pitr ")
logutil.Warn("GC-PANIC-DUP-PIRT-P3",
zap.String("level", "table"),
zap.Uint64("id", id),
zap.String("old", p.ToString()),
zap.String("new", pitrTs.ToString()),
)
if p.LT(&pitrTs) {
continue
}
}
pitr.tables[id] = pitrTs
}
Expand Down Expand Up @@ -1152,7 +1191,7 @@ func (sm *SnapshotMeta) RebuildTableInfo(ins *containers.Batch) {
continue
}
if len(sm.tablePKIndex[pk]) > 0 {
logutil.Warn("RebuildTableInfo-PK-Exists",
logutil.Warn("GC-PANIC-REBUILD-TABLE",
zap.String("pk", pk),
zap.Uint64("table", tid))
}
Expand Down

0 comments on commit f48a2aa

Please sign in to comment.