From 27b51f7a78b6a45ea97c3380d74a1a6a425fe8c7 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 26 Apr 2024 08:56:02 -0500 Subject: [PATCH] Fix GetAllChildReferences used by migration filter Migration programs in onflow/flow-go added a flag to filter old unreferenced slabs and onflow/atree added some functions to support that. However, some of the old unreferenced slabs are not filtered during migration. This commit fixes the migration filter by handling nested storage ID inside element such as Cadence SomeValue. --- storage.go | 71 ++++++++---- storage_test.go | 286 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 334 insertions(+), 23 deletions(-) diff --git a/storage.go b/storage.go index ee9adca..53319ad 100644 --- a/storage.go +++ b/storage.go @@ -1071,40 +1071,60 @@ func (s *PersistentSlabStorage) FixLoadedBrokenReferences(needToFix func(old Val return false } - var isMetaDataSlab bool - switch slab.(type) { - case *ArrayMetaDataSlab, *MapMetaDataSlab: - isMetaDataSlab = true - } + case *ArrayMetaDataSlab, *MapMetaDataSlab: // metadata slabs + var foundBrokenRef bool - var foundBrokenRef bool - for _, childStorable := range slab.ChildStorables() { + for _, childStorable := range slab.ChildStorables() { - storageIDStorable, ok := childStorable.(StorageIDStorable) - if !ok { - continue - } + if slabIDStorable, ok := childStorable.(StorageIDStorable); ok { - childID := StorageID(storageIDStorable) + childID := StorageID(slabIDStorable) - // Track parent-child relationship of root slabs and non-root slabs. - if isMetaDataSlab { - parentOf[childID] = id - } + // Track parent-child relationship of root slabs and non-root slabs. + parentOf[childID] = id - if s.existIfLoaded(childID) { - continue + if !s.existIfLoaded(childID) { + foundBrokenRef = true + } + + // Continue with remaining child storables to track parent-child relationship. + } } - foundBrokenRef = true + return foundBrokenRef + + default: // data slabs + childStorables := slab.ChildStorables() + + for len(childStorables) > 0 { + + var nextChildStorables []Storable + + for _, childStorable := range childStorables { + + if slabIDStorable, ok := childStorable.(StorageIDStorable); ok { - if !isMetaDataSlab { - return true + if !s.existIfLoaded(StorageID(slabIDStorable)) { + return true + } + + continue + } + + // Append child storables of this childStorable to + // handle nested StorageIDStorable, such as Cadence SomeValue. + nextChildStorables = append( + nextChildStorables, + childStorable.ChildStorables()..., + ) + } + + childStorables = nextChildStorables } - } - return foundBrokenRef + return false + } } var brokenStorageIDs []StorageID @@ -1273,6 +1293,11 @@ func (s *PersistentSlabStorage) getAllChildReferences(slab Slab) ( storageIDStorable, ok := childStorable.(StorageIDStorable) if !ok { + nextChildStorables = append( + nextChildStorables, + childStorable.ChildStorables()..., + ) + continue } diff --git a/storage_test.go b/storage_test.go index d3dadcf..6d08735 100644 --- a/storage_test.go +++ b/storage_test.go @@ -1884,6 +1884,163 @@ func TestFixLoadedBrokenReferences(t *testing.T) { require.Equal(t, fixedData[rootID], savedData) }) + t.Run("broken nested storable in root map data slab", func(t *testing.T) { + + rootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} + + brokenRefs := map[StorageID][]StorageID{ + rootID: {rootID}, + } + + data := map[StorageID][]byte{ + rootID: { + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info + 0x18, 0x2a, + // count: 1 + 0x01, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 1 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + // element: [uint64(0):SomeValue(StorageID(0x0.1))] + 0x82, + 0xd8, 0xa4, 0x00, + 0xd8, cborTagSomeValue, 0xd8, 0xff, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }, + } + + fixedData := map[StorageID][]byte{ + rootID: { + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info + 0x18, 0x2a, + // count: 0 + 0x00, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 0) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 0 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + } + + storage := newTestPersistentStorageWithData(t, data) + + // Load data in storage + for id := range data { + _, found, err := storage.Retrieve(id) + require.NoError(t, err) + require.True(t, found) + } + + // Check health before fixing broken reference + _, err := CheckStorageHealth(storage, -1) + require.ErrorContains(t, err, "slab (0x0.1) not found: slab not found during slab iteration") + + var fixedRootIDs map[StorageID][]StorageID + var skippedRootIDs map[StorageID][]StorageID + + // Don't fix any broken references + fixedRootIDs, skippedRootIDs, err = storage.FixLoadedBrokenReferences(func(_ Value) bool { + return false + }) + require.NoError(t, err) + require.Equal(t, 0, len(fixedRootIDs)) + require.Equal(t, len(brokenRefs), len(skippedRootIDs)) + + for rootID, slabIDsWithBrokenRef := range brokenRefs { + require.ElementsMatch(t, slabIDsWithBrokenRef, skippedRootIDs[rootID]) + } + + // No data is modified because no fix happened + require.Equal(t, 0, len(storage.deltas)) + + // Fix broken references + fixedRootIDs, skippedRootIDs, err = storage.FixLoadedBrokenReferences(func(_ Value) bool { + return true + }) + require.NoError(t, err) + require.Equal(t, len(brokenRefs), len(fixedRootIDs)) + + for rootID, slabIDsWithBrokenRef := range brokenRefs { + require.ElementsMatch(t, slabIDsWithBrokenRef, fixedRootIDs[rootID]) + } + + require.Equal(t, 0, len(skippedRootIDs)) + require.Equal(t, 1, len(storage.deltas)) + + // Check health after fixing broken reference + rootIDs, err := CheckStorageHealth(storage, -1) + require.NoError(t, err) + require.Equal(t, 1, len(rootIDs)) + + _, ok := rootIDs[rootID] + require.True(t, ok) + + // Save data in storage + err = storage.FastCommit(runtime.NumCPU()) + require.NoError(t, err) + require.Equal(t, 0, len(storage.deltas)) + + // Check encoded data + baseStorage := storage.baseStorage.(*InMemBaseStorage) + require.Equal(t, 1, len(baseStorage.segments)) + + savedData, found, err := baseStorage.Retrieve(rootID) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, fixedData[rootID], savedData) + }) + t.Run("broken non-root map data slab", func(t *testing.T) { rootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} nonRootDataID1 := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 2}} @@ -3140,6 +3297,60 @@ func TestGetAllChildReferencesFromArray(t *testing.T) { testGetAllChildReferences(t, data, parentRootID, expectedRefIDs, expectedBrokenRefIDs) }) + t.Run("root data slab with ref in nested storable", func(t *testing.T) { + parentRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} + childRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 2}} + + expectedRefIDs := []StorageID{childRootID} + expectedBrokenRefIDs := []StorageID{} + + data := map[StorageID][]byte{ + parentRootID: { + // extra data + // version + 0x00, + // extra data flag + 0x80, + // array of extra data + 0x81, + // type info + 0x18, 0x2a, + + // version + 0x00, + // array data slab flag + 0x80, + // CBOR encoded array head (fixed size 3 byte) + 0x99, 0x00, 0x01, + // CBOR encoded array elements + 0xd8, cborTagSomeValue, 0xd8, 0xff, 0x50, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + }, + + childRootID: { + // extra data + // version + 0x00, + // extra data flag + 0x80, + // array of extra data + 0x81, + // type info + 0x18, 0x2a, + + // version + 0x00, + // array data slab flag + 0x80, + // CBOR encoded array head (fixed size 3 byte) + 0x99, 0x00, 0x01, + // CBOR encoded array elements + 0xd8, 0xa4, 0x00, + }, + } + + testGetAllChildReferences(t, data, parentRootID, expectedRefIDs, expectedBrokenRefIDs) + }) + t.Run("root data slab with broken ref", func(t *testing.T) { parentRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} childRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 2}} @@ -3773,6 +3984,81 @@ func TestGetAllChildReferencesFromMap(t *testing.T) { testGetAllChildReferences(t, data, rootID, expectedRefIDs, expectedBrokenRefIDs) }) + t.Run("root data slab with ref in nested storable", func(t *testing.T) { + rootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} + childRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 2}} + + expectedRefIDs := []StorageID{childRootID} + expectedBrokenRefIDs := []StorageID{} + + data := map[StorageID][]byte{ + rootID: { + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info + 0x18, 0x2a, + // count: 1 + 0x01, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 1 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + // element: [uint64(0):uint64(0)] + 0x82, + 0xd8, 0xa4, 0x00, + 0xd8, cborTagSomeValue, 0xd8, 0xff, 0x50, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + }, + + childRootID: { + // extra data + // version + 0x00, + // extra data flag + 0x80, + // array of extra data + 0x81, + // type info + 0x18, 0x2a, + + // version + 0x00, + // array data slab flag + 0x80, + // CBOR encoded array head (fixed size 3 byte) + 0x99, 0x00, 0x01, + // CBOR encoded array elements + 0xd8, 0xa4, 0x00, + }, + } + + testGetAllChildReferences(t, data, rootID, expectedRefIDs, expectedBrokenRefIDs) + }) + t.Run("root data slab with broken ref", func(t *testing.T) { rootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 1}} childRootID := StorageID{Address: address, Index: StorageIndex{0, 0, 0, 0, 0, 0, 0, 2}}