Skip to content

Commit

Permalink
Merge pull request #8538 from dolthub/nicktobey/json2
Browse files Browse the repository at this point in the history
Fix JSON merge issue that would report imprecise diffs in some situations
  • Loading branch information
nicktobey authored Nov 7, 2024
2 parents b3853f6 + c9b6269 commit 1785ef9
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 12 deletions.
18 changes: 16 additions & 2 deletions go/store/prolly/tree/indexed_json_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,24 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
case 0:
key := fromCurrentLocation.Clone().key

fromNextCharacter, err := jd.currentFromCursor.nextCharacter(ctx)
if err == io.EOF {
return JsonDiff{}, jsonParseError
}
if err != nil {
return JsonDiff{}, err
}
toNextCharacter, err := jd.currentToCursor.nextCharacter(ctx)
if err == io.EOF {
return JsonDiff{}, jsonParseError
}
if err != nil {
return JsonDiff{}, err
}
// Both sides have the same key. If they're both an object or both an array, continue.
// Otherwise, compare them and possibly return a modification.
if (fromScanner.current() == '{' && toScanner.current() == '{') ||
(fromScanner.current() == '[' && toScanner.current() == '[') {
if (fromNextCharacter == '{' && toNextCharacter == '{') ||
(fromNextCharacter == '[' && toNextCharacter == '[') {
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
Expand Down
32 changes: 23 additions & 9 deletions go/store/prolly/tree/json_cursor.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,20 +195,28 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, f
return true, nil
}

func (j *JsonCursor) advanceCursor(ctx context.Context) error {
err := j.cur.advance(ctx)
if err != nil {
return err
}
if !j.cur.Valid() {
// We hit the end of the tree. This shouldn't happen.
return io.EOF
}
j.jsonScanner = ScanJsonFromMiddle(j.cur.currentValue(), j.jsonScanner.currentPath)
return nil
}

func (j *JsonCursor) AdvanceToNextLocation(ctx context.Context) (crossedBoundary bool, err error) {
err = j.jsonScanner.AdvanceToNextLocation()
if err == io.EOF {
crossedBoundary = true
// We hit the end of the chunk, load the next one
err = j.cur.advance(ctx)
err = j.advanceCursor(ctx)
if err != nil {
return
}
if !j.cur.Valid() {
// We hit the end of the tree. This shouldn't happen.
return true, io.EOF
return false, err
}
j.jsonScanner = ScanJsonFromMiddle(j.cur.currentValue(), j.jsonScanner.currentPath)
return true, j.jsonScanner.AdvanceToNextLocation()
} else if err != nil {
return
Expand All @@ -221,6 +229,12 @@ func (j *JsonCursor) GetCurrentPath() jsonLocation {
return j.jsonScanner.currentPath
}

func (j *JsonCursor) nextCharacter() byte {
return j.jsonScanner.jsonBuffer[j.jsonScanner.valueOffset]
func (j *JsonCursor) nextCharacter(ctx context.Context) (byte, error) {
if j.jsonScanner.atEndOfChunk() {
err := j.advanceCursor(ctx)
if err != nil {
return 255, err
}
}
return j.jsonScanner.jsonBuffer[j.jsonScanner.valueOffset], nil
}
34 changes: 33 additions & 1 deletion go/store/prolly/tree/json_diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/dolthub/go-mysql-server/sql/expression/function/json"

"github.com/dolthub/go-mysql-server/sql/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -415,6 +416,34 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
from: largeObject,
to: insert(emptyDocument, "$.level6", insert(emptyDocument, "$.level4", lookup(largeObject, "$.level6.level4"))),
},
{
// This is a regression test.
//
// If:
// - A chunk begins with an object "A"
// - If a value "A.b" within this object was modified
// - The previous chunk was also modified
// Then the differ would incorrectly report that the entire "A" object had been modified, instead of the sub-value "A.b"
// The values in this test case are specifically chosen to meet these conditions,
// as there is a chunk boundary immediately before "$.level5.level3.level1"
name: "correctly diff object that begins on chunk boundary",
from: largeObject,
to: set(set(largeObject, "$.level5.level2.number", 2), "$.level5.level3.level1.number", 2),
expectedDiffs: []JsonDiff{
{
Key: makeJsonPathKey(`level5`, `level2`, `number`),
From: types.JSONDocument{Val: 1},
To: types.JSONDocument{Val: 2},
Type: ModifiedDiff,
},
{
Key: makeJsonPathKey(`level5`, `level3`, `level1`, `number`),
From: types.JSONDocument{Val: 1},
To: types.JSONDocument{Val: 2},
Type: ModifiedDiff,
},
},
},
}
}

Expand Down Expand Up @@ -489,7 +518,10 @@ func runTest(t *testing.T, test jsonDiffTest) {
return cmp == 0
}
if test.expectedDiffs != nil {
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))

if !assert.Equal(t, len(test.expectedDiffs), len(actualDiffs)) {
require.Fail(t, "Diffs don't match", "Expected: %v\nActual: %v", test.expectedDiffs, actualDiffs)
}
for i, expected := range test.expectedDiffs {
actual := actualDiffs[i]
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
Expand Down

0 comments on commit 1785ef9

Please sign in to comment.