Skip to content

Commit

Permalink
Merge pull request #8631 from dolthub/macneale4/alter-pk-diff
Browse files Browse the repository at this point in the history
Give a little more information in dolt_diff_* when there is a pk change
  • Loading branch information
macneale4 authored Dec 17, 2024
2 parents ae30696 + ac9cad6 commit 9f07a4a
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 43 deletions.
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/dtables/commit_diff_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup)
fromSch: dt.targetSchema,
}

isDiffable, err := dp.isDiffablePartition(ctx)
isDiffable, _, err := dp.isDiffablePartition(ctx)
if err != nil {
return nil, err
}
Expand Down
39 changes: 30 additions & 9 deletions go/libraries/doltcore/sqle/dtables/diff_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,31 +684,42 @@ func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner

// isDiffablePartition checks if the commit pair for this partition is "diffable".
// If the primary key sets changed between the two commits, it may not be
// possible to diff them.
func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) {
// possible to diff them. We return two bools: simpleDiff is returned if the primary key sets are close enough that we
// can confidently merge the diff (using schema.ArePrimaryKeySetsDiffable). fuzzyDiff is returned if the primary key
// sets are not close enough to merge the diff, but we can still make an approximate comparison (using schema.MapSchemaBasedOnTagAndName).
func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (simpleDiff bool, fuzzyDiff bool, err error) {
// dp.to is nil when a table has been deleted previously. In this case, we return
// false, to stop processing diffs, since that previously deleted table is considered
// a logically different table and we don't want to mix the diffs together.
if dp.to == nil {
return false, nil
return false, false, nil
}

// dp.from is nil when the to commit created a new table
if dp.from == nil {
return true, nil
return true, false, nil
}

fromSch, err := dp.from.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}

toSch, err := dp.to.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}

easyDiff := schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch)
if easyDiff {
return true, false, nil
}

return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil
_, _, err = schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
if err == nil {
return false, true, nil
}
return false, false, nil
}

type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error)
Expand Down Expand Up @@ -762,6 +773,7 @@ type DiffPartitions struct {
selectFunc partitionSelectFunc
toSch schema.Schema
fromSch schema.Schema
stopNext bool
}

// processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have
Expand Down Expand Up @@ -821,6 +833,10 @@ func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm
}

func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {
if dps.stopNext {
return nil, io.EOF
}

for {
cmHash, optCmt, err := dps.cmItr.Next(ctx)
if err != nil {
Expand Down Expand Up @@ -852,16 +868,21 @@ func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {

if next != nil {
// If we can't diff this commit with its parent, don't traverse any lower
canDiff, err := next.isDiffablePartition(ctx)
simpleDiff, fuzzyDiff, err := next.isDiffablePartition(ctx)
if err != nil {
return nil, err
}

if !canDiff {
if !simpleDiff && !fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
return nil, io.EOF
}

if !simpleDiff && fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
dps.stopNext = true
}

return *next, nil
}
}
Expand Down
10 changes: 7 additions & 3 deletions go/libraries/doltcore/sqle/dtables/prolly_row_conv.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,14 @@ func (c ProllyRowConverter) putFields(ctx context.Context, tup val.Tuple, proj v
virtualOffset := 0
for i, j := range proj {
if j == -1 {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && c.inSchema.GetNonPKCols().GetByIndex(i).Virtual {
virtualOffset++
nonPkCols := c.inSchema.GetNonPKCols()
if len(nonPkCols.GetColumns()) > i {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && nonPkCols.GetByIndex(i).Virtual {
virtualOffset++
}
}

continue
}

Expand Down
39 changes: 36 additions & 3 deletions go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,14 +528,44 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
},
{
Query: "SELECT COUNT(*) FROM DOLT_DIFF_t;",
Expected: []sql.Row{{1}},
Expected: []sql.Row{{7}},
},
{
Query: "SELECT to_pk, to_c1, from_pk, from_c1, diff_type FROM DOLT_DIFF_t where to_commit=@Commit4;",
Expected: []sql.Row{{7, 8, nil, nil, "added"}},
},
},
},
{
// Similar to previous test, but with one row to avoid ordering issues.
Name: "altered keyless table add pk", // https://github.com/dolthub/dolt/issues/8625
SetUpScript: []string{
"create table tbl (i int, j int);",
"insert into tbl values (42, 23);",
"call dolt_commit('-Am', 'commit1');",
"alter table tbl add primary key(i);",
"call dolt_commit('-am', 'commit2');",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_i,to_j,from_i,from_j,diff_type FROM dolt_diff_tbl;",
// Output in the situation is admittedly wonky. Updating the PK leaves in a place where we can't really render
// the diff, but we want to show something. In this case, the 'pk' column tag changes, so in the last two rows
// of the output you see we add "nil,23" and remove "nil,23" when in fact those columns were "42" with a different
// tag.
//
// In the past we just returned an empty set in this case. The
// warning is kind of essential to understand what is happening.
Expected: []sql.Row{
{42, 23, nil, nil, "added"},
{nil, nil, nil, 23, "removed"},
},
ExpectedWarningsCount: 1,
ExpectedWarning: 1105,
ExpectedWarningMessageSubstring: "due to primary key set change",
},
},
},
{
Name: "table with commit column should maintain its data in diff",
SetUpScript: []string{
Expand Down Expand Up @@ -713,8 +743,10 @@ var Dolt1DiffSystemTableScripts = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{{"2", "2", nil, nil, "added"}},
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{
{"2", "2", nil, nil, "added"},
},
},
},
},
Expand Down Expand Up @@ -5298,6 +5330,7 @@ var CommitDiffSystemTableScriptTests = []queries.ScriptTest{
},
},
},

{
Name: "added and dropped table",
SetUpScript: []string{
Expand Down
73 changes: 46 additions & 27 deletions integration-tests/bats/sql-diff.bats
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,10 @@ SQL
done
}

@test "sql-diff: supports multiple primary keys" {
run_2pk5col_ints() {
local query_name=$1

# Initial setup
dolt checkout -b firstbranch
dolt sql <<SQL
CREATE TABLE test (
Expand All @@ -605,40 +608,56 @@ CREATE TABLE test (
PRIMARY KEY (pk1,pk2)
);
SQL
dolt table import -u test `batshelper 2pk5col-ints.csv`
dolt table import -u test $(batshelper 2pk5col-ints.csv)
dolt add .
dolt commit -m "create/init table test"

# for each query file in helper/queries/2pk5col-ints/
# run query on db, create sql diff patch, confirm they're equivalent
dolt branch newbranch
for query in delete add update single_pk_update all_pk_update create_table
do
dolt checkout newbranch
dolt sql < $BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/$query.sql
dolt add .
dolt diff -r sql
dolt commit -m "applied $query query "

# confirm a difference exists
# Apply the query
dolt checkout newbranch
dolt sql < "$BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/${query_name}.sql"
dolt add .
dolt diff -r sql
dolt commit -m "applied ${query_name} query"

run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]
# Confirm a difference exists
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]

dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"
# Generate patch, apply on firstbranch, and verify no differences
dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"

# confirm that both branches have the same content
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
done
# Confirm branches are identical
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
}

@test "sql-diff: supports multiple primary keys (delete)" {
run_2pk5col_ints "delete"
}
@test "sql-diff: supports multiple primary keys (add)" {
run_2pk5col_ints "add"
}
@test "sql-diff: supports multiple primary keys (update)" {
run_2pk5col_ints "update"
}
@test "sql-diff: supports multiple primary keys (single_pk_update)" {
run_2pk5col_ints "single_pk_update"
}
@test "sql-diff: supports multiple primary keys (all_pk_update)" {
run_2pk5col_ints "all_pk_update"
}
@test "sql-diff: supports multiple primary keys (create_table)" {
run_2pk5col_ints "create_table"
}


@test "sql-diff: escapes values for MySQL string literals" {
# https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
Expand Down

0 comments on commit 9f07a4a

Please sign in to comment.