From 052e742d24060ec5d93125332cd0920d6792d9be Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Tue, 7 Jan 2025 17:13:32 +0800 Subject: [PATCH] fix: support enum columns for mysqlsh copy-instance utility (#350) * test: add enum column to mysqlsh copy-instance test * test: add UUID column * test: uuid and enum * fix: convert enum ordinal to string for duckdb insert --- .github/workflows/mysql-copy-tests.yml | 36 ++++++++++++++++++++------ catalog/inserter.go | 21 ++++++++++++++- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mysql-copy-tests.yml b/.github/workflows/mysql-copy-tests.yml index 233292e..da0a62e 100644 --- a/.github/workflows/mysql-copy-tests.yml +++ b/.github/workflows/mysql-copy-tests.yml @@ -46,17 +46,22 @@ jobs: - name: Setup test data in source MySQL run: | - mysqlsh -hlocalhost -P13306 -uroot -proot --sql -e " + mysqlsh -hlocalhost -P13306 -uroot -proot --sql <<'EOF' CREATE DATABASE testdb; USE testdb; + -- Normal table, which should be copied to MyDuck via duckdb's csv import CREATE TABLE users ( id INT AUTO_INCREMENT PRIMARY KEY, - name VARCHAR(100) + name VARCHAR(100), + status ENUM('active', 'inactive', 'pending') DEFAULT 'pending' ); - INSERT INTO users (name) VALUES ('test1'), ('test2'), ('test3'); + INSERT INTO users (name, status) VALUES + ('test1', 'active'), + ('test2', 'inactive'), + ('test3', 'pending'); -- Make a gap in the id sequence - INSERT INTO users VALUES (100, 'test100'); - INSERT INTO users (name) VALUES ('test101'); + INSERT INTO users VALUES (100, 'test100', 'active'); + INSERT INTO users (name, status) VALUES ('test101', 'inactive'); -- A table with non-default starting auto_increment value CREATE TABLE items ( @@ -66,7 +71,22 @@ jobs: ) AUTO_INCREMENT=1000; INSERT INTO items (v, name) VALUES (1, 'item1'), (2, 'item2'), (3, 'item3'); - " + + -- Table with UUID primary key + -- For such tables, MySQL Shell generates nontrivial LOAD DATA statements + -- to copy the data to MyDuck: LOAD DATA ... (@id, title, created_at) SET id = FROM_BASE64(@id), + -- which can only be executed by the go-mysql-server framework for now. + CREATE TABLE documents ( + id BINARY(16) PRIMARY KEY, + title VARCHAR(200), + status ENUM('draft', 'published', 'archived') DEFAULT 'draft', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + + INSERT INTO documents (id, title, status) VALUES + (UUID_TO_BIN(UUID()), 'Document 1', 'published'), + (UUID_TO_BIN(UUID()), 'Document 2', 'draft'); + EOF - name: Build and start MyDuck Server run: | @@ -85,7 +105,7 @@ jobs: --users false --ignore-version true # Verify the data was copied - for table in users items; do + for table in users items documents; do mysqlsh -hlocalhost -P13306 -uroot -proot --sql -e " SELECT * FROM testdb.$table ORDER BY id; " | tee source_data_$table.tsv @@ -96,4 +116,4 @@ jobs: diff source_data_$table.tsv copied_data_$table.tsv done - + diff --git a/catalog/inserter.go b/catalog/inserter.go index 46c4947..b92a20f 100644 --- a/catalog/inserter.go +++ b/catalog/inserter.go @@ -24,6 +24,7 @@ type rowInserter struct { stmt *stdsql.Stmt err error flushSQL string + enums []int } var _ sql.RowInserter = &rowInserter{} @@ -75,9 +76,15 @@ func (ri *rowInserter) init(ctx *sql.Context) { } insert.WriteString(" INTO ") insert.WriteString(ConnectIdentifiersANSI(ri.db, ri.table)) - insert.WriteString(" SELECT * FROM ") + insert.WriteString(" FROM ") insert.WriteString(QuoteIdentifierANSI(ri.tmpTable)) ri.flushSQL = insert.String() + + for i, col := range ri.schema { + if _, ok := col.Type.(sql.EnumType); ok { + ri.enums = append(ri.enums, i) + } + } } func (ri *rowInserter) StatementBegin(ctx *sql.Context) { @@ -106,6 +113,18 @@ func (ri *rowInserter) Insert(ctx *sql.Context, row sql.Row) error { if ri.err != nil { return ri.err } + + // For enum columns, we have to convert the enum ordinal to the enum string. + for _, i := range ri.enums { + if idx, ok := row[i].(uint16); ok { + if s, ok := ri.schema[i].Type.(sql.EnumType).At(int(idx)); ok { + row[i] = s + } else { + return fmt.Errorf("invalid enum value %d for column %s", idx, ri.schema[i].Name) + } + } + } + if _, err := ri.stmt.ExecContext(ctx, row...); err != nil { ri.err = err return err