From 48cb9ab7c7aea1953b086fb68fff6e2b5dd2f0c1 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Thu, 15 Aug 2024 20:29:52 +0200 Subject: [PATCH] Refine `wait_timeout` override to be at cut-over only (#1406) Signed-off-by: Tim Vaillancourt --- doc/command-line-flags.md | 4 ---- go/base/context.go | 1 + go/cmd/gh-ost/main.go | 1 - go/logic/applier.go | 40 ++++++++++++++++++++++++++++++++----- go/mysql/connection.go | 5 ----- go/mysql/connection_test.go | 4 +--- 6 files changed, 37 insertions(+), 18 deletions(-) diff --git a/doc/command-line-flags.md b/doc/command-line-flags.md index dd8f1cd74..cf2b2ca95 100644 --- a/doc/command-line-flags.md +++ b/doc/command-line-flags.md @@ -202,10 +202,6 @@ List of metrics and threshold values; topping the threshold of any will cause th Typically `gh-ost` is used to migrate tables on a master. If you wish to only perform the migration in full on a replica, connect `gh-ost` to said replica and pass `--migrate-on-replica`. `gh-ost` will briefly connect to the master but otherwise will make no changes on the master. Migration will be fully executed on the replica, while making sure to maintain a small replication lag. -### mysql-wait-timeout - -If set to a value greater than zero, causes `gh-ost` to set a provided [MySQL `wait_timeout`](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_wait_timeout) for MySQL sessions opened by `gh-ost`, specified in seconds. - ### postpone-cut-over-flag-file Indicate a file name, such that the final [cut-over](cut-over.md) step does not take place as long as the file exists. diff --git a/go/base/context.go b/go/base/context.go index 41baae696..59227ea2d 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -164,6 +164,7 @@ type MigrationContext struct { Hostname string AssumeMasterHostname string ApplierTimeZone string + ApplierWaitTimeout int64 TableEngine string RowsEstimate int64 RowsDeltaEstimate int64 diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 39c815bc8..3e6057995 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -51,7 +51,6 @@ func main() { flag.StringVar(&migrationContext.AssumeMasterHostname, "assume-master-host", "", "(optional) explicitly tell gh-ost the identity of the master. Format: some.host.com[:port] This is useful in master-master setups where you wish to pick an explicit master, or in a tungsten-replicator where gh-ost is unable to determine the master") flag.IntVar(&migrationContext.InspectorConnectionConfig.Key.Port, "port", 3306, "MySQL port (preferably a replica, not the master)") flag.Float64Var(&migrationContext.InspectorConnectionConfig.Timeout, "mysql-timeout", 0.0, "Connect, read and write timeout for MySQL") - flag.Float64Var(&migrationContext.InspectorConnectionConfig.WaitTimeout, "mysql-wait-timeout", 0.0, "wait_timeout for MySQL sessions") flag.StringVar(&migrationContext.CliUser, "user", "", "MySQL user") flag.StringVar(&migrationContext.CliPassword, "password", "", "MySQL password") flag.StringVar(&migrationContext.CliMasterUser, "master-user", "", "MySQL user on master, if different from that on replica. Requires --assume-master-host") diff --git a/go/logic/applier.go b/go/logic/applier.go index 9b190919f..990fbe720 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -89,7 +89,7 @@ func (this *Applier) InitDBConnections() (err error) { return err } this.migrationContext.ApplierMySQLVersion = version - if err := this.validateAndReadTimeZone(); err != nil { + if err := this.validateAndReadGlobalVariables(); err != nil { return err } if !this.migrationContext.AliyunRDS && !this.migrationContext.GoogleCloudPlatform && !this.migrationContext.AzureMySQL { @@ -106,10 +106,13 @@ func (this *Applier) InitDBConnections() (err error) { return nil } -// validateAndReadTimeZone potentially reads server time-zone -func (this *Applier) validateAndReadTimeZone() error { - query := `select /* gh-ost */ @@global.time_zone` - if err := this.db.QueryRow(query).Scan(&this.migrationContext.ApplierTimeZone); err != nil { +// validateAndReadGlobalVariables potentially reads server global variables, such as the time_zone and wait_timeout. +func (this *Applier) validateAndReadGlobalVariables() error { + query := `select /* gh-ost */ @@global.time_zone, @@global.wait_timeout` + if err := this.db.QueryRow(query).Scan( + &this.migrationContext.ApplierTimeZone, + &this.migrationContext.ApplierWaitTimeout, + ); err != nil { return err } @@ -934,6 +937,27 @@ func (this *Applier) CreateAtomicCutOverSentryTable() error { return nil } +// InitAtomicCutOverWaitTimeout sets the cut-over session wait_timeout in order to reduce the +// time an unresponsive (but still connected) gh-ost process can hold the cut-over lock. +func (this *Applier) InitAtomicCutOverWaitTimeout(tx *gosql.Tx) error { + cutOverWaitTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 3 + this.migrationContext.Log.Infof("Setting cut-over idle timeout as %d seconds", cutOverWaitTimeoutSeconds) + query := fmt.Sprintf(`set /* gh-ost */ session wait_timeout:=%d`, cutOverWaitTimeoutSeconds) + _, err := tx.Exec(query) + return err +} + +// RevertAtomicCutOverWaitTimeout restores the original wait_timeout for the applier session post-cut-over. +func (this *Applier) RevertAtomicCutOverWaitTimeout() { + this.migrationContext.Log.Infof("Reverting cut-over idle timeout to %d seconds", this.migrationContext.ApplierWaitTimeout) + query := fmt.Sprintf(`set /* gh-ost */ session wait_timeout:=%d`, this.migrationContext.ApplierWaitTimeout) + if _, err := sqlutils.ExecNoPrepare(this.db, query); err != nil { + this.migrationContext.Log.Errorf("Failed to restore applier wait_timeout to %d seconds: %v", + this.migrationContext.ApplierWaitTimeout, err, + ) + } +} + // AtomicCutOverMagicLock func (this *Applier) AtomicCutOverMagicLock(sessionIdChan chan int64, tableLocked chan<- error, okToUnlockTable <-chan bool, tableUnlocked chan<- error) error { tx, err := this.db.Begin() @@ -979,6 +1003,12 @@ func (this *Applier) AtomicCutOverMagicLock(sessionIdChan chan int64, tableLocke return err } + if err := this.InitAtomicCutOverWaitTimeout(tx); err != nil { + tableLocked <- err + return err + } + defer this.RevertAtomicCutOverWaitTimeout() + query = fmt.Sprintf(`lock /* gh-ost */ tables %s.%s write, %s.%s write`, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName), diff --git a/go/mysql/connection.go b/go/mysql/connection.go index 7db51afe4..33bde2b62 100644 --- a/go/mysql/connection.go +++ b/go/mysql/connection.go @@ -31,7 +31,6 @@ type ConnectionConfig struct { Timeout float64 TransactionIsolation string Charset string - WaitTimeout float64 } func NewConnectionConfig() *ConnectionConfig { @@ -52,7 +51,6 @@ func (this *ConnectionConfig) DuplicateCredentials(key InstanceKey) *ConnectionC Timeout: this.Timeout, TransactionIsolation: this.TransactionIsolation, Charset: this.Charset, - WaitTimeout: this.WaitTimeout, } config.ImpliedKey = &config.Key return config @@ -141,9 +139,6 @@ func (this *ConnectionConfig) GetDBUri(databaseName string) string { fmt.Sprintf("readTimeout=%fs", this.Timeout), fmt.Sprintf("writeTimeout=%fs", this.Timeout), } - if this.WaitTimeout > 0 { - connectionParams = append(connectionParams, fmt.Sprintf("wait_timeout=%fs", this.WaitTimeout)) - } return fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?%s", this.User, this.Password, hostname, this.Key.Port, databaseName, strings.Join(connectionParams, "&")) } diff --git a/go/mysql/connection_test.go b/go/mysql/connection_test.go index 6398e0169..cc2271f1d 100644 --- a/go/mysql/connection_test.go +++ b/go/mysql/connection_test.go @@ -82,7 +82,6 @@ func TestGetDBUri(t *testing.T) { c.User = "gromit" c.Password = "penguin" c.Timeout = 1.2345 - c.WaitTimeout = 0 // should be ignored c.TransactionIsolation = transactionIsolation c.Charset = "utf8mb4,utf8,latin1" @@ -96,11 +95,10 @@ func TestGetDBUriWithTLSSetup(t *testing.T) { c.User = "gromit" c.Password = "penguin" c.Timeout = 1.2345 - c.WaitTimeout = 60 c.tlsConfig = &tls.Config{} c.TransactionIsolation = transactionIsolation c.Charset = "utf8mb4_general_ci,utf8_general_ci,latin1" uri := c.GetDBUri("test") - test.S(t).ExpectEquals(uri, `gromit:penguin@tcp(myhost:3306)/test?autocommit=true&interpolateParams=true&charset=utf8mb4_general_ci,utf8_general_ci,latin1&tls=ghost&transaction_isolation="REPEATABLE-READ"&timeout=1.234500s&readTimeout=1.234500s&writeTimeout=1.234500s&wait_timeout=60.000000s`) + test.S(t).ExpectEquals(uri, `gromit:penguin@tcp(myhost:3306)/test?autocommit=true&interpolateParams=true&charset=utf8mb4_general_ci,utf8_general_ci,latin1&tls=ghost&transaction_isolation="REPEATABLE-READ"&timeout=1.234500s&readTimeout=1.234500s&writeTimeout=1.234500s`) }