Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 61 additions & 25 deletions pkg/migration/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,17 @@ var (
)

type Runner struct {
migration *Migration
db *sql.DB
dbConfig *dbconn.DBConfig
replicas []*sql.DB
migration *Migration
db *sql.DB
dbConfig *dbconn.DBConfig
replicas []*sql.DB
// monitorDB is a small dedicated connection pool used by the Aurora
// throttlers to poll perf-schema / global-status. Sharing the main
// r.db pool let throttler polls queue behind chunk writes, which
// delayed the very signal we wanted to react to (and counted the
// throttler's own SELECT as an active query thread). nil unless Aurora
// throttling is enabled.
monitorDB *sql.DB
checkpointTable *table.TableInfo

// Changes encapsulates all changes
Expand Down Expand Up @@ -670,6 +677,9 @@ func (r *Runner) closeReplicas() error {
// - one replication throttler per --replica-dsn (slowest wins)
// - a commit-latency throttler if the source is detected as Aurora and
// --max-commit-latency is positive (issue #468)
// - an active-threads throttler if the source is detected as Aurora and
// the migration user can read the relevant perf-schema tables (issue
// #831)
Comment thread
morgo marked this conversation as resolved.
//
// Multiple replica DSNs can be specified as a comma-separated list.
// This is common logic shared between resume and new migration paths.
Expand All @@ -691,26 +701,38 @@ func (r *Runner) setupThrottler(ctx context.Context) error {
throttlers = append(throttlers, replicaThrottlers...)
}

if r.migration.MaxCommitLatency > 0 {
isAurora, err := throttler.IsAurora(ctx, r.db)
if err != nil {
// Probe failure (e.g., performance_schema disabled, no privileges)
// is non-fatal — Aurora-only feature on a non-Aurora server, or
// a perf-schema-locked Aurora user. Log at Debug so operators can
// diagnose if they expected throttling, without alerting users
// who don't care.
r.logger.Debug("Aurora probe failed, skipping commit-latency throttler", "error", err)
} else if isAurora {
cl, err := throttler.NewCommitLatencyThrottler(r.db, r.migration.MaxCommitLatency, r.logger)
if err != nil {
_ = r.closeReplicas()
return fmt.Errorf("could not create commit-latency throttler: %w", err)
}
r.logger.Info("Aurora detected, enabling commit-latency throttler",
"threshold", r.migration.MaxCommitLatency)
throttlers = append(throttlers, cl)
}
// Aurora throttlers — assembled by the shared throttler.AuroraSetup
// helper so the move runner can use the same wiring. The two Aurora
// throttlers (commit-latency, active-threads) have independent gates:
// setting MaxCommitLatency=0 disables only commit-latency, leaving
// active-threads enabled when Aurora is detected and the user has
// SELECT on performance_schema.threads + events_waits_current. Build
// returns a zero result on non-Aurora sources so this call is safe
// to make unconditionally.
//
// OpenMonitor is invoked lazily by the helper only after IsAurora
// returns true AND at least one throttler will be built, so non-
// Aurora users never pay the connect cost. MaxOpenConnections=2 lets
// both Aurora throttlers poll concurrently without serializing on a
// single conn, with a touch of headroom.
auroraRes, err := throttler.AuroraSetup{
Source: r.db,
OpenMonitor: func() (*sql.DB, error) {
monitorCfg := *r.dbConfig // shallow copy — MaxOpenConnections is value-typed
monitorCfg.MaxOpenConnections = 2
return dbconn.NewWithConnectionType(r.dsn(), &monitorCfg, "monitor database")
},
CommitLatencyThreshold: r.migration.MaxCommitLatency,
Logger: r.logger,
}.Build(ctx)
if err != nil {
_ = r.closeReplicas()
return err
}
if auroraRes.MonitorDB != nil {
r.monitorDB = auroraRes.MonitorDB
}
throttlers = append(throttlers, auroraRes.Throttlers...)

if len(throttlers) == 0 {
return nil // use default Noop throttler
Expand All @@ -721,8 +743,13 @@ func (r *Runner) setupThrottler(ctx context.Context) error {
if err := r.throttler.Open(ctx); err != nil {
// multiThrottler already closes child throttlers on partial Open
// failure, but the *sql.DB connections backing replica throttlers
// are owned by r.replicas — clean those up too rather than leaving
// them dangling until Runner.Close() runs.
// are owned by r.replicas (and the Aurora monitor pool is owned
// by r.monitorDB) — clean those up too rather than leaving them
// dangling until Runner.Close() runs.
if r.monitorDB != nil {
_ = r.monitorDB.Close()
r.monitorDB = nil
}
_ = r.closeReplicas()
return fmt.Errorf("opening throttlers: %w", err)
}
Expand Down Expand Up @@ -1024,6 +1051,15 @@ func (r *Runner) Close() error {
errs = append(errs, err)
}
}
// Close the Aurora monitor pool after the throttler so its background
// pollers observe Close() / ctx cancellation before we yank the pool
// out from under them. No-op when not Aurora.
if r.monitorDB != nil {
if err := r.monitorDB.Close(); err != nil {
errs = append(errs, err)
}
r.monitorDB = nil
}
if err := r.closeReplicas(); err != nil {
errs = append(errs, err)
}
Expand Down
134 changes: 134 additions & 0 deletions pkg/throttler/aurora.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package throttler

import (
"context"
"database/sql"
"fmt"
"log/slog"
"time"
)

// AuroraSetup bundles the inputs needed to detect an Aurora source and to
// assemble the Aurora-only throttlers (commit-latency and active-threads).
// Keeping this wiring in one place lets the migration runner and the move
// runner share the IsAurora probe, monitor-pool creation, privilege probe
// and throttler construction instead of each re-implementing them.
//
// This package deliberately avoids importing dbconn: the dedicated monitor
// pool is created through the caller-provided OpenMonitor closure, so DSN,
// TLS and pool-sizing decisions stay with the caller.
//
// Each Aurora throttler is gated on its own condition; turning one off has
// no effect on the other. Build documents the exact rules.
type AuroraSetup struct {
	// Source is the caller's primary *sql.DB. Build uses it only for the
	// one-off IsAurora and CanReadActiveThreads probes, which run once at
	// setup time and are cheap enough to share the main pool.
	Source *sql.DB

	// OpenMonitor creates the dedicated *sql.DB that the Aurora throttlers
	// use for their recurring polls. Build invokes it at most once, and
	// only after IsAurora has reported true and at least one Aurora
	// throttler is going to be built, so non-Aurora callers never pay for
	// the connection. Closing the returned DB is the caller's job; it is
	// handed back via AuroraResult.MonitorDB.
	OpenMonitor func() (*sql.DB, error)

	// CommitLatencyThreshold controls the commit-latency throttler only.
	// Zero or a negative value turns that throttler off; the active-threads
	// throttler is unaffected and is still enabled when Aurora is detected
	// and the privilege probe succeeds.
	CommitLatencyThreshold time.Duration

	// Logger receives the diagnostic messages emitted by Build.
	Logger *slog.Logger
}

// AuroraResult carries what AuroraSetup.Build produced. The two fields move
// together: an empty Throttlers slice comes with a nil MonitorDB (no pool was
// opened, so there is nothing to close), while a non-empty Throttlers slice
// comes with a non-nil MonitorDB whose lifecycle belongs to the caller.
type AuroraResult struct {
	// Throttlers holds the Aurora throttlers that were enabled.
	Throttlers []Throttler
	// MonitorDB is the dedicated pool backing Throttlers; the caller closes it.
	MonitorDB *sql.DB
}

// Build probes the source for Aurora and assembles the Aurora throttlers.
//
// The two Aurora throttlers have independent gates:
// - Commit-latency is enabled when CommitLatencyThreshold > 0.
// - Active-threads is enabled when Aurora is detected and the user has
// SELECT on performance_schema.threads and events_waits_current.
//
// Returns a zero AuroraResult (nil throttlers, nil monitor DB, nil error) in
// any of these benign cases:
// - IsAurora probe failed (non-Aurora source, or perf_schema not readable —
// logged at Debug so the non-Aurora common case stays quiet)
// - IsAurora returned false
// - Aurora was detected but neither gate produced a throttler (commit-
// latency disabled by threshold AND active-threads denied by privilege)
//
// Returns a non-nil error only when something the caller almost certainly
// wants to surface goes wrong: OpenMonitor itself fails, or constructing a
// throttler fails for a reason other than "missing privileges" (which is
// expected and downgraded to Info — see CanReadActiveThreads handling).
//
// On a successful Aurora build, the monitor pool is opened and the enabled
// throttlers are returned. The caller composes them via NewMultiThrottler
// with whatever other throttlers it has and is responsible for calling
// Close on each throttler AND Close on MonitorDB at shutdown.
func (s AuroraSetup) Build(ctx context.Context) (AuroraResult, error) {
Comment thread
morgo marked this conversation as resolved.
isAurora, err := IsAurora(ctx, s.Source)
switch {
case err != nil:
// Non-Aurora MySQL with locked-down perf_schema lands here too;
// keep it at Debug so the common case isn't noisy.
s.Logger.Debug("Aurora probe failed, skipping Aurora throttlers", "error", err)
return AuroraResult{}, nil
case !isAurora:
return AuroraResult{}, nil
}

// Decide independently which throttlers will be built before opening
// the monitor pool, so we don't open a pool we'd immediately discard.
enableCommitLatency := s.CommitLatencyThreshold > 0
enableActiveThreads := true
if ok, probeErr := CanReadActiveThreads(ctx, s.Source); probeErr != nil {
// Surface at Info because Aurora is confirmed and the operator
// likely expected this throttler to be enabled.
s.Logger.Info("Aurora active-threads throttler disabled: grant SELECT on performance_schema.threads and performance_schema.events_waits_current to enable",
"error", probeErr)
enableActiveThreads = false
Comment thread
morgo marked this conversation as resolved.
Outdated
} else if !ok {
enableActiveThreads = false
}
if !enableCommitLatency && !enableActiveThreads {
return AuroraResult{}, nil
}

monitorDB, err := s.OpenMonitor()
if err != nil {
return AuroraResult{}, fmt.Errorf("could not open monitor DB for Aurora throttlers: %w", err)
}

var throttlers []Throttler

if enableCommitLatency {
cl, err := NewCommitLatencyThrottler(monitorDB, s.CommitLatencyThreshold, s.Logger)
if err != nil {
_ = monitorDB.Close()
return AuroraResult{}, fmt.Errorf("could not create commit-latency throttler: %w", err)
}
s.Logger.Info("Aurora detected, enabling commit-latency throttler",
"threshold", s.CommitLatencyThreshold)
throttlers = append(throttlers, cl)
}

if enableActiveThreads {
at, err := NewActiveThreadsThrottler(monitorDB, s.Logger)
if err != nil {
_ = monitorDB.Close()
return AuroraResult{}, fmt.Errorf("could not create active-threads throttler: %w", err)
}
throttlers = append(throttlers, at)
}

return AuroraResult{Throttlers: throttlers, MonitorDB: monitorDB}, nil
}
Loading
Loading