From 2a301a340fca44132b1b03431de52f94539cf5df Mon Sep 17 00:00:00 2001 From: Nedyalko Dyakov <1547186+ndyakov@users.noreply.github.com> Date: Mon, 1 Dec 2025 11:08:26 +0200 Subject: [PATCH 1/2] [maintnotif] Cluster specific handlers (#3613) * maint notification handlers for cluster messages * unrelax all conns * trigger ci on feature branches --- .github/workflows/build.yml | 2 +- .../maintnotifications/logs/log_messages.go | 45 ++++++ maintnotifications/README.md | 10 +- maintnotifications/manager.go | 52 ++++++- maintnotifications/manager_test.go | 2 + maintnotifications/pool_hook_test.go | 20 ++- .../push_notification_handler.go | 132 ++++++++++++++++-- 7 files changed, 244 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b3fc813611..14e01860ef 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ on: push: branches: [master, v9, 'v9.*'] pull_request: - branches: [master, v9, v9.7, v9.8, 'ndyakov/*', 'ofekshenawa/*', 'htemelski-redis/*', 'ce/*'] + branches: [master, v9, v9.7, v9.8, 'ndyakov/**', 'ofekshenawa/**', 'ce/**'] permissions: contents: read diff --git a/internal/maintnotifications/logs/log_messages.go b/internal/maintnotifications/logs/log_messages.go index 34cb1692d9..7418d9f652 100644 --- a/internal/maintnotifications/logs/log_messages.go +++ b/internal/maintnotifications/logs/log_messages.go @@ -121,6 +121,11 @@ const ( UnrelaxedTimeoutMessage = "clearing relaxed timeout" ManagerNotInitializedMessage = "manager not initialized" FailedToMarkForHandoffMessage = "failed to mark connection for handoff" + InvalidSeqIDInSMigratingNotificationMessage = "invalid SeqID in SMIGRATING notification" + InvalidSeqIDInSMigratedNotificationMessage = "invalid SeqID in SMIGRATED notification" + InvalidHostPortInSMigratedNotificationMessage = "invalid host:port in SMIGRATED notification" + SlotMigratingMessage = "slots migrating, applying relaxed timeout" + SlotMigratedMessage = "slots migrated, triggering cluster state reload" // ======================================== // used in pool/conn @@ -623,3 +628,43 @@ func ExtractDataFromLogMessage(logMessage string) map[string]interface{} { // If JSON parsing fails, return empty map return result } + +// Cluster notification functions +func InvalidSeqIDInSMigratingNotification(seqID interface{}) string { + message := fmt.Sprintf("%s: %v", InvalidSeqIDInSMigratingNotificationMessage, seqID) + return appendJSONIfDebug(message, map[string]interface{}{ + "seqID": fmt.Sprintf("%v", seqID), + }) +} + +func InvalidSeqIDInSMigratedNotification(seqID interface{}) string { + message := fmt.Sprintf("%s: %v", InvalidSeqIDInSMigratedNotificationMessage, seqID) + return appendJSONIfDebug(message, map[string]interface{}{ + "seqID": fmt.Sprintf("%v", seqID), + }) +} + +func InvalidHostPortInSMigratedNotification(hostPort interface{}) string { + message := fmt.Sprintf("%s: %v", InvalidHostPortInSMigratedNotificationMessage, hostPort) + return appendJSONIfDebug(message, map[string]interface{}{ + "hostPort": fmt.Sprintf("%v", hostPort), + }) +} + +func SlotMigrating(connID uint64, seqID int64, slotRanges []string) string { + message := fmt.Sprintf("conn[%d] %s seqID=%d slots=%v", connID, SlotMigratingMessage, seqID, slotRanges) + return appendJSONIfDebug(message, map[string]interface{}{ + "connID": connID, + "seqID": seqID, + "slotRanges": slotRanges, + }) +} + +func SlotMigrated(seqID int64, hostPort string, slotRanges []string) string { + message := fmt.Sprintf("%s 
seqID=%d host:port=%s slots=%v", SlotMigratedMessage, seqID, hostPort, slotRanges) + return appendJSONIfDebug(message, map[string]interface{}{ + "seqID": seqID, + "hostPort": hostPort, + "slotRanges": slotRanges, + }) +} diff --git a/maintnotifications/README.md b/maintnotifications/README.md index 2ac6b9cb1d..c931e61f8f 100644 --- a/maintnotifications/README.md +++ b/maintnotifications/README.md @@ -2,8 +2,14 @@ Seamless Redis connection handoffs during cluster maintenance operations without dropping connections. -## ⚠️ **Important Note** -**Maintenance notifications are currently supported only in standalone Redis clients.** Cluster clients (ClusterClient, FailoverClient, etc.) do not yet support this functionality. +## Cluster Support + +**Cluster notifications are now supported for ClusterClient!** + +- **SMIGRATING**: `["SMIGRATING", SeqID, slot/range, ...]` - Relaxes timeouts when slots are being migrated +- **SMIGRATED**: `["SMIGRATED", SeqID, host:port, slot/range, ...]` - Reloads cluster state when slot migration completes + +**Note:** Other maintenance notifications (MOVING, MIGRATING, MIGRATED, FAILING_OVER, FAILED_OVER) are supported only in standalone Redis clients. Cluster clients support SMIGRATING and SMIGRATED for cluster-specific slot migration handling. ## Quick Start diff --git a/maintnotifications/manager.go b/maintnotifications/manager.go index 775c163e14..cf35b9605a 100644 --- a/maintnotifications/manager.go +++ b/maintnotifications/manager.go @@ -18,11 +18,13 @@ import ( // Push notification type constants for maintenance const ( - NotificationMoving = "MOVING" - NotificationMigrating = "MIGRATING" - NotificationMigrated = "MIGRATED" - NotificationFailingOver = "FAILING_OVER" - NotificationFailedOver = "FAILED_OVER" + NotificationMoving = "MOVING" // Per-connection handoff notification + NotificationMigrating = "MIGRATING" // Per-connection migration start notification - relaxes timeouts + NotificationMigrated = "MIGRATED" // Per-connection migration complete notification - clears relaxed timeouts + NotificationFailingOver = "FAILING_OVER" // Per-connection failover start notification - relaxes timeouts + NotificationFailedOver = "FAILED_OVER" // Per-connection failover complete notification - clears relaxed timeouts + NotificationSMigrating = "SMIGRATING" // Cluster slot migrating notification - relaxes timeouts + NotificationSMigrated = "SMIGRATED" // Cluster slot migrated notification - triggers cluster state reload ) // maintenanceNotificationTypes contains all notification types that maintenance handles @@ -32,6 +34,8 @@ var maintenanceNotificationTypes = []string{ NotificationMigrated, NotificationFailingOver, NotificationFailedOver, + NotificationSMigrating, + NotificationSMigrated, } // NotificationHook is called before and after notification processing @@ -65,6 +69,10 @@ type Manager struct { // MOVING operation tracking - using sync.Map for better concurrent performance activeMovingOps sync.Map // map[MovingOperationKey]*MovingOperation + // SMIGRATED notification deduplication - tracks processed SeqIDs + // Multiple connections may receive the same SMIGRATED notification + processedSMigratedSeqIDs sync.Map // map[int64]bool + // Atomic state tracking - no locks needed for state queries activeOperationCount atomic.Int64 // Number of active operations closed atomic.Bool // Manager closed state @@ -73,6 +81,9 @@ type Manager struct { hooks []NotificationHook hooksMu sync.RWMutex // Protects hooks slice poolHooksRef *PoolHook + + // Cluster state reload 
callback for SMIGRATED notifications + clusterStateReloadCallback ClusterStateReloadCallback } // MovingOperation tracks an active MOVING operation. @@ -83,6 +94,14 @@ type MovingOperation struct { Deadline time.Time } +// ClusterStateReloadCallback is a callback function that triggers cluster state reload. +// This is used by node clients to notify their parent ClusterClient about SMIGRATED notifications. +// The hostPort parameter indicates the destination node (e.g., "127.0.0.1:6379"). +// The slotRanges parameter contains the migrated slots (e.g., ["1234", "5000-6000"]). +// Currently, implementations typically reload the entire cluster state, but in the future +// this could be optimized to reload only the specific slots. +type ClusterStateReloadCallback func(ctx context.Context, hostPort string, slotRanges []string) + // NewManager creates a new simplified manager. func NewManager(client interfaces.ClientInterface, pool pool.Pooler, config *Config) (*Manager, error) { if client == nil { @@ -223,6 +242,15 @@ func (hm *Manager) GetActiveOperationCount() int64 { return hm.activeOperationCount.Load() } +// MarkSMigratedSeqIDProcessed attempts to mark a SMIGRATED SeqID as processed. +// Returns true if this is the first time processing this SeqID (should process), +// false if it was already processed (should skip). +// This prevents duplicate processing when multiple connections receive the same notification. +func (hm *Manager) MarkSMigratedSeqIDProcessed(seqID int64) bool { + _, alreadyProcessed := hm.processedSMigratedSeqIDs.LoadOrStore(seqID, true) + return !alreadyProcessed // Return true if NOT already processed +} + // Close closes the manager. func (hm *Manager) Close() error { // Use atomic operation for thread-safe close check @@ -318,3 +346,17 @@ func (hm *Manager) AddNotificationHook(notificationHook NotificationHook) { defer hm.hooksMu.Unlock() hm.hooks = append(hm.hooks, notificationHook) } + +// SetClusterStateReloadCallback sets the callback function that will be called when a SMIGRATED notification is received. +// This allows node clients to notify their parent ClusterClient to reload cluster state. +func (hm *Manager) SetClusterStateReloadCallback(callback ClusterStateReloadCallback) { + hm.clusterStateReloadCallback = callback +} + +// TriggerClusterStateReload calls the cluster state reload callback if it's set. +// This is called when a SMIGRATED notification is received. 
+func (hm *Manager) TriggerClusterStateReload(ctx context.Context, hostPort string, slotRanges []string) { + if hm.clusterStateReloadCallback != nil { + hm.clusterStateReloadCallback(ctx, hostPort, slotRanges) + } +} diff --git a/maintnotifications/manager_test.go b/maintnotifications/manager_test.go index 35dc4a32ab..aed0c9f7d5 100644 --- a/maintnotifications/manager_test.go +++ b/maintnotifications/manager_test.go @@ -217,6 +217,8 @@ func TestManagerRefactoring(t *testing.T) { NotificationMigrated, NotificationFailingOver, NotificationFailedOver, + NotificationSMigrating, + NotificationSMigrated, } if len(maintenanceNotificationTypes) != len(expectedTypes) { diff --git a/maintnotifications/pool_hook_test.go b/maintnotifications/pool_hook_test.go index 6ec61eeda0..972605949d 100644 --- a/maintnotifications/pool_hook_test.go +++ b/maintnotifications/pool_hook_test.go @@ -700,9 +700,25 @@ func TestConnectionHook(t *testing.T) { t.Errorf("Connection should be pooled after handoff (shouldPool=%v, shouldRemove=%v)", shouldPool, shouldRemove) } - // Wait for handoff to complete - time.Sleep(50 * time.Millisecond) + // Wait for handoff to complete with polling instead of fixed sleep + // This avoids flakiness on slow CI runners where 50ms may not be enough + maxWait := 500 * time.Millisecond + pollInterval := 10 * time.Millisecond + deadline := time.Now().Add(maxWait) + handoffCompleted := false + for time.Now().Before(deadline) { + if conn.IsUsable() && !processor.IsHandoffPending(conn) { + handoffCompleted = true + break + } + time.Sleep(pollInterval) + } + + if !handoffCompleted { + t.Fatalf("Handoff did not complete within %v (IsUsable=%v, IsHandoffPending=%v)", + maxWait, conn.IsUsable(), processor.IsHandoffPending(conn)) + } // After handoff completion, connection should be usable again if !conn.IsUsable() { t.Error("Connection should be usable after handoff completion") diff --git a/maintnotifications/push_notification_handler.go b/maintnotifications/push_notification_handler.go index 937b4ae82e..020400208e 100644 --- a/maintnotifications/push_notification_handler.go +++ b/maintnotifications/push_notification_handler.go @@ -49,6 +49,10 @@ func (snh *NotificationHandler) HandlePushNotification(ctx context.Context, hand err = snh.handleFailingOver(ctx, handlerCtx, modifiedNotification) case NotificationFailedOver: err = snh.handleFailedOver(ctx, handlerCtx, modifiedNotification) + case NotificationSMigrating: + err = snh.handleSMigrating(ctx, handlerCtx, modifiedNotification) + case NotificationSMigrated: + err = snh.handleSMigrated(ctx, handlerCtx, modifiedNotification) default: // Ignore other notification types (e.g., pub/sub messages) err = nil @@ -61,7 +65,9 @@ func (snh *NotificationHandler) HandlePushNotification(ctx context.Context, hand } // handleMoving processes MOVING notifications. -// ["MOVING", seqNum, timeS, endpoint] - per-connection handoff +// MOVING indicates that a connection should be handed off to a new endpoint. +// This is a per-connection notification that triggers connection handoff. +// Expected format: ["MOVING", seqNum, timeS, endpoint] func (snh *NotificationHandler) handleMoving(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { if len(notification) < 3 { internal.Logger.Printf(ctx, logs.InvalidNotification("MOVING", notification)) @@ -167,9 +173,10 @@ func (snh *NotificationHandler) markConnForHandoff(conn *pool.Conn, newEndpoint } // handleMigrating processes MIGRATING notifications. 
+// MIGRATING indicates that a connection migration is starting. +// This is a per-connection notification that applies relaxed timeouts. +// Expected format: ["MIGRATING", ...] func (snh *NotificationHandler) handleMigrating(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { - // MIGRATING notifications indicate that a connection is about to be migrated - // Apply relaxed timeouts to the specific connection that received this notification if len(notification) < 2 { internal.Logger.Printf(ctx, logs.InvalidNotification("MIGRATING", notification)) return ErrInvalidNotification @@ -195,9 +202,10 @@ func (snh *NotificationHandler) handleMigrating(ctx context.Context, handlerCtx } // handleMigrated processes MIGRATED notifications. +// MIGRATED indicates that a connection migration has completed. +// This is a per-connection notification that clears relaxed timeouts. +// Expected format: ["MIGRATED", ...] func (snh *NotificationHandler) handleMigrated(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { - // MIGRATED notifications indicate that a connection migration has completed - // Restore normal timeouts for the specific connection that received this notification if len(notification) < 2 { internal.Logger.Printf(ctx, logs.InvalidNotification("MIGRATED", notification)) return ErrInvalidNotification @@ -224,9 +232,10 @@ func (snh *NotificationHandler) handleMigrated(ctx context.Context, handlerCtx p } // handleFailingOver processes FAILING_OVER notifications. +// FAILING_OVER indicates that a failover is starting. +// This is a per-connection notification that applies relaxed timeouts. +// Expected format: ["FAILING_OVER", ...] func (snh *NotificationHandler) handleFailingOver(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { - // FAILING_OVER notifications indicate that a connection is about to failover - // Apply relaxed timeouts to the specific connection that received this notification if len(notification) < 2 { internal.Logger.Printf(ctx, logs.InvalidNotification("FAILING_OVER", notification)) return ErrInvalidNotification @@ -253,9 +262,10 @@ func (snh *NotificationHandler) handleFailingOver(ctx context.Context, handlerCt } // handleFailedOver processes FAILED_OVER notifications. +// FAILED_OVER indicates that a failover has completed. +// This is a per-connection notification that clears relaxed timeouts. +// Expected format: ["FAILED_OVER", ...] func (snh *NotificationHandler) handleFailedOver(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { - // FAILED_OVER notifications indicate that a connection failover has completed - // Restore normal timeouts for the specific connection that received this notification if len(notification) < 2 { internal.Logger.Printf(ctx, logs.InvalidNotification("FAILED_OVER", notification)) return ErrInvalidNotification @@ -280,3 +290,107 @@ func (snh *NotificationHandler) handleFailedOver(ctx context.Context, handlerCtx conn.ClearRelaxedTimeout() return nil } + +// handleSMigrating processes SMIGRATING notifications. +// SMIGRATING indicates that a cluster slot is in the process of migrating to a different node. +// This is a per-connection notification that applies relaxed timeouts during slot migration. +// Expected format: ["SMIGRATING", SeqID, slot/range1-range2, ...] 
+func (snh *NotificationHandler) handleSMigrating(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { + if len(notification) < 3 { + internal.Logger.Printf(ctx, logs.InvalidNotification("SMIGRATING", notification)) + return ErrInvalidNotification + } + + // Extract SeqID (position 1) + seqID, ok := notification[1].(int64) + if !ok { + internal.Logger.Printf(ctx, logs.InvalidSeqIDInSMigratingNotification(notification[1])) + return ErrInvalidNotification + } + + // Extract slot ranges (position 2+) + // For now, we just extract them for logging + // Format can be: single slot "1234" or range "100-200" + var slotRanges []string + for i := 2; i < len(notification); i++ { + if slotRange, ok := notification[i].(string); ok { + slotRanges = append(slotRanges, slotRange) + } + } + + if handlerCtx.Conn == nil { + internal.Logger.Printf(ctx, logs.NoConnectionInHandlerContext("SMIGRATING")) + return ErrInvalidNotification + } + + conn, ok := handlerCtx.Conn.(*pool.Conn) + if !ok { + internal.Logger.Printf(ctx, logs.InvalidConnectionTypeInHandlerContext("SMIGRATING", handlerCtx.Conn, handlerCtx)) + return ErrInvalidNotification + } + + // Apply relaxed timeout to this specific connection + if internal.LogLevel.InfoOrAbove() { + internal.Logger.Printf(ctx, logs.SlotMigrating(conn.GetID(), seqID, slotRanges)) + } + conn.SetRelaxedTimeout(snh.manager.config.RelaxedTimeout, snh.manager.config.RelaxedTimeout) + return nil +} + +// handleSMigrated processes SMIGRATED notifications. +// SMIGRATED indicates that a cluster slot has finished migrating to a different node. +// This is a cluster-level notification that triggers cluster state reload. +// Expected format: ["SMIGRATED", SeqID, host:port, slot1/range1-range2, ...] +// Note: Multiple connections may receive the same notification, so we deduplicate by SeqID before triggering reload. +// but we still process the notification on each connection to clear the relaxed timeout. 
+func (snh *NotificationHandler) handleSMigrated(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error { + if len(notification) < 4 { + internal.Logger.Printf(ctx, logs.InvalidNotification("SMIGRATED", notification)) + return ErrInvalidNotification + } + + // Extract SeqID (position 1) + seqID, ok := notification[1].(int64) + if !ok { + internal.Logger.Printf(ctx, logs.InvalidSeqIDInSMigratedNotification(notification[1])) + return ErrInvalidNotification + } + + // Deduplicate by SeqID - multiple connections may receive the same notification + if snh.manager.MarkSMigratedSeqIDProcessed(seqID) { + // Extract host:port (position 2) + hostPort, ok := notification[2].(string) + if !ok { + internal.Logger.Printf(ctx, logs.InvalidHostPortInSMigratedNotification(notification[2])) + return ErrInvalidNotification + } + + // Extract slot ranges (position 3+) + // For now, we just extract them for logging + // Format can be: single slot "1234" or range "100-200" + var slotRanges []string + for i := 3; i < len(notification); i++ { + if slotRange, ok := notification[i].(string); ok { + slotRanges = append(slotRanges, slotRange) + } + } + + if internal.LogLevel.InfoOrAbove() { + internal.Logger.Printf(ctx, logs.SlotMigrated(seqID, hostPort, slotRanges)) + } + // Trigger cluster state reload via callback, passing host:port and slot ranges + // For now, implementations just log these and trigger a full reload + // In the future, this could be optimized to reload only the specific slots + snh.manager.TriggerClusterStateReload(ctx, hostPort, slotRanges) + } + + // clear relaxed timeout + if handlerCtx.Conn != nil { + conn, ok := handlerCtx.Conn.(*pool.Conn) + if ok { + conn.ClearRelaxedTimeout() + } + } + + return nil +} From 6332cfdde3f14bfe4abe93f871acc3784a235091 Mon Sep 17 00:00:00 2001 From: Nedyalko Dyakov <1547186+ndyakov@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:24:11 +0200 Subject: [PATCH 2/2] feat(maintnotif): lazy cluster topology reload (#3614) * lazy cluster topology reload * fix discrepancies between options structs * Update osscluster_lazy_reload_test.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update osscluster.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- commands_test.go | 20 +++- osscluster.go | 135 +++++++++++++++++----- osscluster_lazy_reload_test.go | 201 +++++++++++++++++++++++++++++++++ sentinel.go | 124 +++++++++++++------- universal.go | 137 +++++++++++++--------- 5 files changed, 491 insertions(+), 126 deletions(-) create mode 100644 osscluster_lazy_reload_test.go diff --git a/commands_test.go b/commands_test.go index c5fa40e603..21f8952858 100644 --- a/commands_test.go +++ b/commands_test.go @@ -8922,27 +8922,37 @@ var _ = Describe("Commands", func() { const key = "latency-monitor-threshold" old := client.ConfigGet(ctx, key).Val() - client.ConfigSet(ctx, key, "1") + // Use a higher threshold (100ms) to avoid capturing normal operations + // that could cause flakiness due to timing variations + client.ConfigSet(ctx, key, "100") defer client.ConfigSet(ctx, key, old[key]) result, err := client.Latency(ctx).Result() Expect(err).NotTo(HaveOccurred()) Expect(len(result)).Should(Equal(0)) - err = client.Do(ctx, "DEBUG", "SLEEP", 0.01).Err() + // Use a longer sleep (150ms) to ensure it exceeds the 100ms threshold + err = client.Do(ctx, "DEBUG", "SLEEP", 0.15).Err() Expect(err).NotTo(HaveOccurred()) 
result, err = client.Latency(ctx).Result() Expect(err).NotTo(HaveOccurred()) - Expect(len(result)).Should(Equal(1)) + Expect(len(result)).Should(BeNumerically(">=", 1)) // reset latency by event name - err = client.LatencyReset(ctx, result[0].Name).Err() + eventName := result[0].Name + err = client.LatencyReset(ctx, eventName).Err() Expect(err).NotTo(HaveOccurred()) + // Verify the specific event was reset (not that all events are gone) + // This avoids flakiness from other operations triggering latency events result, err = client.Latency(ctx).Result() Expect(err).NotTo(HaveOccurred()) - Expect(len(result)).Should(Equal(0)) + for _, event := range result { + if event.Name == eventName { + Fail("Event " + eventName + " should have been reset") + } + } }) }) }) diff --git a/osscluster.go b/osscluster.go index 6994ae83f6..19b915c648 100644 --- a/osscluster.go +++ b/osscluster.go @@ -91,13 +91,29 @@ type ClusterOptions struct { MinRetryBackoff time.Duration MaxRetryBackoff time.Duration - DialTimeout time.Duration - ReadTimeout time.Duration - WriteTimeout time.Duration + DialTimeout time.Duration + ReadTimeout time.Duration + WriteTimeout time.Duration + + // DialerRetries is the maximum number of retry attempts when dialing fails. + // + // default: 5 + DialerRetries int + + // DialerRetryTimeout is the backoff duration between retry attempts. + // + // default: 100 milliseconds + DialerRetryTimeout time.Duration + ContextTimeoutEnabled bool - PoolFIFO bool - PoolSize int // applies per cluster node and not for the whole cluster + PoolFIFO bool + PoolSize int // applies per cluster node and not for the whole cluster + + // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. + // If <= 0, defaults to PoolSize. If > PoolSize, it will be capped at PoolSize. + MaxConcurrentDials int + PoolTimeout time.Duration MinIdleConns int MaxIdleConns int @@ -157,7 +173,8 @@ type ClusterOptions struct { // cluster upgrade notifications gracefully and manage connection/pool state // transitions seamlessly. Requires Protocol: 3 (RESP3) for push notifications. // If nil, maintnotifications upgrades are in "auto" mode and will be enabled if the server supports it. - // The ClusterClient does not directly work with maintnotifications, it is up to the clients in the Nodes map to work with maintnotifications. + // The ClusterClient supports SMIGRATING and SMIGRATED notifications for cluster state management. + // Individual node clients handle other maintenance notifications (MOVING, MIGRATING, etc.). MaintNotificationsConfig *maintnotifications.Config // ShardPicker is used to pick a shard when the request_policy is // ReqDefault and the command has no keys. 
@@ -176,9 +193,24 @@ func (opt *ClusterOptions) init() { opt.ReadOnly = true } + if opt.DialTimeout == 0 { + opt.DialTimeout = 5 * time.Second + } + if opt.DialerRetries == 0 { + opt.DialerRetries = 5 + } + if opt.DialerRetryTimeout == 0 { + opt.DialerRetryTimeout = 100 * time.Millisecond + } + if opt.PoolSize == 0 { opt.PoolSize = 5 * runtime.GOMAXPROCS(0) } + if opt.MaxConcurrentDials <= 0 { + opt.MaxConcurrentDials = opt.PoolSize + } else if opt.MaxConcurrentDials > opt.PoolSize { + opt.MaxConcurrentDials = opt.PoolSize + } if opt.ReadBufferSize == 0 { opt.ReadBufferSize = proto.DefaultBufferSize } @@ -320,10 +352,13 @@ func setupClusterQueryParams(u *url.URL, o *ClusterOptions) (*ClusterOptions, er o.MinRetryBackoff = q.duration("min_retry_backoff") o.MaxRetryBackoff = q.duration("max_retry_backoff") o.DialTimeout = q.duration("dial_timeout") + o.DialerRetries = q.int("dialer_retries") + o.DialerRetryTimeout = q.duration("dialer_retry_timeout") o.ReadTimeout = q.duration("read_timeout") o.WriteTimeout = q.duration("write_timeout") o.PoolFIFO = q.bool("pool_fifo") o.PoolSize = q.int("pool_size") + o.MaxConcurrentDials = q.int("max_concurrent_dials") o.MinIdleConns = q.int("min_idle_conns") o.MaxIdleConns = q.int("max_idle_conns") o.MaxActiveConns = q.int("max_active_conns") @@ -379,21 +414,25 @@ func (opt *ClusterOptions) clientOptions() *Options { MinRetryBackoff: opt.MinRetryBackoff, MaxRetryBackoff: opt.MaxRetryBackoff, - DialTimeout: opt.DialTimeout, - ReadTimeout: opt.ReadTimeout, - WriteTimeout: opt.WriteTimeout, + DialTimeout: opt.DialTimeout, + DialerRetries: opt.DialerRetries, + DialerRetryTimeout: opt.DialerRetryTimeout, + ReadTimeout: opt.ReadTimeout, + WriteTimeout: opt.WriteTimeout, + ContextTimeoutEnabled: opt.ContextTimeoutEnabled, - PoolFIFO: opt.PoolFIFO, - PoolSize: opt.PoolSize, - PoolTimeout: opt.PoolTimeout, - MinIdleConns: opt.MinIdleConns, - MaxIdleConns: opt.MaxIdleConns, - MaxActiveConns: opt.MaxActiveConns, - ConnMaxIdleTime: opt.ConnMaxIdleTime, - ConnMaxLifetime: opt.ConnMaxLifetime, - ReadBufferSize: opt.ReadBufferSize, - WriteBufferSize: opt.WriteBufferSize, + PoolFIFO: opt.PoolFIFO, + PoolSize: opt.PoolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, + PoolTimeout: opt.PoolTimeout, + MinIdleConns: opt.MinIdleConns, + MaxIdleConns: opt.MaxIdleConns, + MaxActiveConns: opt.MaxActiveConns, + ConnMaxIdleTime: opt.ConnMaxIdleTime, + ConnMaxLifetime: opt.ConnMaxLifetime, + ReadBufferSize: opt.ReadBufferSize, + WriteBufferSize: opt.WriteBufferSize, DisableIdentity: opt.DisableIdentity, DisableIndentity: opt.DisableIdentity, IdentitySuffix: opt.IdentitySuffix, @@ -984,9 +1023,11 @@ func (c *clusterState) slotNodes(slot int) []*clusterNode { //------------------------------------------------------------------------------ type clusterStateHolder struct { - load func(ctx context.Context) (*clusterState, error) - state atomic.Value - reloading uint32 // atomic + load func(ctx context.Context) (*clusterState, error) + + state atomic.Value + reloading uint32 // atomic + reloadPending uint32 // atomic - set to 1 when reload is requested during active reload } func newClusterStateHolder(load func(ctx context.Context) (*clusterState, error)) *clusterStateHolder { @@ -1005,17 +1046,37 @@ func (c *clusterStateHolder) Reload(ctx context.Context) (*clusterState, error) } func (c *clusterStateHolder) LazyReload() { + // If already reloading, mark that another reload is pending if !atomic.CompareAndSwapUint32(&c.reloading, 0, 1) { + atomic.StoreUint32(&c.reloadPending, 1) 
return } + go func() { - defer atomic.StoreUint32(&c.reloading, 0) + for { + _, err := c.Reload(context.Background()) + if err != nil { + atomic.StoreUint32(&c.reloadPending, 0) + atomic.StoreUint32(&c.reloading, 0) + return + } - _, err := c.Reload(context.Background()) - if err != nil { - return + // Clear pending flag after reload completes, before cooldown + // This captures notifications that arrived during the reload + atomic.StoreUint32(&c.reloadPending, 0) + + // Wait cooldown period + time.Sleep(200 * time.Millisecond) + + // Check if another reload was requested during cooldown + if atomic.LoadUint32(&c.reloadPending) == 0 { + // No pending reload, we're done + atomic.StoreUint32(&c.reloading, 0) + return + } + + // Pending reload requested, loop to reload again } - time.Sleep(200 * time.Millisecond) }() } @@ -1079,6 +1140,26 @@ func NewClusterClient(opt *ClusterOptions) *ClusterClient { txPipeline: c.processTxPipeline, }) + // Set up SMIGRATED notification handling for cluster state reload + // When a node client receives a SMIGRATED notification, it should trigger + // cluster state reload on the parent ClusterClient + if opt.MaintNotificationsConfig != nil { + c.nodes.OnNewNode(func(nodeClient *Client) { + manager := nodeClient.GetMaintNotificationsManager() + if manager != nil { + manager.SetClusterStateReloadCallback(func(ctx context.Context, hostPort string, slotRanges []string) { + // Log the migration details for now + if internal.LogLevel.InfoOrAbove() { + internal.Logger.Printf(ctx, "cluster: slots %v migrated to %s, reloading cluster state", slotRanges, hostPort) + } + // Currently we reload the entire cluster state + // In the future, this could be optimized to reload only the specific slots + c.state.LazyReload() + }) + } + }) + } + return c } diff --git a/osscluster_lazy_reload_test.go b/osscluster_lazy_reload_test.go new file mode 100644 index 0000000000..994fd40e74 --- /dev/null +++ b/osscluster_lazy_reload_test.go @@ -0,0 +1,201 @@ +package redis + +import ( + "context" + "sync/atomic" + "testing" + "time" +) + +// TestLazyReloadQueueBehavior tests that LazyReload properly queues reload requests +func TestLazyReloadQueueBehavior(t *testing.T) { + t.Run("SingleReload", func(t *testing.T) { + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + time.Sleep(50 * time.Millisecond) // Simulate reload work + return &clusterState{}, nil + }) + + // Trigger one reload + holder.LazyReload() + + // Wait for reload to complete + time.Sleep(300 * time.Millisecond) + + if count := reloadCount.Load(); count != 1 { + t.Errorf("Expected 1 reload, got %d", count) + } + }) + + t.Run("ConcurrentReloadsDeduplication", func(t *testing.T) { + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + time.Sleep(50 * time.Millisecond) // Simulate reload work + return &clusterState{}, nil + }) + + // Trigger multiple reloads concurrently + for i := 0; i < 10; i++ { + go holder.LazyReload() + } + + // Wait for all to complete + time.Sleep(100 * time.Millisecond) + + // Should only reload once (all concurrent calls deduplicated) + if count := reloadCount.Load(); count != 1 { + t.Errorf("Expected 1 reload (deduplication), got %d", count) + } + }) + + t.Run("PendingReloadDuringCooldown", func(t *testing.T) { + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + 
reloadCount.Add(1) + time.Sleep(10 * time.Millisecond) // Simulate reload work + return &clusterState{}, nil + }) + + // Trigger first reload + holder.LazyReload() + + // Wait for reload to complete but still in cooldown + time.Sleep(50 * time.Millisecond) + + // Trigger second reload during cooldown period + holder.LazyReload() + + // Wait for second reload to complete + time.Sleep(300 * time.Millisecond) + + // Should have reloaded twice (second request queued and executed) + if count := reloadCount.Load(); count != 2 { + t.Errorf("Expected 2 reloads (queued during cooldown), got %d", count) + } + }) + + t.Run("MultiplePendingReloadsCollapsed", func(t *testing.T) { + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + time.Sleep(10 * time.Millisecond) // Simulate reload work + return &clusterState{}, nil + }) + + // Trigger first reload + holder.LazyReload() + + // Wait for reload to start + time.Sleep(5 * time.Millisecond) + + // Trigger multiple reloads during active reload + cooldown + for i := 0; i < 10; i++ { + holder.LazyReload() + time.Sleep(5 * time.Millisecond) + } + + // Wait for all to complete + time.Sleep(400 * time.Millisecond) + + // Should have reloaded exactly twice: + // 1. Initial reload + // 2. One more reload for all the pending requests (collapsed into one) + if count := reloadCount.Load(); count != 2 { + t.Errorf("Expected 2 reloads (initial + collapsed pending), got %d", count) + } + }) + + t.Run("ReloadAfterCooldownPeriod", func(t *testing.T) { + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + time.Sleep(10 * time.Millisecond) // Simulate reload work + return &clusterState{}, nil + }) + + // Trigger first reload + holder.LazyReload() + + // Wait for reload + cooldown to complete + time.Sleep(300 * time.Millisecond) + + // Trigger second reload after cooldown + holder.LazyReload() + + // Wait for second reload to complete + time.Sleep(300 * time.Millisecond) + + // Should have reloaded twice (separate reload cycles) + if count := reloadCount.Load(); count != 2 { + t.Errorf("Expected 2 reloads (separate cycles), got %d", count) + } + }) + + t.Run("ErrorDuringReload", func(t *testing.T) { + var reloadCount atomic.Int32 + var shouldFail atomic.Bool + shouldFail.Store(true) + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + if shouldFail.Load() { + return nil, context.DeadlineExceeded + } + return &clusterState{}, nil + }) + + // Trigger reload that will fail + holder.LazyReload() + + // Wait for failed reload + time.Sleep(50 * time.Millisecond) + + // Trigger another reload (should succeed now) + shouldFail.Store(false) + holder.LazyReload() + + // Wait for successful reload + time.Sleep(300 * time.Millisecond) + + // Should have attempted reload twice (first failed, second succeeded) + if count := reloadCount.Load(); count != 2 { + t.Errorf("Expected 2 reload attempts, got %d", count) + } + }) + + t.Run("CascadingSMIGRATEDScenario", func(t *testing.T) { + // Simulate the real-world scenario: multiple SMIGRATED notifications + // arriving in quick succession from different node clients + var reloadCount atomic.Int32 + + holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) { + reloadCount.Add(1) + time.Sleep(20 * time.Millisecond) // Simulate realistic reload time + return &clusterState{}, nil + }) + + // 
Simulate 5 SMIGRATED notifications arriving within 100ms + for i := 0; i < 5; i++ { + go holder.LazyReload() + time.Sleep(20 * time.Millisecond) + } + + // Wait for all reloads to complete + time.Sleep(500 * time.Millisecond) + + // Should reload at most 2 times: + // 1. First notification triggers reload + // 2. Notifications 2-5 collapse into one pending reload + count := reloadCount.Load() + if count < 1 || count > 2 { + t.Errorf("Expected 1-2 reloads for cascading scenario, got %d", count) + } + }) +} diff --git a/sentinel.go b/sentinel.go index 663f7b1ad9..8565a31e60 100644 --- a/sentinel.go +++ b/sentinel.go @@ -89,7 +89,18 @@ type FailoverOptions struct { MinRetryBackoff time.Duration MaxRetryBackoff time.Duration - DialTimeout time.Duration + DialTimeout time.Duration + + // DialerRetries is the maximum number of retry attempts when dialing fails. + // + // default: 5 + DialerRetries int + + // DialerRetryTimeout is the backoff duration between retry attempts. + // + // default: 100 milliseconds + DialerRetryTimeout time.Duration + ReadTimeout time.Duration WriteTimeout time.Duration ContextTimeoutEnabled bool @@ -110,7 +121,12 @@ type FailoverOptions struct { PoolFIFO bool - PoolSize int + PoolSize int + + // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. + // If <= 0, defaults to PoolSize. If > PoolSize, it will be capped at PoolSize. + MaxConcurrentDials int + PoolTimeout time.Duration MinIdleConns int MaxIdleConns int @@ -141,6 +157,10 @@ type FailoverOptions struct { UnstableResp3 bool + // PushNotificationProcessor is the processor for handling push notifications. + // If nil, a default processor will be created for RESP3 connections. + PushNotificationProcessor push.NotificationProcessor + // MaintNotificationsConfig is not supported for FailoverClients at the moment // MaintNotificationsConfig provides custom configuration for maintnotifications upgrades. 
// When MaintNotificationsConfig.Mode is not "disabled", the client will handle @@ -174,27 +194,32 @@ func (opt *FailoverOptions) clientOptions() *Options { ReadBufferSize: opt.ReadBufferSize, WriteBufferSize: opt.WriteBufferSize, - DialTimeout: opt.DialTimeout, - ReadTimeout: opt.ReadTimeout, - WriteTimeout: opt.WriteTimeout, + DialTimeout: opt.DialTimeout, + DialerRetries: opt.DialerRetries, + DialerRetryTimeout: opt.DialerRetryTimeout, + ReadTimeout: opt.ReadTimeout, + WriteTimeout: opt.WriteTimeout, + ContextTimeoutEnabled: opt.ContextTimeoutEnabled, - PoolFIFO: opt.PoolFIFO, - PoolSize: opt.PoolSize, - PoolTimeout: opt.PoolTimeout, - MinIdleConns: opt.MinIdleConns, - MaxIdleConns: opt.MaxIdleConns, - MaxActiveConns: opt.MaxActiveConns, - ConnMaxIdleTime: opt.ConnMaxIdleTime, - ConnMaxLifetime: opt.ConnMaxLifetime, + PoolFIFO: opt.PoolFIFO, + PoolSize: opt.PoolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, + PoolTimeout: opt.PoolTimeout, + MinIdleConns: opt.MinIdleConns, + MaxIdleConns: opt.MaxIdleConns, + MaxActiveConns: opt.MaxActiveConns, + ConnMaxIdleTime: opt.ConnMaxIdleTime, + ConnMaxLifetime: opt.ConnMaxLifetime, TLSConfig: opt.TLSConfig, DisableIdentity: opt.DisableIdentity, DisableIndentity: opt.DisableIndentity, - IdentitySuffix: opt.IdentitySuffix, - UnstableResp3: opt.UnstableResp3, + IdentitySuffix: opt.IdentitySuffix, + UnstableResp3: opt.UnstableResp3, + PushNotificationProcessor: opt.PushNotificationProcessor, MaintNotificationsConfig: &maintnotifications.Config{ Mode: maintnotifications.ModeDisabled, @@ -222,27 +247,32 @@ func (opt *FailoverOptions) sentinelOptions(addr string) *Options { ReadBufferSize: 4096, WriteBufferSize: 4096, - DialTimeout: opt.DialTimeout, - ReadTimeout: opt.ReadTimeout, - WriteTimeout: opt.WriteTimeout, + DialTimeout: opt.DialTimeout, + DialerRetries: opt.DialerRetries, + DialerRetryTimeout: opt.DialerRetryTimeout, + ReadTimeout: opt.ReadTimeout, + WriteTimeout: opt.WriteTimeout, + ContextTimeoutEnabled: opt.ContextTimeoutEnabled, - PoolFIFO: opt.PoolFIFO, - PoolSize: opt.PoolSize, - PoolTimeout: opt.PoolTimeout, - MinIdleConns: opt.MinIdleConns, - MaxIdleConns: opt.MaxIdleConns, - MaxActiveConns: opt.MaxActiveConns, - ConnMaxIdleTime: opt.ConnMaxIdleTime, - ConnMaxLifetime: opt.ConnMaxLifetime, + PoolFIFO: opt.PoolFIFO, + PoolSize: opt.PoolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, + PoolTimeout: opt.PoolTimeout, + MinIdleConns: opt.MinIdleConns, + MaxIdleConns: opt.MaxIdleConns, + MaxActiveConns: opt.MaxActiveConns, + ConnMaxIdleTime: opt.ConnMaxIdleTime, + ConnMaxLifetime: opt.ConnMaxLifetime, TLSConfig: opt.TLSConfig, DisableIdentity: opt.DisableIdentity, DisableIndentity: opt.DisableIndentity, - IdentitySuffix: opt.IdentitySuffix, - UnstableResp3: opt.UnstableResp3, + IdentitySuffix: opt.IdentitySuffix, + UnstableResp3: opt.UnstableResp3, + PushNotificationProcessor: opt.PushNotificationProcessor, MaintNotificationsConfig: &maintnotifications.Config{ Mode: maintnotifications.ModeDisabled, @@ -276,26 +306,31 @@ func (opt *FailoverOptions) clusterOptions() *ClusterOptions { ReadBufferSize: opt.ReadBufferSize, WriteBufferSize: opt.WriteBufferSize, - DialTimeout: opt.DialTimeout, - ReadTimeout: opt.ReadTimeout, - WriteTimeout: opt.WriteTimeout, + DialTimeout: opt.DialTimeout, + DialerRetries: opt.DialerRetries, + DialerRetryTimeout: opt.DialerRetryTimeout, + ReadTimeout: opt.ReadTimeout, + WriteTimeout: opt.WriteTimeout, + ContextTimeoutEnabled: opt.ContextTimeoutEnabled, - PoolFIFO: opt.PoolFIFO, - PoolSize: opt.PoolSize, - 
PoolTimeout: opt.PoolTimeout, - MinIdleConns: opt.MinIdleConns, - MaxIdleConns: opt.MaxIdleConns, - MaxActiveConns: opt.MaxActiveConns, - ConnMaxIdleTime: opt.ConnMaxIdleTime, - ConnMaxLifetime: opt.ConnMaxLifetime, + PoolFIFO: opt.PoolFIFO, + PoolSize: opt.PoolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, + PoolTimeout: opt.PoolTimeout, + MinIdleConns: opt.MinIdleConns, + MaxIdleConns: opt.MaxIdleConns, + MaxActiveConns: opt.MaxActiveConns, + ConnMaxIdleTime: opt.ConnMaxIdleTime, + ConnMaxLifetime: opt.ConnMaxLifetime, TLSConfig: opt.TLSConfig, - DisableIdentity: opt.DisableIdentity, - DisableIndentity: opt.DisableIndentity, - IdentitySuffix: opt.IdentitySuffix, - FailingTimeoutSeconds: opt.FailingTimeoutSeconds, + DisableIdentity: opt.DisableIdentity, + DisableIndentity: opt.DisableIndentity, + IdentitySuffix: opt.IdentitySuffix, + FailingTimeoutSeconds: opt.FailingTimeoutSeconds, + PushNotificationProcessor: opt.PushNotificationProcessor, MaintNotificationsConfig: &maintnotifications.Config{ Mode: maintnotifications.ModeDisabled, @@ -399,11 +434,14 @@ func setupFailoverConnParams(u *url.URL, o *FailoverOptions) (*FailoverOptions, o.MinRetryBackoff = q.duration("min_retry_backoff") o.MaxRetryBackoff = q.duration("max_retry_backoff") o.DialTimeout = q.duration("dial_timeout") + o.DialerRetries = q.int("dialer_retries") + o.DialerRetryTimeout = q.duration("dialer_retry_timeout") o.ReadTimeout = q.duration("read_timeout") o.WriteTimeout = q.duration("write_timeout") o.ContextTimeoutEnabled = q.bool("context_timeout_enabled") o.PoolFIFO = q.bool("pool_fifo") o.PoolSize = q.int("pool_size") + o.MaxConcurrentDials = q.int("max_concurrent_dials") o.MinIdleConns = q.int("min_idle_conns") o.MaxIdleConns = q.int("max_idle_conns") o.MaxActiveConns = q.int("max_active_conns") diff --git a/universal.go b/universal.go index 1dc9764dc7..39acdb8e5e 100644 --- a/universal.go +++ b/universal.go @@ -8,6 +8,7 @@ import ( "github.com/redis/go-redis/v9/auth" "github.com/redis/go-redis/v9/maintnotifications" + "github.com/redis/go-redis/v9/push" ) // UniversalOptions information is required by UniversalClient to establish @@ -57,7 +58,18 @@ type UniversalOptions struct { MinRetryBackoff time.Duration MaxRetryBackoff time.Duration - DialTimeout time.Duration + DialTimeout time.Duration + + // DialerRetries is the maximum number of retry attempts when dialing fails. + // + // default: 5 + DialerRetries int + + // DialerRetryTimeout is the backoff duration between retry attempts. + // + // default: 100 milliseconds + DialerRetryTimeout time.Duration + ReadTimeout time.Duration WriteTimeout time.Duration ContextTimeoutEnabled bool @@ -79,7 +91,12 @@ type UniversalOptions struct { // PoolFIFO uses FIFO mode for each node connection pool GET/PUT (default LIFO). PoolFIFO bool - PoolSize int + PoolSize int + + // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. + // If <= 0, defaults to PoolSize. If > PoolSize, it will be capped at PoolSize. + MaxConcurrentDials int + PoolTimeout time.Duration MinIdleConns int MaxIdleConns int @@ -121,6 +138,10 @@ type UniversalOptions struct { UnstableResp3 bool + // PushNotificationProcessor is the processor for handling push notifications. + // If nil, a default processor will be created for RESP3 connections. + PushNotificationProcessor push.NotificationProcessor + // IsClusterMode can be used when only one Addrs is provided (e.g. Elasticache supports setting up cluster mode with configuration endpoint). 
IsClusterMode bool @@ -156,32 +177,36 @@ func (o *UniversalOptions) Cluster() *ClusterOptions { MinRetryBackoff: o.MinRetryBackoff, MaxRetryBackoff: o.MaxRetryBackoff, - DialTimeout: o.DialTimeout, - ReadTimeout: o.ReadTimeout, - WriteTimeout: o.WriteTimeout, + DialTimeout: o.DialTimeout, + DialerRetries: o.DialerRetries, + DialerRetryTimeout: o.DialerRetryTimeout, + ReadTimeout: o.ReadTimeout, + WriteTimeout: o.WriteTimeout, + ContextTimeoutEnabled: o.ContextTimeoutEnabled, ReadBufferSize: o.ReadBufferSize, WriteBufferSize: o.WriteBufferSize, - PoolFIFO: o.PoolFIFO, - - PoolSize: o.PoolSize, - PoolTimeout: o.PoolTimeout, - MinIdleConns: o.MinIdleConns, - MaxIdleConns: o.MaxIdleConns, - MaxActiveConns: o.MaxActiveConns, - ConnMaxIdleTime: o.ConnMaxIdleTime, - ConnMaxLifetime: o.ConnMaxLifetime, + PoolFIFO: o.PoolFIFO, + PoolSize: o.PoolSize, + MaxConcurrentDials: o.MaxConcurrentDials, + PoolTimeout: o.PoolTimeout, + MinIdleConns: o.MinIdleConns, + MaxIdleConns: o.MaxIdleConns, + MaxActiveConns: o.MaxActiveConns, + ConnMaxIdleTime: o.ConnMaxIdleTime, + ConnMaxLifetime: o.ConnMaxLifetime, TLSConfig: o.TLSConfig, - DisableIdentity: o.DisableIdentity, - DisableIndentity: o.DisableIndentity, - IdentitySuffix: o.IdentitySuffix, - FailingTimeoutSeconds: o.FailingTimeoutSeconds, - UnstableResp3: o.UnstableResp3, - MaintNotificationsConfig: o.MaintNotificationsConfig, + DisableIdentity: o.DisableIdentity, + DisableIndentity: o.DisableIndentity, + IdentitySuffix: o.IdentitySuffix, + FailingTimeoutSeconds: o.FailingTimeoutSeconds, + UnstableResp3: o.UnstableResp3, + PushNotificationProcessor: o.PushNotificationProcessor, + MaintNotificationsConfig: o.MaintNotificationsConfig, } } @@ -217,31 +242,36 @@ func (o *UniversalOptions) Failover() *FailoverOptions { MinRetryBackoff: o.MinRetryBackoff, MaxRetryBackoff: o.MaxRetryBackoff, - DialTimeout: o.DialTimeout, - ReadTimeout: o.ReadTimeout, - WriteTimeout: o.WriteTimeout, + DialTimeout: o.DialTimeout, + DialerRetries: o.DialerRetries, + DialerRetryTimeout: o.DialerRetryTimeout, + ReadTimeout: o.ReadTimeout, + WriteTimeout: o.WriteTimeout, + ContextTimeoutEnabled: o.ContextTimeoutEnabled, ReadBufferSize: o.ReadBufferSize, WriteBufferSize: o.WriteBufferSize, - PoolFIFO: o.PoolFIFO, - PoolSize: o.PoolSize, - PoolTimeout: o.PoolTimeout, - MinIdleConns: o.MinIdleConns, - MaxIdleConns: o.MaxIdleConns, - MaxActiveConns: o.MaxActiveConns, - ConnMaxIdleTime: o.ConnMaxIdleTime, - ConnMaxLifetime: o.ConnMaxLifetime, + PoolFIFO: o.PoolFIFO, + PoolSize: o.PoolSize, + MaxConcurrentDials: o.MaxConcurrentDials, + PoolTimeout: o.PoolTimeout, + MinIdleConns: o.MinIdleConns, + MaxIdleConns: o.MaxIdleConns, + MaxActiveConns: o.MaxActiveConns, + ConnMaxIdleTime: o.ConnMaxIdleTime, + ConnMaxLifetime: o.ConnMaxLifetime, TLSConfig: o.TLSConfig, ReplicaOnly: o.ReadOnly, - DisableIdentity: o.DisableIdentity, - DisableIndentity: o.DisableIndentity, - IdentitySuffix: o.IdentitySuffix, - UnstableResp3: o.UnstableResp3, + DisableIdentity: o.DisableIdentity, + DisableIndentity: o.DisableIndentity, + IdentitySuffix: o.IdentitySuffix, + UnstableResp3: o.UnstableResp3, + PushNotificationProcessor: o.PushNotificationProcessor, // Note: MaintNotificationsConfig not supported for FailoverOptions } } @@ -271,30 +301,35 @@ func (o *UniversalOptions) Simple() *Options { MinRetryBackoff: o.MinRetryBackoff, MaxRetryBackoff: o.MaxRetryBackoff, - DialTimeout: o.DialTimeout, - ReadTimeout: o.ReadTimeout, - WriteTimeout: o.WriteTimeout, + DialTimeout: o.DialTimeout, + DialerRetries: o.DialerRetries, 
+ DialerRetryTimeout: o.DialerRetryTimeout, + ReadTimeout: o.ReadTimeout, + WriteTimeout: o.WriteTimeout, + ContextTimeoutEnabled: o.ContextTimeoutEnabled, ReadBufferSize: o.ReadBufferSize, WriteBufferSize: o.WriteBufferSize, - PoolFIFO: o.PoolFIFO, - PoolSize: o.PoolSize, - PoolTimeout: o.PoolTimeout, - MinIdleConns: o.MinIdleConns, - MaxIdleConns: o.MaxIdleConns, - MaxActiveConns: o.MaxActiveConns, - ConnMaxIdleTime: o.ConnMaxIdleTime, - ConnMaxLifetime: o.ConnMaxLifetime, + PoolFIFO: o.PoolFIFO, + PoolSize: o.PoolSize, + MaxConcurrentDials: o.MaxConcurrentDials, + PoolTimeout: o.PoolTimeout, + MinIdleConns: o.MinIdleConns, + MaxIdleConns: o.MaxIdleConns, + MaxActiveConns: o.MaxActiveConns, + ConnMaxIdleTime: o.ConnMaxIdleTime, + ConnMaxLifetime: o.ConnMaxLifetime, TLSConfig: o.TLSConfig, - DisableIdentity: o.DisableIdentity, - DisableIndentity: o.DisableIndentity, - IdentitySuffix: o.IdentitySuffix, - UnstableResp3: o.UnstableResp3, - MaintNotificationsConfig: o.MaintNotificationsConfig, + DisableIdentity: o.DisableIdentity, + DisableIndentity: o.DisableIndentity, + IdentitySuffix: o.IdentitySuffix, + UnstableResp3: o.UnstableResp3, + PushNotificationProcessor: o.PushNotificationProcessor, + MaintNotificationsConfig: o.MaintNotificationsConfig, } }
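
Usage sketch (not part of the patches above): a minimal ClusterClient setup that opts into the SMIGRATING/SMIGRATED handling wired up in NewClusterClient. The addresses, Protocol value, and maintnotifications Mode below are illustrative assumptions, not values taken from these diffs.

package main

import (
	"context"

	"github.com/redis/go-redis/v9"
	"github.com/redis/go-redis/v9/maintnotifications"
)

func main() {
	ctx := context.Background()

	// Setting MaintNotificationsConfig on ClusterOptions is what causes
	// NewClusterClient to register the SMIGRATED -> LazyReload callback on
	// each node client (see the OnNewNode hook added in osscluster.go).
	// Addrs, Protocol, and Mode here are assumed example values.
	client := redis.NewClusterClient(&redis.ClusterOptions{
		Addrs:    []string{"127.0.0.1:7000", "127.0.0.1:7001", "127.0.0.1:7002"},
		Protocol: 3, // RESP3 is required for push notifications
		MaintNotificationsConfig: &maintnotifications.Config{
			Mode: maintnotifications.ModeEnabled,
		},
	})
	defer client.Close()

	// Commands keep flowing during slot migrations: SMIGRATING relaxes the
	// receiving connection's timeouts, and the first SMIGRATED with a given
	// SeqID triggers a deduplicated, lazy cluster-state reload.
	if err := client.Set(ctx, "key", "value", 0).Err(); err != nil {
		panic(err)
	}
}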