From f3e91263a7c978c0ba1e6c9b57a24bc600b774d3 Mon Sep 17 00:00:00 2001
From: Nedyalko Dyakov
Date: Mon, 24 Nov 2025 17:17:22 +0200
Subject: [PATCH] lazy cluster topology reload

---
 commands_test.go               |  20 +++-
 osscluster.go                  |  59 ++++++++--
 osscluster_lazy_reload_test.go | 201 +++++++++++++++++++++++++++++++++
 3 files changed, 266 insertions(+), 14 deletions(-)
 create mode 100644 osscluster_lazy_reload_test.go

diff --git a/commands_test.go b/commands_test.go
index edbae4e7..24640c23 100644
--- a/commands_test.go
+++ b/commands_test.go
@@ -8905,27 +8905,37 @@ var _ = Describe("Commands", func() {
 			const key = "latency-monitor-threshold"
 			old := client.ConfigGet(ctx, key).Val()
-			client.ConfigSet(ctx, key, "1")
+			// Use a higher threshold (100ms) to avoid capturing normal operations
+			// that could cause flakiness due to timing variations
+			client.ConfigSet(ctx, key, "100")
 			defer client.ConfigSet(ctx, key, old[key])
 
 			result, err := client.Latency(ctx).Result()
 			Expect(err).NotTo(HaveOccurred())
 			Expect(len(result)).Should(Equal(0))
 
-			err = client.Do(ctx, "DEBUG", "SLEEP", 0.01).Err()
+			// Use a longer sleep (150ms) to ensure it exceeds the 100ms threshold
+			err = client.Do(ctx, "DEBUG", "SLEEP", 0.15).Err()
 			Expect(err).NotTo(HaveOccurred())
 
 			result, err = client.Latency(ctx).Result()
 			Expect(err).NotTo(HaveOccurred())
-			Expect(len(result)).Should(Equal(1))
+			Expect(len(result)).Should(BeNumerically(">=", 1))
 
 			// reset latency by event name
-			err = client.LatencyReset(ctx, result[0].Name).Err()
+			eventName := result[0].Name
+			err = client.LatencyReset(ctx, eventName).Err()
 			Expect(err).NotTo(HaveOccurred())
 
+			// Verify the specific event was reset (not that all events are gone)
+			// This avoids flakiness from other operations triggering latency events
 			result, err = client.Latency(ctx).Result()
 			Expect(err).NotTo(HaveOccurred())
-			Expect(len(result)).Should(Equal(0))
+			for _, event := range result {
+				if event.Name == eventName {
+					Fail("Event " + eventName + " should have been reset")
+				}
+			}
 		})
 	})
 })
diff --git a/osscluster.go b/osscluster.go
index 7925d2c6..768b665a 100644
--- a/osscluster.go
+++ b/osscluster.go
@@ -146,7 +146,8 @@ type ClusterOptions struct {
 	// cluster upgrade notifications gracefully and manage connection/pool state
 	// transitions seamlessly. Requires Protocol: 3 (RESP3) for push notifications.
 	// If nil, maintnotifications upgrades are in "auto" mode and will be enabled if the server supports it.
-	// The ClusterClient does not directly work with maintnotifications, it is up to the clients in the Nodes map to work with maintnotifications.
+	// The ClusterClient supports SMIGRATING and SMIGRATED notifications for cluster state management.
+	// Individual node clients handle other maintenance notifications (MOVING, MIGRATING, etc.).
 	MaintNotificationsConfig *maintnotifications.Config
 }
 
@@ -945,8 +946,9 @@ func (c *clusterState) slotNodes(slot int) []*clusterNode {
 type clusterStateHolder struct {
 	load func(ctx context.Context) (*clusterState, error)
 
-	state     atomic.Value
-	reloading uint32 // atomic
+	state         atomic.Value
+	reloading     uint32 // atomic
+	reloadPending uint32 // atomic - set to 1 when reload is requested during active reload
 }
 
 func newClusterStateHolder(fn func(ctx context.Context) (*clusterState, error)) *clusterStateHolder {
@@ -965,17 +967,36 @@ func (c *clusterStateHolder) Reload(ctx context.Context) (*clusterState, error)
 }
 
 func (c *clusterStateHolder) LazyReload() {
+	// If already reloading, mark that another reload is pending
 	if !atomic.CompareAndSwapUint32(&c.reloading, 0, 1) {
+		atomic.StoreUint32(&c.reloadPending, 1)
 		return
 	}
-	go func() {
-		defer atomic.StoreUint32(&c.reloading, 0)
-		_, err := c.Reload(context.Background())
-		if err != nil {
-			return
+	go func() {
+		for {
+			_, err := c.Reload(context.Background())
+			if err != nil {
+				atomic.StoreUint32(&c.reloading, 0)
+				return
+			}
+
+			// Clear pending flag after reload completes, before cooldown
+			// This captures notifications that arrived during the reload
+			atomic.StoreUint32(&c.reloadPending, 0)
+
+			// Wait cooldown period
+			time.Sleep(200 * time.Millisecond)
+
+			// Check if another reload was requested during cooldown
+			if atomic.LoadUint32(&c.reloadPending) == 0 {
+				// No pending reload, we're done
+				atomic.StoreUint32(&c.reloading, 0)
+				return
+			}
+
+			// Pending reload requested, loop to reload again
 		}
-		time.Sleep(200 * time.Millisecond)
 	}()
 }
 
@@ -1038,6 +1059,26 @@ func NewClusterClient(opt *ClusterOptions) *ClusterClient {
 		txPipeline: c.processTxPipeline,
 	})
 
+	// Set up SMIGRATED notification handling for cluster state reload
+	// When a node client receives a SMIGRATED notification, it should trigger
+	// cluster state reload on the parent ClusterClient
+	if opt.MaintNotificationsConfig != nil {
+		c.nodes.OnNewNode(func(nodeClient *Client) {
+			manager := nodeClient.GetMaintNotificationsManager()
+			if manager != nil {
+				manager.SetClusterStateReloadCallback(func(ctx context.Context, hostPort string, slotRanges []string) {
+					// Log the migration details for now
+					if internal.LogLevel.InfoOrAbove() {
+						internal.Logger.Printf(ctx, "cluster: slots %v migrated to %s, reloading cluster state", slotRanges, hostPort)
+					}
+					// Currently we reload the entire cluster state
+					// In the future, this could be optimized to reload only the specific slots
+					c.state.LazyReload()
+				})
+			}
+		})
+	}
+
 	return c
 }
diff --git a/osscluster_lazy_reload_test.go b/osscluster_lazy_reload_test.go
new file mode 100644
index 00000000..f66bb424
--- /dev/null
+++ b/osscluster_lazy_reload_test.go
@@ -0,0 +1,201 @@
+package redis
+
+import (
+	"context"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestLazyReloadQueueBehavior tests that LazyReload properly queues reload requests
+func TestLazyReloadQueueBehavior(t *testing.T) {
+	t.Run("SingleReload", func(t *testing.T) {
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(50 * time.Millisecond) // Simulate reload work
+			return &clusterState{}, nil
+		})
+
+		// Trigger one reload
+		holder.LazyReload()
+
+		// Wait for reload to complete
+		time.Sleep(300 * time.Millisecond)
+
+		if count := reloadCount.Load(); count != 1 {
+			t.Errorf("Expected 1 reload, got %d", count)
+		}
+	})
+
+	t.Run("ConcurrentReloadsDeduplication", func(t *testing.T) {
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(50 * time.Millisecond) // Simulate reload work
+			return &clusterState{}, nil
+		})
+
+		// Trigger multiple reloads concurrently
+		for i := 0; i < 10; i++ {
+			go holder.LazyReload()
+		}
+
+		// Wait for all to complete
+		time.Sleep(100 * time.Millisecond)
+
+		// Should only reload once (all concurrent calls deduplicated)
+		if count := reloadCount.Load(); count != 1 {
+			t.Errorf("Expected 1 reload (deduplication), got %d", count)
+		}
+	})
+
+	t.Run("PendingReloadDuringCooldown", func(t *testing.T) {
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(10 * time.Millisecond) // Simulate reload work
+			return &clusterState{}, nil
+		})
+
+		// Trigger first reload
+		holder.LazyReload()
+
+		// Wait for reload to complete but still in cooldown
+		time.Sleep(50 * time.Millisecond)
+
+		// Trigger second reload during cooldown period
+		holder.LazyReload()
+
+		// Wait for second reload to complete
+		time.Sleep(300 * time.Millisecond)
+
+		// Should have reloaded twice (second request queued and executed)
+		if count := reloadCount.Load(); count != 2 {
+			t.Errorf("Expected 2 reloads (queued during cooldown), got %d", count)
+		}
+	})
+
+	t.Run("MultiplePendingReloadsCollapsed", func(t *testing.T) {
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(10 * time.Millisecond) // Simulate reload work
+			return &clusterState{}, nil
+		})
+
+		// Trigger first reload
+		holder.LazyReload()
+
+		// Wait for reload to start
+		time.Sleep(5 * time.Millisecond)
+
+		// Trigger multiple reloads during active reload + cooldown
+		for i := 0; i < 10; i++ {
+			holder.LazyReload()
+			time.Sleep(5 * time.Millisecond)
+		}
+
+		// Wait for all to complete
+		time.Sleep(400 * time.Millisecond)
+
+		// Should have reloaded exactly twice:
+		// 1. Initial reload
+		// 2. One more reload for all the pending requests (collapsed into one)
+		if count := reloadCount.Load(); count != 2 {
+			t.Errorf("Expected 2 reloads (initial + collapsed pending), got %d", count)
+		}
+	})
+
+	t.Run("ReloadAfterCooldownPeriod", func(t *testing.T) {
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(10 * time.Millisecond) // Simulate reload work
+			return &clusterState{}, nil
+		})
+
+		// Trigger first reload
+		holder.LazyReload()
+
+		// Wait for reload + cooldown to complete
+		time.Sleep(300 * time.Millisecond)
+
+		// Trigger second reload after cooldown
+		holder.LazyReload()
+
+		// Wait for second reload to complete
+		time.Sleep(300 * time.Millisecond)
+
+		// Should have reloaded twice (separate reload cycles)
+		if count := reloadCount.Load(); count != 2 {
+			t.Errorf("Expected 2 reloads (separate cycles), got %d", count)
+		}
+	})
+
+	t.Run("ErrorDuringReload", func(t *testing.T) {
+		var reloadCount atomic.Int32
+		var shouldFail atomic.Bool
+		shouldFail.Store(true)
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			if shouldFail.Load() {
+				return nil, context.DeadlineExceeded
+			}
+			return &clusterState{}, nil
+		})
+
+		// Trigger reload that will fail
+		holder.LazyReload()
+
+		// Wait for failed reload
+		time.Sleep(50 * time.Millisecond)
+
+		// Trigger another reload (should succeed now)
+		shouldFail.Store(false)
+		holder.LazyReload()
+
+		// Wait for successful reload
+		time.Sleep(300 * time.Millisecond)
+
+		// Should have attempted reload twice (first failed, second succeeded)
+		if count := reloadCount.Load(); count != 2 {
+			t.Errorf("Expected 2 reload attempts, got %d", count)
+		}
+	})
+
+	t.Run("CascadingSMigratedScenario", func(t *testing.T) {
+		// Simulate the real-world scenario: multiple SMIGRATED notifications
+		// arriving in quick succession from different node clients
+		var reloadCount atomic.Int32
+
+		holder := newClusterStateHolder(func(ctx context.Context) (*clusterState, error) {
+			reloadCount.Add(1)
+			time.Sleep(20 * time.Millisecond) // Simulate realistic reload time
+			return &clusterState{}, nil
+		})
+
+		// Simulate 5 SMIGRATED notifications arriving within 100ms
+		for i := 0; i < 5; i++ {
+			go holder.LazyReload()
+			time.Sleep(20 * time.Millisecond)
+		}
+
+		// Wait for all reloads to complete
+		time.Sleep(500 * time.Millisecond)
+
+		// Should reload at most 2 times:
+		// 1. First notification triggers reload
+		// 2. Notifications 2-5 collapse into one pending reload
+		count := reloadCount.Load()
+		if count < 1 || count > 2 {
+			t.Errorf("Expected 1-2 reloads for cascading scenario, got %d", count)
+		}
+	})
+}
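
Usage note, not part of the diff above: a minimal sketch of how a caller could opt into the SMIGRATED-driven reload path. The patch only wires the OnNewNode callback when ClusterOptions.MaintNotificationsConfig is non-nil, and the option's comment notes that push notifications require RESP3 (Protocol: 3). The maintnotifications import path, the zero-value Config, and the placeholder addresses are assumptions, not part of the change.

package main

import (
	"context"

	"github.com/redis/go-redis/v9"
	"github.com/redis/go-redis/v9/maintnotifications" // assumed package path
)

func main() {
	rdb := redis.NewClusterClient(&redis.ClusterOptions{
		Addrs:    []string{":7000", ":7001", ":7002"}, // placeholder addresses
		Protocol: 3,                                   // RESP3 is required for push notifications
		// A non-nil config enables the OnNewNode wiring added in this patch, so
		// SMIGRATED notifications trigger a lazy cluster-state reload.
		MaintNotificationsConfig: &maintnotifications.Config{}, // zero value shown; tune as needed
	})
	defer rdb.Close()

	// Regular commands; topology reloads happen in the background when
	// SMIGRATED notifications arrive.
	_ = rdb.Ping(context.Background()).Err()
}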