mirror of https://github.com/redis/go-redis.git synced 2025-12-02 06:22:31 +03:00
go-redis/example/cluster-state-machine/advanced.go
Nedyalko Dyakov b6d7cdbd84 chore(ci): Add redis 8.4-RC1-pre & examples (#3572)
* add disable maintnotifications example

* add 8.4-RC1-pre

* println -> printf for linter

* address jit comment

Fix broken initialization of idle connections

optimize push notif

wip

wip

wip

wip
2025-10-29 13:49:32 +02:00

package main

import (
"context"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
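
// Note: this file is one piece of the cluster-state-machine example; the
// Metrics type and the getRedisAddrs and isPoolTimeout helpers used below
// are defined elsewhere in the same package.
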
// AdvancedMetrics extends basic metrics with more detailed tracking
type AdvancedMetrics struct {
Metrics
// Latency buckets (in microseconds)
latency0_1ms atomic.Int64 // 0-1ms
latency1_5ms atomic.Int64 // 1-5ms
latency5_10ms atomic.Int64 // 5-10ms
latency10_50ms atomic.Int64 // 10-50ms
latency50ms atomic.Int64 // >50ms
}
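// recordSuccess updates the embedded Metrics and then increments the
// latency bucket that the observed duration falls into.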
func (am *AdvancedMetrics) recordSuccess(latency time.Duration) {
am.Metrics.recordSuccess(latency)
// Record latency bucket
micros := latency.Microseconds()
switch {
case micros < 1000:
am.latency0_1ms.Add(1)
case micros < 5000:
am.latency1_5ms.Add(1)
case micros < 10000:
am.latency5_10ms.Add(1)
case micros < 50000:
am.latency10_50ms.Add(1)
default:
am.latency50ms.Add(1)
}
}
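// printDetailed prints the base metrics followed by the latency histogram,
// with each bucket shown as a percentage of successful operations.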
func (am *AdvancedMetrics) printDetailed() {
am.Metrics.print()
total := am.successOps.Load()
if total > 0 {
fmt.Printf("\n=== Latency Distribution ===\n")
fmt.Printf("0-1ms: %d (%.2f%%)\n", am.latency0_1ms.Load(), float64(am.latency0_1ms.Load())/float64(total)*100)
fmt.Printf("1-5ms: %d (%.2f%%)\n", am.latency1_5ms.Load(), float64(am.latency1_5ms.Load())/float64(total)*100)
fmt.Printf("5-10ms: %d (%.2f%%)\n", am.latency5_10ms.Load(), float64(am.latency5_10ms.Load())/float64(total)*100)
fmt.Printf("10-50ms: %d (%.2f%%)\n", am.latency10_50ms.Load(), float64(am.latency10_50ms.Load())/float64(total)*100)
fmt.Printf(">50ms: %d (%.2f%%)\n", am.latency50ms.Load(), float64(am.latency50ms.Load())/float64(total)*100)
}
}
// runAdvancedExample demonstrates advanced monitoring and potential issues
func runAdvancedExample() {
ctx := context.Background()
fmt.Println("\n=== Advanced State Machine Monitoring ===\n")
fmt.Println("This example includes detailed state machine monitoring")
fmt.Println("to help identify potential concurrency issues.\n")
// Test 1: Extreme concurrency with tiny pool
testExtremeContention(ctx)
// Test 2: Rapid acquire/release cycles
testRapidCycles(ctx)
// Test 3: Long-running operations
testLongRunningOps(ctx)
// Test 4: Concurrent pipelines
testConcurrentPipelines(ctx)
// Test 5: PubSub + Get/Set pool exhaustion
testPubSubWithGetSet()
}
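// testExtremeContention hammers a deliberately tiny pool (2 connections,
// 200 goroutines) so that most operations must queue for a connection.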
func testExtremeContention(ctx context.Context) {
fmt.Println("Test 1: Extreme Contention")
fmt.Println("---------------------------")
fmt.Println("Pool size: 2, Goroutines: 200")
fmt.Println("This tests the state machine under extreme contention.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 2, // Extremely small
PoolTimeout: 1 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 200
const opsPerGoroutine = 10
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("extreme:%d:%d", goroutineID, i)
value := fmt.Sprintf("v%d", i)
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
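// printPoolStats dumps the raw connection pool counters; WaitCount and
// WaitDurationNs indicate how often and for how long callers blocked
// waiting for a free connection.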
func printPoolStats(stats *redis.PoolStats) {
fmt.Println("===== Pool Stats: =====")
fmt.Printf(" Hits: %d\n", stats.Hits)
fmt.Printf(" Misses: %d\n", stats.Misses)
fmt.Printf(" Timeouts: %d\n", stats.Timeouts)
fmt.Printf(" TotalConns: %d\n", stats.TotalConns)
fmt.Printf(" IdleConns: %d\n", stats.IdleConns)
fmt.Printf(" StaleConns: %d\n", stats.StaleConns)
fmt.Printf(" WaitCount: %d\n", stats.WaitCount)
fmt.Printf(" WaitDurationNs: %d\n", stats.WaitDurationNs)
fmt.Printf(" Unusable: %d\n", stats.Unusable)
fmt.Printf(" PubSubStats: %+v\n", stats.PubSubStats)
fmt.Println("===== End Pool Stats: =====")
}
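// testRapidCycles issues back-to-back SETs with no think time, so
// connections churn through acquire/release cycles with MaxIdleConns kept
// below PoolSize.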
func testRapidCycles(ctx context.Context) {
fmt.Println("Test 2: Rapid Acquire/Release Cycles")
fmt.Println("-------------------------------------")
fmt.Println("Testing rapid connection state transitions.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 5,
MaxIdleConns: 1,
PoolTimeout: 2 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 50
const opsPerGoroutine = 100
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("rapid:%d:%d", goroutineID, i)
value := "x"
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// No delay - rapid fire
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
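// testLongRunningOps runs short pipelines through a 2-connection pool
// (up to 5 active connections), pausing each goroutine briefly between
// operations to simulate processing time.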
func testLongRunningOps(ctx context.Context) {
fmt.Println("Test 3: Long-Running Operations")
fmt.Println("--------------------------------")
fmt.Println("Testing connection holding with slow operations.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 2,
MaxIdleConns: 1,
MaxActiveConns: 5,
PoolTimeout: 3 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 100
const opsPerGoroutine = 200
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("slow:%d:%d", goroutineID, i)
value := fmt.Sprintf("data-%d", i)
opStart := time.Now()
// Simulate slow operation by doing multiple commands
pipe := client.Pipeline()
pipe.Set(ctx, key, value, 0)
pipe.Get(ctx, key)
pipe.Incr(ctx, fmt.Sprintf("counter:%d", goroutineID))
_, err := pipe.Exec(ctx)
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Simulate processing time
time.Sleep(time.Millisecond * time.Duration(rand.Intn(20)))
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
// testConcurrentPipelines tests pipeline operations under concurrency
func testConcurrentPipelines(ctx context.Context) {
fmt.Println("Test 4: Concurrent Pipelines")
fmt.Println("-----------------------------")
fmt.Println("Testing pipeline operations with connection state machine.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 10,
MaxIdleConns: 5,
MinIdleConns: 5,
PoolTimeout: 5 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 64
const pipelinesPerGoroutine = 100
const commandsPerPipeline = 100
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < pipelinesPerGoroutine; i++ {
opStart := time.Now()
pipe := client.Pipeline()
for j := 0; j < commandsPerPipeline; j++ {
key := fmt.Sprintf("pipe:%d:%d:%d", goroutineID, i, j)
pipe.Set(ctx, key, j, 0)
}
_, err := pipe.Exec(ctx)
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
totalCommands := numGoroutines * pipelinesPerGoroutine * commandsPerPipeline
fmt.Printf("✓ Completed %d commands in %d pipelines in %v\n", totalCommands, numGoroutines*pipelinesPerGoroutine, elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(totalCommands)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
// testPubSubWithGetSet tests pool exhaustion with concurrent pub/sub and get/set operations
func testPubSubWithGetSet() {
fmt.Println("=== Test 5: PubSub + Get/Set Pool Exhaustion ===")
fmt.Println("Testing pool with 100 publishers, 10 subscribers (10 channels), and 100 get/set goroutines")
fmt.Println("Pool size: 100 connections")
fmt.Println()
ctx := context.Background()
// Create client with pool size 100
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 100,
PoolTimeout: 5 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const testDuration = 10 * time.Second
const numChannels = 10
const numPublishers = 100
const numSubscribers = 10
const numGetSetWorkers = 100
// Channel names
channels := make([]string, numChannels)
for i := 0; i < numChannels; i++ {
channels[i] = fmt.Sprintf("test-channel-%d", i)
}
start := time.Now()
var wg sync.WaitGroup
stopSignal := make(chan struct{})
// Track pub/sub specific metrics
var publishCount atomic.Int64
var receiveCount atomic.Int64
var subscribeErrors atomic.Int64
// Start subscribers (10 goroutines, each subscribing to all 10 channels)
for s := 0; s < numSubscribers; s++ {
wg.Add(1)
go func(subscriberID int) {
defer wg.Done()
// Create a dedicated pubsub connection
pubsub := client.Subscribe(ctx, channels...)
defer pubsub.Close()
// Wait for subscription confirmation
_, err := pubsub.Receive(ctx)
if err != nil {
subscribeErrors.Add(1)
fmt.Printf("Subscriber %d: failed to subscribe: %v\n", subscriberID, err)
return
}
// Receive messages until stop signal
ch := pubsub.Channel()
for {
select {
case <-stopSignal:
return
case msg := <-ch:
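// A nil message means the channel was closed (e.g. during shutdown).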
if msg != nil {
receiveCount.Add(1)
}
case <-time.After(100 * time.Millisecond):
// Timeout to check stop signal periodically
}
}
}(s)
}
// Give subscribers time to connect
time.Sleep(500 * time.Millisecond)
// Start publishers (100 goroutines)
for p := 0; p < numPublishers; p++ {
wg.Add(1)
go func(publisherID int) {
defer wg.Done()
for {
select {
case <-stopSignal:
return
default:
opStart := time.Now()
// Publish to a random channel
channelIdx := rand.Intn(numChannels)
message := fmt.Sprintf("msg-%d-%d", publisherID, time.Now().UnixNano())
err := client.Publish(ctx, channels[channelIdx], message).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
publishCount.Add(1)
}
// Small delay to avoid overwhelming the system
time.Sleep(10 * time.Millisecond)
}
}
}(p)
}
// Start get/set workers (100 goroutines)
for w := 0; w < numGetSetWorkers; w++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for {
select {
case <-stopSignal:
return
default:
opStart := time.Now()
// Alternate between SET and GET
key := fmt.Sprintf("worker:%d:key", workerID)
var err error
if rand.Intn(2) == 0 {
err = client.Set(ctx, key, workerID, 0).Err()
} else {
err = client.Get(ctx, key).Err()
// Ignore key not found errors
if err == redis.Nil {
err = nil
}
}
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Small delay to avoid overwhelming the system
time.Sleep(5 * time.Millisecond)
}
}
}(w)
}
// Run for specified duration
time.Sleep(testDuration)
close(stopSignal)
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Test completed in %v\n", elapsed)
fmt.Printf(" Published: %d messages\n", publishCount.Load())
fmt.Printf(" Received: %d messages\n", receiveCount.Load())
fmt.Printf(" Subscribe errors: %d\n", subscribeErrors.Load())
fmt.Printf(" Get/Set operations: %d\n", metrics.successOps.Load())
fmt.Printf(" Total throughput: %.0f ops/sec\n", float64(metrics.successOps.Load())/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
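
// A minimal sketch of how this file's entry point might be wired up from
// the package's main function (the real main.go is not shown here, and the
// sample cluster addresses below are an assumption for illustration only):
//
//	func main() {
//		// getRedisAddrs would typically resolve the cluster endpoints,
//		// e.g. []string{":16379", ":16380", ":16381"} for a local cluster.
//		runAdvancedExample()
//	}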