mirror of https://github.com/redis/go-redis.git synced 2025-12-02 06:22:31 +03:00

chore(ci): Add redis 8.4-RC1-pre & examples (#3572)

* add disable maintnotifications example

* add 8.4-RC1-pre

* println -> printf for linter

* address jit comment

Fix broken initialization of idle connections

optimize push notif

wip

wip

wip

wip
Author: Nedyalko Dyakov
Date: 2025-10-28 15:47:39 +02:00
Committed by: Nedyalko Dyakov
Parent: 1510181ece
Commit: b6d7cdbd84
48 changed files with 4388 additions and 174 deletions


@@ -18,22 +18,20 @@ runs:
- name: Setup Test environment
env:
REDIS_VERSION: ${{ inputs.redis-version }}
CLIENT_LIBS_TEST_IMAGE: "redislabs/client-libs-test:${{ inputs.redis-version }}"
run: |
set -e
redis_version_np=$(echo "$REDIS_VERSION" | grep -oP '^\d+.\d+')
# Mapping of redis version to redis testing containers
declare -A redis_version_mapping=(
["8.4.x"]="8.4-RC1-pre"
["8.2.x"]="8.2.1-pre"
["8.0.x"]="8.0.2"
["7.4.x"]="rs-7.4.0-v5"
["7.2.x"]="rs-7.2.0-v17"
)
if [[ -v redis_version_mapping[$REDIS_VERSION] ]]; then
echo "REDIS_VERSION=${redis_version_np}" >> $GITHUB_ENV
echo "REDIS_IMAGE=redis:${{ inputs.redis-version }}" >> $GITHUB_ENV
echo "REDIS_IMAGE=redis:${REDIS_VERSION}" >> $GITHUB_ENV
echo "CLIENT_LIBS_TEST_IMAGE=redislabs/client-libs-test:${redis_version_mapping[$REDIS_VERSION]}" >> $GITHUB_ENV
else
echo "Version not found in the mapping."


@@ -2,7 +2,7 @@ name: Go
on:
push:
branches: [master, v9, v9.7, v9.8, 'ndyakov/*', 'ofekshenawa/*', 'htemelski-redis/*', 'ce/*']
branches: [master, v9, 'v9.*']
pull_request:
branches: [master, v9, v9.7, v9.8, 'ndyakov/*', 'ofekshenawa/*', 'htemelski-redis/*', 'ce/*']
@@ -18,9 +18,9 @@ jobs:
fail-fast: false
matrix:
redis-version:
- "8.4.x" # Redis CE 8.4
- "8.2.x" # Redis CE 8.2
- "8.0.x" # Redis CE 8.0
- "7.4.x" # Redis stack 7.4
go-version:
- "1.23.x"
- "1.24.x"
@@ -44,9 +44,9 @@ jobs:
# Mapping of redis version to redis testing containers
declare -A redis_version_mapping=(
["8.4.x"]="8.4-RC1-pre"
["8.2.x"]="8.2.1-pre"
["8.0.x"]="8.0.2"
["7.4.x"]="rs-7.4.0-v5"
)
if [[ -v redis_version_mapping[$REDIS_VERSION] ]]; then
echo "REDIS_VERSION=${redis_version_np}" >> $GITHUB_ENV
@@ -74,10 +74,9 @@ jobs:
fail-fast: false
matrix:
redis-version:
- "8.4.x" # Redis CE 8.4
- "8.2.x" # Redis CE 8.2
- "8.0.x" # Redis CE 8.0
- "7.4.x" # Redis stack 7.4
- "7.2.x" # Redis stack 7.2
go-version:
- "1.23.x"
- "1.24.x"


@@ -1,8 +1,8 @@
GO_MOD_DIRS := $(shell find . -type f -name 'go.mod' -exec dirname {} \; | sort)
REDIS_VERSION ?= 8.2
REDIS_VERSION ?= 8.4
RE_CLUSTER ?= false
RCE_DOCKER ?= true
CLIENT_LIBS_TEST_IMAGE ?= redislabs/client-libs-test:8.2.1-pre
CLIENT_LIBS_TEST_IMAGE ?= redislabs/client-libs-test:8.4-RC1-pre
docker.start:
export RE_CLUSTER=$(RE_CLUSTER) && \


@@ -21,14 +21,46 @@ type AutoPipelineConfig struct {
// This prevents overwhelming the server with too many concurrent pipelines.
// Default: 10
MaxConcurrentBatches int
// UseRingBuffer enables the high-performance ring buffer queue.
// When enabled, uses a pre-allocated ring buffer with lock-free enqueue
// instead of the slice-based queue. This provides:
// - 6x faster enqueue operations
// - Zero allocations during enqueue
// - Better performance under high concurrency
// Default: true (enabled)
UseRingBuffer bool
// RingBufferSize is the size of the ring buffer queue.
// Only used when UseRingBuffer is true.
// Must be a power of 2 for optimal performance (will be rounded up if not).
// Default: 1024
RingBufferSize int
// MaxFlushDelay is the maximum delay after flushing before checking for more commands.
// A small delay (e.g., 100μs) can significantly reduce CPU usage by allowing
// more commands to batch together, at the cost of slightly higher latency.
//
// Trade-off:
// - 0 (default): Lowest latency, higher CPU usage
// - 100μs: Balanced (recommended for most workloads)
// - 500μs: Lower CPU usage, higher latency
//
// Based on benchmarks, 100μs can reduce CPU usage by 50%
// while adding only ~100μs average latency per command.
// Default: 0 (no delay)
MaxFlushDelay time.Duration
}
// DefaultAutoPipelineConfig returns the default autopipelining configuration.
func DefaultAutoPipelineConfig() *AutoPipelineConfig {
return &AutoPipelineConfig{
MaxBatchSize: 30,
FlushInterval: 10 * time.Microsecond,
MaxConcurrentBatches: 30,
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: true, // Enable ring buffer by default
RingBufferSize: 1024,
MaxFlushDelay: 0, // No delay by default (lowest latency)
}
}
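For reference, a minimal usage sketch based on the options exercised by the benchmarks and tests later in this commit; the `Addr` value and the 100µs `MaxFlushDelay` are illustrative choices, not defaults:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/redis/go-redis/v9"
)

func main() {
	ctx := context.Background()

	// Explicit autopipeline configuration; values mirror the new defaults,
	// with MaxFlushDelay raised to trade ~100µs of latency for lower CPU use.
	client := redis.NewClient(&redis.Options{
		Addr: ":6379",
		AutoPipelineConfig: &redis.AutoPipelineConfig{
			MaxBatchSize:         50,
			FlushInterval:        time.Millisecond,
			MaxConcurrentBatches: 10,
			UseRingBuffer:        true,
			RingBufferSize:       1024,
			MaxFlushDelay:        100 * time.Microsecond,
		},
	})
	defer client.Close()

	ap := client.AutoPipeline()
	defer ap.Close()

	// Commands issued through the autopipeliner are batched transparently.
	ap.Set(ctx, "key", "value", 0)
	fmt.Println(ap.Get(ctx, "key").Val())
}
```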
@@ -74,14 +106,24 @@ func (c *autoPipelineCmd) String() string {
//
// This provides significant performance improvements for workloads with many
// concurrent small operations, as it reduces the number of network round-trips.
//
// AutoPipeliner implements the Cmdable interface, so you can use it like a regular client:
//
// ap := client.AutoPipeline()
// ap.Set(ctx, "key", "value", 0)
// ap.Get(ctx, "key")
// ap.Close()
type AutoPipeliner struct {
cmdable // Embed cmdable to get all Redis command methods
pipeliner pipelinerClient
config *AutoPipelineConfig
// Command queue - hybrid approach for best performance
// Command queue - either slice-based or ring buffer
mu sync.Mutex
queue []*queuedCmd
queueLen atomic.Int32 // Fast path check without lock
queue []*queuedCmd // Slice-based queue (legacy)
ring *autoPipelineRing // Ring buffer queue (high-performance)
queueLen atomic.Int32 // Fast path check without lock
// Flush control
flushCh chan struct{} // Signal to flush immediately
@@ -109,13 +151,22 @@ func NewAutoPipeliner(pipeliner pipelinerClient, config *AutoPipelineConfig) *Au
ap := &AutoPipeliner{
pipeliner: pipeliner,
config: config,
queue: make([]*queuedCmd, 0, config.MaxBatchSize),
flushCh: make(chan struct{}, 1),
sem: make(chan struct{}, config.MaxConcurrentBatches),
ctx: ctx,
cancel: cancel,
}
// Initialize cmdable to route all commands through Process
ap.cmdable = ap.Process
// Initialize queue based on configuration
if config.UseRingBuffer {
ap.ring = newAutoPipelineRing(config.RingBufferSize)
} else {
ap.queue = make([]*queuedCmd, 0, config.MaxBatchSize)
}
// Start background flusher
ap.wg.Add(1)
go ap.flusher()
@@ -140,6 +191,15 @@ func (ap *AutoPipeliner) Do(ctx context.Context, args ...interface{}) Cmder {
cmd.SetErr(ErrClosed)
return cmd
}
// Check if this is a blocking command (has read timeout set)
// Blocking commands like BLPOP, BRPOP, BZMPOP should not be autopipelined
if cmd.readTimeout() != nil {
// Execute blocking commands directly without autopipelining
_ = ap.pipeliner.Process(ctx, cmd)
return cmd
}
done := ap.process(ctx, cmd)
return &autoPipelineCmd{Cmder: cmd, done: done}
}
@@ -152,6 +212,13 @@ func (ap *AutoPipeliner) Do(ctx context.Context, args ...interface{}) Cmder {
//
// For sequential usage, use Do() instead.
func (ap *AutoPipeliner) Process(ctx context.Context, cmd Cmder) error {
// Check if this is a blocking command (has read timeout set)
// Blocking commands like BLPOP, BRPOP, BZMPOP should not be autopipelined
if cmd.readTimeout() != nil {
// Execute blocking commands directly without autopipelining
return ap.pipeliner.Process(ctx, cmd)
}
_ = ap.process(ctx, cmd)
return nil
}
@@ -165,6 +232,14 @@ func (ap *AutoPipeliner) process(ctx context.Context, cmd Cmder) <-chan struct{}
return closedCh
}
// Use ring buffer if enabled
if ap.config.UseRingBuffer {
done := ap.ring.putOne(cmd)
// putOne will signal the flusher via condition variable if needed
return done
}
// Legacy slice-based queue
// Create queued command with done channel
qc := &queuedCmd{
cmd: cmd,
@@ -176,16 +251,12 @@ func (ap *AutoPipeliner) process(ctx context.Context, cmd Cmder) <-chan struct{}
ap.queue = append(ap.queue, qc)
queueLen := len(ap.queue)
ap.queueLen.Store(int32(queueLen))
// Trigger immediate flush if batch is full
shouldFlush := queueLen >= ap.config.MaxBatchSize
ap.mu.Unlock()
if shouldFlush {
select {
case ap.flushCh <- struct{}{}:
default:
}
// Always signal the flusher (non-blocking)
select {
case ap.flushCh <- struct{}{}:
default:
}
return qc.done
}
@@ -195,16 +266,12 @@ func (ap *AutoPipeliner) process(ctx context.Context, cmd Cmder) <-chan struct{}
ap.queue = append(ap.queue, qc)
queueLen := len(ap.queue)
ap.queueLen.Store(int32(queueLen))
// Trigger immediate flush if batch is full
shouldFlush := queueLen >= ap.config.MaxBatchSize
ap.mu.Unlock()
if shouldFlush {
select {
case ap.flushCh <- struct{}{}:
default:
}
// Always signal the flusher (non-blocking)
select {
case ap.flushCh <- struct{}{}:
default:
}
return qc.done
@@ -241,6 +308,11 @@ func (ap *AutoPipeliner) Close() error {
// Cancel context to stop flusher
ap.cancel()
// Wake up flusher if it's waiting
if ap.config.UseRingBuffer {
ap.ring.wakeAll()
}
// Wait for flusher to finish
ap.wg.Wait()
@@ -251,76 +323,152 @@ func (ap *AutoPipeliner) Close() error {
func (ap *AutoPipeliner) flusher() {
defer ap.wg.Done()
// Adaptive delays:
// - Single command: flush almost immediately (1ns) to minimize latency
// - Multiple commands: wait a bit (10µs) to allow batching
const singleCmdDelay = 1 * time.Nanosecond
const batchDelay = 10 * time.Microsecond
if !ap.config.UseRingBuffer {
// Legacy slice-based flusher
ap.flusherSlice()
return
}
// Start with batch delay
timer := time.NewTimer(batchDelay)
defer timer.Stop()
currentDelay := batchDelay
// Ring buffer flusher
var (
cmds = make([]Cmder, 0, ap.config.MaxBatchSize)
doneChans = make([]chan struct{}, 0, ap.config.MaxBatchSize)
)
for {
// Try to get next command (non-blocking)
cmd, done := ap.ring.nextWriteCmd()
if cmd == nil {
// No command available
// If we have buffered commands, execute them first
if len(cmds) > 0 {
ap.executeBatch(cmds, doneChans)
cmds = cmds[:0]
doneChans = doneChans[:0]
}
// Check for shutdown before blocking
select {
case <-ap.ctx.Done():
return
default:
}
// Wait for next command (blocking)
// This will be woken up by wakeAll() during shutdown
cmd, done = ap.ring.waitForWrite()
// If nil, ring is closed
if cmd == nil {
return
}
}
// Add command to batch
cmds = append(cmds, cmd)
doneChans = append(doneChans, done)
// Execute batch if full
if len(cmds) >= ap.config.MaxBatchSize {
ap.executeBatch(cmds, doneChans)
cmds = cmds[:0]
doneChans = doneChans[:0]
}
}
}
// executeBatch executes a batch of commands.
func (ap *AutoPipeliner) executeBatch(cmds []Cmder, doneChans []chan struct{}) {
if len(cmds) == 0 {
return
}
// Acquire semaphore (limit concurrent batches)
select {
case ap.sem <- struct{}{}:
defer func() {
<-ap.sem
}()
case <-ap.ctx.Done():
// Context cancelled, set error on all commands and notify
for i, cmd := range cmds {
cmd.SetErr(ErrClosed)
doneChans[i] <- struct{}{} // Send signal instead of close
ap.ring.finishCmd()
}
return
}
// Fast path for single command
if len(cmds) == 1 {
_ = ap.pipeliner.Process(context.Background(), cmds[0])
doneChans[0] <- struct{}{} // Send signal instead of close
ap.ring.finishCmd()
return
}
// Execute pipeline for multiple commands
pipe := ap.pipeliner.Pipeline()
for _, cmd := range cmds {
_ = pipe.Process(context.Background(), cmd)
}
// Execute and wait for completion
_, _ = pipe.Exec(context.Background())
// Notify completion and finish slots
for _, done := range doneChans {
done <- struct{}{} // Send signal instead of close
ap.ring.finishCmd()
}
}
// flusherSlice is the legacy slice-based flusher.
func (ap *AutoPipeliner) flusherSlice() {
for {
// Wait for a command to arrive
select {
case <-ap.flushCh:
// Command arrived, continue
case <-ap.ctx.Done():
// Final flush before shutdown
ap.flushBatch()
ap.flushBatchSlice()
return
}
case <-ap.flushCh:
// Immediate flush requested (batch full)
if !timer.Stop() {
select {
case <-timer.C:
default:
}
// Drain any additional signals
for {
select {
case <-ap.flushCh:
default:
goto drained
}
ap.flushBatch()
}
drained:
// Reset timer based on remaining queue
qLen := ap.queueLen.Load()
if qLen == 1 {
currentDelay = singleCmdDelay
} else {
currentDelay = batchDelay
}
timer.Reset(currentDelay)
case <-timer.C:
qLen := ap.queueLen.Load()
if qLen > 0 {
ap.flushBatch()
// Flush all pending commands
for ap.Len() > 0 {
select {
case <-ap.ctx.Done():
ap.flushBatchSlice()
return
default:
}
// Adaptive delay based on queue size after flush
qLen = ap.queueLen.Load()
var nextDelay time.Duration
if qLen == 1 {
// Single command waiting - flush very quickly
nextDelay = singleCmdDelay
} else if qLen > 1 {
// Multiple commands - use batch delay to accumulate more
nextDelay = batchDelay
} else {
// Empty queue - use batch delay
nextDelay = batchDelay
}
ap.flushBatchSlice()
// Only reset timer if delay changed
if nextDelay != currentDelay {
currentDelay = nextDelay
timer.Reset(nextDelay)
} else {
timer.Reset(currentDelay)
if ap.config.MaxFlushDelay > 0 && ap.Len() > 0 {
time.Sleep(ap.config.MaxFlushDelay)
}
}
}
}
// flushBatch flushes the current batch of commands.
func (ap *AutoPipeliner) flushBatch() {
// flushBatchSlice flushes commands from the slice-based queue (legacy).
func (ap *AutoPipeliner) flushBatchSlice() {
// Get commands from queue
ap.mu.Lock()
if len(ap.queue) == 0 {
@@ -379,5 +527,66 @@ func (ap *AutoPipeliner) flushBatch() {
// Len returns the current number of queued commands.
func (ap *AutoPipeliner) Len() int {
if ap.config.UseRingBuffer {
return ap.ring.len()
}
return int(ap.queueLen.Load())
}
// Pipeline returns a new pipeline that uses the underlying pipeliner.
// This allows you to create a traditional pipeline from an autopipeliner.
func (ap *AutoPipeliner) Pipeline() Pipeliner {
return ap.pipeliner.Pipeline()
}
// AutoPipeline returns itself.
// This satisfies the Cmdable interface.
func (ap *AutoPipeliner) AutoPipeline() *AutoPipeliner {
return ap
}
// Pipelined executes a function in a pipeline context.
// This is a convenience method that creates a pipeline, executes the function,
// and returns the results.
func (ap *AutoPipeliner) Pipelined(ctx context.Context, fn func(Pipeliner) error) ([]Cmder, error) {
return ap.pipeliner.Pipeline().Pipelined(ctx, fn)
}
// TxPipelined executes a function in a transaction pipeline context.
// This is a convenience method that creates a transaction pipeline, executes the function,
// and returns the results.
//
// Note: This uses the underlying client's TxPipeline if available (Client, Ring, ClusterClient).
// For other clients, this will panic.
func (ap *AutoPipeliner) TxPipelined(ctx context.Context, fn func(Pipeliner) error) ([]Cmder, error) {
// Try to get TxPipeline from the underlying client
// This works for Client, Ring, and ClusterClient
type txPipeliner interface {
TxPipeline() Pipeliner
}
if txp, ok := ap.pipeliner.(txPipeliner); ok {
return txp.TxPipeline().Pipelined(ctx, fn)
}
panic("redis: TxPipelined not supported by this client type")
}
// TxPipeline returns a new transaction pipeline that uses the underlying pipeliner.
// This allows you to create a traditional transaction pipeline from an autopipeliner.
//
// Note: This uses the underlying client's TxPipeline if available (Client, Ring, ClusterClient).
// For other clients, this will panic.
func (ap *AutoPipeliner) TxPipeline() Pipeliner {
// Try to get TxPipeline from the underlying client
// This works for Client, Ring, and ClusterClient
type txPipeliner interface {
TxPipeline() Pipeliner
}
if txp, ok := ap.pipeliner.(txPipeliner); ok {
return txp.TxPipeline()
}
panic("redis: TxPipeline not supported by this client type")
}


@@ -42,20 +42,20 @@ func BenchmarkManualPipeline(b *testing.B) {
const batchSize = 100
b.ResetTimer()
for i := 0; i < b.N; i += batchSize {
pipe := client.Pipeline()
end := i + batchSize
if end > b.N {
end = b.N
}
for j := i; j < end; j++ {
key := fmt.Sprintf("key%d", j)
pipe.Set(ctx, key, j, 0)
}
if _, err := pipe.Exec(ctx); err != nil {
b.Fatal(err)
}
@@ -87,7 +87,7 @@ func BenchmarkAutoPipeline(b *testing.B) {
i++
}
})
b.StopTimer()
// Wait for final flush
time.Sleep(50 * time.Millisecond)
@@ -165,7 +165,7 @@ func BenchmarkConcurrentAutoPipeline(b *testing.B) {
defer ap.Close()
b.ResetTimer()
var wg sync.WaitGroup
commandsPerGoroutine := b.N / bm.goroutines
if commandsPerGoroutine == 0 {
@@ -183,7 +183,7 @@ func BenchmarkConcurrentAutoPipeline(b *testing.B) {
}(g)
}
wg.Wait()
b.StopTimer()
time.Sleep(50 * time.Millisecond)
})
@@ -215,7 +215,7 @@ func BenchmarkAutoPipelineBatchSizes(b *testing.B) {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
}
b.StopTimer()
time.Sleep(50 * time.Millisecond)
})
@@ -252,7 +252,7 @@ func BenchmarkAutoPipelineFlushIntervals(b *testing.B) {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
}
b.StopTimer()
time.Sleep(100 * time.Millisecond)
})
@@ -272,12 +272,12 @@ func BenchmarkThroughput(b *testing.B) {
defer client.Close()
b.ResetTimer()
var wg sync.WaitGroup
var count int64
deadline := time.Now().Add(duration)
wg.Add(numGoroutines)
for g := 0; g < numGoroutines; g++ {
go func(goroutineID int) {
@@ -295,7 +295,7 @@ func BenchmarkThroughput(b *testing.B) {
}(g)
}
wg.Wait()
b.ReportMetric(float64(count)/duration.Seconds(), "ops/sec")
})
@@ -311,12 +311,12 @@ func BenchmarkThroughput(b *testing.B) {
defer ap.Close()
b.ResetTimer()
var wg sync.WaitGroup
var count int64
deadline := time.Now().Add(duration)
wg.Add(numGoroutines)
for g := 0; g < numGoroutines; g++ {
go func(goroutineID int) {
@@ -331,11 +331,200 @@ func BenchmarkThroughput(b *testing.B) {
}(g)
}
wg.Wait()
b.StopTimer()
time.Sleep(100 * time.Millisecond)
b.ReportMetric(float64(count)/duration.Seconds(), "ops/sec")
})
}
// BenchmarkRingBufferVsSliceQueue compares ring buffer with legacy slice queue
func BenchmarkRingBufferVsSliceQueue(b *testing.B) {
b.Run("RingBuffer", func(b *testing.B) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: &redis.AutoPipelineConfig{
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: true,
RingBufferSize: 1024,
},
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
b.ResetTimer()
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
i++
}
})
})
b.Run("SliceQueue", func(b *testing.B) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: &redis.AutoPipelineConfig{
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: false, // Use legacy slice queue
},
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
b.ResetTimer()
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
i++
}
})
})
}
// BenchmarkMaxFlushDelay benchmarks different MaxFlushDelay values
func BenchmarkMaxFlushDelay(b *testing.B) {
delays := []time.Duration{
0,
50 * time.Microsecond,
100 * time.Microsecond,
200 * time.Microsecond,
}
for _, delay := range delays {
b.Run(fmt.Sprintf("delay_%dus", delay.Microseconds()), func(b *testing.B) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: &redis.AutoPipelineConfig{
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: true,
RingBufferSize: 1024,
MaxFlushDelay: delay,
},
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
b.ResetTimer()
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
i++
}
})
})
}
}
// BenchmarkBufferSizes benchmarks different buffer sizes
func BenchmarkBufferSizes(b *testing.B) {
bufferSizes := []int{
32 * 1024, // 32 KiB
64 * 1024, // 64 KiB (default)
128 * 1024, // 128 KiB
256 * 1024, // 256 KiB
512 * 1024, // 512 KiB
}
for _, size := range bufferSizes {
b.Run(fmt.Sprintf("buffer_%dKiB", size/1024), func(b *testing.B) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
ReadBufferSize: size,
WriteBufferSize: size,
AutoPipelineConfig: &redis.AutoPipelineConfig{
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: true,
RingBufferSize: 1024,
},
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
b.ResetTimer()
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
i++
}
})
})
}
}
// BenchmarkRingBufferSizes benchmarks different ring buffer sizes
func BenchmarkRingBufferSizes(b *testing.B) {
ringSizes := []int{
256,
512,
1024, // default
2048,
4096,
}
for _, size := range ringSizes {
b.Run(fmt.Sprintf("ring_%d", size), func(b *testing.B) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: &redis.AutoPipelineConfig{
MaxBatchSize: 50,
FlushInterval: time.Millisecond,
MaxConcurrentBatches: 10,
UseRingBuffer: true,
RingBufferSize: size,
},
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
b.ResetTimer()
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
key := fmt.Sprintf("key%d", i)
ap.Do(ctx, "SET", key, i)
i++
}
})
})
}
}


@@ -0,0 +1,74 @@
package redis_test
import (
"context"
"time"
"github.com/redis/go-redis/v9"
. "github.com/bsm/ginkgo/v2"
. "github.com/bsm/gomega"
)
var _ = Describe("AutoPipeline Blocking Commands", func() {
ctx := context.Background()
var client *redis.Client
var ap *redis.AutoPipeliner
BeforeEach(func() {
client = redis.NewClient(&redis.Options{
Addr: redisAddr,
})
Expect(client.FlushDB(ctx).Err()).NotTo(HaveOccurred())
ap = client.AutoPipeline()
})
AfterEach(func() {
if ap != nil {
Expect(ap.Close()).NotTo(HaveOccurred())
}
Expect(client.Close()).NotTo(HaveOccurred())
})
It("should not autopipeline blocking commands", func() {
// Push a value to the list
Expect(client.RPush(ctx, "list", "value").Err()).NotTo(HaveOccurred())
// BLPOP should execute immediately without autopipelining
start := time.Now()
result := ap.Do(ctx, "BLPOP", "list", "1")
val, err := result.(*redis.StringSliceCmd).Result()
elapsed := time.Since(start)
Expect(err).NotTo(HaveOccurred())
Expect(val).To(Equal([]string{"list", "value"}))
// Should complete quickly since value is available
Expect(elapsed).To(BeNumerically("<", 100*time.Millisecond))
})
It("should mix blocking and non-blocking commands", func() {
// Push values
Expect(client.RPush(ctx, "list3", "a", "b", "c").Err()).NotTo(HaveOccurred())
Expect(client.Set(ctx, "key1", "value1", 0).Err()).NotTo(HaveOccurred())
// Mix blocking and non-blocking commands
blpopCmd := ap.Do(ctx, "BLPOP", "list3", "1")
getCmd := ap.Do(ctx, "GET", "key1")
brpopCmd := ap.Do(ctx, "BRPOP", "list3", "1")
// Get results
blpopVal, err := blpopCmd.(*redis.StringSliceCmd).Result()
Expect(err).NotTo(HaveOccurred())
Expect(blpopVal).To(Equal([]string{"list3", "a"}))
getVal, err := getCmd.(*redis.StringCmd).Result()
Expect(err).NotTo(HaveOccurred())
Expect(getVal).To(Equal("value1"))
brpopVal, err := brpopCmd.(*redis.StringSliceCmd).Result()
Expect(err).NotTo(HaveOccurred())
Expect(brpopVal).To(Equal([]string{"list3", "c"}))
})
})


@@ -0,0 +1,198 @@
package redis_test
import (
"context"
"time"
"github.com/redis/go-redis/v9"
. "github.com/bsm/ginkgo/v2"
. "github.com/bsm/gomega"
)
var _ = Describe("AutoPipeline Cmdable Interface", func() {
ctx := context.Background()
var client *redis.Client
var ap *redis.AutoPipeliner
BeforeEach(func() {
client = redis.NewClient(&redis.Options{
Addr: redisAddr,
})
Expect(client.FlushDB(ctx).Err()).NotTo(HaveOccurred())
ap = client.AutoPipeline()
})
AfterEach(func() {
if ap != nil {
Expect(ap.Close()).NotTo(HaveOccurred())
}
Expect(client.Close()).NotTo(HaveOccurred())
})
It("should support string commands", func() {
// Use autopipeline like a regular client
setCmd := ap.Set(ctx, "key1", "value1", 0)
getCmd := ap.Get(ctx, "key1")
incrCmd := ap.Incr(ctx, "counter")
decrCmd := ap.Decr(ctx, "counter")
// Get results
Expect(setCmd.Err()).NotTo(HaveOccurred())
Expect(setCmd.Val()).To(Equal("OK"))
val, err := getCmd.Result()
Expect(err).NotTo(HaveOccurred())
Expect(val).To(Equal("value1"))
Expect(incrCmd.Val()).To(Equal(int64(1)))
Expect(decrCmd.Val()).To(Equal(int64(0)))
})
It("should support hash commands", func() {
// Use hash commands
hsetCmd := ap.HSet(ctx, "hash1", "field1", "value1", "field2", "value2")
hgetCmd := ap.HGet(ctx, "hash1", "field1")
hgetallCmd := ap.HGetAll(ctx, "hash1")
// Get results
Expect(hsetCmd.Val()).To(Equal(int64(2)))
Expect(hgetCmd.Val()).To(Equal("value1"))
Expect(hgetallCmd.Val()).To(Equal(map[string]string{
"field1": "value1",
"field2": "value2",
}))
})
It("should support list commands", func() {
// Use list commands
rpushCmd := ap.RPush(ctx, "list1", "a", "b", "c")
lrangeCmd := ap.LRange(ctx, "list1", 0, -1)
lpopCmd := ap.LPop(ctx, "list1")
// Get results
Expect(rpushCmd.Val()).To(Equal(int64(3)))
Expect(lrangeCmd.Val()).To(Equal([]string{"a", "b", "c"}))
Expect(lpopCmd.Val()).To(Equal("a"))
})
It("should support set commands", func() {
// Use set commands
saddCmd := ap.SAdd(ctx, "set1", "member1", "member2", "member3")
smembersCmd := ap.SMembers(ctx, "set1")
sismemberCmd := ap.SIsMember(ctx, "set1", "member1")
// Get results
Expect(saddCmd.Val()).To(Equal(int64(3)))
Expect(smembersCmd.Val()).To(ConsistOf("member1", "member2", "member3"))
Expect(sismemberCmd.Val()).To(BeTrue())
})
It("should support sorted set commands", func() {
// Use sorted set commands
zaddCmd := ap.ZAdd(ctx, "zset1",
redis.Z{Score: 1, Member: "one"},
redis.Z{Score: 2, Member: "two"},
redis.Z{Score: 3, Member: "three"},
)
zrangeCmd := ap.ZRange(ctx, "zset1", 0, -1)
zscoreCmd := ap.ZScore(ctx, "zset1", "two")
// Get results
Expect(zaddCmd.Val()).To(Equal(int64(3)))
Expect(zrangeCmd.Val()).To(Equal([]string{"one", "two", "three"}))
Expect(zscoreCmd.Val()).To(Equal(float64(2)))
})
It("should support generic commands", func() {
// Set some keys
ap.Set(ctx, "key1", "value1", 0)
ap.Set(ctx, "key2", "value2", 0)
ap.Set(ctx, "key3", "value3", 0)
// Use generic commands
existsCmd := ap.Exists(ctx, "key1", "key2", "key3")
delCmd := ap.Del(ctx, "key1")
ttlCmd := ap.TTL(ctx, "key2")
// Get results
Expect(existsCmd.Val()).To(Equal(int64(3)))
Expect(delCmd.Val()).To(Equal(int64(1)))
Expect(ttlCmd.Val()).To(Equal(time.Duration(-1))) // No expiration
})
It("should support Do method for custom commands", func() {
// Use Do for custom commands
setCmd := ap.Do(ctx, "SET", "custom_key", "custom_value")
getCmd := ap.Do(ctx, "GET", "custom_key")
// Get results
setVal, err := setCmd.(*redis.Cmd).Result()
Expect(err).NotTo(HaveOccurred())
Expect(setVal).To(Equal("OK"))
getVal, err := getCmd.(*redis.Cmd).Result()
Expect(err).NotTo(HaveOccurred())
Expect(getVal).To(Equal("custom_value"))
})
It("should support Pipeline method", func() {
// Get a traditional pipeline from autopipeliner
pipe := ap.Pipeline()
Expect(pipe).NotTo(BeNil())
// Use the pipeline
pipe.Set(ctx, "pipe_key", "pipe_value", 0)
pipe.Get(ctx, "pipe_key")
cmds, err := pipe.Exec(ctx)
Expect(err).NotTo(HaveOccurred())
Expect(cmds).To(HaveLen(2))
})
It("should support Pipelined method", func() {
// Use Pipelined for convenience
cmds, err := ap.Pipelined(ctx, func(pipe redis.Pipeliner) error {
pipe.Set(ctx, "pipelined_key", "pipelined_value", 0)
pipe.Get(ctx, "pipelined_key")
return nil
})
Expect(err).NotTo(HaveOccurred())
Expect(cmds).To(HaveLen(2))
Expect(cmds[0].(*redis.StatusCmd).Val()).To(Equal("OK"))
Expect(cmds[1].(*redis.StringCmd).Val()).To(Equal("pipelined_value"))
})
It("should support AutoPipeline method", func() {
// AutoPipeline should return itself
ap2 := ap.AutoPipeline()
Expect(ap2).To(Equal(ap))
})
It("should mix autopipelined and direct commands", func() {
// Use autopipeline commands
ap.Set(ctx, "ap_key1", "ap_value1", 0)
ap.Set(ctx, "ap_key2", "ap_value2", 0)
// Use traditional pipeline
pipe := ap.Pipeline()
pipe.Set(ctx, "pipe_key1", "pipe_value1", 0)
pipe.Set(ctx, "pipe_key2", "pipe_value2", 0)
_, err := pipe.Exec(ctx)
Expect(err).NotTo(HaveOccurred())
// Verify all keys exist
val1, _ := ap.Get(ctx, "ap_key1").Result()
val2, _ := ap.Get(ctx, "ap_key2").Result()
val3, _ := ap.Get(ctx, "pipe_key1").Result()
val4, _ := ap.Get(ctx, "pipe_key2").Result()
Expect(val1).To(Equal("ap_value1"))
Expect(val2).To(Equal("ap_value2"))
Expect(val3).To(Equal("pipe_value1"))
Expect(val4).To(Equal("pipe_value2"))
})
})

autopipeline_ring.go (new file, 236 lines)

@@ -0,0 +1,236 @@
package redis
import (
"math/bits"
"sync"
"sync/atomic"
)
// autoPipelineRing is a pre-allocated ring buffer queue for autopipelining.
// It provides lock-free enqueue and FIFO ordering guarantees.
//
// Ring buffer architecture:
// - Pre-allocated slots (no allocations during enqueue)
// - Per-slot channels for request-response matching
// - Atomic write pointer for lock-free enqueue
// - Separate read pointers for write and read goroutines
//
// The ring buffer uses three pointers:
// - write: Where app goroutines add commands (atomic increment)
// - read1: Where flush goroutine reads commands to send
// - read2: Where the flush goroutine clears completed slots (advanced in finishCmd)
type autoPipelineRing struct {
store []autoPipelineSlot // Pre-allocated slots
mask uint32 // Size - 1 (for fast modulo via bitwise AND)
write uint32 // Write position (atomic, incremented by app goroutines)
read1 uint32 // Read position for flush goroutine
read2 uint32 // Read position for clearing completed slots (advanced in finishCmd)
cmds []Cmder // Persistent buffer for collecting commands (reused, no allocations)
doneChans []chan struct{} // Persistent buffer for collecting done channels (reused, no allocations)
}
// autoPipelineSlot represents a single command slot in the ring buffer.
type autoPipelineSlot struct {
c1 *sync.Cond // Condition variable for write synchronization (shared mutex with c2)
c2 *sync.Cond // Condition variable for wait/signal (shared mutex with c1)
cmd Cmder // The command to execute
done chan struct{} // Completion notification channel (pre-allocated, reused)
mark uint32 // State: 0=empty, 1=queued, 2=sent, 3=closed (guarded by the slot mutex)
slept bool // Whether writer goroutine is sleeping on this slot
}
// State constants for autoPipelineSlot.mark
const (
apSlotEmpty uint32 = 0 // Slot is empty and available
apSlotQueued uint32 = 1 // Command queued, ready to be sent
apSlotSent uint32 = 2 // Command sent, waiting for response
apSlotClosed uint32 = 3 // Ring is closed, stop waiting
)
// newAutoPipelineRing creates a new ring buffer with the specified size.
// Size will be rounded up to the next power of 2 for efficient modulo operations.
func newAutoPipelineRing(size int) *autoPipelineRing {
// Round up to power of 2 for fast modulo via bitwise AND
if size <= 0 {
size = 1024 // Default size
}
if size&(size-1) != 0 {
// Not a power of 2, round up
size = 1 << (32 - bits.LeadingZeros32(uint32(size)))
}
r := &autoPipelineRing{
store: make([]autoPipelineSlot, size),
mask: uint32(size - 1),
cmds: make([]Cmder, 0, size), // Persistent buffer, reused
doneChans: make([]chan struct{}, 0, size), // Persistent buffer, reused
}
// Initialize each slot with condition variables and pre-allocated channel
for i := range r.store {
m := &sync.Mutex{}
r.store[i].c1 = sync.NewCond(m)
r.store[i].c2 = sync.NewCond(m) // Share the same mutex
r.store[i].done = make(chan struct{}, 1) // Buffered channel for signal (not close)
}
return r
}
// putOne enqueues a command into the ring buffer.
// Returns the done channel that will be signaled when the command completes.
//
// Ring buffer enqueue implementation:
// - Atomic increment for write position
// - Wait on condition variable if slot is full
// - Signal reader if it's sleeping
func (r *autoPipelineRing) putOne(cmd Cmder) <-chan struct{} {
// Atomic increment to get next slot
slot := &r.store[atomic.AddUint32(&r.write, 1)&r.mask]
// Lock the slot
slot.c1.L.Lock()
// Wait if slot is not empty (mark != 0)
for slot.mark != 0 {
slot.c1.Wait()
}
// Store command and mark as queued
slot.cmd = cmd
slot.mark = 1
s := slot.slept
slot.c1.L.Unlock()
// If reader is sleeping, wake it up
if s {
slot.c2.Broadcast()
}
return slot.done
}
// nextWriteCmd tries to get the next command (non-blocking).
// Returns nil if no command is available.
// Should only be called by the flush goroutine.
func (r *autoPipelineRing) nextWriteCmd() (Cmder, chan struct{}) {
r.read1++
p := r.read1 & r.mask
slot := &r.store[p]
slot.c1.L.Lock()
if slot.mark == 1 {
cmd := slot.cmd
done := slot.done
slot.mark = 2
slot.c1.L.Unlock()
return cmd, done
}
// No command available, rollback read position
r.read1--
slot.c1.L.Unlock()
return nil, nil
}
// waitForWrite waits for the next command (blocking).
// Should only be called by the flush goroutine.
// Returns nil if the ring is closed.
func (r *autoPipelineRing) waitForWrite() (Cmder, chan struct{}) {
r.read1++
p := r.read1 & r.mask
slot := &r.store[p]
slot.c1.L.Lock()
// Wait until command is available (mark == 1) or closed (mark == 3)
for slot.mark != 1 && slot.mark != apSlotClosed {
slot.slept = true
slot.c2.Wait() // c1 and c2 share the same mutex
slot.slept = false
}
// Check if closed
if slot.mark == apSlotClosed {
r.read1-- // Rollback read position
slot.c1.L.Unlock()
return nil, nil
}
cmd := slot.cmd
done := slot.done
slot.mark = 2
slot.c1.L.Unlock()
return cmd, done
}
// finishCmd marks a command as completed and clears the slot.
// Should only be called by the flush goroutine.
func (r *autoPipelineRing) finishCmd() {
r.read2++
p := r.read2 & r.mask
slot := &r.store[p]
slot.c1.L.Lock()
if slot.mark == 2 {
// Drain the done channel before reusing
select {
case <-slot.done:
default:
}
// Clear slot for reuse
slot.cmd = nil
slot.mark = 0
}
slot.c1.L.Unlock()
slot.c1.Signal() // Wake up any writer waiting on this slot
}
// len returns the approximate number of queued commands.
// This is an estimate and may not be exact due to concurrent access.
func (r *autoPipelineRing) len() int {
write := atomic.LoadUint32(&r.write)
read := atomic.LoadUint32(&r.read1)
// Handle wrap-around
if write >= read {
return int(write - read)
}
// Wrapped around
return int(write + (^uint32(0) - read) + 1)
}
// cap returns the capacity of the ring buffer.
func (r *autoPipelineRing) cap() int {
return len(r.store)
}
// reset resets the ring buffer to empty state.
// This should only be called when no goroutines are accessing the ring.
func (r *autoPipelineRing) reset() {
atomic.StoreUint32(&r.write, 0)
atomic.StoreUint32(&r.read1, 0)
atomic.StoreUint32(&r.read2, 0)
for i := range r.store {
r.store[i].c1.L.Lock()
r.store[i].cmd = nil
r.store[i].mark = 0
r.store[i].slept = false
r.store[i].c1.L.Unlock()
}
}
// wakeAll wakes up all waiting goroutines.
// This is used during shutdown to unblock the flusher.
func (r *autoPipelineRing) wakeAll() {
for i := range r.store {
r.store[i].c1.L.Lock()
if r.store[i].mark == 0 {
r.store[i].mark = apSlotClosed
}
r.store[i].c1.L.Unlock()
r.store[i].c2.Broadcast()
}
}
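As an aside (not part of the diff), the power-of-two rounding and mask-based indexing used by `newAutoPipelineRing` can be checked in isolation:

```go
package main

import (
	"fmt"
	"math/bits"
)

// roundUpPow2 mirrors the rounding in newAutoPipelineRing: sizes that are not
// already a power of two are rounded up to the next one.
func roundUpPow2(size int) int {
	if size <= 0 {
		return 1024
	}
	if size&(size-1) != 0 {
		size = 1 << (32 - bits.LeadingZeros32(uint32(size)))
	}
	return size
}

func main() {
	for _, n := range []int{1000, 1024, 1500} {
		size := roundUpPow2(n)
		mask := uint32(size - 1)
		// With a power-of-two size, index&mask equals index%size but avoids
		// the division, which is why the ring insists on power-of-two sizes.
		fmt.Printf("requested %4d -> size %4d, write position 1500 maps to slot %d\n",
			n, size, 1500&mask)
	}
}
```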


@@ -446,3 +446,73 @@ func TestAutoPipelineConcurrency(t *testing.T) {
}
}
// TestAutoPipelineSingleCommandNoBlock verifies that single commands don't block
func TestAutoPipelineSingleCommandNoBlock(t *testing.T) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: redis.DefaultAutoPipelineConfig(),
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
start := time.Now()
cmd := ap.Do(ctx, "PING")
err := cmd.Err()
elapsed := time.Since(start)
if err != nil {
t.Fatalf("Command failed: %v", err)
}
// The command is wrapped in autoPipelineCmd, so we can't directly access Val()
// Just check that it completed without error
t.Logf("Command completed successfully")
// Single command should complete within 50ms (adaptive delay is 10ms)
if elapsed > 50*time.Millisecond {
t.Errorf("Single command took too long: %v (should be < 50ms)", elapsed)
}
t.Logf("Single command completed in %v", elapsed)
}
// TestAutoPipelineSequentialSingleThread verifies sequential single-threaded execution
func TestAutoPipelineSequentialSingleThread(t *testing.T) {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: ":6379",
AutoPipelineConfig: redis.DefaultAutoPipelineConfig(),
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
// Execute 10 commands sequentially in a single thread
start := time.Now()
for i := 0; i < 10; i++ {
key := fmt.Sprintf("test:key:%d", i)
t.Logf("Sending command %d", i)
cmd := ap.Do(ctx, "SET", key, i)
t.Logf("Waiting for command %d to complete", i)
err := cmd.Err()
if err != nil {
t.Fatalf("Command %d failed: %v", i, err)
}
t.Logf("Command %d completed", i)
}
elapsed := time.Since(start)
// Should complete reasonably fast (< 100ms for 10 commands)
if elapsed > 100*time.Millisecond {
t.Errorf("10 sequential commands took too long: %v (should be < 100ms)", elapsed)
}
t.Logf("10 sequential commands completed in %v (avg: %v per command)", elapsed, elapsed/10)
}


@@ -256,6 +256,7 @@ var (
_ Cmdable = (*Ring)(nil)
_ Cmdable = (*ClusterClient)(nil)
_ Cmdable = (*Pipeline)(nil)
_ Cmdable = (*AutoPipeliner)(nil)
)
type cmdable func(ctx context.Context, cmd Cmder) error


@@ -2,7 +2,7 @@
services:
redis:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.2.1-pre}
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4-RC1-pre}
platform: linux/amd64
container_name: redis-standalone
environment:
@@ -23,7 +23,7 @@ services:
- all
osscluster:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.2.1-pre}
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4-RC1-pre}
platform: linux/amd64
container_name: redis-osscluster
environment:
@@ -40,7 +40,7 @@ services:
- all
sentinel-cluster:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.2.1-pre}
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4-RC1-pre}
platform: linux/amd64
container_name: redis-sentinel-cluster
network_mode: "host"
@@ -60,7 +60,7 @@ services:
- all
sentinel:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.2.1-pre}
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4-RC1-pre}
platform: linux/amd64
container_name: redis-sentinel
depends_on:
@@ -84,7 +84,7 @@ services:
- all
ring-cluster:
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.2.1-pre}
image: ${CLIENT_LIBS_TEST_IMAGE:-redislabs/client-libs-test:8.4-RC1-pre}
platform: linux/amd64
container_name: redis-ring-cluster
environment:

example/basic/main.go (new file, 158 lines)

@@ -0,0 +1,158 @@
package main
import (
"context"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
func main() {
ctx := context.Background()
rdb := redis.NewClient(&redis.Options{
Addr: ":6379",
Password: "asdf",
Username: "default",
MinIdleConns: 100,
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
})
commandRunner, stopCommandRunner := NewCommandRunner(rdb)
defer stopCommandRunner()
commandRunner.FireCommandsUntilStop(ctx)
}
type CommandRunnerStats struct {
Operations int64
Errors int64
TimeoutErrors int64
ErrorsList []error
}
// CommandRunner provides utilities for running commands during tests
type CommandRunner struct {
client redis.UniversalClient
stopCh chan struct{}
operationCount atomic.Int64
errorCount atomic.Int64
timeoutErrors atomic.Int64
errors []error
errorsMutex sync.Mutex
}
// NewCommandRunner creates a new command runner
func NewCommandRunner(client redis.UniversalClient) (*CommandRunner, func()) {
stopCh := make(chan struct{})
cr := &CommandRunner{
client: client,
stopCh: stopCh,
errors: make([]error, 0),
}
return cr, cr.Stop
}
func (cr *CommandRunner) Stop() {
select {
case cr.stopCh <- struct{}{}:
close(cr.stopCh)
return
case <-time.After(500 * time.Millisecond):
return
}
}
func (cr *CommandRunner) Close() {
close(cr.stopCh)
}
// FireCommandsUntilStop runs commands continuously until stop signal
func (cr *CommandRunner) FireCommandsUntilStop(ctx context.Context) {
fmt.Printf("[CR] Starting command runner...\n")
defer fmt.Printf("[CR] Command runner stopped\n")
// High frequency for timeout testing
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
counter := 0
for {
select {
case <-cr.stopCh:
return
case <-ctx.Done():
return
case <-ticker.C:
poolSize := cr.client.PoolStats().IdleConns
if poolSize == 0 {
poolSize = 1
}
wg := sync.WaitGroup{}
for i := 0; i < int(poolSize); i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
key := fmt.Sprintf("timeout-test-key-%d-%d", counter, i)
value := fmt.Sprintf("timeout-test-value-%d-%d", counter, i)
// Use a short timeout context for individual operations
opCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
err := cr.client.Set(opCtx, key, value, time.Minute).Err()
cancel()
cr.operationCount.Add(1)
if err != nil {
if err == redis.ErrClosed || strings.Contains(err.Error(), "client is closed") {
select {
case <-cr.stopCh:
return
default:
}
return
}
fmt.Printf("Error: %v\n", err)
cr.errorCount.Add(1)
// Check if it's a timeout error
if isTimeoutError(err) {
cr.timeoutErrors.Add(1)
}
cr.errorsMutex.Lock()
cr.errors = append(cr.errors, err)
cr.errorsMutex.Unlock()
}
}(i)
}
wg.Wait()
counter++
}
}
}
func isTimeoutError(err error) bool {
return strings.Contains(err.Error(), "timeout")
}
// GetStats returns operation statistics
func (cr *CommandRunner) GetStats() CommandRunnerStats {
cr.errorsMutex.Lock()
defer cr.errorsMutex.Unlock()
errorList := make([]error, len(cr.errors))
copy(errorList, cr.errors)
stats := CommandRunnerStats{
Operations: cr.operationCount.Load(),
Errors: cr.errorCount.Load(),
TimeoutErrors: cr.timeoutErrors.Load(),
ErrorsList: errorList,
}
return stats
}


@@ -0,0 +1,247 @@
# Potential Concurrency Issues with State Machine
This document outlines potential concurrency issues that may occur when using the cluster client with the connection state machine under high load.
## Overview
The connection state machine manages connection lifecycle through atomic state transitions:
```
CREATED → INITIALIZING → IDLE ⇄ IN_USE
                              ↓
                  UNUSABLE (handoff/reauth)
                              ↓
                        IDLE / CLOSED
```
## Potential Issues
### 1. Race Conditions in State Transitions
**Scenario**: Multiple goroutines trying to acquire the same connection simultaneously.
**What happens**:
- Thread A: Reads connection state as IDLE
- Thread B: Reads connection state as IDLE (before A transitions it)
- Thread A: Attempts IDLE → IN_USE transition (succeeds via CAS)
- Thread B: Attempts IDLE → IN_USE transition (fails via CAS)
**Current mitigation**: The code uses Compare-And-Swap (CAS) operations in `TryAcquire()` to ensure only one goroutine can successfully transition the connection. The losing goroutine will get a different connection or create a new one.
**Test**: Run `go run *.go -mode=detect` and look for the "Race Condition Detection" test results.
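As an illustration of the CAS pattern (a standalone sketch, not the actual `internal/pool/conn_state.go` code; the state values and the `tryAcquire` name are assumptions for this example):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// Illustrative state values only; the real state machine lives in
// internal/pool/conn_state.go and may use different constants.
const (
	stateIdle  uint32 = 0
	stateInUse uint32 = 1
)

type conn struct {
	state atomic.Uint32 // zero value == stateIdle
}

// tryAcquire attempts the IDLE -> IN_USE transition. Among any number of
// concurrent callers, exactly one CompareAndSwap succeeds.
func (c *conn) tryAcquire() bool {
	return c.state.CompareAndSwap(stateIdle, stateInUse)
}

func main() {
	c := &conn{}
	var wins atomic.Int32
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if c.tryAcquire() {
				wins.Add(1)
			}
		}()
	}
	wg.Wait()
	fmt.Println("winners:", wins.Load()) // always 1; losers fall back to another connection
}
```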
### 2. Pool Exhaustion Under High Concurrency
**Scenario**: Many goroutines competing for a small pool of connections.
**What happens**:
- All connections are IN_USE
- New requests wait for a connection to become available
- If pool timeout is too short, requests fail with pool timeout errors
- If pool timeout is too long, requests queue up and latency increases
**Current mitigation**:
- Semaphore-based connection limiting with FIFO fairness
- Configurable pool timeout
- Pool size can be tuned per workload
**Test**: Run Example 2 or the "Extreme Contention" test to see this in action.
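Conceptually, the semaphore-plus-timeout behaviour can be modelled with a buffered channel; this is a sketch of the idea, not the pool's actual `internal/semaphore.go` implementation:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

var errPoolTimeout = errors.New("pool timeout: no connection available")

// acquire waits for one of the poolSize "connection" tokens, giving up after
// a PoolTimeout-style deadline. Blocked senders are serviced roughly in FIFO
// order, which is what gives waiters their fairness.
func acquire(sem chan struct{}, timeout time.Duration) error {
	select {
	case sem <- struct{}{}:
		return nil
	case <-time.After(timeout):
		return errPoolTimeout
	}
}

func release(sem chan struct{}) { <-sem }

func main() {
	const poolSize = 2
	sem := make(chan struct{}, poolSize)

	fmt.Println(acquire(sem, 100*time.Millisecond)) // <nil>
	fmt.Println(acquire(sem, 100*time.Millisecond)) // <nil>
	fmt.Println(acquire(sem, 100*time.Millisecond)) // pool timeout: no connection available
	release(sem)
	fmt.Println(acquire(sem, 100*time.Millisecond)) // <nil>
}
```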
### 3. State Machine Deadlock (Theoretical)
**Scenario**: A connection gets stuck in an intermediate state.
**What could happen**:
- Connection transitions to UNUSABLE for handoff/reauth
- Background operation fails or hangs
- Connection never transitions back to IDLE
- Connection is stuck in pool but unusable
**Current mitigation**:
- Connections in UNUSABLE state are placed at the end of the idle queue
- Pool's `popIdle()` tries multiple connections (up to `popAttempts`)
- Health checks remove stale connections
- Timeouts on all operations
**Test**: The "Connection Churn" test exercises rapid state transitions.
### 4. Thundering Herd on Pool Initialization
**Scenario**: Many goroutines start simultaneously with an empty pool.
**What happens**:
- All goroutines call Get() at the same time
- Pool is empty, so all create new connections
- Potential to exceed pool size temporarily
- High initial latency spike
**Current mitigation**:
- Semaphore limits concurrent connection creation
- Pool size checks before creating connections
- MinIdleConns can pre-warm the pool
**Test**: Run the "Thundering Herd" test to see this behavior.
### 5. Connection Reuse Inefficiency
**Scenario**: Connections are not reused efficiently under bursty load.
**What happens**:
- Burst of requests creates many connections
- Burst ends, connections become idle
- Next burst might create new connections instead of reusing idle ones
- Pool size grows unnecessarily
**Current mitigation**:
- LIFO (default) or FIFO pool ordering
- MaxIdleConns limits idle connection count
- Idle connection health checks
**Test**: Run the "Bursty Traffic" test to observe this pattern.
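For example, assuming the `PoolFIFO` and `MaxIdleConns` fields of go-redis v9's `ClusterOptions` (check your version), the reuse behaviour can be tuned like this:

```go
package main

import "github.com/redis/go-redis/v9"

func main() {
	client := redis.NewClusterClient(&redis.ClusterOptions{
		Addrs:        []string{"localhost:16600", "localhost:16601", "localhost:16602"},
		PoolFIFO:     true, // FIFO ordering: prefer the oldest idle connection
		MaxIdleConns: 10,   // cap how many idle connections survive a burst
	})
	defer client.Close()
}
```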
## How to Identify Issues
### Symptoms of State Machine Issues
1. **High pool timeout rate**: More than 1-2% of operations timing out
2. **Increasing latency**: Average latency growing over time
3. **Error bursts**: Multiple errors occurring in quick succession
4. **Slow operations**: Operations taking >100ms consistently
### Using the Example App
```bash
# Run all tests
go run *.go -mode=all
# Focus on issue detection
go run *.go -mode=detect
# Advanced monitoring with latency distribution
go run *.go -mode=advanced
```
### What to Look For
**Good indicators**:
- Success rate >99%
- Average latency <10ms
- No pool timeouts (or very few)
- Latency distribution: most operations in 0-5ms range
**Warning signs**:
- Success rate <95%
- Average latency >50ms
- Pool timeouts >1% of operations
- Many operations in >50ms latency bucket
- Error bursts detected
## Recommendations
### For Production Use
1. **Size the pool appropriately**:
- Start with `PoolSize = 10 * number of cluster nodes`
- Monitor pool timeout rate
- Increase if seeing >1% pool timeouts
2. **Set reasonable timeouts**:
- `PoolTimeout`: 3-5 seconds (time to wait for a connection)
- `ReadTimeout`: 3 seconds (time to read response)
- `WriteTimeout`: 3 seconds (time to write command)
3. **Use MinIdleConns for steady load**:
- Set to 20-30% of PoolSize
- Pre-warms the pool
- Reduces initial latency spikes
4. **Monitor metrics** (see the sketch after this list):
- Track pool timeout rate
- Monitor average latency
- Alert on error bursts
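A sketch for the pool-timeout-rate check (treating hits + misses + timeouts as the total number of connection requests is an assumption for this example, not a documented invariant of `PoolStats`):

```go
package main

import (
	"fmt"

	"github.com/redis/go-redis/v9"
)

// poolTimeoutRate returns Timeouts as a fraction of all connection requests.
func poolTimeoutRate(s *redis.PoolStats) float64 {
	total := s.Hits + s.Misses + s.Timeouts
	if total == 0 {
		return 0
	}
	return float64(s.Timeouts) / float64(total)
}

func main() {
	client := redis.NewClusterClient(&redis.ClusterOptions{
		Addrs: []string{"localhost:16600"},
	})
	defer client.Close()

	// ... run your workload ...

	if rate := poolTimeoutRate(client.PoolStats()); rate > 0.01 {
		fmt.Printf("pool timeout rate %.2f%% exceeds 1%%; consider increasing PoolSize\n", rate*100)
	}
}
```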
### Tuning for Different Workloads
**High throughput, low latency**:
```go
PoolSize: 20,
MinIdleConns: 5,
PoolTimeout: 2 * time.Second,
```
**Bursty traffic**:
```go
PoolSize: 30,
MinIdleConns: 10,
PoolTimeout: 5 * time.Second,
```
**Low traffic, resource constrained**:
```go
PoolSize: 5,
MinIdleConns: 0,
PoolTimeout: 3 * time.Second,
```
## Debugging Log Messages
### "Connection state changed by hook to IDLE/UNUSABLE, pooling as-is"
This message appears when the connection state is not IN_USE when `putConn()` tries to release it.
**What's happening**:
1. Connection is being returned to pool
2. Pool tries to transition IN_USE → IDLE
3. Transition fails because connection is already in a different state (IDLE or UNUSABLE)
4. Pool logs this message and pools the connection as-is
**Possible causes**:
1. **Hook changed state to UNUSABLE** (normal for handoff/reauth):
- Maintenance notifications hook marks connection for handoff
- Re-auth hook marks connection for re-authentication
- Connection is pooled in UNUSABLE state for background processing
2. **Connection already in IDLE state** (potential issue):
- Connection was released twice
- Connection was never properly acquired
- Race condition in connection lifecycle
**This is normal** when you see it occasionally (<1% of operations) with state=UNUSABLE.
**This indicates a problem** when:
- You see it on **every operation** or very frequently (>10%)
- The state is IDLE (not UNUSABLE)
- Pool timeout rate is high
- Operations are failing
**How to investigate**:
1. Check which state the connection is in (IDLE vs UNUSABLE)
2. If UNUSABLE: Check if handoff/reauth is completing
3. If IDLE: There may be a bug in connection lifecycle management
**How to reduce log verbosity**:
The example has maintenance notifications disabled but hooks may still be registered.
To completely silence these logs, you can set a custom logger that filters them out.
## Known Limitations
1. **No connection state visibility**: Can't easily inspect connection states from outside
2. **No per-node pool metrics**: Pool stats are aggregated across all nodes
3. **Limited backpressure**: No built-in circuit breaker or rate limiting
4. **Hook state transitions**: Hooks can change connection state during OnPut, which may cause confusion
## Testing Recommendations
1. **Load test before production**: Use this example app to test your specific workload
2. **Test failure scenarios**: Simulate node failures, network issues
3. **Monitor in staging**: Run with production-like load in staging first
4. **Gradual rollout**: Deploy to a subset of traffic first
## Further Reading
- `internal/pool/conn_state.go`: State machine implementation
- `internal/pool/pool.go`: Connection pool implementation
- `internal/pool/conn.go`: Connection with state machine
- `internal/semaphore.go`: Semaphore for connection limiting


@@ -0,0 +1,268 @@
# Quick Start Guide
## Running Against Your Docker Cluster
### Step 0: Initialize the Cluster (First Time Only)
If this is the first time running the cluster, you need to initialize it:
```bash
cd example/cluster-state-machine
# Check cluster health
./check-cluster.sh
# If cluster is not initialized (state: fail), initialize it
./init-cluster.sh
# Verify cluster is ready
./check-cluster.sh
```
**Expected output:**
```
✓ Cluster state: OK
✓ Hash slots are assigned
```
### Step 1: Find Your Cluster Ports
```bash
# List running Redis containers
docker ps | grep redis
# Example output:
# CONTAINER ID IMAGE PORTS NAMES
# abc123def456 redis:latest 0.0.0.0:16600->6379/tcp redis-node-1
# def456ghi789 redis:latest 0.0.0.0:16601->6379/tcp redis-node-2
# ghi789jkl012 redis:latest 0.0.0.0:16602->6379/tcp redis-node-3
# jkl012mno345 redis:latest 0.0.0.0:16603->6379/tcp redis-node-4
# mno345pqr678 redis:latest 0.0.0.0:16604->6379/tcp redis-node-5
# pqr678stu901 redis:latest 0.0.0.0:16605->6379/tcp redis-node-6
```
### Step 2: Run the Example
```bash
cd example/cluster-state-machine
# Basic test (default) - using all 6 nodes
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602,localhost:16603,localhost:16604,localhost:16605"
# Or use just the master nodes (typically first 3)
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602"
# Advanced monitoring
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=advanced
# Issue detection
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=detect
# Run all tests
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=all
```
### Step 3: Interpret Results
**Good Results:**
```
✓ Completed in 2.5s
Total operations: 5000
Successful: 5000 (100.00%)
Failed: 0 (0.00%)
Pool timeouts: 0 (0.00%)
Average latency: 2.3ms
Throughput: 2000 ops/sec
```
**Warning Signs:**
```
⚠️ Completed in 15.2s
Total operations: 5000
Successful: 4750 (95.00%)
Failed: 250 (5.00%)
Pool timeouts: 150 (3.00%)
Average latency: 45.7ms
Throughput: 328 ops/sec
```
## Common Issues
### Issue: "CLUSTERDOWN Hash slot not served"
**Problem:** Cluster is not initialized or in failed state
**Solution:**
```bash
cd example/cluster-state-machine
# Check cluster health
./check-cluster.sh
# If cluster state is "fail", initialize it
./init-cluster.sh
# Wait a few seconds and verify
sleep 3
./check-cluster.sh
```
### Issue: "connection refused"
**Problem:** Can't connect to Redis cluster
**Solution:**
```bash
# Check if cluster is running
docker ps | grep redis
# Check if ports are correct
docker port <container-name>
# Try connecting with redis-cli
redis-cli -c -p 16600 ping
# Or test each node
for port in 16600 16601 16602 16603 16604 16605; do
echo "Testing port $port..."
redis-cli -c -p $port ping
done
```
### Issue: "pool timeout" errors
**Problem:** Too many concurrent requests for pool size
**Solutions:**
1. Increase pool size in the example code
2. Reduce number of concurrent goroutines
3. Check if cluster is overloaded
### Issue: "Connection state changed by hook to UNUSABLE"
**Problem:** Maintenance notifications hook is marking connections for handoff
**This is normal** if:
- You see it occasionally (<1% of operations)
- Operations still succeed
- No performance degradation
**This is a problem** if:
- You see it very frequently (>10% of operations)
- Many operations are failing
- High latency
**Solution:**
- Maintenance notifications are disabled in the example by default
- If you're still seeing this, check if you have streaming auth enabled
- Increase pool size to handle UNUSABLE connections
## Understanding the Logs
### Normal Logs
```
redis: 2025/10/27 18:10:57 pool.go:691: Connection state changed by hook to IDLE, pooling as-is
```
This is informational - the hook changed the state before the pool could transition it.
### Error Logs
```
redis: 2025/10/27 18:10:58 pool.go:393: redis: connection pool: failed to dial after 5 attempts: dial tcp :7000: connect: connection refused
```
This means the cluster is not reachable. Check your Docker containers and ports.
```
redis: 2025/10/27 18:10:59 pool.go:621: redis: connection pool: failed to get a usable connection after 5 attempts
```
This means all connections in the pool are UNUSABLE. This could indicate:
- Handoff operations are stuck
- Re-auth operations are failing
- Connections are in bad state
## Debugging Tips
### Enable Verbose Logging
Set the log level to see more details:
```go
// In your test code
redis.SetLogger(redis.NewLogger(os.Stderr))
```
### Monitor Pool Stats
Add this to the example to see pool statistics:
```go
stats := client.PoolStats()
fmt.Printf("Pool Stats:\n")
fmt.Printf(" Hits: %d\n", stats.Hits)
fmt.Printf(" Misses: %d\n", stats.Misses)
fmt.Printf(" Timeouts: %d\n", stats.Timeouts)
fmt.Printf(" TotalConns: %d\n", stats.TotalConns)
fmt.Printf(" IdleConns: %d\n", stats.IdleConns)
fmt.Printf(" StaleConns: %d\n", stats.StaleConns)
```
### Check Cluster Health
```bash
# Connect to cluster
redis-cli -c -p 16600
# Check cluster info
CLUSTER INFO
# Check cluster nodes
CLUSTER NODES
# Check if all slots are covered
CLUSTER SLOTS
# Check cluster state
CLUSTER INFO | grep cluster_state
```
## Performance Tuning
### For High Throughput
```go
PoolSize: 20,
MinIdleConns: 5,
PoolTimeout: 2 * time.Second,
```
### For Bursty Traffic
```go
PoolSize: 30,
MinIdleConns: 10,
PoolTimeout: 5 * time.Second,
```
### For Low Latency
```go
PoolSize: 15,
MinIdleConns: 5,
PoolTimeout: 1 * time.Second,
ReadTimeout: 1 * time.Second,
WriteTimeout: 1 * time.Second,
```
## Next Steps
1. Run the basic test to establish a baseline
2. Run the advanced test to see latency distribution
3. Run the detect test to find potential issues
4. Adjust pool size and timeouts based on results
5. Test with your actual workload patterns
For more details, see:
- [README.md](README.md) - Full documentation
- [POTENTIAL_ISSUES.md](POTENTIAL_ISSUES.md) - Detailed issue analysis


@@ -0,0 +1,237 @@
# Redis Cluster State Machine Example
This example demonstrates the connection state machine behavior in the Redis cluster client under high concurrency.
## What This Example Shows
1. **Basic Concurrent Operations**: Multiple goroutines performing SET operations concurrently
2. **High Concurrency Stress Test**: Limited pool size with many concurrent goroutines to stress the state machine
3. **Connection Pool Behavior**: Monitoring connection reuse and state transitions over time
4. **Mixed Read/Write Workload**: Realistic workload with both reads and writes
## Connection State Machine
The connection state machine manages connection lifecycle:
```
CREATED → INITIALIZING → IDLE ⇄ IN_USE
                              ↓
                  UNUSABLE (handoff/reauth)
                              ↓
                        IDLE / CLOSED
```
### States
- **CREATED**: Connection just created, not yet initialized
- **INITIALIZING**: Connection initialization in progress
- **IDLE**: Connection initialized and idle in pool, ready to be acquired
- **IN_USE**: Connection actively processing a command
- **UNUSABLE**: Connection temporarily unusable (handoff, reauth, etc.)
- **CLOSED**: Connection closed
## Running the Example
### Prerequisites
You need a Redis cluster running.
**Option 1: Use existing Docker cluster**
If you already have a Redis cluster running in Docker:
```bash
# Find your cluster ports
docker ps | grep redis
# Note the ports (e.g., 16600, 16601, 16602, etc.)
```
**Option 2: Start a new cluster**
```bash
# From the go-redis root directory
docker-compose up -d
# This will start a cluster on ports 16600-16605
```
### Run the Example
**Quick Start (using run.sh script):**
```bash
cd example/cluster-state-machine
# Run basic tests (default, uses ports 16600-16605)
./run.sh
# Run specific mode
./run.sh basic
./run.sh advanced
./run.sh detect
./run.sh all
```
**Manual Run:**
```bash
cd example/cluster-state-machine
# Run with default addresses (localhost:6379)
go run *.go
# Run with Docker cluster addresses (ports 16600-16605)
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602,localhost:16603,localhost:16604,localhost:16605"
# Or use a subset of nodes
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602"
# Specify test mode
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=basic
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=advanced
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=detect
go run *.go -addrs="localhost:16600,localhost:16601,localhost:16602" -mode=all
```
**Available flags:**
- `-addrs`: Comma-separated Redis addresses (default: "localhost:6379")
- `-mode`: Test mode - basic, advanced, detect, or all (default: "basic")
## What to Look For
### Normal Behavior
- High success rate (>99%)
- Low latency (typically <10ms)
- Few or no pool timeouts
- Efficient connection reuse
### Potential Issues
If you see:
1. **High pool timeout rate**: Pool size may be too small for the workload
2. **High failure rate**: Could indicate connection state machine issues
3. **Increasing latency**: May indicate connection contention or state transition delays
4. **Many pool timeouts in Example 2**: This is expected due to intentionally small pool size
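To classify pool timeouts in your own workload, the examples in this directory use a simple string check (copied from `main.go` here; it needs the standard `strings` package):
```go
// isPoolTimeout reports whether an error came from waiting too long
// for a connection from the pool.
func isPoolTimeout(err error) bool {
	if err == nil {
		return false
	}
	return strings.Contains(err.Error(), "pool timeout")
}
```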
## Understanding the Metrics
- **Total Operations**: Total number of Redis operations attempted
- **Successful**: Operations that completed successfully
- **Failed**: Operations that failed (excluding timeouts)
- **Timeouts**: Operations that timed out
- **Pool Timeouts**: Number of times we couldn't acquire a connection from the pool
- **Avg Latency**: Average latency for successful operations
## Tuning Parameters
You can modify these parameters in the code to experiment:
- `PoolSize`: Number of connections per cluster node
- `PoolTimeout`: How long to wait for a connection from the pool
- `MinIdleConns`: Minimum number of idle connections to maintain
- `numGoroutines`: Number of concurrent goroutines
- `opsPerGoroutine`: Number of operations per goroutine
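The parameters above map onto a stripped-down version of the experiment loop the examples use; a sketch:
```go
const (
	numGoroutines   = 50
	opsPerGoroutine = 100
)

var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
	wg.Add(1)
	go func(id int) {
		defer wg.Done()
		for i := 0; i < opsPerGoroutine; i++ {
			// client and ctx come from the surrounding example code.
			_ = client.Set(ctx, fmt.Sprintf("tune:%d:%d", id, i), "v", 0).Err()
		}
	}(g)
}
wg.Wait()
```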
## Test Modes
### Basic Mode (default)
Runs 4 examples demonstrating normal usage patterns:
1. **Basic Concurrent Operations**: 100 goroutines, 5000 ops each
2. **High Concurrency Stress**: 250 goroutines with a small pool (5 connections)
3. **Connection Pool Behavior**: 100 workers running for 5 seconds
4. **Mixed Read/Write**: 300 goroutines with a 60/40 read/write ratio
### Advanced Mode
Includes detailed latency distribution and state monitoring:
1. **Extreme Contention**: 200 goroutines with pool size of 2
2. **Rapid Cycles**: 50 goroutines doing rapid-fire operations
3. **Long-Running Operations**: Pipeline operations with delays
4. **Concurrent Pipelines**: Multiple pipelines executing simultaneously
### Detect Mode
Runs tests specifically designed to expose concurrency issues:
1. **Thundering Herd**: All goroutines start simultaneously
2. **Bursty Traffic**: Alternating high/low load patterns
3. **Connection Churn**: Rapidly creating and closing clients
4. **Race Condition Detection**: Mixed operations with high contention
## Common Scenarios
### Scenario 1: Pool Exhaustion
Example 2 intentionally creates pool exhaustion by using a small pool (5 connections) with many goroutines (250). This tests:
- Connection state machine under contention
- Pool timeout handling
- Connection reuse efficiency
### Scenario 2: Sustained Load
Example 3 runs workers continuously for 5 seconds, testing:
- Connection lifecycle management
- State transitions over time
- Connection health checks
### Scenario 3: Mixed Workload
Example 4 uses a realistic 60/40 read/write ratio, testing:
- State machine with different operation types
- Connection reuse patterns
- Concurrent read/write handling
## Debugging Tips
If you encounter issues:
1. **Enable debug logging**: Set `REDIS_DEBUG=1` environment variable
2. **Reduce concurrency**: Lower `numGoroutines` to isolate issues
3. **Increase pool size**: If seeing many pool timeouts
4. **Check cluster health**: Ensure all cluster nodes are responsive
5. **Monitor connection states**: Add logging to track state transitions
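For the last point, the client logger is the simplest hook: route it to stderr and the pool/state log lines become visible. A sketch, assuming the `NewLogger` helper referenced in this example's troubleshooting notes exists on this branch:
```go
// Assumption: redis.NewLogger is the helper used in the troubleshooting notes;
// otherwise pass any implementation accepted by redis.SetLogger.
redis.SetLogger(redis.NewLogger(os.Stderr))
```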
## Expected Output
The numbers below are illustrative; exact counts depend on the constants set in the code.
```
=== Redis Cluster State Machine Example ===
Example 1: Basic Concurrent Operations
---------------------------------------
✓ Completed 5000 operations from 50 goroutines in 1.2s
Throughput: 4166 ops/sec
=== Metrics ===
Total Operations: 5000
Successful: 5000 (100.00%)
Failed: 0 (0.00%)
Timeouts: 0 (0.00%)
Pool Timeouts: 0
Avg Latency: 2.4ms
Example 2: High Concurrency Stress Test
----------------------------------------
✓ Completed stress test in 3.5s
Throughput: 1428 ops/sec
=== Metrics ===
Total Operations: 5000
Successful: 4950 (99.00%)
Failed: 0 (0.00%)
Timeouts: 50 (1.00%)
Pool Timeouts: 50
Avg Latency: 8.2ms
...
```
## Related Files
- `internal/pool/conn_state.go`: Connection state machine implementation
- `internal/pool/pool.go`: Connection pool implementation
- `internal/pool/conn.go`: Connection implementation with state machine
- `osscluster.go`: Cluster client implementation

View File

@@ -0,0 +1,530 @@
package main
import (
"context"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
// AdvancedMetrics extends basic metrics with more detailed tracking
type AdvancedMetrics struct {
Metrics
// Latency buckets (in microseconds)
latency0_1ms atomic.Int64 // 0-1ms
latency1_5ms atomic.Int64 // 1-5ms
latency5_10ms atomic.Int64 // 5-10ms
latency10_50ms atomic.Int64 // 10-50ms
latency50ms atomic.Int64 // >50ms
}
func (am *AdvancedMetrics) recordSuccess(latency time.Duration) {
am.Metrics.recordSuccess(latency)
// Record latency bucket
micros := latency.Microseconds()
switch {
case micros < 1000:
am.latency0_1ms.Add(1)
case micros < 5000:
am.latency1_5ms.Add(1)
case micros < 10000:
am.latency5_10ms.Add(1)
case micros < 50000:
am.latency10_50ms.Add(1)
default:
am.latency50ms.Add(1)
}
}
func (am *AdvancedMetrics) printDetailed() {
am.Metrics.print()
total := am.successOps.Load()
if total > 0 {
fmt.Printf("\n=== Latency Distribution ===\n")
fmt.Printf("0-1ms: %d (%.2f%%)\n", am.latency0_1ms.Load(), float64(am.latency0_1ms.Load())/float64(total)*100)
fmt.Printf("1-5ms: %d (%.2f%%)\n", am.latency1_5ms.Load(), float64(am.latency1_5ms.Load())/float64(total)*100)
fmt.Printf("5-10ms: %d (%.2f%%)\n", am.latency5_10ms.Load(), float64(am.latency5_10ms.Load())/float64(total)*100)
fmt.Printf("10-50ms: %d (%.2f%%)\n", am.latency10_50ms.Load(), float64(am.latency10_50ms.Load())/float64(total)*100)
fmt.Printf(">50ms: %d (%.2f%%)\n", am.latency50ms.Load(), float64(am.latency50ms.Load())/float64(total)*100)
}
}
// runAdvancedExample demonstrates advanced monitoring and potential issues
func runAdvancedExample() {
ctx := context.Background()
fmt.Println("\n=== Advanced State Machine Monitoring ===\n")
fmt.Println("This example includes detailed state machine monitoring")
fmt.Println("to help identify potential concurrency issues.\n")
// Test 1: Extreme concurrency with tiny pool
testExtremeContention(ctx)
// Test 2: Rapid acquire/release cycles
testRapidCycles(ctx)
// Test 3: Long-running operations
testLongRunningOps(ctx)
// Test 4: Concurrent pipelines
testConcurrentPipelines(ctx)
// Test 5: PubSub + Get/Set pool exhaustion
testPubSubWithGetSet()
}
func testExtremeContention(ctx context.Context) {
fmt.Println("Test 1: Extreme Contention")
fmt.Println("---------------------------")
fmt.Println("Pool size: 2, Goroutines: 200")
fmt.Println("This tests the state machine under extreme contention.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 2, // Extremely small
PoolTimeout: 1 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 200
const opsPerGoroutine = 10
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("extreme:%d:%d", goroutineID, i)
value := fmt.Sprintf("v%d", i)
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
func printPoolStats(stats *redis.PoolStats) {
fmt.Println("===== Pool Stats: =====")
fmt.Printf(" Hits: %d\n", stats.Hits)
fmt.Printf(" Misses: %d\n", stats.Misses)
fmt.Printf(" Timeouts: %d\n", stats.Timeouts)
fmt.Printf(" TotalConns: %d\n", stats.TotalConns)
fmt.Printf(" IdleConns: %d\n", stats.IdleConns)
fmt.Printf(" StaleConns: %d\n", stats.StaleConns)
fmt.Printf(" WaitCount: %d\n", stats.WaitCount)
fmt.Printf(" WaitDurationNs: %d\n", stats.WaitDurationNs)
fmt.Printf(" Unusable: %d\n", stats.Unusable)
fmt.Printf(" PubSubStats: %+v\n", stats.PubSubStats)
fmt.Println("===== End Pool Stats: =====")
}
func testRapidCycles(ctx context.Context) {
fmt.Println("Test 2: Rapid Acquire/Release Cycles")
fmt.Println("-------------------------------------")
fmt.Println("Testing rapid connection state transitions.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 5,
MaxIdleConns: 1,
PoolTimeout: 2 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 50
const opsPerGoroutine = 100
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("rapid:%d:%d", goroutineID, i)
value := "x"
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// No delay - rapid fire
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
func testLongRunningOps(ctx context.Context) {
fmt.Println("Test 3: Long-Running Operations")
fmt.Println("--------------------------------")
fmt.Println("Testing connection holding with slow operations.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 2,
MaxIdleConns: 1,
MaxActiveConns: 5,
PoolTimeout: 3 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 100
const opsPerGoroutine = 200
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("slow:%d:%d", goroutineID, i)
value := fmt.Sprintf("data-%d", i)
opStart := time.Now()
// Simulate slow operation by doing multiple commands
pipe := client.Pipeline()
pipe.Set(ctx, key, value, 0)
pipe.Get(ctx, key)
pipe.Incr(ctx, fmt.Sprintf("counter:%d", goroutineID))
_, err := pipe.Exec(ctx)
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Simulate processing time
time.Sleep(time.Millisecond * time.Duration(rand.Intn(20)))
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
// testConcurrentPipelines tests pipeline operations under concurrency
func testConcurrentPipelines(ctx context.Context) {
fmt.Println("Test 4: Concurrent Pipelines")
fmt.Println("-----------------------------")
fmt.Println("Testing pipeline operations with connection state machine.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 10,
MaxIdleConns: 5,
MinIdleConns: 5,
PoolTimeout: 5 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const numGoroutines = 64
const pipelinesPerGoroutine = 100
const commandsPerPipeline = 100
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < pipelinesPerGoroutine; i++ {
opStart := time.Now()
pipe := client.Pipeline()
for j := 0; j < commandsPerPipeline; j++ {
key := fmt.Sprintf("pipe:%d:%d:%d", goroutineID, i, j)
pipe.Set(ctx, key, j, 0)
}
_, err := pipe.Exec(ctx)
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
totalCommands := numGoroutines * pipelinesPerGoroutine * commandsPerPipeline
fmt.Printf("✓ Completed %d commands in %d pipelines in %v\n", totalCommands, numGoroutines*pipelinesPerGoroutine, elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(totalCommands)/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}
// testPubSubWithGetSet tests pool exhaustion with concurrent pub/sub and get/set operations
func testPubSubWithGetSet() {
fmt.Println("=== Test 5: PubSub + Get/Set Pool Exhaustion ===")
fmt.Println("Testing pool with 100 publishers, 10 subscribers (10 channels), and 100 get/set goroutines")
fmt.Println("Pool size: 100 connections")
fmt.Println()
ctx := context.Background()
// Create client with pool size 100
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 100,
PoolTimeout: 5 * time.Second,
})
defer client.Close()
metrics := &AdvancedMetrics{}
const testDuration = 10 * time.Second
const numChannels = 10
const numPublishers = 100
const numSubscribers = 10
const numGetSetWorkers = 100
// Channel names
channels := make([]string, numChannels)
for i := 0; i < numChannels; i++ {
channels[i] = fmt.Sprintf("test-channel-%d", i)
}
start := time.Now()
var wg sync.WaitGroup
stopSignal := make(chan struct{})
// Track pub/sub specific metrics
var publishCount atomic.Int64
var receiveCount atomic.Int64
var subscribeErrors atomic.Int64
// Start subscribers (10 goroutines, each subscribing to all 10 channels)
for s := 0; s < numSubscribers; s++ {
wg.Add(1)
go func(subscriberID int) {
defer wg.Done()
// Create a dedicated pubsub connection
pubsub := client.Subscribe(ctx, channels...)
defer pubsub.Close()
// Wait for subscription confirmation
_, err := pubsub.Receive(ctx)
if err != nil {
subscribeErrors.Add(1)
fmt.Printf("Subscriber %d: failed to subscribe: %v\n", subscriberID, err)
return
}
// Receive messages until stop signal
ch := pubsub.Channel()
for {
select {
case <-stopSignal:
return
case msg := <-ch:
if msg != nil {
receiveCount.Add(1)
}
case <-time.After(100 * time.Millisecond):
// Timeout to check stop signal periodically
}
}
}(s)
}
// Give subscribers time to connect
time.Sleep(500 * time.Millisecond)
// Start publishers (100 goroutines)
for p := 0; p < numPublishers; p++ {
wg.Add(1)
go func(publisherID int) {
defer wg.Done()
for {
select {
case <-stopSignal:
return
default:
opStart := time.Now()
// Publish to a random channel
channelIdx := rand.Intn(numChannels)
message := fmt.Sprintf("msg-%d-%d", publisherID, time.Now().UnixNano())
err := client.Publish(ctx, channels[channelIdx], message).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
publishCount.Add(1)
}
// Small delay to avoid overwhelming the system
time.Sleep(10 * time.Millisecond)
}
}
}(p)
}
// Start get/set workers (100 goroutines)
for w := 0; w < numGetSetWorkers; w++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for {
select {
case <-stopSignal:
return
default:
opStart := time.Now()
// Alternate between SET and GET
key := fmt.Sprintf("worker:%d:key", workerID)
var err error
if rand.Intn(2) == 0 {
err = client.Set(ctx, key, workerID, 0).Err()
} else {
err = client.Get(ctx, key).Err()
// Ignore key not found errors
if err == redis.Nil {
err = nil
}
}
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Small delay to avoid overwhelming the system
time.Sleep(5 * time.Millisecond)
}
}
}(w)
}
// Run for specified duration
time.Sleep(testDuration)
close(stopSignal)
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Test completed in %v\n", elapsed)
fmt.Printf(" Published: %d messages\n", publishCount.Load())
fmt.Printf(" Received: %d messages\n", receiveCount.Load())
fmt.Printf(" Subscribe errors: %d\n", subscribeErrors.Load())
fmt.Printf(" Get/Set operations: %d\n", metrics.successOps.Load())
fmt.Printf(" Total throughput: %.0f ops/sec\n", float64(metrics.successOps.Load())/elapsed.Seconds())
metrics.printDetailed()
printPoolStats(client.PoolStats())
fmt.Println()
}

View File

@@ -0,0 +1,109 @@
#!/bin/bash
# Script to check Redis cluster health on ports 16600-16605
echo "=== Redis Cluster Health Check ==="
echo ""
# Check if redis-cli is available
if ! command -v redis-cli &> /dev/null; then
echo "❌ redis-cli not found. Please install redis-tools."
exit 1
fi
# Check each port
echo "Checking connectivity to cluster nodes..."
REACHABLE_PORTS=()
for port in 16600 16601 16602 16603 16604 16605; do
if redis-cli -p $port ping &> /dev/null; then
echo "✓ Port $port is reachable"
REACHABLE_PORTS+=($port)
else
echo "✗ Port $port is NOT reachable"
fi
done
echo ""
if [ ${#REACHABLE_PORTS[@]} -eq 0 ]; then
echo "❌ No cluster nodes are reachable!"
echo ""
echo "Solutions:"
echo "1. Check if Docker containers are running:"
echo " docker ps | grep redis"
echo ""
echo "2. Start the cluster:"
echo " docker-compose up -d"
exit 1
fi
# Check cluster state on first reachable port
PORT=${REACHABLE_PORTS[0]}
echo "Checking cluster state on port $PORT..."
echo ""
CLUSTER_STATE=$(redis-cli -p $PORT CLUSTER INFO 2>/dev/null | grep cluster_state | cut -d: -f2 | tr -d '\r')
if [ "$CLUSTER_STATE" = "ok" ]; then
echo "✓ Cluster state: OK"
else
echo "❌ Cluster state: $CLUSTER_STATE"
echo ""
echo "The cluster is not in OK state. This causes 'CLUSTERDOWN Hash slot not served' errors."
echo ""
echo "Cluster Info:"
redis-cli -p $PORT CLUSTER INFO
echo ""
echo "Cluster Nodes:"
redis-cli -p $PORT CLUSTER NODES
echo ""
echo "Solutions:"
echo ""
echo "1. Check if all hash slots are assigned:"
echo " redis-cli -p $PORT CLUSTER SLOTS"
echo ""
echo "2. If cluster was never initialized, create it:"
echo " redis-cli --cluster create \\"
echo " localhost:16600 localhost:16601 localhost:16602 \\"
echo " localhost:16603 localhost:16604 localhost:16605 \\"
echo " --cluster-replicas 1 --cluster-yes"
echo ""
echo "3. If cluster is in failed state, try fixing it:"
echo " redis-cli --cluster fix localhost:$PORT"
echo ""
echo "4. If nothing works, reset and recreate:"
echo " docker-compose down -v"
echo " docker-compose up -d"
echo " # Wait a few seconds, then create cluster"
exit 1
fi
# Check slot coverage
echo ""
echo "Checking hash slot coverage..."
SLOTS_OUTPUT=$(redis-cli -p $PORT CLUSTER SLOTS 2>/dev/null)
if [ -z "$SLOTS_OUTPUT" ]; then
echo "❌ No hash slots assigned!"
echo ""
echo "The cluster needs to be initialized. Run:"
echo " redis-cli --cluster create \\"
echo " localhost:16600 localhost:16601 localhost:16602 \\"
echo " localhost:16603 localhost:16604 localhost:16605 \\"
echo " --cluster-replicas 1 --cluster-yes"
exit 1
else
echo "✓ Hash slots are assigned"
fi
# Show cluster nodes
echo ""
echo "Cluster Nodes:"
redis-cli -p $PORT CLUSTER NODES
echo ""
echo "=== Cluster is healthy and ready! ==="
echo ""
echo "You can now run the example:"
echo " ./run.sh basic"

Binary file not shown.

View File

@@ -0,0 +1,12 @@
module cluster-state-machine
go 1.25.3
replace github.com/redis/go-redis/v9 => ../..
require github.com/redis/go-redis/v9 v9.0.0-00010101000000-000000000000
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
)

View File

@@ -0,0 +1,8 @@
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=

View File

@@ -0,0 +1,75 @@
#!/bin/bash
# Script to initialize Redis cluster on ports 16600-16605
echo "=== Initializing Redis Cluster ==="
echo ""
# Check if redis-cli is available
if ! command -v redis-cli &> /dev/null; then
echo "❌ redis-cli not found. Please install redis-tools."
exit 1
fi
# Check connectivity
echo "Checking connectivity to all nodes..."
for port in 16600 16601 16602 16603 16604 16605; do
if redis-cli -p $port ping &> /dev/null; then
echo "✓ Port $port is reachable"
else
echo "❌ Port $port is NOT reachable"
echo ""
echo "Make sure all Redis nodes are running:"
echo " docker ps | grep redis"
exit 1
fi
done
echo ""
echo "Creating cluster with 3 masters and 3 replicas..."
echo ""
echo "This will configure:"
echo " - Masters: 16600, 16601, 16602"
echo " - Replicas: 16603, 16604, 16605"
echo ""
# Create the cluster
redis-cli --cluster create \
localhost:16600 localhost:16601 localhost:16602 \
localhost:16603 localhost:16604 localhost:16605 \
--cluster-replicas 1 \
--cluster-yes
if [ $? -eq 0 ]; then
echo ""
echo "✓ Cluster created successfully!"
echo ""
echo "Verifying cluster state..."
sleep 2
CLUSTER_STATE=$(redis-cli -p 16600 CLUSTER INFO | grep cluster_state | cut -d: -f2 | tr -d '\r')
if [ "$CLUSTER_STATE" = "ok" ]; then
echo "✓ Cluster state: OK"
echo ""
echo "Cluster is ready! You can now run the example:"
echo " ./run.sh basic"
else
echo "⚠ Cluster state: $CLUSTER_STATE"
echo "You may need to wait a few seconds for the cluster to stabilize."
fi
else
echo ""
echo "❌ Failed to create cluster"
echo ""
echo "Troubleshooting:"
echo "1. Make sure all nodes are empty (no data)"
echo "2. Try resetting the nodes:"
echo " for port in 16600 16601 16602 16603 16604 16605; do"
echo " redis-cli -p \$port FLUSHALL"
echo " redis-cli -p \$port CLUSTER RESET"
echo " done"
echo "3. Then run this script again"
exit 1
fi

View File

@@ -0,0 +1,352 @@
package main
import (
"context"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
// IssueDetector helps identify potential concurrency issues
type IssueDetector struct {
// Timing anomalies
slowOps atomic.Int64 // Operations taking >100ms
verySlowOps atomic.Int64 // Operations taking >1s
// Error patterns
consecutiveErrors atomic.Int64
errorBursts atomic.Int64 // Multiple errors in short time
// Pool issues
poolExhaustion atomic.Int64
longWaits atomic.Int64 // Waits >500ms for connection
// State machine issues
stateConflicts atomic.Int64 // Potential state transition conflicts
lastErrorTime atomic.Int64 // Unix nano
errorCount atomic.Int64
}
func (id *IssueDetector) recordOp(latency time.Duration, err error) {
if err != nil {
id.errorCount.Add(1)
now := time.Now().UnixNano()
lastErr := id.lastErrorTime.Swap(now)
// Check for error burst (multiple errors within 100ms)
if lastErr > 0 && (now-lastErr) < 100*1000*1000 {
id.errorBursts.Add(1)
}
if isPoolTimeout(err) {
id.poolExhaustion.Add(1)
}
return
}
// Reset error tracking on success
id.errorCount.Store(0)
// Track slow operations
if latency > 100*time.Millisecond {
id.slowOps.Add(1)
}
if latency > 1*time.Second {
id.verySlowOps.Add(1)
}
if latency > 500*time.Millisecond {
id.longWaits.Add(1)
}
}
func (id *IssueDetector) print() {
fmt.Printf("\n=== Issue Detector ===\n")
hasIssues := false
if id.verySlowOps.Load() > 0 {
fmt.Printf("⚠️ Very slow operations (>1s): %d\n", id.verySlowOps.Load())
hasIssues = true
}
if id.slowOps.Load() > 0 {
fmt.Printf("⚠️ Slow operations (>100ms): %d\n", id.slowOps.Load())
hasIssues = true
}
if id.errorBursts.Load() > 0 {
fmt.Printf("⚠️ Error bursts detected: %d\n", id.errorBursts.Load())
hasIssues = true
}
if id.poolExhaustion.Load() > 0 {
fmt.Printf("⚠️ Pool exhaustion events: %d\n", id.poolExhaustion.Load())
hasIssues = true
}
if id.longWaits.Load() > 0 {
fmt.Printf("⚠️ Long waits (>500ms): %d\n", id.longWaits.Load())
hasIssues = true
}
if id.stateConflicts.Load() > 0 {
fmt.Printf("⚠️ Potential state conflicts: %d\n", id.stateConflicts.Load())
hasIssues = true
}
if !hasIssues {
fmt.Printf("✓ No issues detected\n")
}
}
// runIssueDetection runs tests specifically designed to detect concurrency issues
func runIssueDetection() {
ctx := context.Background()
fmt.Println("\n=== Issue Detection Tests ===\n")
fmt.Println("Running tests designed to expose potential concurrency issues")
fmt.Println("in the connection state machine.\n")
// Test 1: Thundering herd
testThunderingHerd(ctx)
// Test 2: Bursty traffic
testBurstyTraffic(ctx)
// Test 3: Connection churn
testConnectionChurn(ctx)
// Test 4: Race condition detection
testRaceConditions(ctx)
}
func testThunderingHerd(ctx context.Context) {
fmt.Println("Test 1: Thundering Herd")
fmt.Println("-----------------------")
fmt.Println("All goroutines start simultaneously, competing for connections.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 5,
PoolTimeout: 2 * time.Second,
})
defer client.Close()
detector := &IssueDetector{}
const numGoroutines = 100
var wg sync.WaitGroup
startGate := make(chan struct{})
// Prepare all goroutines
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
// Wait for start signal
<-startGate
key := fmt.Sprintf("herd:%d", goroutineID)
value := "data"
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
detector.recordOp(latency, err)
}(g)
}
// Release the herd!
start := time.Now()
close(startGate)
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
detector.print()
fmt.Println()
}
func testBurstyTraffic(ctx context.Context) {
fmt.Println("Test 2: Bursty Traffic")
fmt.Println("----------------------")
fmt.Println("Alternating between high and low load.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 8,
PoolTimeout: 3 * time.Second,
})
defer client.Close()
detector := &IssueDetector{}
const numBursts = 5
const goroutinesPerBurst = 50
start := time.Now()
for burst := 0; burst < numBursts; burst++ {
var wg sync.WaitGroup
// High load burst
for g := 0; g < goroutinesPerBurst; g++ {
wg.Add(1)
go func(burstID, goroutineID int) {
defer wg.Done()
key := fmt.Sprintf("burst:%d:%d", burstID, goroutineID)
value := "data"
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
detector.recordOp(latency, err)
}(burst, g)
}
wg.Wait()
// Quiet period
time.Sleep(100 * time.Millisecond)
}
elapsed := time.Since(start)
fmt.Printf("✓ Completed %d bursts in %v\n", numBursts, elapsed)
detector.print()
fmt.Println()
}
func testConnectionChurn(ctx context.Context) {
fmt.Println("Test 3: Connection Churn")
fmt.Println("------------------------")
fmt.Println("Rapidly creating and closing connections.\n")
detector := &IssueDetector{}
const numIterations = 10
const goroutinesPerIteration = 20
start := time.Now()
for iter := 0; iter < numIterations; iter++ {
// Create new client
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 5,
PoolTimeout: 2 * time.Second,
})
var wg sync.WaitGroup
for g := 0; g < goroutinesPerIteration; g++ {
wg.Add(1)
go func(iterID, goroutineID int) {
defer wg.Done()
key := fmt.Sprintf("churn:%d:%d", iterID, goroutineID)
value := "data"
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
detector.recordOp(latency, err)
}(iter, g)
}
wg.Wait()
// Close client
client.Close()
// Small delay before next iteration
time.Sleep(50 * time.Millisecond)
}
elapsed := time.Since(start)
fmt.Printf("✓ Completed %d iterations in %v\n", numIterations, elapsed)
detector.print()
fmt.Println()
}
// testRaceConditions attempts to expose race conditions in state transitions
func testRaceConditions(ctx context.Context) {
fmt.Println("Test 4: Race Condition Detection")
fmt.Println("---------------------------------")
fmt.Println("Attempting to trigger race conditions in state machine.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 3, // Very small to increase contention
PoolTimeout: 1 * time.Second,
})
defer client.Close()
detector := &IssueDetector{}
const numGoroutines = 100
const opsPerGoroutine = 20
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("race:%d:%d", goroutineID, i)
value := "x"
opStart := time.Now()
// Mix of operations to stress state machine
var err error
switch i % 3 {
case 0:
err = client.Set(ctx, key, value, 0).Err()
case 1:
_, err = client.Get(ctx, key).Result()
if err == redis.Nil {
err = nil
}
case 2:
pipe := client.Pipeline()
pipe.Set(ctx, key, value, 0)
pipe.Get(ctx, key)
_, err = pipe.Exec(ctx)
}
latency := time.Since(opStart)
detector.recordOp(latency, err)
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed in %v\n", elapsed)
fmt.Printf(" Total operations: %d\n", numGoroutines*opsPerGoroutine)
detector.print()
fmt.Println()
}

View File

@@ -0,0 +1,392 @@
package main
import (
"context"
"flag"
"fmt"
"math/rand"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
// getRedisAddrs parses the comma-separated addresses
func getRedisAddrs() []string {
addrs := strings.Split(*redisAddrs, ",")
for i := range addrs {
addrs[i] = strings.TrimSpace(addrs[i])
}
return addrs
}
// isPoolTimeout checks if an error is a pool timeout error.
// It is defined once here and shared by the other files in this package.
func isPoolTimeout(err error) bool {
if err == nil {
return false
}
return strings.Contains(err.Error(), "pool timeout")
}
// Metrics tracks operation statistics
type Metrics struct {
totalOps atomic.Int64
successOps atomic.Int64
failedOps atomic.Int64
timeoutOps atomic.Int64
poolTimeouts atomic.Int64
totalLatencyNs atomic.Int64
}
func (m *Metrics) recordSuccess(latency time.Duration) {
m.totalOps.Add(1)
m.successOps.Add(1)
m.totalLatencyNs.Add(latency.Nanoseconds())
}
func (m *Metrics) recordFailure() {
m.totalOps.Add(1)
m.failedOps.Add(1)
}
func (m *Metrics) recordTimeout() {
m.totalOps.Add(1)
m.timeoutOps.Add(1)
}
func (m *Metrics) recordPoolTimeout() {
m.poolTimeouts.Add(1)
}
func (m *Metrics) print() {
total := m.totalOps.Load()
success := m.successOps.Load()
failed := m.failedOps.Load()
timeouts := m.timeoutOps.Load()
poolTimeouts := m.poolTimeouts.Load()
avgLatency := time.Duration(0)
if success > 0 {
avgLatency = time.Duration(m.totalLatencyNs.Load() / success)
}
fmt.Printf("\n=== Metrics ===\n")
fmt.Printf("Total Operations: %d\n", total)
fmt.Printf("Successful: %d (%.2f%%)\n", success, float64(success)/float64(total)*100)
fmt.Printf("Failed: %d (%.2f%%)\n", failed, float64(failed)/float64(total)*100)
fmt.Printf("Timeouts: %d (%.2f%%)\n", timeouts, float64(timeouts)/float64(total)*100)
fmt.Printf("Pool Timeouts: %d\n", poolTimeouts)
fmt.Printf("Avg Latency: %v\n", avgLatency)
}
var (
redisAddrs = flag.String("addrs", "localhost:6379", "Comma-separated Redis addresses (e.g., localhost:7000,localhost:7001,localhost:7002)")
mode = flag.String("mode", "basic", "Test mode: basic, advanced, detect, all")
)
func main() {
// Parse command line flags
flag.Parse()
ctx := context.Background()
fmt.Println("=== Redis Cluster State Machine Example ===\n")
fmt.Println("This example demonstrates the connection state machine")
fmt.Println("under high concurrency with the cluster client.\n")
fmt.Printf("Redis addresses: %s\n\n", *redisAddrs)
switch *mode {
case "basic":
runBasicExamples(ctx)
case "advanced":
runAdvancedExample()
case "detect":
runIssueDetection()
case "all":
runBasicExamples(ctx)
runAdvancedExample()
runIssueDetection()
default:
fmt.Printf("Unknown mode: %s\n", *mode)
fmt.Println("Available modes: basic, advanced, detect, all")
os.Exit(1)
}
fmt.Println("\n=== All tests completed ===")
}
func runBasicExamples(ctx context.Context) {
fmt.Println("=== Basic Examples ===\n")
// Example 1: Basic concurrent operations
example1(ctx)
// Example 2: High concurrency stress test
example2(ctx)
// Example 3: Connection pool behavior under load
example3(ctx)
// Example 4: Mixed read/write workload
example4(ctx)
}
func example1(ctx context.Context) {
fmt.Println("Example 1: Basic Concurrent Operations")
fmt.Println("---------------------------------------")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
})
defer client.Close()
metrics := &Metrics{}
const numGoroutines = 100
const opsPerGoroutine = 5000
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("user:%d:%d", goroutineID, i)
value := fmt.Sprintf("data-%d-%d", goroutineID, i)
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
fmt.Printf("Error in goroutine %d: %v\n", goroutineID, err)
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed %d operations from %d goroutines in %v\n",
numGoroutines*opsPerGoroutine, numGoroutines, elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.print()
fmt.Println()
}
func example2(ctx context.Context) {
fmt.Println("Example 2: High Concurrency Stress Test")
fmt.Println("----------------------------------------")
fmt.Println("Testing with limited pool size and many concurrent goroutines")
fmt.Println("to stress the connection state machine and pool management.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 5, // Intentionally small to create contention
PoolTimeout: 2 * time.Second,
})
defer client.Close()
metrics := &Metrics{}
const numGoroutines = 250
const opsPerGoroutine = 250
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("stress:%d:%d", goroutineID, i)
value := fmt.Sprintf("value-%d", i)
opStart := time.Now()
err := client.Set(ctx, key, value, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Small random delay to simulate real workload
time.Sleep(time.Microsecond * time.Duration(rand.Intn(100)))
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed stress test in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.print()
fmt.Println()
}
func example3(ctx context.Context) {
fmt.Println("Example 3: Connection Pool Behavior Under Load")
fmt.Println("-----------------------------------------------")
fmt.Println("Monitoring connection reuse and state transitions.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 8,
MinIdleConns: 2,
PoolTimeout: 3 * time.Second,
})
defer client.Close()
metrics := &Metrics{}
const duration = 5 * time.Second
const numWorkers = 100
start := time.Now()
stopChan := make(chan struct{})
var wg sync.WaitGroup
// Start workers
for w := 0; w < numWorkers; w++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
counter := 0
for {
select {
case <-stopChan:
return
default:
key := fmt.Sprintf("worker:%d:counter", workerID)
counter++
opStart := time.Now()
err := client.Set(ctx, key, counter, 0).Err()
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
// Variable workload
time.Sleep(time.Millisecond * time.Duration(rand.Intn(50)))
}
}
}(w)
}
// Let it run for the specified duration
time.Sleep(duration)
close(stopChan)
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Ran %d workers for %v\n", numWorkers, duration)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(metrics.totalOps.Load())/elapsed.Seconds())
metrics.print()
fmt.Println()
}
func example4(ctx context.Context) {
fmt.Println("Example 4: Mixed Read/Write Workload")
fmt.Println("-------------------------------------")
fmt.Println("Testing connection state machine with mixed operations.\n")
client := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: getRedisAddrs(),
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
PoolSize: 10,
PoolTimeout: 5 * time.Second,
})
defer client.Close()
metrics := &Metrics{}
const numGoroutines = 300
const opsPerGoroutine = 1000
// Pre-populate some data
for i := 0; i < 1000; i++ {
key := fmt.Sprintf("data:%d", i)
client.Set(ctx, key, fmt.Sprintf("value-%d", i), 0)
}
start := time.Now()
var wg sync.WaitGroup
for g := 0; g < numGoroutines; g++ {
wg.Add(1)
go func(goroutineID int) {
defer wg.Done()
for i := 0; i < opsPerGoroutine; i++ {
key := fmt.Sprintf("data:%d", rand.Intn(100))
opStart := time.Now()
var err error
// 60% reads, 40% writes
if rand.Float32() < 0.6 {
_, err = client.Get(ctx, key).Result()
if err == redis.Nil {
err = nil // Key not found is not an error
}
} else {
value := fmt.Sprintf("updated-%d-%d", goroutineID, i)
err = client.Set(ctx, key, value, 0).Err()
}
latency := time.Since(opStart)
if err != nil {
if isPoolTimeout(err) {
metrics.recordPoolTimeout()
}
metrics.recordFailure()
} else {
metrics.recordSuccess(latency)
}
}
}(g)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("✓ Completed mixed workload in %v\n", elapsed)
fmt.Printf(" Throughput: %.0f ops/sec\n", float64(numGoroutines*opsPerGoroutine)/elapsed.Seconds())
metrics.print()
fmt.Println()
}

View File

@@ -0,0 +1,45 @@
#!/bin/bash
# Quick run script for cluster state machine example
# Usage: ./run.sh [mode]
# Modes: basic, advanced, detect, all
# Default cluster addresses (ports 16600-16605)
ADDRS="localhost:16600,localhost:16601,localhost:16602,localhost:16603,localhost:16604,localhost:16605"
# Get mode from argument or use default
MODE="${1:-basic}"
echo "=== Running Cluster State Machine Example ==="
echo "Cluster addresses: $ADDRS"
echo "Mode: $MODE"
echo ""
# Check if cluster is reachable
echo "Checking cluster connectivity..."
if command -v redis-cli &> /dev/null; then
for port in 16600 16601 16602; do
if redis-cli -p $port ping &> /dev/null; then
echo "✓ Port $port is reachable"
else
echo "✗ Port $port is NOT reachable"
echo ""
echo "Make sure your Redis cluster is running on ports 16600-16605"
echo "Check with: docker ps | grep redis"
exit 1
fi
done
echo ""
else
echo "⚠ redis-cli not found, skipping connectivity check"
echo ""
fi
# Run the example
echo "Running tests..."
echo ""
go run *.go -addrs="$ADDRS" -mode="$MODE"
echo ""
echo "=== Done ==="

View File

@@ -0,0 +1,133 @@
# Disable Maintenance Notifications Example
This example demonstrates how to use the go-redis client with maintenance notifications **disabled**.
## What are Maintenance Notifications?
Maintenance notifications are a Redis Cloud feature that allows the server to notify clients about:
- Planned maintenance events
- Failover operations
- Node migrations
- Cluster topology changes
The go-redis client supports three modes:
- **`ModeDisabled`**: Client doesn't send `CLIENT MAINT_NOTIFICATIONS ON` command
- **`ModeEnabled`**: Client forcefully sends the command, interrupts connection on error
- **`ModeAuto`** (default): Client tries to send the command, disables feature on error
## When to Disable Maintenance Notifications
You should disable maintenance notifications when:
1. **Connecting to non-Redis Cloud / Redis Enterprise instances** - Standard Redis servers don't support this feature
2. **You want to handle failovers manually** - Your application has custom failover logic
3. **Minimizing client-side overhead** - You want the simplest possible client behavior
4. **The Redis server doesn't support the feature** - Older Redis versions or forks
## Usage
### Basic Example
```go
import (
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
rdb := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
// Explicitly disable maintenance notifications
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
})
defer rdb.Close()
```
### Cluster Client Example
```go
rdbCluster := redis.NewClusterClient(&redis.ClusterOptions{
Addrs: []string{"localhost:7000", "localhost:7001", "localhost:7002"},
// Disable maintenance notifications for cluster
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
})
defer rdbCluster.Close()
```
### Default Behavior (ModeAuto)
If you don't specify `MaintNotificationsConfig`, the client defaults to `ModeAuto`:
```go
// This uses ModeAuto by default
rdb := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
// MaintNotificationsConfig: nil means ModeAuto
})
```
With `ModeAuto`, the client will:
1. Try to enable maintenance notifications
2. If the server doesn't support it, silently disable the feature
3. Continue normal operation
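If you instead want the client to fail fast when the server lacks support, use `ModeEnabled`, as Example 0 in `main.go` does:
```go
ctx := context.Background()

rdb := redis.NewClient(&redis.Options{
	Addr: "localhost:6379",
	// Force the CLIENT MAINT_NOTIFICATIONS ON handshake;
	// connection setup fails if the server rejects it.
	MaintNotificationsConfig: &maintnotifications.Config{
		Mode: maintnotifications.ModeEnabled,
	},
})
defer rdb.Close()

if err := rdb.Ping(ctx).Err(); err != nil {
	// Against a server without maintenance-notification support,
	// this is where the error surfaces.
	fmt.Println("connect failed:", err)
}
```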
## Running the Example
1. Start a Redis server:
```bash
redis-server --port 6379
```
2. Run the example:
```bash
go run main.go
```
## Expected Output
The output below is illustrative: the current `main.go` also runs an Example 0 that forces `ModeEnabled` and does not include the cluster example, and exact timings will vary.
```
=== Example 1: Explicitly Disabled ===
✓ Connected successfully (maintenance notifications disabled)
✓ SET operation successful
✓ GET operation successful: value1
=== Example 2: Default Behavior (ModeAuto) ===
✓ Connected successfully (maintenance notifications auto-enabled)
=== Example 3: Cluster Client with Disabled Notifications ===
Cluster not available (expected): ...
=== Example 4: Performance Comparison ===
✓ 1000 SET operations (disabled): 45ms
✓ 1000 SET operations (auto): 46ms
=== Cleanup ===
✓ Database flushed
=== Summary ===
Maintenance notifications can be disabled by setting:
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
}
This is useful when:
- Connecting to non-Redis Cloud instances
- You want to handle failovers manually
- You want to minimize client-side overhead
- The Redis server doesn't support CLIENT MAINT_NOTIFICATIONS
```
## Performance Impact
Disabling maintenance notifications has minimal performance impact. The main differences are:
1. **Connection Setup**: One less command (`CLIENT MAINT_NOTIFICATIONS ON`) during connection initialization
2. **Runtime Overhead**: No background processing of maintenance notifications
3. **Memory Usage**: Slightly lower memory footprint (no notification handlers)
In most cases, the performance difference is negligible (< 1%).
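A rough way to check this yourself, mirroring Example 4 in `main.go` (the address and the 1000 iterations are just the values that example uses):
```go
start := time.Now()
for i := 0; i < 1000; i++ {
	if err := rdb.Set(ctx, fmt.Sprintf("bench:%d", i), i, time.Minute).Err(); err != nil {
		panic(err)
	}
}
fmt.Printf("1000 SET operations took %v\n", time.Since(start))
```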

View File

@@ -0,0 +1,12 @@
module github.com/redis/go-redis/example/disable-maintnotifications
go 1.23
replace github.com/redis/go-redis/v9 => ../..
require github.com/redis/go-redis/v9 v9.7.0
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
)

View File

@@ -0,0 +1,8 @@
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=

View File

@@ -0,0 +1,144 @@
package main
import (
"context"
"fmt"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/maintnotifications"
)
func main() {
ctx := context.Background()
// Example 0: Explicitly enable maintenance notifications
fmt.Println("=== Example 0: Explicitly Enabled ===")
rdb0 := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
// Explicitly enable maintenance notifications
// This forces the client to send CLIENT MAINT_NOTIFICATIONS ON and fail if the server rejects it
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeEnabled,
},
})
defer rdb0.Close()
// Test the connection
if err := rdb0.Ping(ctx).Err(); err != nil {
fmt.Printf("Failed to connect: %v\n\n", err)
}
fmt.Println("When ModeEnabled, the client will return an error if the server doesn't support maintenance notifications.")
fmt.Printf("ModeAuto will silently disable the feature.\n\n")
// Example 1: Explicitly disable maintenance notifications
fmt.Println("=== Example 1: Explicitly Disabled ===")
rdb1 := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
// Explicitly disable maintenance notifications
// This prevents the client from sending CLIENT MAINT_NOTIFICATIONS ON
MaintNotificationsConfig: &maintnotifications.Config{
Mode: maintnotifications.ModeDisabled,
},
})
defer rdb1.Close()
// Test the connection
if err := rdb1.Ping(ctx).Err(); err != nil {
fmt.Printf("Failed to connect: %v\n\n", err)
return
}
fmt.Println("✓ Connected successfully (maintenance notifications disabled)")
// Perform some operations
if err := rdb1.Set(ctx, "example:key1", "value1", 0).Err(); err != nil {
fmt.Printf("Failed to set key: %v\n\n", err)
return
}
fmt.Println("✓ SET operation successful")
val, err := rdb1.Get(ctx, "example:key1").Result()
if err != nil {
fmt.Printf("Failed to get key: %v\n\n", err)
return
}
fmt.Printf("✓ GET operation successful: %s\n\n", val)
// Example 2: Using nil config (defaults to ModeAuto)
fmt.Printf("\n=== Example 2: Default Behavior (ModeAuto) ===\n")
rdb2 := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
// MaintNotificationsConfig: nil means ModeAuto (enabled for Redis Cloud)
})
defer rdb2.Close()
if err := rdb2.Ping(ctx).Err(); err != nil {
fmt.Printf("Failed to connect: %v\n\n", err)
return
}
fmt.Println("✓ Connected successfully (maintenance notifications auto-enabled)")
// Example 4: Comparing behavior with and without maintenance notifications
fmt.Printf("\n=== Example 4: Performance Comparison ===\n")
// Client with auto-enabled notifications
startauto := time.Now()
for i := 0; i < 1000; i++ {
key := fmt.Sprintf("test:auto:%d", i)
if err := rdb2.Set(ctx, key, i, time.Minute).Err(); err != nil {
fmt.Printf("Failed to set key: %v\n", err)
return
}
}
autoDuration := time.Since(startauto)
fmt.Printf("✓ 1000 SET operations (auto): %v\n", autoDuration)
// print pool stats
fmt.Printf("Pool stats (auto): %+v\n", rdb2.PoolStats())
// give the server a moment to settle between runs
fmt.Println("---")
time.Sleep(time.Second)
// Client with disabled notifications
start := time.Now()
for i := 0; i < 1000; i++ {
key := fmt.Sprintf("test:disabled:%d", i)
if err := rdb1.Set(ctx, key, i, time.Minute).Err(); err != nil {
fmt.Printf("Failed to set key: %v\n", err)
return
}
}
disabledDuration := time.Since(start)
fmt.Printf("✓ 1000 SET operations (disabled): %v\n", disabledDuration)
fmt.Printf("Pool stats (disabled): %+v\n", rdb1.PoolStats())
// performance comparison note
fmt.Printf("\nNote: The pool stats and performance are identical because there is no background processing overhead.\n")
fmt.Println("Since the server doesn't support maintenance notifications, there is no difference in behavior.")
fmt.Printf("The only difference is that the \"ModeDisabled\" client doesn't send the CLIENT MAINT_NOTIFICATIONS ON command.\n\n")
fmt.Println("p.s. reordering the execution here makes it look like there is a small performance difference, but it's just noise.")
// Cleanup
fmt.Printf("\n=== Cleanup ===\n")
if err := rdb1.FlushDB(ctx).Err(); err != nil {
fmt.Printf("Failed to flush DB: %v\n", err)
return
}
fmt.Println("✓ Database flushed")
fmt.Printf("\n=== Summary ===\n")
fmt.Println("Maintenance notifications can be disabled by setting:")
fmt.Println(" MaintNotifications: &maintnotifications.Config{")
fmt.Println(" Mode: maintnotifications.ModeDisabled,")
fmt.Println(" }")
fmt.Printf("\nThis is useful when:\n")
fmt.Println(" - Connecting to non-Redis Cloud instances")
fmt.Println(" - You want to handle failovers manually")
fmt.Println(" - You want to minimize client-side overhead")
fmt.Println(" - The Redis server doesn't support CLIENT MAINT_NOTIFICATIONS")
fmt.Printf("\nFor more information, see:\n")
fmt.Println(" https://github.com/redis/go-redis/tree/master/maintnotifications")
}

View File

@@ -0,0 +1,136 @@
package redis_test
import (
"context"
"fmt"
"github.com/redis/go-redis/v9"
)
func ExampleAutoPipeliner_cmdable() {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
})
defer client.Close()
// Create an autopipeliner
ap := client.AutoPipeline()
defer ap.Close()
// Use autopipeliner like a regular client - all commands are automatically batched!
// No need to call Do() - you can use typed methods directly
// String commands
ap.Set(ctx, "name", "Alice", 0)
ap.Set(ctx, "age", "30", 0)
// Hash commands
ap.HSet(ctx, "user:1", "name", "Bob", "email", "bob@example.com")
// List commands
ap.RPush(ctx, "tasks", "task1", "task2", "task3")
// Set commands
ap.SAdd(ctx, "tags", "go", "redis", "autopipeline")
// Sorted set commands
ap.ZAdd(ctx, "scores",
redis.Z{Score: 100, Member: "player1"},
redis.Z{Score: 200, Member: "player2"},
)
// Get results - commands are executed automatically when you access results
name, _ := ap.Get(ctx, "name").Result()
age, _ := ap.Get(ctx, "age").Result()
user, _ := ap.HGetAll(ctx, "user:1").Result()
tasks, _ := ap.LRange(ctx, "tasks", 0, -1).Result()
tags, _ := ap.SMembers(ctx, "tags").Result()
scores, _ := ap.ZRangeWithScores(ctx, "scores", 0, -1).Result()
fmt.Println("Name:", name)
fmt.Println("Age:", age)
fmt.Println("User:", user)
fmt.Println("Tasks:", tasks)
fmt.Println("Tags count:", len(tags))
fmt.Println("Scores count:", len(scores))
// Output:
// Name: Alice
// Age: 30
// User: map[email:bob@example.com name:Bob]
// Tasks: [task1 task2 task3]
// Tags count: 3
// Scores count: 2
}
func ExampleAutoPipeliner_mixedUsage() {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
})
defer client.Close()
ap := client.AutoPipeline()
defer ap.Close()
// You can mix autopipelined commands with traditional pipelines
// Autopipelined commands (batched automatically)
ap.Set(ctx, "auto1", "value1", 0)
ap.Set(ctx, "auto2", "value2", 0)
// Traditional pipeline (explicit batching)
pipe := ap.Pipeline()
pipe.Set(ctx, "pipe1", "value1", 0)
pipe.Set(ctx, "pipe2", "value2", 0)
pipe.Exec(ctx)
// Pipelined helper (convenience method)
ap.Pipelined(ctx, func(pipe redis.Pipeliner) error {
pipe.Set(ctx, "helper1", "value1", 0)
pipe.Set(ctx, "helper2", "value2", 0)
return nil
})
fmt.Println("All commands executed successfully")
// Output:
// All commands executed successfully
}
func ExampleAutoPipeliner_genericFunction() {
ctx := context.Background()
client := redis.NewClient(&redis.Options{
Addr: "localhost:6379",
})
defer client.Close()
// AutoPipeliner implements Cmdable, so you can pass it to functions
// that accept any Redis client type
ap := client.AutoPipeline()
defer ap.Close()
// This function works with any Cmdable (Client, Pipeline, AutoPipeliner, etc.)
setUserData := func(c redis.Cmdable, userID string, name string, email string) error {
c.HSet(ctx, "user:"+userID, "name", name, "email", email)
c.SAdd(ctx, "users", userID)
return nil
}
// Use with autopipeliner - commands are batched automatically
setUserData(ap, "123", "Alice", "alice@example.com")
setUserData(ap, "456", "Bob", "bob@example.com")
// Verify
users, _ := ap.SMembers(ctx, "users").Result()
fmt.Println("Users count:", len(users))
// Output:
// Users count: 2
}

122
idle_conn_init_test.go Normal file
View File

@@ -0,0 +1,122 @@
package redis_test
import (
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/redis/go-redis/v9"
"github.com/redis/go-redis/v9/internal/pool"
)
// TestIdleConnectionsAreInitialized verifies that connections created by MinIdleConns
// are properly initialized before being used (i.e., AUTH/HELLO/SELECT commands are executed).
func TestIdleConnectionsAreInitialized(t *testing.T) {
// Create client with MinIdleConns
opt := &redis.Options{
Addr: ":6379",
Password: "asdf",
DB: 1,
MinIdleConns: 5,
PoolSize: 10,
Protocol: 3,
MaxActiveConns: 50,
}
client := redis.NewClient(opt)
defer client.Close()
// Wait for minIdle connections to be created
time.Sleep(200 * time.Millisecond)
// Now use these connections - they should be properly initialized
// If they're not initialized, we'll get NOAUTH or WRONGDB errors
ctx := context.Background()
var wg sync.WaitGroup
errors := make(chan error, 200000)
start := time.Now()
for i := 0; i < 100; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Each goroutine performs multiple operations
for j := 0; j < 2000; j++ {
key := fmt.Sprintf("test_key_%d_%d", id, j)
// This will fail with NOAUTH if connection is not initialized
err := client.Set(ctx, key, "value", 0).Err()
if err != nil {
errors <- fmt.Errorf("SET failed for %s: %w", key, err)
return
}
val, err := client.Get(ctx, key).Result()
if err != nil {
errors <- fmt.Errorf("GET failed for %s: %w", key, err)
return
}
if val != "value" {
errors <- fmt.Errorf("GET returned wrong value for %s: got %s, want 'value'", key, val)
return
}
err = client.Del(ctx, key).Err()
if err != nil {
errors <- fmt.Errorf("DEL failed for %s: %w", key, err)
return
}
}
}(i)
}
wg.Wait()
close(errors)
fmt.Printf("\nTOOK %s\n", time.Since(start))
// Check for errors
var errCount int
for err := range errors {
t.Errorf("Operation error: %v", err)
errCount++
}
if errCount > 0 {
t.Fatalf("Got %d errors during operations (likely NOAUTH or WRONGDB)", errCount)
}
// Verify final state
err := client.Ping(ctx).Err()
if err != nil {
t.Errorf("Final Ping failed: %v", err)
}
fmt.Printf("pool stats: %+v\n", client.PoolStats())
}
// testPoolHook implements pool.PoolHook for testing
type testPoolHook struct {
onGet func(ctx context.Context, conn *pool.Conn, isNewConn bool) (bool, error)
onPut func(ctx context.Context, conn *pool.Conn) (bool, bool, error)
onRemove func(ctx context.Context, conn *pool.Conn, reason error)
}
func (h *testPoolHook) OnGet(ctx context.Context, conn *pool.Conn, isNewConn bool) (bool, error) {
if h.onGet != nil {
return h.onGet(ctx, conn, isNewConn)
}
return true, nil
}
func (h *testPoolHook) OnPut(ctx context.Context, conn *pool.Conn) (bool, bool, error) {
if h.onPut != nil {
return h.onPut(ctx, conn)
}
return true, false, nil
}
func (h *testPoolHook) OnRemove(ctx context.Context, conn *pool.Conn, reason error) {
if h.onRemove != nil {
h.onRemove(ctx, conn, reason)
}
}

View File

@@ -33,12 +33,12 @@ var _ = Describe("Buffer Size Configuration", func() {
Expect(err).NotTo(HaveOccurred())
defer connPool.CloseConn(cn)
// Check that default buffer sizes are used (32KiB)
// Check that default buffer sizes are used (64KiB)
writerBufSize := getWriterBufSizeUnsafe(cn)
readerBufSize := getReaderBufSizeUnsafe(cn)
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
})
It("should use custom buffer sizes when specified", func() {
@@ -78,16 +78,16 @@ var _ = Describe("Buffer Size Configuration", func() {
Expect(err).NotTo(HaveOccurred())
defer connPool.CloseConn(cn)
// Check that default buffer sizes are used (32KiB)
// Check that default buffer sizes are used (64KiB)
writerBufSize := getWriterBufSizeUnsafe(cn)
readerBufSize := getReaderBufSizeUnsafe(cn)
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
})
It("should use 32KiB default buffer sizes for standalone NewConn", func() {
// Test that NewConn (without pool) also uses 32KiB buffers
It("should use 64KiB default buffer sizes for standalone NewConn", func() {
// Test that NewConn (without pool) also uses 64KiB buffers
netConn := newDummyConn()
cn := pool.NewConn(netConn)
defer cn.Close()
@@ -95,11 +95,11 @@ var _ = Describe("Buffer Size Configuration", func() {
writerBufSize := getWriterBufSizeUnsafe(cn)
readerBufSize := getReaderBufSizeUnsafe(cn)
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
})
It("should use 32KiB defaults even when pool is created directly without buffer sizes", func() {
It("should use 64KiB defaults even when pool is created directly without buffer sizes", func() {
// Test the scenario where someone creates a pool directly (like in tests)
// without setting ReadBufferSize and WriteBufferSize
connPool = pool.NewConnPool(&pool.Options{
@@ -113,12 +113,12 @@ var _ = Describe("Buffer Size Configuration", func() {
Expect(err).NotTo(HaveOccurred())
defer connPool.CloseConn(cn)
// Should still get 32KiB defaults because NewConnPool sets them
// Should still get 64KiB defaults because NewConnPool sets them
writerBufSize := getWriterBufSizeUnsafe(cn)
readerBufSize := getReaderBufSizeUnsafe(cn)
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 32KiB buffer size
Expect(writerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
Expect(readerBufSize).To(Equal(proto.DefaultBufferSize)) // Default 64KiB buffer size
})
})

View File

@@ -18,9 +18,9 @@ import (
var noDeadline = time.Time{}
// Global time cache updated every 50ms by background goroutine.
// Global time cache updated every 100ms by background goroutine.
// This avoids expensive time.Now() syscalls in hot paths like getEffectiveReadTimeout.
// Max staleness: 50ms, which is acceptable for timeout deadline checks (timeouts are typically 3-30 seconds).
// Max staleness: 100ms, which is acceptable for timeout deadline checks (timeouts are typically 3-30 seconds).
var globalTimeCache struct {
nowNs atomic.Int64
}
@@ -31,7 +31,7 @@ func init() {
// Start background updater
go func() {
ticker := time.NewTicker(50 * time.Millisecond)
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for range ticker.C {
@@ -41,12 +41,20 @@ func init() {
}
// getCachedTimeNs returns the current time in nanoseconds from the global cache.
// This is updated every 50ms by a background goroutine, avoiding expensive syscalls.
// Max staleness: 50ms.
// This is updated every 100ms by a background goroutine, avoiding expensive syscalls.
// Max staleness: 100ms.
func getCachedTimeNs() int64 {
return globalTimeCache.nowNs.Load()
}
// GetCachedTimeNs returns the current time in nanoseconds from the global cache.
// This is updated every 100ms by a background goroutine, avoiding expensive syscalls.
// Max staleness: 100ms.
// Exported for use by other packages that need fast time access.
func GetCachedTimeNs() int64 {
return getCachedTimeNs()
}
// Global atomic counter for connection IDs
var connIDCounter uint64
@@ -170,6 +178,9 @@ func (cn *Conn) UsedAt() time.Time {
unixNano := atomic.LoadInt64(&cn.usedAt)
return time.Unix(0, unixNano)
}
func (cn *Conn) UsedAtNs() int64 {
return atomic.LoadInt64(&cn.usedAt)
}
func (cn *Conn) SetUsedAt(tm time.Time) {
atomic.StoreInt64(&cn.usedAt, tm.UnixNano())
@@ -488,7 +499,7 @@ func (cn *Conn) getEffectiveReadTimeout(normalTimeout time.Duration) time.Durati
return time.Duration(readTimeoutNs)
}
// Use cached time to avoid expensive syscall (max 50ms staleness is acceptable for timeout checks)
// Use cached time to avoid expensive syscall (max 100ms staleness is acceptable for timeout checks)
nowNs := getCachedTimeNs()
// Check if deadline has passed
if nowNs < deadlineNs {
@@ -522,7 +533,7 @@ func (cn *Conn) getEffectiveWriteTimeout(normalTimeout time.Duration) time.Durat
return time.Duration(writeTimeoutNs)
}
// Use cached time to avoid expensive syscall (max 50ms staleness is acceptable for timeout checks)
// Use cached time to avoid expensive syscall (max 100ms staleness is acceptable for timeout checks)
nowNs := getCachedTimeNs()
// Check if deadline has passed
if nowNs < deadlineNs {
@@ -699,20 +710,22 @@ func (cn *Conn) GetStateMachine() *ConnStateMachine {
// TryAcquire attempts to acquire the connection for use.
// This is an optimized inline method for the hot path (Get operation).
//
// It tries to transition from IDLE -> IN_USE or CREATED -> IN_USE.
// It tries to transition from IDLE -> IN_USE or CREATED -> CREATED.
// Returns true if the connection was successfully acquired, false otherwise.
// The CREATED->CREATED is done so we can keep the state correct for later
// initialization of the connection in initConn.
//
// Performance: This is faster than calling GetStateMachine() + TryTransitionFast()
//
// NOTE: We directly access cn.stateMachine.state here instead of using the state machine's
// methods. This breaks encapsulation but is necessary for performance.
// The IDLE->IN_USE and CREATED->IN_USE transitions don't need
// The IDLE->IN_USE and CREATED->CREATED transitions don't need
// waiter notification, and benchmarks show 1-3% improvement. If the state machine ever
// needs to notify waiters on these transitions, update this to use TryTransitionFast().
func (cn *Conn) TryAcquire() bool {
// The || operator short-circuits, so only 1 CAS in the common case
return cn.stateMachine.state.CompareAndSwap(uint32(StateIdle), uint32(StateInUse)) ||
cn.stateMachine.state.CompareAndSwap(uint32(StateCreated), uint32(StateInUse))
cn.stateMachine.state.CompareAndSwap(uint32(StateCreated), uint32(StateCreated))
}
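As a standalone illustration of the CAS-based acquire described in the comment above, here is a minimal sketch with illustrative state constants (not the pool's internal types):

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	stateCreated uint32 = iota
	stateIdle
	stateInUse
)

type conn struct{ state atomic.Uint32 }

// tryAcquire claims an IDLE connection; a brand-new (CREATED) connection is
// let through unchanged so a later initialization step can still run.
func (c *conn) tryAcquire() bool {
	return c.state.CompareAndSwap(stateIdle, stateInUse) ||
		c.state.CompareAndSwap(stateCreated, stateCreated)
}

func main() {
	var c conn                  // zero value: CREATED
	fmt.Println(c.tryAcquire()) // true (CREATED stays CREATED)
	c.state.Store(stateIdle)
	fmt.Println(c.tryAcquire()) // true (IDLE -> IN_USE)
	fmt.Println(c.tryAcquire()) // false (already IN_USE)
}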
// Release releases the connection back to the pool.
@@ -877,7 +890,7 @@ func (cn *Conn) MaybeHasData() bool {
// deadline computes the effective deadline time based on context and timeout.
// It updates the usedAt timestamp to now.
// Uses cached time to avoid expensive syscall (max 50ms staleness is acceptable for deadline calculation).
// Uses cached time to avoid expensive syscall (max 100ms staleness is acceptable for deadline calculation).
func (cn *Conn) deadline(ctx context.Context, timeout time.Duration) time.Time {
// Use cached time for deadline calculation (called 2x per command: read + write)
tm := time.Unix(0, getCachedTimeNs())
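For reference, a self-contained sketch of the cached-clock pattern described in the comments above (illustrative names, not the package's internals):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

var cachedNowNs atomic.Int64

func init() {
	cachedNowNs.Store(time.Now().UnixNano())
	go func() {
		ticker := time.NewTicker(100 * time.Millisecond)
		defer ticker.Stop()
		for range ticker.C {
			cachedNowNs.Store(time.Now().UnixNano())
		}
	}()
}

// nowCached may be up to ~100ms stale, which is acceptable when it is only
// compared against deadlines measured in seconds.
func nowCached() time.Time {
	return time.Unix(0, cachedNowNs.Load())
}

func main() {
	deadline := time.Now().Add(3 * time.Second)
	fmt.Println("deadline passed:", nowCached().After(deadline)) // false
}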

View File

@@ -30,7 +30,7 @@ func connCheck(conn net.Conn) error {
var sysErr error
if err := rawConn.Read(func(fd uintptr) bool {
if err := rawConn.Control(func(fd uintptr) {
var buf [1]byte
// Use MSG_PEEK to peek at data without consuming it
n, _, err := syscall.Recvfrom(int(fd), buf[:], syscall.MSG_PEEK|syscall.MSG_DONTWAIT)
@@ -45,7 +45,6 @@ func connCheck(conn net.Conn) error {
default:
sysErr = err
}
return true
}); err != nil {
return err
}
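The hunk above relies on MSG_PEEK to look at the socket without consuming bytes. A Unix-only sketch of that health-check idea, with illustrative names (not the library's connCheck):

package main

import (
	"fmt"
	"io"
	"net"
	"syscall"
)

// peekCheck reports whether the connection has unread data waiting, or an
// error if the peer has closed it, without consuming any bytes.
func peekCheck(conn net.Conn) (hasData bool, err error) {
	sc, ok := conn.(syscall.Conn)
	if !ok {
		return false, nil // not a raw socket; nothing to check
	}
	raw, err := sc.SyscallConn()
	if err != nil {
		return false, err
	}
	var sysErr error
	ctrlErr := raw.Control(func(fd uintptr) {
		var buf [1]byte
		n, _, rerr := syscall.Recvfrom(int(fd), buf[:], syscall.MSG_PEEK|syscall.MSG_DONTWAIT)
		switch {
		case rerr == syscall.EAGAIN || rerr == syscall.EWOULDBLOCK:
			// No pending data; the connection looks healthy.
		case rerr != nil:
			sysErr = rerr
		case n == 0:
			sysErr = io.EOF // peer closed the connection
		default:
			hasData = true // unread data (e.g. a push notification) is waiting
		}
	})
	if ctrlErr != nil {
		return false, ctrlErr
	}
	return hasData, sysErr
}

func main() {
	c, err := net.Dial("tcp", "localhost:6379")
	if err != nil {
		fmt.Println("dial:", err)
		return
	}
	defer c.Close()
	fmt.Println(peekCheck(c))
}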

View File

@@ -155,10 +155,18 @@ type ConnPool struct {
var _ Pooler = (*ConnPool)(nil)
func NewConnPool(opt *Options) *ConnPool {
p := &ConnPool{
cfg: opt,
semSize := opt.PoolSize
if opt.MaxActiveConns > 0 && opt.MaxActiveConns < opt.PoolSize {
if opt.MaxActiveConns < opt.PoolSize {
opt.MaxActiveConns = opt.PoolSize
}
semSize = opt.MaxActiveConns
}
//semSize = opt.PoolSize
semaphore: internal.NewFastSemaphore(opt.PoolSize),
p := &ConnPool{
cfg: opt,
semaphore: internal.NewFastSemaphore(semSize),
conns: make(map[uint64]*Conn),
idleConns: make([]*Conn, 0, opt.PoolSize),
}

View File

@@ -8,12 +8,15 @@ import (
"math"
"math/big"
"strconv"
"sync"
"github.com/redis/go-redis/v9/internal/util"
)
// DefaultBufferSize is the default size for read/write buffers (32 KiB).
const DefaultBufferSize = 32 * 1024
// DefaultBufferSize is the default size for read/write buffers (64 KiB).
// This is a balance between memory usage and performance.
// For high-throughput scenarios, consider using 512 KiB.
const DefaultBufferSize = 64 * 1024
// redis resp protocol data type.
const (
@@ -55,6 +58,15 @@ func ParseErrorReply(line []byte) error {
//------------------------------------------------------------------------------
// Buffer pool for string reply parsing to reduce allocations
var stringReplyBufPool = sync.Pool{
New: func() interface{} {
// Start with 2KB buffer - will grow as needed
b := make([]byte, 2*1024)
return &b
},
}
type Reader struct {
rd *bufio.Reader
}
@@ -314,13 +326,34 @@ func (r *Reader) readStringReply(line []byte) (string, error) {
return "", err
}
b := make([]byte, n+2)
_, err = io.ReadFull(r.rd, b)
// Get buffer from pool
bufPtr := stringReplyBufPool.Get().(*[]byte)
buf := *bufPtr
// Resize if needed (grow capacity if buffer is too small)
if cap(buf) < n+2 {
buf = make([]byte, n+2)
} else {
buf = buf[:n+2]
}
_, err = io.ReadFull(r.rd, buf)
if err != nil {
// Return buffer to pool even on error
*bufPtr = buf
stringReplyBufPool.Put(bufPtr)
return "", err
}
return util.BytesToString(b[:n]), nil
// Must copy to a string since we're returning the buffer to the pool
// This is still faster than allocating a new []byte every time
result := string(buf[:n])
// Return buffer to pool
*bufPtr = buf
stringReplyBufPool.Put(bufPtr)
return result, nil
}
func (r *Reader) readVerb(line []byte) (string, error) {
@@ -471,7 +504,8 @@ func (r *Reader) ReadString() (string, error) {
switch line[0] {
case RespStatus, RespInt, RespFloat:
return string(line[1:]), nil
// Use BytesToString for zero-copy conversion when possible
return util.BytesToString(line[1:]), nil
case RespString:
return r.readStringReply(line)
case RespBool:
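A minimal, self-contained sketch of the pooled-buffer pattern readStringReply now uses (the real reader works on RESP framing; this only shows the sync.Pool reuse and the copy-before-return step):

package main

import (
	"bytes"
	"fmt"
	"io"
	"sync"
)

var bufPool = sync.Pool{
	New: func() interface{} {
		b := make([]byte, 2*1024) // start small; grown on demand below
		return &b
	},
}

// readString reads exactly n bytes into a pooled buffer and returns them as a
// string, so the buffer can be handed back to the pool for the next call.
func readString(r io.Reader, n int) (string, error) {
	bufPtr := bufPool.Get().(*[]byte)
	buf := *bufPtr
	if cap(buf) < n {
		buf = make([]byte, n)
	} else {
		buf = buf[:n]
	}
	_, err := io.ReadFull(r, buf)
	s := ""
	if err == nil {
		s = string(buf) // copy before the buffer is reused
	}
	*bufPtr = buf
	bufPool.Put(bufPtr)
	return s, err
}

func main() {
	s, err := readString(bytes.NewReader([]byte("hello world")), 5)
	fmt.Println(s, err) // hello <nil>
}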

View File

@@ -0,0 +1,80 @@
package proto
import (
"bytes"
"fmt"
"testing"
)
// BenchmarkReadStringReply benchmarks the optimized readStringReply with buffer pooling
func BenchmarkReadStringReply(b *testing.B) {
sizes := []int{10, 50, 100, 500, 1000, 5000}
for _, size := range sizes {
b.Run(fmt.Sprintf("size_%d", size), func(b *testing.B) {
// Create a RESP bulk string reply
value := bytes.Repeat([]byte("x"), size)
reply := fmt.Sprintf("$%d\r\n%s\r\n", size, value)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
r := NewReader(bytes.NewReader([]byte(reply)))
line, err := r.readLine()
if err != nil {
b.Fatal(err)
}
_, err = r.readStringReply(line)
if err != nil {
b.Fatal(err)
}
}
})
}
}
// BenchmarkReadString benchmarks the optimized ReadString with BytesToString
func BenchmarkReadString(b *testing.B) {
testCases := []struct {
name string
reply string
}{
{"status", "+OK\r\n"},
{"int", ":42\r\n"},
{"small_string", "$5\r\nhello\r\n"},
{"medium_string", "$100\r\n" + string(bytes.Repeat([]byte("x"), 100)) + "\r\n"},
{"large_string", "$1000\r\n" + string(bytes.Repeat([]byte("x"), 1000)) + "\r\n"},
}
for _, tc := range testCases {
b.Run(tc.name, func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
r := NewReader(bytes.NewReader([]byte(tc.reply)))
_, err := r.ReadString()
if err != nil {
b.Fatal(err)
}
}
})
}
}
// BenchmarkReadStringParallel benchmarks concurrent ReadString calls
func BenchmarkReadStringParallel(b *testing.B) {
reply := "$100\r\n" + string(bytes.Repeat([]byte("x"), 100)) + "\r\n"
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
r := NewReader(bytes.NewReader([]byte(reply)))
_, err := r.ReadString()
if err != nil {
b.Fatal(err)
}
}
})
}

View File

@@ -69,7 +69,7 @@ var RCEDocker = false
// Notes version of redis we are executing tests against.
// This can be used before we change the bsm fork of ginkgo for one,
// which have support for label sets, so we can filter tests per redis version.
var RedisVersion float64 = 8.2
var RedisVersion float64 = 8.4
func SkipBeforeRedisVersion(version float64, msg string) {
if RedisVersion < version {
@@ -96,7 +96,7 @@ var _ = BeforeSuite(func() {
RedisVersion, _ = strconv.ParseFloat(strings.Trim(os.Getenv("REDIS_VERSION"), "\""), 64)
if RedisVersion == 0 {
RedisVersion = 8.2
RedisVersion = 8.4
}
fmt.Printf("RECluster: %v\n", RECluster)

View File

@@ -319,6 +319,7 @@ func (cf *ClientFactory) Create(key string, options *CreateClientOptions) (redis
}
var client redis.UniversalClient
var opts interface{}
// Determine if this is a cluster configuration
if len(cf.config.Endpoints) > 1 || cf.isClusterEndpoint() {
@@ -349,6 +350,7 @@ func (cf *ClientFactory) Create(key string, options *CreateClientOptions) (redis
}
}
opts = clusterOptions
client = redis.NewClusterClient(clusterOptions)
} else {
// Create single client
@@ -379,9 +381,14 @@ func (cf *ClientFactory) Create(key string, options *CreateClientOptions) (redis
}
}
opts = clientOptions
client = redis.NewClient(clientOptions)
}
if err := client.Ping(context.Background()).Err(); err != nil {
return nil, fmt.Errorf("failed to connect to Redis: %w\nOptions: %+v", err, opts)
}
// Store the client
cf.clients[key] = client
@@ -832,7 +839,6 @@ func (m *TestDatabaseManager) DeleteDatabase(ctx context.Context) error {
return fmt.Errorf("failed to trigger database deletion: %w", err)
}
// Wait for deletion to complete
status, err := m.faultInjector.WaitForAction(ctx, resp.ActionID,
WithMaxWaitTime(2*time.Minute),

View File

@@ -145,17 +145,25 @@ type Options struct {
ContextTimeoutEnabled bool
// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
// Larger buffers can improve performance for commands that return large responses.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for commands that return large responses.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
ReadBufferSize int
// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
// Larger buffers can improve performance for large pipelines and commands with many arguments.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for large pipelines and commands with many arguments.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
WriteBufferSize int
// PoolFIFO type of connection pool.
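A hedged configuration example tying the guidance above together; the 512 KiB values follow the comment's suggestion for high-throughput workloads and are not a universal recommendation:

package main

import "github.com/redis/go-redis/v9"

func main() {
	client := redis.NewClient(&redis.Options{
		Addr:            "localhost:6379",
		ReadBufferSize:  512 * 1024, // per-connection bufio.Reader buffer
		WriteBufferSize: 512 * 1024, // per-connection bufio.Writer buffer
	})
	defer client.Close()
}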

View File

@@ -102,17 +102,25 @@ type ClusterOptions struct {
ConnMaxLifetime time.Duration
// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
// Larger buffers can improve performance for commands that return large responses.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for commands that return large responses.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
ReadBufferSize int
// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
// Larger buffers can improve performance for large pipelines and commands with many arguments.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for large pipelines and commands with many arguments.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
WriteBufferSize int
TLSConfig *tls.Config

View File

@@ -1373,13 +1373,39 @@ func (c *Conn) TxPipeline() Pipeliner {
// processPushNotifications processes all pending push notifications on a connection
// This ensures that cluster topology changes are handled immediately before the connection is used
// This method should be called by the client before using WithReader for command execution
// This method should be called by the client before using WithWriter for command execution
//
// Performance optimization: Skip the expensive MaybeHasData() syscall if a health check
// was performed recently (within 5 seconds). The health check already verified the connection
// is healthy and checked for unexpected data (push notifications).
func (c *baseClient) processPushNotifications(ctx context.Context, cn *pool.Conn) error {
// Only process push notifications for RESP3 connections with a processor
// Also check if there is any data to read before processing
// Which is an optimization on UNIX systems where MaybeHasData is a syscall
if c.opt.Protocol != 3 || c.pushProcessor == nil {
return nil
}
// Performance optimization: Skip MaybeHasData() syscall if health check was recent
// If the connection was health-checked within the last 5 seconds, we can skip the
// expensive syscall since the health check already verified no unexpected data.
// This is safe because:
// 1. Health check (connCheck) uses the same syscall (Recvfrom with MSG_PEEK)
// 2. If push notifications arrived, they would have been detected by health check
// 3. 5 seconds is short enough that connection state is still fresh
// 4. Push notifications will be processed by the next WithReader call
lastHealthCheckNs := cn.UsedAtNs()
if lastHealthCheckNs > 0 {
// Use pool's cached time to avoid expensive time.Now() syscall
nowNs := pool.GetCachedTimeNs()
if nowNs-lastHealthCheckNs < int64(5*time.Second) {
// Recent health check confirmed no unexpected data, skip the syscall
return nil
}
}
// Check if there is any data to read before processing
// This is an optimization on UNIX systems where MaybeHasData is a syscall
// On Windows, MaybeHasData always returns true, so this check is a no-op
if c.opt.Protocol != 3 || c.pushProcessor == nil || !cn.MaybeHasData() {
if !cn.MaybeHasData() {
return nil
}
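A compact sketch of the "skip the check if we verified recently" rule encoded above (illustrative helper, not the client's API):

package main

import (
	"fmt"
	"time"
)

const recentCheckWindow = 5 * time.Second

// needsPeek reports whether a MSG_PEEK-style check is still worth paying for,
// given when the connection was last verified healthy.
func needsPeek(lastCheckedNs, nowNs int64) bool {
	if lastCheckedNs == 0 {
		return true // never checked; be safe
	}
	return nowNs-lastCheckedNs >= int64(recentCheckWindow)
}

func main() {
	now := time.Now().UnixNano()
	fmt.Println(needsPeek(now-int64(2*time.Second), now))  // false: checked 2s ago
	fmt.Println(needsPeek(now-int64(10*time.Second), now)) // true: check again
}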

View File

@@ -245,6 +245,52 @@ var _ = Describe("Client", func() {
Expect(val).Should(HaveKeyWithValue("proto", int64(3)))
})
It("should initialize idle connections created by MinIdleConns", func() {
opt := redisOptions()
opt.MinIdleConns = 5
opt.Password = "asdf" // Set password to require AUTH
opt.DB = 1 // Set DB to require SELECT
db := redis.NewClient(opt)
defer func() {
Expect(db.Close()).NotTo(HaveOccurred())
}()
// Wait for minIdle connections to be created
time.Sleep(100 * time.Millisecond)
// Verify that idle connections were created
stats := db.PoolStats()
Expect(stats.IdleConns).To(BeNumerically(">=", 5))
// Now use these connections - they should be properly initialized
// If they're not initialized, we'll get NOAUTH or WRONGDB errors
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Each goroutine performs multiple operations
for j := 0; j < 5; j++ {
key := fmt.Sprintf("test_key_%d_%d", id, j)
err := db.Set(ctx, key, "value", 0).Err()
Expect(err).NotTo(HaveOccurred())
val, err := db.Get(ctx, key).Result()
Expect(err).NotTo(HaveOccurred())
Expect(val).To(Equal("value"))
err = db.Del(ctx, key).Err()
Expect(err).NotTo(HaveOccurred())
}
}(i)
}
wg.Wait()
// Verify no errors occurred
Expect(db.Ping(ctx).Err()).NotTo(HaveOccurred())
})
It("processes custom commands", func() {
cmd := redis.NewCmd(ctx, "PING")
_ = client.Process(ctx, cmd)

16
ring.go
View File

@@ -125,17 +125,25 @@ type RingOptions struct {
ConnMaxLifetime time.Duration
// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
// Larger buffers can improve performance for commands that return large responses.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for commands that return large responses.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
ReadBufferSize int
// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
// Larger buffers can improve performance for large pipelines and commands with many arguments.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for large pipelines and commands with many arguments.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
WriteBufferSize int
TLSConfig *tls.Config

View File

@@ -94,17 +94,25 @@ type FailoverOptions struct {
ContextTimeoutEnabled bool
// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
// Larger buffers can improve performance for commands that return large responses.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for commands that return large responses.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
ReadBufferSize int
// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
// Larger buffers can improve performance for large pipelines and commands with many arguments.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for large pipelines and commands with many arguments.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
WriteBufferSize int
PoolFIFO bool

View File

@@ -63,17 +63,25 @@ type UniversalOptions struct {
ContextTimeoutEnabled bool
// ReadBufferSize is the size of the bufio.Reader buffer for each connection.
// Larger buffers can improve performance for commands that return large responses.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for commands that return large responses.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
ReadBufferSize int
// WriteBufferSize is the size of the bufio.Writer buffer for each connection.
// Larger buffers can improve performance for large pipelines and commands with many arguments.
// Buffers are allocated once per connection and persist for the connection's lifetime.
//
// Larger buffers can significantly improve performance for large pipelines and commands with many arguments.
// For high-throughput scenarios, consider using 512 KiB.
//
// Smaller buffers can improve memory usage for larger pools.
//
// default: 32KiB (32768 bytes)
// default: 64 KiB (65536 bytes)
WriteBufferSize int
// PoolFIFO uses FIFO mode for each node connection pool GET/PUT (default LIFO).