From 4edf49429306016e69e7deff8f54d469b1336413 Mon Sep 17 00:00:00 2001 From: ofekshenawa <104765379+ofekshenawa@users.noreply.github.com> Date: Wed, 10 Dec 2025 18:14:05 +0200 Subject: [PATCH] fix(retry): Add retry mechanism for NOREPLICAS error (#3647) --- error.go | 14 +++++++++++ error_test.go | 3 ++- error_wrapping_test.go | 14 ++++++++--- internal/proto/redis_errors.go | 39 +++++++++++++++++++++++++++++ internal/proto/redis_errors_test.go | 20 +++++++++------ 5 files changed, 78 insertions(+), 12 deletions(-) diff --git a/error.go b/error.go index 12b5604d..d2462a49 100644 --- a/error.go +++ b/error.go @@ -124,6 +124,9 @@ func shouldRetry(err error, retryTimeout bool) bool { if proto.IsTryAgainError(err) { return true } + if proto.IsNoReplicasError(err) { + return true + } // Fallback to string checking for backward compatibility with plain errors s := err.Error() @@ -145,6 +148,9 @@ func shouldRetry(err error, retryTimeout bool) bool { if strings.HasPrefix(s, "MASTERDOWN ") { return true } + if strings.HasPrefix(s, "NOREPLICAS ") { + return true + } return false } @@ -342,6 +348,14 @@ func IsOOMError(err error) bool { return proto.IsOOMError(err) } +// IsNoReplicasError checks if an error is a Redis NOREPLICAS error, even if wrapped. +// NOREPLICAS errors occur when not enough replicas acknowledge a write operation. +// This typically happens with WAIT/WAITAOF commands or CLUSTER SETSLOT with synchronous +// replication when the required number of replicas cannot confirm the write within the timeout. +func IsNoReplicasError(err error) bool { + return proto.IsNoReplicasError(err) +} + //------------------------------------------------------------------------------ type timeoutError interface { diff --git a/error_test.go b/error_test.go index 6c130d38..5accb0e0 100644 --- a/error_test.go +++ b/error_test.go @@ -45,7 +45,8 @@ var _ = Describe("error", func() { proto.ParseErrorReply([]byte("-READONLY You can't write against a read only replica")): true, proto.ParseErrorReply([]byte("-CLUSTERDOWN The cluster is down")): true, proto.ParseErrorReply([]byte("-TRYAGAIN Command cannot be processed, please try again")): true, - proto.ParseErrorReply([]byte("-ERR other")): false, + proto.ParseErrorReply([]byte("-NOREPLICAS Not enough good replicas to write")): true, + proto.ParseErrorReply([]byte("-ERR other")): false, } for err, expected := range data { diff --git a/error_wrapping_test.go b/error_wrapping_test.go index 574d1379..757aa1da 100644 --- a/error_wrapping_test.go +++ b/error_wrapping_test.go @@ -239,10 +239,10 @@ func TestErrorWrappingInHookScenario(t *testing.T) { // TestShouldRetryWithTypedErrors tests that shouldRetry works with typed errors func TestShouldRetryWithTypedErrors(t *testing.T) { tests := []struct { - name string - errorMsg string - shouldRetry bool - retryTimeout bool + name string + errorMsg string + shouldRetry bool + retryTimeout bool }{ { name: "LOADING error should retry", @@ -280,6 +280,12 @@ func TestShouldRetryWithTypedErrors(t *testing.T) { shouldRetry: true, retryTimeout: false, }, + { + name: "NOREPLICAS error should retry", + errorMsg: "NOREPLICAS Not enough good replicas to write", + shouldRetry: true, + retryTimeout: false, + }, } for _, tt := range tests { diff --git a/internal/proto/redis_errors.go b/internal/proto/redis_errors.go index f553e2f9..a28240f5 100644 --- a/internal/proto/redis_errors.go +++ b/internal/proto/redis_errors.go @@ -212,6 +212,25 @@ func NewOOMError(msg string) *OOMError { return &OOMError{msg: msg} } +// NoReplicasError is returned when not enough replicas acknowledge a write. +// This error occurs when using WAIT/WAITAOF commands or CLUSTER SETSLOT with +// synchronous replication, and the required number of replicas cannot confirm +// the write within the timeout period. +type NoReplicasError struct { + msg string +} + +func (e *NoReplicasError) Error() string { + return e.msg +} + +func (e *NoReplicasError) RedisError() {} + +// NewNoReplicasError creates a new NoReplicasError with the given message. +func NewNoReplicasError(msg string) *NoReplicasError { + return &NoReplicasError{msg: msg} +} + // parseTypedRedisError parses a Redis error message and returns a typed error if applicable. // This function maintains backward compatibility by keeping the same error messages. func parseTypedRedisError(msg string) error { @@ -235,6 +254,8 @@ func parseTypedRedisError(msg string) error { return NewTryAgainError(msg) case strings.HasPrefix(msg, "MASTERDOWN "): return NewMasterDownError(msg) + case strings.HasPrefix(msg, "NOREPLICAS "): + return NewNoReplicasError(msg) case msg == "ERR max number of clients reached": return NewMaxClientsError(msg) case strings.HasPrefix(msg, "NOAUTH "), strings.HasPrefix(msg, "WRONGPASS "), strings.Contains(msg, "unauthenticated"): @@ -486,3 +507,21 @@ func IsOOMError(err error) bool { // Fallback to string checking for backward compatibility return strings.HasPrefix(err.Error(), "OOM ") } + +// IsNoReplicasError checks if an error is a NoReplicasError, even if wrapped. +func IsNoReplicasError(err error) bool { + if err == nil { + return false + } + var noReplicasErr *NoReplicasError + if errors.As(err, &noReplicasErr) { + return true + } + // Check if wrapped error is a RedisError with NOREPLICAS prefix + var redisErr RedisError + if errors.As(err, &redisErr) && strings.HasPrefix(redisErr.Error(), "NOREPLICAS ") { + return true + } + // Fallback to string checking for backward compatibility + return strings.HasPrefix(err.Error(), "NOREPLICAS ") +} diff --git a/internal/proto/redis_errors_test.go b/internal/proto/redis_errors_test.go index 452a4524..ba7eeb50 100644 --- a/internal/proto/redis_errors_test.go +++ b/internal/proto/redis_errors_test.go @@ -9,12 +9,12 @@ import ( // TestTypedRedisErrors tests that typed Redis errors are created correctly func TestTypedRedisErrors(t *testing.T) { tests := []struct { - name string - errorMsg string - expectedType interface{} - expectedMsg string - checkFunc func(error) bool - extractAddr func(error) string + name string + errorMsg string + expectedType interface{} + expectedMsg string + checkFunc func(error) bool + extractAddr func(error) string }{ { name: "LOADING error", @@ -132,6 +132,13 @@ func TestTypedRedisErrors(t *testing.T) { expectedMsg: "OOM command not allowed when used memory > 'maxmemory'", checkFunc: IsOOMError, }, + { + name: "NOREPLICAS error", + errorMsg: "NOREPLICAS Not enough good replicas to write", + expectedType: &NoReplicasError{}, + expectedMsg: "NOREPLICAS Not enough good replicas to write", + checkFunc: IsNoReplicasError, + }, } for _, tt := range tests { @@ -389,4 +396,3 @@ func TestBackwardCompatibility(t *testing.T) { }) } } -