From 9448059c01adc988a840b4e0de80b42c620a46f6 Mon Sep 17 00:00:00 2001 From: Nedyalko Dyakov Date: Tue, 28 Oct 2025 00:39:13 +0200 Subject: [PATCH] initConn sets IDLE state - Handle unexpected conn state changes --- internal/pool/conn.go | 2 ++ internal/pool/pool.go | 20 ++++++++++++++++---- maintnotifications/handoff_worker.go | 2 ++ redis.go | 6 ++++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/internal/pool/conn.go b/internal/pool/conn.go index 56be7098..5ba3db41 100644 --- a/internal/pool/conn.go +++ b/internal/pool/conn.go @@ -603,6 +603,7 @@ func (cn *Conn) SetNetConnAndInitConn(ctx context.Context, netConn net.Conn) err // Execute initialization // NOTE: ExecuteInitConn (via baseClient.initConn) will transition to IDLE on success // or CLOSED on failure. We don't need to do it here. + // NOTE: Initconn returns conn in IDLE state initErr := cn.ExecuteInitConn(ctx) if initErr != nil { // ExecuteInitConn already transitioned to CLOSED, just return the error @@ -745,6 +746,7 @@ func (cn *Conn) ClearHandoffState() { // Mark connection as usable again // Use state machine directly instead of deprecated SetUsable + // probably done by initConn cn.stateMachine.Transition(StateIdle) } diff --git a/internal/pool/pool.go b/internal/pool/pool.go index 2dedca05..5df4962b 100644 --- a/internal/pool/pool.go +++ b/internal/pool/pool.go @@ -684,11 +684,22 @@ func (p *ConnPool) putConn(ctx context.Context, cn *Conn, freeTurn bool) { // Using inline Release() method for better performance (avoids pointer dereference) transitionedToIdle := cn.Release() + // Handle unexpected state changes if !transitionedToIdle { // Fast path failed - hook might have changed state (e.g., to UNUSABLE for handoff) // Keep the state set by the hook and pool the connection anyway currentState := cn.GetStateMachine().GetState() - internal.Logger.Printf(ctx, "Connection state changed by hook to %v, pooling as-is", currentState) + switch currentState { + case StateUnusable: + // expected state, don't log it + case StateClosed: + internal.Logger.Printf(ctx, "Unexpected conn[%d] state changed by hook to %v, closing it", cn.GetID(), currentState) + shouldCloseConn = true + p.removeConnWithLock(cn) + default: + // Pool as-is + internal.Logger.Printf(ctx, "Unexpected conn[%d] state changed by hook to %v, pooling as-is", cn.GetID(), currentState) + } } // unusable conns are expected to become usable at some point (background process is reconnecting them) @@ -704,15 +715,16 @@ func (p *ConnPool) putConn(ctx context.Context, cn *Conn, freeTurn bool) { p.idleConns = append([]*Conn{cn}, p.idleConns...) p.connsMu.Unlock() } - } else { + p.idleConnsLen.Add(1) + } else if !shouldCloseConn { p.connsMu.Lock() p.idleConns = append(p.idleConns, cn) p.connsMu.Unlock() + p.idleConnsLen.Add(1) } - p.idleConnsLen.Add(1) } else { - p.removeConnWithLock(cn) shouldCloseConn = true + p.removeConnWithLock(cn) } if freeTurn { diff --git a/maintnotifications/handoff_worker.go b/maintnotifications/handoff_worker.go index 2fdeec16..5b60e39b 100644 --- a/maintnotifications/handoff_worker.go +++ b/maintnotifications/handoff_worker.go @@ -456,6 +456,8 @@ func (hwm *handoffWorkerManager) performHandoffInternal( // - set the connection as usable again // - clear the handoff state (shouldHandoff, endpoint, seqID) // - reset the handoff retries to 0 + // Note: Theoretically there may be a short window where the connection is in the pool + // and IDLE (initConn completed) but still has handoff state set. conn.ClearHandoffState() internal.Logger.Printf(ctx, logs.HandoffSucceeded(connID, newEndpoint)) diff --git a/redis.go b/redis.go index a355531c..1f4b0224 100644 --- a/redis.go +++ b/redis.go @@ -298,6 +298,12 @@ func (c *baseClient) _getConn(ctx context.Context) (*pool.Conn, error) { return nil, err } + // initConn will transition to IDLE state, so we need to acquire it + // before returning it to the user. + if !cn.TryAcquire() { + return nil, fmt.Errorf("redis: connection is not usable") + } + return cn, nil }