From 3d0b4b1068018f624d5ef7c9f90b536ed58345b5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 10 Jan 2025 13:18:04 -0600
Subject: [PATCH] Use a non-locking initial test in TAS_SPIN on AArch64.

Our testing showed that this is helpful at sufficiently high
contention levels and doesn't hurt performance on smaller machines.
The new TAS_SPIN macro for AArch64 is identical to the ones added
for PPC and x86_64 (see commits bc2a050d40 and b03d196be0).

Reported-by: Salvatore Dipietro
Reviewed-by: Jingtang Zhang, Andres Freund
Tested-by: Tom Lane
Discussion: https://postgr.es/m/ZxgDEb_VpWyNZKB_%40nathan
---
 src/include/storage/s_lock.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 516fffc53ab..2f73f9fcf57 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -263,18 +263,24 @@ tas(volatile slock_t *lock)
 
 #define S_UNLOCK(lock) __sync_lock_release(lock)
 
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
 #if defined(__aarch64__)
 
+/*
+ * On ARM64, it's a win to use a non-locking test before the TAS proper.  It
+ * may be a win on 32-bit ARM, too, but nobody's tested it yet.
+ */
+#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
+
 #define SPIN_DELAY() spin_delay()
 
 static __inline__ void
 spin_delay(void)
 {
+	/*
+	 * Using an ISB instruction to delay in spinlock loops appears beneficial
+	 * on high-core-count ARM64 processors.  It seems mostly a wash for smaller
+	 * gear, and ISB doesn't exist at all on pre-v7 ARM chips.
+	 */
 	__asm__ __volatile__(
 		" isb;				\n");
 }