From 476e962af766e47f1c5df434533f0996df8f8c28 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 20 Nov 2025 15:30:05 -0300
Subject: [PATCH] Add gmp-arch and udiv_qrnnd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To enable the removal of “longlong.h”, udiv_qrnnd is moved to a new
gmp-arch.h header, which allows each architecture to provide its own
optimized implementation.  The generic implementation is now a static
inline function, which provides better type checking than the GNU
extension of casting the asm constraint operands (and it works better
with clang).

Most architectures use the generic implementation, which is expanded
from a macro; the exceptions are alpha, x86, m68k, sh, and sparc.  I
kept the arch-specific versions only for alpha, which uses an
out-of-line implementation, and for x86, where there is no easy way to
use the div{q} instruction from C code.  For the rest, the compiler
generates good enough code.

hppa also provides arch-specific implementations, but they are not
wired up in “longlong.h” and are thus never used, so they are removed.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
---
 soft-fp/soft-fp.h                   |  19 +-
 stdio-common/_itoa.c                |   7 +-
 stdio-common/_itowa.c               |   7 +-
 stdlib/Makefile                     |   1 -
 stdlib/addmul_1.c                   |   1 +
 stdlib/divmod_1.c                   |   1 +
 stdlib/mod_1.c                      |   1 +
 stdlib/mul_1.c                      |   1 +
 stdlib/strtod_l.c                   |   1 +
 stdlib/submul_1.c                   |   1 +
 stdlib/udiv_qrnnd.c                 |  10 -
 sysdeps/alpha/Makefile              |   4 +
 sysdeps/alpha/gmp-arch.h            |  39 ++++
 sysdeps/generic/gmp-arch.h          | 100 ++++++++++
 sysdeps/hppa/hppa1.1/udiv_qrnnd.S   |  77 --------
 sysdeps/hppa/udiv_qrnnd.S           | 285 ----------------------------
 sysdeps/ieee754/dbl-64/dbl2mpn.c    |   1 +
 sysdeps/ieee754/ldbl-128/ldbl2mpn.c |   1 +
 sysdeps/ieee754/ldbl-96/ldbl2mpn.c  |   1 +
 sysdeps/wordsize-32/divdi3.c        |  10 +-
 sysdeps/x86/gmp-arch.h              |  52 +++++
 sysdeps/x86/ldbl2mpn.c              |   1 +
 22 files changed, 219 insertions(+), 402 deletions(-)
 delete mode 100644 stdlib/udiv_qrnnd.c
 create mode 100644 sysdeps/alpha/gmp-arch.h
 create mode 100644 sysdeps/generic/gmp-arch.h
 delete mode 100644 sysdeps/hppa/hppa1.1/udiv_qrnnd.S
 delete mode 100644 sysdeps/hppa/udiv_qrnnd.S
 create mode 100644 sysdeps/x86/gmp-arch.h

diff --git a/soft-fp/soft-fp.h b/soft-fp/soft-fp.h
index 1f08714831..4c0859fa06 100644
--- a/soft-fp/soft-fp.h
+++ b/soft-fp/soft-fp.h
@@ -316,22 +316,6 @@
 #include "op-8.h"
 #include "op-common.h"
 
-/* Sigh.  Silly things longlong.h needs.  */
-#define UWtype		_FP_W_TYPE
-#define W_TYPE_SIZE	_FP_W_TYPE_SIZE
-
-typedef int QItype __attribute__ ((mode (QI)));
-typedef int SItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef unsigned int UQItype __attribute__ ((mode (QI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef unsigned int UDItype __attribute__ ((mode (DI)));
-#if _FP_W_TYPE_SIZE == 32
-typedef unsigned int UHWtype __attribute__ ((mode (HI)));
-#elif _FP_W_TYPE_SIZE == 64
-typedef USItype UHWtype;
-#endif
-
 #ifndef CMPtype
 # define CMPtype	int
 #endif
@@ -341,7 +325,10 @@ typedef USItype UHWtype;
 
 #ifndef umul_ppmm
 # ifdef _LIBC
+#  include <gmp.h>
+#  include <stdlib/gmp-impl.h>
 #  include <stdlib/longlong.h>
+#  include <gmp-arch.h>
 # else
 #  include "longlong.h"
 # endif
diff --git a/stdio-common/_itoa.c b/stdio-common/_itoa.c
index 08859f0dd0..e9cbcfab3b 100644
--- a/stdio-common/_itoa.c
+++ b/stdio-common/_itoa.c
@@ -26,6 +26,7 @@
 #include <limits.h>
 #include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>
 
 #include <_itoa.h>
 
@@ -308,8 +309,8 @@ _itoa (unsigned long long int value, char *buflim, unsigned int base,
 		if (big_normalization_steps == 0)
 		  xh = 0;
 		else
-		  xh = (mp_limb_t) (value >> 64 - big_normalization_steps);
-		xl = (mp_limb_t) (value >> 32 - big_normalization_steps);
+		  xh = (mp_limb_t) (value >> (64 - big_normalization_steps));
+		xl = (mp_limb_t) (value >> (32 - big_normalization_steps));
 		udiv_qrnnd (x1hi, r, xh, xl, big_base_norm);
 
 		xl = ((mp_limb_t) value) << big_normalization_steps;
@@ -320,7 +321,7 @@ _itoa (unsigned long long int value, char *buflim, unsigned int base,
 		  xh = x1hi;
 		else
 		  xh = ((x1hi << big_normalization_steps)
-			| (x1lo >> 32 - big_normalization_steps));
+			| (x1lo >> (32 - big_normalization_steps)));
 		xl = x1lo << big_normalization_steps;
 		udiv_qrnnd (t[0], x, xh, xl, big_base_norm);
 		t[1] = x >> big_normalization_steps;
diff --git a/stdio-common/_itowa.c b/stdio-common/_itowa.c
index 6443b293a4..2da7489502 100644
--- a/stdio-common/_itowa.c
+++ b/stdio-common/_itowa.c
@@ -21,6 +21,7 @@
 #include <limits.h>
 #include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>
 
 #include <_itowa.h>
 
@@ -228,8 +229,8 @@ _itowa (unsigned long long int value, wchar_t *buflim, unsigned int base,
 		if (big_normalization_steps == 0)
 		  xh = 0;
 		else
-		  xh = (mp_limb_t) (value >> 64 - big_normalization_steps);
-		xl = (mp_limb_t) (value >> 32 - big_normalization_steps);
+		  xh = (mp_limb_t) (value >> (64 - big_normalization_steps));
+		xl = (mp_limb_t) (value >> (32 - big_normalization_steps));
 		udiv_qrnnd (x1hi, r, xh, xl, big_base_norm);
 
 		xl = ((mp_limb_t) value) << big_normalization_steps;
@@ -240,7 +241,7 @@ _itowa (unsigned long long int value, wchar_t *buflim, unsigned int base,
 		  xh = x1hi;
 		else
 		  xh = ((x1hi << big_normalization_steps)
-			| (x1lo >> 32 - big_normalization_steps));
+			| (x1lo >> (32 - big_normalization_steps)));
 		xl = x1lo << big_normalization_steps;
 		udiv_qrnnd (t[0], x, xh, xl, big_base_norm);
 		t[1] = x >> big_normalization_steps;
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 545211dfbf..fdb7ab91cf 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -494,7 +494,6 @@ mpn-routines := \
   rshift \
   sub_n \
   submul_1 \
-  udiv_qrnnd \
   # mpn-routines
 mpn-headers = \
   asm-syntax.h \
diff --git a/stdlib/addmul_1.c b/stdlib/addmul_1.c
index 52411c3d10..a5bf7fd37e 100644
--- a/stdlib/addmul_1.c
+++ b/stdlib/addmul_1.c
@@ -24,6 +24,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 
 mp_limb_t
 mpn_addmul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
diff --git a/stdlib/divmod_1.c b/stdlib/divmod_1.c
index b91ab9e593..676145d727 100644
--- a/stdlib/divmod_1.c
+++ b/stdlib/divmod_1.c
@@ -28,6 +28,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <stdbit.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 
 #ifndef UMUL_TIME
 #define UMUL_TIME 1
diff --git a/stdlib/mod_1.c b/stdlib/mod_1.c
index 74c1f6a521..be3ae3d648 100644
--- a/stdlib/mod_1.c
+++ b/stdlib/mod_1.c
@@ -25,6 +25,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <stdbit.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 
 #ifndef UMUL_TIME
 #define UMUL_TIME 1
diff --git a/stdlib/mul_1.c b/stdlib/mul_1.c
index 225b4917d9..0a04a8eb1e 100644
--- a/stdlib/mul_1.c
+++ b/stdlib/mul_1.c
@@ -22,6 +22,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 
 mp_limb_t
 mpn_mul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
diff --git a/stdlib/strtod_l.c b/stdlib/strtod_l.c
index 5814475e40..7618adb285 100644
--- a/stdlib/strtod_l.c
+++ b/stdlib/strtod_l.c
@@ -83,6 +83,7 @@ extern double ____strtod_l_internal (const char *, char **, int, locale_t);
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include "gmp-arch.h"
 #include "fpioconst.h"
 
 #include <assert.h>
diff --git a/stdlib/submul_1.c b/stdlib/submul_1.c
index 3eb4d015f5..0005c2e04e 100644
--- a/stdlib/submul_1.c
+++ b/stdlib/submul_1.c
@@ -24,6 +24,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 
 mp_limb_t
 mpn_submul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
diff --git a/stdlib/udiv_qrnnd.c b/stdlib/udiv_qrnnd.c
deleted file mode 100644
index d32796c04d..0000000000
--- a/stdlib/udiv_qrnnd.c
+++ /dev/null
@@ -1,10 +0,0 @@
-/* For some machines GNU MP needs to define an auxiliary function:
-
-   udiv_qrnnd (quotient, remainder, high_numerator, low_numerator, denominator)
-
-   Divides a two-word unsigned integer, composed by the integers
-   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
-   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
-   than DENOMINATOR for correct operation.  If, in addition, the most
-   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
-   UDIV_NEEDS_NORMALIZATION is defined to 1.  */
diff --git a/sysdeps/alpha/Makefile b/sysdeps/alpha/Makefile
index 39dea0d72c..cae60186c8 100644
--- a/sysdeps/alpha/Makefile
+++ b/sysdeps/alpha/Makefile
@@ -32,6 +32,10 @@ ifeq ($(subdir),string)
 sysdep_routines += stxcpy stxncpy
 endif
 
+ifeq ($(subdir),stdlib)
+sysdep_routines += udiv_qrnnd
+endif
+
 ifeq ($(subdir),elf)
 # The ld.so startup code cannot use literals until it self-relocates.
 CFLAGS-rtld.c = -mbuild-constants
diff --git a/sysdeps/alpha/gmp-arch.h b/sysdeps/alpha/gmp-arch.h
new file mode 100644
index 0000000000..5897af7ce7
--- /dev/null
+++ b/sysdeps/alpha/gmp-arch.h
@@ -0,0 +1,39 @@
+/* Multiprecision generic functions.  Alpha version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_ALPHA_ARCH_H
+#define __GMP_ALPHA_ARCH_H
+
+extern mp_limb_t __udiv_qrnnd (mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t)
+     attribute_hidden;  /* Out-of-line division routine.  */
+
+static __always_inline void
+udiv_qrnnd_alpha (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		  mp_limb_t d)
+{
+  *q = __udiv_qrnnd (r, n1, n0, d);  /* R is passed for the remainder.  */
+}
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0  /* No normalized divisor needed.  */
+# undef udiv_qrnnd
+# define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_alpha (&__q, &__r, __n1, __n0, __d)  /* Q, R must be lvalues.  */
+
+#include <sysdeps/generic/gmp-arch.h>
+
+#endif
diff --git a/sysdeps/generic/gmp-arch.h b/sysdeps/generic/gmp-arch.h
new file mode 100644
index 0000000000..b093f59e20
--- /dev/null
+++ b/sysdeps/generic/gmp-arch.h
@@ -0,0 +1,100 @@
+/* Multiprecision generic functions.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_ARCH_H
+#define __GMP_ARCH_H
+
+#include <stdint.h>
+#include <gmp.h>
+
+#define LL_B ((mp_limb_t) 1 << (BITS_PER_MP_LIMB / 2))  /* Half-limb base.  */
+
+static __always_inline mp_limb_t
+ll_lowpart (mp_limb_t t)
+{
+  return t & (LL_B - 1);  /* Low half of the limb.  */
+}
+
+static __always_inline mp_limb_t
+ll_highpart (mp_limb_t t)
+{
+  return t >> (BITS_PER_MP_LIMB / 2);  /* High half, moved to the low bits.  */
+}
+
+/* udiv_qrnnd (quotient, remainder, high_numerator, low_numerator,
+   denominator) divides the two-limb unsigned integer composed of the
+   mp_limb_t values HIGH_NUMERATOR and LOW_NUMERATOR by DENOMINATOR, storing
+   the quotient in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR
+   must be less than DENOMINATOR for correct operation.  If, in addition, the
+   most significant bit of DENOMINATOR must be 1, then the pre-processor
+   symbol UDIV_NEEDS_NORMALIZATION is defined to 1.  */
+#ifndef udiv_qrnnd
+static __always_inline void
+udiv_qrnnd_generic (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		    mp_limb_t d)
+{
+  mp_limb_t d1 = ll_highpart (d),
+            d0 = ll_lowpart (d),
+            q1, q0;
+  mp_limb_t r1, r0, m;
+
+  r1 = n1 % d1;
+  q1 = n1 / d1;  /* Trial value for the high quotient half.  */
+  m = q1 * d0;  /* Part of q1 * d not yet subtracted.  */
+  r1 = r1 * LL_B | ll_highpart (n0);
+  if (r1 < m)  /* q1 was too large; correct it.  */
+    {
+      q1--;
+      r1 += d;
+      if (r1 >= d)  /* The addition above did not carry.  */
+        if (r1 < m)
+          {
+            q1--;
+            r1 += d;
+          }
+    }
+  r1 -= m;
+
+  r0 = r1 % d1;  /* Same steps for the low quotient half.  */
+  q0 = r1 / d1;
+  m = q0 * d0;
+  r0 = r0 * LL_B | ll_lowpart (n0);
+  if (r0 < m)
+    {
+      q0--;
+      r0 += d;
+      if (r0 >= d)  /* The addition above did not carry.  */
+        if (r0 < m)
+          {
+            q0--;
+            r0 += d;
+          }
+    }
+  r0 -= m;
+
+  *q = q1 * LL_B | q0;  /* Join the two quotient halves.  */
+  *r = r0;
+}
+# undef UDIV_NEEDS_NORMALIZATION
+# define UDIV_NEEDS_NORMALIZATION 1  /* D's top bit must be set.  */
+# undef udiv_qrnnd
+# define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_generic (&__q, &__r, __n1, __n0, __d)
+#endif
+
+#endif /* __GMP_ARCH_H */
diff --git a/sysdeps/hppa/hppa1.1/udiv_qrnnd.S b/sysdeps/hppa/hppa1.1/udiv_qrnnd.S
deleted file mode 100644
index 2f8101bbd2..0000000000
--- a/sysdeps/hppa/hppa1.1/udiv_qrnnd.S
+++ /dev/null
@@ -1,77 +0,0 @@
-;! HP-PA  __udiv_qrnnd division support, used from longlong.h.
-;! This version runs fast on PA 7000 and later.
-
-;! Copyright (C) 1993-2025 Free Software Foundation, Inc.
-
-;! This file is part of the GNU MP Library.
-
-;! The GNU MP Library is free software; you can redistribute it and/or modify
-;! it under the terms of the GNU Lesser General Public License as published by
-;! the Free Software Foundation; either version 2.1 of the License, or (at your
-;! option) any later version.
-
-;! The GNU MP Library is distributed in the hope that it will be useful, but
-;! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-;! License for more details.
-
-;! You should have received a copy of the GNU Lesser General Public License
-;! along with the GNU MP Library.  If not, see
-;! <https://www.gnu.org/licenses/>.
-
-
-;! INPUT PARAMETERS
-;! rem_ptr	gr26
-;! n1		gr25
-;! n0		gr24
-;! d		gr23
-
-	.text
-L$0000:
-	.word		0x43f00000
-	.word		0x0
-	.export		__udiv_qrnnd
-__udiv_qrnnd:
-	.proc
-	.callinfo	frame=64,no_calls
-	.entry
-	ldo		64(%r30),%r30
-
-	stws		%r25,-16(%r30)	;! n_hi
-	stws		%r24,-12(%r30)	;! n_lo
-	b,l		L$0,%r1
-	ldo		L$0000-L$0(%r1),%r1
-L$0:
-	fldds		-16(%r30),%fr5
-	stws		%r23,-12(%r30)
-	comib,<=	0,%r25,L$1
-	fcnvxf,dbl,dbl	%fr5,%fr5
-	fldds		0(%r1),%fr4
-	fadd,dbl	%fr4,%fr5,%fr5
-L$1:
-	fcpy,sgl	%fr0,%fr6L
-	fldws		-12(%r30),%fr6R
-	fcnvxf,dbl,dbl	%fr6,%fr4
-
-	fdiv,dbl	%fr5,%fr4,%fr5
-
-	fcnvfx,dbl,dbl	%fr5,%fr4
-	fstws		%fr4R,-16(%r30)
-	xmpyu		%fr4R,%fr6R,%fr6
-	ldws		-16(%r30),%r28
-	fstds		%fr6,-16(%r30)
-	ldws		-12(%r30),%r21
-	ldws		-16(%r30),%r20
-	sub		%r24,%r21,%r22
-	subb		%r25,%r20,%r1
-	comib,=		0,%r1,L$2
-	ldo		-64(%r30),%r30
-
-	add		%r22,%r23,%r22
-	ldo		-1(%r28),%r28
-L$2:
-	bv		0(%r2)
-	stws		%r22,0(%r26)
-
-	.exit
-	.procend
diff --git a/sysdeps/hppa/udiv_qrnnd.S b/sysdeps/hppa/udiv_qrnnd.S
deleted file mode 100644
index 317fcda722..0000000000
--- a/sysdeps/hppa/udiv_qrnnd.S
+++ /dev/null
@@ -1,285 +0,0 @@
-;! HP-PA  __udiv_qrnnd division support, used from longlong.h.
-;! This version runs fast on pre-PA7000 CPUs.
-
-;! Copyright (C) 1993-2025 Free Software Foundation, Inc.
-
-;! This file is part of the GNU MP Library.
-
-;! The GNU MP Library is free software; you can redistribute it and/or modify
-;! it under the terms of the GNU Lesser General Public License as published by
-;! the Free Software Foundation; either version 2.1 of the License, or (at your
-;! option) any later version.
-
-;! The GNU MP Library is distributed in the hope that it will be useful, but
-;! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-;! License for more details.
-
-;! You should have received a copy of the GNU Lesser General Public License
-;! along with the GNU MP Library.  If not, see
-;! <https://www.gnu.org/licenses/>.
-
-
-;! INPUT PARAMETERS
-;! rem_ptr	gr26
-;! n1		gr25
-;! n0		gr24
-;! d		gr23
-
-;! The code size is a bit excessive.  We could merge the last two ds;addc
-;! sequences by simply moving the "bb,< Odd" instruction down.  The only
-;! trouble is the FFFFFFFF code that would need some hacking.
-
-	.text
-	.export		__udiv_qrnnd
-__udiv_qrnnd:
-	.proc
-	.callinfo	frame=0,no_calls
-	.entry
-
-	comb,<		%r23,%r0,L$largedivisor
-	 sub		%r0,%r23,%r1		;! clear cy as side-effect
-	ds		%r0,%r1,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r28
-	ds		%r25,%r23,%r25
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r23,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r28,%r28,%r28
-
-L$largedivisor:
-	extru		%r24,31,1,%r20		;! r20 = n0 & 1
-	bb,<		%r23,31,L$odd
-	 extru		%r23,30,31,%r22		;! r22 = d >> 1
-	shd		%r25,%r24,1,%r24	;! r24 = new n0
-	extru		%r25,30,31,%r25		;! r25 = new n1
-	sub		%r0,%r22,%r21
-	ds		%r0,%r21,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r22,%r25
-	sh1addl		%r25,%r20,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r24,%r24,%r28
-
-L$odd:	addib,sv,n	1,%r22,L$FF..		;! r22 = (d / 2 + 1)
-	shd		%r25,%r24,1,%r24	;! r24 = new n0
-	extru		%r25,30,31,%r25		;! r25 = new n1
-	sub		%r0,%r22,%r21
-	ds		%r0,%r21,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r28
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r22,%r25
-	sh1addl		%r25,%r20,%r25
-;! We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
-	add,nuv		%r28,%r25,%r25
-	addl		%r25,%r1,%r25
-	addc		%r0,%r28,%r28
-	sub,<<		%r25,%r23,%r0
-	addl		%r25,%r1,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r0,%r28,%r28
-
-;! This is just a special case of the code above.
-;! We come here when d == 0xFFFFFFFF
-L$FF..:	add,uv		%r25,%r24,%r24
-	sub,<<		%r24,%r23,%r0
-	ldo		1(%r24),%r24
-	stws		%r24,0(%r26)
-	bv		0(%r2)
-	 addc		%r0,%r25,%r28
-
-	.exit
-	.procend
diff --git a/sysdeps/ieee754/dbl-64/dbl2mpn.c b/sysdeps/ieee754/dbl-64/dbl2mpn.c
index d69973419b..cacf5da7d8 100644
--- a/sysdeps/ieee754/dbl-64/dbl2mpn.c
+++ b/sysdeps/ieee754/dbl-64/dbl2mpn.c
@@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <stdlib.h>
diff --git a/sysdeps/ieee754/ldbl-128/ldbl2mpn.c b/sysdeps/ieee754/ldbl-128/ldbl2mpn.c
index d3f3476d56..fd75db1271 100644
--- a/sysdeps/ieee754/ldbl-128/ldbl2mpn.c
+++ b/sysdeps/ieee754/ldbl-128/ldbl2mpn.c
@@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <math.h>
diff --git a/sysdeps/ieee754/ldbl-96/ldbl2mpn.c b/sysdeps/ieee754/ldbl-96/ldbl2mpn.c
index 982f427380..6c7aadbca2 100644
--- a/sysdeps/ieee754/ldbl-96/ldbl2mpn.c
+++ b/sysdeps/ieee754/ldbl-96/ldbl2mpn.c
@@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <math.h>
diff --git a/sysdeps/wordsize-32/divdi3.c b/sysdeps/wordsize-32/divdi3.c
index 387022ab14..360094134f 100644
--- a/sysdeps/wordsize-32/divdi3.c
+++ b/sysdeps/wordsize-32/divdi3.c
@@ -25,20 +25,16 @@
 #error This is for 32-bit targets only
 #endif
 
-typedef unsigned int UQItype	__attribute__ ((mode (QI)));
-typedef          int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef          int DItype	__attribute__ ((mode (DI)));
-typedef unsigned int UDItype	__attribute__ ((mode (DI)));
 #define Wtype SItype
 #define HWtype SItype
 #define DWtype DItype
-#define UWtype USItype
 #define UHWtype USItype
 #define UDWtype UDItype
-#define W_TYPE_SIZE 32
 
+#include <gmp.h>
+#include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 struct DWstruct { Wtype high, low;};
diff --git a/sysdeps/x86/gmp-arch.h b/sysdeps/x86/gmp-arch.h
new file mode 100644
index 0000000000..5d65b16216
--- /dev/null
+++ b/sysdeps/x86/gmp-arch.h
@@ -0,0 +1,52 @@
+/* Multiprecision generic functions.  x86 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_X64_ARCH_H
+#define __GMP_X64_ARCH_H
+
+#include <gmp.h>
+
+static __always_inline void
+udiv_qrnnd_x86 (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		mp_limb_t d)
+{
+#ifdef __x86_64__
+  asm ("div{q} %4"  /* %4 is the divisor D.  */
+       : "=a" (*q),  /* Quotient output.  */
+         "=d" (*r)  /* Remainder output.  */
+       : "0" (n0),  /* Low limb, same location as operand 0.  */
+	 "1" (n1),  /* High limb, same location as operand 1.  */
+	 "rm" (d));  /* Divisor in a register or memory.  */
+#else  /* Not x86_64: same operands with div{l}.  */
+  asm ("div{l} %4"
+       : "=a" (*q),
+         "=d" (*r)
+       : "0" (n0),
+	 "1" (n1),
+	 "rm" (d));
+#endif
+}
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0  /* No normalized divisor needed.  */
+#undef udiv_qrnnd
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_x86 (&__q, &__r, __n1, __n0, __d)  /* Q, R must be lvalues.  */
+
+#include <sysdeps/generic/gmp-arch.h>
+
+#endif
diff --git a/sysdeps/x86/ldbl2mpn.c b/sysdeps/x86/ldbl2mpn.c
index c41fb8da14..593c472e8a 100644
--- a/sysdeps/x86/ldbl2mpn.c
+++ b/sysdeps/x86/ldbl2mpn.c
@@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <stdlib.h>