initial import

2025-12-24 17:51:17 +03:00 · 1995-02-18 01:27:10 +00:00
commit 28f540f45b
2263 changed files with 218361 additions and 0 deletions
--- a/sysdeps/sparc/DEFS.h
+++ b/sysdeps/sparc/DEFS.h
@@ -0,0 +1,4 @@
+/* FUNC(name): begin an assembly routine.  Expands to a `.global'
+   declaration for NAME, a `.align 4' directive, and the label NAME
+   itself, joined with `;' so the whole expansion stays on one line.  */
+#define	FUNC(name)	\
+	.global name;	\
+	.align 4;	\
+	name:
--- a/sysdeps/sparc/Dist
+++ b/sysdeps/sparc/Dist
@@ -0,0 +1,4 @@
+DEFS.h
+mul.S umul.S
+divrem.m4 sdiv.S udiv.S rem.S urem.S
+alloca.S
--- a/sysdeps/sparc/Implies
+++ b/sysdeps/sparc/Implies
@@ -0,0 +1,2 @@
+# SPARC uses IEEE 754 floating point.
+ieee754
--- a/sysdeps/sparc/Makefile
+++ b/sysdeps/sparc/Makefile
@@ -0,0 +1,57 @@
+# Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License
+# as published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If
+# not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+# Cambridge, MA 02139, USA.
+
+# In the gnulib subdirectory, set `routines' to the multiply, the
+# divide/remainder and the alloca routines.  The assignment uses `='
+# (deferred expansion), so $(divrem), which is only assigned further
+# down in this file, is still seen when `routines' is finally expanded.
+ifeq ($(subdir),gnulib)
+routines = mul umul $(divrem) alloca
+endif	# gnulib
+
+# We distribute these files, even though they are generated,
+# so as to avoid the need for a functioning m4 to build the library.
+divrem := sdiv udiv rem urem
+
+# Parameters handed to divrem.m4 for each generated file.  NAME is
+# looked up from the base name of the target; OP and S are looked up
+# from NAME.  The variable names begin with `+' and must be spelled
+# exactly as in the $(+divrem-...) references in the rule below;
+# a mismatch makes every lookup expand to nothing.
++divrem-NAME-sdiv := div
++divrem-NAME-udiv := udiv
++divrem-NAME-rem := rem
++divrem-NAME-urem := urem
++divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
++divrem-OP-div := div
++divrem-OP-udiv := div
++divrem-OP-rem := rem
++divrem-OP-urem := rem
++divrem-S-div := true
++divrem-S-rem := true
++divrem-S-udiv := false
++divrem-S-urem := false
+# Prepend the three m4 definitions and the DO NOT EDIT banner to
+# divrem.m4, run the result through m4 into a temporary file, and
+# move it into place only once m4 has finished writing it.
+$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
+	(echo "define(NAME,\`.$(+divrem-NAME)')\
+	       define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
+	       define(S,\`$(+divrem-S-$(+divrem-NAME))')\
+	       /* This file is generated from divrem.m4; DO NOT EDIT! */"; \
+	 cat $<) | $(M4) > $@-tmp
+# Make it unwritable so no one will edit it by mistake.
+	-chmod a-w $@-tmp
+	mv -f $@-tmp $@
+# Commit only inside a CVS checkout; without a CVS directory this step
+# succeeds without doing anything instead of failing the rule.
+	test ! -d CVS || cvs commit -m'Regenerated from $<' $@
+
+sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
+
+ifeq ($(subdir),crypt)
+
+crypt := crypt.sparc # Use crypt/crypt.sparc.S.
+
+endif # crypt
--- a/sysdeps/sparc/__longjmp.S
+++ b/sysdeps/sparc/__longjmp.S
@@ -0,0 +1,47 @@
+/* Copyright (C) 1991, 1993 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdep.h>
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>.  */
+
+/* __longjmp: %o0 points at the jump buffer, %o1 holds the value to
+   deliver.  Word 0 of the buffer is loaded as the return PC and word 1
+   as the saved stack pointer.  The routine returns through the loaded
+   PC with %o1 in %o0, except that a value of zero is replaced by 1.  */
+ENTRY (__longjmp)
+	/* Do a "flush register windows trap".  The trap handler in the
+	   kernel writes all the register windows to their stack slots, and
+	   marks them all as invalid (needing to be sucked up from the
+	   stack when used).  This ensures that all information needed to
+	   unwind to these callers is in memory, not in the register
+	   windows.  */
+	ta ST_FLUSH_WINDOWS
+	ld [%o0], %o7		/* Return PC.  */
+	/* The saved SP goes into %fp because the `restore' below turns
+	   this window's %fp into the %sp of the window it switches to.  */
+	ld [%o0 + 4], %fp	/* Saved SP.  */
+	sub %fp, 64, %sp	/* Allocate a register save area.  */
+
+	/* if (%o1 == 0) %o1 = 1; */
+	tst %o1
+	/* Annulled branch: the delay-slot `mov' runs only when the branch
+	   is taken, i.e. only when %o1 was zero.  */
+	be,a Ldone
+	mov 1, %o1
+
+Ldone:	retl
+	/* On the way out, put the return value in %o0.  */
+	/* `restore' reads %o1 in this window and writes the sum %o1 + 0
+	   to %o0 of the window it switches to.  */
+	restore %o1, 0, %o0
--- a/sysdeps/sparc/add_n.S
+++ b/sysdeps/sparc/add_n.S
@@ -0,0 +1,134 @@
+! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! s1_ptr	%o1
+! s2_ptr	%o2
+! size		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_add_n)
+! Adds the size-limb vectors at s1_ptr and s2_ptr, stores the sums at
+! res_ptr and returns the carry out of the last limb in %o0.
+!
+! The add/store sequence is unrolled 16 times.  When size is not a
+! multiple of 16 the code enters the unrolled body part way through:
+! the three pointers are moved back by the number of skipped limbs so
+! that the fixed offsets in the body still address the right limbs.
+!
+! Register use:
+!   %g1        16-limb passes still to run
+!   %o4,%o5    one pair of loaded source limbs
+!   %g2,%g3    the other pair (the body alternates between the two)
+!   %o3        skip count on entry, then the sum being stored
+C_SYMBOL_NAME(__mpn_add_n):
+	ld	[%o1+0],%o4		! read first limb from s1_ptr
+	srl	%o3,4,%g1		! g1 = size / 16
+	ld	[%o2+0],%o5		! read first limb from s2_ptr
+
+	sub	%g0,%o3,%o3		! o3 = -size
+	andcc	%o3,(16-1),%o3		! o3 = (-size) mod 16 = limbs to skip;
+					! being a logical op it also clears carry
+	be	Lzero			! nothing to skip: size is a multiple of 16
+	 nop
+
+	sll	%o3,2,%o3		! multiply by 4
+	sub	%o0,%o3,%o0		! adjust res_ptr
+	sub	%o1,%o3,%o1		! adjust s1_ptr
+	sub	%o2,%o3,%o2		! adjust s2_ptr
+
+	! The entry point may be a group that adds o4/o5 or one that adds
+	! g2/g3, so the first limbs are placed in both pairs.
+	mov	%o4,%g2
+
+	sethi	%hi(Lbase),%g3
+	or	%g3,%lo(Lbase),%g3
+	sll	%o3,2,%o3		! multiply by 4
+	! o3 is now 16 * skip: each group below is 4 instructions, 16 bytes.
+	jmp	%g3+%o3
+	 mov	%o5,%g3			! (delay slot) g3 no longer needed as address
+
+! Start of a further pass: add and store the limb at offset 60 that the
+! previous pass loaded, advance the pointers by 64 bytes, load limb 0.
+Loop:	addxcc	%g2,%g3,%o3
+	add	%o1,64,%o1
+	st	%o3,[%o0+60]
+	add	%o2,64,%o2
+	ld	[%o1+0],%o4
+	add	%o0,64,%o0
+	ld	[%o2+0],%o5
+Lzero:	sub	%g1,1,%g1	! add 0 + 16r limbs (adjust loop counter)
+! Each group: load the next limb pair, add the current pair with carry,
+! store the sum.
+Lbase:	ld	[%o1+4],%g2
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+4],%g3
+	st	%o3,[%o0+0]
+	ld	[%o1+8],%o4	! add 15 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+8],%o5
+	st	%o3,[%o0+4]
+	ld	[%o1+12],%g2	! add 14 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+12],%g3
+	st	%o3,[%o0+8]
+	ld	[%o1+16],%o4	! add 13 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+16],%o5
+	st	%o3,[%o0+12]
+	ld	[%o1+20],%g2	! add 12 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+20],%g3
+	st	%o3,[%o0+16]
+	ld	[%o1+24],%o4	! add 11 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+24],%o5
+	st	%o3,[%o0+20]
+	ld	[%o1+28],%g2	! add 10 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+28],%g3
+	st	%o3,[%o0+24]
+	ld	[%o1+32],%o4	! add 9 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+32],%o5
+	st	%o3,[%o0+28]
+	ld	[%o1+36],%g2	! add 8 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+36],%g3
+	st	%o3,[%o0+32]
+	ld	[%o1+40],%o4	! add 7 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+40],%o5
+	st	%o3,[%o0+36]
+	ld	[%o1+44],%g2	! add 6 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+44],%g3
+	st	%o3,[%o0+40]
+	ld	[%o1+48],%o4	! add 5 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+48],%o5
+	st	%o3,[%o0+44]
+	ld	[%o1+52],%g2	! add 4 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+52],%g3
+	st	%o3,[%o0+48]
+	ld	[%o1+56],%o4	! add 3 + 16r limbs
+	addxcc	%g2,%g3,%o3
+	ld	[%o2+56],%o5
+	st	%o3,[%o0+52]
+	ld	[%o1+60],%g2	! add 2 + 16r limbs
+	addxcc	%o4,%o5,%o3
+	ld	[%o2+60],%g3
+	st	%o3,[%o0+56]
+	! A skip count of 15 enters here, with only the final limb left.
+	addx	%g0,%g0,%o4	! save carry in o4: the tst below overwrites it
+	tst	%g1
+	bne	Loop
+	 subcc	%g0,%o4,%g0	! restore cy (delay slot)
+
+	addxcc	%g2,%g3,%o3
+	st	%o3,[%o0+60]	! store most significant limb
+
+	retl
+	 addx	%g0,%g0,%o0	! return carry-out from most sign. limb
--- a/sysdeps/sparc/addmul_1.S
+++ b/sysdeps/sparc/addmul_1.S
@@ -0,0 +1,146 @@
+! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_addmul_1)
+! For each of the size limbs at s1_ptr: multiply it by s2_limb, add the
+! product and the running carry limb to the limb at res_ptr, and store
+! the low word back.  The final carry limb is returned in %o0.
+!
+! Two multiply loops are provided, both built from mulscc steps:
+!   s2_limb <= 0xfff  s2_limb goes in Y as the multiplier; 12 steps
+!   otherwise (Large) the s1 limb goes in Y; 32 steps
+! In both loops %g2 holds a correction added to the high word when the
+! mulscc multiplicand has its top bit set.
+!
+! Register use: %o4 = end of result block minus 4, %o1 = end of s1
+! block, %o2 = negative byte index counting up to zero, %o5 = current
+! s1 limb or result limb, %o0 = carry limb.
+C_SYMBOL_NAME(__mpn_addmul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large		! unsigned compare: s2_limb > 0xfff
+	nop
+
+	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! carry limb = 0
+	b	L0
+	 add	%o4,-4,%o4	! (delay slot) result accesses happen after the
+				! index has been advanced, so bias o4 by -4
+Loop0:
+	addcc	%o5,%g1,%g1	! add the result limb to the low product word
+	ld	[%o1+%o2],%o5	! next s1 limb
+	addx	%o0,%g0,%o0	! fold that carry into the carry limb
+	st	%g1,[%o4+%o2]
+L0:	wr	%g0,%o3,%y	! Y = s2_limb (multiplier)
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2	! g2 = s2_limb if the s1 limb has bit 31 set, else 0
+	andcc	%g1,0,%g1	! g1 = 0, condition codes cleared for mulscc
+	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1	! final shift step
+	! After 12 steps the product sits in g1 with its low 12 bits in
+	! the top of Y; split it into a high word (g4) and low word (g1).
+	sra	%g1,20,%g4
+	sll	%g1,12,%g1
+ 	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1
+
+	addcc	%g1,%o0,%g1	! low word += carry limb
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop0
+	 ld	[%o4+%o2],%o5	! (delay slot, always executed) result limb
+
+	addcc	%o5,%g1,%g1	! last limb: add result limb, store, return
+	addx	%o0,%g0,%o0
+	retl
+	st	%g1,[%o4+%o2]	! (delay slot)
+
+
+Large:	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! carry limb = 0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	 add	%o4,-4,%o4	! (delay slot) same -4 bias as the small loop
+Loop:
+	addcc	%o5,%g3,%g3	! add the result limb to the low product word
+	ld	[%o1+%o2],%o5	! next s1 limb
+	addx	%o0,%g0,%o0
+	st	%g3,[%o4+%o2]
+L1:	wr	%g0,%o5,%y	! Y = s1 limb (multiplier)
+	and	%o5,%g4,%g2	! g2 = s1 limb iff s2_limb has bit 31 set, else 0
+	andcc	%g0,%g0,%g1	! g1 = 0, condition codes cleared for mulscc
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1	! final shift step
+	rd	%y,%g3		! low word of the product (high word is in g1)
+	addcc	%g3,%o0,%g3	! low word += carry limb
+	addx	%g2,%g1,%o0	! new carry limb = correction + high word + cy
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop
+	 ld	[%o4+%o2],%o5	! (delay slot, always executed) result limb
+
+	addcc	%o5,%g3,%g3	! last limb: add result limb, store, return
+	addx	%o0,%g0,%o0
+	retl
+	st	%g3,[%o4+%o2]	! (delay slot)
--- a/sysdeps/sparc/alloca.S
+++ b/sysdeps/sparc/alloca.S
@@ -0,0 +1,32 @@
+/* Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include "DEFS.h"
+
+/* Code produced by Sun's C compiler calls this function with two extra
+   arguments which it makes relocatable symbols but seem always to be
+   the constant 96; I have no idea what they are for.  */
+
+#ifndef NO_UNDERSCORES
+#define __builtin_alloca ___builtin_alloca
+#endif
+
+/* __builtin_alloca: %o0 = number of bytes wanted.  Lowers %sp by that
+   amount, rounded up to a multiple of 8 so that %sp and the returned
+   pointer keep doubleword alignment whatever size the caller passes,
+   and returns the address 96 bytes above the new %sp in %o0.  A request
+   that is already a multiple of 8 is allocated exactly as before.  */
+FUNC (__builtin_alloca)
+	add %o0, 7, %o0		/* Round the request up to the next */
+	andn %o0, 7, %o0	/* multiple of 8 bytes.  */
+	sub %sp, %o0, %sp	/* Push some stack space.  */
+	retl			/* Return; the returned buffer leaves 96 */
+	add %sp, 96, %o0	/* bytes of register save area at the top. */
--- a/sysdeps/sparc/bsd-_setjmp.S
+++ b/sysdeps/sparc/bsd-_setjmp.S
@@ -0,0 +1,26 @@
+/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'.  Sparc version.
+Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdep.h>
+
+/* _setjmp: jump to __sigsetjmp with a second argument of zero.  The
+   first argument in %o0 is passed through untouched.  This file must
+   define `_setjmp'; `setjmp' itself is defined in bsd-setjmp.S.  */
+ENTRY (_setjmp)
+	sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1	/* High part of the target address.  */
+	or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1	/* Merge in the low part.  */
+	jmp %g1			/* Plain jump: no return address is written here.  */
+	mov %g0, %o1		/* Pass second argument of zero.  */
--- a/sysdeps/sparc/bsd-setjmp.S
+++ b/sysdeps/sparc/bsd-setjmp.S
@@ -0,0 +1,26 @@
+/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'.  Sparc version.
+Copyright (C) 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdep.h>
+
+/* setjmp: jump to __sigsetjmp with a second argument of one.  The
+   first argument in %o0 is passed through untouched, and the code here
+   writes no return address before jumping.  */
+ENTRY (setjmp)
+	sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1	/* High part of the target address.  */
+	or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1	/* Merge in the low part.  */
+	jmp %g1
+	/* Delay slot: executed before the jump target is reached.  */
+	mov 1, %o1		/* Pass second argument of one.  */
--- a/sysdeps/sparc/bytesex.h
+++ b/sysdeps/sparc/bytesex.h
@@ -0,0 +1,3 @@
+/* SPARC is big-endian.  */
+
+#define __BYTE_ORDER __BIG_ENDIAN
--- a/sysdeps/sparc/divrem.m4
+++ b/sysdeps/sparc/divrem.m4
@@ -0,0 +1,234 @@
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  `NAME'	name of function to generate
+ *  `OP'		`OP'=div => %o0 / %o1; `OP'=rem => %o0 % %o1
+ *  `S'		`S'=true => signed; `S'=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top `decade' of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+define(N, `4')dnl
+define(WORDSIZE, `32')dnl
+define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
+dnl
+define(dividend, `%o0')dnl
+define(divisor, `%o1')dnl
+define(Q, `%o2')dnl
+define(R, `%o3')dnl
+define(ITER, `%o4')dnl
+define(V, `%o5')dnl
+dnl
+dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
+define(T, `%g1')dnl
+define(SC, `%g7')dnl
+ifelse(S, `true', `define(SIGN, `%g6')')dnl
+
+dnl
+dnl This is the recursive definition for developing quotient digits.
+dnl
+dnl Parameters:
+dnl  $1	the current depth, 1 <= $1 <= N
+dnl  $2	the current accumulation of quotient bits
+dnl  N	max depth
+dnl
+dnl We add a new bit to $2 and either recurse or insert the bits in
+dnl the quotient.  R, Q, and V are inputs and outputs as defined above;
+dnl the condition codes are expected to reflect the input R, and are
+dnl modified to reflect the output R.
+dnl
+dnl Label scheme: each expansion emits one local label L.depth.K, where
+dnl K is 2**N plus the accumulated bits.  The accumulated bits differ
+dnl for every node at a given depth, so each label is distinct, and the
+dnl 2**N offset keeps K positive.  The depth-1 expansion also emits the
+dnl shared exit label 9:, which every depth-N leaf reaches with b 9f.
+dnl In each bl/srl pair the srl sits in the delay slot of the branch,
+dnl so V is halved on both paths.
+define(DEVELOP_QUOTIENT_BITS,
+`	! depth $1, accumulated bits $2
+	bl	L.$1.eval(2**N+$2)
+	srl	V,1,V
+	! remainder is positive
+	subcc	R,V,R
+	ifelse($1, N,
+	`	b	9f
+		add	Q, ($2*2+1), Q
+	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
+L.$1.eval(2**N+$2):
+	! remainder is negative
+	addcc	R,V,R
+	ifelse($1, N,
+	`	b	9f
+		add	Q, ($2*2-1), Q
+	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
+	ifelse($1, 1, `9:')')dnl
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+! Entry point.  Operands arrive in %o0 and %o1 and the result is
+! returned in %o0.  The signed variants first make both operands
+! nonnegative and fix the sign of the result just before returning.
+FUNC(NAME)
+ifelse(S, `true',
+`	! compute sign of result; if neither is negative, no problem
+	orcc	divisor, dividend, %g0	! either negative?
+	bge	2f			! no, go do the divide
+ifelse(OP, `div',
+`	xor	divisor, dividend, SIGN	! compute sign in any case',
+`	mov	dividend, SIGN		! sign of remainder matches dividend')
+	tst	divisor
+	bge	1f
+	tst	dividend
+	! divisor is definitely negative; dividend might also be negative
+	bge	2f			! if dividend not negative...
+	sub	%g0, divisor, divisor	! in any case, make divisor nonneg
+1:	! dividend is negative, divisor is nonnegative
+	sub	%g0, dividend, dividend	! make dividend nonnegative
+2:
+')
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	divisor, %g0, V
+	bne	1f
+	mov	dividend, R		! (delay slot)
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	R, V			! if divisor exceeds dividend, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	Q			! (delay slot)
+	sethi	%hi(1 << (WORDSIZE - TOPBITS - 1)), T
+	cmp	R, T
+	blu	Lnot_really_big
+	clr	ITER			! (delay slot)
+
+	! `Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.'
+	1:
+		cmp	V, T
+		bgeu	3f
+		mov	1, SC
+		sll	V, N, V
+		b	1b
+		add	ITER, 1, ITER
+
+	! Now compute SC.
+	2:	addcc	V, V, V
+		bcc	Lnot_too_big
+		add	SC, 1, SC
+
+		! We get here if the divisor overflowed while shifting.
+		! This means that R has the high-order bit set.
+		! Restore V and subtract from R.
+		sll	T, TOPBITS, T	! high order bit
+		srl	V, 1, V		! rest of V
+		add	V, T, V
+		b	Ldo_single_div
+		sub	SC, 1, SC
+
+	Lnot_too_big:
+	3:	cmp	V, R
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! V > R: went too far: back up 1 step
+	!	srl	V, 1, V
+	!	dec	SC
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that R >= V, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if R >= 0.  Because both R and V may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	SC, 1, SC
+		bl	Lend_regular_divide
+		nop
+		sub	R, V, R
+		mov	1, Q
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	Q, 1, Q
+		bl	1f
+		srl	V, 1, V
+		! R >= 0
+		sub	R, V, R
+		b	2f
+		add	Q, 1, Q
+	1:	! R < 0
+		add	R, V, R
+		sub	Q, 1, Q
+	2:
+	Lend_single_divloop:
+		subcc	SC, 1, SC
+		bge	Lsingle_divloop
+		tst	R
+		b,a	Lend_regular_divide
+
+! Small dividend: scale the comparand up one digit at a time for as
+! long as it does not exceed the remainder, counting the digits.
+Lnot_really_big:
+1:
+	sll	V, N, V
+	cmp	V, R
+	bleu	1b
+	addcc	ITER, 1, ITER
+	be	Lgot_result
+	sub	ITER, 1, ITER
+
+	tst	R	! set up for initial iteration
+Ldivloop:
+	sll	Q, N, Q
+	DEVELOP_QUOTIENT_BITS(1, 0)
+Lend_regular_divide:
+	subcc	ITER, 1, ITER
+	bge	Ldivloop
+	tst	R
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+ifelse(OP, `div',
+`	sub	Q, 1, Q
+', `	add	R, divisor, R
+')
+
+Lgot_result:
+ifelse(S, `true',
+`	! check to see if answer should be < 0
+	tst	SIGN
+	bl,a	1f
+	ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
+1:')
+	retl
+	ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
--- a/sysdeps/sparc/jmp_buf.h
+++ b/sysdeps/sparc/jmp_buf.h
@@ -0,0 +1,14 @@
+/* Define the machine-dependent type `jmp_buf'.  SPARC version.  */
+
+/* NOTE: The assembly code in __longjmp.S and setjmp.S knows the layout
+   of this structure.  You must hack the assembly code if you want to change
+   the order of the members.  */
+
+typedef struct
+  {
+    /* Return PC (register o7).  */
+    __ptr_t __pc;
+
+    /* Saved FP.  */
+    __ptr_t __fp;
+  } __jmp_buf[1];
--- a/sysdeps/sparc/memcopy.h
+++ b/sysdeps/sparc/memcopy.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdeps/generic/memcopy.h>
+#undef	reg_char
+#define	reg_char	int
--- a/sysdeps/sparc/mul_1.S
+++ b/sysdeps/sparc/mul_1.S
@@ -0,0 +1,198 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1:	ld
+!	st
+!
+!2:	ld	,a
+!	addxcc	a,a,x
+!	st	x,
+!
+!3_unrolled:
+!	ld	,a
+!	addxcc	a,a,x1		! 2a + cy
+!	addx	%g0,%g0,x2
+!	addcc	a,x1,x		! 3a + c
+!	st	x,
+!
+!	ld	,a
+!	addxcc	a,a,y1
+!	addx	%g0,%g0,y2
+!	addcc	a,y1,x
+!	st	x,
+!
+!4_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	y2,x1,x
+!	sll	a,30,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	x2,y1,y
+!	sll	a,30,y2
+!	st	x,
+!
+!5_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	a,x1,x		! 5a + c
+!	sll	a,30,x2
+!	addx	%g0,x2,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	a,y1,x
+!	sll	a,30,y2
+!	addx	%g0,y2,y2
+!	st	x,
+!
+!8_unrolled:
+!	ld	,a
+!	srl	a,3,x1		! 8a
+!	addxcc	y2,x1,x
+!	sll	a,29,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,3,y1
+!	addxcc	x2,y1,y
+!	sll	a,29,y2
+!	st	x,
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+! For each of the size limbs at s1_ptr: multiply it by s2_limb, add the
+! running carry limb, and store the low word at res_ptr.  The final
+! carry limb is returned in %o0.
+!
+! Two multiply loops are provided, both built from mulscc steps:
+!   s2_limb <= 0xfff  s2_limb goes in Y as the multiplier; 12 steps
+!   otherwise (Large) the s1 limb goes in Y; 32 steps
+! In both loops %g2 holds a correction added to the high word when the
+! mulscc multiplicand has its top bit set.
+!
+! Register use: %o4 = end of result block minus 4, %o1 = end of s1
+! block, %o2 = negative byte index counting up to zero, %o5 = current
+! s1 limb, %o0 = carry limb.
+C_SYMBOL_NAME(__mpn_mul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large		! unsigned compare: s2_limb > 0xfff
+	nop
+
+	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! carry limb = 0
+	b	L0
+	 add	%o4,-4,%o4	! (delay slot) stores happen after the index
+				! has been advanced, so bias o4 by -4
+Loop0:
+	st	%g1,[%o4+%o2]	! store the low word from the previous step
+L0:	wr	%g0,%o3,%y	! Y = s2_limb (multiplier)
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2	! g2 = s2_limb if the s1 limb has bit 31 set, else 0
+	andcc	%g1,0,%g1	! g1 = 0, condition codes cleared for mulscc
+	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1	! final shift step
+	! After 12 steps the product sits in g1 with its low 12 bits in
+	! the top of Y; split it into a high word (g4) and low word (g1).
+	sra	%g1,20,%g4
+	sll	%g1,12,%g1
+ 	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1
+
+	addcc	%g1,%o0,%g1	! low word += carry limb
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop0
+	 ld	[%o1+%o2],%o5	! (annulled delay slot) next s1 limb, loaded
+				! only when the loop continues
+
+	retl
+	st	%g1,[%o4+%o2]	! (delay slot) store the last low word
+
+
+Large:	ld	[%o1+%o2],%o5	! first s1 limb
+	mov	0,%o0		! carry limb = 0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	 add	%o4,-4,%o4	! (delay slot) same -4 bias as the small loop
+Loop:
+	st	%g3,[%o4+%o2]	! store the low word from the previous step
+L1:	wr	%g0,%o5,%y	! Y = s1 limb (multiplier)
+	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+	andcc	%g0,%g0,%g1	! g1 = 0, condition codes cleared for mulscc
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1	! final shift step
+	rd	%y,%g3		! low word of the product (high word is in g1)
+	addcc	%g3,%o0,%g3	! low word += carry limb
+	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop
+	 ld	[%o1+%o2],%o5	! (annulled delay slot) next s1 limb, loaded
+				! only when the loop continues
+
+	retl
+	st	%g3,[%o4+%o2]	! (delay slot) store the last low word
--- a/sysdeps/sparc/rem.S
+++ b/sysdeps/sparc/rem.S
@@ -0,0 +1,365 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (this file was generated with the values in parentheses):
+ *  NAME	name of function to generate (.rem)
+ *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (rem)
+ *  S		S=true => signed; S=false => unsigned (true)
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.rem)		! signed remainder: returns %o0 % %o1 in %o0
+	! compute sign of result; if neither is negative, no problem
+	orcc	%o1, %o0, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	mov	%o0, %g6		! sign of remainder matches %o0 (delay slot)
+	tst	%o1
+	bge	1f
+	tst	%o0			! (delay slot) sets flags for the bge below
+	! %o1 is definitely negative; %o0 might also be negative
+	bge	2f			! if %o0 not negative...
+	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
+1:	! %o0 is negative, %o1 is nonnegative
+	sub	%g0, %o0, %o0	! make %o0 nonnegative
+2:
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	! %o5 = V (comparand) = divisor; Z set if it is zero
+	bne	1f
+	mov	%o0, %o3		! (delay slot) %o3 = R (remainder so far) = dividend
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2			! (delay slot) %o2 = Q (partial quotient) = 0
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 1 << 27, the large-dividend threshold
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4			! (delay slot) %o4 = ITER = 0
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7		! single-bit step count. NOTE(review): %g6 and %g7 are clobbered; confirm the target ABI permits that
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7
+
+		! We get here if the scaled divisor (%o5) overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3		! (delay slot) sign of R steers the next step
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	Ldivloop
+	tst	%o3		! (delay slot) sign of R, used by Ldivloop and by the fixup below
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	add	%o3, %o1, %o3	! R += divisor (annulled unless R < 0)
+
+
+Lgot_result:
+	! check to see if answer should be < 0
+	tst	%g6
+	bl,a	1f
+	sub %g0, %o3, %o3	! negate R (annulled unless %g6 < 0)
+1:
+	retl
+	mov %o3, %o0		! (delay slot) return R
--- a/sysdeps/sparc/sdiv.S
+++ b/sysdeps/sparc/sdiv.S
@@ -0,0 +1,365 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (this file was generated with the values in parentheses):
+ *  NAME	name of function to generate (.div)
+ *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (div)
+ *  S		S=true => signed; S=false => unsigned (true)
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.div)		! signed division: returns %o0 / %o1 in %o0
+	! compute sign of result; if neither is negative, no problem
+	orcc	%o1, %o0, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	xor	%o1, %o0, %g6	! compute sign in any case (delay slot)
+	tst	%o1
+	bge	1f
+	tst	%o0			! (delay slot) sets flags for the bge below
+	! %o1 is definitely negative; %o0 might also be negative
+	bge	2f			! if %o0 not negative...
+	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
+1:	! %o0 is negative, %o1 is nonnegative
+	sub	%g0, %o0, %o0	! make %o0 nonnegative
+2:
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	! %o5 = V (comparand) = divisor; Z set if it is zero
+	bne	1f
+	mov	%o0, %o3		! (delay slot) %o3 = R (remainder so far) = dividend
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2			! (delay slot) %o2 = Q (partial quotient) = 0
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 1 << 27, the large-dividend threshold
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4			! (delay slot) %o4 = ITER = 0
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7		! single-bit step count. NOTE(review): %g6 and %g7 are clobbered; confirm the target ABI permits that
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7
+
+		! We get here if the scaled divisor (%o5) overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3		! (delay slot) sign of R steers the next step
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	Ldivloop
+	tst	%o3		! (delay slot) sign of R, used by Ldivloop and by the fixup below
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2	! Q -= 1 (annulled unless R < 0)
+
+
+Lgot_result:
+	! check to see if answer should be < 0
+	tst	%g6
+	bl,a	1f
+	sub %g0, %o2, %o2	! negate Q (annulled unless %g6 < 0)
+1:
+	retl
+	mov %o2, %o0		! (delay slot) return Q
--- a/sysdeps/sparc/setjmp.S
+++ b/sysdeps/sparc/setjmp.S
@@ -0,0 +1,31 @@
+/* Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <sysdep.h>
+
+/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>.  */
+
+ENTRY (__sigsetjmp)
+	/* %g1 = address of __sigjmp_save, which saves the signal mask if
+	   requested and always returns zero; we tail-call it below.  */
+	sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
+	or %g1, %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1
+	/* Record the return PC, then the SP from the jmp delay slot.  */
+	st %o7, [%o0]
+	jmp %g1
+	st %sp, [%o0 + 4]
--- a/sysdeps/sparc/sparc8/addmul_1.S
+++ b/sysdeps/sparc/sparc8/addmul_1.S
@@ -0,0 +1,116 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+	orcc	%g0,%g0,%g2	! g2 = carry limb = 0, and clear the carry flag
+	ld	[%o1+0],%o4	! 1
+
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1	! g1 = (size mod 4) * 16 = byte offset into LL
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3	! g3 = address of the dispatch table LL
+	jmp	%g3+%g1		! enter the 4-way unrolled loop at the right phase
+	nop
+LL:				! four entries of 4 instructions (16 bytes) each
+LL00:	add	%o0,-4,%o0
+	b	Loop00		/* 4, 8, 12, ... */
+	add	%o1,-4,%o1
+	nop
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	nop
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	b	Loop10
+	add	%o1,4,%o1
+	nop
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	b	Loop11
+	add	%o1,-8,%o1
+	nop
+
+1:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	rd	%y,%g2		! 1
+	addx	%g0,%g2,%g2	! g2 = high word + carry
+	ld	[%o0+0],%g1	! 1 (res limb)
+	addcc	%g1,%g3,%g3	! add res limb; carry feeds the next addxcc
+	st	%g3,[%o0+0]	! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	ld	[%o0+4],%g1	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	rd	%y,%g2		! 2
+	addx	%g0,%g2,%g2	! g2 = high word + carry
+	nop
+	addcc	%g1,%g3,%g3	! add res limb; carry feeds the next addxcc
+	st	%g3,[%o0+4]	! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	rd	%y,%g2		! 3
+	add	%o1,16,%o1	! advance s1_ptr by four limbs
+	addx	%g0,%g2,%g2	! g2 = high word + carry
+	ld	[%o0+8],%g1	! 3 (res limb)
+	addcc	%g1,%g3,%g3	! add res limb; carry feeds the next addxcc
+	st	%g3,[%o0+8]	! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2	! g2 = high word + carry
+	ld	[%o0+12],%g1	! 4 (res limb)
+	addcc	%g1,%g3,%g3	! add res limb
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0	! advance res_ptr by four limbs
+	addx	%g0,%g2,%g2	! absorb the carry before the counter update clobbers it
+Loop01:	addcc	%o2,-4,%o2	! size -= 4
+	bg	1b		! more limbs after this one: keep looping
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! last limb: low word + carry limb
+	rd	%y,%g2		! last limb: high word
+	addx	%g0,%g2,%g2	! g2 = high word + carry
+	ld	[%o0+0],%g1	! last res limb
+	addcc	%g1,%g3,%g3	! add res limb
+	st	%g3,[%o0+0]	! store last limb
+	addx	%g0,%g2,%o0	! return value = carry limb + final carry
+
+	retl
+	 nop
+
+
+!	umul, ld, addxcc, rd, st
+
+!	umul, ld, addxcc, rd, ld, addcc, st, addx
+
--- a/sysdeps/sparc/sparc8/mul_1.S
+++ b/sysdeps/sparc/sparc8/mul_1.S
@@ -0,0 +1,91 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align	8
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1	! g1 = (size mod 4) * 16 = byte offset into LL
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3	! g3 = address of the dispatch table LL
+	jmp	%g3+%g1		! enter the 4-way unrolled loop at the right phase
+	ld	[%o1+0],%o4	! 1
+LL:
+LL00:	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	b	Loop00		/* 4, 8, 12, ... */
+	orcc	%g0,%g0,%g2	! (delay slot) carry limb = 0, carry flag clear
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	orcc	%g0,%g0,%g2	! (delay slot) carry limb = 0, carry flag clear
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	add	%o1,4,%o1
+	b	Loop10
+	orcc	%g0,%g0,%g2	! (delay slot) carry limb = 0, carry flag clear
+	! Every entry above is exactly 4 instructions, so LL11 sits at LL+48.
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	add	%o1,-8,%o1
+	b	Loop11
+	orcc	%g0,%g0,%g2	! (delay slot) carry limb = 0, carry flag clear
+
+Loop:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	st	%g3,[%o0+0]	! 1
+	rd	%y,%g2		! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	st	%g3,[%o0+4]	! 2
+	rd	%y,%g2		! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	add	%o1,16,%o1	! advance s1_ptr by four limbs
+	st	%g3,[%o0+8]	! 3
+	rd	%y,%g2		! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0	! advance res_ptr by four limbs
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2	! absorb the carry before the counter update clobbers it
+Loop01:	addcc	%o2,-4,%o2	! size -= 4
+	bg	Loop		! more limbs after this one: keep looping
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! last limb: low word + carry limb
+	st	%g3,[%o0+0]	! store last limb
+	rd	%y,%g2		! last limb: high word
+
+	retl
+	addx	%g0,%g2,%o0	! (delay slot) return value = high word + carry
--- a/sysdeps/sparc/sparc8/submul_1.S
+++ b/sysdeps/sparc/sparc8/submul_1.S
@@ -0,0 +1,57 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+	sll	%o2,2,%o2		! o2 = 4 * size (bytes) ...
+	sub	%g0,%o2,%o2		! ... negated: the index counts up to zero
+	sub	%o0,%o2,%g1		! g1 = res_ptr + 4 * size (one past the end)
+	sub	%o1,%o2,%o1		! o1 = s1_ptr + 4 * size (one past the end)
+
+	clr	%o0			! o0 = running carry limb, also the return value
+
+Loop:	ld	[%g1+%o2],%g2		! g2 = next res limb
+	ld	[%o1+%o2],%o4		! o4 = next s1 limb
+	umul	%o4,%o3,%o5		! Y:o5 = s1 limb * s2_limb
+	rd	%y,%g3			! g3 = high word of the product
+	addcc	%o5,%o0,%o5		! low word += carry limb
+	addx	%g3,%g0,%o0		! carry limb = high word + carry out
+	subcc	%g2,%o5,%g2		! res limb -= low word
+	addx	%o0,%g0,%o0		! fold the borrow into the carry limb
+	st	%g2,[%g1+%o2]
+
+	addcc	%o2,4,%o2		! step the index; Z is set once it reaches zero
+	bne	Loop
+	 nop
+
+	retl
+	 nop
--- a/sysdeps/sparc/sparc8/udiv_qrnnd.S
+++ b/sysdeps/sparc/sparc8/udiv_qrnnd.S
@@ -0,0 +1,186 @@
+! SPARC  __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	o0
+! n1		o1
+! n0		o2
+! d		o3
+
+#include "sysdep.h"
+
+	.text
+	.align 4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	tst	%o3		! test the sign of d; a logical op, so carry is cleared
+	bneg	Largedivisor	! top bit of d set: use the halved-divisor path
+	mov	8,%g1		! (delay slot) g1 = 8 passes of 4 steps = 32 bits
+
+	b	Lp1
+	addxcc	%o2,%o2,%o2	! (delay slot) shift the top bit of n0 into carry
+
+Lplop:	bcc	Ln1
+	addxcc	%o2,%o2,%o2
+Lp1:	addx	%o1,%o1,%o1	! Lp*: remainder in o1; shift in the next dividend bit
+	subcc	%o1,%o3,%o4	! trial subtract: o4 = o1 - d, carry set on borrow
+	bcc	Ln2		! no borrow: continue on the Ln side, remainder in o4
+	addxcc	%o2,%o2,%o2	! (delay slot) o2 <<= 1; borrow enters as inverted q bit
+Lp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln3
+	addxcc	%o2,%o2,%o2
+Lp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln4
+	addxcc	%o2,%o2,%o2
+Lp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1	! pass counter; its flags decide the bne below
+	bne	Lplop
+	subcc	%o1,%o3,%o4	! (delay slot) trial subtract for the next step
+	bcc	Ln5
+	addxcc	%o2,%o2,%o2	! (delay slot) shift in the last inverted q bit
+Lp5:	st	%o1,[%o0]	! store the remainder through rem_ptr
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of o2 = quotient
+
+Lnlop:	bcc	Lp1
+	addxcc	%o2,%o2,%o2
+Ln1:	addx	%o4,%o4,%o4	! Ln*: remainder in o4; shift in the next dividend bit
+	subcc	%o4,%o3,%o1	! trial subtract: o1 = o4 - d, carry set on borrow
+	bcc	Lp2		! no borrow: continue on the Lp side, remainder in o1
+	addxcc	%o2,%o2,%o2
+Ln2:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp3
+	addxcc	%o2,%o2,%o2
+Ln3:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp4
+	addxcc	%o2,%o2,%o2
+Ln4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1
+	bne	Lnlop
+	subcc	%o4,%o3,%o1
+	bcc	Lp5
+	addxcc	%o2,%o2,%o2
+Ln5:	st	%o4,[%o0]	! store the remainder through rem_ptr
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of o2 = quotient
+
+Largedivisor:
+	and	%o2,1,%o5	! %o5 = n0 & 1
+
+	srl	%o2,1,%o2
+	sll	%o1,31,%g2
+	or	%g2,%o2,%o2	! %o2 = lo(n1n0 >> 1)
+	srl	%o1,1,%o1	! %o1 = hi(n1n0 >> 1)
+
+	and	%o3,1,%g2	! %g2 = d & 1
+	srl	%o3,1,%g3	! %g3 = floor(d / 2)
+	add	%g3,%g2,%g3	! %g3 = ceil(d / 2)
+
+	b	LLp1
+	addxcc	%o2,%o2,%o2	! (delay slot) same loop as above, dividing by %g3
+
+LLplop:	bcc	LLn1
+	addxcc	%o2,%o2,%o2
+LLp1:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn2
+	addxcc	%o2,%o2,%o2
+LLp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn3
+	addxcc	%o2,%o2,%o2
+LLp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn4
+	addxcc	%o2,%o2,%o2
+LLp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1
+	bne	LLplop
+	subcc	%o1,%g3,%o4
+	bcc	LLn5
+	addxcc	%o2,%o2,%o2
+LLp5:	add	%o1,%o1,%o1	! << 1
+	tst	%g2		! was d odd?
+	bne	Oddp		! yes: quotient and remainder need correcting
+	add	%o5,%o1,%o1	! (delay slot) r = 2r + (n0 & 1)
+	st	%o1,[%o0]	! d even: store the remainder through rem_ptr
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of o2 = quotient
+
+LLnlop:	bcc	LLp1
+	addxcc	%o2,%o2,%o2
+LLn1:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp2
+	addxcc	%o2,%o2,%o2
+LLn2:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp3
+	addxcc	%o2,%o2,%o2
+LLn3:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp4
+	addxcc	%o2,%o2,%o2
+LLn4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1
+	bne	LLnlop
+	subcc	%o4,%g3,%o1
+	bcc	LLp5
+	addxcc	%o2,%o2,%o2
+LLn5:	add	%o4,%o4,%o4	! << 1
+	tst	%g2		! was d odd?
+	bne	Oddn		! yes: quotient and remainder need correcting
+	add	%o5,%o4,%o4	! (delay slot) r = 2r + (n0 & 1)
+	st	%o4,[%o0]	! d even: store the remainder through rem_ptr
+	retl
+	xnor	%g0,%o2,%o0	! (delay slot) return the complement of o2 = quotient
+
+Oddp:	xnor	%g0,%o2,%o2	! undo the inversion of the quotient bits
+	! q' in %o2. r' in %o1
+	addcc	%o1,%o2,%o1	! r = r + q, carry set if that wrapped
+	bcc	LLp6
+	addx	%o2,0,%o2	! (delay slot) q += carry
+	sub	%o1,%o3,%o1	! wrapped: r -= d
+LLp6:	subcc	%o1,%o3,%g0	! compare r with d
+	bcs	LLp7		! r < d: nothing more to fix up
+	subx	%o2,-1,%o2	! (delay slot) q = q + 1 - borrow
+	sub	%o1,%o3,%o1	! r >= d: r -= d
+LLp7:	st	%o1,[%o0]	! store the remainder through rem_ptr
+	retl
+	mov	%o2,%o0		! (delay slot) return the quotient
+
+Oddn:	xnor	%g0,%o2,%o2	! undo the inversion of the quotient bits
+	! q' in %o2. r' in %o4
+	addcc	%o4,%o2,%o4	! r = r + q, carry set if that wrapped
+	bcc	LLn6
+	addx	%o2,0,%o2	! (delay slot) q += carry
+	sub	%o4,%o3,%o4	! wrapped: r -= d
+LLn6:	subcc	%o4,%o3,%g0	! compare r with d
+	bcs	LLn7		! r < d: nothing more to fix up
+	subx	%o2,-1,%o2	! (delay slot) q = q + 1 - borrow
+	sub	%o4,%o3,%o4	! r >= d: r -= d
+LLn7:	st	%o4,[%o0]	! store the remainder through rem_ptr
+	retl
+	mov	%o2,%o0		! (delay slot) return the quotient
--- a/sysdeps/sparc/sqrt.c
+++ b/sysdeps/sparc/sqrt.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#include <ansidecl.h>
+#include <errno.h>
+#include <math.h>
+
+#ifndef	__GNUC__
+  #error This file uses GNU C extensions; you must compile with GCC.
+#endif
+
+/* Return the square root of X; errno is set to EDOM if X is negative.  */
+double
+DEFUN(sqrt, (x), double x)
+{
+  if (x < 0.0) errno = EDOM;	/* domain error; -0.0 and NaN do not match */
+  asm("fsqrtd %1, %0" : "=f" (x) : "f" (x));
+  return x;
+}
--- a/sysdeps/sparc/sub_n.S
+++ b/sysdeps/sparc/sub_n.S
@@ -0,0 +1,134 @@
+! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! s1_ptr	%o1
+! s2_ptr	%o2
+! size		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+	ld	[%o1+0],%o4		! read first limb from s1_ptr
+	srl	%o3,4,%g1		! g1 = size / 16, the number of full 16-limb passes
+	ld	[%o2+0],%o5		! read first limb from s2_ptr
+
+	sub	%g0,%o3,%o3
+	andcc	%o3,(16-1),%o3		! o3 = (-size) mod 16 = limbs to skip; clears carry
+	be	Lzero			! size is a multiple of 16: start at the top
+	 nop
+
+	sll	%o3,2,%o3		! multiply by 4
+	sub	%o0,%o3,%o0		! adjust res_ptr
+	sub	%o1,%o3,%o1		! adjust s1_ptr
+	sub	%o2,%o3,%o2		! adjust s2_ptr
+
+	mov	%o4,%g2			! first s1 limb in both o4 and g2: entries alternate pairs
+
+	sethi	%hi(Lbase),%g3
+	or	%g3,%lo(Lbase),%g3
+	sll	%o3,2,%o3		! multiply by 4
+	jmp	%g3+%o3			! 16 bytes (4 instructions) per skipped limb
+	 mov	%o5,%g3			! (delay slot) first s2 limb in both o5 and g3
+
+Loop:	subxcc	%g2,%g3,%o3
+	add	%o1,64,%o1
+	st	%o3,[%o0+60]
+	add	%o2,64,%o2
+	ld	[%o1+0],%o4
+	add	%o0,64,%o0
+	ld	[%o2+0],%o5
+Lzero:	sub	%g1,1,%g1	! entry for 0 + 16r limbs (adjust loop counter)
+Lbase:	ld	[%o1+4],%g2
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+4],%g3
+	st	%o3,[%o0+0]
+	ld	[%o1+8],%o4	! entry for 15 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+8],%o5
+	st	%o3,[%o0+4]
+	ld	[%o1+12],%g2	! entry for 14 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+12],%g3
+	st	%o3,[%o0+8]
+	ld	[%o1+16],%o4	! entry for 13 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+16],%o5
+	st	%o3,[%o0+12]
+	ld	[%o1+20],%g2	! entry for 12 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+20],%g3
+	st	%o3,[%o0+16]
+	ld	[%o1+24],%o4	! entry for 11 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+24],%o5
+	st	%o3,[%o0+20]
+	ld	[%o1+28],%g2	! entry for 10 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+28],%g3
+	st	%o3,[%o0+24]
+	ld	[%o1+32],%o4	! entry for 9 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+32],%o5
+	st	%o3,[%o0+28]
+	ld	[%o1+36],%g2	! entry for 8 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+36],%g3
+	st	%o3,[%o0+32]
+	ld	[%o1+40],%o4	! entry for 7 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+40],%o5
+	st	%o3,[%o0+36]
+	ld	[%o1+44],%g2	! entry for 6 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+44],%g3
+	st	%o3,[%o0+40]
+	ld	[%o1+48],%o4	! entry for 5 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+48],%o5
+	st	%o3,[%o0+44]
+	ld	[%o1+52],%g2	! entry for 4 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+52],%g3
+	st	%o3,[%o0+48]
+	ld	[%o1+56],%o4	! entry for 3 + 16r limbs
+	subxcc	%g2,%g3,%o3
+	ld	[%o2+56],%o5
+	st	%o3,[%o0+52]
+	ld	[%o1+60],%g2	! entry for 2 + 16r limbs
+	subxcc	%o4,%o5,%o3
+	ld	[%o2+60],%g3
+	st	%o3,[%o0+56]
+	subx	%g0,%g0,%o4	! entry for 1 + 16r limbs; save borrow as o4 = -carry
+	tst	%g1		! any full passes left? (this clobbers the carry flag)
+	bne	Loop
+	 subcc	%g0,%o4,%g0	! restore cy (delay slot)
+
+	subxcc	%g2,%g3,%o3
+	st	%o3,[%o0+60]	! store most significant limb
+
+	retl
+	 addx	%g0,%g0,%o0	! return borrow-out (carry flag) from most sign. limb
--- a/sysdeps/sparc/submul_1.S
+++ b/sysdeps/sparc/submul_1.S
@@ -0,0 +1,146 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+	! {RES_PTR,SIZE} -= {S1_PTR,SIZE} * S2_LIMB; the borrow-out limb is returned in o0.
+	! Point S1_PTR and RES_PTR past their blocks; o2 = (- 4 x SIZE) is the index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large		! S2_LIMB does not fit in 12 bits: take the 32-step path
+	nop
+	! Small path: S2_LIMB is the multiplier in %y, so 12 multiply steps suffice.
+	ld	[%o1+%o2],%o5	! o5 = first S1 limb
+	mov	0,%o0		! o0 = running carry limb
+	b	L0
+	 add	%o4,-4,%o4	! (delay slot) bias RES_PTR: o2 is bumped before each res access
+Loop0:
+	subcc	%o5,%g1,%g1	! res limb - low product limb
+	ld	[%o1+%o2],%o5	! o5 = next S1 limb
+	addx	%o0,%g0,%o0	! fold the borrow into the carry limb
+	st	%g1,[%o4+%o2]
+L0:	wr	%g0,%o3,%y	! %y = S2_LIMB; three instructions precede the first mulscc
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2	! g2 = S2_LIMB iff the S1 limb has bit 31 set (sign compensation)
+	andcc	%g1,0,%g1	! zero the partial product and clear N and V
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1	! final shift step
+	sra	%g1,20,%g4	! g4 = high product limb
+	sll	%g1,12,%g1
+	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1	! g1 = low product limb
+
+	addcc	%g1,%o0,%g1	! add the carry limb to the low product limb
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop0
+	 ld	[%o4+%o2],%o5	! (delay slot) o5 = res limb matching the product just formed
+
+	subcc	%o5,%g1,%g1
+	addx	%o0,%g0,%o0
+	retl
+	st	%g1,[%o4+%o2]	! (delay slot) store the most significant result limb
+
+	! Large path (S2_LIMB > 0xfff): 32 multiply steps with the S1 limb in %y.  WRY is a
+	! delayed write, so three instructions must separate wr from the first mulscc.
+Large:	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	b	L1
+	 add	%o4,-4,%o4
+Loop:
+	subcc	%o5,%g3,%g3
+	ld	[%o1+%o2],%o5
+	addx	%o0,%g0,%o0
+	st	%g3,[%o4+%o2]
+L1:	wr	%g0,%o5,%y
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0; done here to pad the WRY delay
+	and	%o5,%g4,%g2	! g2 = S1 limb iff S2_LIMB < 0 (sign compensation)
+	andcc	%g0,%g0,%g1	! zero the partial product and clear N and V
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1	! final shift step: g1 = high limb, %y = low limb
+	rd	%y,%g3
+	addcc	%g3,%o0,%g3	! add the carry limb to the low product limb
+	addx	%g2,%g1,%o0	! new carry limb = high limb + compensation + cy
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop
+	 ld	[%o4+%o2],%o5	! (delay slot) o5 = res limb matching the product just formed
+
+	subcc	%o5,%g3,%g3
+	addx	%o0,%g0,%o0
+	retl
+	st	%g3,[%o4+%o2]	! (delay slot) store the most significant result limb
--- a/sysdeps/sparc/udiv.S
+++ b/sysdeps/sparc/udiv.S
@@ -0,0 +1,348 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  NAME	name of function to generate (.udiv in this file)
+ *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (div in this file)
+ *  S		S=true => signed; S=false => unsigned (false in this file)
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.udiv)
+	! Register roles: %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V, %g1 = scratch, %g7 = single-bit step count.
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	! V = divisor; Z is set iff the divisor is zero
+	bne	1f
+	mov	%o0, %o3	! (delay slot) R = dividend
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2	! (delay slot) Q = 0
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 2^27, the threshold for the careful path
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4	! (delay slot) ITER = 0
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7	! (delay slot) single-bit step count starts at 1
+		sll	%o5, 4, %o5	! V <<= 4
+		b	1b
+		add	%o4, 1, %o4	! (delay slot) ITER++
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5	! V <<= 1, carry set if a bit fell off the top
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7	! (delay slot) count the shift
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7	! (delay slot) undo the count for the shift that overflowed
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3	! first step is unconditional: R -= V
+		mov	1, %o2	! Q = 1
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2	! Q <<= 1 (sll leaves the condition codes from tst alone)
+		bl	1f
+		srl	%o5, 1, %o5	! (delay slot) V >>= 1
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3	! (delay slot) set N from R for the bl at the top of the loop
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5	! V <<= 4 until it exceeds R
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4	! (delay slot) ITER++ on every pass, including the last
+	be	Lgot_result	! Z comes from the addcc: taken only if the count came out zero
+	sub	%o4, 1, %o4	! (delay slot) take back the increment from the final pass
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2	! make room in Q for the 4 bits this pass develops
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4	! ITER--; loop again while it stays >= 0
+	bge	Ldivloop
+	tst	%o3	! (delay slot) set N from R for the next pass and for the fixup test
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2	! (annulled delay slot) runs only when R < 0: Q -= 1
+
+
+Lgot_result:
+
+	retl
+	mov %o2, %o0	! (delay slot) return the quotient
--- a/sysdeps/sparc/udiv_qrnnd.S
+++ b/sysdeps/sparc/udiv_qrnnd.S
@@ -0,0 +1,143 @@
+! SPARC  __udiv_qrnnd division support, used from longlong.h.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	i0
+! n1		i1
+! n0		i2
+! d		i3
+
+#include "sysdep.h"
+#undef ret	/* Kludge for glibc */
+
+	.text
+	.align	8
+LC0:	.double	0r4294967296	! 2^32
+LC1:	.double	0r2147483648	! 2^31
+	! __udiv_qrnnd: estimate q = (n1 * 2^32 + n0) / d in double precision, correct it by one, store the remainder at rem_ptr.
+	.align	4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	!#PROLOGUE# 0
+	save	%sp,-104,%sp
+	!#PROLOGUE# 1
+	st	%i1,[%fp-8]
+	ld	[%fp-8],%f10	! move n1 to the FPU through the stack slot
+	sethi	%hi(LC0),%o7
+	fitod	%f10,%f4	! f4 = (double) n1, read as a signed word
+	ldd	[%o7+%lo(LC0)],%f8	! f8 = 2^32
+	cmp	%i1,0
+	bge	L248
+	mov	%i0,%i5		! (delay slot) keep rem_ptr in i5; i0 is reused for the return value
+	faddd	%f4,%f8,%f4	! n1 had bit 31 set: add 2^32 to get its unsigned value
+L248:
+	st	%i2,[%fp-8]
+	ld	[%fp-8],%f10
+	fmuld	%f4,%f8,%f6	! f6 = n1 * 2^32
+	cmp	%i2,0
+	bge	L249
+	fitod	%f10,%f2	! (delay slot) f2 = (double) n0, read as a signed word
+	faddd	%f2,%f8,%f2	! n0 had bit 31 set: add 2^32
+L249:
+	st	%i3,[%fp-8]
+	faddd	%f6,%f2,%f2	! f2 = n1 * 2^32 + n0
+	ld	[%fp-8],%f10
+	cmp	%i3,0
+	bge	L250
+	fitod	%f10,%f4	! (delay slot) f4 = (double) d, read as a signed word
+	faddd	%f4,%f8,%f4	! d had bit 31 set: add 2^32
+L250:
+	fdivd	%f2,%f4,%f2	! f2 = quotient estimate
+	sethi	%hi(LC1),%o7
+	ldd	[%o7+%lo(LC1)],%f4	! f4 = 2^31
+	fcmped	%f2,%f4
+	nop			! separates the FP compare from the FP branch
+	fbge,a	L251		! estimate >= 2^31: convert (estimate - 2^31), then flip bit 31
+	fsubd	%f2,%f4,%f2	! (annulled delay slot) runs only when the branch is taken
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	b	L252
+	ld	[%fp-8],%i4	! (delay slot) i4 = q
+L251:
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	ld	[%fp-8],%i4
+	sethi	%hi(-2147483648),%g2
+	xor	%i4,%g2,%i4	! put the 2^31 back by flipping bit 31; i4 = q
+L252:
+	wr	%g0,%i4,%y	! %y = q (multiplier); the steps below form q * d
+	sra	%i3,31,%g2
+	and	%i4,%g2,%g2	! g2 = q iff d has bit 31 set (unsigned compensation)
+	andcc	%g0,0,%g1	! zero the partial product and clear N and V
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,0,%g1	! final shift step
+	add	%g1,%g2,%i0	! i0 = high word of q * d
+	rd	%y,%g3		! g3 = low word of q * d
+	subcc	%i2,%g3,%o7	! o7 = n0 - low word: the remainder candidate
+	subxcc	%i1,%i0,%g0	! n1 - high word - borrow; only the condition codes are kept
+	be	L253
+	cmp	%o7,%i3		! (delay slot) compare the remainder candidate with d for L253
+
+	add	%i4,-1,%i0	! high words did not cancel: return q - 1 ...
+	add	%o7,%i3,%o7	! ... and add d back into the remainder
+	st	%o7,[%i5]
+	ret
+	restore
+L253:
+	blu	L246		! remainder candidate < d (unsigned): q stands
+	mov	%i4,%i0		! (delay slot) return value = q
+	add	%i4,1,%i0	! remainder candidate >= d: return q + 1 ...
+	sub	%o7,%i3,%o7	! ... and take d off the remainder
+L246:
+	st	%o7,[%i5]	! store the remainder through rem_ptr
+	ret
+	restore
--- a/sysdeps/sparc/umul.S
+++ b/sysdeps/sparc/umul.S
@@ -0,0 +1,153 @@
+/*
+ * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.  Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ *	call	.umul
+ *	nop
+ *	bnz	overflow	(or tnz)
+ */
+
+#include "DEFS.h"
+FUNC(.umul)
+	or	%o0, %o1, %o4	! OR the operands so that one test covers both
+	mov	%o0, %y		! multiplier -> Y
+	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
+	be	Lmul_shortway	! if zero, can do it the short way
+	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
+
+	/*
+	 * Long multiply.  32 steps, followed by a final shift step.
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %o1, %o4	! 13
+	mulscc	%o4, %o1, %o4	! 14
+	mulscc	%o4, %o1, %o4	! 15
+	mulscc	%o4, %o1, %o4	! 16
+	mulscc	%o4, %o1, %o4	! 17
+	mulscc	%o4, %o1, %o4	! 18
+	mulscc	%o4, %o1, %o4	! 19
+	mulscc	%o4, %o1, %o4	! 20
+	mulscc	%o4, %o1, %o4	! 21
+	mulscc	%o4, %o1, %o4	! 22
+	mulscc	%o4, %o1, %o4	! 23
+	mulscc	%o4, %o1, %o4	! 24
+	mulscc	%o4, %o1, %o4	! 25
+	mulscc	%o4, %o1, %o4	! 26
+	mulscc	%o4, %o1, %o4	! 27
+	mulscc	%o4, %o1, %o4	! 28
+	mulscc	%o4, %o1, %o4	! 29
+	mulscc	%o4, %o1, %o4	! 30
+	mulscc	%o4, %o1, %o4	! 31
+	mulscc	%o4, %o1, %o4	! 32
+	mulscc	%o4, %g0, %o4	! final shift
+
+
+	/*
+	 * Normally, with the shift-and-add approach, if both numbers are
+	 * positive you get the correct result.  With 32-bit two's-complement
+	 * numbers, -x is represented as
+	 *
+	 *		  x		    32
+	 *	( 2  -  ------ ) mod 2  *  2
+	 *		   32
+	 *		  2
+	 *
+	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
+	 * we can treat this as if the radix point were just to the left
+	 * of the sign bit (multiply by 2^32), and get
+	 *
+	 *	-x  =  (2 - x) mod 2
+	 *
+	 * Then, ignoring the `mod 2's for convenience:
+	 *
+	 *   x *  y	= xy
+	 *  -x *  y	= 2y - xy
+	 *   x * -y	= 2x - xy
+	 *  -x * -y	= 4 - 2x - 2y + xy
+	 *
+	 * For signed multiplies, we subtract (x << 32) from the partial
+	 * product to fix this problem for negative multipliers (see mul.s).
+	 * Because of the way the shift into the partial product is calculated
+	 * (N xor V), this term is automatically removed for the multiplicand,
+	 * so we don't have to adjust.
+	 *
+	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
+	 * and the correction is wrong.  So for unsigned multiplies where the
+	 * high order bit is one, we end up with xy - (y << 32).  To fix it
+	 * we add y << 32.
+	 */
+#if 0
+	tst	%o1
+	bl,a	1f		! if %o1 < 0 (high order bit = 1),
+	add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
+1:	rd	%y, %o0		! get lower half of product
+	retl
+	addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
+#else
+	/* Faster code from tege@sics.se.  */
+	sra	%o1, 31, %o2	! make mask from sign bit
+	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
+	rd	%y, %o0		! get lower half of product
+	retl
+	addcc	%o4, %o2, %o1	! add compensation and put upper half in place
+#endif
+
+Lmul_shortway:
+	/*
+	 * Short multiply.  12 steps, followed by a final shift step.
+	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+	 * but there is no problem with %o0 being negative (unlike above),
+	 * and overflow is impossible (the answer is at most 24 bits long).
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 * %o4 has 20 of the bits that should be in the result; %y has
+	 * the bottom 12 (as %y's top 12).  That is:
+	 *
+	 *	  %o4		    %y
+	 * +----------------+----------------+
+	 * | -12- |   -20-  | -12- |   -20-  |
+	 * +------(---------+------)---------+
+	 *	   -----result-----
+	 *
+	 * The 12 bits of %o4 left of the `result' area are all zero;
+	 * in fact, all top 20 bits of %o4 are zero.
+	 */
+
+	rd	%y, %o5		! %o5 = %y, whose top 12 bits are the low result bits
+	sll	%o4, 12, %o0	! shift middle bits left 12
+	srl	%o5, 20, %o5	! shift low bits right 20
+	or	%o5, %o0, %o0	! %o0 = assembled low word of the product
+	retl
+	addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
--- a/sysdeps/sparc/urem.S
+++ b/sysdeps/sparc/urem.S
@@ -0,0 +1,348 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  NAME	name of function to generate (.urem in this file)
+ *  OP		OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (rem in this file)
+ *  S		S=true => signed; S=false => unsigned (false in this file)
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include <machine/trap.h>
+#endif
+
+FUNC(.urem)
+	! Register roles: %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V, %g1 = scratch, %g7 = single-bit step count.
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	! V = divisor; Z is set iff the divisor is zero
+	bne	1f
+	mov	%o0, %o3	! (delay slot) R = dividend
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2	! (delay slot) Q = 0
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 2^27, the threshold for the careful path
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4	! (delay slot) ITER = 0
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7	! (delay slot) single-bit step count starts at 1
+		sll	%o5, 4, %o5	! V <<= 4
+		b	1b
+		add	%o4, 1, %o4	! (delay slot) ITER++
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5	! V <<= 1, carry set if a bit fell off the top
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7	! (delay slot) count the shift
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7	! (delay slot) undo the count for the shift that overflowed
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3	! first step is unconditional: R -= V
+		mov	1, %o2	! Q = 1
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2	! Q <<= 1 (sll leaves the condition codes from tst alone)
+		bl	1f
+		srl	%o5, 1, %o5	! (delay slot) V >>= 1
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3	! (delay slot) set N from R for the bl at the top of the loop
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5	! V <<= 4 until it exceeds R
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4	! (delay slot) ITER++ on every pass, including the last
+	be	Lgot_result	! Z comes from the addcc: taken only if the count came out zero
+	sub	%o4, 1, %o4	! (delay slot) take back the increment from the final pass
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2	! make room in Q for the 4 bits this pass develops
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4	! ITER--; loop again while it stays >= 0
+	bge	Ldivloop
+	tst	%o3	! (delay slot) set N from R for the next pass and for the fixup test
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	add	%o3, %o1, %o3	! (annulled delay slot) runs only when R < 0: R += divisor
+
+
+Lgot_result:
+
+	retl
+	mov %o3, %o0	! (delay slot) return the remainder