1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-08 17:42:12 +03:00

initial import

This commit is contained in:
Roland McGrath
1995-02-18 01:27:10 +00:00
commit 28f540f45b
2263 changed files with 218361 additions and 0 deletions

4
sysdeps/sparc/DEFS.h Normal file
View File

@@ -0,0 +1,4 @@
#define FUNC(name) \
.global name; \
.align 4; \
name:

4
sysdeps/sparc/Dist Normal file
View File

@@ -0,0 +1,4 @@
DEFS.h
mul.S umul.S
divrem.m4 sdiv.S udiv.S rem.S urem.S
alloca.S

2
sysdeps/sparc/Implies Normal file
View File

@@ -0,0 +1,2 @@
# SPARC uses IEEE 754 floating point.
ieee754

57
sysdeps/sparc/Makefile Normal file
View File

@@ -0,0 +1,57 @@
# Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public License
# as published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
# You should have received a copy of the GNU Library General Public
# License along with the GNU C Library; see the file COPYING.LIB. If
# not, write to the Free Software Foundation, Inc., 675 Mass Ave,
# Cambridge, MA 02139, USA.
ifeq ($(subdir),gnulib)
routines = mul umul $(divrem) alloca
endif # gnulib
# We distribute these files, even though they are generated,
# so as to avoid the need for a functioning m4 to build the library.
divrem := sdiv udiv rem urem
+divrem-NAME-sdiv := div
+divrem-NAME-udiv := udiv
+divrem-NAME-rem := rem
+divrem-NAME-urem := urem
+divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
+divrem-OP-div := div
+divrem-OP-udiv := div
+divrem-OP-rem := rem
+divrem-OP-urem := rem
+divrem-S-div := true
+divrem-S-rem := true
+divrem-S-udiv := false
+divrem-S-urem := false
$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
(echo "define(NAME,\`.$(+divrem-NAME)')\
define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
define(S,\`$(+divrem-S-$(+divrem-NAME))')\
/* This file is generated from divrem.m4; DO NOT EDIT! */"; \
cat $<) | $(M4) > $@-tmp
# Make it unwritable so noone will edit it by mistake.
-chmod a-w $@-tmp
mv -f $@-tmp $@
test -d CVS && cvs commit -m'Regenerated from $<' $@
sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
ifeq ($(subdir),crypt)
crypt := crypt.sparc # Use crypt/crypt.sparc.S.
endif # crypt

47
sysdeps/sparc/__longjmp.S Normal file
View File

@@ -0,0 +1,47 @@
/* Copyright (C) 1991, 1993 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */
ENTRY (__longjmp)
/* Do a "flush register windows trap". The trap handler in the
kernel writes all the register windows to their stack slots, and
marks them all as invalid (needing to be sucked up from the
stack when used). This ensures that all information needed to
unwind to these callers is in memory, not in the register
windows. */
ta ST_FLUSH_WINDOWS
ld [%o0], %o7 /* Return PC. */
ld [%o0 + 4], %fp /* Saved SP. */
sub %fp, 64, %sp /* Allocate a register save area. */
/* if (%o1 == 0) %o1 = 1; */
tst %o1
be,a Ldone
mov 1, %o1
Ldone: retl
/* On the way out, put the return value in %o0. */
restore %o1, 0, %o0

134
sysdeps/sparc/add_n.S Normal file
View File

@@ -0,0 +1,134 @@
! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr %o0
! s1_ptr %o1
! s2_ptr %o2
! size %o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_add_n)
C_SYMBOL_NAME(__mpn_add_n):
ld [%o1+0],%o4 ! read first limb from s1_ptr
srl %o3,4,%g1
ld [%o2+0],%o5 ! read first limb from s2_ptr
sub %g0,%o3,%o3
andcc %o3,(16-1),%o3
be Lzero
nop
sll %o3,2,%o3 ! multiply by 4
sub %o0,%o3,%o0 ! adjust res_ptr
sub %o1,%o3,%o1 ! adjust s1_ptr
sub %o2,%o3,%o2 ! adjust s2_ptr
mov %o4,%g2
sethi %hi(Lbase),%g3
or %g3,%lo(Lbase),%g3
sll %o3,2,%o3 ! multiply by 4
jmp %g3+%o3
mov %o5,%g3
Loop: addxcc %g2,%g3,%o3
add %o1,64,%o1
st %o3,[%o0+60]
add %o2,64,%o2
ld [%o1+0],%o4
add %o0,64,%o0
ld [%o2+0],%o5
Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter)
Lbase: ld [%o1+4],%g2
addxcc %o4,%o5,%o3
ld [%o2+4],%g3
st %o3,[%o0+0]
ld [%o1+8],%o4 ! add 15 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+8],%o5
st %o3,[%o0+4]
ld [%o1+12],%g2 ! add 14 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+12],%g3
st %o3,[%o0+8]
ld [%o1+16],%o4 ! add 13 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+16],%o5
st %o3,[%o0+12]
ld [%o1+20],%g2 ! add 12 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+20],%g3
st %o3,[%o0+16]
ld [%o1+24],%o4 ! add 11 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+24],%o5
st %o3,[%o0+20]
ld [%o1+28],%g2 ! add 10 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+28],%g3
st %o3,[%o0+24]
ld [%o1+32],%o4 ! add 9 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+32],%o5
st %o3,[%o0+28]
ld [%o1+36],%g2 ! add 8 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+36],%g3
st %o3,[%o0+32]
ld [%o1+40],%o4 ! add 7 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+40],%o5
st %o3,[%o0+36]
ld [%o1+44],%g2 ! add 6 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+44],%g3
st %o3,[%o0+40]
ld [%o1+48],%o4 ! add 5 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+48],%o5
st %o3,[%o0+44]
ld [%o1+52],%g2 ! add 4 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+52],%g3
st %o3,[%o0+48]
ld [%o1+56],%o4 ! add 3 + 16r limbs
addxcc %g2,%g3,%o3
ld [%o2+56],%o5
st %o3,[%o0+52]
ld [%o1+60],%g2 ! add 2 + 16r limbs
addxcc %o4,%o5,%o3
ld [%o2+60],%g3
st %o3,[%o0+56]
addx %g0,%g0,%o4
tst %g1
bne Loop
subcc %g0,%o4,%g0 ! restore cy (delay slot)
addxcc %g2,%g3,%o3
st %o3,[%o0+60] ! store most significant limb
retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb

146
sysdeps/sparc/addmul_1.S Normal file
View File

@@ -0,0 +1,146 @@
! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
! the result to a second limb vector.
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_addmul_1)
C_SYMBOL_NAME(__mpn_addmul_1):
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
add %o1,%o2,%o1
sub %g0,%o2,%o2
cmp %o3,0xfff
bgu Large
nop
ld [%o1+%o2],%o5
mov 0,%o0
b L0
add %o4,-4,%o4
Loop0:
addcc %o5,%g1,%g1
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g1,[%o4+%o2]
L0: wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,0,%g1
sra %g1,20,%g4
sll %g1,12,%g1
rd %y,%g3
srl %g3,20,%g3
or %g1,%g3,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
bne Loop0
ld [%o4+%o2],%o5
addcc %o5,%g1,%g1
addx %o0,%g0,%o0
retl
st %g1,[%o4+%o2]
Large: ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
b L1
add %o4,-4,%o4
Loop:
addcc %o5,%g3,%g3
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g3,[%o4+%o2]
L1: wr %g0,%o5,%y
and %o5,%g4,%g2
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%g0,%g1
rd %y,%g3
addcc %g3,%o0,%g3
addx %g2,%g1,%o0
addcc %o2,4,%o2
bne Loop
ld [%o4+%o2],%o5
addcc %o5,%g3,%g3
addx %o0,%g0,%o0
retl
st %g3,[%o4+%o2]

32
sysdeps/sparc/alloca.S Normal file
View File

@@ -0,0 +1,32 @@
/* Copyright (C) 1994 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include "DEFS.h"
/* Code produced by Sun's C compiler calls this function with two extra
arguments which it makes relocatable symbols but seem always to be
the constant 96; I have no idea what they are for. */
#ifndef NO_UNDERSCORES
#define __builtin_alloca ___builtin_alloca
#endif
FUNC (__builtin_alloca)
sub %sp, %o0, %sp /* Push some stack space. */
retl /* Return; the returned buffer leaves 96 */
add %sp, 96, %o0 /* bytes of register save area at the top. */

View File

@@ -0,0 +1,26 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version.
Copyright (C) 1994 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY (setjmp)
sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1
or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1
jmp %g1
mov %g0, %o1 /* Pass second argument of zero. */

View File

@@ -0,0 +1,26 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version.
Copyright (C) 1994 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
ENTRY (setjmp)
sethi %hi(C_SYMBOL_NAME (__sigsetjmp)), %g1
or %lo(C_SYMBOL_NAME (__sigsetjmp)), %g1, %g1
jmp %g1
mov 1, %o1 /* Pass second argument of one. */

3
sysdeps/sparc/bytesex.h Normal file
View File

@@ -0,0 +1,3 @@
/* SPARC is big-endian. */
#define __BYTE_ORDER __BIG_ENDIAN

234
sysdeps/sparc/divrem.m4 Normal file
View File

@@ -0,0 +1,234 @@
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* NAME name of function to generate
* OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
* S S=true => signed; S=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top `decade' of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
define(N, `4')dnl
define(WORDSIZE, `32')dnl
define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
dnl
define(dividend, `%o0')dnl
define(divisor, `%o1')dnl
define(Q, `%o2')dnl
define(R, `%o3')dnl
define(ITER, `%o4')dnl
define(V, `%o5')dnl
dnl
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
define(T, `%g1')dnl
define(SC, `%g7')dnl
ifelse(S, `true', `define(SIGN, `%g6')')dnl
dnl
dnl This is the recursive definition for developing quotient digits.
dnl
dnl Parameters:
dnl $1 the current depth, 1 <= $1 <= N
dnl $2 the current accumulation of quotient bits
dnl N max depth
dnl
dnl We add a new bit to $2 and either recurse or insert the bits in
dnl the quotient. R, Q, and V are inputs and outputs as defined above;
dnl the condition codes are expected to reflect the input R, and are
dnl modified to reflect the output R.
dnl
define(DEVELOP_QUOTIENT_BITS,
` ! depth $1, accumulated bits $2
bl L.$1.eval(2**N+$2)
srl V,1,V
! remainder is positive
subcc R,V,R
ifelse($1, N,
` b 9f
add Q, ($2*2+1), Q
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
L.$1.eval(2**N+$2):
! remainder is negative
addcc R,V,R
ifelse($1, N,
` b 9f
add Q, ($2*2-1), Q
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
ifelse($1, 1, `9:')')dnl
#include "DEFS.h"
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
FUNC(NAME)
ifelse(S, `true',
` ! compute sign of result; if neither is negative, no problem
orcc divisor, dividend, %g0 ! either negative?
bge 2f ! no, go do the divide
ifelse(OP, `div',
` xor divisor, dividend, SIGN ! compute sign in any case',
` mov dividend, SIGN ! sign of remainder matches dividend')
tst divisor
bge 1f
tst dividend
! divisor is definitely negative; dividend might also be negative
bge 2f ! if dividend not negative...
sub %g0, divisor, divisor ! in any case, make divisor nonneg
1: ! dividend is negative, divisor is nonnegative
sub %g0, dividend, dividend ! make dividend nonnegative
2:
')
! Ready to divide. Compute size of quotient; scale comparand.
orcc divisor, %g0, V
bne 1f
mov dividend, R
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp R, V ! if divisor exceeds dividend, done
blu Lgot_result ! (and algorithm fails otherwise)
clr Q
sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
cmp R, T
blu Lnot_really_big
clr ITER
! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.'
1:
cmp V, T
bgeu 3f
mov 1, SC
sll V, N, V
b 1b
add ITER, 1, ITER
! Now compute SC.
2: addcc V, V, V
bcc Lnot_too_big
add SC, 1, SC
! We get here if the divisor overflowed while shifting.
! This means that R has the high-order bit set.
! Restore V and subtract from R.
sll T, TOPBITS, T ! high order bit
srl V, 1, V ! rest of V
add V, T, V
b Ldo_single_div
sub SC, 1, SC
Lnot_too_big:
3: cmp V, R
blu 2b
nop
be Ldo_single_div
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! V > R: went too far: back up 1 step
! srl V, 1, V
! dec SC
! do single-bit divide steps
!
! We have to be careful here. We know that R >= V, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if R >= 0. Because both R and V may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
Ldo_single_div:
subcc SC, 1, SC
bl Lend_regular_divide
nop
sub R, V, R
mov 1, Q
b Lend_single_divloop
nop
Lsingle_divloop:
sll Q, 1, Q
bl 1f
srl V, 1, V
! R >= 0
sub R, V, R
b 2f
add Q, 1, Q
1: ! R < 0
add R, V, R
sub Q, 1, Q
2:
Lend_single_divloop:
subcc SC, 1, SC
bge Lsingle_divloop
tst R
b,a Lend_regular_divide
Lnot_really_big:
1:
sll V, N, V
cmp V, R
bleu 1b
addcc ITER, 1, ITER
be Lgot_result
sub ITER, 1, ITER
tst R ! set up for initial iteration
Ldivloop:
sll Q, N, Q
DEVELOP_QUOTIENT_BITS(1, 0)
Lend_regular_divide:
subcc ITER, 1, ITER
bge Ldivloop
tst R
bl,a Lgot_result
! non-restoring fixup here (one instruction only!)
ifelse(OP, `div',
` sub Q, 1, Q
', ` add R, divisor, R
')
Lgot_result:
ifelse(S, `true',
` ! check to see if answer should be < 0
tst SIGN
bl,a 1f
ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
1:')
retl
ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')

14
sysdeps/sparc/jmp_buf.h Normal file
View File

@@ -0,0 +1,14 @@
/* Define the machine-dependent type `jmp_buf'. SPARC version. */
/* NOTE: The assembly code in __longjmp.S and setjmp.S knows the layout
of this structure. You must hack the assembly code if you want to change
the order of the members. */
typedef struct
{
/* Return PC (register o7). */
__ptr_t __pc;
/* Saved FP. */
__ptr_t __fp;
} __jmp_buf[1];

21
sysdeps/sparc/memcopy.h Normal file
View File

@@ -0,0 +1,21 @@
/* Copyright (C) 1991 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdeps/generic/memcopy.h>
#undef reg_char
#define reg_char int

198
sysdeps/sparc/mul_1.S Normal file
View File

@@ -0,0 +1,198 @@
! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
! the result in a second limb vector.
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
! ADD CODE FOR SMALL MULTIPLIERS!
!1: ld
! st
!
!2: ld ,a
! addxcc a,a,x
! st x,
!
!3_unrolled:
! ld ,a
! addxcc a,a,x1 ! 2a + cy
! addx %g0,%g0,x2
! addcc a,x1,x ! 3a + c
! st x,
!
! ld ,a
! addxcc a,a,y1
! addx %g0,%g0,y2
! addcc a,y1,x
! st x,
!
!4_unrolled:
! ld ,a
! srl a,2,x1 ! 4a
! addxcc y2,x1,x
! sll a,30,x2
! st x,
!
! ld ,a
! srl a,2,y1
! addxcc x2,y1,y
! sll a,30,y2
! st x,
!
!5_unrolled:
! ld ,a
! srl a,2,x1 ! 4a
! addxcc a,x1,x ! 5a + c
! sll a,30,x2
! addx %g0,x2,x2
! st x,
!
! ld ,a
! srl a,2,y1
! addxcc a,y1,x
! sll a,30,y2
! addx %g0,y2,y2
! st x,
!
!8_unrolled:
! ld ,a
! srl a,3,x1 ! 8a
! addxcc y2,x1,x
! sll a,29,x2
! st x,
!
! ld ,a
! srl a,3,y1
! addxcc x2,y1,y
! sll a,29,y2
! st x,
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_mul_1)
C_SYMBOL_NAME(__mpn_mul_1):
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
add %o1,%o2,%o1
sub %g0,%o2,%o2
cmp %o3,0xfff
bgu Large
nop
ld [%o1+%o2],%o5
mov 0,%o0
b L0
add %o4,-4,%o4
Loop0:
st %g1,[%o4+%o2]
L0: wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,0,%g1
sra %g1,20,%g4
sll %g1,12,%g1
rd %y,%g3
srl %g3,20,%g3
or %g1,%g3,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
bne,a Loop0
ld [%o1+%o2],%o5
retl
st %g1,[%o4+%o2]
Large: ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
b L1
add %o4,-4,%o4
Loop:
st %g3,[%o4+%o2]
L1: wr %g0,%o5,%y
and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%g0,%g1
rd %y,%g3
addcc %g3,%o0,%g3
addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
bne,a Loop
ld [%o1+%o2],%o5
retl
st %g3,[%o4+%o2]

365
sysdeps/sparc/rem.S Normal file
View File

@@ -0,0 +1,365 @@
/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .rem name of function to generate
* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
* true true=true => signed; true=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include "DEFS.h"
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
FUNC(.rem)
! compute sign of result; if neither is negative, no problem
orcc %o1, %o0, %g0 ! either negative?
bge 2f ! no, go do the divide
mov %o0, %g6 ! sign of remainder matches %o0
tst %o1
bge 1f
tst %o0
! %o1 is definitely negative; %o0 might also be negative
bge 2f ! if %o0 not negative...
sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
1: ! %o0 is negative, %o1 is nonnegative
sub %g0, %o0, %o0 ! make %o0 nonnegative
2:
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu Lgot_result ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu Lnot_really_big
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g7
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g7.
2: addcc %o5, %o5, %o5
bcc Lnot_too_big
add %g7, 1, %g7
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b Ldo_single_div
sub %g7, 1, %g7
Lnot_too_big:
3: cmp %o5, %o3
blu 2b
nop
be Ldo_single_div
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g7
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
Ldo_single_div:
subcc %g7, 1, %g7
bl Lend_regular_divide
nop
sub %o3, %o5, %o3
mov 1, %o2
b Lend_single_divloop
nop
Lsingle_divloop:
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
Lend_single_divloop:
subcc %g7, 1, %g7
bge Lsingle_divloop
tst %o3
b,a Lend_regular_divide
Lnot_really_big:
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be Lgot_result
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
Ldivloop:
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16:
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
Lend_regular_divide:
subcc %o4, 1, %o4
bge Ldivloop
tst %o3
bl,a Lgot_result
! non-restoring fixup here (one instruction only!)
add %o3, %o1, %o3
Lgot_result:
! check to see if answer should be < 0
tst %g6
bl,a 1f
sub %g0, %o3, %o3
1:
retl
mov %o3, %o0

365
sysdeps/sparc/sdiv.S Normal file
View File

@@ -0,0 +1,365 @@
/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .div name of function to generate
* div div=div => %o0 / %o1; div=rem => %o0 % %o1
* true true=true => signed; true=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include "DEFS.h"
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
FUNC(.div)
! compute sign of result; if neither is negative, no problem
orcc %o1, %o0, %g0 ! either negative?
bge 2f ! no, go do the divide
xor %o1, %o0, %g6 ! compute sign in any case
tst %o1
bge 1f
tst %o0
! %o1 is definitely negative; %o0 might also be negative
bge 2f ! if %o0 not negative...
sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
1: ! %o0 is negative, %o1 is nonnegative
sub %g0, %o0, %o0 ! make %o0 nonnegative
2:
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu Lgot_result ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu Lnot_really_big
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g7
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g7.
2: addcc %o5, %o5, %o5
bcc Lnot_too_big
add %g7, 1, %g7
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b Ldo_single_div
sub %g7, 1, %g7
Lnot_too_big:
3: cmp %o5, %o3
blu 2b
nop
be Ldo_single_div
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g7
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
Ldo_single_div:
subcc %g7, 1, %g7
bl Lend_regular_divide
nop
sub %o3, %o5, %o3
mov 1, %o2
b Lend_single_divloop
nop
Lsingle_divloop:
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
Lend_single_divloop:
subcc %g7, 1, %g7
bge Lsingle_divloop
tst %o3
b,a Lend_regular_divide
Lnot_really_big:
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be Lgot_result
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
Ldivloop:
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16:
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
Lend_regular_divide:
subcc %o4, 1, %o4
bge Ldivloop
tst %o3
bl,a Lgot_result
! non-restoring fixup here (one instruction only!)
sub %o2, 1, %o2
Lgot_result:
! check to see if answer should be < 0
tst %g6
bl,a 1f
sub %g0, %o2, %o2
1:
retl
mov %o2, %o0

31
sysdeps/sparc/setjmp.S Normal file
View File

@@ -0,0 +1,31 @@
/* Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <sysdep.h>
/* NOTE: This code depends on the definition of `__jmp_buf' in <jmp_buf.h>. */
ENTRY (__sigsetjmp)
/* Save our return PC and SP (second store in the jmp delay slot). */
st %o7, [%o0]
/* Save the signal mask if requested. We do this as a tail-call
for simplicity; it always returns zero. */
sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1
jmp %g1
st %sp, [%o0 + 4]

View File

@@ -0,0 +1,116 @@
! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
! add the result to a second limb vector.
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_addmul_1)
C_SYMBOL_NAME(__mpn_addmul_1):
orcc %g0,%g0,%g2
ld [%o1+0],%o4 ! 1
sll %o2,4,%g1
and %g1,(4-1)<<4,%g1
sethi %hi(LL),%g3
or %g3,%lo(LL),%g3
jmp %g3+%g1
nop
LL:
LL00: add %o0,-4,%o0
b Loop00 /* 4, 8, 12, ... */
add %o1,-4,%o1
nop
LL01: b Loop01 /* 1, 5, 9, ... */
nop
nop
nop
LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
b Loop10
add %o1,4,%o1
nop
LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
b Loop11
add %o1,-8,%o1
nop
1: addcc %g3,%g2,%g3 ! 1
ld [%o1+4],%o4 ! 2
rd %y,%g2 ! 1
addx %g0,%g2,%g2
ld [%o0+0],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] ! 1
Loop00: umul %o4,%o3,%g3 ! 2
ld [%o0+4],%g1 ! 2
addxcc %g3,%g2,%g3 ! 2
ld [%o1+8],%o4 ! 3
rd %y,%g2 ! 2
addx %g0,%g2,%g2
nop
addcc %g1,%g3,%g3
st %g3,[%o0+4] ! 2
Loop11: umul %o4,%o3,%g3 ! 3
addxcc %g3,%g2,%g3 ! 3
ld [%o1+12],%o4 ! 4
rd %y,%g2 ! 3
add %o1,16,%o1
addx %g0,%g2,%g2
ld [%o0+8],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+8] ! 3
Loop10: umul %o4,%o3,%g3 ! 4
addxcc %g3,%g2,%g3 ! 4
ld [%o1+0],%o4 ! 1
rd %y,%g2 ! 4
addx %g0,%g2,%g2
ld [%o0+12],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+12] ! 4
add %o0,16,%o0
addx %g0,%g2,%g2
Loop01: addcc %o2,-4,%o2
bg 1b
umul %o4,%o3,%g3 ! 1
addcc %g3,%g2,%g3 ! 4
rd %y,%g2 ! 4
addx %g0,%g2,%g2
ld [%o0+0],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] ! 4
addx %g0,%g2,%o0
retl
nop
! umul, ld, addxcc, rd, st
! umul, ld, addxcc, rd, ld, addcc, st, addx

View File

@@ -0,0 +1,91 @@
! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
! store the product in a second limb vector.
! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
#include "sysdep.h"
.text
.align 8
.global C_SYMBOL_NAME(__mpn_mul_1)
C_SYMBOL_NAME(__mpn_mul_1):
sll %o2,4,%g1
and %g1,(4-1)<<4,%g1
sethi %hi(LL),%g3
or %g3,%lo(LL),%g3
jmp %g3+%g1
ld [%o1+0],%o4 ! 1
LL:
LL00: add %o0,-4,%o0
add %o1,-4,%o1
b Loop00 /* 4, 8, 12, ... */
orcc %g0,%g0,%g2
LL01: b Loop01 /* 1, 5, 9, ... */
orcc %g0,%g0,%g2
nop
nop
LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
add %o1,4,%o1
b Loop10
orcc %g0,%g0,%g2
nop
LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
add %o1,-8,%o1
b Loop11
orcc %g0,%g0,%g2
Loop: addcc %g3,%g2,%g3 ! 1
ld [%o1+4],%o4 ! 2
st %g3,[%o0+0] ! 1
rd %y,%g2 ! 1
Loop00: umul %o4,%o3,%g3 ! 2
addxcc %g3,%g2,%g3 ! 2
ld [%o1+8],%o4 ! 3
st %g3,[%o0+4] ! 2
rd %y,%g2 ! 2
Loop11: umul %o4,%o3,%g3 ! 3
addxcc %g3,%g2,%g3 ! 3
ld [%o1+12],%o4 ! 4
add %o1,16,%o1
st %g3,[%o0+8] ! 3
rd %y,%g2 ! 3
Loop10: umul %o4,%o3,%g3 ! 4
addxcc %g3,%g2,%g3 ! 4
ld [%o1+0],%o4 ! 1
st %g3,[%o0+12] ! 4
add %o0,16,%o0
rd %y,%g2 ! 4
addx %g0,%g2,%g2
Loop01: addcc %o2,-4,%o2
bg Loop
umul %o4,%o3,%g3 ! 1
addcc %g3,%g2,%g3 ! 4
st %g3,[%o0+0] ! 4
rd %y,%g2 ! 4
retl
addx %g0,%g2,%o0

View File

@@ -0,0 +1,57 @@
! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
! subtract the result from a second limb vector.
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_submul_1)
C_SYMBOL_NAME(__mpn_submul_1):
sub %g0,%o2,%o2 ! negate ...
sll %o2,2,%o2 ! ... and scale size
sub %o1,%o2,%o1 ! o1 is offset s1_ptr
sub %o0,%o2,%g1 ! g1 is offset res_ptr
mov 0,%o0 ! clear cy_limb
Loop: ld [%o1+%o2],%o4
ld [%g1+%o2],%g2
umul %o4,%o3,%o5
rd %y,%g3
addcc %o5,%o0,%o5
addx %g3,0,%o0
subcc %g2,%o5,%g2
addx %o0,0,%o0
st %g2,[%g1+%o2]
addcc %o2,4,%o2
bne Loop
nop
retl
nop

View File

@@ -0,0 +1,186 @@
! SPARC __udiv_qrnnd division support, used from longlong.h.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! rem_ptr o0
! n1 o1
! n0 o2
! d o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__udiv_qrnnd)
C_SYMBOL_NAME(__udiv_qrnnd):
tst %o3
bneg Largedivisor
mov 8,%g1
b Lp1
addxcc %o2,%o2,%o2
Lplop: bcc Ln1
addxcc %o2,%o2,%o2
Lp1: addx %o1,%o1,%o1
subcc %o1,%o3,%o4
bcc Ln2
addxcc %o2,%o2,%o2
Lp2: addx %o1,%o1,%o1
subcc %o1,%o3,%o4
bcc Ln3
addxcc %o2,%o2,%o2
Lp3: addx %o1,%o1,%o1
subcc %o1,%o3,%o4
bcc Ln4
addxcc %o2,%o2,%o2
Lp4: addx %o1,%o1,%o1
addcc %g1,-1,%g1
bne Lplop
subcc %o1,%o3,%o4
bcc Ln5
addxcc %o2,%o2,%o2
Lp5: st %o1,[%o0]
retl
xnor %g0,%o2,%o0
Lnlop: bcc Lp1
addxcc %o2,%o2,%o2
Ln1: addx %o4,%o4,%o4
subcc %o4,%o3,%o1
bcc Lp2
addxcc %o2,%o2,%o2
Ln2: addx %o4,%o4,%o4
subcc %o4,%o3,%o1
bcc Lp3
addxcc %o2,%o2,%o2
Ln3: addx %o4,%o4,%o4
subcc %o4,%o3,%o1
bcc Lp4
addxcc %o2,%o2,%o2
Ln4: addx %o4,%o4,%o4
addcc %g1,-1,%g1
bne Lnlop
subcc %o4,%o3,%o1
bcc Lp5
addxcc %o2,%o2,%o2
Ln5: st %o4,[%o0]
retl
xnor %g0,%o2,%o0
Largedivisor:
and %o2,1,%o5 ! %o5 = n0 & 1
srl %o2,1,%o2
sll %o1,31,%g2
or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1)
srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1)
and %o3,1,%g2
srl %o3,1,%g3 ! %g3 = floor(d / 2)
add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
b LLp1
addxcc %o2,%o2,%o2
LLplop: bcc LLn1
addxcc %o2,%o2,%o2
LLp1: addx %o1,%o1,%o1
subcc %o1,%g3,%o4
bcc LLn2
addxcc %o2,%o2,%o2
LLp2: addx %o1,%o1,%o1
subcc %o1,%g3,%o4
bcc LLn3
addxcc %o2,%o2,%o2
LLp3: addx %o1,%o1,%o1
subcc %o1,%g3,%o4
bcc LLn4
addxcc %o2,%o2,%o2
LLp4: addx %o1,%o1,%o1
addcc %g1,-1,%g1
bne LLplop
subcc %o1,%g3,%o4
bcc LLn5
addxcc %o2,%o2,%o2
LLp5: add %o1,%o1,%o1 ! << 1
tst %g2
bne Oddp
add %o5,%o1,%o1
st %o1,[%o0]
retl
xnor %g0,%o2,%o0
LLnlop: bcc LLp1
addxcc %o2,%o2,%o2
LLn1: addx %o4,%o4,%o4
subcc %o4,%g3,%o1
bcc LLp2
addxcc %o2,%o2,%o2
LLn2: addx %o4,%o4,%o4
subcc %o4,%g3,%o1
bcc LLp3
addxcc %o2,%o2,%o2
LLn3: addx %o4,%o4,%o4
subcc %o4,%g3,%o1
bcc LLp4
addxcc %o2,%o2,%o2
LLn4: addx %o4,%o4,%o4
addcc %g1,-1,%g1
bne LLnlop
subcc %o4,%g3,%o1
bcc LLp5
addxcc %o2,%o2,%o2
LLn5: add %o4,%o4,%o4 ! << 1
tst %g2
bne Oddn
add %o5,%o4,%o4
st %o4,[%o0]
retl
xnor %g0,%o2,%o0
Oddp: xnor %g0,%o2,%o2
! q' in %o2. r' in %o1
addcc %o1,%o2,%o1
bcc LLp6
addx %o2,0,%o2
sub %o1,%o3,%o1
LLp6: subcc %o1,%o3,%g0
bcs LLp7
subx %o2,-1,%o2
sub %o1,%o3,%o1
LLp7: st %o1,[%o0]
retl
mov %o2,%o0
Oddn: xnor %g0,%o2,%o2
! q' in %o2. r' in %o4
addcc %o4,%o2,%o4
bcc LLn6
addx %o2,0,%o2
sub %o4,%o3,%o4
LLn6: subcc %o4,%o3,%g0
bcs LLn7
subx %o2,-1,%o2
sub %o4,%o3,%o4
LLn7: st %o4,[%o0]
retl
mov %o2,%o0

34
sysdeps/sparc/sqrt.c Normal file
View File

@@ -0,0 +1,34 @@
/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <ansidecl.h>
#include <errno.h>
#include <math.h>
#ifndef __GNUC__
#error This file uses GNU C extensions; you must compile with GCC.
#endif
/* Return the square root of X. */
double
DEFUN(sqrt, (x), double x)
{
register double result;
asm("fsqrtd %1, %0" : "=f" (result) : "f" (x));
return result;
}

134
sysdeps/sparc/sub_n.S Normal file
View File

@@ -0,0 +1,134 @@
! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr %o0
! s1_ptr %o1
! s2_ptr %o2
! size %o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_sub_n)
C_SYMBOL_NAME(__mpn_sub_n):
ld [%o1+0],%o4 ! read first limb from s1_ptr
srl %o3,4,%g1
ld [%o2+0],%o5 ! read first limb from s2_ptr
sub %g0,%o3,%o3
andcc %o3,(16-1),%o3
be Lzero
nop
sll %o3,2,%o3 ! multiply by 4
sub %o0,%o3,%o0 ! adjust res_ptr
sub %o1,%o3,%o1 ! adjust s1_ptr
sub %o2,%o3,%o2 ! adjust s2_ptr
mov %o4,%g2
sethi %hi(Lbase),%g3
or %g3,%lo(Lbase),%g3
sll %o3,2,%o3 ! multiply by 4
jmp %g3+%o3
mov %o5,%g3
Loop: subxcc %g2,%g3,%o3
add %o1,64,%o1
st %o3,[%o0+60]
add %o2,64,%o2
ld [%o1+0],%o4
add %o0,64,%o0
ld [%o2+0],%o5
Lzero: sub %g1,1,%g1 ! add 0 + 16r limbs (adjust loop counter)
Lbase: ld [%o1+4],%g2
subxcc %o4,%o5,%o3
ld [%o2+4],%g3
st %o3,[%o0+0]
ld [%o1+8],%o4 ! add 15 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+8],%o5
st %o3,[%o0+4]
ld [%o1+12],%g2 ! add 14 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+12],%g3
st %o3,[%o0+8]
ld [%o1+16],%o4 ! add 13 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+16],%o5
st %o3,[%o0+12]
ld [%o1+20],%g2 ! add 12 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+20],%g3
st %o3,[%o0+16]
ld [%o1+24],%o4 ! add 11 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+24],%o5
st %o3,[%o0+20]
ld [%o1+28],%g2 ! add 10 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+28],%g3
st %o3,[%o0+24]
ld [%o1+32],%o4 ! add 9 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+32],%o5
st %o3,[%o0+28]
ld [%o1+36],%g2 ! add 8 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+36],%g3
st %o3,[%o0+32]
ld [%o1+40],%o4 ! add 7 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+40],%o5
st %o3,[%o0+36]
ld [%o1+44],%g2 ! add 6 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+44],%g3
st %o3,[%o0+40]
ld [%o1+48],%o4 ! add 5 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+48],%o5
st %o3,[%o0+44]
ld [%o1+52],%g2 ! add 4 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+52],%g3
st %o3,[%o0+48]
ld [%o1+56],%o4 ! add 3 + 16r limbs
subxcc %g2,%g3,%o3
ld [%o2+56],%o5
st %o3,[%o0+52]
ld [%o1+60],%g2 ! add 2 + 16r limbs
subxcc %o4,%o5,%o3
ld [%o2+60],%g3
st %o3,[%o0+56]
subx %g0,%g0,%o4
tst %g1
bne Loop
subcc %g0,%o4,%g0 ! restore cy (delay slot)
subxcc %g2,%g3,%o3
st %o3,[%o0+60] ! store most significant limb
retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb

146
sysdeps/sparc/submul_1.S Normal file
View File

@@ -0,0 +1,146 @@
! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
! the result from a second limb vector.
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr o0
! s1_ptr o1
! size o2
! s2_limb o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(__mpn_submul_1)
C_SYMBOL_NAME(__mpn_submul_1):
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
add %o1,%o2,%o1
sub %g0,%o2,%o2
cmp %o3,0xfff
bgu Large
nop
ld [%o1+%o2],%o5
mov 0,%o0
b L0
add %o4,-4,%o4
Loop0:
subcc %o5,%g1,%g1
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g1,[%o4+%o2]
L0: wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,%o5,%g1
mulscc %g1,0,%g1
sra %g1,20,%g4
sll %g1,12,%g1
rd %y,%g3
srl %g3,20,%g3
or %g1,%g3,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
bne Loop0
ld [%o4+%o2],%o5
subcc %o5,%g1,%g1
addx %o0,%g0,%o0
retl
st %g1,[%o4+%o2]
Large: ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
b L1
add %o4,-4,%o4
Loop:
subcc %o5,%g3,%g3
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g3,[%o4+%o2]
L1: wr %g0,%o5,%y
and %o5,%g4,%g2
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%o3,%g1
mulscc %g1,%g0,%g1
rd %y,%g3
addcc %g3,%o0,%g3
addx %g2,%g1,%o0
addcc %o2,4,%o2
bne Loop
ld [%o4+%o2],%o5
subcc %o5,%g3,%g3
addx %o0,%g0,%o0
retl
st %g3,[%o4+%o2]

348
sysdeps/sparc/udiv.S Normal file
View File

@@ -0,0 +1,348 @@
/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .udiv name of function to generate
* div div=div => %o0 / %o1; div=rem => %o0 % %o1
* false false=true => signed; false=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include "DEFS.h"
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
FUNC(.udiv)
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu Lgot_result ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu Lnot_really_big
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g7
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g7.
2: addcc %o5, %o5, %o5
bcc Lnot_too_big
add %g7, 1, %g7
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b Ldo_single_div
sub %g7, 1, %g7
Lnot_too_big:
3: cmp %o5, %o3
blu 2b
nop
be Ldo_single_div
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g7
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
Ldo_single_div:
subcc %g7, 1, %g7
bl Lend_regular_divide
nop
sub %o3, %o5, %o3
mov 1, %o2
b Lend_single_divloop
nop
Lsingle_divloop:
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
Lend_single_divloop:
subcc %g7, 1, %g7
bge Lsingle_divloop
tst %o3
b,a Lend_regular_divide
Lnot_really_big:
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be Lgot_result
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
Ldivloop:
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16:
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
Lend_regular_divide:
subcc %o4, 1, %o4
bge Ldivloop
tst %o3
bl,a Lgot_result
! non-restoring fixup here (one instruction only!)
sub %o2, 1, %o2
Lgot_result:
retl
mov %o2, %o0

143
sysdeps/sparc/udiv_qrnnd.S Normal file
View File

@@ -0,0 +1,143 @@
! SPARC __udiv_qrnnd division support, used from longlong.h.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! rem_ptr i0
! n1 i1
! n0 i2
! d i3
#include "sysdep.h"
#undef ret /* Kludge for glibc */
.text
.align 8
LC0: .double 0r4294967296
LC1: .double 0r2147483648
.align 4
.global C_SYMBOL_NAME(__udiv_qrnnd)
C_SYMBOL_NAME(__udiv_qrnnd):
!#PROLOGUE# 0
save %sp,-104,%sp
!#PROLOGUE# 1
st %i1,[%fp-8]
ld [%fp-8],%f10
sethi %hi(LC0),%o7
fitod %f10,%f4
ldd [%o7+%lo(LC0)],%f8
cmp %i1,0
bge L248
mov %i0,%i5
faddd %f4,%f8,%f4
L248:
st %i2,[%fp-8]
ld [%fp-8],%f10
fmuld %f4,%f8,%f6
cmp %i2,0
bge L249
fitod %f10,%f2
faddd %f2,%f8,%f2
L249:
st %i3,[%fp-8]
faddd %f6,%f2,%f2
ld [%fp-8],%f10
cmp %i3,0
bge L250
fitod %f10,%f4
faddd %f4,%f8,%f4
L250:
fdivd %f2,%f4,%f2
sethi %hi(LC1),%o7
ldd [%o7+%lo(LC1)],%f4
fcmped %f2,%f4
nop
fbge,a L251
fsubd %f2,%f4,%f2
fdtoi %f2,%f2
st %f2,[%fp-8]
b L252
ld [%fp-8],%i4
L251:
fdtoi %f2,%f2
st %f2,[%fp-8]
ld [%fp-8],%i4
sethi %hi(-2147483648),%g2
xor %i4,%g2,%i4
L252:
wr %g0,%i4,%y
sra %i3,31,%g2
and %i4,%g2,%g2
andcc %g0,0,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,%i3,%g1
mulscc %g1,0,%g1
add %g1,%g2,%i0
rd %y,%g3
subcc %i2,%g3,%o7
subxcc %i1,%i0,%g0
be L253
cmp %o7,%i3
add %i4,-1,%i0
add %o7,%i3,%o7
st %o7,[%i5]
ret
restore
L253:
blu L246
mov %i4,%i0
add %i4,1,%i0
sub %o7,%i3,%o7
L246:
st %o7,[%i5]
ret
restore

153
sysdeps/sparc/umul.S Normal file
View File

@@ -0,0 +1,153 @@
/*
* Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
* upper 32 bits of the 64-bit product).
*
* This code optimizes short (less than 13-bit) multiplies. Short
* multiplies require 25 instruction cycles, and long ones require
* 45 instruction cycles.
*
* On return, overflow has occurred (%o1 is not zero) if and only if
* the Z condition code is clear, allowing, e.g., the following:
*
* call .umul
* nop
* bnz overflow (or tnz)
*/
#include "DEFS.h"
FUNC(.umul)
or %o0, %o1, %o4
mov %o0, %y ! multiplier -> Y
andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
be Lmul_shortway ! if zero, can do it the short way
andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
/*
* Long multiply. 32 steps, followed by a final shift step.
*/
mulscc %o4, %o1, %o4 ! 1
mulscc %o4, %o1, %o4 ! 2
mulscc %o4, %o1, %o4 ! 3
mulscc %o4, %o1, %o4 ! 4
mulscc %o4, %o1, %o4 ! 5
mulscc %o4, %o1, %o4 ! 6
mulscc %o4, %o1, %o4 ! 7
mulscc %o4, %o1, %o4 ! 8
mulscc %o4, %o1, %o4 ! 9
mulscc %o4, %o1, %o4 ! 10
mulscc %o4, %o1, %o4 ! 11
mulscc %o4, %o1, %o4 ! 12
mulscc %o4, %o1, %o4 ! 13
mulscc %o4, %o1, %o4 ! 14
mulscc %o4, %o1, %o4 ! 15
mulscc %o4, %o1, %o4 ! 16
mulscc %o4, %o1, %o4 ! 17
mulscc %o4, %o1, %o4 ! 18
mulscc %o4, %o1, %o4 ! 19
mulscc %o4, %o1, %o4 ! 20
mulscc %o4, %o1, %o4 ! 21
mulscc %o4, %o1, %o4 ! 22
mulscc %o4, %o1, %o4 ! 23
mulscc %o4, %o1, %o4 ! 24
mulscc %o4, %o1, %o4 ! 25
mulscc %o4, %o1, %o4 ! 26
mulscc %o4, %o1, %o4 ! 27
mulscc %o4, %o1, %o4 ! 28
mulscc %o4, %o1, %o4 ! 29
mulscc %o4, %o1, %o4 ! 30
mulscc %o4, %o1, %o4 ! 31
mulscc %o4, %o1, %o4 ! 32
mulscc %o4, %g0, %o4 ! final shift
/*
* Normally, with the shift-and-add approach, if both numbers are
* positive you get the correct result. With 32-bit two's-complement
* numbers, -x is represented as
*
* x 32
* ( 2 - ------ ) mod 2 * 2
* 32
* 2
*
* (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
* we can treat this as if the radix point were just to the left
* of the sign bit (multiply by 2^32), and get
*
* -x = (2 - x) mod 2
*
* Then, ignoring the `mod 2's for convenience:
*
* x * y = xy
* -x * y = 2y - xy
* x * -y = 2x - xy
* -x * -y = 4 - 2x - 2y + xy
*
* For signed multiplies, we subtract (x << 32) from the partial
* product to fix this problem for negative multipliers (see mul.s).
* Because of the way the shift into the partial product is calculated
* (N xor V), this term is automatically removed for the multiplicand,
* so we don't have to adjust.
*
* But for unsigned multiplies, the high order bit wasn't a sign bit,
* and the correction is wrong. So for unsigned multiplies where the
* high order bit is one, we end up with xy - (y << 32). To fix it
* we add y << 32.
*/
#if 0
tst %o1
bl,a 1f ! if %o1 < 0 (high order bit = 1),
add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
1: rd %y, %o0 ! get lower half of product
retl
addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
#else
/* Faster code from tege@sics.se. */
sra %o1, 31, %o2 ! make mask from sign bit
and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
rd %y, %o0 ! get lower half of product
retl
addcc %o4, %o2, %o1 ! add compensation and put upper half in place
#endif
Lmul_shortway:
/*
* Short multiply. 12 steps, followed by a final shift step.
* The resulting bits are off by 12 and (32-12) = 20 bit positions,
* but there is no problem with %o0 being negative (unlike above),
* and overflow is impossible (the answer is at most 24 bits long).
*/
mulscc %o4, %o1, %o4 ! 1
mulscc %o4, %o1, %o4 ! 2
mulscc %o4, %o1, %o4 ! 3
mulscc %o4, %o1, %o4 ! 4
mulscc %o4, %o1, %o4 ! 5
mulscc %o4, %o1, %o4 ! 6
mulscc %o4, %o1, %o4 ! 7
mulscc %o4, %o1, %o4 ! 8
mulscc %o4, %o1, %o4 ! 9
mulscc %o4, %o1, %o4 ! 10
mulscc %o4, %o1, %o4 ! 11
mulscc %o4, %o1, %o4 ! 12
mulscc %o4, %g0, %o4 ! final shift
/*
* %o4 has 20 of the bits that should be in the result; %y has
* the bottom 12 (as %y's top 12). That is:
*
* %o4 %y
* +----------------+----------------+
* | -12- | -20- | -12- | -20- |
* +------(---------+------)---------+
* -----result-----
*
* The 12 bits of %o4 left of the `result' area are all zero;
* in fact, all top 20 bits of %o4 are zero.
*/
rd %y, %o5
sll %o4, 12, %o0 ! shift middle bits left 12
srl %o5, 20, %o5 ! shift low bits right 20
or %o5, %o0, %o0
retl
addcc %g0, %g0, %o1 ! %o1 = zero, and set Z

348
sysdeps/sparc/urem.S Normal file
View File

@@ -0,0 +1,348 @@
/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
* Division and remainder, from Appendix E of the Sparc Version 8
* Architecture Manual, with fixes from Gordon Irlam.
*/
/*
* Input: dividend and divisor in %o0 and %o1 respectively.
*
* m4 parameters:
* .urem name of function to generate
* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
* false false=true => signed; false=false => unsigned
*
* Algorithm parameters:
* N how many bits per iteration we try to get (4)
* WORDSIZE total number of bits (32)
*
* Derived constants:
* TOPBITS number of bits in the top decade of a number
*
* Important variables:
* Q the partial quotient under development (initially 0)
* R the remainder so far, initially the dividend
* ITER number of main division loop iterations required;
* equal to ceil(log2(quotient) / N). Note that this
* is the log base (2^N) of the quotient.
* V the current comparand, initially divisor*2^(ITER*N-1)
*
* Cost:
* Current estimate for non-large dividend is
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
*/
#include "DEFS.h"
#ifdef __svr4__
#include <sys/trap.h>
#else
#include <machine/trap.h>
#endif
FUNC(.urem)
! Ready to divide. Compute size of quotient; scale comparand.
orcc %o1, %g0, %o5
bne 1f
mov %o0, %o3
! Divide by zero trap. If it returns, return 0 (about as
! wrong as possible, but that is what SunOS does...).
ta ST_DIV0
retl
clr %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
blu Lgot_result ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
blu Lnot_really_big
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
! as our usual N-at-a-shot divide step will cause overflow and havoc.
! The number of bits in the result here is N*ITER+SC, where SC <= N.
! Compute ITER in an unorthodox manner: know we need to shift V into
! the top decade: so do not even bother to compare to R.
1:
cmp %o5, %g1
bgeu 3f
mov 1, %g7
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
! Now compute %g7.
2: addcc %o5, %o5, %o5
bcc Lnot_too_big
add %g7, 1, %g7
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
b Ldo_single_div
sub %g7, 1, %g7
Lnot_too_big:
3: cmp %o5, %o3
blu 2b
nop
be Ldo_single_div
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
! dec %g7
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
Ldo_single_div:
subcc %g7, 1, %g7
bl Lend_regular_divide
nop
sub %o3, %o5, %o3
mov 1, %o2
b Lend_single_divloop
nop
Lsingle_divloop:
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
! %o3 >= 0
sub %o3, %o5, %o3
b 2f
add %o2, 1, %o2
1: ! %o3 < 0
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
Lend_single_divloop:
subcc %g7, 1, %g7
bge Lsingle_divloop
tst %o3
b,a Lend_regular_divide
Lnot_really_big:
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
be Lgot_result
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
Ldivloop:
sll %o2, 4, %o2
! depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16:
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15:
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13:
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
Lend_regular_divide:
subcc %o4, 1, %o4
bge Ldivloop
tst %o3
bl,a Lgot_result
! non-restoring fixup here (one instruction only!)
add %o3, %o1, %o3
Lgot_result:
retl
mov %o3, %o0