mirror of
https://sourceware.org/git/glibc.git
synced 2025-12-24 17:51:17 +03:00
Update.
1997-08-24 12:24 Ulrich Drepper <drepper@cygnus.com> * configure.in (INSTALL): Quote `$'. * libc.map: Add __xpg_basename. * csu/Makefile (initfini.s): Disable optimization. * elf/dl-deps.c: Implement handling of DL_FILTER. * elf/dl-load.c (_dl_init_paths): Add error check. * intl/finddomain.c (_nl_find_domain): Correct comment. * intl/localealias.c: Include <bits/libc-lock.h> not <libc-lock.h>. * libio/stdio.h: Make {,v}snprintf available if __USE_BSD. Change extern inline functions to work correctly in C++. * locale/iso-4217.def: Update for more recent ISO 4217 version. * locale/loadlocale.c (_nl_load_locale): Add cast. * manual/message.texi: Finish gettext section. * posix/getopt_init.c: Don't use relative #include path. (__getopt_clean_environment): Change function to take pointer to environment as argument. Optimize generation of test string a bit. * sysdeps/unix/sysv/linux/init-first.c: Call __getopt_clean_environment with additional argument. * posix/glob.c: Add prototype for next_brace_sub. * sysdeps/generic/dl-sysdep.c: Recognize AT_BASE value on auxiliary vector. * sysdeps/i386/dl-machine.h (elf_machine_load_address): Rewrite to not generate relocation entry. Suggested by Richard Henderson. (ELF_MACHINE_BEFORE_RTLD_RELOC): Removed. (elf_machine_runtime_setup): Add .aligns. * sysdeps/i386/fpu/fraiseexcpt.c: Add volatile to asms. * sysdeps/i386/fpu/bits/mathinline.h: Partially undo change of 1997-08-14 03:14. gcc 2.7.2* is really broken in some aspects. * sysdeps/standalone/i386/i386.h: Clean up asm statements a bit. * sysdeps/standalone/i960/i960ca.h: Likewise. 1997-08-22 19:04 Richard Henderson <rth@cygnus.com> * elf/rtld.c (_dl_start): Init _dl_rtld_map.l_opencount due to undocumented test addition in _dl_map_object. Support ET_EXEC versions of ld.so, for debugging at least: * elf/dl-load.c (_dl_map_object): Add_name_to_object could get called despite the DT_SONAME != NULL test, segfaulting. Simplify the code here as well. 
* elf/dl-lookup.c (do_lookup): Skip objects with no symtab. (_dl_setup_hash): Likewise for hash tables. * elf/dl-version.c (_dl_check_map_versions): Likewise for strtabs. * elf/rtld.c (_dl_start): Likewise for rpath. (_dl_rtld_libname2): New variable. (dl_main): Use it to add an soname for ourselves when we don't have one of our own. Base it on the target's .interp. (dl_main): Again, skip printing of objects that don't have strtabs. Sparc 32 merge: * elf/dl-runtime.c (ELF_FIXUP_RETURN_VALUE): Provide default value. (fixup): Simplify code. Use ELF_FIXUP_RETURN_VALUE. (profile_fixup): Likewise, though this still needs fixing for Sparc32 and PPC. * sysdeps/powerpc/dl-machine.h: Transmute ELF_FIXUP_RETURNS_ADDRESS to ELF_FIXUP_RETURN_VALUE. * sysdeps/sparc/sparc32/dl-machine.h: Implement lazy relocation. Fix up _dl_start_user to handle _dl_skip_args properly. Use _dl_hwcap to determine if "flush" is available/needed. * sysdeps/sparc/configure.in: Remove. It doesn't actually do anything anymore, and what it did do is done somewhere else. * sysdeps/sparc/configure: Likewise. * sysdeps/sparc/fpu/bits/mathdef.h (FP_ILOGB0, FP_ILOGBNAN): New. * sysdeps/sparc/fpu/fraiseexcpt.c: Rearrange for smaller code. * sysdeps/sparc/sparc32/Makefile: Fix sparc->sparc/sparc32 bits in divrem expansions. * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h (END, LOC): New definitions for assembly syntax differences. * sysdeps/sparc/sparc32/__longjmp.S: %g6,%g7 are reserved to the "system". Use %g2,%g3 instead. Use new local label macro. * sysdeps/sparc/sparc32/add_n.S: Use <sysdep.h> and ENTRY, END, and LOC for proper assembly headers/footers. * sysdeps/sparc/sparc32/addmul_1.S: Likewise. * sysdeps/sparc/sparc32/alloca.S: Likewise. * sysdeps/sparc/sparc32/dotmul.S: Likewise. * sysdeps/sparc/sparc32/lshift.S: Likewise. * sysdeps/sparc/sparc32/mul_1.S: Likewise. * sysdeps/sparc/sparc32/rshift.S: Likewise. * sysdeps/sparc/sparc32/sparcv8/addmul_1.S: Likewise. 
* sysdeps/sparc/sparc32/sparcv8/mul_1.S: Likewise. * sysdeps/sparc/sparc32/sparcv8/submul_1.S: Likewise. * sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S: Likewise. * sysdeps/sparc/sparc32/sub_n.S: Likewise. * sysdeps/sparc/sparc32/submul_1.S: Likewise. * sysdeps/sparc/sparc32/udiv_qrnnd.S: Likewise. * sysdeps/sparc/sparc32/umul.S: Likewise. * sysdeps/sparc/sparc32/divrem.m4: Likewise. * sysdeps/sparc/sparc32/rem.S: Regenerate. * sysdeps/sparc/sparc32/sdiv.S: Regenerate. * sysdeps/sparc/sparc32/udiv.S: Regenerate. * sysdeps/sparc/sparc32/urem.S: Regenerate. * sysdeps/sparc/sparc32/sparcv8/dotmul.S: New file. * sysdeps/sparc/sparc32/sparcv8/rem.S: New file. * sysdeps/sparc/sparc32/sparcv8/sdiv.S: New file. * sysdeps/sparc/sparc32/sparcv8/udiv.S: New file. * sysdeps/sparc/sparc32/sparcv8/umul.S: New file. * sysdeps/sparc/sparc32/sparcv8/urem.S: New file. * sysdeps/sparc/sparc32/bsd-_setjmp.S: Dike out. * sysdeps/sparc/sparc32/bsd-setjmp.S: Likewise. * sysdeps/sparc/sparc32/setjmp.S: Add _setjmp and setjmp entry points. * sysdeps/unix/sysv/linux/sparc/sparc32/__sigtrampoline.S: Clean up PIC code. * sysdeps/sparc/sparc32/elf/start.S: New file, slightly modified from the sparc64 version. * sysdeps/sparc/sparc32/elf/start.c: Removed. * sysdeps/unix/sysv/linux/sparc/sparc32/init-first.h: Rewrite in assembly based on the sparc64 version. * sysdeps/sparc/sparc32/fpu/bits/fenv.h: Duh. Use proper syntax for manipulating %fsr. * sysdeps/sparc/sparc32/fpu/fpu_control.h: Make IEEE conformance be the default. * elf/elf.h (HWCAP_SPARC_*): New definitions. * elf/rtld.c (_dl_hwcap): New variable. * sysdeps/generic/dl-sysdep.c (_dl_sysdep_start): Record AT_HWCAP. * sysdeps/unix/sysv/linux/sparc/sparc32/getpagesize.c: New file. Attempt to get hold of the page size based on what we might have been told at startup time in _dl_pagesize. This will be obsolete when I finish the kernel hooks for a proper sysconf(), stay tuned. 
Sparc 64 merge: * sysdeps/sparc/sparc64/dl-machine.h (ELF_FIXUP_RETURN_VALUE): New. Figure out the right thing to return based on the .plt format. * sysdeps/sparc/sparc64/fpu/fpu_control.h: Update comment. * sysdeps/unix/sysv/linux/sparc/sparc64/bits/types.h (__dev_t): Should have been 64-bits wide. * sysdeps/unix/sysv/linux/sparc/sparc64/init-first.h: sll->sllx, optimize for branch delay slot usage. 1997-08-22 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de> * csu/Makefile ($(objpfx)crt%.o): Fix a missing *.so -> *.os change. 1997-08-20 Andreas Jaeger <aj@arthur.rhein-neckar.de> * math/libm-test.c (identities): Change epsilon. * sysdeps/i386/fpu/bits/mathinline.h: Correct arguments to fabs, fabsf, fabsl, __fabsl. * sysdeps/libm-i387/e_remainderl.S: Pop extra value from FPU stack. * sysdeps/libm-ieee754/s_csinhl.c: Include <fenv.h>.
This commit is contained in:
4
sysdeps/sparc/configure
vendored
4
sysdeps/sparc/configure
vendored
@@ -1,4 +0,0 @@
|
||||
# Local configure fragment for sysdeps/sparc.
|
||||
|
||||
# The assembler on SPARC needs the -fPIC flag even when it's assembler code.
|
||||
ASFLAGS_SO=-fPIC
|
||||
@@ -1,6 +0,0 @@
|
||||
sinclude(./aclocal.m4)dnl Autoconf lossage
|
||||
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
|
||||
# Local configure fragment for sysdeps/sparc.
|
||||
|
||||
# The assembler on SPARC needs the -fPIC flag even when it's assembler code.
|
||||
ASFLAGS_SO=-fPIC
|
||||
@@ -62,3 +62,7 @@ typedef double double_t;
|
||||
#define INFINITY HUGE_VAL
|
||||
|
||||
#endif
|
||||
|
||||
/* The values returned by `ilogb' for 0 and NaN respectively. */
|
||||
#define FP_ILOGB0 0x80000001
|
||||
#define FP_ILOGBNAN 0x7fffffff
|
||||
|
||||
@@ -20,14 +20,16 @@
|
||||
#include <fenv.h>
|
||||
#include <math.h>
|
||||
|
||||
static void
|
||||
ignore_me(double foo)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
feraiseexcept (int excepts)
|
||||
{
|
||||
static volatile double sink;
|
||||
static const struct {
|
||||
double zero, one, max, min, sixteen, pi;
|
||||
} c = {
|
||||
0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI
|
||||
};
|
||||
|
||||
/* Raise exceptions represented by EXCEPTS. But we must raise only
|
||||
one signal at a time. It is important that if the overflow/underflow
|
||||
exception and the inexact exception are given at the same time,
|
||||
@@ -37,30 +39,30 @@ feraiseexcept (int excepts)
|
||||
if ((FE_INVALID & excepts) != 0)
|
||||
{
|
||||
/* One example of an invalid operation is 0/0. */
|
||||
ignore_me (0.0 / 0.0);
|
||||
sink = c.zero / c.zero;
|
||||
}
|
||||
|
||||
/* Next: division by zero. */
|
||||
if ((FE_DIVBYZERO & excepts) != 0)
|
||||
{
|
||||
ignore_me (1.0 / 0.0);
|
||||
sink = c.one / c.zero;
|
||||
}
|
||||
|
||||
/* Next: overflow. */
|
||||
if ((FE_OVERFLOW & excepts) != 0)
|
||||
{
|
||||
ignore_me (LDBL_MAX * LDBL_MAX);
|
||||
sink = c.max * c.max;
|
||||
}
|
||||
|
||||
/* Next: underflow. */
|
||||
if ((FE_UNDERFLOW & excepts) != 0)
|
||||
{
|
||||
ignore_me (LDBL_MIN / 16.0);
|
||||
sink = c.min / c.sixteen;
|
||||
}
|
||||
|
||||
/* Last: inexact. */
|
||||
if ((FE_INEXACT & excepts) != 0)
|
||||
{
|
||||
ignore_me (1.0 / M_PI);
|
||||
sink = c.one / c.pi;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ divrem := sdiv udiv rem urem
|
||||
+divrem-S-rem := true
|
||||
+divrem-S-udiv := false
|
||||
+divrem-S-urem := false
|
||||
$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
|
||||
$(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S): $(sysdep_dir)/sparc/sparc32/divrem.m4
|
||||
(echo "define(NAME,\`.$(+divrem-NAME)')\
|
||||
define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
|
||||
define(S,\`$(+divrem-S-$(+divrem-NAME))')\
|
||||
@@ -48,4 +48,4 @@ $(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
|
||||
mv -f $@-tmp $@
|
||||
test ! -d CVS || cvs commit -m'Regenerated from $<' $@
|
||||
|
||||
sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
|
||||
sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S)
|
||||
|
||||
@@ -22,38 +22,43 @@
|
||||
#include <bits/setjmp.h>
|
||||
#define ENV(reg) [%g1 + (reg * 4)]
|
||||
|
||||
ENTRY (__longjmp)
|
||||
ENTRY(__longjmp)
|
||||
/* Store our arguments in global registers so we can still
|
||||
use them while unwinding frames and their register windows. */
|
||||
mov %o0, %g1 /* ENV in %g1 */
|
||||
orcc %o1, %g0, %g6 /* VAL in %g6 */
|
||||
orcc %o1, %g0, %g2 /* VAL in %g2 */
|
||||
be,a 0f /* Branch if zero; else skip delay slot. */
|
||||
mov 1, %g6 /* Delay slot only hit if zero: VAL = 1. */
|
||||
mov 1, %g2 /* Delay slot only hit if zero: VAL = 1. */
|
||||
0:
|
||||
|
||||
/* Cache target FP in register %g7. */
|
||||
ld ENV (JB_FP), %g7
|
||||
/* Cache target FP in register %g3. */
|
||||
ld ENV(JB_FP), %g3
|
||||
|
||||
/* Now we will loop, unwinding the register windows up the stack
|
||||
until the restored %fp value matches the target value in %g7. */
|
||||
until the restored %fp value matches the target value in %g3. */
|
||||
|
||||
loop: cmp %fp, %g7 /* Have we reached the target frame? */
|
||||
bl,a loop /* Loop while current fp is below target. */
|
||||
LOC(loop):
|
||||
cmp %fp, %g3 /* Have we reached the target frame? */
|
||||
bl,a LOC(loop) /* Loop while current fp is below target. */
|
||||
restore /* Unwind register window in delay slot. */
|
||||
be,a found /* Better have hit it exactly. */
|
||||
ld ENV (JB_SP), %o0 /* Delay slot: extract target SP. */
|
||||
be,a LOC(found) /* Better have hit it exactly. */
|
||||
ld ENV(JB_SP), %o0 /* Delay slot: extract target SP. */
|
||||
|
||||
bogus: /* Get here only if the jmp_buf or stack is clobbered. */
|
||||
call C_SYMBOL_NAME (abort)
|
||||
nop
|
||||
LOC(bogus):
|
||||
/* Get here only if the jmp_buf or stack is clobbered. */
|
||||
call C_SYMBOL_NAME(abort)
|
||||
nop
|
||||
unimp 0
|
||||
|
||||
found: /* We have unwound register windows so %fp matches the target. */
|
||||
LOC(found):
|
||||
/* We have unwound register windows so %fp matches the target. */
|
||||
cmp %o0, %sp /* Check jmp_buf SP vs register window. */
|
||||
bge,a sp_ok /* Saved must not be deeper than register. */
|
||||
bge,a LOC(sp_ok) /* Saved must not be deeper than register. */
|
||||
mov %o0, %sp /* OK, install new SP. */
|
||||
b,a bogus /* Bogus, we lose. */
|
||||
b,a LOC(bogus) /* Bogus, we lose. */
|
||||
|
||||
sp_ok: ld ENV (JB_PC), %o0 /* Extract target return PC. */
|
||||
LOC(sp_ok):
|
||||
ld ENV(JB_PC), %o0 /* Extract target return PC. */
|
||||
jmp %o0 + 8 /* Return there. */
|
||||
mov %g6, %o0 /* Delay slot: set return value. */
|
||||
mov %g2, %o0 /* Delay slot: set return value. */
|
||||
|
||||
END(__longjmp)
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
! sum in a third limb vector.
|
||||
|
||||
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -22,205 +22,217 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
#define res_ptr %o0
|
||||
#define s1_ptr %o1
|
||||
#define s2_ptr %o2
|
||||
#define size %o3
|
||||
#define RES_PTR %o0
|
||||
#define S1_PTR %o1
|
||||
#define S2_PTR %o2
|
||||
#define SIZE %o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_add_n)
|
||||
C_SYMBOL_NAME(__mpn_add_n):
|
||||
xor s2_ptr,res_ptr,%g1
|
||||
ENTRY(__mpn_add_n)
|
||||
xor S2_PTR,RES_PTR,%g1
|
||||
andcc %g1,4,%g0
|
||||
bne L1 ! branch if alignment differs
|
||||
nop
|
||||
bne LOC(1) ! branch if alignment differs
|
||||
nop
|
||||
! ** V1a **
|
||||
L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
|
||||
be L_v1 ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align res_ptr and s2_ptr */
|
||||
ld [s1_ptr],%g4
|
||||
add s1_ptr,4,s1_ptr
|
||||
ld [s2_ptr],%g2
|
||||
add s2_ptr,4,s2_ptr
|
||||
add size,-1,size
|
||||
LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
|
||||
be LOC(v1) ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align RES_PTR and S2_PTR */
|
||||
ld [S1_PTR],%g4
|
||||
add S1_PTR,4,S1_PTR
|
||||
ld [S2_PTR],%g2
|
||||
add S2_PTR,4,S2_PTR
|
||||
add SIZE,-1,SIZE
|
||||
addcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
add res_ptr,4,res_ptr
|
||||
L_v1: addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp size,2 ! if size < 2 ...
|
||||
bl Lend2 ! ... branch to tail code
|
||||
st %o4,[RES_PTR]
|
||||
add RES_PTR,4,RES_PTR
|
||||
LOC(v1):
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp SIZE,2 ! if SIZE < 2 ...
|
||||
bl LOC(end2) ! ... branch to tail code
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
ld [s1_ptr+0],%g4
|
||||
addcc size,-10,size
|
||||
ld [s1_ptr+4],%g1
|
||||
ldd [s2_ptr+0],%g2
|
||||
blt Lfin1
|
||||
ld [S1_PTR+0],%g4
|
||||
addcc SIZE,-10,SIZE
|
||||
ld [S1_PTR+4],%g1
|
||||
ldd [S2_PTR+0],%g2
|
||||
blt LOC(fin1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||
Loop1: addxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+8],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+12],%g1
|
||||
ldd [s2_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
LOC(loop1):
|
||||
addxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+16],%g4
|
||||
ld [S1_PTR+8],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+20],%g1
|
||||
ldd [s2_ptr+16],%g2
|
||||
std %o4,[res_ptr+8]
|
||||
ld [S1_PTR+12],%g1
|
||||
ldd [S2_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
addxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+24],%g4
|
||||
ld [S1_PTR+16],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+28],%g1
|
||||
ldd [s2_ptr+24],%g2
|
||||
std %o4,[res_ptr+16]
|
||||
ld [S1_PTR+20],%g1
|
||||
ldd [S2_PTR+16],%g2
|
||||
std %o4,[RES_PTR+8]
|
||||
addxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+32],%g4
|
||||
ld [S1_PTR+24],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+36],%g1
|
||||
ldd [s2_ptr+32],%g2
|
||||
std %o4,[res_ptr+24]
|
||||
ld [S1_PTR+28],%g1
|
||||
ldd [S2_PTR+24],%g2
|
||||
std %o4,[RES_PTR+16]
|
||||
addxcc %g4,%g2,%o4
|
||||
ld [S1_PTR+32],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [S1_PTR+36],%g1
|
||||
ldd [S2_PTR+32],%g2
|
||||
std %o4,[RES_PTR+24]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
add s1_ptr,32,s1_ptr
|
||||
add s2_ptr,32,s2_ptr
|
||||
add res_ptr,32,res_ptr
|
||||
bge Loop1
|
||||
addcc SIZE,-8,SIZE
|
||||
add S1_PTR,32,S1_PTR
|
||||
add S2_PTR,32,S2_PTR
|
||||
add RES_PTR,32,RES_PTR
|
||||
bge LOC(loop1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
Lfin1: addcc size,8-2,size
|
||||
blt Lend1
|
||||
LOC(fin1):
|
||||
addcc SIZE,8-2,SIZE
|
||||
blt LOC(end1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 2 limbs until less than 2 limbs remain */
|
||||
Loope1: addxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+8],%g4
|
||||
LOC(loope1):
|
||||
addxcc %g4,%g2,%o4
|
||||
ld [S1_PTR+8],%g4
|
||||
addxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+12],%g1
|
||||
ldd [s2_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
ld [S1_PTR+12],%g1
|
||||
ldd [S2_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-2,size
|
||||
add s1_ptr,8,s1_ptr
|
||||
add s2_ptr,8,s2_ptr
|
||||
add res_ptr,8,res_ptr
|
||||
bge Loope1
|
||||
addcc SIZE,-2,SIZE
|
||||
add S1_PTR,8,S1_PTR
|
||||
add S2_PTR,8,S2_PTR
|
||||
add RES_PTR,8,RES_PTR
|
||||
bge LOC(loope1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Lend1: addxcc %g4,%g2,%o4
|
||||
LOC(end1):
|
||||
addxcc %g4,%g2,%o4
|
||||
addxcc %g1,%g3,%o5
|
||||
std %o4,[res_ptr+0]
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
|
||||
andcc size,1,%g0
|
||||
be Lret1
|
||||
andcc SIZE,1,%g0
|
||||
be LOC(ret1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add last limb */
|
||||
ld [s1_ptr+8],%g4
|
||||
ld [s2_ptr+8],%g2
|
||||
ld [S1_PTR+8],%g4
|
||||
ld [S2_PTR+8],%g2
|
||||
addxcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr+8]
|
||||
st %o4,[RES_PTR+8]
|
||||
|
||||
Lret1: retl
|
||||
LOC(ret1):
|
||||
retl
|
||||
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
||||
|
||||
L1: xor s1_ptr,res_ptr,%g1
|
||||
LOC(1): xor S1_PTR,RES_PTR,%g1
|
||||
andcc %g1,4,%g0
|
||||
bne L2
|
||||
bne LOC(2)
|
||||
nop
|
||||
! ** V1b **
|
||||
mov s2_ptr,%g1
|
||||
mov s1_ptr,s2_ptr
|
||||
b L0
|
||||
mov %g1,s1_ptr
|
||||
mov S2_PTR,%g1
|
||||
mov S1_PTR,S2_PTR
|
||||
b LOC(0)
|
||||
mov %g1,S1_PTR
|
||||
|
||||
! ** V2 **
|
||||
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
|
||||
alignment of s2_ptr and res_ptr differ. Since there are only two ways
|
||||
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
|
||||
alignment of S2_PTR and RES_PTR differ. Since there are only two ways
|
||||
things can be aligned (that we care about) we now know that the alignment
|
||||
of s1_ptr and s2_ptr are the same. */
|
||||
of S1_PTR and S2_PTR are the same. */
|
||||
|
||||
L2: cmp size,1
|
||||
be Ljone
|
||||
LOC(2): cmp SIZE,1
|
||||
be LOC(jone)
|
||||
nop
|
||||
andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
|
||||
be L_v2 ! if no, branch
|
||||
andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
|
||||
be LOC(v2) ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align s1_ptr and s2_ptr */
|
||||
ld [s1_ptr],%g4
|
||||
add s1_ptr,4,s1_ptr
|
||||
ld [s2_ptr],%g2
|
||||
add s2_ptr,4,s2_ptr
|
||||
add size,-1,size
|
||||
/* Add least significant limb separately to align S1_PTR and S2_PTR */
|
||||
ld [S1_PTR],%g4
|
||||
add S1_PTR,4,S1_PTR
|
||||
ld [S2_PTR],%g2
|
||||
add S2_PTR,4,S2_PTR
|
||||
add SIZE,-1,SIZE
|
||||
addcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
add res_ptr,4,res_ptr
|
||||
st %o4,[RES_PTR]
|
||||
add RES_PTR,4,RES_PTR
|
||||
|
||||
L_v2: addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
blt Lfin2
|
||||
LOC(v2):
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc SIZE,-8,SIZE
|
||||
blt LOC(fin2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||
Loop2: ldd [s1_ptr+0],%g2
|
||||
ldd [s2_ptr+0],%o4
|
||||
LOC(loop2):
|
||||
ldd [S1_PTR+0],%g2
|
||||
ldd [S2_PTR+0],%o4
|
||||
addxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+0]
|
||||
st %g2,[RES_PTR+0]
|
||||
addxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+4]
|
||||
ldd [s1_ptr+8],%g2
|
||||
ldd [s2_ptr+8],%o4
|
||||
st %g3,[RES_PTR+4]
|
||||
ldd [S1_PTR+8],%g2
|
||||
ldd [S2_PTR+8],%o4
|
||||
addxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+8]
|
||||
st %g2,[RES_PTR+8]
|
||||
addxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+12]
|
||||
ldd [s1_ptr+16],%g2
|
||||
ldd [s2_ptr+16],%o4
|
||||
st %g3,[RES_PTR+12]
|
||||
ldd [S1_PTR+16],%g2
|
||||
ldd [S2_PTR+16],%o4
|
||||
addxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+16]
|
||||
st %g2,[RES_PTR+16]
|
||||
addxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+20]
|
||||
ldd [s1_ptr+24],%g2
|
||||
ldd [s2_ptr+24],%o4
|
||||
st %g3,[RES_PTR+20]
|
||||
ldd [S1_PTR+24],%g2
|
||||
ldd [S2_PTR+24],%o4
|
||||
addxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+24]
|
||||
st %g2,[RES_PTR+24]
|
||||
addxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+28]
|
||||
st %g3,[RES_PTR+28]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
add s1_ptr,32,s1_ptr
|
||||
add s2_ptr,32,s2_ptr
|
||||
add res_ptr,32,res_ptr
|
||||
bge Loop2
|
||||
addcc SIZE,-8,SIZE
|
||||
add S1_PTR,32,S1_PTR
|
||||
add S2_PTR,32,S2_PTR
|
||||
add RES_PTR,32,RES_PTR
|
||||
bge LOC(loop2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
Lfin2: addcc size,8-2,size
|
||||
blt Lend2
|
||||
LOC(fin2):
|
||||
addcc SIZE,8-2,SIZE
|
||||
blt LOC(end2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Loope2: ldd [s1_ptr+0],%g2
|
||||
ldd [s2_ptr+0],%o4
|
||||
LOC(loope2):
|
||||
ldd [S1_PTR+0],%g2
|
||||
ldd [S2_PTR+0],%o4
|
||||
addxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+0]
|
||||
st %g2,[RES_PTR+0]
|
||||
addxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+4]
|
||||
st %g3,[RES_PTR+4]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-2,size
|
||||
add s1_ptr,8,s1_ptr
|
||||
add s2_ptr,8,s2_ptr
|
||||
add res_ptr,8,res_ptr
|
||||
bge Loope2
|
||||
addcc SIZE,-2,SIZE
|
||||
add S1_PTR,8,S1_PTR
|
||||
add S2_PTR,8,S2_PTR
|
||||
add RES_PTR,8,RES_PTR
|
||||
bge LOC(loope2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Lend2: andcc size,1,%g0
|
||||
be Lret2
|
||||
LOC(end2):
|
||||
andcc SIZE,1,%g0
|
||||
be LOC(ret2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add last limb */
|
||||
Ljone: ld [s1_ptr],%g4
|
||||
ld [s2_ptr],%g2
|
||||
LOC(jone):
|
||||
ld [S1_PTR],%g4
|
||||
ld [S2_PTR],%g2
|
||||
addxcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
st %o4,[RES_PTR]
|
||||
|
||||
Lret2: retl
|
||||
LOC(ret2):
|
||||
retl
|
||||
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
||||
|
||||
END(__mpn_add_n)
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
! the result to a second limb vector.
|
||||
|
||||
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -22,17 +22,14 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
! res_ptr o0
|
||||
! s1_ptr o1
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
! RES_PTR o0
|
||||
! S1_PTR o1
|
||||
! SIZE o2
|
||||
! S2_LIMB o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_addmul_1)
|
||||
C_SYMBOL_NAME(__mpn_addmul_1):
|
||||
ENTRY(__mpn_addmul_1)
|
||||
! Make S1_PTR and RES_PTR point at the end of their blocks
|
||||
! and put (- 4 x SIZE) in index/loop counter.
|
||||
sll %o2,2,%o2
|
||||
@@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_addmul_1):
|
||||
sub %g0,%o2,%o2
|
||||
|
||||
cmp %o3,0xfff
|
||||
bgu Large
|
||||
bgu LOC(large)
|
||||
nop
|
||||
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
b L0
|
||||
b LOC(0)
|
||||
add %o4,-4,%o4
|
||||
Loop0:
|
||||
LOC(loop0):
|
||||
addcc %o5,%g1,%g1
|
||||
ld [%o1+%o2],%o5
|
||||
addx %o0,%g0,%o0
|
||||
st %g1,[%o4+%o2]
|
||||
L0: wr %g0,%o3,%y
|
||||
LOC(0): wr %g0,%o3,%y
|
||||
sra %o5,31,%g2
|
||||
and %o3,%g2,%g2
|
||||
andcc %g1,0,%g1
|
||||
@@ -79,7 +76,7 @@ L0: wr %g0,%o3,%y
|
||||
addcc %g1,%o0,%g1
|
||||
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
|
||||
addcc %o2,4,%o2 ! loop counter
|
||||
bne Loop0
|
||||
bne LOC(loop0)
|
||||
ld [%o4+%o2],%o5
|
||||
|
||||
addcc %o5,%g1,%g1
|
||||
@@ -88,17 +85,18 @@ L0: wr %g0,%o3,%y
|
||||
st %g1,[%o4+%o2]
|
||||
|
||||
|
||||
Large: ld [%o1+%o2],%o5
|
||||
LOC(large):
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
|
||||
b L1
|
||||
b LOC(1)
|
||||
add %o4,-4,%o4
|
||||
Loop:
|
||||
LOC(loop):
|
||||
addcc %o5,%g3,%g3
|
||||
ld [%o1+%o2],%o5
|
||||
addx %o0,%g0,%o0
|
||||
st %g3,[%o4+%o2]
|
||||
L1: wr %g0,%o5,%y
|
||||
LOC(1): wr %g0,%o5,%y
|
||||
and %o5,%g4,%g2
|
||||
andcc %g0,%g0,%g1
|
||||
mulscc %g1,%o3,%g1
|
||||
@@ -138,10 +136,12 @@ L1: wr %g0,%o5,%y
|
||||
addcc %g3,%o0,%g3
|
||||
addx %g2,%g1,%o0
|
||||
addcc %o2,4,%o2
|
||||
bne Loop
|
||||
bne LOC(loop)
|
||||
ld [%o4+%o2],%o5
|
||||
|
||||
addcc %o5,%g3,%g3
|
||||
addx %o0,%g0,%o0
|
||||
retl
|
||||
st %g3,[%o4+%o2]
|
||||
|
||||
END(__mpn_addmul_1)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
/* Code produced by Sun's C compiler calls this function with two extra
|
||||
arguments which it makes relocatable symbols but seem always to be
|
||||
@@ -30,3 +30,4 @@ ENTRY (__builtin_alloca)
|
||||
sub %sp, %o0, %sp /* Push some stack space. */
|
||||
retl /* Return; the returned buffer leaves 96 */
|
||||
add %sp, 96, %o0 /* bytes of register save area at the top. */
|
||||
END (__builtin_alloca)
|
||||
|
||||
@@ -1,40 +1 @@
|
||||
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version.
|
||||
Copyright (C) 1994, 1997 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY (_setjmp)
|
||||
|
||||
#ifdef PIC
|
||||
save %sp, -64, %sp
|
||||
1: call 2f
|
||||
sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
|
||||
2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
|
||||
add %g1, %o7, %g1
|
||||
sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
|
||||
restore
|
||||
or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
|
||||
ld [%g1+%g2], %g1
|
||||
#else
|
||||
sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
|
||||
or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
|
||||
#endif
|
||||
|
||||
jmp %g1
|
||||
mov %g0, %o1 /* Pass second argument of zero. */
|
||||
/* _setjmp is in setjmp.S */
|
||||
|
||||
@@ -1,40 +1 @@
|
||||
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version.
|
||||
Copyright (C) 1994, 1997 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY (setjmp)
|
||||
|
||||
#ifdef PIC
|
||||
save %sp, -64, %sp
|
||||
1: call 2f
|
||||
sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
|
||||
2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
|
||||
add %g1, %o7, %g1
|
||||
sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
|
||||
restore
|
||||
or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
|
||||
ld [%g1+%g2], %g1
|
||||
#else
|
||||
sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
|
||||
or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
|
||||
#endif
|
||||
|
||||
jmp %g1
|
||||
mov 1, %o1 /* Pass second argument of one. */
|
||||
/* setjmp is in setjmp.S */
|
||||
|
||||
@@ -47,8 +47,8 @@ define(V, `%o5')dnl
|
||||
dnl
|
||||
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
|
||||
define(T, `%g1')dnl
|
||||
define(SC, `%g7')dnl
|
||||
ifelse(S, `true', `define(SIGN, `%g6')')dnl
|
||||
define(SC, `%g2')dnl
|
||||
ifelse(S, `true', `define(SIGN, `%g3')')dnl
|
||||
|
||||
dnl
|
||||
dnl This is the recursive definition for developing quotient digits.
|
||||
@@ -65,7 +65,7 @@ dnl modified to reflect the output R.
|
||||
dnl
|
||||
define(DEVELOP_QUOTIENT_BITS,
|
||||
` ! depth $1, accumulated bits $2
|
||||
bl L.$1.eval(2**N+$2)
|
||||
bl LOC($1.eval(2**N+$2))
|
||||
srl V,1,V
|
||||
! remainder is positive
|
||||
subcc R,V,R
|
||||
@@ -73,7 +73,7 @@ define(DEVELOP_QUOTIENT_BITS,
|
||||
` b 9f
|
||||
add Q, ($2*2+1), Q
|
||||
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
|
||||
L.$1.eval(2**N+$2):
|
||||
LOC($1.eval(2**N+$2)):
|
||||
! remainder is negative
|
||||
addcc R,V,R
|
||||
ifelse($1, N,
|
||||
@@ -82,18 +82,10 @@ L.$1.eval(2**N+$2):
|
||||
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
|
||||
ifelse($1, 1, `9:')')dnl
|
||||
|
||||
#include "sysdep.h"
|
||||
#ifdef __linux__
|
||||
#include <asm/traps.h>
|
||||
#else
|
||||
#ifdef __svr4__
|
||||
#include <sysdep.h>
|
||||
#include <sys/trap.h>
|
||||
#else
|
||||
#include <machine/trap.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
FUNC(NAME)
|
||||
ENTRY(NAME)
|
||||
ifelse(S, `true',
|
||||
` ! compute sign of result; if neither is negative, no problem
|
||||
orcc divisor, dividend, %g0 ! either negative?
|
||||
@@ -124,11 +116,11 @@ ifelse(OP, `div',
|
||||
|
||||
1:
|
||||
cmp R, V ! if divisor exceeds dividend, done
|
||||
blu Lgot_result ! (and algorithm fails otherwise)
|
||||
blu LOC(got_result) ! (and algorithm fails otherwise)
|
||||
clr Q
|
||||
sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
|
||||
cmp R, T
|
||||
blu Lnot_really_big
|
||||
blu LOC(not_really_big)
|
||||
clr ITER
|
||||
|
||||
! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
|
||||
@@ -146,7 +138,7 @@ ifelse(OP, `div',
|
||||
|
||||
! Now compute SC.
|
||||
2: addcc V, V, V
|
||||
bcc Lnot_too_big
|
||||
bcc LOC(not_too_big)
|
||||
add SC, 1, SC
|
||||
|
||||
! We get here if the divisor overflowed while shifting.
|
||||
@@ -155,14 +147,14 @@ ifelse(OP, `div',
|
||||
sll T, TOPBITS, T ! high order bit
|
||||
srl V, 1, V ! rest of V
|
||||
add V, T, V
|
||||
b Ldo_single_div
|
||||
b LOC(do_single_div)
|
||||
sub SC, 1, SC
|
||||
|
||||
Lnot_too_big:
|
||||
LOC(not_too_big):
|
||||
3: cmp V, R
|
||||
blu 2b
|
||||
nop
|
||||
be Ldo_single_div
|
||||
be LOC(do_single_div)
|
||||
nop
|
||||
/* NB: these are commented out in the V8-Sparc manual as well */
|
||||
/* (I do not understand this) */
|
||||
@@ -177,15 +169,15 @@ ifelse(OP, `div',
|
||||
! order bit set in the first step, just falling into the regular
|
||||
! division loop will mess up the first time around.
|
||||
! So we unroll slightly...
|
||||
Ldo_single_div:
|
||||
LOC(do_single_div):
|
||||
subcc SC, 1, SC
|
||||
bl Lend_regular_divide
|
||||
bl LOC(end_regular_divide)
|
||||
nop
|
||||
sub R, V, R
|
||||
mov 1, Q
|
||||
b Lend_single_divloop
|
||||
b LOC(end_single_divloop)
|
||||
nop
|
||||
Lsingle_divloop:
|
||||
LOC(single_divloop):
|
||||
sll Q, 1, Q
|
||||
bl 1f
|
||||
srl V, 1, V
|
||||
@@ -197,37 +189,37 @@ ifelse(OP, `div',
|
||||
add R, V, R
|
||||
sub Q, 1, Q
|
||||
2:
|
||||
Lend_single_divloop:
|
||||
LOC(end_single_divloop):
|
||||
subcc SC, 1, SC
|
||||
bge Lsingle_divloop
|
||||
bge LOC(single_divloop)
|
||||
tst R
|
||||
b,a Lend_regular_divide
|
||||
b,a LOC(end_regular_divide)
|
||||
|
||||
Lnot_really_big:
|
||||
LOC(not_really_big):
|
||||
1:
|
||||
sll V, N, V
|
||||
cmp V, R
|
||||
bleu 1b
|
||||
addcc ITER, 1, ITER
|
||||
be Lgot_result
|
||||
be LOC(got_result)
|
||||
sub ITER, 1, ITER
|
||||
|
||||
tst R ! set up for initial iteration
|
||||
Ldivloop:
|
||||
LOC(divloop):
|
||||
sll Q, N, Q
|
||||
DEVELOP_QUOTIENT_BITS(1, 0)
|
||||
Lend_regular_divide:
|
||||
LOC(end_regular_divide):
|
||||
subcc ITER, 1, ITER
|
||||
bge Ldivloop
|
||||
bge LOC(divloop)
|
||||
tst R
|
||||
bl,a Lgot_result
|
||||
bl,a LOC(got_result)
|
||||
! non-restoring fixup here (one instruction only!)
|
||||
ifelse(OP, `div',
|
||||
` sub Q, 1, Q
|
||||
', ` add R, divisor, R
|
||||
')
|
||||
|
||||
Lgot_result:
|
||||
LOC(got_result):
|
||||
ifelse(S, `true',
|
||||
` ! check to see if answer should be < 0
|
||||
tst SIGN
|
||||
@@ -236,3 +228,5 @@ ifelse(S, `true',
|
||||
1:')
|
||||
retl
|
||||
ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
|
||||
|
||||
END(NAME)
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
|
||||
#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
|
||||
#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
|
||||
#define OPCODE_SAVE_SP64 0x9de3bfc0 /* save %sp, -64, %sp */
|
||||
#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
|
||||
|
||||
|
||||
/* Return nonzero iff E_MACHINE is compatible with the running host. */
|
||||
@@ -55,7 +55,7 @@ elf_machine_dynamic (void)
|
||||
static inline Elf32_Addr
|
||||
elf_machine_load_address (void)
|
||||
{
|
||||
register Elf32_Addr pc __asm("%o7"), got;
|
||||
register Elf32_Addr pc __asm("%o7"), pic __asm("%l7"), got;
|
||||
|
||||
/* Utilize the fact that a local .got entry will be partially
|
||||
initialized at startup awaiting its RELATIVE fixup. */
|
||||
@@ -64,50 +64,187 @@ elf_machine_load_address (void)
|
||||
".Load_address:\n\t"
|
||||
"call 1f\n\t"
|
||||
"or %1,%%lo(.Load_address),%1\n"
|
||||
"1:\tld [%%l7+%1],%1"
|
||||
: "=r"(pc), "=r"(got));
|
||||
"1:\tld [%2+%1],%1"
|
||||
: "=r"(pc), "=r"(got) : "r"(pic));
|
||||
|
||||
return pc - got;
|
||||
}
|
||||
|
||||
Elf32_Addr addr;
|
||||
/* Set up the loaded object described by L so its unrelocated PLT
|
||||
entries will jump to the on-demand fixup code in dl-runtime.c. */
|
||||
|
||||
asm (
|
||||
"add %%fp,0x44,%%o2\n\t" /* o2 = point to argc */
|
||||
"ld [%%o2 - 4],%%o0\n\t" /* o0 = load argc */
|
||||
"sll %%o0, 2, %%o0\n\t" /* o0 = argc * sizeof (int) */
|
||||
"add %%o2,%%o0,%%o2\n\t" /* o2 = skip over argv */
|
||||
"add %%o2,4,%%o2\n\t" /* skip over null after argv */
|
||||
static inline int
|
||||
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
{
|
||||
Elf32_Addr *plt;
|
||||
extern void _dl_runtime_resolve (Elf32_Word);
|
||||
|
||||
/* Now %o2 is pointing to env, skip over that as well. */
|
||||
"1:\n\t"
|
||||
"ld [%%o2],%%o0\n\t"
|
||||
"cmp %%o0,0\n\t"
|
||||
"bnz 1b\n\t"
|
||||
"add %%o2,4,%%o2\n\t"
|
||||
if (l->l_info[DT_JMPREL] && lazy)
|
||||
{
|
||||
/* The entries for functions in the PLT have not yet been filled in.
|
||||
Their initial contents will arrange when called to set the high 22
|
||||
bits of %g1 with an offset into the .rela.plt section and jump to
|
||||
the beginning of the PLT. */
|
||||
plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
|
||||
|
||||
/* Note that above, we want to advance past the NULL after envp so
|
||||
we always add 4. */
|
||||
/* The beginning of the PLT does:
|
||||
|
||||
/* Now, search for the AT_BASE property. */
|
||||
"2:\n\t"
|
||||
"ld [%%o2],%%o0\n\t"
|
||||
"cmp %%o0,0\n\t"
|
||||
"be,a 3f\n\t"
|
||||
"or %%g0,%%g0,%0\n\t"
|
||||
"cmp %%o0,7\n\t" /* AT_BASE = 7 */
|
||||
"be,a 3f\n\t"
|
||||
"ld [%%o2+4],%0\n\t"
|
||||
"b 2b\n\t"
|
||||
"add %%o2,8,%%o2\n\t"
|
||||
/* At this point %0 has the load address for the interpreter */
|
||||
"3:\n\t"
|
||||
: "=r" (addr)
|
||||
: /* no inputs */
|
||||
: "o0", "o2");
|
||||
return addr;
|
||||
save %sp, -64, %sp
|
||||
pltpc: call _dl_runtime_resolve
|
||||
nop
|
||||
.word MAP
|
||||
|
||||
This saves the register window containing the arguments, and the
|
||||
PC value (pltpc) implicitly saved in %o7 by the call points near the
|
||||
location where we store the link_map pointer for this object. */
|
||||
|
||||
plt[0] = OPCODE_SAVE_SP;
|
||||
/* Construct PC-relative word address. */
|
||||
plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve -
|
||||
(Elf32_Addr) &plt[1]) >> 2);
|
||||
plt[2] = OPCODE_NOP; /* Fill call delay slot. */
|
||||
plt[3] = (Elf32_Addr) l;
|
||||
}
|
||||
|
||||
return lazy;
|
||||
}
|
||||
|
||||
/* This code is used in dl-runtime.c to call the `fixup' function
|
||||
and then redirect to the address it returns. */
|
||||
#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
|
||||
.globl _dl_runtime_resolve
|
||||
.type _dl_runtime_resolve, @function
|
||||
_dl_runtime_resolve:
|
||||
/* Set up the arguments to fixup --
|
||||
%o0 = link_map out of plt0
|
||||
%o1 = offset of reloc entry */
|
||||
ld [%o7 + 8], %o0
|
||||
srl %g1, 10, %o1
|
||||
call fixup
|
||||
sub %o1, 4*12, %o1
|
||||
jmp %o0
|
||||
restore
|
||||
.size _dl_runtime_resolve, . - _dl_runtime_resolve");
|
||||
|
||||
/* The address of the JMP_SLOT reloc is the .plt entry, thus we don't
|
||||
dereference the reloc's addr to get the final destination. Ideally
|
||||
there would be a generic way to return the value of the symbol from
|
||||
elf_machine_relplt, but as it is, the address of the .plt entry is
|
||||
good enough. */
|
||||
#define ELF_FIXUP_RETURN_VALUE(map, result) ((Elf32_Addr) &(result))
|
||||
|
||||
/* Nonzero iff TYPE should not be allowed to resolve to one of
|
||||
the main executable's symbols, as for a COPY reloc. */
|
||||
#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY)
|
||||
|
||||
/* Nonzero iff TYPE describes relocation of a PLT entry, so
|
||||
PLT entries should not be allowed to define the value. */
|
||||
#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT)
|
||||
|
||||
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
|
||||
#define ELF_MACHINE_RELOC_NOPLT R_SPARC_JMP_SLOT
|
||||
|
||||
/* The SPARC never uses Elf32_Rel relocations. */
|
||||
#define ELF_MACHINE_NO_REL 1
|
||||
|
||||
/* The SPARC overlaps DT_RELA and DT_PLTREL. */
|
||||
#define ELF_MACHINE_PLTREL_OVERLAP 1
|
||||
|
||||
/* The PLT uses Elf32_Rela relocs. */
|
||||
#define elf_machine_relplt elf_machine_rela
|
||||
|
||||
/* Initial entry point code for the dynamic linker.
|
||||
The C function `_dl_start' is the real entry point;
|
||||
its return value is the user program's entry point. */
|
||||
|
||||
#define RTLD_START __asm__ ("\
|
||||
.text
|
||||
.globl _start
|
||||
.type _start,@function
|
||||
_start:
|
||||
/* Allocate space for functions to drop their arguments. */
|
||||
sub %sp, 6*4, %sp
|
||||
/* Pass pointer to argument block to _dl_start. */
|
||||
call _dl_start
|
||||
add %sp, 22*4, %o0
|
||||
/* FALTHRU */
|
||||
.globl _dl_start_user
|
||||
.type _dl_start_user,@function
|
||||
_dl_start_user:
|
||||
/* Load the PIC register. */
|
||||
1: call 2f
|
||||
sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7
|
||||
2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7
|
||||
add %l7, %o7, %l7
|
||||
/* Save the user entry point address in %l0 */
|
||||
mov %o0, %l0
|
||||
/* See if we were run as a command with the executable file name as an
|
||||
extra leading argument. If so, adjust the contents of the stack. */
|
||||
sethi %hi(_dl_skip_args), %g2
|
||||
or %g2, %lo(_dl_skip_args), %g2
|
||||
ld [%l7+%g2], %i0
|
||||
ld [%i0], %i0
|
||||
tst %i0
|
||||
beq 3f
|
||||
nop
|
||||
/* Find out how far to shift. */
|
||||
ld [%sp+22*4], %i1 /* load argc */
|
||||
sub %i1, %i0, %i1
|
||||
sll %i0, 2, %i2
|
||||
st %i1, [%sp+22*4]
|
||||
add %sp, 23*4, %i1
|
||||
add %i1, %i2, %i2
|
||||
/* Copy down argv */
|
||||
21: ld [%i2], %i3
|
||||
add %i2, 4, %i2
|
||||
tst %i3
|
||||
st %i3, [%i1]
|
||||
bne 21b
|
||||
add %i1, 4, %i1
|
||||
/* Copy down env */
|
||||
22: ld [%i2], %i3
|
||||
add %i2, 4, %i2
|
||||
tst %i3
|
||||
st %i3, [%i1]
|
||||
bne 22b
|
||||
add %i1, 4, %i1
|
||||
/* Copy down auxiliary table. */
|
||||
23: ld [%i2], %i3
|
||||
ld [%i2+4], %i4
|
||||
add %i2, 8, %i2
|
||||
tst %i3
|
||||
st %i3, [%i1]
|
||||
st %i4, [%i1+4]
|
||||
bne 23b
|
||||
add %i1, 8, %i1
|
||||
/* Load _dl_default_scope[2] to pass to _dl_init_next. */
|
||||
3: sethi %hi(_dl_default_scope), %g1
|
||||
or %g1, %lo(_dl_default_scope), %g1
|
||||
ld [%l7+%g1], %l1
|
||||
ld [%l1+2*4], %l1
|
||||
/* Call _dl_init_next to return the address of an initializer to run. */
|
||||
4: call _dl_init_next
|
||||
mov %l1, %o0
|
||||
tst %o0
|
||||
beq 5f
|
||||
nop
|
||||
jmpl %o0, %o7
|
||||
nop
|
||||
ba,a 4b
|
||||
/* Clear the startup flag. */
|
||||
5: sethi %hi(_dl_starting_up), %g1
|
||||
or %g1, %lo(_dl_starting_up), %g1
|
||||
ld [%l7+%g1], %g1
|
||||
st %g0, [%g1]
|
||||
/* Pass our finalizer function to the user in %g1. */
|
||||
sethi %hi(_dl_fini), %g1
|
||||
or %g1, %lo(_dl_fini), %g1
|
||||
ld [%l7+%g1], %g1
|
||||
/* Jump to the user's entry point and deallocate the extra stack we got. */
|
||||
jmp %l0
|
||||
add %sp, 6*4, %sp
|
||||
.size _dl_start_user,.-_dl_start_user");
|
||||
|
||||
#ifdef RESOLVE
|
||||
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
|
||||
MAP is the object containing the reloc. */
|
||||
@@ -117,7 +254,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
|
||||
const Elf32_Sym *sym, const struct r_found_version *version,
|
||||
Elf32_Addr *const reloc_addr)
|
||||
{
|
||||
Elf32_Addr loadbase;
|
||||
extern unsigned long _dl_hwcap;
|
||||
|
||||
if (ELF32_R_TYPE (reloc->r_info) == R_SPARC_RELATIVE)
|
||||
{
|
||||
@@ -144,6 +281,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
|
||||
switch (ELF32_R_TYPE (reloc->r_info))
|
||||
{
|
||||
case R_SPARC_COPY:
|
||||
#ifndef RTLD_BOOTSTRAP
|
||||
if (sym->st_size > refsym->st_size
|
||||
|| (_dl_verbose && sym->st_size < refsym->st_size))
|
||||
{
|
||||
@@ -159,14 +297,21 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
|
||||
}
|
||||
memcpy (reloc_addr, (void *) value, MIN (sym->st_size,
|
||||
refsym->st_size));
|
||||
#endif
|
||||
break;
|
||||
case R_SPARC_GLOB_DAT:
|
||||
case R_SPARC_32:
|
||||
*reloc_addr = value;
|
||||
break;
|
||||
case R_SPARC_JMP_SLOT:
|
||||
reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10);
|
||||
/* For thread safety, write the instructions from the bottom and
|
||||
flush before we overwrite the critical "b,a". */
|
||||
reloc_addr[2] = OPCODE_JMP_G1 | (value & 0x3ff);
|
||||
if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */
|
||||
__asm __volatile ("flush %0+8" : : "r"(reloc_addr));
|
||||
reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10);
|
||||
if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */
|
||||
__asm __volatile ("flush %0+4" : : "r"(reloc_addr));
|
||||
break;
|
||||
case R_SPARC_8:
|
||||
*(char *) reloc_addr = value;
|
||||
@@ -218,146 +363,3 @@ elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc)
|
||||
}
|
||||
|
||||
#endif /* RESOLVE */
|
||||
|
||||
/* Nonzero iff TYPE should not be allowed to resolve to one of
|
||||
the main executable's symbols, as for a COPY reloc. */
|
||||
#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY)
|
||||
|
||||
/* Nonzero iff TYPE describes relocation of a PLT entry, so
|
||||
PLT entries should not be allowed to define the value. */
|
||||
#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT)
|
||||
|
||||
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
|
||||
#define ELF_MACHINE_RELOC_NOPLT R_SPARC_JMP_SLOT
|
||||
|
||||
/* The SPARC never uses Elf32_Rel relocations. */
|
||||
#define ELF_MACHINE_NO_REL 1
|
||||
|
||||
/* The SPARC overlaps DT_RELA and DT_PLTREL. */
|
||||
#define ELF_MACHINE_PLTREL_OVERLAP 1
|
||||
|
||||
/* Set up the loaded object described by L so its unrelocated PLT
|
||||
entries will jump to the on-demand fixup code in dl-runtime.c. */
|
||||
|
||||
static inline int
|
||||
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
{
|
||||
Elf32_Addr *plt;
|
||||
extern void _dl_runtime_resolve (Elf32_Word);
|
||||
|
||||
if (l->l_info[DT_JMPREL] && lazy)
|
||||
{
|
||||
/* The entries for functions in the PLT have not yet been filled in.
|
||||
Their initial contents will arrange when called to set the high 22
|
||||
bits of %g1 with an offset into the .rela.plt section and jump to
|
||||
the beginning of the PLT. */
|
||||
plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
|
||||
|
||||
/* The beginning of the PLT does:
|
||||
|
||||
save %sp, -64, %sp
|
||||
pltpc: call _dl_runtime_resolve
|
||||
nop
|
||||
.word MAP
|
||||
|
||||
This saves the register window containing the arguments, and the
|
||||
PC value (pltpc) implicitly saved in %o7 by the call points near the
|
||||
location where we store the link_map pointer for this object. */
|
||||
|
||||
plt[0] = OPCODE_SAVE_SP64; /* save %sp, -64, %sp */
|
||||
/* Construct PC-relative word address. */
|
||||
plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve -
|
||||
(Elf32_Addr) &plt[1]) >> 2);
|
||||
plt[2] = OPCODE_NOP; /* Fill call delay slot. */
|
||||
plt[3] = (Elf32_Addr *) l;
|
||||
}
|
||||
|
||||
return lazy;
|
||||
}
|
||||
|
||||
/* This code is used in dl-runtime.c to call the `fixup' function
|
||||
and then redirect to the address it returns. */
|
||||
#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
|
||||
# Trampoline for _dl_runtime_resolver
|
||||
.globl _dl_runtime_resolve
|
||||
.type _dl_runtime_resolve, @function
|
||||
_dl_runtime_resolve:
|
||||
t 1
|
||||
#call %g0
|
||||
# Pass two args to fixup: the PLT address computed from the PC saved
|
||||
# in the PLT's call insn, and the reloc offset passed in %g1.
|
||||
#ld [%o7 + 8], %o1 | Second arg, loaded from PLTPC[2].
|
||||
#call fixup
|
||||
#shrl %g1, 22, %o0 | First arg, set in delay slot of call.
|
||||
# Jump to the real function.
|
||||
#jmpl %o0, %g0
|
||||
# In the delay slot of that jump, restore the register window
|
||||
# saved by the first insn of the PLT.
|
||||
#restore
|
||||
.size _dl_runtime_resolve, . - _dl_runtime_resolve
|
||||
");
|
||||
|
||||
/* The PLT uses Elf32_Rela relocs. */
|
||||
#define elf_machine_relplt elf_machine_rela
|
||||
|
||||
|
||||
/* Mask identifying addresses reserved for the user program,
|
||||
where the dynamic linker should not map anything. */
|
||||
#define ELF_MACHINE_USER_ADDRESS_MASK ???
|
||||
|
||||
/* Initial entry point code for the dynamic linker.
|
||||
The C function `_dl_start' is the real entry point;
|
||||
its return value is the user program's entry point. */
|
||||
|
||||
#define RTLD_START __asm__ ( \
|
||||
".text\n\
|
||||
.globl _start\n\
|
||||
.type _start,@function\n\
|
||||
_start:\n\
|
||||
/* Pass pointer to argument block to _dl_start. */\n\
|
||||
add %sp,64,%o0\n\
|
||||
call _dl_start\n\
|
||||
nop\n\
|
||||
\n\
|
||||
mov %o0,%l0\n\
|
||||
\n\
|
||||
2:\n\
|
||||
call 1f\n\
|
||||
nop\n\
|
||||
1:\n\
|
||||
sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
|
||||
sethi %hi(_dl_default_scope),%l3\n\
|
||||
or %l2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
|
||||
or %l3,%lo(_dl_default_scope),%l3\n\
|
||||
add %o7,%l2,%l1\n\
|
||||
# %l1 has the GOT. %l3 has _dl_default_scope GOT offset\n\
|
||||
ld [%l1+%l3],%l4\n\
|
||||
# %l4 has pointer to _dl_default_scope. Now, load _dl_default_scope [2]\n\
|
||||
ld [%l4+8],%l4\n\
|
||||
# %l4 has _dl_default_scope [2]\n\
|
||||
# call _dl_init_next until it returns 0, pass _dl_default_scope [2]\n\
|
||||
3:\n\
|
||||
call _dl_init_next\n\
|
||||
mov %l4,%o0\n\
|
||||
cmp %o0,%g0\n\
|
||||
bz,a 4f\n\
|
||||
nop\n\
|
||||
call %o0\n\
|
||||
/* Pass pointer to argument block to this init function */\n\
|
||||
add %sp,64,%o0\n\
|
||||
b,a 3b\n\
|
||||
4:\n\
|
||||
# Clear the _dl_starting_up variable and pass _dl_fini in %g1 as per ELF ABI.\n\
|
||||
sethi %hi(_dl_starting_up),%l4\n\
|
||||
sethi %hi(_dl_fini),%l3\n\
|
||||
or %l4,%lo(_dl_starting_up),%l4\n\
|
||||
or %l3,%lo(_dl_fini),%l3\n\
|
||||
# clear _dl_starting_up\n\
|
||||
ld [%l1+%l4],%l5\n\
|
||||
st %g0,[%l5]\n\
|
||||
# load out fini function for atexit in %g1\n\
|
||||
ld [%l3+%l1],%g1\n\
|
||||
# jump to the user program entry point.\n\
|
||||
jmpl %l0,%g0\n\
|
||||
nop\n\
|
||||
");
|
||||
|
||||
@@ -10,11 +10,13 @@
|
||||
* This code optimizes short (less than 13-bit) multiplies.
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
|
||||
ENTRY(.mul)
|
||||
mov %o0, %y ! multiplier -> Y
|
||||
andncc %o0, 0xfff, %g0 ! test bits 12..31
|
||||
be Lmul_shortway ! if zero, can do it the short way
|
||||
be LOC(mul_shortway) ! if zero, can do it the short way
|
||||
andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
|
||||
|
||||
/*
|
||||
@@ -81,7 +83,7 @@ ENTRY(.mul)
|
||||
! and put upper half in place
|
||||
#endif
|
||||
|
||||
Lmul_shortway:
|
||||
LOC(mul_shortway):
|
||||
/*
|
||||
* Short multiply. 12 steps, followed by a final shift step.
|
||||
* The resulting bits are off by 12 and (32-12) = 20 bit positions,
|
||||
@@ -121,3 +123,5 @@ Lmul_shortway:
|
||||
or %o5, %o0, %o0 ! construct low part of result
|
||||
retl
|
||||
sra %o4, 20, %o1 ! ... and extract high part of result
|
||||
|
||||
END(.mul)
|
||||
|
||||
86
sysdeps/sparc/sparc32/elf/start.S
Normal file
86
sysdeps/sparc/sparc32/elf/start.S
Normal file
@@ -0,0 +1,86 @@
|
||||
/* Startup code for elf32-sparc
|
||||
Copyright (C) 1997 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <richard@gnu.ai.mit.edu>, 1997.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
|
||||
/* _start -- process entry point for elf32-sparc executables.

   Sequence: terminate the frame chain, run __libc_init_first, register
   the termination function handed over in %g1 (if any) with atexit,
   decode argc/argv/envp from the initial stack, publish __environ,
   run _init, register _fini, then call main and hand its result to exit.

   Instructions indented by one extra space sit in the delay slot of the
   preceding call/branch.  */

	.section ".text"
	.align 4
	.global _start
	.type _start,#function
_start:

  /* Terminate the stack frame, and reserve space for functions to
     drop their arguments.  */
	mov	%g0, %fp
	sub	%sp, 6*4, %sp

  /* Save %g1.  When starting a binary via the dynamic linker, %g1
     contains the address of the shared library termination function,
     which we will register below with atexit() to be called by exit().
     If we are statically linked, this will be NULL.  */

  /* Do essential libc initialization (sp points to argc, argv, and envp).
     The copy of %g1 into %l0 happens in the delay slot of the call.  */
	call	__libc_init_first
	 mov	%g1, %l0

  /* Now that we have the proper stack frame, register library termination
     function, if there is any:  */

	cmp	%l0, 0
	beq	1f
	 nop
	call	atexit
	 mov	%l0, %o0
1:

  /* Extract the arguments and environment as encoded on the stack.  The
     argument info starts after one register window (16 words) past the
     original SP; the 6 words reserved above move that to offset 22*4.

       %o0 = argc
       %o1 = argv  (first word after argc)
       %o2 = envp  = &argv[argc + 1]

     Each argv slot is one 4-byte word, so argc is scaled by 4 (shift
     left by 2) and one further word is added to step over the slot that
     follows the last argument.  */
	ld	[%sp+22*4], %o0
	add	%sp, 23*4, %o1
	sll	%o0, 2, %o2
	add	%o2, %o1, %o2
	sethi	%hi(__environ), %g2
	add	%o2, 4, %o2
	st	%o2, [%g2+%lo(__environ)]

  /* Tuck the three values away in local registers so that they survive
     the calls below; %l2 is written in the delay slot of the _init call.  */
	mov	%o0, %l0
	mov	%o1, %l1

  /* Call _init, the entry point to our own .init section.  */
	call	_init
	 mov	%o2, %l2

  /* Register our .fini section with atexit.  */
	sethi	%hi(_fini), %o0
	call	atexit
	 add	%o0, %lo(_fini), %o0

  /* Call the user's main and exit with its return value.  main's result
     is already in %o0 when exit is called.  */
	mov	%l0, %o0
	mov	%l1, %o1
	call	main
	 mov	%l2, %o2
	call	exit
	 nop

  /* Die very horribly if exit returns.  */
	unimp

	.size _start,.-_start
|
||||
@@ -1,68 +0,0 @@
|
||||
/* Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
extern char **__environ;
|
||||
|
||||
extern void __libc_init_first __P ((int argc, char **argv, char **envp));
|
||||
extern int main __P ((int argc, char **argv, char **envp));
|
||||
|
||||
register long int sp asm("%sp"), fp asm("%fp");
|
||||
|
||||
void
|
||||
_start (void)
|
||||
{
|
||||
/* It is important that these be declared `register'.
|
||||
Otherwise, when compiled without optimization, they are put on the
|
||||
stack, which loses completely after we zero the FP. */
|
||||
register int argc;
|
||||
register char **argv, **envp;
|
||||
register long int g1 asm ("%g1");
|
||||
unsigned long int copy_g1 = g1;
|
||||
|
||||
/* Unwind the frame built when we entered the function. */
|
||||
asm("restore");
|
||||
if (copy_g1)
|
||||
atexit (copy_g1);
|
||||
|
||||
/* And clear the frame pointer. */
|
||||
fp = 0;
|
||||
|
||||
/* The argument info starts after one register
|
||||
window (64 bytes) past the SP. */
|
||||
argc = ((int *) sp)[16];
|
||||
argv = (char **) &((int *) sp)[17];
|
||||
envp = &argv[argc + 1];
|
||||
__environ = envp;
|
||||
|
||||
/* Allocate 24 bytes of stack space for the register save area. */
|
||||
sp -= 24;
|
||||
__libc_init_first (argc, argv, envp);
|
||||
#ifdef ELF_INIT_FINI
|
||||
{
|
||||
extern void _fini (void);
|
||||
_init ();
|
||||
atexit (_fini);
|
||||
}
|
||||
#endif
|
||||
exit (main (argc, argv, envp));
|
||||
}
|
||||
@@ -72,5 +72,5 @@ typedef unsigned int fenv_t;
|
||||
#endif
|
||||
|
||||
/* For internal use only: access the fp state register. */
|
||||
#define __fenv_stfsr(X) __asm__("stfsr %0" : "=m"(X))
|
||||
#define __fenv_ldfsr(X) __asm__ __volatile__("ldfsr %0" : : "m"(X))
|
||||
#define __fenv_stfsr(X) __asm__("st %%fsr,%0" : "=m"(X))
|
||||
#define __fenv_ldfsr(X) __asm__ __volatile__("ld %0,%%fsr" : : "m"(X))
|
||||
|
||||
@@ -41,13 +41,11 @@
|
||||
|
||||
/* Now two recommended cw */
|
||||
|
||||
/* Linux default:
|
||||
/* Linux and IEEE default:
|
||||
- extended precision
|
||||
- rounding to nearest
|
||||
- exceptions on overflow, zero divide and NaN */
|
||||
#define _FPU_DEFAULT 0x1e
|
||||
|
||||
/* IEEE: same as above, but exceptions */
|
||||
- no exceptions. */
|
||||
#define _FPU_DEFAULT 0x0
|
||||
#define _FPU_IEEE 0x0
|
||||
|
||||
/* Type of the control word. */
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
! sparc __mpn_lshift --
|
||||
|
||||
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||
|
||||
! Sparc __mpn_lshift --
|
||||
!
|
||||
! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -21,17 +21,14 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
! res_ptr %o0
|
||||
! src_ptr %o1
|
||||
! size %o2
|
||||
! cnt %o3
|
||||
! RES_PTR %o0
|
||||
! SRC_PTR %o1
|
||||
! SIZE %o2
|
||||
! CNT %o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_lshift)
|
||||
C_SYMBOL_NAME(__mpn_lshift):
|
||||
ENTRY(__mpn_lshift)
|
||||
sll %o2,2,%g1
|
||||
add %o1,%g1,%o1 ! make %o1 point at end of src
|
||||
ld [%o1-4],%g2 ! load first limb
|
||||
@@ -40,12 +37,13 @@ C_SYMBOL_NAME(__mpn_lshift):
|
||||
add %o2,-1,%o2
|
||||
andcc %o2,4-1,%g4 ! number of limbs in first loop
|
||||
srl %g2,%o5,%g1 ! compute function result
|
||||
be L0 ! if multiple of 4 limbs, skip first loop
|
||||
be LOC(0) ! if multiple of 4 limbs, skip first loop
|
||||
st %g1,[%sp+80]
|
||||
|
||||
sub %o2,%g4,%o2 ! adjust count for main loop
|
||||
|
||||
Loop0: ld [%o1-8],%g3
|
||||
LOC(loop0):
|
||||
ld [%o1-8],%g3
|
||||
add %o0,-4,%o0
|
||||
add %o1,-4,%o1
|
||||
addcc %g4,-1,%g4
|
||||
@@ -53,14 +51,15 @@ Loop0: ld [%o1-8],%g3
|
||||
srl %g3,%o5,%g1
|
||||
mov %g3,%g2
|
||||
or %o4,%g1,%o4
|
||||
bne Loop0
|
||||
bne LOC(loop0)
|
||||
st %o4,[%o0+0]
|
||||
|
||||
L0: tst %o2
|
||||
be Lend
|
||||
LOC(0): tst %o2
|
||||
be LOC(end)
|
||||
nop
|
||||
|
||||
Loop: ld [%o1-8],%g3
|
||||
LOC(loop):
|
||||
ld [%o1-8],%g3
|
||||
add %o0,-16,%o0
|
||||
addcc %o2,-4,%o2
|
||||
sll %g2,%o3,%o4
|
||||
@@ -86,10 +85,13 @@ Loop: ld [%o1-8],%g3
|
||||
|
||||
add %o1,-16,%o1
|
||||
or %g4,%g1,%g4
|
||||
bne Loop
|
||||
bne LOC(loop)
|
||||
st %g4,[%o0+0]
|
||||
|
||||
Lend: sll %g2,%o3,%g2
|
||||
LOC(end):
|
||||
sll %g2,%o3,%g2
|
||||
st %g2,[%o0-4]
|
||||
retl
|
||||
ld [%sp+80],%o0
|
||||
|
||||
END(__mpn_lshift)
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
! the result in a second limb vector.
|
||||
|
||||
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -22,10 +22,10 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
! res_ptr o0
|
||||
! s1_ptr o1
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
! RES_PTR o0
|
||||
! S1_PTR o1
|
||||
! SIZE o2
|
||||
! S2_LIMB o3
|
||||
|
||||
! ADD CODE FOR SMALL MULTIPLIERS!
|
||||
!1: ld
|
||||
@@ -89,12 +89,9 @@
|
||||
! sll a,29,y2
|
||||
! st x,
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_mul_1)
|
||||
C_SYMBOL_NAME(__mpn_mul_1):
|
||||
ENTRY(__mpn_mul_1)
|
||||
! Make S1_PTR and RES_PTR point at the end of their blocks
|
||||
! and put (- 4 x SIZE) in index/loop counter.
|
||||
sll %o2,2,%o2
|
||||
@@ -103,16 +100,16 @@ C_SYMBOL_NAME(__mpn_mul_1):
|
||||
sub %g0,%o2,%o2
|
||||
|
||||
cmp %o3,0xfff
|
||||
bgu Large
|
||||
bgu LOC(large)
|
||||
nop
|
||||
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
b L0
|
||||
b LOC(0)
|
||||
add %o4,-4,%o4
|
||||
Loop0:
|
||||
LOC(loop0):
|
||||
st %g1,[%o4+%o2]
|
||||
L0: wr %g0,%o3,%y
|
||||
LOC(0): wr %g0,%o3,%y
|
||||
sra %o5,31,%g2
|
||||
and %o3,%g2,%g2
|
||||
andcc %g1,0,%g1
|
||||
@@ -138,21 +135,22 @@ L0: wr %g0,%o3,%y
|
||||
addcc %g1,%o0,%g1
|
||||
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
|
||||
addcc %o2,4,%o2 ! loop counter
|
||||
bne,a Loop0
|
||||
bne,a LOC(loop0)
|
||||
ld [%o1+%o2],%o5
|
||||
|
||||
retl
|
||||
st %g1,[%o4+%o2]
|
||||
|
||||
|
||||
Large: ld [%o1+%o2],%o5
|
||||
LOC(large):
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
|
||||
b L1
|
||||
b LOC(1)
|
||||
add %o4,-4,%o4
|
||||
Loop:
|
||||
LOC(loop):
|
||||
st %g3,[%o4+%o2]
|
||||
L1: wr %g0,%o5,%y
|
||||
LOC(1): wr %g0,%o5,%y
|
||||
and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
|
||||
andcc %g0,%g0,%g1
|
||||
mulscc %g1,%o3,%g1
|
||||
@@ -192,8 +190,10 @@ L1: wr %g0,%o5,%y
|
||||
addcc %g3,%o0,%g3
|
||||
addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
|
||||
addcc %o2,4,%o2 ! loop counter
|
||||
bne,a Loop
|
||||
bne,a LOC(loop)
|
||||
ld [%o1+%o2],%o5
|
||||
|
||||
retl
|
||||
st %g3,[%o4+%o2]
|
||||
|
||||
END(__mpn_mul_1)
|
||||
|
||||
@@ -37,22 +37,14 @@
|
||||
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#ifdef __linux__
|
||||
#include <asm/traps.h>
|
||||
#else
|
||||
#ifdef __svr4__
|
||||
#include <sysdep.h>
|
||||
#include <sys/trap.h>
|
||||
#else
|
||||
#include <machine/trap.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
ENTRY(.rem)
|
||||
! compute sign of result; if neither is negative, no problem
|
||||
orcc %o1, %o0, %g0 ! either negative?
|
||||
bge 2f ! no, go do the divide
|
||||
mov %o0, %g6 ! sign of remainder matches %o0
|
||||
mov %o0, %g3 ! sign of remainder matches %o0
|
||||
tst %o1
|
||||
bge 1f
|
||||
tst %o0
|
||||
@@ -76,11 +68,11 @@ ENTRY(.rem)
|
||||
|
||||
1:
|
||||
cmp %o3, %o5 ! if %o1 exceeds %o0, done
|
||||
blu Lgot_result ! (and algorithm fails otherwise)
|
||||
blu LOC(got_result) ! (and algorithm fails otherwise)
|
||||
clr %o2
|
||||
sethi %hi(1 << (32 - 4 - 1)), %g1
|
||||
cmp %o3, %g1
|
||||
blu Lnot_really_big
|
||||
blu LOC(not_really_big)
|
||||
clr %o4
|
||||
|
||||
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
|
||||
@@ -91,15 +83,15 @@ ENTRY(.rem)
|
||||
1:
|
||||
cmp %o5, %g1
|
||||
bgeu 3f
|
||||
mov 1, %g7
|
||||
mov 1, %g2
|
||||
sll %o5, 4, %o5
|
||||
b 1b
|
||||
add %o4, 1, %o4
|
||||
|
||||
! Now compute %g7.
|
||||
! Now compute %g2.
|
||||
2: addcc %o5, %o5, %o5
|
||||
bcc Lnot_too_big
|
||||
add %g7, 1, %g7
|
||||
bcc LOC(not_too_big)
|
||||
add %g2, 1, %g2
|
||||
|
||||
! We get here if the %o1 overflowed while shifting.
|
||||
! This means that %o3 has the high-order bit set.
|
||||
@@ -107,20 +99,20 @@ ENTRY(.rem)
|
||||
sll %g1, 4, %g1 ! high order bit
|
||||
srl %o5, 1, %o5 ! rest of %o5
|
||||
add %o5, %g1, %o5
|
||||
b Ldo_single_div
|
||||
sub %g7, 1, %g7
|
||||
b LOC(do_single_div)
|
||||
sub %g2, 1, %g2
|
||||
|
||||
Lnot_too_big:
|
||||
LOC(not_too_big):
|
||||
3: cmp %o5, %o3
|
||||
blu 2b
|
||||
nop
|
||||
be Ldo_single_div
|
||||
be LOC(do_single_div)
|
||||
nop
|
||||
/* NB: these are commented out in the V8-Sparc manual as well */
|
||||
/* (I do not understand this) */
|
||||
! %o5 > %o3: went too far: back up 1 step
|
||||
! srl %o5, 1, %o5
|
||||
! dec %g7
|
||||
! dec %g2
|
||||
! do single-bit divide steps
|
||||
!
|
||||
! We have to be careful here. We know that %o3 >= %o5, so we can do the
|
||||
@@ -129,15 +121,15 @@ ENTRY(.rem)
|
||||
! order bit set in the first step, just falling into the regular
|
||||
! division loop will mess up the first time around.
|
||||
! So we unroll slightly...
|
||||
Ldo_single_div:
|
||||
subcc %g7, 1, %g7
|
||||
bl Lend_regular_divide
|
||||
LOC(do_single_div):
|
||||
subcc %g2, 1, %g2
|
||||
bl LOC(end_regular_divide)
|
||||
nop
|
||||
sub %o3, %o5, %o3
|
||||
mov 1, %o2
|
||||
b Lend_single_divloop
|
||||
b LOC(end_single_divloop)
|
||||
nop
|
||||
Lsingle_divloop:
|
||||
LOC(single_divloop):
|
||||
sll %o2, 1, %o2
|
||||
bl 1f
|
||||
srl %o5, 1, %o5
|
||||
@@ -149,221 +141,223 @@ ENTRY(.rem)
|
||||
add %o3, %o5, %o3
|
||||
sub %o2, 1, %o2
|
||||
2:
|
||||
Lend_single_divloop:
|
||||
subcc %g7, 1, %g7
|
||||
bge Lsingle_divloop
|
||||
LOC(end_single_divloop):
|
||||
subcc %g2, 1, %g2
|
||||
bge LOC(single_divloop)
|
||||
tst %o3
|
||||
b,a Lend_regular_divide
|
||||
b,a LOC(end_regular_divide)
|
||||
|
||||
Lnot_really_big:
|
||||
LOC(not_really_big):
|
||||
1:
|
||||
sll %o5, 4, %o5
|
||||
cmp %o5, %o3
|
||||
bleu 1b
|
||||
addcc %o4, 1, %o4
|
||||
be Lgot_result
|
||||
be LOC(got_result)
|
||||
sub %o4, 1, %o4
|
||||
|
||||
tst %o3 ! set up for initial iteration
|
||||
Ldivloop:
|
||||
LOC(divloop):
|
||||
sll %o2, 4, %o2
|
||||
! depth 1, accumulated bits 0
|
||||
bl L.1.16
|
||||
bl LOC(1.16)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits 1
|
||||
bl L.2.17
|
||||
bl LOC(2.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 3
|
||||
bl L.3.19
|
||||
bl LOC(3.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 7
|
||||
bl L.4.23
|
||||
bl LOC(4.23)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2+1), %o2
|
||||
|
||||
L.4.23:
|
||||
|
||||
LOC(4.23):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2-1), %o2
|
||||
|
||||
|
||||
L.3.19:
|
||||
|
||||
|
||||
LOC(3.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 5
|
||||
bl L.4.21
|
||||
bl LOC(4.21)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2+1), %o2
|
||||
|
||||
L.4.21:
|
||||
|
||||
LOC(4.21):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.17:
|
||||
|
||||
|
||||
|
||||
LOC(2.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 1
|
||||
bl L.3.17
|
||||
bl LOC(3.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 3
|
||||
bl L.4.19
|
||||
bl LOC(4.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2+1), %o2
|
||||
|
||||
L.4.19:
|
||||
|
||||
LOC(4.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2-1), %o2
|
||||
|
||||
|
||||
L.3.17:
|
||||
|
||||
|
||||
LOC(3.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 1
|
||||
bl L.4.17
|
||||
bl LOC(4.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2+1), %o2
|
||||
|
||||
L.4.17:
|
||||
|
||||
LOC(4.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
L.1.16:
|
||||
|
||||
|
||||
|
||||
|
||||
LOC(1.16):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits -1
|
||||
bl L.2.15
|
||||
bl LOC(2.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -1
|
||||
bl L.3.15
|
||||
bl LOC(3.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -1
|
||||
bl L.4.15
|
||||
bl LOC(4.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2+1), %o2
|
||||
|
||||
L.4.15:
|
||||
|
||||
LOC(4.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2-1), %o2
|
||||
|
||||
|
||||
L.3.15:
|
||||
|
||||
|
||||
LOC(3.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -3
|
||||
bl L.4.13
|
||||
bl LOC(4.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2+1), %o2
|
||||
|
||||
L.4.13:
|
||||
|
||||
LOC(4.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.15:
|
||||
|
||||
|
||||
|
||||
LOC(2.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -3
|
||||
bl L.3.13
|
||||
bl LOC(3.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -5
|
||||
bl L.4.11
|
||||
bl LOC(4.11)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2+1), %o2
|
||||
|
||||
L.4.11:
|
||||
|
||||
LOC(4.11):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2-1), %o2
|
||||
|
||||
|
||||
L.3.13:
|
||||
|
||||
|
||||
LOC(3.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -7
|
||||
bl L.4.9
|
||||
bl LOC(4.9)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2+1), %o2
|
||||
|
||||
L.4.9:
|
||||
|
||||
LOC(4.9):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
9:
|
||||
Lend_regular_divide:
|
||||
LOC(end_regular_divide):
|
||||
subcc %o4, 1, %o4
|
||||
bge Ldivloop
|
||||
bge LOC(divloop)
|
||||
tst %o3
|
||||
bl,a Lgot_result
|
||||
bl,a LOC(got_result)
|
||||
! non-restoring fixup here (one instruction only!)
|
||||
add %o3, %o1, %o3
|
||||
|
||||
|
||||
Lgot_result:
|
||||
LOC(got_result):
|
||||
! check to see if answer should be < 0
|
||||
tst %g6
|
||||
tst %g3
|
||||
bl,a 1f
|
||||
sub %g0, %o3, %o3
|
||||
1:
|
||||
retl
|
||||
mov %o3, %o0
|
||||
|
||||
END(.rem)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
! sparc __mpn_rshift --
|
||||
|
||||
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -21,28 +21,26 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
! res_ptr %o0
|
||||
! src_ptr %o1
|
||||
! size %o2
|
||||
! cnt %o3
|
||||
! RES_PTR %o0
|
||||
! SRC_PTR %o1
|
||||
! SIZE %o2
|
||||
! CNT %o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_rshift)
|
||||
C_SYMBOL_NAME(__mpn_rshift):
|
||||
ENTRY(__mpn_rshift)
|
||||
ld [%o1],%g2 ! load first limb
|
||||
sub %g0,%o3,%o5 ! negate shift count
|
||||
add %o2,-1,%o2
|
||||
andcc %o2,4-1,%g4 ! number of limbs in first loop
|
||||
sll %g2,%o5,%g1 ! compute function result
|
||||
be L0 ! if multiple of 4 limbs, skip first loop
|
||||
be LOC(0) ! if multiple of 4 limbs, skip first loop
|
||||
st %g1,[%sp+80]
|
||||
|
||||
sub %o2,%g4,%o2 ! adjust count for main loop
|
||||
|
||||
Loop0: ld [%o1+4],%g3
|
||||
LOC(loop0):
|
||||
ld [%o1+4],%g3
|
||||
add %o0,4,%o0
|
||||
add %o1,4,%o1
|
||||
addcc %g4,-1,%g4
|
||||
@@ -50,14 +48,15 @@ Loop0: ld [%o1+4],%g3
|
||||
sll %g3,%o5,%g1
|
||||
mov %g3,%g2
|
||||
or %o4,%g1,%o4
|
||||
bne Loop0
|
||||
bne LOC(loop0)
|
||||
st %o4,[%o0-4]
|
||||
|
||||
L0: tst %o2
|
||||
be Lend
|
||||
LOC(0): tst %o2
|
||||
be LOC(end)
|
||||
nop
|
||||
|
||||
Loop: ld [%o1+4],%g3
|
||||
LOC(loop):
|
||||
ld [%o1+4],%g3
|
||||
add %o0,16,%o0
|
||||
addcc %o2,-4,%o2
|
||||
srl %g2,%o3,%o4
|
||||
@@ -83,10 +82,13 @@ Loop: ld [%o1+4],%g3
|
||||
|
||||
add %o1,16,%o1
|
||||
or %g4,%g1,%g4
|
||||
bne Loop
|
||||
bne LOC(loop)
|
||||
st %g4,[%o0-4]
|
||||
|
||||
Lend: srl %g2,%o3,%g2
|
||||
LOC(end):
|
||||
srl %g2,%o3,%g2
|
||||
st %g2,[%o0-0]
|
||||
retl
|
||||
ld [%sp+80],%o0
|
||||
|
||||
END(__mpn_rshift)
|
||||
|
||||
@@ -37,22 +37,14 @@
|
||||
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#ifdef __linux__
|
||||
#include <asm/traps.h>
|
||||
#else
|
||||
#ifdef __svr4__
|
||||
#include <sysdep.h>
|
||||
#include <sys/trap.h>
|
||||
#else
|
||||
#include <machine/trap.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
ENTRY(.div)
|
||||
! compute sign of result; if neither is negative, no problem
|
||||
orcc %o1, %o0, %g0 ! either negative?
|
||||
bge 2f ! no, go do the divide
|
||||
xor %o1, %o0, %g6 ! compute sign in any case
|
||||
xor %o1, %o0, %g3 ! compute sign in any case
|
||||
tst %o1
|
||||
bge 1f
|
||||
tst %o0
|
||||
@@ -76,11 +68,11 @@ ENTRY(.div)
|
||||
|
||||
1:
|
||||
cmp %o3, %o5 ! if %o1 exceeds %o0, done
|
||||
blu Lgot_result ! (and algorithm fails otherwise)
|
||||
blu LOC(got_result) ! (and algorithm fails otherwise)
|
||||
clr %o2
|
||||
sethi %hi(1 << (32 - 4 - 1)), %g1
|
||||
cmp %o3, %g1
|
||||
blu Lnot_really_big
|
||||
blu LOC(not_really_big)
|
||||
clr %o4
|
||||
|
||||
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
|
||||
@@ -91,15 +83,15 @@ ENTRY(.div)
|
||||
1:
|
||||
cmp %o5, %g1
|
||||
bgeu 3f
|
||||
mov 1, %g7
|
||||
mov 1, %g2
|
||||
sll %o5, 4, %o5
|
||||
b 1b
|
||||
add %o4, 1, %o4
|
||||
|
||||
! Now compute %g7.
|
||||
! Now compute %g2.
|
||||
2: addcc %o5, %o5, %o5
|
||||
bcc Lnot_too_big
|
||||
add %g7, 1, %g7
|
||||
bcc LOC(not_too_big)
|
||||
add %g2, 1, %g2
|
||||
|
||||
! We get here if the %o1 overflowed while shifting.
|
||||
! This means that %o3 has the high-order bit set.
|
||||
@@ -107,20 +99,20 @@ ENTRY(.div)
|
||||
sll %g1, 4, %g1 ! high order bit
|
||||
srl %o5, 1, %o5 ! rest of %o5
|
||||
add %o5, %g1, %o5
|
||||
b Ldo_single_div
|
||||
sub %g7, 1, %g7
|
||||
b LOC(do_single_div)
|
||||
sub %g2, 1, %g2
|
||||
|
||||
Lnot_too_big:
|
||||
LOC(not_too_big):
|
||||
3: cmp %o5, %o3
|
||||
blu 2b
|
||||
nop
|
||||
be Ldo_single_div
|
||||
be LOC(do_single_div)
|
||||
nop
|
||||
/* NB: these are commented out in the V8-Sparc manual as well */
|
||||
/* (I do not understand this) */
|
||||
! %o5 > %o3: went too far: back up 1 step
|
||||
! srl %o5, 1, %o5
|
||||
! dec %g7
|
||||
! dec %g2
|
||||
! do single-bit divide steps
|
||||
!
|
||||
! We have to be careful here. We know that %o3 >= %o5, so we can do the
|
||||
@@ -129,15 +121,15 @@ ENTRY(.div)
|
||||
! order bit set in the first step, just falling into the regular
|
||||
! division loop will mess up the first time around.
|
||||
! So we unroll slightly...
|
||||
Ldo_single_div:
|
||||
subcc %g7, 1, %g7
|
||||
bl Lend_regular_divide
|
||||
LOC(do_single_div):
|
||||
subcc %g2, 1, %g2
|
||||
bl LOC(end_regular_divide)
|
||||
nop
|
||||
sub %o3, %o5, %o3
|
||||
mov 1, %o2
|
||||
b Lend_single_divloop
|
||||
b LOC(end_single_divloop)
|
||||
nop
|
||||
Lsingle_divloop:
|
||||
LOC(single_divloop):
|
||||
sll %o2, 1, %o2
|
||||
bl 1f
|
||||
srl %o5, 1, %o5
|
||||
@@ -149,221 +141,223 @@ ENTRY(.div)
|
||||
add %o3, %o5, %o3
|
||||
sub %o2, 1, %o2
|
||||
2:
|
||||
Lend_single_divloop:
|
||||
subcc %g7, 1, %g7
|
||||
bge Lsingle_divloop
|
||||
LOC(end_single_divloop):
|
||||
subcc %g2, 1, %g2
|
||||
bge LOC(single_divloop)
|
||||
tst %o3
|
||||
b,a Lend_regular_divide
|
||||
b,a LOC(end_regular_divide)
|
||||
|
||||
Lnot_really_big:
|
||||
LOC(not_really_big):
|
||||
1:
|
||||
sll %o5, 4, %o5
|
||||
cmp %o5, %o3
|
||||
bleu 1b
|
||||
addcc %o4, 1, %o4
|
||||
be Lgot_result
|
||||
be LOC(got_result)
|
||||
sub %o4, 1, %o4
|
||||
|
||||
tst %o3 ! set up for initial iteration
|
||||
Ldivloop:
|
||||
LOC(divloop):
|
||||
sll %o2, 4, %o2
|
||||
! depth 1, accumulated bits 0
|
||||
bl L.1.16
|
||||
bl LOC(1.16)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits 1
|
||||
bl L.2.17
|
||||
bl LOC(2.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 3
|
||||
bl L.3.19
|
||||
bl LOC(3.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 7
|
||||
bl L.4.23
|
||||
bl LOC(4.23)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2+1), %o2
|
||||
|
||||
L.4.23:
|
||||
|
||||
LOC(4.23):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2-1), %o2
|
||||
|
||||
|
||||
L.3.19:
|
||||
|
||||
|
||||
LOC(3.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 5
|
||||
bl L.4.21
|
||||
bl LOC(4.21)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2+1), %o2
|
||||
|
||||
L.4.21:
|
||||
|
||||
LOC(4.21):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.17:
|
||||
|
||||
|
||||
|
||||
LOC(2.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 1
|
||||
bl L.3.17
|
||||
bl LOC(3.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 3
|
||||
bl L.4.19
|
||||
bl LOC(4.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2+1), %o2
|
||||
|
||||
L.4.19:
|
||||
|
||||
LOC(4.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2-1), %o2
|
||||
|
||||
|
||||
L.3.17:
|
||||
|
||||
|
||||
LOC(3.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 1
|
||||
bl L.4.17
|
||||
bl LOC(4.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2+1), %o2
|
||||
|
||||
L.4.17:
|
||||
|
||||
LOC(4.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
L.1.16:
|
||||
|
||||
|
||||
|
||||
|
||||
LOC(1.16):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits -1
|
||||
bl L.2.15
|
||||
bl LOC(2.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -1
|
||||
bl L.3.15
|
||||
bl LOC(3.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -1
|
||||
bl L.4.15
|
||||
bl LOC(4.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2+1), %o2
|
||||
|
||||
L.4.15:
|
||||
|
||||
LOC(4.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2-1), %o2
|
||||
|
||||
|
||||
L.3.15:
|
||||
|
||||
|
||||
LOC(3.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -3
|
||||
bl L.4.13
|
||||
bl LOC(4.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2+1), %o2
|
||||
|
||||
L.4.13:
|
||||
|
||||
LOC(4.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.15:
|
||||
|
||||
|
||||
|
||||
LOC(2.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -3
|
||||
bl L.3.13
|
||||
bl LOC(3.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -5
|
||||
bl L.4.11
|
||||
bl LOC(4.11)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2+1), %o2
|
||||
|
||||
L.4.11:
|
||||
|
||||
LOC(4.11):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2-1), %o2
|
||||
|
||||
|
||||
L.3.13:
|
||||
|
||||
|
||||
LOC(3.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -7
|
||||
bl L.4.9
|
||||
bl LOC(4.9)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2+1), %o2
|
||||
|
||||
L.4.9:
|
||||
|
||||
LOC(4.9):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
9:
|
||||
Lend_regular_divide:
|
||||
LOC(end_regular_divide):
|
||||
subcc %o4, 1, %o4
|
||||
bge Ldivloop
|
||||
bge LOC(divloop)
|
||||
tst %o3
|
||||
bl,a Lgot_result
|
||||
bl,a LOC(got_result)
|
||||
! non-restoring fixup here (one instruction only!)
|
||||
sub %o2, 1, %o2
|
||||
|
||||
|
||||
Lgot_result:
|
||||
LOC(got_result):
|
||||
! check to see if answer should be < 0
|
||||
tst %g6
|
||||
tst %g3
|
||||
bl,a 1f
|
||||
sub %g0, %o2, %o2
|
||||
1:
|
||||
retl
|
||||
mov %o2, %o0
|
||||
|
||||
END(.div)
|
||||
|
||||
@@ -22,33 +22,29 @@
|
||||
#define _ASM 1
|
||||
#include <bits/setjmp.h>
|
||||
|
||||
ENTRY(_setjmp)
|
||||
b 1f
|
||||
set 0, %o1
|
||||
END(_setjmp)
|
||||
|
||||
ENTRY(setjmp)
|
||||
set 1, %o1
|
||||
END(setjmp)
|
||||
|
||||
ENTRY (__sigsetjmp)
|
||||
/* Save our SP and FP; in the delay slot of the jump, save our
|
||||
return PC. Save the signal mask if requested with a tail-call
|
||||
for simplicity; it always returns zero. */
|
||||
ta ST_FLUSH_WINDOWS
|
||||
#ifdef PIC
|
||||
mov %o7,%g1
|
||||
2:
|
||||
call 1f
|
||||
nop
|
||||
1:
|
||||
sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
|
||||
or %g2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
|
||||
add %g2,%o7,%g2
|
||||
sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g3
|
||||
or %g3,%lo(C_SYMBOL_NAME (__sigjmp_save)), %g3
|
||||
st %sp, [%o0 + (JB_SP * 4)]
|
||||
st %fp, [%o0 + (JB_FP * 4)]
|
||||
mov %g1,%o7
|
||||
ld [%g2+%g3],%g1
|
||||
jmp %g1
|
||||
st %o7, [%o0+(JB_PC*4)]
|
||||
#else
|
||||
sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
|
||||
st %sp, [%o0 + (JB_SP*4)]
|
||||
or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1
|
||||
st %fp, [%o0 + (JB_FP*4)]
|
||||
jmp %g1
|
||||
st %o7, [%o0 + (JB_PC*4)]
|
||||
#endif /* PIC */
|
||||
/* Save our PC, SP and FP. Save the signal mask if requested with
|
||||
a tail-call for simplicity; it always returns zero. */
|
||||
ta ST_FLUSH_WINDOWS
|
||||
|
||||
st %o7, [%o0 + (JB_PC * 4)]
|
||||
st %sp, [%o0 + (JB_SP * 4)]
|
||||
st %fp, [%o0 + (JB_FP * 4)]
|
||||
|
||||
mov %o7, %g1
|
||||
call __sigjmp_save
|
||||
mov %g1, %o7
|
||||
END(__sigsetjmp)
|
||||
|
||||
weak_extern(_setjmp)
|
||||
weak_extern(setjmp)
|
||||
|
||||
@@ -27,54 +27,52 @@
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_addmul_1)
|
||||
C_SYMBOL_NAME(__mpn_addmul_1):
|
||||
orcc %g0,%g0,%g2
|
||||
ENTRY(__mpn_addmul_1)
|
||||
ld [%o1+0],%o4 ! 1
|
||||
|
||||
sll %o2,4,%g1
|
||||
and %g1,(4-1)<<4,%g1
|
||||
#if PIC
|
||||
orcc %g0,%g0,%g2
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,LL-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(LL),%g3
|
||||
or %g3,%lo(LL),%g3
|
||||
#endif
|
||||
jmp %g3+%g1
|
||||
and %g1,(4-1)<<4,%g1
|
||||
1: call 2f
|
||||
add %o7,3f-1b,%g3
|
||||
2: jmp %g3+%g1
|
||||
mov %g4,%o7 ! Restore return address register
|
||||
|
||||
.align 4
|
||||
3:
|
||||
LOC(00):
|
||||
add %o0,-4,%o0
|
||||
b LOC(loop00) /* 4, 8, 12, ... */
|
||||
add %o1,-4,%o1
|
||||
nop
|
||||
LL:
|
||||
LL00: add %o0,-4,%o0
|
||||
b Loop00 /* 4, 8, 12, ... */
|
||||
add %o1,-4,%o1
|
||||
nop
|
||||
LL01: b Loop01 /* 1, 5, 9, ... */
|
||||
LOC(01):
|
||||
b LOC(loop01) /* 1, 5, 9, ... */
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
LOC(10):
|
||||
add %o0,-12,%o0 /* 2, 6, 10, ... */
|
||||
b LOC(loop10)
|
||||
add %o1,4,%o1
|
||||
nop
|
||||
LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
|
||||
b Loop10
|
||||
add %o1,4,%o1
|
||||
nop
|
||||
LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
|
||||
b Loop11
|
||||
add %o1,-8,%o1
|
||||
LOC(11):
|
||||
add %o0,-8,%o0 /* 3, 7, 11, ... */
|
||||
b LOC(loop11)
|
||||
add %o1,-8,%o1
|
||||
nop
|
||||
|
||||
1: addcc %g3,%g2,%g3 ! 1
|
||||
LOC(loop):
|
||||
addcc %g3,%g2,%g3 ! 1
|
||||
ld [%o1+4],%o4 ! 2
|
||||
rd %y,%g2 ! 1
|
||||
addx %g0,%g2,%g2
|
||||
ld [%o0+0],%g1 ! 2
|
||||
addcc %g1,%g3,%g3
|
||||
st %g3,[%o0+0] ! 1
|
||||
Loop00: umul %o4,%o3,%g3 ! 2
|
||||
LOC(loop00):
|
||||
umul %o4,%o3,%g3 ! 2
|
||||
ld [%o0+4],%g1 ! 2
|
||||
addxcc %g3,%g2,%g3 ! 2
|
||||
ld [%o1+8],%o4 ! 3
|
||||
@@ -83,7 +81,8 @@ Loop00: umul %o4,%o3,%g3 ! 2
|
||||
nop
|
||||
addcc %g1,%g3,%g3
|
||||
st %g3,[%o0+4] ! 2
|
||||
Loop11: umul %o4,%o3,%g3 ! 3
|
||||
LOC(loop11):
|
||||
umul %o4,%o3,%g3 ! 3
|
||||
addxcc %g3,%g2,%g3 ! 3
|
||||
ld [%o1+12],%o4 ! 4
|
||||
rd %y,%g2 ! 3
|
||||
@@ -92,7 +91,8 @@ Loop11: umul %o4,%o3,%g3 ! 3
|
||||
ld [%o0+8],%g1 ! 2
|
||||
addcc %g1,%g3,%g3
|
||||
st %g3,[%o0+8] ! 3
|
||||
Loop10: umul %o4,%o3,%g3 ! 4
|
||||
LOC(loop10):
|
||||
umul %o4,%o3,%g3 ! 4
|
||||
addxcc %g3,%g2,%g3 ! 4
|
||||
ld [%o1+0],%o4 ! 1
|
||||
rd %y,%g2 ! 4
|
||||
@@ -102,9 +102,10 @@ Loop10: umul %o4,%o3,%g3 ! 4
|
||||
st %g3,[%o0+12] ! 4
|
||||
add %o0,16,%o0
|
||||
addx %g0,%g2,%g2
|
||||
Loop01: addcc %o2,-4,%o2
|
||||
bg 1b
|
||||
umul %o4,%o3,%g3 ! 1
|
||||
LOC(loop01):
|
||||
addcc %o2,-4,%o2
|
||||
bg LOC(loop)
|
||||
umul %o4,%o3,%g3 ! 1
|
||||
|
||||
addcc %g3,%g2,%g3 ! 4
|
||||
rd %y,%g2 ! 4
|
||||
@@ -112,13 +113,7 @@ Loop01: addcc %o2,-4,%o2
|
||||
ld [%o0+0],%g1 ! 2
|
||||
addcc %g1,%g3,%g3
|
||||
st %g3,[%o0+0] ! 4
|
||||
addx %g0,%g2,%o0
|
||||
|
||||
retl
|
||||
nop
|
||||
|
||||
|
||||
! umul, ld, addxcc, rd, st
|
||||
|
||||
! umul, ld, addxcc, rd, ld, addcc, st, addx
|
||||
addx %g0,%g2,%o0
|
||||
|
||||
END(__mpn_addmul_1)
|
||||
|
||||
13
sysdeps/sparc/sparc32/sparcv8/dotmul.S
Normal file
13
sysdeps/sparc/sparc32/sparcv8/dotmul.S
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Sparc v8 has multiply.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.mul)
|
||||
|
||||
smul %o0, %o1, %o0
|
||||
retl
|
||||
rd %y, %o1
|
||||
|
||||
END(.mul)
|
||||
@@ -27,73 +27,77 @@
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 8
|
||||
.global C_SYMBOL_NAME(__mpn_mul_1)
|
||||
C_SYMBOL_NAME(__mpn_mul_1):
|
||||
ENTRY(__mpn_mul_1)
|
||||
sll %o2,4,%g1
|
||||
and %g1,(4-1)<<4,%g1
|
||||
#if PIC
|
||||
mov %o7,%g4 ! Save return address register
|
||||
call 1f
|
||||
add %o7,LL-1f,%g3
|
||||
1: mov %g4,%o7 ! Restore return address register
|
||||
#else
|
||||
sethi %hi(LL),%g3
|
||||
or %g3,%lo(LL),%g3
|
||||
#endif
|
||||
and %g1,(4-1)<<4,%g1
|
||||
1: call 2f
|
||||
add %o7,3f-1b,%g3
|
||||
2: mov %g4,%o7 ! Restore return address register
|
||||
jmp %g3+%g1
|
||||
ld [%o1+0],%o4 ! 1
|
||||
LL:
|
||||
LL00: add %o0,-4,%o0
|
||||
add %o1,-4,%o1
|
||||
b Loop00 /* 4, 8, 12, ... */
|
||||
orcc %g0,%g0,%g2
|
||||
LL01: b Loop01 /* 1, 5, 9, ... */
|
||||
orcc %g0,%g0,%g2
|
||||
nop
|
||||
nop
|
||||
LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
|
||||
add %o1,4,%o1
|
||||
b Loop10
|
||||
orcc %g0,%g0,%g2
|
||||
nop
|
||||
LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
|
||||
add %o1,-8,%o1
|
||||
b Loop11
|
||||
orcc %g0,%g0,%g2
|
||||
ld [%o1+0],%o4 ! 1
|
||||
|
||||
Loop: addcc %g3,%g2,%g3 ! 1
|
||||
.align 4
|
||||
3:
|
||||
LOC(00):
|
||||
add %o0,-4,%o0
|
||||
add %o1,-4,%o1
|
||||
b LOC(loop00) /* 4, 8, 12, ... */
|
||||
orcc %g0,%g0,%g2
|
||||
LOC(01):
|
||||
b LOC(loop01) /* 1, 5, 9, ... */
|
||||
orcc %g0,%g0,%g2
|
||||
nop
|
||||
nop
|
||||
LOC(10):
|
||||
add %o0,-12,%o0 /* 2, 6, 10, ... */
|
||||
add %o1,4,%o1
|
||||
b LOC(loop10)
|
||||
orcc %g0,%g0,%g2
|
||||
nop
|
||||
LOC(11):
|
||||
add %o0,-8,%o0 /* 3, 7, 11, ... */
|
||||
add %o1,-8,%o1
|
||||
b LOC(loop11)
|
||||
orcc %g0,%g0,%g2
|
||||
|
||||
LOC(loop):
|
||||
addcc %g3,%g2,%g3 ! 1
|
||||
ld [%o1+4],%o4 ! 2
|
||||
st %g3,[%o0+0] ! 1
|
||||
rd %y,%g2 ! 1
|
||||
Loop00: umul %o4,%o3,%g3 ! 2
|
||||
LOC(loop00):
|
||||
umul %o4,%o3,%g3 ! 2
|
||||
addxcc %g3,%g2,%g3 ! 2
|
||||
ld [%o1+8],%o4 ! 3
|
||||
st %g3,[%o0+4] ! 2
|
||||
rd %y,%g2 ! 2
|
||||
Loop11: umul %o4,%o3,%g3 ! 3
|
||||
LOC(loop11):
|
||||
umul %o4,%o3,%g3 ! 3
|
||||
addxcc %g3,%g2,%g3 ! 3
|
||||
ld [%o1+12],%o4 ! 4
|
||||
add %o1,16,%o1
|
||||
st %g3,[%o0+8] ! 3
|
||||
rd %y,%g2 ! 3
|
||||
Loop10: umul %o4,%o3,%g3 ! 4
|
||||
LOC(loop10):
|
||||
umul %o4,%o3,%g3 ! 4
|
||||
addxcc %g3,%g2,%g3 ! 4
|
||||
ld [%o1+0],%o4 ! 1
|
||||
st %g3,[%o0+12] ! 4
|
||||
add %o0,16,%o0
|
||||
rd %y,%g2 ! 4
|
||||
addx %g0,%g2,%g2
|
||||
Loop01: addcc %o2,-4,%o2
|
||||
bg Loop
|
||||
umul %o4,%o3,%g3 ! 1
|
||||
LOC(loop01):
|
||||
addcc %o2,-4,%o2
|
||||
bg LOC(loop)
|
||||
umul %o4,%o3,%g3 ! 1
|
||||
|
||||
addcc %g3,%g2,%g3 ! 4
|
||||
st %g3,[%o0+0] ! 4
|
||||
rd %y,%g2 ! 4
|
||||
|
||||
retl
|
||||
addx %g0,%g2,%o0
|
||||
addx %g0,%g2,%o0
|
||||
|
||||
END(__mpn_mul_1)
|
||||
|
||||
18
sysdeps/sparc/sparc32/sparcv8/rem.S
Normal file
18
sysdeps/sparc/sparc32/sparcv8/rem.S
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Sparc v8 has divide.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.rem)
|
||||
|
||||
sra %o0, 31, %o2
|
||||
wr %o2, 0, %y
|
||||
sdivcc %o0, %o1, %o2
|
||||
bvs,a 1f
|
||||
xnor %o2, %g0, %o2
|
||||
1: smul %o2, %o1, %o2
|
||||
retl
|
||||
sub %o0, %o2, %o0
|
||||
|
||||
END(.rem)
|
||||
14
sysdeps/sparc/sparc32/sparcv8/sdiv.S
Normal file
14
sysdeps/sparc/sparc32/sparcv8/sdiv.S
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Sparc v8 has divide.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.div)
|
||||
|
||||
sra %o0, 31, %o2
|
||||
wr %o2, 0, %y
|
||||
ret
|
||||
sdiv %o0, %o1, %o0
|
||||
|
||||
END(.div)
|
||||
@@ -27,12 +27,9 @@
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_submul_1)
|
||||
C_SYMBOL_NAME(__mpn_submul_1):
|
||||
ENTRY(__mpn_submul_1)
|
||||
sub %g0,%o2,%o2 ! negate ...
|
||||
sll %o2,2,%o2 ! ... and scale size
|
||||
sub %o1,%o2,%o1 ! o1 is offset s1_ptr
|
||||
@@ -40,7 +37,8 @@ C_SYMBOL_NAME(__mpn_submul_1):
|
||||
|
||||
mov 0,%o0 ! clear cy_limb
|
||||
|
||||
Loop: ld [%o1+%o2],%o4
|
||||
LOC(loop):
|
||||
ld [%o1+%o2],%o4
|
||||
ld [%g1+%o2],%g2
|
||||
umul %o4,%o3,%o5
|
||||
rd %y,%g3
|
||||
@@ -51,8 +49,10 @@ Loop: ld [%o1+%o2],%o4
|
||||
st %g2,[%g1+%o2]
|
||||
|
||||
addcc %o2,4,%o2
|
||||
bne Loop
|
||||
bne LOC(loop)
|
||||
nop
|
||||
|
||||
retl
|
||||
nop
|
||||
|
||||
END(__mpn_submul_1)
|
||||
|
||||
13
sysdeps/sparc/sparc32/sparcv8/udiv.S
Normal file
13
sysdeps/sparc/sparc32/sparcv8/udiv.S
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Sparc v8 has divide.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.udiv)
|
||||
|
||||
wr %g0, 0, %y
|
||||
retl
|
||||
udiv %o0, %o1, %o0
|
||||
|
||||
END(.udiv)
|
||||
@@ -27,66 +27,75 @@
|
||||
|
||||
#include "sysdep.h"
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__udiv_qrnnd)
|
||||
C_SYMBOL_NAME(__udiv_qrnnd):
|
||||
ENTRY(__udiv_qrnnd)
|
||||
tst %o3
|
||||
bneg Largedivisor
|
||||
bneg LOC(largedivisor)
|
||||
mov 8,%g1
|
||||
|
||||
b Lp1
|
||||
b LOC(p1)
|
||||
addxcc %o2,%o2,%o2
|
||||
|
||||
Lplop: bcc Ln1
|
||||
LOC(plop):
|
||||
bcc LOC(n1)
|
||||
addxcc %o2,%o2,%o2
|
||||
Lp1: addx %o1,%o1,%o1
|
||||
LOC(p1):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%o3,%o4
|
||||
bcc Ln2
|
||||
bcc LOC(n2)
|
||||
addxcc %o2,%o2,%o2
|
||||
Lp2: addx %o1,%o1,%o1
|
||||
LOC(p2):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%o3,%o4
|
||||
bcc Ln3
|
||||
bcc LOC(n3)
|
||||
addxcc %o2,%o2,%o2
|
||||
Lp3: addx %o1,%o1,%o1
|
||||
LOC(p3):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%o3,%o4
|
||||
bcc Ln4
|
||||
bcc LOC(n4)
|
||||
addxcc %o2,%o2,%o2
|
||||
Lp4: addx %o1,%o1,%o1
|
||||
LOC(p4):
|
||||
addx %o1,%o1,%o1
|
||||
addcc %g1,-1,%g1
|
||||
bne Lplop
|
||||
bne LOC(plop)
|
||||
subcc %o1,%o3,%o4
|
||||
bcc Ln5
|
||||
bcc LOC(n5)
|
||||
addxcc %o2,%o2,%o2
|
||||
Lp5: st %o1,[%o0]
|
||||
LOC(p5):
|
||||
st %o1,[%o0]
|
||||
retl
|
||||
xnor %g0,%o2,%o0
|
||||
|
||||
Lnlop: bcc Lp1
|
||||
LOC(nlop):
|
||||
bcc LOC(p1)
|
||||
addxcc %o2,%o2,%o2
|
||||
Ln1: addx %o4,%o4,%o4
|
||||
LOC(n1):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%o3,%o1
|
||||
bcc Lp2
|
||||
bcc LOC(p2)
|
||||
addxcc %o2,%o2,%o2
|
||||
Ln2: addx %o4,%o4,%o4
|
||||
LOC(n2):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%o3,%o1
|
||||
bcc Lp3
|
||||
bcc LOC(p3)
|
||||
addxcc %o2,%o2,%o2
|
||||
Ln3: addx %o4,%o4,%o4
|
||||
LOC(n3):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%o3,%o1
|
||||
bcc Lp4
|
||||
bcc LOC(p4)
|
||||
addxcc %o2,%o2,%o2
|
||||
Ln4: addx %o4,%o4,%o4
|
||||
LOC(n4):
|
||||
addx %o4,%o4,%o4
|
||||
addcc %g1,-1,%g1
|
||||
bne Lnlop
|
||||
bne LOC(nlop)
|
||||
subcc %o4,%o3,%o1
|
||||
bcc Lp5
|
||||
bcc LOC(p5)
|
||||
addxcc %o2,%o2,%o2
|
||||
Ln5: st %o4,[%o0]
|
||||
LOC(n5):
|
||||
st %o4,[%o0]
|
||||
retl
|
||||
xnor %g0,%o2,%o0
|
||||
|
||||
Largedivisor:
|
||||
LOC(largedivisor):
|
||||
and %o2,1,%o5 ! %o5 = n0 & 1
|
||||
|
||||
srl %o2,1,%o2
|
||||
@@ -98,89 +107,109 @@ Largedivisor:
|
||||
srl %o3,1,%g3 ! %g3 = floor(d / 2)
|
||||
add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
|
||||
|
||||
b LLp1
|
||||
b LOC(Lp1)
|
||||
addxcc %o2,%o2,%o2
|
||||
|
||||
LLplop: bcc LLn1
|
||||
LOC(Lplop):
|
||||
bcc LOC(Ln1)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLp1: addx %o1,%o1,%o1
|
||||
LOC(Lp1):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%g3,%o4
|
||||
bcc LLn2
|
||||
bcc LOC(Ln2)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLp2: addx %o1,%o1,%o1
|
||||
LOC(Lp2):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%g3,%o4
|
||||
bcc LLn3
|
||||
bcc LOC(Ln3)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLp3: addx %o1,%o1,%o1
|
||||
LOC(Lp3):
|
||||
addx %o1,%o1,%o1
|
||||
subcc %o1,%g3,%o4
|
||||
bcc LLn4
|
||||
bcc LOC(Ln4)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLp4: addx %o1,%o1,%o1
|
||||
LOC(Lp4):
|
||||
addx %o1,%o1,%o1
|
||||
addcc %g1,-1,%g1
|
||||
bne LLplop
|
||||
bne LOC(Lplop)
|
||||
subcc %o1,%g3,%o4
|
||||
bcc LLn5
|
||||
bcc LOC(Ln5)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLp5: add %o1,%o1,%o1 ! << 1
|
||||
LOC(Lp5):
|
||||
add %o1,%o1,%o1 ! << 1
|
||||
tst %g2
|
||||
bne Oddp
|
||||
bne LOC(Oddp)
|
||||
add %o5,%o1,%o1
|
||||
st %o1,[%o0]
|
||||
retl
|
||||
xnor %g0,%o2,%o0
|
||||
|
||||
LLnlop: bcc LLp1
|
||||
LOC(Lnlop):
|
||||
bcc LOC(Lp1)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLn1: addx %o4,%o4,%o4
|
||||
LOC(Ln1):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%g3,%o1
|
||||
bcc LLp2
|
||||
bcc LOC(Lp2)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLn2: addx %o4,%o4,%o4
|
||||
LOC(Ln2):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%g3,%o1
|
||||
bcc LLp3
|
||||
bcc LOC(Lp3)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLn3: addx %o4,%o4,%o4
|
||||
LOC(Ln3):
|
||||
addx %o4,%o4,%o4
|
||||
subcc %o4,%g3,%o1
|
||||
bcc LLp4
|
||||
bcc LOC(Lp4)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLn4: addx %o4,%o4,%o4
|
||||
LOC(Ln4):
|
||||
addx %o4,%o4,%o4
|
||||
addcc %g1,-1,%g1
|
||||
bne LLnlop
|
||||
bne LOC(Lnlop)
|
||||
subcc %o4,%g3,%o1
|
||||
bcc LLp5
|
||||
bcc LOC(Lp5)
|
||||
addxcc %o2,%o2,%o2
|
||||
LLn5: add %o4,%o4,%o4 ! << 1
|
||||
LOC(Ln5):
|
||||
add %o4,%o4,%o4 ! << 1
|
||||
tst %g2
|
||||
bne Oddn
|
||||
bne LOC(Oddn)
|
||||
add %o5,%o4,%o4
|
||||
st %o4,[%o0]
|
||||
retl
|
||||
xnor %g0,%o2,%o0
|
||||
|
||||
Oddp: xnor %g0,%o2,%o2
|
||||
LOC(Oddp):
|
||||
xnor %g0,%o2,%o2
|
||||
! q' in %o2. r' in %o1
|
||||
addcc %o1,%o2,%o1
|
||||
bcc LLp6
|
||||
bcc LOC(Lp6)
|
||||
addx %o2,0,%o2
|
||||
sub %o1,%o3,%o1
|
||||
LLp6: subcc %o1,%o3,%g0
|
||||
bcs LLp7
|
||||
LOC(Lp6):
|
||||
subcc %o1,%o3,%g0
|
||||
bcs LOC(Lp7)
|
||||
subx %o2,-1,%o2
|
||||
sub %o1,%o3,%o1
|
||||
LLp7: st %o1,[%o0]
|
||||
LOC(Lp7):
|
||||
st %o1,[%o0]
|
||||
retl
|
||||
mov %o2,%o0
|
||||
|
||||
Oddn: xnor %g0,%o2,%o2
|
||||
LOC(Oddn):
|
||||
xnor %g0,%o2,%o2
|
||||
! q' in %o2. r' in %o4
|
||||
addcc %o4,%o2,%o4
|
||||
bcc LLn6
|
||||
bcc LOC(Ln6)
|
||||
addx %o2,0,%o2
|
||||
sub %o4,%o3,%o4
|
||||
LLn6: subcc %o4,%o3,%g0
|
||||
bcs LLn7
|
||||
LOC(Ln6):
|
||||
subcc %o4,%o3,%g0
|
||||
bcs LOC(Ln7)
|
||||
subx %o2,-1,%o2
|
||||
sub %o4,%o3,%o4
|
||||
LLn7: st %o4,[%o0]
|
||||
LOC(Ln7):
|
||||
st %o4,[%o0]
|
||||
retl
|
||||
mov %o2,%o0
|
||||
|
||||
END(__udiv_qrnnd)
|
||||
|
||||
13
sysdeps/sparc/sparc32/sparcv8/umul.S
Normal file
13
sysdeps/sparc/sparc32/sparcv8/umul.S
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Sparc v8 has multiply.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.umul)
|
||||
|
||||
umul %o0, %o1, %o0
|
||||
retl
|
||||
rd %y, %o1
|
||||
|
||||
END(.umul)
|
||||
15
sysdeps/sparc/sparc32/sparcv8/urem.S
Normal file
15
sysdeps/sparc/sparc32/sparcv8/urem.S
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Sparc v8 has divide.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.urem)
|
||||
|
||||
wr %g0, 0, %y
|
||||
udiv %o0, %o1, %o2
|
||||
umul %o2, %o1, %o2
|
||||
retl
|
||||
sub %o0, %o2, %o0
|
||||
|
||||
END(.urem)
|
||||
@@ -1,20 +1,20 @@
|
||||
! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
! store difference in a third limb vector.
|
||||
|
||||
!
|
||||
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -22,290 +22,308 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
#define res_ptr %o0
|
||||
#define s1_ptr %o1
|
||||
#define s2_ptr %o2
|
||||
#define size %o3
|
||||
#define RES_PTR %o0
|
||||
#define S1_PTR %o1
|
||||
#define S2_PTR %o2
|
||||
#define SIZE %o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_sub_n)
|
||||
C_SYMBOL_NAME(__mpn_sub_n):
|
||||
xor s2_ptr,res_ptr,%g1
|
||||
ENTRY(__mpn_sub_n)
|
||||
xor S2_PTR,RES_PTR,%g1
|
||||
andcc %g1,4,%g0
|
||||
bne L1 ! branch if alignment differs
|
||||
bne LOC(1) ! branch if alignment differs
|
||||
nop
|
||||
! ** V1a **
|
||||
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
|
||||
be L_v1 ! if no, branch
|
||||
andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
|
||||
be LOC(v1) ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align res_ptr and s2_ptr */
|
||||
ld [s1_ptr],%g4
|
||||
add s1_ptr,4,s1_ptr
|
||||
ld [s2_ptr],%g2
|
||||
add s2_ptr,4,s2_ptr
|
||||
add size,-1,size
|
||||
/* Add least significant limb separately to align RES_PTR and S2_PTR */
|
||||
ld [S1_PTR],%g4
|
||||
add S1_PTR,4,S1_PTR
|
||||
ld [S2_PTR],%g2
|
||||
add S2_PTR,4,S2_PTR
|
||||
add SIZE,-1,SIZE
|
||||
subcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
add res_ptr,4,res_ptr
|
||||
L_v1: addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp size,2 ! if size < 2 ...
|
||||
bl Lend2 ! ... branch to tail code
|
||||
st %o4,[RES_PTR]
|
||||
add RES_PTR,4,RES_PTR
|
||||
LOC(v1):
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp SIZE,2 ! if SIZE < 2 ...
|
||||
bl LOC(end2) ! ... branch to tail code
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
ld [s1_ptr+0],%g4
|
||||
addcc size,-10,size
|
||||
ld [s1_ptr+4],%g1
|
||||
ldd [s2_ptr+0],%g2
|
||||
blt Lfin1
|
||||
ld [S1_PTR+0],%g4
|
||||
addcc SIZE,-10,SIZE
|
||||
ld [S1_PTR+4],%g1
|
||||
ldd [S2_PTR+0],%g2
|
||||
blt LOC(fin1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||
Loop1: subxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+8],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+12],%g1
|
||||
ldd [s2_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
LOC(loop1):
|
||||
subxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+16],%g4
|
||||
ld [S1_PTR+8],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+20],%g1
|
||||
ldd [s2_ptr+16],%g2
|
||||
std %o4,[res_ptr+8]
|
||||
ld [S1_PTR+12],%g1
|
||||
ldd [S2_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
subxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+24],%g4
|
||||
ld [S1_PTR+16],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+28],%g1
|
||||
ldd [s2_ptr+24],%g2
|
||||
std %o4,[res_ptr+16]
|
||||
ld [S1_PTR+20],%g1
|
||||
ldd [S2_PTR+16],%g2
|
||||
std %o4,[RES_PTR+8]
|
||||
subxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+32],%g4
|
||||
ld [S1_PTR+24],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+36],%g1
|
||||
ldd [s2_ptr+32],%g2
|
||||
std %o4,[res_ptr+24]
|
||||
ld [S1_PTR+28],%g1
|
||||
ldd [S2_PTR+24],%g2
|
||||
std %o4,[RES_PTR+16]
|
||||
subxcc %g4,%g2,%o4
|
||||
ld [S1_PTR+32],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [S1_PTR+36],%g1
|
||||
ldd [S2_PTR+32],%g2
|
||||
std %o4,[RES_PTR+24]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
add s1_ptr,32,s1_ptr
|
||||
add s2_ptr,32,s2_ptr
|
||||
add res_ptr,32,res_ptr
|
||||
bge Loop1
|
||||
addcc SIZE,-8,SIZE
|
||||
add S1_PTR,32,S1_PTR
|
||||
add S2_PTR,32,S2_PTR
|
||||
add RES_PTR,32,RES_PTR
|
||||
bge LOC(loop1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
Lfin1: addcc size,8-2,size
|
||||
blt Lend1
|
||||
LOC(fin1):
|
||||
addcc SIZE,8-2,SIZE
|
||||
blt LOC(end1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 2 limbs until less than 2 limbs remain */
|
||||
Loope1: subxcc %g4,%g2,%o4
|
||||
ld [s1_ptr+8],%g4
|
||||
LOC(loope1):
|
||||
subxcc %g4,%g2,%o4
|
||||
ld [S1_PTR+8],%g4
|
||||
subxcc %g1,%g3,%o5
|
||||
ld [s1_ptr+12],%g1
|
||||
ldd [s2_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
ld [S1_PTR+12],%g1
|
||||
ldd [S2_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-2,size
|
||||
add s1_ptr,8,s1_ptr
|
||||
add s2_ptr,8,s2_ptr
|
||||
add res_ptr,8,res_ptr
|
||||
bge Loope1
|
||||
addcc SIZE,-2,SIZE
|
||||
add S1_PTR,8,S1_PTR
|
||||
add S2_PTR,8,S2_PTR
|
||||
add RES_PTR,8,RES_PTR
|
||||
bge LOC(loope1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Lend1: subxcc %g4,%g2,%o4
|
||||
LOC(end1):
|
||||
subxcc %g4,%g2,%o4
|
||||
subxcc %g1,%g3,%o5
|
||||
std %o4,[res_ptr+0]
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
|
||||
andcc size,1,%g0
|
||||
be Lret1
|
||||
andcc SIZE,1,%g0
|
||||
be LOC(ret1)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add last limb */
|
||||
ld [s1_ptr+8],%g4
|
||||
ld [s2_ptr+8],%g2
|
||||
ld [S1_PTR+8],%g4
|
||||
ld [S2_PTR+8],%g2
|
||||
subxcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr+8]
|
||||
st %o4,[RES_PTR+8]
|
||||
|
||||
Lret1: retl
|
||||
LOC(ret1):
|
||||
retl
|
||||
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
||||
|
||||
L1: xor s1_ptr,res_ptr,%g1
|
||||
LOC(1): xor S1_PTR,RES_PTR,%g1
|
||||
andcc %g1,4,%g0
|
||||
bne L2
|
||||
bne LOC(2)
|
||||
nop
|
||||
! ** V1b **
|
||||
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
|
||||
be L_v1b ! if no, branch
|
||||
andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
|
||||
be LOC(v1b) ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align res_ptr and s1_ptr */
|
||||
ld [s2_ptr],%g4
|
||||
add s2_ptr,4,s2_ptr
|
||||
ld [s1_ptr],%g2
|
||||
add s1_ptr,4,s1_ptr
|
||||
add size,-1,size
|
||||
/* Add least significant limb separately to align RES_PTR and S1_PTR */
|
||||
ld [S2_PTR],%g4
|
||||
add S2_PTR,4,S2_PTR
|
||||
ld [S1_PTR],%g2
|
||||
add S1_PTR,4,S1_PTR
|
||||
add SIZE,-1,SIZE
|
||||
subcc %g2,%g4,%o4
|
||||
st %o4,[res_ptr]
|
||||
add res_ptr,4,res_ptr
|
||||
L_v1b: addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp size,2 ! if size < 2 ...
|
||||
bl Lend2 ! ... branch to tail code
|
||||
st %o4,[RES_PTR]
|
||||
add RES_PTR,4,RES_PTR
|
||||
LOC(v1b):
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
cmp SIZE,2 ! if SIZE < 2 ...
|
||||
bl LOC(end2) ! ... branch to tail code
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
ld [s2_ptr+0],%g4
|
||||
addcc size,-10,size
|
||||
ld [s2_ptr+4],%g1
|
||||
ldd [s1_ptr+0],%g2
|
||||
blt Lfin1b
|
||||
ld [S2_PTR+0],%g4
|
||||
addcc SIZE,-10,SIZE
|
||||
ld [S2_PTR+4],%g1
|
||||
ldd [S1_PTR+0],%g2
|
||||
blt LOC(fin1b)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||
Loop1b: subxcc %g2,%g4,%o4
|
||||
ld [s2_ptr+8],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [s2_ptr+12],%g1
|
||||
ldd [s1_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
LOC(loop1b):
|
||||
subxcc %g2,%g4,%o4
|
||||
ld [s2_ptr+16],%g4
|
||||
ld [S2_PTR+8],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [s2_ptr+20],%g1
|
||||
ldd [s1_ptr+16],%g2
|
||||
std %o4,[res_ptr+8]
|
||||
ld [S2_PTR+12],%g1
|
||||
ldd [S1_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
subxcc %g2,%g4,%o4
|
||||
ld [s2_ptr+24],%g4
|
||||
ld [S2_PTR+16],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [s2_ptr+28],%g1
|
||||
ldd [s1_ptr+24],%g2
|
||||
std %o4,[res_ptr+16]
|
||||
ld [S2_PTR+20],%g1
|
||||
ldd [S1_PTR+16],%g2
|
||||
std %o4,[RES_PTR+8]
|
||||
subxcc %g2,%g4,%o4
|
||||
ld [s2_ptr+32],%g4
|
||||
ld [S2_PTR+24],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [s2_ptr+36],%g1
|
||||
ldd [s1_ptr+32],%g2
|
||||
std %o4,[res_ptr+24]
|
||||
ld [S2_PTR+28],%g1
|
||||
ldd [S1_PTR+24],%g2
|
||||
std %o4,[RES_PTR+16]
|
||||
subxcc %g2,%g4,%o4
|
||||
ld [S2_PTR+32],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [S2_PTR+36],%g1
|
||||
ldd [S1_PTR+32],%g2
|
||||
std %o4,[RES_PTR+24]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
add s1_ptr,32,s1_ptr
|
||||
add s2_ptr,32,s2_ptr
|
||||
add res_ptr,32,res_ptr
|
||||
bge Loop1b
|
||||
addcc SIZE,-8,SIZE
|
||||
add S1_PTR,32,S1_PTR
|
||||
add S2_PTR,32,S2_PTR
|
||||
add RES_PTR,32,RES_PTR
|
||||
bge LOC(loop1b)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
Lfin1b: addcc size,8-2,size
|
||||
blt Lend1b
|
||||
LOC(fin1b):
|
||||
addcc SIZE,8-2,SIZE
|
||||
blt LOC(end1b)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 2 limbs until less than 2 limbs remain */
|
||||
Loope1b:subxcc %g2,%g4,%o4
|
||||
ld [s2_ptr+8],%g4
|
||||
LOC(loope1b):
|
||||
subxcc %g2,%g4,%o4
|
||||
ld [S2_PTR+8],%g4
|
||||
subxcc %g3,%g1,%o5
|
||||
ld [s2_ptr+12],%g1
|
||||
ldd [s1_ptr+8],%g2
|
||||
std %o4,[res_ptr+0]
|
||||
ld [S2_PTR+12],%g1
|
||||
ldd [S1_PTR+8],%g2
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-2,size
|
||||
add s1_ptr,8,s1_ptr
|
||||
add s2_ptr,8,s2_ptr
|
||||
add res_ptr,8,res_ptr
|
||||
bge Loope1b
|
||||
addcc SIZE,-2,SIZE
|
||||
add S1_PTR,8,S1_PTR
|
||||
add S2_PTR,8,S2_PTR
|
||||
add RES_PTR,8,RES_PTR
|
||||
bge LOC(loope1b)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Lend1b: subxcc %g2,%g4,%o4
|
||||
LOC(end1b):
|
||||
subxcc %g2,%g4,%o4
|
||||
subxcc %g3,%g1,%o5
|
||||
std %o4,[res_ptr+0]
|
||||
std %o4,[RES_PTR+0]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
|
||||
andcc size,1,%g0
|
||||
be Lret1b
|
||||
andcc SIZE,1,%g0
|
||||
be LOC(ret1b)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add last limb */
|
||||
ld [s2_ptr+8],%g4
|
||||
ld [s1_ptr+8],%g2
|
||||
ld [S2_PTR+8],%g4
|
||||
ld [S1_PTR+8],%g2
|
||||
subxcc %g2,%g4,%o4
|
||||
st %o4,[res_ptr+8]
|
||||
st %o4,[RES_PTR+8]
|
||||
|
||||
Lret1b: retl
|
||||
LOC(ret1b):
|
||||
retl
|
||||
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
||||
|
||||
! ** V2 **
|
||||
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
|
||||
alignment of s2_ptr and res_ptr differ. Since there are only two ways
|
||||
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
|
||||
alignment of S2_PTR and RES_PTR differ. Since there are only two ways
|
||||
things can be aligned (that we care about) we now know that the alignment
|
||||
of s1_ptr and s2_ptr are the same. */
|
||||
of S1_PTR and S2_PTR are the same. */
|
||||
|
||||
L2: cmp size,1
|
||||
be Ljone
|
||||
LOC(2): cmp SIZE,1
|
||||
be LOC(jone)
|
||||
nop
|
||||
andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
|
||||
be L_v2 ! if no, branch
|
||||
andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
|
||||
be LOC(v2) ! if no, branch
|
||||
nop
|
||||
/* Add least significant limb separately to align s1_ptr and s2_ptr */
|
||||
ld [s1_ptr],%g4
|
||||
add s1_ptr,4,s1_ptr
|
||||
ld [s2_ptr],%g2
|
||||
add s2_ptr,4,s2_ptr
|
||||
add size,-1,size
|
||||
/* Add least significant limb separately to align S1_PTR and S2_PTR */
|
||||
ld [S1_PTR],%g4
|
||||
add S1_PTR,4,S1_PTR
|
||||
ld [S2_PTR],%g2
|
||||
add S2_PTR,4,S2_PTR
|
||||
add SIZE,-1,SIZE
|
||||
subcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
add res_ptr,4,res_ptr
|
||||
st %o4,[RES_PTR]
|
||||
add RES_PTR,4,RES_PTR
|
||||
|
||||
L_v2: addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
blt Lfin2
|
||||
LOC(v2):
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc SIZE,-8,SIZE
|
||||
blt LOC(fin2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||
Loop2: ldd [s1_ptr+0],%g2
|
||||
ldd [s2_ptr+0],%o4
|
||||
LOC(loop2):
|
||||
ldd [S1_PTR+0],%g2
|
||||
ldd [S2_PTR+0],%o4
|
||||
subxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+0]
|
||||
st %g2,[RES_PTR+0]
|
||||
subxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+4]
|
||||
ldd [s1_ptr+8],%g2
|
||||
ldd [s2_ptr+8],%o4
|
||||
st %g3,[RES_PTR+4]
|
||||
ldd [S1_PTR+8],%g2
|
||||
ldd [S2_PTR+8],%o4
|
||||
subxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+8]
|
||||
st %g2,[RES_PTR+8]
|
||||
subxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+12]
|
||||
ldd [s1_ptr+16],%g2
|
||||
ldd [s2_ptr+16],%o4
|
||||
st %g3,[RES_PTR+12]
|
||||
ldd [S1_PTR+16],%g2
|
||||
ldd [S2_PTR+16],%o4
|
||||
subxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+16]
|
||||
st %g2,[RES_PTR+16]
|
||||
subxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+20]
|
||||
ldd [s1_ptr+24],%g2
|
||||
ldd [s2_ptr+24],%o4
|
||||
st %g3,[RES_PTR+20]
|
||||
ldd [S1_PTR+24],%g2
|
||||
ldd [S2_PTR+24],%o4
|
||||
subxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+24]
|
||||
st %g2,[RES_PTR+24]
|
||||
subxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+28]
|
||||
st %g3,[RES_PTR+28]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-8,size
|
||||
add s1_ptr,32,s1_ptr
|
||||
add s2_ptr,32,s2_ptr
|
||||
add res_ptr,32,res_ptr
|
||||
bge Loop2
|
||||
addcc SIZE,-8,SIZE
|
||||
add S1_PTR,32,S1_PTR
|
||||
add S2_PTR,32,S2_PTR
|
||||
add RES_PTR,32,RES_PTR
|
||||
bge LOC(loop2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
|
||||
Lfin2: addcc size,8-2,size
|
||||
blt Lend2
|
||||
LOC(fin2):
|
||||
addcc SIZE,8-2,SIZE
|
||||
blt LOC(end2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Loope2: ldd [s1_ptr+0],%g2
|
||||
ldd [s2_ptr+0],%o4
|
||||
LOC(loope2):
|
||||
ldd [S1_PTR+0],%g2
|
||||
ldd [S2_PTR+0],%o4
|
||||
subxcc %g2,%o4,%g2
|
||||
st %g2,[res_ptr+0]
|
||||
st %g2,[RES_PTR+0]
|
||||
subxcc %g3,%o5,%g3
|
||||
st %g3,[res_ptr+4]
|
||||
st %g3,[RES_PTR+4]
|
||||
addx %g0,%g0,%o4 ! save cy in register
|
||||
addcc size,-2,size
|
||||
add s1_ptr,8,s1_ptr
|
||||
add s2_ptr,8,s2_ptr
|
||||
add res_ptr,8,res_ptr
|
||||
bge Loope2
|
||||
addcc SIZE,-2,SIZE
|
||||
add S1_PTR,8,S1_PTR
|
||||
add S2_PTR,8,S2_PTR
|
||||
add RES_PTR,8,RES_PTR
|
||||
bge LOC(loope2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
Lend2: andcc size,1,%g0
|
||||
be Lret2
|
||||
LOC(end2):
|
||||
andcc SIZE,1,%g0
|
||||
be LOC(ret2)
|
||||
subcc %g0,%o4,%g0 ! restore cy
|
||||
/* Add last limb */
|
||||
Ljone: ld [s1_ptr],%g4
|
||||
ld [s2_ptr],%g2
|
||||
LOC(jone):
|
||||
ld [S1_PTR],%g4
|
||||
ld [S2_PTR],%g2
|
||||
subxcc %g4,%g2,%o4
|
||||
st %o4,[res_ptr]
|
||||
st %o4,[RES_PTR]
|
||||
|
||||
Lret2: retl
|
||||
LOC(ret2):
|
||||
retl
|
||||
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
||||
|
||||
END(__mpn_sub_n)
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
! the result from a second limb vector.
|
||||
|
||||
! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
@@ -22,17 +22,14 @@
|
||||
|
||||
|
||||
! INPUT PARAMETERS
|
||||
! res_ptr o0
|
||||
! s1_ptr o1
|
||||
! size o2
|
||||
! s2_limb o3
|
||||
! RES_PTR o0
|
||||
! S1_PTR o1
|
||||
! SIZE o2
|
||||
! S2_LIMB o3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global C_SYMBOL_NAME(__mpn_submul_1)
|
||||
C_SYMBOL_NAME(__mpn_submul_1):
|
||||
ENTRY(__mpn_submul_1)
|
||||
! Make S1_PTR and RES_PTR point at the end of their blocks
|
||||
! and put (- 4 x SIZE) in index/loop counter.
|
||||
sll %o2,2,%o2
|
||||
@@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_submul_1):
|
||||
sub %g0,%o2,%o2
|
||||
|
||||
cmp %o3,0xfff
|
||||
bgu Large
|
||||
bgu LOC(large)
|
||||
nop
|
||||
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
b L0
|
||||
b LOC(0)
|
||||
add %o4,-4,%o4
|
||||
Loop0:
|
||||
LOC(loop0):
|
||||
subcc %o5,%g1,%g1
|
||||
ld [%o1+%o2],%o5
|
||||
addx %o0,%g0,%o0
|
||||
st %g1,[%o4+%o2]
|
||||
L0: wr %g0,%o3,%y
|
||||
LOC(0): wr %g0,%o3,%y
|
||||
sra %o5,31,%g2
|
||||
and %o3,%g2,%g2
|
||||
andcc %g1,0,%g1
|
||||
@@ -79,7 +76,7 @@ L0: wr %g0,%o3,%y
|
||||
addcc %g1,%o0,%g1
|
||||
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
|
||||
addcc %o2,4,%o2 ! loop counter
|
||||
bne Loop0
|
||||
bne LOC(loop0)
|
||||
ld [%o4+%o2],%o5
|
||||
|
||||
subcc %o5,%g1,%g1
|
||||
@@ -88,17 +85,18 @@ L0: wr %g0,%o3,%y
|
||||
st %g1,[%o4+%o2]
|
||||
|
||||
|
||||
Large: ld [%o1+%o2],%o5
|
||||
LOC(large):
|
||||
ld [%o1+%o2],%o5
|
||||
mov 0,%o0
|
||||
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
|
||||
b L1
|
||||
b LOC(1)
|
||||
add %o4,-4,%o4
|
||||
Loop:
|
||||
LOC(loop):
|
||||
subcc %o5,%g3,%g3
|
||||
ld [%o1+%o2],%o5
|
||||
addx %o0,%g0,%o0
|
||||
st %g3,[%o4+%o2]
|
||||
L1: wr %g0,%o5,%y
|
||||
LOC(1): wr %g0,%o5,%y
|
||||
and %o5,%g4,%g2
|
||||
andcc %g0,%g0,%g1
|
||||
mulscc %g1,%o3,%g1
|
||||
@@ -138,10 +136,12 @@ L1: wr %g0,%o5,%y
|
||||
addcc %g3,%o0,%g3
|
||||
addx %g2,%g1,%o0
|
||||
addcc %o2,4,%o2
|
||||
bne Loop
|
||||
bne LOC(loop)
|
||||
ld [%o4+%o2],%o5
|
||||
|
||||
subcc %o5,%g3,%g3
|
||||
addx %o0,%g0,%o0
|
||||
retl
|
||||
st %g3,[%o4+%o2]
|
||||
|
||||
END(__mpn_submul_1)
|
||||
|
||||
@@ -37,16 +37,8 @@
|
||||
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#ifdef __linux__
|
||||
#include <asm/traps.h>
|
||||
#else
|
||||
#ifdef __svr4__
|
||||
#include <sysdep.h>
|
||||
#include <sys/trap.h>
|
||||
#else
|
||||
#include <machine/trap.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
ENTRY(.udiv)
|
||||
|
||||
@@ -63,11 +55,11 @@ ENTRY(.udiv)
|
||||
|
||||
1:
|
||||
cmp %o3, %o5 ! if %o1 exceeds %o0, done
|
||||
blu Lgot_result ! (and algorithm fails otherwise)
|
||||
blu LOC(got_result) ! (and algorithm fails otherwise)
|
||||
clr %o2
|
||||
sethi %hi(1 << (32 - 4 - 1)), %g1
|
||||
cmp %o3, %g1
|
||||
blu Lnot_really_big
|
||||
blu LOC(not_really_big)
|
||||
clr %o4
|
||||
|
||||
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
|
||||
@@ -78,15 +70,15 @@ ENTRY(.udiv)
|
||||
1:
|
||||
cmp %o5, %g1
|
||||
bgeu 3f
|
||||
mov 1, %g7
|
||||
mov 1, %g2
|
||||
sll %o5, 4, %o5
|
||||
b 1b
|
||||
add %o4, 1, %o4
|
||||
|
||||
! Now compute %g7.
|
||||
! Now compute %g2.
|
||||
2: addcc %o5, %o5, %o5
|
||||
bcc Lnot_too_big
|
||||
add %g7, 1, %g7
|
||||
bcc LOC(not_too_big)
|
||||
add %g2, 1, %g2
|
||||
|
||||
! We get here if the %o1 overflowed while shifting.
|
||||
! This means that %o3 has the high-order bit set.
|
||||
@@ -94,20 +86,20 @@ ENTRY(.udiv)
|
||||
sll %g1, 4, %g1 ! high order bit
|
||||
srl %o5, 1, %o5 ! rest of %o5
|
||||
add %o5, %g1, %o5
|
||||
b Ldo_single_div
|
||||
sub %g7, 1, %g7
|
||||
b LOC(do_single_div)
|
||||
sub %g2, 1, %g2
|
||||
|
||||
Lnot_too_big:
|
||||
LOC(not_too_big):
|
||||
3: cmp %o5, %o3
|
||||
blu 2b
|
||||
nop
|
||||
be Ldo_single_div
|
||||
be LOC(do_single_div)
|
||||
nop
|
||||
/* NB: these are commented out in the V8-Sparc manual as well */
|
||||
/* (I do not understand this) */
|
||||
! %o5 > %o3: went too far: back up 1 step
|
||||
! srl %o5, 1, %o5
|
||||
! dec %g7
|
||||
! dec %g2
|
||||
! do single-bit divide steps
|
||||
!
|
||||
! We have to be careful here. We know that %o3 >= %o5, so we can do the
|
||||
@@ -116,15 +108,15 @@ ENTRY(.udiv)
|
||||
! order bit set in the first step, just falling into the regular
|
||||
! division loop will mess up the first time around.
|
||||
! So we unroll slightly...
|
||||
Ldo_single_div:
|
||||
subcc %g7, 1, %g7
|
||||
bl Lend_regular_divide
|
||||
LOC(do_single_div):
|
||||
subcc %g2, 1, %g2
|
||||
bl LOC(end_regular_divide)
|
||||
nop
|
||||
sub %o3, %o5, %o3
|
||||
mov 1, %o2
|
||||
b Lend_single_divloop
|
||||
b LOC(end_single_divloop)
|
||||
nop
|
||||
Lsingle_divloop:
|
||||
LOC(single_divloop):
|
||||
sll %o2, 1, %o2
|
||||
bl 1f
|
||||
srl %o5, 1, %o5
|
||||
@@ -136,217 +128,219 @@ ENTRY(.udiv)
|
||||
add %o3, %o5, %o3
|
||||
sub %o2, 1, %o2
|
||||
2:
|
||||
Lend_single_divloop:
|
||||
subcc %g7, 1, %g7
|
||||
bge Lsingle_divloop
|
||||
LOC(end_single_divloop):
|
||||
subcc %g2, 1, %g2
|
||||
bge LOC(single_divloop)
|
||||
tst %o3
|
||||
b,a Lend_regular_divide
|
||||
b,a LOC(end_regular_divide)
|
||||
|
||||
Lnot_really_big:
|
||||
LOC(not_really_big):
|
||||
1:
|
||||
sll %o5, 4, %o5
|
||||
cmp %o5, %o3
|
||||
bleu 1b
|
||||
addcc %o4, 1, %o4
|
||||
be Lgot_result
|
||||
be LOC(got_result)
|
||||
sub %o4, 1, %o4
|
||||
|
||||
tst %o3 ! set up for initial iteration
|
||||
Ldivloop:
|
||||
LOC(divloop):
|
||||
sll %o2, 4, %o2
|
||||
! depth 1, accumulated bits 0
|
||||
bl L.1.16
|
||||
bl LOC(1.16)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits 1
|
||||
bl L.2.17
|
||||
bl LOC(2.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 3
|
||||
bl L.3.19
|
||||
bl LOC(3.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 7
|
||||
bl L.4.23
|
||||
bl LOC(4.23)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2+1), %o2
|
||||
|
||||
L.4.23:
|
||||
|
||||
LOC(4.23):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2-1), %o2
|
||||
|
||||
|
||||
L.3.19:
|
||||
|
||||
|
||||
LOC(3.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 5
|
||||
bl L.4.21
|
||||
bl LOC(4.21)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2+1), %o2
|
||||
|
||||
L.4.21:
|
||||
|
||||
LOC(4.21):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.17:
|
||||
|
||||
|
||||
|
||||
LOC(2.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 1
|
||||
bl L.3.17
|
||||
bl LOC(3.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 3
|
||||
bl L.4.19
|
||||
bl LOC(4.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2+1), %o2
|
||||
|
||||
L.4.19:
|
||||
|
||||
LOC(4.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2-1), %o2
|
||||
|
||||
|
||||
L.3.17:
|
||||
|
||||
|
||||
LOC(3.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 1
|
||||
bl L.4.17
|
||||
bl LOC(4.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2+1), %o2
|
||||
|
||||
L.4.17:
|
||||
|
||||
LOC(4.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
L.1.16:
|
||||
|
||||
|
||||
|
||||
|
||||
LOC(1.16):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits -1
|
||||
bl L.2.15
|
||||
bl LOC(2.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -1
|
||||
bl L.3.15
|
||||
bl LOC(3.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -1
|
||||
bl L.4.15
|
||||
bl LOC(4.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2+1), %o2
|
||||
|
||||
L.4.15:
|
||||
|
||||
LOC(4.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2-1), %o2
|
||||
|
||||
|
||||
L.3.15:
|
||||
|
||||
|
||||
LOC(3.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -3
|
||||
bl L.4.13
|
||||
bl LOC(4.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2+1), %o2
|
||||
|
||||
L.4.13:
|
||||
|
||||
LOC(4.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.15:
|
||||
|
||||
|
||||
|
||||
LOC(2.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -3
|
||||
bl L.3.13
|
||||
bl LOC(3.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -5
|
||||
bl L.4.11
|
||||
bl LOC(4.11)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2+1), %o2
|
||||
|
||||
L.4.11:
|
||||
|
||||
LOC(4.11):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2-1), %o2
|
||||
|
||||
|
||||
L.3.13:
|
||||
|
||||
|
||||
LOC(3.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -7
|
||||
bl L.4.9
|
||||
bl LOC(4.9)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2+1), %o2
|
||||
|
||||
L.4.9:
|
||||
|
||||
LOC(4.9):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
9:
|
||||
Lend_regular_divide:
|
||||
LOC(end_regular_divide):
|
||||
subcc %o4, 1, %o4
|
||||
bge Ldivloop
|
||||
bge LOC(divloop)
|
||||
tst %o3
|
||||
bl,a Lgot_result
|
||||
bl,a LOC(got_result)
|
||||
! non-restoring fixup here (one instruction only!)
|
||||
sub %o2, 1, %o2
|
||||
|
||||
|
||||
Lgot_result:
|
||||
LOC(got_result):
|
||||
|
||||
retl
|
||||
mov %o2, %o0
|
||||
|
||||
END(.udiv)
|
||||
|
||||
@@ -1,50 +1,52 @@
|
||||
! SPARC __udiv_qrnnd division support, used from longlong.h.
|
||||
|
||||
!
|
||||
! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc.
|
||||
|
||||
!
|
||||
! This file is part of the GNU MP Library.
|
||||
|
||||
!
|
||||
! The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
! it under the terms of the GNU Library General Public License as published by
|
||||
! the Free Software Foundation; either version 2 of the License, or (at your
|
||||
! option) any later version.
|
||||
|
||||
!
|
||||
! The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||
! License for more details.
|
||||
|
||||
!
|
||||
! You should have received a copy of the GNU Library General Public License
|
||||
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
!
|
||||
! Added PIC support - May/96, Miguel de Icaza
|
||||
|
||||
!
|
||||
! INPUT PARAMETERS
|
||||
! rem_ptr i0
|
||||
! n1 i1
|
||||
! n0 i2
|
||||
! d i3
|
||||
|
||||
#include "sysdep.h"
|
||||
#include <sysdep.h>
|
||||
#undef ret /* Kludge for glibc */
|
||||
|
||||
#ifdef PIC
|
||||
.text
|
||||
#else
|
||||
.section .rodata,#alloc
|
||||
#endif
|
||||
.align 8
|
||||
|
||||
.type two_to_32,@object
|
||||
.size two_to_32,8
|
||||
two_to_32:
|
||||
.double 0r4294967296
|
||||
.size two_to_32,8
|
||||
|
||||
.type two_to_31,@object
|
||||
.size two_to_31,8
|
||||
two_to_31:
|
||||
.double 0r2147483648
|
||||
.size two_to_31,8
|
||||
|
||||
.align 4
|
||||
.global __udiv_qrnnd
|
||||
.type __udiv_qrnnd,@function
|
||||
.text
|
||||
ENTRY(__udiv_qrnnd)
|
||||
!#PROLOGUE# 0
|
||||
save %sp,-104,%sp
|
||||
@@ -52,57 +54,58 @@ ENTRY(__udiv_qrnnd)
|
||||
st %i1,[%fp-8]
|
||||
ld [%fp-8],%f10
|
||||
#ifdef PIC
|
||||
.Lbase: call 1f
|
||||
LOC(base):
|
||||
call 1f
|
||||
fitod %f10,%f4
|
||||
1: ldd [%o7-(.Lbase-two_to_32)],%f8
|
||||
1: ldd [%o7-(LOC(base)-two_to_32)],%f8
|
||||
#else
|
||||
sethi %hi(two_to_32),%o7
|
||||
fitod %f10,%f4
|
||||
ldd [%o7+%lo(two_to_32)],%f8
|
||||
#endif
|
||||
cmp %i1,0
|
||||
bge L248
|
||||
bge LOC(248)
|
||||
mov %i0,%i5
|
||||
faddd %f4,%f8,%f4
|
||||
.L248:
|
||||
LOC(248):
|
||||
st %i2,[%fp-8]
|
||||
ld [%fp-8],%f10
|
||||
fmuld %f4,%f8,%f6
|
||||
cmp %i2,0
|
||||
bge L249
|
||||
bge LOC(249)
|
||||
fitod %f10,%f2
|
||||
faddd %f2,%f8,%f2
|
||||
.L249:
|
||||
LOC(249):
|
||||
st %i3,[%fp-8]
|
||||
faddd %f6,%f2,%f2
|
||||
ld [%fp-8],%f10
|
||||
cmp %i3,0
|
||||
bge L250
|
||||
bge LOC(250)
|
||||
fitod %f10,%f4
|
||||
faddd %f4,%f8,%f4
|
||||
.L250:
|
||||
LOC(250):
|
||||
fdivd %f2,%f4,%f2
|
||||
#ifdef PIC
|
||||
ldd [%o7-(.Lbase-two_to_31)],%f4
|
||||
ldd [%o7-(LOC(base)-two_to_31)],%f4
|
||||
#else
|
||||
sethi %hi(two_to_31),%o7
|
||||
ldd [%o7+%lo(two_to_31)],%f4
|
||||
#endif
|
||||
fcmped %f2,%f4
|
||||
nop
|
||||
fbge,a L251
|
||||
fbge,a LOC(251)
|
||||
fsubd %f2,%f4,%f2
|
||||
fdtoi %f2,%f2
|
||||
st %f2,[%fp-8]
|
||||
b L252
|
||||
b LOC(252)
|
||||
ld [%fp-8],%i4
|
||||
.L251:
|
||||
LOC(251):
|
||||
fdtoi %f2,%f2
|
||||
st %f2,[%fp-8]
|
||||
ld [%fp-8],%i4
|
||||
sethi %hi(-2147483648),%g2
|
||||
xor %i4,%g2,%i4
|
||||
.L252:
|
||||
LOC(252):
|
||||
wr %g0,%i4,%y
|
||||
sra %i3,31,%g2
|
||||
and %i4,%g2,%g2
|
||||
@@ -144,7 +147,7 @@ ENTRY(__udiv_qrnnd)
|
||||
rd %y,%g3
|
||||
subcc %i2,%g3,%o7
|
||||
subxcc %i1,%i0,%g0
|
||||
be L253
|
||||
be LOC(253)
|
||||
cmp %o7,%i3
|
||||
|
||||
add %i4,-1,%i0
|
||||
@@ -152,14 +155,14 @@ ENTRY(__udiv_qrnnd)
|
||||
st %o7,[%i5]
|
||||
ret
|
||||
restore
|
||||
.L253:
|
||||
blu L246
|
||||
LOC(253):
|
||||
blu LOC(246)
|
||||
mov %i4,%i0
|
||||
add %i4,1,%i0
|
||||
sub %o7,%i3,%o7
|
||||
.L246:
|
||||
LOC(246):
|
||||
st %o7,[%i5]
|
||||
ret
|
||||
restore
|
||||
|
||||
.size __udiv_qrnnd, .-__udiv_qrnnd
|
||||
END(__udiv_qrnnd)
|
||||
|
||||
@@ -14,13 +14,14 @@
|
||||
* bnz overflow (or tnz)
|
||||
*/
|
||||
|
||||
#include "DEFS.h"
|
||||
FUNC(.umul)
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(.umul)
|
||||
or %o0, %o1, %o4
|
||||
mov %o0, %y ! multiplier -> Y
|
||||
andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
|
||||
be Lmul_shortway ! if zero, can do it the short way
|
||||
andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
|
||||
mov %o0, %y ! multiplier -> Y
|
||||
andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
|
||||
be LOC(mul_shortway) ! if zero, can do it the short way
|
||||
andcc %g0, %g0, %o4 ! zero the partial product; clear N & V
|
||||
|
||||
/*
|
||||
* Long multiply. 32 steps, followed by a final shift step.
|
||||
@@ -59,7 +60,6 @@ FUNC(.umul)
|
||||
mulscc %o4, %o1, %o4 ! 32
|
||||
mulscc %o4, %g0, %o4 ! final shift
|
||||
|
||||
|
||||
/*
|
||||
* Normally, with the shift-and-add approach, if both numbers are
|
||||
* positive you get the correct result. With 32-bit two's-complement
|
||||
@@ -97,20 +97,20 @@ FUNC(.umul)
|
||||
#if 0
|
||||
tst %o1
|
||||
bl,a 1f ! if %o1 < 0 (high order bit = 1),
|
||||
add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
|
||||
add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
|
||||
1: rd %y, %o0 ! get lower half of product
|
||||
retl
|
||||
addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
|
||||
addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
|
||||
#else
|
||||
/* Faster code from tege@sics.se. */
|
||||
sra %o1, 31, %o2 ! make mask from sign bit
|
||||
and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
|
||||
rd %y, %o0 ! get lower half of product
|
||||
retl
|
||||
addcc %o4, %o2, %o1 ! add compensation and put upper half in place
|
||||
addcc %o4, %o2, %o1 ! add compensation and put upper half in place
|
||||
#endif
|
||||
|
||||
Lmul_shortway:
|
||||
LOC(mul_shortway):
|
||||
/*
|
||||
* Short multiply. 12 steps, followed by a final shift step.
|
||||
* The resulting bits are off by 12 and (32-12) = 20 bit positions,
|
||||
@@ -150,4 +150,6 @@ Lmul_shortway:
|
||||
srl %o5, 20, %o5 ! shift low bits right 20
|
||||
or %o5, %o0, %o0
|
||||
retl
|
||||
addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
|
||||
addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
|
||||
|
||||
END(.umul)
|
||||
|
||||
@@ -37,16 +37,8 @@
|
||||
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#ifdef __linux__
|
||||
#include <asm/traps.h>
|
||||
#else
|
||||
#ifdef __svr4__
|
||||
#include <sysdep.h>
|
||||
#include <sys/trap.h>
|
||||
#else
|
||||
#include <machine/trap.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
ENTRY(.urem)
|
||||
|
||||
@@ -63,11 +55,11 @@ ENTRY(.urem)
|
||||
|
||||
1:
|
||||
cmp %o3, %o5 ! if %o1 exceeds %o0, done
|
||||
blu Lgot_result ! (and algorithm fails otherwise)
|
||||
blu LOC(got_result) ! (and algorithm fails otherwise)
|
||||
clr %o2
|
||||
sethi %hi(1 << (32 - 4 - 1)), %g1
|
||||
cmp %o3, %g1
|
||||
blu Lnot_really_big
|
||||
blu LOC(not_really_big)
|
||||
clr %o4
|
||||
|
||||
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
|
||||
@@ -78,15 +70,15 @@ ENTRY(.urem)
|
||||
1:
|
||||
cmp %o5, %g1
|
||||
bgeu 3f
|
||||
mov 1, %g7
|
||||
mov 1, %g2
|
||||
sll %o5, 4, %o5
|
||||
b 1b
|
||||
add %o4, 1, %o4
|
||||
|
||||
! Now compute %g7.
|
||||
! Now compute %g2.
|
||||
2: addcc %o5, %o5, %o5
|
||||
bcc Lnot_too_big
|
||||
add %g7, 1, %g7
|
||||
bcc LOC(not_too_big)
|
||||
add %g2, 1, %g2
|
||||
|
||||
! We get here if the %o1 overflowed while shifting.
|
||||
! This means that %o3 has the high-order bit set.
|
||||
@@ -94,20 +86,20 @@ ENTRY(.urem)
|
||||
sll %g1, 4, %g1 ! high order bit
|
||||
srl %o5, 1, %o5 ! rest of %o5
|
||||
add %o5, %g1, %o5
|
||||
b Ldo_single_div
|
||||
sub %g7, 1, %g7
|
||||
b LOC(do_single_div)
|
||||
sub %g2, 1, %g2
|
||||
|
||||
Lnot_too_big:
|
||||
LOC(not_too_big):
|
||||
3: cmp %o5, %o3
|
||||
blu 2b
|
||||
nop
|
||||
be Ldo_single_div
|
||||
be LOC(do_single_div)
|
||||
nop
|
||||
/* NB: these are commented out in the V8-Sparc manual as well */
|
||||
/* (I do not understand this) */
|
||||
! %o5 > %o3: went too far: back up 1 step
|
||||
! srl %o5, 1, %o5
|
||||
! dec %g7
|
||||
! dec %g2
|
||||
! do single-bit divide steps
|
||||
!
|
||||
! We have to be careful here. We know that %o3 >= %o5, so we can do the
|
||||
@@ -116,15 +108,15 @@ ENTRY(.urem)
|
||||
! order bit set in the first step, just falling into the regular
|
||||
! division loop will mess up the first time around.
|
||||
! So we unroll slightly...
|
||||
Ldo_single_div:
|
||||
subcc %g7, 1, %g7
|
||||
bl Lend_regular_divide
|
||||
LOC(do_single_div):
|
||||
subcc %g2, 1, %g2
|
||||
bl LOC(end_regular_divide)
|
||||
nop
|
||||
sub %o3, %o5, %o3
|
||||
mov 1, %o2
|
||||
b Lend_single_divloop
|
||||
b LOC(end_single_divloop)
|
||||
nop
|
||||
Lsingle_divloop:
|
||||
LOC(single_divloop):
|
||||
sll %o2, 1, %o2
|
||||
bl 1f
|
||||
srl %o5, 1, %o5
|
||||
@@ -136,217 +128,219 @@ ENTRY(.urem)
|
||||
add %o3, %o5, %o3
|
||||
sub %o2, 1, %o2
|
||||
2:
|
||||
Lend_single_divloop:
|
||||
subcc %g7, 1, %g7
|
||||
bge Lsingle_divloop
|
||||
LOC(end_single_divloop):
|
||||
subcc %g2, 1, %g2
|
||||
bge LOC(single_divloop)
|
||||
tst %o3
|
||||
b,a Lend_regular_divide
|
||||
b,a LOC(end_regular_divide)
|
||||
|
||||
Lnot_really_big:
|
||||
LOC(not_really_big):
|
||||
1:
|
||||
sll %o5, 4, %o5
|
||||
cmp %o5, %o3
|
||||
bleu 1b
|
||||
addcc %o4, 1, %o4
|
||||
be Lgot_result
|
||||
be LOC(got_result)
|
||||
sub %o4, 1, %o4
|
||||
|
||||
tst %o3 ! set up for initial iteration
|
||||
Ldivloop:
|
||||
LOC(divloop):
|
||||
sll %o2, 4, %o2
|
||||
! depth 1, accumulated bits 0
|
||||
bl L.1.16
|
||||
bl LOC(1.16)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits 1
|
||||
bl L.2.17
|
||||
bl LOC(2.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 3
|
||||
bl L.3.19
|
||||
bl LOC(3.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 7
|
||||
bl L.4.23
|
||||
bl LOC(4.23)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2+1), %o2
|
||||
|
||||
L.4.23:
|
||||
|
||||
LOC(4.23):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (7*2-1), %o2
|
||||
|
||||
|
||||
L.3.19:
|
||||
|
||||
|
||||
LOC(3.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 5
|
||||
bl L.4.21
|
||||
bl LOC(4.21)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2+1), %o2
|
||||
|
||||
L.4.21:
|
||||
|
||||
LOC(4.21):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (5*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.17:
|
||||
|
||||
|
||||
|
||||
LOC(2.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits 1
|
||||
bl L.3.17
|
||||
bl LOC(3.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 3
|
||||
bl L.4.19
|
||||
bl LOC(4.19)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2+1), %o2
|
||||
|
||||
L.4.19:
|
||||
|
||||
LOC(4.19):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (3*2-1), %o2
|
||||
|
||||
|
||||
L.3.17:
|
||||
|
||||
|
||||
LOC(3.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits 1
|
||||
bl L.4.17
|
||||
bl LOC(4.17)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2+1), %o2
|
||||
|
||||
L.4.17:
|
||||
|
||||
LOC(4.17):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (1*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
L.1.16:
|
||||
|
||||
|
||||
|
||||
|
||||
LOC(1.16):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 2, accumulated bits -1
|
||||
bl L.2.15
|
||||
bl LOC(2.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -1
|
||||
bl L.3.15
|
||||
bl LOC(3.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -1
|
||||
bl L.4.15
|
||||
bl LOC(4.15)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2+1), %o2
|
||||
|
||||
L.4.15:
|
||||
|
||||
LOC(4.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-1*2-1), %o2
|
||||
|
||||
|
||||
L.3.15:
|
||||
|
||||
|
||||
LOC(3.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -3
|
||||
bl L.4.13
|
||||
bl LOC(4.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2+1), %o2
|
||||
|
||||
L.4.13:
|
||||
|
||||
LOC(4.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-3*2-1), %o2
|
||||
|
||||
|
||||
|
||||
L.2.15:
|
||||
|
||||
|
||||
|
||||
LOC(2.15):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 3, accumulated bits -3
|
||||
bl L.3.13
|
||||
bl LOC(3.13)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -5
|
||||
bl L.4.11
|
||||
bl LOC(4.11)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2+1), %o2
|
||||
|
||||
L.4.11:
|
||||
|
||||
LOC(4.11):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-5*2-1), %o2
|
||||
|
||||
|
||||
L.3.13:
|
||||
|
||||
|
||||
LOC(3.13):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
! depth 4, accumulated bits -7
|
||||
bl L.4.9
|
||||
bl LOC(4.9)
|
||||
srl %o5,1,%o5
|
||||
! remainder is positive
|
||||
subcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2+1), %o2
|
||||
|
||||
L.4.9:
|
||||
|
||||
LOC(4.9):
|
||||
! remainder is negative
|
||||
addcc %o3,%o5,%o3
|
||||
b 9f
|
||||
add %o2, (-7*2-1), %o2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
9:
|
||||
Lend_regular_divide:
|
||||
LOC(end_regular_divide):
|
||||
subcc %o4, 1, %o4
|
||||
bge Ldivloop
|
||||
bge LOC(divloop)
|
||||
tst %o3
|
||||
bl,a Lgot_result
|
||||
bl,a LOC(got_result)
|
||||
! non-restoring fixup here (one instruction only!)
|
||||
add %o3, %o1, %o3
|
||||
|
||||
|
||||
Lgot_result:
|
||||
LOC(got_result):
|
||||
|
||||
retl
|
||||
mov %o3, %o0
|
||||
|
||||
END(.urem)
|
||||
|
||||
@@ -220,6 +220,13 @@ elf_machine_lazy_rel (struct link_map *map, const Elf64_Rela *reloc)
|
||||
/* The SPARC overlaps DT_RELA and DT_PLTREL. */
|
||||
#define ELF_MACHINE_PLTREL_OVERLAP 1
|
||||
|
||||
/* The return value from dl-runtime's fixup, if it should be special. */
|
||||
#define ELF_FIXUP_RETURN_VALUE(map, result) \
|
||||
((map)->l_info[DT_SPARC(PLTFMT)] \
|
||||
&& (map)->l_info[DT_SPARC(PLTFMT)]->d_un.d_val == 2 \
|
||||
? (result) + (map)->l_info[DT_PLTGOT]->d_un.d_ptr + (map)->l_addr \
|
||||
: (result))
|
||||
|
||||
/* Set up the loaded object described by L so its unrelocated PLT
|
||||
entries will jump to the on-demand fixup code in dl-runtime.c. */
|
||||
|
||||
@@ -232,10 +239,10 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
if (l->l_info[DT_JMPREL] && lazy)
|
||||
{
|
||||
got = (Elf64_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
|
||||
got[1] = (Elf64_Addr) l; /* Identify this shared object. */
|
||||
/* This function will get called to fix up the GOT entry indicated by
|
||||
the offset on the stack, and then jump to the resolved address. */
|
||||
got[2] = (Elf64_Addr) &_dl_runtime_resolve;
|
||||
got[1] = (Elf64_Addr) &_dl_runtime_resolve;
|
||||
got[2] = (Elf64_Addr) l; /* Identify this shared object. */
|
||||
}
|
||||
|
||||
return lazy;
|
||||
@@ -248,9 +255,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
.type _dl_runtime_resolve, @function
|
||||
_dl_runtime_resolve:
|
||||
save %sp, -160, %sp
|
||||
mov %g5, %o0
|
||||
mov %g1, %o0
|
||||
call fixup
|
||||
mov %g6, %o1
|
||||
mov %g2, %o1
|
||||
jmp %o0
|
||||
restore
|
||||
.size _dl_runtime_resolve, .-_dl_runtime_resolve
|
||||
|
||||
@@ -41,13 +41,11 @@
|
||||
|
||||
/* Now two recommended cw */
|
||||
|
||||
/* Linux default:
|
||||
/* Linux and IEEE default:
|
||||
- extended precision
|
||||
- rounding to nearest
|
||||
- no exceptions */
|
||||
#define _FPU_DEFAULT 0x0
|
||||
|
||||
/* IEEE: same as above */
|
||||
#define _FPU_IEEE 0x0
|
||||
|
||||
/* Type of the control word. */
|
||||
|
||||
Reference in New Issue
Block a user